@@ -3,6 +3,7 @@ package org.dbpedia.extraction.scripts
33import java .util .Locale
44import java .io .File
55import org .dbpedia .extraction .util .RichFile .wrapFile
6+ import org .dbpedia .extraction .util .RichString .wrapString
67import org .dbpedia .extraction .util .StringPlusser
78import org .dbpedia .extraction .scripts .IOUtils .readLines
89import scala .collection .mutable .{Map ,HashMap }
@@ -90,7 +91,7 @@ abstract class Fileset(
9091
// Abstract member of Fileset: yields a String for the given language, modifier and format.
// No body is given here, so concrete subclasses must implement it.
// NOTE(review): presumably the file path/name of the dataset file - confirm against implementations.
9192 protected def path (language : String , modifier : String , format : String ): String
9293
// `anchor` is derived from `name`: selected characters are substituted, then the result is
// lower-cased with Locale.ENGLISH so the casing does not depend on the JVM default locale.
// The removed line used String.replace; the added line uses replaceChars (brought in via the
// RichString.wrapString import). NOTE(review): exact replaceChars semantics are defined in
// RichString, which is not shown here - confirm how unmatched characters are handled.
93- val anchor = name.replace (" " , " " ).toLowerCase(Locale .ENGLISH )
94+ val anchor = name.replaceChars (" () " , " - " ).toLowerCase(Locale .ENGLISH )
9495}
9596
9697class Ontology (name : String , file : String , text : String )
@@ -139,8 +140,8 @@ val datasets = List(
139140 ),
140141 List (
141142 new Dataset (" Titles" , " labels" , " //Titles of all Wikipedia Articles in the corresponding language.//" ),
142- new Dataset (" Short Abstracts" , " short_abstracts" , " //Short Abstracts (max. 500 chars long) of Wikipedia articles//" ),
143- new Dataset (" Extended Abstracts" , " long_abstracts" , " //Additional, extended English abstracts .//" ),
143+ new Dataset (" Short Abstracts" , " short_abstracts" , " //Short Abstracts (max. 500 characters long) of Wikipedia articles. //" ),
144+ new Dataset (" Extended Abstracts" , " long_abstracts" , " //Full abstracts of Wikipedia articles, usually the first section .//" ),
144145 new Dataset (" Images" , " images" , " //Main image and corresponding thumbnail from Wikipedia article.//" )
145146 ),
146147 List (
@@ -262,7 +263,8 @@ def generate: Unit = {
262263 " \n " +
263264 " [[#1]] Most files were packed with ((http://compression.ca/pbzip2/ pbzip2)), which generates concatenated streams. " +
264265 " Some older bzip2 decompressors, for example ((https://issues.apache.org/jira/browse/COMPRESS-162 Apache Commons Compress before version 1.4)), " +
265- " cannot handle this format. Please make sure that you use the latest version and let us know if you experience any problems.\n " +
266+ " cannot handle this format. Please make sure that you use the latest version. " +
267+ " ((https://lists.sourceforge.net/lists/listinfo/dbpedia-discussion/ Let us know)) if you experience any problems.\n " +
266268 " \n " +
267269 mark(" " )
268270
0 commit comments