@@ -21,7 +21,7 @@ import org.apache.commons.compress.compressors.bzip2.{BZip2CompressorInputStream
2121object LanguageSpecificLinksGenerator {
2222
2323 // Todo: remove and include from org.dbpedia.extraction.util.IOUtils when merging Dump branch to main
24- object IOUtils {
24+ object IOUtils {
2525
2626 /**
2727 * Map from file suffix (without "." dot) to output stream wrapper
@@ -89,7 +89,8 @@ object LanguageSpecificLinksGenerator {
8989 if (! filesWriters.contains(fileName))
9090 {
9191 val file = new File (fileName)
92- val outputStream = new FileOutputStream (file)
92+ val richFile = new RichFile (file)
93+ val outputStream = IOUtils .outputStream(richFile)
9394 val outputStreamWriter = new OutputStreamWriter (outputStream)
9495 val bufferedWriter : BufferedWriter = new BufferedWriter (outputStreamWriter)
9596
@@ -124,24 +125,30 @@ object LanguageSpecificLinksGenerator {
124125 */
125126 if (option == " 0" )
126127 {
128+
127129 val inFile = new File (args(1 ))
128- val file = Source .fromFile(inFile)
130+ val inRichFile = new RichFile (inFile)
131+ val in = IOUtils .inputStream(inRichFile)
132+ val lines = Source .fromInputStream(in," UTF-8" ).getLines()
133+
129134
130135 // the language-link triples we need are those whose predicate is schema:about and whose subject is a Wikipedia page URI
131- val regx = """ .*\.wikipedia.org\/wiki.*<http:\/\/schema\.org\/about>""" .r
136+ val cond1 = " wikipedia.org/wiki"
137+ val cond2 = " <http://schema.org/about>"
132138
133139
134- for (ln <- file.getLines ){
140+ for (ln <- lines ){
135141 val triple = split(ln);
136142
137143 // check if the triple is in the correct .ttl format
138144 if (triple.length == 4 ){
139145
140- if (regx.findFirstIn(ln) != None ){
146+ if (ln.contains(cond1)&& ln.contains(cond2))
147+ {
141148 triple(0 ) = triple(0 ).replace(" .wikipedia.org/wiki" ," .dbpedia.org/resource" )
142149 val sub = UriDecoder .decode(triple(2 ))
143150 val obj = UriDecoder .decode(triple(0 ))
144- logToFile(" ./languagelinks.ttl" ,sub+ " " + " <http://www.w3.org/2002/07/owl#sameAs>" + " " + obj+ " ." )
151+ logToFile(" ./languagelinks.ttl.gz " ,sub+ " " + " <http://www.w3.org/2002/07/owl#sameAs>" + " " + obj+ " ." )
145152 }
146153
147154 }
@@ -222,7 +229,6 @@ object LanguageSpecificLinksGenerator {
222229 if (option == " test" )
223230 {
224231 val fileName = new File (args(1 ))
225-
226232 val file = new RichFile (fileName)
227233 val in = IOUtils .inputStream(file)
228234 val lines = Source .fromInputStream(in," UTF-8" ).getLines()
0 commit comments