@@ -9,7 +9,6 @@ import sys.process._
99
1010/**
1111 * User: hadyelsahar
12- *
1312 */
1413
1514
@@ -66,39 +65,6 @@ object LanguageSpecificLinksGenerator {
6665 }
6766 }
6867
69-
70- /**
71- * Utility that copies fileIn to fileOut with every line right-padded with spaces to the length of the longest line in fileIn.
72- * @param fileIn path of the input file; it is read twice (once to measure the longest line, once to emit the padded lines)
73- * @param fileOut name handed to CreateFile and LogToFile as the destination of the padded lines (NOTE(review): both helpers are defined outside this view; confirm they create and append to this path)
74- */
75- private def padFile (fileIn : String , fileOut: String )
76- {
77- val baseDir = new File (fileIn)
78-
79- // first pass: find the maximum line length in the input (triples) file
80- var maxLength = 0
81-
82- for (ln <- Source .fromFile(baseDir).getLines) // NOTE(review): this Source is never bound to a name or closed
83- {
84- if (ln.length > maxLength) maxLength = ln.length
85- }
86-
87- CreateFile (fileOut)
88-
89- // second pass: re-read the input and log each line with trailing spaces appended up to maxLength
90- for (ln <- Source .fromFile(baseDir).getLines) // NOTE(review): second Source over the same file, also never closed
91- {
92- val newline = ln+ " " * (maxLength- ln.length)
93- LogToFile (fileOut,newline)
94- }
95-
96- CloseWriters () // presumably flushes/closes the writer opened by CreateFile; helper not visible here, verify
97- }
98-
99-
100-
101-
10268 def main (args : Array [String ]) {
10369 // todo : add some requires here to check for arguments
10470 // arg1 = 0
@@ -135,75 +101,91 @@ object LanguageSpecificLinksGenerator {
135101
136102 }
137103
138- padFile(" ./languagelinks.ttl" ," ./languagelinks_Padded.ttl" )
139-
104+ CloseWriters
140105
141106 }
142107
143-
144108 /**
145- * option 2 :
109+ * option 1 :
146110 * ---------
147111 * from the extracted languagelinks.nt file
148112 * extracting language links and save them in languagelinks folder
149113 */
150114 if (option == " 1" )
151115 {
116+ val startTime = System .nanoTime
117+
118+ // opening master file for language links
152119 val baseDir = new File (args(1 ))
153120 val file = Source .fromFile(baseDir)
154121
122+ // creating folder for output files
123+ new File (" ./llinkfiles" ).mkdir()
155124
156- for (ln <- file.getLines){
157- var triple = split(ln);
125+ var Q = " "
126+ var oldQ = " "
127+ var triplesObjects = List [String ]()
128+ val lines = file.getLines
129+ for (ln <- lines){
158130
159- if (triple.length == 4 ){
160- val Q1 = triple(0 )
161- val Obj1 = triple(2 )
162-
163- val langRegx = """ <http:\/\/(.*).dbpedia.org\/resource\/.*>""" .r
164- val langRegx(lang) = triple(2 )
131+ val triple = split(ln);
165132
166- // make folder for ll files
167- new File (" ./llinkfiles" ).mkdir()
133+ // gather all objects of triples until the subject changes
134+ oldQ = Q
135+ Q = triple(0 )
136+ val tripleObj = triple(2 )
168137
169- // create languagefile for each language if doesn't exist before
170- val LLFileName = " ./llinkfiles/interlanguage_links_same_as_" + lang+ " .ttl"
138+ // for each chuck ( the subject changed or if it's the last line ) , make combinations and save to files
171139
172- if (! filesWriters.contains( LLFileName ) )
140+ if ((oldQ != Q && oldQ != " " ) || ! lines.hasNext )
173141 {
174- CreateFile (LLFileName )
175- }
142+ // println(oldQ)
143+ for (obj <- triplesObjects)
144+ {
145+ // extracting language
146+ val langRegx = """ <http:\/\/(.*).dbpedia.org\/resource\/.*>""" .r
147+ val langRegx(lang) = obj
176148
177- // iterate over all triples todo: change to more efficient way
178- for (ln <- Source .fromFile(baseDir).getLines){
149+ // creating file for language if not exists
150+ val fileName = " ./llinkfiles/interlanguage_links_same_as_ " + lang + " .ttl "
179151
180- triple = split(ln);
152+ if (! filesWriters.contains(fileName))
153+ {
154+ CreateFile (fileName)
155+ }
181156
182- if (triple.length == 4 ){
183- val Q2 = triple( 0 )
184- val Obj2 = triple( 2 )
185- val langRegx(innerLang) = triple( 2 )
157+ // creating combination string
158+ var LLString : String = " "
159+ // removing itself
160+ val innerTripleObjects = triplesObjects.diff( List (obj) )
186161
187- if (lang != innerLang && Q1 == Q2 )
162+
163+ for (obj2 <- innerTripleObjects)
188164 {
189- LogToFile (LLFileName ,Obj1 + " " + " <http://www.w3.org/2002/07/owl#sameAs>" + " " + Obj2 + " ." )
165+ // LLString += obj +" <http://www.w3.org/2002/07/owl#sameAs> " +obj2+" .\n"
166+ LogToFile (fileName,obj + " <http://www.w3.org/2002/07/owl#sameAs> " + obj2+ " .\n " )
190167 }
168+
169+ // LogToFile(fileName,LLString)
191170 }
171+
172+ // empty the Chunk container
173+ triplesObjects = List [String ]()
192174 }
175+
176+ triplesObjects = triplesObjects :+ tripleObj
193177 }
194- }
195- CloseWriters ()
196- }
197178
179+ CloseWriters ()
198180
199- if (option == " test" )
200- {
181+ print(" time taken: " + (System .nanoTime - startTime)/ 1000000000 + " secs" )
201182
202- padFile(args(1 ),args(1 ).replace(" test" ," testpadded" ))
203183
204184 }
205185
186+
206187 }
207188
189+
208190}
209191
0 commit comments