@@ -67,6 +67,37 @@ object LanguageSpecificLinksGenerator {
6767 }
6868
6969
/**
 * Utility function that right-pads every line of a file with spaces so that
 * all lines end up with the same length as the longest line of the input.
 *
 * The input file is read twice: a first pass finds the maximum line length,
 * a second pass writes each padded line to the output through the file's
 * writer helpers (CreateFile / LogToFile), followed by CloseWriters().
 *
 * NOTE(review): the input is read straight from disk, so callers presumably
 * need any pending writes to fileIn to be flushed before calling this —
 * confirm against the LogToFile / CloseWriters implementation.
 *
 * @param fileIn  path of the file whose lines should be padded
 * @param fileOut path of the file that receives the padded lines
 */
private def padFile(fileIn: String, fileOut: String): Unit = {
  val inputFile = new File(fileIn)

  // first pass: length of the longest line in the file (0 for an empty file)
  val lengthScan = Source.fromFile(inputFile)
  val maxLength =
    try lengthScan.getLines().foldLeft(0)((longest, ln) => math.max(longest, ln.length))
    finally lengthScan.close() // release the file handle even if reading fails

  CreateFile(fileOut)

  // second pass: append as many spaces as each line is short of maxLength
  val paddingScan = Source.fromFile(inputFile)
  try {
    for (ln <- paddingScan.getLines()) {
      LogToFile(fileOut, ln + " " * (maxLength - ln.length))
    }
  } finally {
    paddingScan.close() // release the file handle even if writing fails
  }

  CloseWriters()
}
98+
99+
100+
70101
71102 def main (args : Array [String ]) {
72103 // todo : add some requires here to check for arguments
@@ -78,7 +109,7 @@ object LanguageSpecificLinksGenerator {
78109 * -----------
79110 * extracting language links related properties from the WikiData RDF dump file
80111 * and saving them in a separate languagelinks.ttl file
81- */
112+ */
82113 if (option == " 0" )
83114 {
84115 val baseDir = new File (args(1 ))
@@ -97,18 +128,21 @@ object LanguageSpecificLinksGenerator {
97128
98129 if (Regx .findFirstIn(ln) != None ){
99130 triple(0 ) = triple(0 ).replace(" .wikipedia.org/wiki" ," .dbpedia.org/resource" )
100- LogToFile (" ./languagelinks.nt " ,triple(2 )+ " " + " <http://www.w3.org/2002/07/owl#sameAs>" + " " + triple(0 )+ " ." )
131+ LogToFile (" ./languagelinks.ttl " ,triple(2 )+ " " + " <http://www.w3.org/2002/07/owl#sameAs>" + " " + triple(0 )+ " ." )
101132 }
102133
103134 }
104135
105136 }
106137
107- CloseWriters ()
138+ padFile(" ./languagelinks.ttl" ," ./languagelinks_Padded.ttl" )
139+
140+
108141 }
109142
143+
110144 /**
111- * option 1 :
145+ * option 2 :
112146 * ---------
113147 * from the extracted languagelinks.nt file
114148 * extracting language links and save them in languagelinks folder
@@ -142,6 +176,7 @@ object LanguageSpecificLinksGenerator {
142176
143177 // iterate over all triples todo: change to more efficient way
144178 for (ln <- Source .fromFile(baseDir).getLines){
179+
145180 triple = split(ln);
146181
147182 if (triple.length == 4 ){
@@ -161,6 +196,13 @@ object LanguageSpecificLinksGenerator {
161196 }
162197
163198
199+ if (option == " test" )
200+ {
201+
202+ padFile(args(1 ),args(1 ).replace(" test" ," testpadded" ))
203+
204+ }
205+
164206 }
165207
166208}
0 commit comments