Skip to content

Commit b15a0b7

Browse files
committed
adding padding function for text files
1 parent 383173d commit b15a0b7

File tree

1 file changed

+46
-4
lines changed

1 file changed

+46
-4
lines changed

scripts/src/main/scala/org/dbpedia/extraction/scripts/LanguageSpecificLinksGenerator.scala

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,37 @@ object LanguageSpecificLinksGenerator {
6767
}
6868

6969

70+
/**
 * Utility function that pads every line of a text file with trailing spaces
 * so that all lines end up with the same length as the longest line.
 *
 * The input file is read twice: once to find the maximum line length and once
 * to write the padded lines. Length is measured with String.length (characters,
 * not encoded bytes).
 * NOTE(review): if equal *byte* length per line is required downstream, non-ASCII
 * lines would need different handling - confirm against the consumer of the file.
 *
 * @param fileIn  path of the text file whose lines are to be padded
 * @param fileOut path of the output file; it is created through CreateFile and
 *                written line by line through LogToFile
 */
private def padFile(fileIn: String, fileOut: String): Unit = {
  val inputFile = new File(fileIn)

  // first pass: find the maximum line length in the input file;
  // the source is closed explicitly so the file handle is not leaked
  val lengthSource = Source.fromFile(inputFile)
  val maxLength =
    try lengthSource.getLines().foldLeft(0)((longest, ln) => math.max(longest, ln.length))
    finally lengthSource.close()

  CreateFile(fileOut)

  // second pass: append as many spaces as each line is short of maxLength
  val paddingSource = Source.fromFile(inputFile)
  try {
    for (ln <- paddingSource.getLines()) {
      val paddedLine = ln + " " * (maxLength - ln.length)
      LogToFile(fileOut, paddedLine)
    }
  }
  finally paddingSource.close()

  CloseWriters()
}
98+
99+
100+
70101

71102
def main(args: Array[String]) {
72103
//todo : add some requires here to check for arguments
@@ -78,7 +109,7 @@ object LanguageSpecificLinksGenerator {
78109
* -----------
79110
*extracting language links related properties from the WikiData RDF Dump File
80111
* and save them in a separated languagelinks.nt file
81-
*/
112+
*/
82113
if(option == "0")
83114
{
84115
val baseDir = new File(args(1))
@@ -97,18 +128,21 @@ object LanguageSpecificLinksGenerator {
97128

98129
if(Regx.findFirstIn(ln) != None ){
99130
triple(0) = triple(0).replace(".wikipedia.org/wiki",".dbpedia.org/resource")
100-
LogToFile("./languagelinks.nt",triple(2)+" "+"<http://www.w3.org/2002/07/owl#sameAs>"+" "+triple(0)+" .")
131+
LogToFile("./languagelinks.ttl",triple(2)+" "+"<http://www.w3.org/2002/07/owl#sameAs>"+" "+triple(0)+" .")
101132
}
102133

103134
}
104135

105136
}
106137

107-
CloseWriters()
138+
padFile("./languagelinks.ttl","./languagelinks_Padded.ttl")
139+
140+
108141
}
109142

143+
110144
/**
111-
* option 1:
145+
* option 2:
112146
* ---------
113147
* from the extracted languagelinks.nt file
114148
* extracting language links and save them in languagelinks folder
@@ -142,6 +176,7 @@ object LanguageSpecificLinksGenerator {
142176

143177
//iterate over all triples todo: change to more efficient way
144178
for(ln <- Source.fromFile(baseDir).getLines){
179+
145180
triple = split(ln);
146181

147182
if(triple.length ==4 ){
@@ -161,6 +196,13 @@ object LanguageSpecificLinksGenerator {
161196
}
162197

163198

199+
if(option =="test")
200+
{
201+
202+
padFile(args(1),args(1).replace("test","testpadded"))
203+
204+
}
205+
164206
}
165207

166208
}

0 commit comments

Comments
 (0)