Skip to content

Commit 0d434ed

Browse files
committed
adapted the parser to changes in wikidata json format
1 parent 3ed5cb8 commit 0d434ed

File tree

1 file changed

+41
-10
lines changed

1 file changed

+41
-10
lines changed

core/src/main/scala/org/dbpedia/extraction/wikiparser/impl/json/JsonWikiParser.scala

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,33 +69,62 @@ class JsonWikiParser {
6969
case _ => throw new IllegalStateException("Invalid JSON representation!")
7070
}
7171

72+
73+
//wikidata new format : http://pastie.org/8859751#1015,1028
7274
/** get all nodes under json key "links" which will be in the form
73-
* {
74-
* "arwiki": "نيويورك (مدينة)",
75-
* "frwiki": "New York",
76-
* "eowiki": "Novjorko",
77-
* "plwiki": "Nowy Jork"
78-
* }
75+
* "links":{
76+
"enwiki":{
77+
"name":"Scotland",
78+
"badges":[
79+
80+
]
81+
},
82+
"nlwiki":{
83+
"name":"Schotland",
84+
"badges":[
85+
86+
]
87+
}}
7988
*/
89+
8090
val interLinks = (jsonObjMap \ "links") match {
8191
case JObject(links) => links
8292
case _ => List()
8393
}
8494

8595

8696
var interLinksMap = collection.mutable.Map[String, List[String]]()
97+
8798
var values = List[String]()
8899

89100
interLinks.foreach { interLink : JField =>
90101
interLink.name match {
91102
//use regex to convert the wiki name to its language code, e.g. arwiki -> ar
92103

93104
case WikiLanguageRegex(lang) => {
94-
var wikiPageName :String = interLink.value.extract[String]
95-
val suffix = wikiPageName.replace(" ","_")
96-
val prefix = if (lang=="en") "" else lang+"."
97105

98-
values ::= "http://"+prefix+"dbpedia.org/resource/"+suffix+""
106+
//extract wikipage name from json
107+
var wikiPageName = ""
108+
109+
interLink.value.asInstanceOf[JObject].obj.foreach { j : JField =>
110+
if (j.name == "name")
111+
{
112+
wikiPageName = j.value.extract[String]
113+
}
114+
}
115+
//check if wikiPageName is not empty
116+
wikiPageName match {
117+
case "" =>
118+
case _ => {
119+
120+
val suffix = wikiPageName.replace(" ","_")
121+
val prefix = if (lang=="en") "" else lang+"."
122+
123+
values ::= "http://"+prefix+"dbpedia.org/resource/"+suffix+""
124+
}
125+
}
126+
127+
99128
}
100129
case _ =>
101130
}
@@ -145,6 +174,8 @@ class JsonWikiParser {
145174
// "pl": "Nowy Jork",
146175
// }
147176
// Json sample : http://pastebin.com/zygpzhJK
177+
// new format, in use after 4/3/2014:
178+
// http://pastie.org/8859751#1015,1028
148179

149180
val interLinks = (jsonObjMap \ "label") match {
150181
case JObject(links) => links

0 commit comments

Comments
 (0)