@@ -164,19 +164,29 @@ class JsonWikiParser {
164164
165165
166166 /**
167- * Main functionality is parsing the WikiData Json page and extract facts in different languages the form
167+ * Main functionality is parsing the WikiData Json page and extracting facts together with their datatypes
168168 *
169- * <http://www.w3. org/2000/01/rdf-schema#label > "New York City"@en
170- * "New York "@fr
171- * "New York"@co
172- * @param page
173- * @return SimpleObject that contains no UriTriples and it's valueTriples are filled with different labels on the form
174- * Labelproperty - >
175- * lang -> label
169+ * time triple: <http://wikidata. org/entity/P227 > "+00000001931-03-03T00:00:00Z"^^xsd:date
170+ * URI triple: <http://wikidata.org/entity/P5> <http://wikidata.org/entity/Q22552>
171+ * String triple: <http://wikidata.org/entity/P2> "anyString"
172+ * coordinates triple: <http://wikidata.org/entity/P225> "122 2215"
173+ * <http://www.w3.org/2003/01/geo/wgs84_pos#lat> "31.2167"
174+ * <http://www.w3.org/2003/01/geo/wgs84_pos#geometry> "POINT(31.2167 30.0333)"^^<http://www.openlinksw.com/schemas/virtrdf#Geometry >
175+ * <http://www.w3.org/2003/01/geo/wgs84_pos#long> "30.0333"
176176 *
177- * <http://www.w3.org/2000/01/rdf-schema#label> ->
178- * "en" -> "New York City"
179- * "fr" -> "New York" "co" -> "New York"
177+ * scenario is as following :
178+ * 1- check that m (claim) is a "value" claim, not a "some value" or "no value" claim
179+ * 2- check that its "rank" is 1
180+ * 3- check for the third item in the claim
181+ * a- string > write as it is
182+ * b- time > - take the time property of the 4th item "time":"+00000001931-03-03T00:00:00Z"; its datatype is xsd:date
183+ * c- globe coordinate > - for unmapped facts change them to "lat long" without datatype
184+ * - for mapped facts create three triples lat , long , gpoint and return them as value types
185+ *
186+ * d- common media > replace spaces with _ and add "http://commons.wikimedia.org/wiki/File:" to the beginning of it, and its datatype is null
187+ * e- wikibase-entityid : get entity id /numeric-id and add "http://wikipeida.dbpedia.org/resource/Q" to it
188+ *
189+ * 4- depending on the output type decide to add it to the URITriples or ValuesTriples or MappedValueTriples or MappedURItriples
180190 */
181191 def getFacts (page : WikiPage ) : List [Node ] = {
182192
@@ -194,24 +204,9 @@ class JsonWikiParser {
194204 /** get all nodes under json key "claims" which will be in the form
195205 *Json sample : http://pastebin.com/9H6s2Nid
196206 */
197-
198- /** scenario is as following :
199- * 1- check that m has "value" not some value or no value
200- * 2- check that it's "rank":1
201- * 3- check for the third item in the claim
202- * a- string > write as it is
203- * b- time > take time property of the 4th item "time":"+00000001931-03-03T00:00:00Z" and it's type would be xsd:datetime
204- * c- globe coordinate > change them to DBpedia point(lat long)
205- * d- common media > relpace spaces with _ and add "http://commons.wikimedia.org/wiki/File:" to begining of it and it's datatype is null
206- * e- wikibase-entityid : get entity id /numeric-id and add "http://wikipeida.dbpedia.org/resource/Q" to it
207- *
208- * 4- depending on the output type decide to add it to the URITriples or ValuesTriples
209- */
210-
211207 var valueTriples = collection.mutable.Map [String , collection.mutable.Map [String ,String ]]()
212208 var URITriples = collection.mutable.Map [String , List [String ]]()
213209
214-
215210 // keep only the claims that are values and have rank == 1, as a List[JObject]
216211
217212 val claims = for {
@@ -226,7 +221,9 @@ class JsonWikiParser {
226221 for (claim <- claims)
227222 {
228223 val values = collection.mutable.Map [String ,String ]()
224+ val mappedValues = collection.mutable.Map [String ,String ]()
229225 var Uris = List [String ]()
226+ val mappedUris = List [String ]()
230227 val propID = (claim \ " m" )(1 ).extract[Int ]
231228 val property = " http://www.wikidata.org/entity/P" + propID
232229
@@ -246,11 +243,28 @@ class JsonWikiParser {
246243 val value = " http://commons.wikimedia.org/wiki/File:" + (claim \ " m" )(3 ).extract[String ].replace(" " ," _" ) // "" empty datatype means no datatype for URIs and URLs
247244 values += value -> " CommonMediaFile"
248245 valueTriples += property -> values
246+
247+ var commonMediaValues = collection.mutable.Map [String ,String ]()
248+ var commonMediaValueTriples = collection.mutable.Map [String , collection.mutable.Map [String ,String ]]()
249+
250+ commonMediaValues += (claim \ " m" )(3 ).extract[String ] -> " "
251+ commonMediaValueTriples += property -> commonMediaValues
252+
253+ nodes::= new SimpleNode (null ,commonMediaValueTriples,SimpleNode .CommonMediaFacts )
249254 }
250255 else
251256 {
252257 values += (claim \ " m" )(3 ).extract[String ] -> " "
253258 valueTriples += property -> values
259+
260+
261+ var stringValues = collection.mutable.Map [String ,String ]()
262+ var stringValueTriples = collection.mutable.Map [String , collection.mutable.Map [String ,String ]]()
263+
264+ stringValues += (claim \ " m" )(3 ).extract[String ] -> " "
265+ stringValueTriples += property -> stringValues
266+
267+ nodes::= new SimpleNode (null ,stringValueTriples,SimpleNode .StringFacts )
254268 }
255269 }
256270
@@ -259,14 +273,40 @@ class JsonWikiParser {
259273 {
260274 values += ((claim \ " m" )(3 )\ " time" ).extract[String ] -> " xsd:date"
261275 valueTriples += property -> values
276+
277+
278+ var timeValues = collection.mutable.Map [String ,String ]()
279+ var timeValueTriples = collection.mutable.Map [String , collection.mutable.Map [String ,String ]]()
280+
281+ timeValues += ((claim \ " m" )(3 )\ " time" ).extract[String ] -> " xsd:date"
282+ timeValueTriples += property -> timeValues
283+
284+
285+ nodes::= new SimpleNode (null ,timeValueTriples,SimpleNode .TimeFacts )
262286 }
263287 case " globecoordinate" =>
264288 {
265289 val lat = ((claim \ " m" )(3 )\ " latitude" ).extract[Int ]
266290 val long = ((claim \ " m" )(3 )\ " longitude" ).extract[Int ]
267291
292+ // for wikidata parser
268293 values += lat + " " + long -> " "
269294 valueTriples += property -> values
295+
296+ // for mappedwikidata parser
297+ // todo : add properties values in the wikidata mapped extractors
298+ var coordinatesValueTriples = collection.mutable.Map [String , collection.mutable.Map [String ,String ]]()
299+
300+ val latValue = collection.mutable.Map [String ,String ](lat.toString -> " " )
301+ coordinatesValueTriples += " geo:lat" -> latValue
302+
303+ val longValue = collection.mutable.Map [String ,String ](long.toString -> " " )
304+ coordinatesValueTriples += " geo:long" -> longValue
305+
306+ val pointValue = collection.mutable.Map [String ,String ](" POINT(" + lat + " " + long+ " )" -> " http://www.openlinksw.com/schemas/virtrdf#Geometry" )
307+ coordinatesValueTriples += " georss:point" -> pointValue
308+
309+ nodes::= new SimpleNode (null ,coordinatesValueTriples,SimpleNode .CoordinatesFacts )
270310 }
271311 case _=>
272312
@@ -280,7 +320,10 @@ class JsonWikiParser {
280320 }
281321
282322
283- // helper function for checking the type of property , used in getFacts method
323+
324+
325+
326+ // helper function for checking the type of property , used in getFacts method
284327 def isCommonMediaFiles (prop: String ) : Boolean = {
285328 val commonMediaFilesProperties = List (" P10" ," P109" ," P117" ," P14" ," P15" ," P154" ," P158" ," P18" ," P181" ," P207" ," P242" ," P367" ," P368" ," P41" ," P443" ," P491" ," P51" ," P623" ," P692" ," P94" )
286329 commonMediaFilesProperties.contains(prop)
@@ -289,5 +332,4 @@ class JsonWikiParser {
289332
290333
291334
292- }
293-
335+ }
0 commit comments