Skip to content

Commit 80fdc9e

Browse files
committed
Merge branch 'master' of https://github.com/dbpedia/extraction-framework into parseJson
2 parents ce48c3f + a503473 commit 80fdc9e

File tree

8 files changed

+31
-6
lines changed

8 files changed

+31
-6
lines changed

core/src/main/scala/org/dbpedia/extraction/dataparser/DateTimeParser.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class DateTimeParser ( context : {
6868

6969
private val MonthYearRegex = ("""(?iu)""" + prefix + """("""+monthRegex+""")\]?\]?,?\s*\[?\[?([0-9]{1,4})\s*(""" + eraRegex + """)?""" + postfix).r
7070

71-
private val YearRegex = ("""(?iu)""" + prefix + """(?<![\d\pL\w])(\d{1,4})(?!\d)\s*(""" + eraRegex + """)?""" + postfix).r
71+
private val YearRegex = ("""(?iu)""" + prefix + """(?<![\d\pL\w])(-?\d{1,4})(?!\d)\s*(""" + eraRegex + """)?""" + postfix).r
7272

7373

7474
override def parse(node : Node) : Option[Date] =
@@ -164,6 +164,7 @@ class DateTimeParser ( context : {
164164
catch
165165
{
166166
case e : IllegalArgumentException =>
167+
case e : MatchError =>
167168
}
168169
}
169170
}

core/src/main/scala/org/dbpedia/extraction/ontology/Ontology.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,7 @@ class Ontology (
1515
val classes : Map[String, OntologyClass],
1616
val properties : Map[String, OntologyProperty],
1717
val datatypes : Map[String, Datatype],
18-
val specializations : Map[(OntologyClass, OntologyProperty), UnitDatatype]
18+
val specializations : Map[(OntologyClass, OntologyProperty), UnitDatatype],
19+
val equivalentPropertiesMap : Map[OntologyProperty,Set[OntologyProperty]],
20+
val equivalentClassesMap : Map[OntologyProperty,Set[OntologyProperty]]
1921
)

core/src/main/scala/org/dbpedia/extraction/ontology/RdfNamespace.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ object RdfNamespace {
6060
val SKOS = ns("skos", "http://www.w3.org/2004/02/skos/core#")
6161
val SCHEMA = ns("schema", "http://schema.org/", false)
6262
val BIBO = ns("bibo", "http://purl.org/ontology/bibo/", false)
63+
val WIKIDATA = ns("wikidata", "http://www.wikidata.org/entity/", false)
6364

6465
/**
6566
* @return namespace for prefix and suffix, default namespace and full name if no match found

core/src/main/scala/org/dbpedia/extraction/ontology/io/OntologyReader.scala

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,12 @@ class OntologyReader
7272
val name = getName(page.title, _.capitalize(page.title.language.locale))
7373

7474
ontologyBuilder.classes ::= loadClass(name, templateNode)
75+
// Fill the equivalentClass map
7576

76-
for(specificProperty <- loadSpecificProperties(name, templateNode))
77+
for(specificProperty <- loadSpecificProperties(name, templateNode))
7778
{
7879
ontologyBuilder.specializedProperties ::= specificProperty
80+
// To check for equivalent Property Map
7981
}
8082
}
8183
else if(templateName == OntologyReader.OBJECTPROPERTY_NAME || templateName == OntologyReader.DATATYPEPROPERTY_NAME)
@@ -85,6 +87,7 @@ class OntologyReader
8587
for(property <- loadOntologyProperty(name, templateNode))
8688
{
8789
ontologyBuilder.properties ::= property
90+
// Fill the equivalentProperty map
8891
}
8992
}
9093
// TODO: read datatypes
@@ -298,6 +301,8 @@ class OntologyReader
298301
var properties = List[PropertyBuilder]()
299302
var datatypes = List[Datatype]()
300303
var specializedProperties = List[SpecificPropertyBuilder]()
304+
var equivalentPropertiesMap = Map[OntologyProperty,Set[OntologyProperty]] ()
305+
var equivalentClassesMap = Map[OntologyProperty,Set[OntologyProperty]] ()
301306

302307
def build() : Ontology =
303308
{
@@ -308,7 +313,9 @@ class OntologyReader
308313
new Ontology( classes.flatMap(_.build(classMap)).map(c => (c.name, c)).toMap,
309314
properties.flatMap(_.build(classMap, typeMap)).map(p => (p.name, p)).toMap,
310315
datatypes.map(t => (t.name, t)).toMap,
311-
specializedProperties.flatMap(_.build(classMap, propertyMap, typeMap)).toMap )
316+
specializedProperties.flatMap(_.build(classMap, propertyMap, typeMap)).toMap,
317+
equivalentPropertiesMap,
318+
equivalentClassesMap)
312319
}
313320
}
314321

core/src/main/scala/org/dbpedia/extraction/util/Language.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ object Language extends (String => Language)
100100
// Mappings are mostly based on similarity of the languages and in some cases on the regions where a related language is spoken.
101101
// See NonIsoLanguagesMappingTest and run it regularly.
102102
// TODO: move these to a config file
103+
// TODO: is this map still necessary? Since JDK 7, Locale officially handles three-letter codes.
103104
val nonIsoCodes = Map(
104105
"ace" -> "id", // Acehnese
105106
"als" -> "sq", // Tosk Albanian
@@ -204,6 +205,7 @@ object Language extends (String => Language)
204205
"tet" -> "id", // Tetum
205206
"tpi" -> "en", // Tok Pisin
206207
"tum" -> "ny", // Tumbuka
208+
"tyv" -> "ru", // Tuvan
207209
"udm" -> "ru", // Udmurt
208210
"vec" -> "it", // Venetian
209211
"vep" -> "fi",

core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,11 @@ class DateTimeParserTest extends FlatSpec with ShouldMatchers
535535
{
536536
parse("fr", "xsd:date", "15 mars 44") should equal (Some("1944-03-15"))
537537
}
538-
/*"DataParser" should "return date (Jully the 13th of the year 100 before J.-C.)" in
538+
"DataParser" should "return date (January the 1st of the year -711)" in
539+
{
540+
parse("fr", "xsd:date", "{{Date de naissance|1|1|-711}}") should equal (Some("-0711-01-01"))
541+
}
542+
/*"DataParser" should "return date (July the 13th of the year 100 before J.-C.)" in
539543
{
540544
parse("fr", "xsd:date", "13 juillet -100 av. J.-C.") should equal (Some("-0100-07-13"))
541545
}*/

dump/src/main/scala/org/dbpedia/extraction/dump/download/LanguageDownloader.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ class LanguageDownloader(baseUrl: URL, baseDir: File, wikiName: String, language
8080

8181
// all the links we need
8282
val links = new HashMap[String, String]()
83-
for (fileName <- fileNames) links(fileName) = "<a href=\""+wiki+"-"+date+"-"+fileName+"\">"
83+
// Here we should set "<a href=\"/"+wiki+"/"+date+"/"+wiki+"-"+date+"-"+fileName+"\">"
84+
// but "\"/"+wiki+"/"+date+"/" does not exist in incremental updates; matching only on the trailing "\">" should do the trick
85+
for (fileName <- fileNames) links(fileName) = wiki+"-"+date+"-"+fileName+"\">"
8486

8587
downloader.downloadTo(datePage, dateDir) // creates index.html
8688
forEachLine(new File(dateDir, "index.html")) { line =>

live/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@
183183
<artifactId>model</artifactId>
184184
<version>0.4</version>
185185
<scope>compile</scope>
186+
<exclusions>
187+
<exclusion>
188+
<groupId>com.owldl</groupId>
189+
<artifactId>pellet</artifactId>
190+
</exclusion>
191+
</exclusions>
186192
</dependency>
187193

188194
</dependencies>

0 commit comments

Comments
 (0)