Skip to content

Commit c78c2d1

Browse files
committed
Merge pull request dbpedia#77 from ziorufus/Esperanto
Configuration for Esperanto
2 parents 6d71d61 + 0c4b0d7 commit c78c2d1

File tree

5 files changed

+42
-27
lines changed

5 files changed

+42
-27
lines changed

core/src/main/scala/org/dbpedia/extraction/config/dataparser/DateTimeParserConfig.scala

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ object DateTimeParserConfig
1212
"el" -> Map("ιανουάριος"->1,"φεβρουάριος"->2,"μάρτιος"->3,"απρίλιος"->4,"μάϊος"->5,"μάιος"->5,"ιούνιος"->6,"ιούλιος"->7,"αύγουστος"->8,"σεπτέμβριος"->9,"οκτώβριος"->10,"νοέμβριος"->11,"δεκέμβριος"->12,
1313
"ιανουαρίου"->1,"φεβρουαρίου"->2,"μαρτίου"->3,"απριλίου"->4,"μαΐου"->5,"μαίου"->5,"ιουνίου"->6,"ιουλίου"->7,"αυγούστου"->8,"σεπτεμβρίου"->9,"οκτωβρίου"->10,"νοεμβρίου"->11,"δεκεμβρίου"->12),
1414
"en" -> Map("january"->1,"february"->2,"march"->3,"april"->4,"may"->5,"june"->6,"july"->7,"august"->8,"september"->9,"october"->10,"november"->11,"december"->12),
15+
"eo" -> Map("januaro"->1,"februaro"->2,"marto"->3,"aprilo"->4,"majo"->5,"junio"->6,"julio"->7,"aŭgusto"->8,"septembro"->9,"oktobro"->10,"novembro"->11,"decembro"->12),
1516
"es" -> Map("enero"->1,"febrero"->2,"marzo"->3,"abril"->4,"mayo"->5,"junio"->6,"julio"->7,"agosto"->8,"septiembre"->9,"octubre"->10,"noviembre"->11,"diciembre"->12),
1617
"fr" -> Map("janvier"->1,"février"->2,"mars"->3,"avril"->4,"mai"->5,"juin"->6,"juillet"->7,"août"->8,"septembre"->9,"octobre"->10,"novembre"->11,"décembre"->12),
1718
"hr" -> Map("siječanj"->1,"veljača"->2,"ožujak"->3,"travanj"->4,"svibanj"->5,"lipanj"->6,"srpanj"->7,"kolovoz"->8,"rujan"->9,"listopad"->10,"studeni"->11,"prosinac"->12),
@@ -36,22 +37,24 @@ object DateTimeParserConfig
3637
// Don't change this unless you know how it is done.
3738
"ar" -> Map("ق.م." -> -1, "م." -> 1),
3839
"el" -> Map("ΠΧ"-> -1, "Π\\\\."-> -1, "Π\\"-> -1 , "ΜΧ"-> 1 , "Μ\\\\."-> 1, "Μ\\"-> 1),
40+
"eo" -> Map("a.K." -> -1, "p.K." -> -1),
3941
"es" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "AEC"-> 1, "A\\.E\\.C\\."-> 1 , "EC"-> 1, "E\\.C\\."-> 1),
42+
"fr" -> Map("av\\. J\\.-C\\."-> -1, "ap\\. J\\.-C\\." -> 1),
4043
"it" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "PEV"-> -1, "P\\.E\\.V\\."-> -1, "EV"-> 1, "E\\.V\\." -> 1),
4144
"nl" -> Map("v\\.Chr\\." -> -1, "n\\.C\\."-> 1, "v\\.C\\." -> -1, "n\\.Chr\\."-> 1, "voor Chr\\." -> -1, "na Chr\\."-> 1),
42-
"pt" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "AEC"-> 1, "A\\.E\\.C\\."-> 1 , "EC"-> 1, "E\\.C\\."-> 1),
43-
"fr" -> Map("av\\. J\\.-C\\."-> -1, "ap\\. J\\.-C\\." -> 1)
45+
"pt" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "AEC"-> 1, "A\\.E\\.C\\."-> 1 , "EC"-> 1, "E\\.C\\."-> 1)
4446
)
4547

4648
// Suffixes for ordinals such as 1st, 2nd, etc. (maybe add this to the infobox extractor's RankRegex val)
4749
val cardinalityRegexMap = Map(
4850
"en" -> "st|nd|rd|th",
4951
"el" -> "η|ης",
52+
"eo" -> "-a|-an",
5053
"es" -> "°|\\.°|°\\.",
54+
"fr" -> "er|nd|ème",
5155
"it" -> "°|\\.°|°\\.",
5256
"nl" -> "ste|de|e",
53-
"pt" -> "°|\\.°|°\\.",
54-
"fr" -> "er|nd|ème"
57+
"pt" -> "°|\\.°|°\\."
5558
)
5659

5760
// Specifies, for a template name (lower-cased), the property keys of the year, month and day
@@ -128,6 +131,11 @@ object DateTimeParserConfig
128131
"adina" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
129132
"adin parentesigabea" -> Map ("year" -> "1", "month"-> "2", "day" -> "3")
130133
),
134+
"fr" -> Map(
135+
"date" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
136+
"date de naissance" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
137+
"date de décès" -> Map ("year" -> "3", "month"-> "2", "day" -> "1")
138+
),
131139
"id" -> Map(
132140
"Mula tanggal dan usia" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
133141
"Tanggal lahir dan umur" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
@@ -213,11 +221,6 @@ object DateTimeParserConfig
213221
"Дата народження" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
214222
"Народився" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
215223
"Дата смерті" -> Map ("year" -> "3", "month"-> "2", "day" -> "1")
216-
),
217-
"fr" -> Map(
218-
"date" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
219-
"date de naissance" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
220-
"date de décès" -> Map ("year" -> "3", "month"-> "2", "day" -> "1")
221224
)
222225
)
223226

core/src/main/scala/org/dbpedia/extraction/config/dataparser/ParserUtilsConfig.scala

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,29 @@ object ParserUtilsConfig
4646
"τρισεκατομμυρια" -> 12,
4747
"τετράκις εκατομμύρια" -> 15
4848
),
49-
49+
"eo" -> Map(
50+
"mil" -> 3,
51+
"miliono" -> 6,
52+
"miliardo" -> 9,
53+
"biliono" -> 12,
54+
"biliardo" -> 15,
55+
"triliono" -> 18
56+
),
57+
"es" -> Map(
58+
"mil" -> 3,
59+
"millón" -> 6,
60+
"millardo" -> 9,
61+
"billón" -> 12,
62+
"trillón" -> 18,
63+
"cuatrillón" -> 24
64+
),
65+
"fr" -> Map(
66+
"mille" -> 3,
67+
"million" -> 6,
68+
"milliard" -> 9,
69+
"billion" -> 12,
70+
"trillion" -> 18
71+
),
5072
"nl" -> Map(
5173
"honderd" -> 2,
5274
"duizend" -> 3,
@@ -77,21 +99,6 @@ object ParserUtilsConfig
7799
"biliony" -> 12,
78100
"bilionów" -> 12,
79101
"bln" -> 12
80-
),
81-
"es" -> Map(
82-
"mil" -> 3,
83-
"millón" -> 6,
84-
"millardo" -> 9,
85-
"billón" -> 12,
86-
"trillón" -> 18,
87-
"cuatrillón" -> 24
88-
),
89-
"fr" -> Map(
90-
"mille" -> 3,
91-
"million" -> 6,
92-
"milliard" -> 9,
93-
"billion" -> 12,
94-
"trillion" -> 18
95102
)
96103
)
97104

core/src/main/scala/org/dbpedia/extraction/config/mappings/DisambiguationExtractorConfig.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ object DisambiguationExtractorConfig
1111
"de" -> " (Begriffsklärung)",
1212
"el" -> " (αποσαφήνιση)",
1313
"en" -> " (disambiguation)",
14+
"eo" -> " (apartigilo)",
1415
"es" -> " (desambiguación)",
1516
"eu" -> " (argipena)",
1617
"fr" -> " (homonymie)",

core/src/main/scala/org/dbpedia/extraction/config/mappings/HomepageExtractorConfig.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ object HomepageExtractorConfig
1414
"de" -> Set("website", "homepage", "webpräsenz", "web", "site", "siteweb", "site web"),/*cleanup*/
1515
"el" -> Set("ιστότοπος", "ιστοσελίδα"),
1616
"en" -> Set("website", "homepage", "web", "site"),
17+
"eo" -> Set("ĉefpaĝo", "retejo"),
1718
"es" -> Set("website", "homepage", "web", "site", "siteweb", "site web", "página", "sitio", "pagina"),/*cleanup*/
1819
"eu" -> Set("webgunea"),
1920
"fr" -> Set("website", "homepage", "web", "site", "siteweb", "site web"),/*cleanup*/
@@ -33,6 +34,7 @@ object HomepageExtractorConfig
3334
"de" -> "Weblinks?",
3435
"el" -> "(?:Εξωτερικοί σύνδεσμοι|Εξωτερικές συνδέσεις)",
3536
"en" -> "External links?",
37+
"eo" -> "Eksteraj ligiloj",
3638
"es" -> "(?:Enlaces externos|Enlace externo|Links externos|Link externo)",
3739
"eu" -> "Kanpo loturak?",
3840
"fr" -> "(?:Lien externe|Liens externes|Liens et documents externes)",
@@ -50,6 +52,7 @@ object HomepageExtractorConfig
5052
"de" -> "offizielle",
5153
"el" -> "(?:επίσημος|επίσημη)",
5254
"en" -> "official",
55+
"eo" -> "oficiala",
5356
"es" -> "oficial",
5457
"eu" -> "ofiziala?",
5558
"fr" -> "officiel",

core/src/main/scala/org/dbpedia/extraction/config/mappings/InfoboxExtractorConfig.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ object InfoboxExtractorConfig
88

99
val ignoreTemplates = Set("redirect", "seealso", "see_also", "main", "cquote", "chess diagram", "ipa", "lang", "تحويل")
1010

11-
val ignoreTemplatesRegex = List("cite.*".r, "citation.*".r, "assessment.*".r, "zh-.*".r, "llang.*".r, "IPA-.*".r)
11+
val ignoreTemplatesRegex = List("cite.*".r, "citation.*".r, "assessment.*".r, "zh-.*".r, "llang.*".r, "IPA-.*".r, "citajxo.*".r)
1212

1313
val ignoreProperties = Map (
1414
"en"-> Set("image", "image_photo"),
1515
"ar"-> Set("صورة"),
1616
"id"-> Set("foto", "gambar"),
17-
"el"-> Set("εικόνα", "εικονα", "Εικόνα", "Εικονα", "χάρτης", "Χάρτης")
17+
"el"-> Set("εικόνα", "εικονα", "Εικόνα", "Εικονα", "χάρτης", "Χάρτης"),
18+
"eo"-> Set("dosiero")
1819
)
1920

2021
// TODO: i18n

0 commit comments

Comments
 (0)