Skip to content

Commit 95dd250

Browse files
committed
Merge branch 'master' of https://github.com/dbpedia/extraction-framework into lang-link-extract
2 parents 39821c8 + 8c79b99 commit 95dd250

File tree

321 files changed

+1439
-840
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

321 files changed

+1439
-840
lines changed

core/src/main/scala/org/dbpedia/extraction/config/dataparser/DateTimeParserConfig.scala

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ object DateTimeParserConfig
1212
"el" -> Map("ιανουάριος"->1,"φεβρουάριος"->2,"μάρτιος"->3,"απρίλιος"->4,"μάϊος"->5,"μάιος"->5,"ιούνιος"->6,"ιούλιος"->7,"αύγουστος"->8,"σεπτέμβριος"->9,"οκτώβριος"->10,"νοέμβριος"->11,"δεκέμβριος"->12,
1313
"ιανουαρίου"->1,"φεβρουαρίου"->2,"μαρτίου"->3,"απριλίου"->4,"μαΐου"->5,"μαίου"->5,"ιουνίου"->6,"ιουλίου"->7,"αυγούστου"->8,"σεπτεμβρίου"->9,"οκτωβρίου"->10,"νοεμβρίου"->11,"δεκεμβρίου"->12),
1414
"en" -> Map("january"->1,"february"->2,"march"->3,"april"->4,"may"->5,"june"->6,"july"->7,"august"->8,"september"->9,"october"->10,"november"->11,"december"->12),
15+
"eo" -> Map("januaro"->1,"februaro"->2,"marto"->3,"aprilo"->4,"majo"->5,"junio"->6,"julio"->7,"aŭgusto"->8,"septembro"->9,"oktobro"->10,"novembro"->11,"decembro"->12),
1516
"es" -> Map("enero"->1,"febrero"->2,"marzo"->3,"abril"->4,"mayo"->5,"junio"->6,"julio"->7,"agosto"->8,"septiembre"->9,"octubre"->10,"noviembre"->11,"diciembre"->12),
1617
"fr" -> Map("janvier"->1,"février"->2,"mars"->3,"avril"->4,"mai"->5,"juin"->6,"juillet"->7,"août"->8,"septembre"->9,"octobre"->10,"novembre"->11,"décembre"->12),
1718
"hr" -> Map("siječanj"->1,"veljača"->2,"ožujak"->3,"travanj"->4,"svibanj"->5,"lipanj"->6,"srpanj"->7,"kolovoz"->8,"rujan"->9,"listopad"->10,"studeni"->11,"prosinac"->12),
19+
"id" -> Map("januari"->1,"februari"->2,"maret"->3,"april"->4,"mei"->5,"juni"->6,"juli"->7,"agustus"->8,"september"->9,"oktober"->10,"november"->11,"desember"->12),
1820
"it" -> Map("gennaio"->1,"febbraio"->2,"marzo"->3,"aprile"->4,"maggio"->5,"giugno"->6,"luglio"->7,"agosto"->8,"settembre"->9,"ottobre"->10,"novembre"->11,"dicembre"->12),
1921
"nl" -> Map("januari"->1,"februari"->2,"maart"->3,"april"->4,"mei"->5,"juni"->6,"juli"->7,"augustus"->8,"september"->9,"oktober"->10,"november"->11,"december"->12),
2022
"pl" -> Map("stycznia"->1,"lutego"->2,"marca"->3,"kwietnia"->4,"maja"->5,"czerwca"->6,"lipca"->7,"sierpnia"->8,"września"->9,"października"->10,"listopada"->11,"grudnia"->12),
@@ -35,7 +37,9 @@ object DateTimeParserConfig
3537
// Don't change this unless you know how it is done.
3638
"ar" -> Map("ق.م." -> -1, "م." -> 1),
3739
"el" -> Map("ΠΧ"-> -1, "Π\\\\."-> -1, "Π\\"-> -1 , "ΜΧ"-> 1 , "Μ\\\\."-> 1, "Μ\\"-> 1),
40+
// Esperanto era markers: "a.K." = "antaŭ Kristo" (BC, -1), "p.K." = "post Kristo" (AD, +1).
"eo" -> Map("a.K." -> -1, "p.K." -> 1),
3841
// "AEC" / "A.E.C." = "antes de la era común" (BCE), so it carries the negative sign like "AC"; "EC" = "era común" (CE).
"es" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "AEC"-> -1, "A\\.E\\.C\\."-> -1 , "EC"-> 1, "E\\.C\\."-> 1),
42+
"fr" -> Map("av\\. J\\.-C\\."-> -1, "ap\\. J\\.-C\\." -> 1),
3943
"it" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "PEV"-> -1, "P\\.E\\.V\\."-> -1, "EV"-> 1, "E\\.V\\." -> 1),
4044
"nl" -> Map("v\\.Chr\\." -> -1, "n\\.C\\."-> 1, "v\\.C\\." -> -1, "n\\.Chr\\."-> 1, "voor Chr\\." -> -1, "na Chr\\."-> 1),
4145
// "AEC" / "A.E.C." = "antes da Era Comum" (BCE), so it carries the negative sign like "AC"; "EC" = "Era Comum" (CE).
"pt" -> Map("AC"-> -1, "A\\.C\\."-> -1, "DC"-> 1, "D\\.C\\."-> 1, "AD"-> 1, "A\\.D\\."-> 1, "AEC"-> -1, "A\\.E\\.C\\."-> -1 , "EC"-> 1, "E\\.C\\."-> 1)
@@ -45,7 +49,9 @@ object DateTimeParserConfig
4549
val cardinalityRegexMap = Map(
4650
"en" -> "st|nd|rd|th",
4751
"el" -> "η|ης",
52+
"eo" -> "-a|-an",
4853
"es" -> "°|\\.°|°\\.",
54+
"fr" -> "er|nd|ème",
4955
"it" -> "°|\\.°|°\\.",
5056
"nl" -> "ste|de|e",
5157
"pt" -> "°|\\.°|°\\."
@@ -77,7 +83,11 @@ object DateTimeParserConfig
7783
//conditional mapping .. for multiple matching ifPropertyNumHasValue could be a regex (not implemented for multiple)
7884
"birthdeathage" -> Map ("ifPropertyNum" -> "1", "ifPropertyNumHasValue" -> "B", //"BirthDeathAge"
7985
"year" -> "2", "month"-> "3", "day" -> "4",
80-
"elseYear" -> "4", "elseMonth"-> "5", "elseDay" -> "6")
86+
"elseYear" -> "4", "elseMonth"-> "5", "elseDay" -> "6"),
87+
"NBA Year" -> Map ("year" -> "1"),
88+
"Nbay" -> Map ("year" -> "1"),
89+
"NHL_Year" -> Map ("year" -> "1"),
90+
"nhly" -> Map ("year" -> "1")
8191
),
8292

8393
// alphabetically for other languages
@@ -121,6 +131,29 @@ object DateTimeParserConfig
121131
"adina" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
122132
"adin parentesigabea" -> Map ("year" -> "1", "month"-> "2", "day" -> "3")
123133
),
134+
"fr" -> Map(
135+
"date" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
136+
"date de naissance" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
137+
"date de décès" -> Map ("year" -> "3", "month"-> "2", "day" -> "1")
138+
),
139+
"id" -> Map(
140+
"Mula tanggal dan usia" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
141+
"Tanggal lahir dan umur" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
142+
"Tanggal kematian dan umur" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
143+
"Umur pada tanggal" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
144+
"umur" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
145+
"Tanggal lahir dan umur2/doc" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
146+
"Tanggal lahir dan umur2" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"),
147+
"lahirmati" -> Map ("year" -> "2", "month"-> "3", "day" -> "4"),
148+
"birth date and age" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Birth date and age"
149+
"birth date and age2" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Birth date and age2"
150+
"death date and age" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Death date and age"
151+
"birth date" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Birth date"
152+
"death date" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Death date"
153+
"bda" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Bda"
154+
"dob" -> Map ("year" -> "1", "month"-> "2", "day" -> "3"), //"Dob"
155+
"start date" -> Map ("year" -> "1", "month"-> "2", "day" -> "3") //"Start date"
156+
),
124157
"it" -> Map(
125158
"Data nascita" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),
126159
"data nascita" -> Map ("year" -> "3", "month"-> "2", "day" -> "1"),

core/src/main/scala/org/dbpedia/extraction/config/dataparser/DurationParserConfig.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,14 @@ object DurationParserConfig
217217
"meses" -> "month",
218218
"año" -> "year",
219219
"años" -> "year"
220-
)
220+
),
221+
"id" -> Map(
222+
"detik" -> "second",
223+
"menit" -> "minute",
224+
"jam" -> "hour",
225+
"hari" -> "day",
226+
"bulan" -> "month",
227+
"tahun" -> "year"
228+
)
221229
)
222230
}

core/src/main/scala/org/dbpedia/extraction/config/dataparser/FlagTemplateParserConfig.scala

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,254 @@ object FlagTemplateParserConfig
14991499
"ZWE"->"Zimbabwe",
15001500
"ARE"->"Zjednoczone Emiraty Arabskie"
15011501
), // pl end
1502+
"id" ->
1503+
Map(
1504+
"AFG"->"Afganistan",
1505+
"ALB"->"Albania",
1506+
"DZA"->"Aljazair",
1507+
"AND"->"Andorra",
1508+
"AGO"->"Angola",
1509+
"AIA"->"Anguila",
1510+
"ATA"->"Antarktika",
1511+
"ATG"->"Antigua dan Barbuda",
1512+
"SAU"->"Arab Saudi",
1513+
"ARG"->"Argentina",
1514+
"ARM"->"Armenia",
1515+
"ABW"->"Aruba",
1516+
"AUS"->"Australia",
1517+
"AUT"->"Austria",
1518+
"AZE"->"Azerbaijan",
1519+
"BHS"->"Bahama",
1520+
"BHR"->"Bahrain",
1521+
"BGD"->"Bangladesh",
1522+
"BRB"->"Barbados",
1523+
"BEL"->"Belgia",
1524+
"BLZ"->"Belize",
1525+
"BEN"->"Benin",
1526+
"BMU"->"Bermuda",
1527+
"BTN"->"Bhutan",
1528+
"BLR"->"Belarus",
1529+
"MMR"->"Myanmar",
1530+
"BOL"->"Bolivia",
1531+
"BIH"->"Bosnia dan Herzegovina",
1532+
"BWA"->"Botswana",
1533+
"BRA"->"Brasil",
1534+
"BRN"->"Brunei",
1535+
"IOT"->"Teritori Samudra Hindia Britania",
1536+
"VGB"->"Kepulauan Virgin Britania Raya",
1537+
"BGR"->"Bulgaria",
1538+
"BFA"->"Burkina Faso",
1539+
"BDI"->"Burundi",
1540+
"CHL"->"Chili",
1541+
"CHN"->"Republik Rakyat Cina",
1542+
"HRV"->"Kroasia",
1543+
"CYP"->"Siprus",
1544+
"TCD"->"Chad",
1545+
"MNE"->"Montenegro",
1546+
"CZE"->"Republik Ceko",
1547+
"UMI"->"Kepulauan Terluar Kecil Amerika Serikat",
1548+
"DNK"->"Denmark",
1549+
"COD"->"Republik Demokratik Kongo",
1550+
"DMA"->"Dominika",
1551+
"DOM"->"Republik Dominika",
1552+
"DJI"->"Djibouti",
1553+
"EGY"->"Mesir",
1554+
"ECU"->"Ekuador",
1555+
"ERI"->"Eritrea",
1556+
"EST"->"Estonia",
1557+
"ETH"->"Ethiopia",
1558+
"FLK"->"Kepulauan Falkland",
1559+
"FJI"->"Fiji",
1560+
"PHL"->"Filipina",
1561+
"FIN"->"Finlandia",
1562+
"FRA"->"Perancis",
1563+
"ATF"->"Teritori Perancis Selatan",
1564+
"GAB"->"Gabon",
1565+
"GMB"->"Gambia",
1566+
"SGS"->"Georgia Selatan dan Kepulauan Sandwich Selatan",
1567+
"GHA"->"Ghana",
1568+
"GIB"->"Gibraltar",
1569+
"GRC"->"Yunani",
1570+
"GRD"->"Grenada",
1571+
"GRL"->"Greenland",
1572+
"GEO"->"Georgia",
1573+
"GUM"->"Guam",
1574+
"GGY"->"Guernsey",
1575+
"GUF"->"Guyana Perancis",
1576+
"GUY"->"Guyana",
1577+
"GLP"->"Guadeloupe",
1578+
"GTM"->"Guatemala",
1579+
"GNB"->"Guinea-Bissau",
1580+
"GNQ"->"Guinea Khatulistiwa",
1581+
"GIN"->"Guinea",
1582+
"HTI"->"Haiti",
1583+
"ESP"->"Spanyol",
1584+
"NLD"->"Belanda",
1585+
"HND"->"Honduras",
1586+
"HKG"->"Hong Kong",
1587+
"IND"->"India",
1588+
"IDN"->"Indonesia",
1589+
"IRQ"->"Irak",
1590+
"IRN"->"Iran",
1591+
"IRL"->"Republik Irlandia",
1592+
"ISL"->"Islandia",
1593+
"ISR"->"Israel",
1594+
"JAM"->"Jamaika",
1595+
"JPN"->"Jepang",
1596+
"YEM"->"Yaman",
1597+
"JEY"->"Jersey",
1598+
"JOR"->"Yordania",
1599+
"CYM"->"Kepulauan Cayman",
1600+
"KHM"->"Kamboja",
1601+
"CMR"->"Kamerun",
1602+
"CAN"->"Kanada", // Indonesian spelling has no accent
1603+
"QAT"->"Qatar",
1604+
"KAZ"->"Kazakhstan",
1605+
"KEN"->"Kenya",
1606+
"KGZ"->"Kirgizstan",
1607+
"KIR"->"Kiribati",
1608+
"COL"->"Kolombia",
1609+
"COM"->"Komoro",
1610+
"COG"->"Republik Kongo",
1611+
"KOR"->"Republik Korea",
1612+
"PRK"->"Republik Rakyat Demokratik Korea",
1613+
"CRI"->"Kosta Rika",
1614+
"CUB"->"Kuba",
1615+
"KWT"->"Kuwait",
1616+
"LAO"->"Laos",
1617+
"LSO"->"Lesotho",
1618+
"LBN"->"Lebanon",
1619+
"LBR"->"Liberia",
1620+
"LBY"->"Libya",
1621+
"LIE"->"Liechtenstein",
1622+
"LTU"->"Lituania",
1623+
"LUX"->"Luksemburg",
1624+
"LVA"->"Latvia",
1625+
"MKD"->"Republik Makedonia",
1626+
"MDG"->"Madagaskar",
1627+
"MYT"->"Mayotte",
1628+
"MAC"->"Makau",
1629+
"MWI"->"Malawi",
1630+
"MDV"->"Maladewa",
1631+
"MYS"->"Malaysia",
1632+
"MLI"->"Mali",
1633+
"MLT"->"Malta",
1634+
"MNP"->"Kepulauan Mariana Utara",
1635+
"MAR"->"Maroko",
1636+
"MTQ"->"Martinik",
1637+
"MRT"->"Mauritania",
1638+
"MUS"->"Mauritius",
1639+
"MEX"->"Meksiko",
1640+
"FSM"->"Federasi Mikronesia",
1641+
"MDA"->"Moldova",
1642+
"MCO"->"Monako",
1643+
"MNG"->"Mongolia",
1644+
"MSR"->"Montserrat",
1645+
"MOZ"->"Mozambik",
1646+
"NAM"->"Namibia",
1647+
"NRU"->"Nauru",
1648+
"NPL"->"Nepal",
1649+
"DEU"->"Jerman",
1650+
"NER"->"Niger",
1651+
"NGA"->"Nigeria",
1652+
"NIC"->"Nikaragua",
1653+
"NIU"->"Niue",
1654+
"NFK"->"Pulau Norfolk",
1655+
"NOR"->"Norwegia",
1656+
"NCL"->"Kaledonia Baru",
1657+
"NZL"->"Selandia Baru",
1658+
"OMN"->"Oman",
1659+
"PAK"->"Pakistan",
1660+
"PLW"->"Palau",
1661+
"PSE"->"Palestina",
1662+
"PAN"->"Panama",
1663+
"PNG"->"Papua Nugini",
1664+
"PRY"->"Paraguay",
1665+
"PER"->"Peru",
1666+
"PCN"->"Kepulauan Pitcairn",
1667+
"PYF"->"Polinesia Perancis",
1668+
"POL"->"Polandia",
1669+
"PRI"->"Puerto Riko",
1670+
"PRT"->"Portugal",
1671+
"TWN"->"Taiwan",
1672+
"ZAF"->"Afrika Selatan",
1673+
"CAF"->"Republik Afrika Tengah",
1674+
"CPV"->"Tanjung Verde",
1675+
"REU"->"Reunion",
1676+
"RUS"->"Rusia",
1677+
"ROU"->"Rumania",
1678+
"RWA"->"Rwanda",
1679+
"ESH"->"Sahara Barat",
1680+
"KNA"->"Saint Kitts dan Nevis",
1681+
"LCA"->"Saint Lucia",
1682+
"VCT"->"Saint Vincent dan Grenadines",
1683+
"BLM"->"Saint-Barthélemy",
1684+
"MAF"->"Saint Martin (Perancis)",
1685+
"SPM"->"Saint Pierre dan Miquelon",
1686+
"SLV"->"El Salvador",
1687+
"ASM"->"Samoa Amerika",
1688+
"WSM"->"Samoa",
1689+
"SMR"->"San Marino",
1690+
"SEN"->"Senegal",
1691+
"SRB"->"Serbia",
1692+
"SYC"->"Seychelles",
1693+
"SLE"->"Sierra Leone",
1694+
"SGP"->"Singapura",
1695+
"SVK"->"Slowakia",
1696+
"SVN"->"Slovenia",
1697+
"SOM"->"Somalia",
1698+
"LKA"->"Sri Lanka",
1699+
"USA"->"Amerika Serikat",
1700+
"SWZ"->"Swaziland",
1701+
"SDN"->"Sudan",
1702+
"SUR"->"Suriname",
1703+
"SJM"->"Svalbard dan Jan Mayen",
1704+
"SYR"->"Suriah",
1705+
"CHE"->"Swiss",
1706+
"SWE"->"Swedia",
1707+
"TJK"->"Tajikistan", // Indonesian spelling has no accent
1708+
"THA"->"Thailand",
1709+
"TZA"->"Tanzania",
1710+
"TLS"->"Timor Leste",
1711+
"TGO"->"Togo",
1712+
"TKL"->"Tokelau",
1713+
"TON"->"Tonga",
1714+
"TTO"->"Trinidad dan Tobago",
1715+
"TUN"->"Tunisia",
1716+
"TUR"->"Turki",
1717+
"TKM"->"Turkmenistan",
1718+
"TCA"->"Kepulauan Turks dan Caicos",
1719+
"TUV"->"Tuvalu",
1720+
"UGA"->"Uganda",
1721+
"UKR"->"Ukraina",
1722+
"URY"->"Uruguay",
1723+
"UZB"->"Uzbekistan",
1724+
"VUT"->"Vanuatu",
1725+
"WLF"->"Wallis dan Futuna",
1726+
"VAT"->"Vatikan",
1727+
"VEN"->"Venezuela",
1728+
"HUN"->"Hongaria",
1729+
"GBR"->"Britania Raya",
1730+
"VNM"->"Vietnam",
1731+
"ITA"->"Italia",
1732+
"CIV"->"Pantai Gading",
1733+
"BVT"->"Pulau Bouvet",
1734+
"CXR"->"Pulau Natal",
1735+
"IMN"->"Pulau Man",
1736+
"SHN"->"Saint Helena, Ascension, dan Tristan da Cunha",
1737+
"ALA"->"Kepulauan Aland",
1738+
"COK"->"Kepulauan Cook",
1739+
"VIR"->"Kepulauan Virgin Amerika Serikat",
1740+
"HMD"->"Pulau Heard dan Kepulauan McDonald",
1741+
"CCK"->"Kepulauan Cocos(Keeling)",
1742+
"MHL"->"Kepulauan Marshall",
1743+
"FRO"->"Kepulauan Faroe",
1744+
"SLB"->"Kepulauan Solomon",
1745+
"STP"->"Sao Tome dan Principe",
1746+
"ZMB"->"Zambia",
1747+
"ZWE"->"Zimbabwe",
1748+
"ARE"->"Uni Emirat Arab"
1749+
), //end id
15021750
"es" ->
15031751
Map(
15041752
"AFG"->"Afganistán",

core/src/main/scala/org/dbpedia/extraction/config/dataparser/ParserUtilsConfig.scala

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,29 @@ object ParserUtilsConfig
4646
"τρισεκατομμυρια" -> 12,
4747
"τετράκις εκατομμύρια" -> 15
4848
),
49-
49+
"eo" -> Map(
50+
"mil" -> 3,
51+
"miliono" -> 6,
52+
"miliardo" -> 9,
53+
"biliono" -> 12,
54+
"biliardo" -> 15,
55+
"triliono" -> 18
56+
),
57+
"es" -> Map(
58+
"mil" -> 3,
59+
"millón" -> 6,
60+
"millardo" -> 9,
61+
"billón" -> 12,
62+
"trillón" -> 18,
63+
"cuatrillón" -> 24
64+
),
65+
"fr" -> Map(
66+
"mille" -> 3,
67+
"million" -> 6,
68+
"milliard" -> 9,
69+
"billion" -> 12,
70+
"trillion" -> 18
71+
),
5072
"nl" -> Map(
5173
"honderd" -> 2,
5274
"duizend" -> 3,
@@ -77,21 +99,6 @@ object ParserUtilsConfig
7799
"biliony" -> 12,
78100
"bilionów" -> 12,
79101
"bln" -> 12
80-
),
81-
"es" -> Map(
82-
"mil" -> 3,
83-
"millón" -> 6,
84-
"millardo" -> 9,
85-
"billón" -> 12,
86-
"trillón" -> 18,
87-
"cuatrillón" -> 24
88-
),
89-
"fr" -> Map(
90-
"mille" -> 3,
91-
"million" -> 6,
92-
"milliard" -> 9,
93-
"billion" -> 12,
94-
"trillion" -> 18
95102
)
96103
)
97104

0 commit comments

Comments
 (0)