Skip to content

Commit e91a701

Browse files
committed
manually merge pull request dbpedia#95 / fixes issue dbpedia#23
1 parent 90f8f5b commit e91a701

File tree

4 files changed

+30
-12
lines changed

4 files changed

+30
-12
lines changed

server/src/main/scala/org/dbpedia/extraction/server/DynamicExtractionManager.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.dbpedia.extraction.server
22

33
import org.dbpedia.extraction.sources.WikiPage
4-
import org.dbpedia.extraction.mappings.{Mappings,RootExtractor}
4+
import org.dbpedia.extraction.mappings.{Redirects, Mappings, RootExtractor}
55
import org.dbpedia.extraction.util.Language
66
import org.dbpedia.extraction.ontology.Ontology
77
import org.dbpedia.extraction.wikiparser.{PageNode, WikiTitle}
@@ -20,8 +20,8 @@ import scala.actors.Actor
2020
* mappingPageSource is called by loadMappings in the base class,
2121
* ontologyPages is called by loadOntology in the base class.
2222
*/
23-
class DynamicExtractionManager(update: (Language, Mappings) => Unit, languages : Seq[Language], paths: Paths)
24-
extends ExtractionManager(languages, paths)
23+
class DynamicExtractionManager(update: (Language, Mappings) => Unit, languages : Seq[Language], paths: Paths, redirects: Map[Language, Redirects])
24+
extends ExtractionManager(languages, paths, redirects)
2525
{
2626
// TODO: remove this field. Clients should get the ontology pages directly from the
2727
// mappings wiki, not from here. We don't want to keep all ontology pages in memory.

server/src/main/scala/org/dbpedia/extraction/server/ExtractionManager.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import java.net.URL
1818
* or they can support lazy loading of context parameters.
1919
*/
2020

21-
abstract class ExtractionManager(languages : Seq[Language], paths: Paths)
21+
abstract class ExtractionManager(languages : Seq[Language], paths: Paths, redirects: Map[Language, Redirects])
2222
{
2323
self =>
2424

@@ -167,8 +167,9 @@ abstract class ExtractionManager(languages : Seq[Language], paths: Paths)
167167
{
168168
new RootExtractor(
169169
new CompositePageNodeExtractor(
170-
new LabelExtractor(new {val ontology = self.ontology; val language = lang}),
171-
new MappingExtractor(new {val mappings = self.mappings(lang); val redirects = new Redirects(Map())})
170+
new LabelExtractor(new {val ontology = self.ontology; val language = lang}),
171+
new MappingExtractor(new {val mappings = self.mappings(lang);
172+
val redirects = self.redirects.getOrElse(lang, new Redirects(Map()))})
172173
)
173174
)
174175
}

server/src/main/scala/org/dbpedia/extraction/server/Server.scala

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,28 @@ import java.io.File
44
import java.net.{URI,URL}
55
import java.util.logging.{Level,Logger}
66
import scala.collection.immutable.SortedMap
7-
import org.dbpedia.extraction.mappings.{LabelExtractor,MappingExtractor}
7+
import org.dbpedia.extraction.mappings.{Redirects, LabelExtractor, MappingExtractor, Mappings}
88
import org.dbpedia.extraction.util.Language
99
import org.dbpedia.extraction.util.Language.wikiCodeOrdering
10-
import org.dbpedia.extraction.mappings.Mappings
1110
import org.dbpedia.extraction.server.stats.MappingStatsManager
1211
import com.sun.jersey.api.container.httpserver.HttpServerFactory
1312
import com.sun.jersey.api.core.{ResourceConfig,PackagesResourceConfig}
1413
import org.dbpedia.extraction.util.StringUtils.prettyMillis
15-
import org.dbpedia.extraction.wikiparser.Namespace
14+
import org.dbpedia.extraction.wikiparser.{WikiTitle, Namespace}
15+
import Server._
1616

1717
class Server(private val password : String, langs : Seq[Language], val paths: Paths)
1818
{
1919
// One MappingStatsManager per entry of `langs`, each constructed with paths.statsDir and its language,
// collected into a SortedMap keyed by language.
// NOTE(review): the key ordering presumably comes from the imported Language.wikiCodeOrdering - confirm.
val managers = {
2020
val tuples = langs.map(lang => lang -> new MappingStatsManager(paths.statsDir, lang))
2121
SortedMap(tuples: _*)
2222
}
23+
24+
// Per-language template redirects: every (language, stats manager) entry of `managers` is turned into
// (language, buildTemplateRedirects(manager.wikiStats.redirects, language)), and the result is converted
// with `.toMap` before being handed to the extraction manager below.
val redirects = {
25+
managers.map(manager => (manager._1, buildTemplateRedirects(manager._2.wikiStats.redirects, manager._1))).toMap
26+
}
2327

24-
val extractor: ExtractionManager = new DynamicExtractionManager(managers(_).updateStats(_), langs, paths)
28+
val extractor: ExtractionManager = new DynamicExtractionManager(managers(_).updateStats(_), langs, paths, redirects)
2529

2630
extractor.updateAll
2731

@@ -80,4 +84,17 @@ object Server
8084

8185
logger.info("DBpedia server started in "+prettyMillis(System.currentTimeMillis - millis) + " listening on " + uri)
8286
}
87+
88+
/**
89+
* Builds template redirects from Wiki statistics as collected by {@link CreateMappingStats}.
90+
* Main purpose is to clean template names from the template namespace so that redirects can be used in Extractors
91+
* (Extractors use decoded wiki titles).
*
* Both sides of every redirect entry are parsed with WikiTitle.parse for the given language, and the
* `decoded` value of the parsed title is used as the new key / value.
* NOTE(review): if two distinct source titles produce the same decoded string, only one of them
* survives in the resulting map - confirm this cannot happen for the collected statistics.
92+
* @param redirects map from redirect source title to redirect target title, both as title strings
* @param language language passed to WikiTitle.parse when parsing the source and target titles
93+
* @return a new Redirects built from the map of decoded source titles to decoded target titles
94+
*/
95+
def buildTemplateRedirects(redirects: Map[String, String], language: Language): Redirects = {
96+
new Redirects(redirects.map { case (from, to) =>
97+
(WikiTitle.parse(from, language).decoded, WikiTitle.parse(to, language).decoded)
98+
}.toMap)
99+
}
83100
}

server/src/main/scala/org/dbpedia/extraction/server/StaticExtractionManager.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ import java.io.File
1212
* Is NOT able to update the ontology or the mappings.
1313
* This manager is good for testing locally.
1414
*/
15-
class StaticExtractionManager(update: (Language, Mappings) => Unit, languages : Seq[Language], paths: Paths)
16-
extends ExtractionManager(languages, paths)
15+
class StaticExtractionManager(update: (Language, Mappings) => Unit, languages : Seq[Language], paths: Paths, redirects: Map[Language, Redirects])
16+
extends ExtractionManager(languages, paths, redirects)
1717
{
1818
@volatile private lazy val _ontologyPages : Map[WikiTitle, PageNode] = loadOntologyPages
1919

0 commit comments

Comments
 (0)