Skip to content

Commit 240df02

Browse files
committed
2 parents 5c88506 + e544af4 commit 240df02

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

core/src/main/scala/org/dbpedia/extraction/util/WikiApi.scala

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,15 @@ class WikiApi(url: URL, language: Language)
111111
/**
112112
* Retrieves multiple pages by their title.
113113
*
114-
* @param pageIds The titles of the pages to be downloaded.
114+
* @param titles The titles of the pages to be downloaded.
115115
*/
116116
def retrievePagesByTitle[U](titles : Iterable[WikiTitle]) = new Traversable[WikiPage]
117117
{
118118
override def foreach[U](proc : WikiPage => U) : Unit =
119119
{
120120
for(titleGroup <- titles.grouped(pageDownloadLimit))
121121
{
122-
val response = query("?action=query&format=xml&prop=revisions&titles=" + titleGroup.map(t => URLEncoder.encode(t.encodedWithNamespace, "UTF-8")).mkString("|") + "&rvprop=ids|content|timestamp|user|userid")
122+
val response = query("?action=query&format=xml&prop=revisions&titles=" + titleGroup.map(formatWikiTitle).mkString("|") + "&rvprop=ids|content|timestamp|user|userid")
123123
processPages(response, proc)
124124
}
125125
}
@@ -138,7 +138,7 @@ class WikiApi(url: URL, language: Language)
138138

139139
proc(
140140
new WikiPage(
141-
title = WikiTitle.parse((page \ "@title").head.text, language),
141+
title = WikiTitle.parse((page \ "@title").head.text, language),
142142
redirect = null, // TODO: read redirect from XML
143143
id = (page \ "@pageid").head.text,
144144
revision = (rev \ "@revid").head.text,
@@ -243,4 +243,14 @@ class WikiApi(url: URL, language: Language)
243243

244244
throw new IllegalStateException("Should never get there")
245245
}
246+
247+
/**
248+
* Formats the given `title` for use in a MediaWiki API request.
249+
*
250+
* @param title the wiki title to format
251+
* @return `title.decodedWithNamespace` with spaces replaced by underscores, URL-encoded as UTF-8
252+
*/
253+
private def formatWikiTitle(title: WikiTitle) : String = {
254+
URLEncoder.encode(title.decodedWithNamespace.replace(' ', '_'), "UTF-8")
255+
}
246256
}

0 commit comments

Comments
 (0)