dbpedia
diff --git a/core/src/main/java/org/dbpedia/extraction/nif/LinkExtractor.java b/core/src/main/java/org/dbpedia/extraction/nif/LinkExtractor.java
Lines changed: 10 additions & 26 deletions
diff --git a/core/src/main/scala/org/dbpedia/extraction/config/Config.scala b/core/src/main/scala/org/dbpedia/extraction/config/Config.scala
Lines changed: 1 addition & 1 deletion
diff --git a/core/src/main/scala/org/dbpedia/extraction/mappings/PlainAbstractExtractor.scala b/core/src/main/scala/org/dbpedia/extraction/mappings/PlainAbstractExtractor.scala
Lines changed: 0 additions & 1 deletion
diff --git a/core/src/main/scala/org/dbpedia/extraction/nif/HtmlNifExtractor.scala b/core/src/main/scala/org/dbpedia/extraction/nif/HtmlNifExtractor.scala
Lines changed: 2 additions & 15 deletions
diff --git a/core/src/main/scala/org/dbpedia/extraction/nif/WikipediaNifExtractorRest.scala b/core/src/main/scala/org/dbpedia/extraction/nif/WikipediaNifExtractorRest.scala
Lines changed: 8 additions & 18 deletions
diff --git a/core/src/main/scala/org/dbpedia/extraction/util/MediaWikiConnectorRest.scala b/core/src/main/scala/org/dbpedia/extraction/util/MediaWikiConnectorRest.scala
Lines changed: 0 additions & 9 deletions
@@ -43,7 +43,6 @@ public void head(Node node, int depth) {
 			return;
 		}
 
-
         if(paragraph == null) {
 			paragraph = new Paragraph(0, "", "p");
 		}
@@ -63,7 +62,6 @@ public void head(Node node, int depth) {
 
 		  //this text node is the content of an <a> element: make a new nif:Word
 		  if(inLink) {
-
               if(!tempText.trim().startsWith(this.context.wikipediaTemplateString + ":"))  //not!
               {
                   tempLink.setLinkText(tempText);
@@ -75,11 +73,9 @@ public void head(Node node, int depth) {
 				  errors.add("found Template in resource: " + this.context.resource + ": " + tempText);
 				  return;
 			  }
-
 		  }
 		  else
 		    paragraph.addText(tempText);
-
 		}
 
 		else if(node.nodeName().equals("a")) {
@@ -93,24 +89,20 @@ else if(node.nodeName().equals("a")) {
 			 * see Schopenhauer: https://en.wikipedia.org/w/api.php?uselang=en&format=xml&action=parse&prop=text&pageid=17340400
 			 */
             String linkPrefix = "/wiki/";
-			// SPECIAL CASE FOR RESTAPI PARSING
-
-			if(node.hasAttr("rel")){
 
+			// SPECIAL CASE FOR RESTAPI PARSING https://en.wikipedia.org/api/rest_v1/
+			if(node.hasAttr("rel")) {
 				String relType = node.attr("rel");
 				if(relType.equals("mw:WikiLink")){
-
-						tempLink = new Link();
-						String uri = cleanLink(node.attr("href"), false);
-						setUri(uri);
-
-
+					tempLink = new Link();
+					String uri = cleanLink(node.attr("href"), false);
+					setUri(uri);
 				} else if (relType.equals("mw:ExtLink")) {
-						tempLink = new Link();
-						String uri = cleanLink(node.attr("href"), true);
-						setUri(uri);
+					tempLink = new Link();
+					String uri = cleanLink(node.attr("href"), true);
+					setUri(uri);
 				}
-			}else{
+			} else {
 				// standard wikilinks
 				if (link.contains(linkPrefix) && !link.contains(":")) {
 					tempLink = new Link();
@@ -147,8 +139,6 @@ else if(node.nodeName().equals("a")) {
 					skipLevel = depth;
 				}
 			}
-
-
         } else if(node.nodeName().equals("p")) {
             if(paragraph != null) {
                 addParagraph("p");
@@ -201,17 +191,14 @@ private String cleanLink(String uri, boolean external) {
 			//TODO central string management
 			if(!this.context.language.equals("en")) {
 				uri="http://"+this.context.language+".dbpedia.org/resource/"+uri;
-				
-			}
-			else {
+			} else {
 				uri="http://dbpedia.org/resource/"+uri;
 			}
 			uri = uri.replace("&action=edit&redlink=1", "");
 
 		} else {
 			//there are links that contain illegal hostnames
 			try {
-
 				if(uri.startsWith("//"))
 					uri = "http:"+uri;
 				uri = URLEncoder.encode(uri,"UTF-8");
@@ -226,11 +213,8 @@ private String cleanLink(String uri, boolean external) {
 	}
 
 	public void tail(Node node, int depth) {
-
-
 		if(skipLevel>0) {
 			if(skipLevel==depth) {
-
 				skipLevel = -1;
 				return;
 			} else {
 
@@ -97,7 +97,7 @@ class Config(val configPath: String) extends
     * Number of parallel processes allowed. Depends on the number of cores, type of disk, and IO speed
     *
     */
-  lazy val parallelProcesses: Int = this.getProperty("parallel-processes", "1").trim.toInt
+  lazy val parallelProcesses: Int = this.getProperty("parallel-processes", "4").trim.toInt
 
   lazy val sparkMaster: String = Option(getString(this, "spark-master")).getOrElse("local[*]")
 
 
@@ -78,7 +78,6 @@ extends WikiPageExtractor
         //val abstractWikiText = getAbstractWikiText(pageNode)
         // if(abstractWikiText == "") return Seq.empty
 
-
         val mwConnector = new MediawikiConnectorConfigured(context.configFile.mediawikiConnection, context.configFile.abstractParameters.abstractTags.split(","))
         val text = mwConnector.retrievePage(pageNode.title, apiParametersFormat, pageNode.isRetry) match {
           case Some(t) => PlainAbstractExtractor.postProcessExtractedHtml(pageNode.title, replacePatterns(t))
 
@@ -59,38 +59,30 @@ abstract class HtmlNifExtractor(nifContextIri: String, language: String, nifPara
 
     val sections = getRelevantParagraphs(html)
 
-    var  context = ""
+    var context = ""
     var offset = 0
-
     val quads = for(section <- sections) yield {
       extractTextFromHtml(section, new NifExtractorContext(language, subjectIri, templateString)) match {
         case Success(extractionResults) => {
           sectionMap.put(section, extractionResults)
           sectionMap.put(extractionResults, extractionResults)
 
-
           if (context.length != 0) {
             context = context + "\n\n"
             offset += 2
           }
-
           var quads = if(nifParameters.abstractsOnly)
             Seq()
           else
             makeStructureElements(extractionResults, nifContextIri, graphIri, offset)
 
-
           offset += extractionResults.getExtractedLength
           context += extractionResults.getExtractedText
 
           //collect additional triples
           quads ++= extendSectionTriples(extractionResults, graphIri, subjectIri)
-
-
           //forward exceptions
           extractionResults.errors.foreach(exceptionHandle(_, RecordSeverity.Warning, null))
-
-
           quads
         }
         case Failure(e) => {
@@ -151,7 +143,6 @@ abstract class HtmlNifExtractor(nifContextIri: String, language: String, nifPara
         triples += nifStructure(p.getSectionIri(), RdfNamespace.NIF.append("nextSection"), sectionUri, sourceUrl, null)
       case None =>
     }
-
     section.getTop match{
       case Some(p) =>
         triples += nifStructure(sectionUri, RdfNamespace.NIF.append("superString"), p.getSectionIri(), sourceUrl, null)
@@ -177,8 +168,6 @@ abstract class HtmlNifExtractor(nifContextIri: String, language: String, nifPara
         if (section.next.isEmpty)
           triples += nifStructure(section.getTop.get.getSectionIri(), RdfNamespace.NIF.append("lastSection"), sectionUri, sourceUrl, null)
       }
-
-
     }
 
     //further specifying paragraphs of every section
@@ -355,9 +344,7 @@ abstract class HtmlNifExtractor(nifContextIri: String, language: String, nifPara
   }
 
   protected def getJsoupDoc(html: String): Document = {
-
-    var html_clean=cleanHtml(html)
-     val doc = Jsoup.parse( html_clean)
+    val doc = Jsoup.parse(cleanHtml(html))
 
     //delete queries
     for(query <- cssSelectorConfigMap.removeElements)
 
@@ -14,14 +14,13 @@ import scala.language.reflectiveCalls
   * Created by Chile on 1/19/2017.
   */
 class WikipediaNifExtractorRest(
-                               context : {
-                                 def ontology : Ontology
-                                 def language : Language
-                                 def configFile : Config
-                               },
-                               wikiPage: WikiPage
-                             )
-  extends WikipediaNifExtractor ( context ,wikiPage)  {
+    context : {
+      def ontology : Ontology
+      def language : Language
+      def configFile : Config
+    },
+    wikiPage: WikiPage
+  ) extends WikipediaNifExtractor(context ,wikiPage) {
 
 
   /**
@@ -52,7 +51,7 @@ class WikipediaNifExtractorRest(
 
           title match {
 
-            case Some(t) if super.isWikiNextTitle(t) && !processEnd=>
+            case Some(t) if super.isWikiNextTitle(t) && !processEnd =>
 
               //calculate the section number by looking at the <h2> to <h4> tags
               val depth = Integer.parseInt(t.asInstanceOf[org.jsoup.nodes.Element].tagName().substring(1)) - 1
@@ -67,9 +66,7 @@ class WikipediaNifExtractorRest(
                 if (currentSection.size == depth - 1)
                   currentSection.append(zw + 1)
               }
-
               subnodes = subnodes.drop(1)
-
               val section = new PageSection(
                 //previous section (if on the same depth level)
                 prev = currentSection.last match {
@@ -89,7 +86,6 @@ class WikipediaNifExtractorRest(
                 //take all following tags until you hit another title or end of content
                 content = Seq(t) ++ subnodes.takeWhile(node => !node.nodeName().matches("h\\d") && !node.nodeName().matches("section"))
               )
-
               tocMap.append(section)
             case None => processEnd=true
             case _ => processEnd=true
@@ -98,13 +94,10 @@ class WikipediaNifExtractorRest(
           getSection(subnodes)
           subnodes =  subnodes.drop(1)
         }
-
         subnodes = subnodes.dropWhile(node => !node.nodeName().matches("h\\d") && !node.nodeName().matches("section"))
       }
-
     }
 
-
     val abstractSect=doc.select("body").select("section").first.childNodes.asScala //get first section
     val ab = abstractSect.filter(node => node.nodeName() == "p") //move cursor to abstract
 
@@ -131,7 +124,4 @@ class WikipediaNifExtractorRest(
     }
     tocMap
   }
-
-
-
 }
@@ -23,7 +23,6 @@ class MediaWikiConnectorRest(connectionConfig: MediaWikiConnection, xmlPath: Seq
   protected val apiProfile: String = connectionConfig.profile
   protected val userAgent: String = connectionConfig.useragent
 
-
   /**
    * Retrieves a Wikipedia page.
    *
@@ -52,11 +51,8 @@ class MediaWikiConnectorRest(connectionConfig: MediaWikiConnection, xmlPath: Seq
     val parameters = "redirect=true"
     val apiUrl: URL = new URL(url.concat(titleParam).concat("?"+parameters))
 
-
-
     //println(s"mediawikiurl: $apiUrl")
 
-
     for (counter <- 1 to maxRetries) {
 
       val conn = apiUrl.openConnection
@@ -77,8 +73,6 @@ class MediaWikiConnectorRest(connectionConfig: MediaWikiConnection, xmlPath: Seq
       val answerClean = answerHeader.asScala.filterKeys(_ != null)
 
       if(conn.getHeaderField(null).contains("HTTP/1.1 200 OK") ){
-
-
         val end = java.time.LocalTime.now()
         conn match {
           case connection: HttpURLConnection =>
@@ -106,15 +100,12 @@ class MediaWikiConnectorRest(connectionConfig: MediaWikiConnection, xmlPath: Seq
           //println("WITH EXPONENTIAL BACK OFF" + counter)
           //println("Sleeping time double >>>>>>>>>>>" + pow(waiting_time, counter))
           //println("Sleeping time int >>>>>>>>>>>" + sleepMs)
-
         }
         if (counter < maxRetries)
           Thread.sleep(sleepMs)
         else
           throw new Exception("Timeout error retrieving abstract of " + pageTitle + " in " + counter + " tries.")
       } else {
-
-
         //println(s"mediawikiurl: $apiUrl?$parameters")
         return parsedAnswer match {
           case Success(str) => Option(str)
Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,7 @@ class Config(val configPath: String) extends`
`97`	`97`	`* Number of parallel processes allowed. Depends on the number of cores, type of disk, and IO speed`
`98`	`98`	`*`
`99`	`99`	`*/`
`100`		`- lazy val parallelProcesses: Int = this.getProperty("parallel-processes", "1").trim.toInt`
	`100`	`+ lazy val parallelProcesses: Int = this.getProperty("parallel-processes", "4").trim.toInt`
`101`	`101`
`102`	`102`	`lazy val sparkMaster: String = Option(getString(this, "spark-master")).getOrElse("local[*]")`
`103`	`103`