Adding methods to query Wiki revision history

kkasunperera · kkasunperera · commit 483ffa82f5c3 · 2013-09-12T09:39:51.000+05:30
The ReadXMLFile class parses the output results XML file and counts the
number of revisions
diff --git a/WikipediaCategoryProcessor/src/main/java/org/dbpedia/kasun/wikiquery/ReadXMLFile.java b/WikipediaCategoryProcessor/src/main/java/org/dbpedia/kasun/wikiquery/ReadXMLFile.java
@@ -0,0 +1,134 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+/**
+ *
+ * Date Author Changes Sep 10, 2013 Kasun Perera Created
+ *
+ */
+package org.dbpedia.kasun.wikiquery;
+
+
+/**
+ * TODO- describe the purpose of the class
+ *
+ */
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+
+/**
+ * Reads MediaWiki API "revisions" query results: dumps the revision ids stored in a
+ * local XML file, or counts a page's revisions across all continuation batches.
+ */
+public class ReadXMLFile
+{
+
+    /**
+     * Parses the XML file at {@code filename} and prints, to standard output, the root
+     * element name and then the node name and {@code revid} attribute of every
+     * {@code rev} element. Any failure is reported with a stack trace and not rethrown.
+     *
+     * @param filename path of the XML file to read
+     */
+    public static void ReadFile( String filename )
+    {
+        try
+        {
+            File fXmlFile = new File( filename );
+            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+            Document doc = dBuilder.parse( fXmlFile );
+            // Put text nodes into normal form (adjacent text nodes merged) before reading.
+            doc.getDocumentElement().normalize();
+            System.out.println( "Root element :" + doc.getDocumentElement().getNodeName() );
+            NodeList nList = doc.getElementsByTagName( "rev" );
+            System.out.println( "----------------------------" );
+            for ( int temp = 0; temp < nList.getLength(); temp++ )
+            {
+                Node nNode = nList.item( temp );
+                System.out.println( "Current Element :" + nNode.getNodeName() );
+                if ( nNode.getNodeType() == Node.ELEMENT_NODE )
+                {
+                    Element eElement = (Element) nNode;
+                    System.out.println( "Revision22222 id : " + eElement.getAttribute( "revid" ) );
+                }
+            }
+        } catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Counts the {@code rev} elements in {@code doc} and in every continuation batch
+     * that follows it.
+     *
+     * <p>While a batch carries a non-empty {@code rvcontinue} attribute, the next batch
+     * is requested from {@code url + "&rvcontinue=" + token} through
+     * {@link RevisionHistory#excutePost(String, String)}. Counting stops at the first
+     * batch without a token, at a {@code null} document, or when an exception occurs
+     * (its stack trace is printed); revisions counted up to that point are returned.
+     *
+     * @param doc           first batch of the query; {@code null} yields 0
+     * @param urlParameters request body passed unchanged to every follow-up request
+     * @param url           base query URL without an {@code rvcontinue} parameter
+     * @return total number of {@code rev} elements seen across all batches
+     * @throws UnsupportedEncodingException never actually thrown (all exceptions are caught); kept so callers compile
+     */
+    public static int ReadFile( Document doc ,String urlParameters, String url) throws UnsupportedEncodingException
+    {
+        int numberOfRevisions = 0;
+        try
+        {
+            // Loop rather than recurse: stack depth no longer grows with batch count.
+            while ( doc != null )
+            {
+                doc.getDocumentElement().normalize();
+                numberOfRevisions += doc.getElementsByTagName( "rev" ).getLength();
+                String token = findContinueToken( doc );
+                if ( token.length() == 0 )
+                {
+                    // No continuation advertised: do not request "&rvcontinue=" with no value.
+                    break;
+                }
+                // Encode the token so it survives as a single query-string value.
+                String nextUrl = url + "&rvcontinue=" + URLEncoder.encode( token, "UTF-8" );
+                doc = RevisionHistory.excutePost( nextUrl, urlParameters );
+            }
+        } catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+        return numberOfRevisions;
+    }
+
+    /**
+     * Returns the first non-empty {@code rvcontinue} attribute carried by a
+     * {@code revisions} or {@code continue} element of {@code doc}, or "" if none.
+     */
+    private static String findContinueToken( Document doc )
+    {
+        String[] carriers = { "revisions", "continue" };
+        for ( int c = 0; c < carriers.length; c++ )
+        {
+            NodeList candidates = doc.getElementsByTagName( carriers[c] );
+            for ( int i = 0; i < candidates.getLength(); i++ )
+            {
+                String token = ( (Element) candidates.item( i ) ).getAttribute( "rvcontinue" );
+                if ( token.length() > 0 )
+                {
+                    return token;
+                }
+            }
+        }
+        return "";
+    }
+}
diff --git a/WikipediaCategoryProcessor/src/main/java/org/dbpedia/kasun/wikiquery/RevisionHistory.java b/WikipediaCategoryProcessor/src/main/java/org/dbpedia/kasun/wikiquery/RevisionHistory.java
@@ -0,0 +1,110 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+/**
+ *
+ * Date Author Changes Sep 10, 2013 Kasun Perera Created
+ *
+ */
+package org.dbpedia.kasun.wikiquery;
+
+
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import org.w3c.dom.Document;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+
+/**
+ * Minimal HTTP client for an XML web API: sends one request and parses the XML
+ * response into a DOM {@link Document}.
+ */
+public class RevisionHistory
+{
+
+    /**
+     * Sends {@code urlParameters} as the request body to {@code targetURL} and parses
+     * the XML response.
+     *
+     * <p>The request is issued as an HTTP POST with {@code Accept: application/xml} and
+     * with caching disabled. The response stream is closed and the connection
+     * disconnected before the method returns.
+     *
+     * <p>Failures (bad URL, I/O error, rejected DOCTYPE, malformed XML) are not thrown:
+     * the stack trace is printed and {@code null} is returned.
+     *
+     * @param targetURL     full request URL, including any query string
+     * @param urlParameters request body; {@code null} is sent as an empty body
+     * @return the parsed response document, or {@code null} if anything failed
+     */
+    public static Document excutePost( String targetURL, String urlParameters )
+    {
+        HttpURLConnection connection = null;
+        InputStream is = null;
+        try
+        {
+            //Create connection
+            URL url = new URL( targetURL );
+            connection = (HttpURLConnection) url.openConnection();
+
+            // A body is written below, so declare POST explicitly. NOTE(review): the
+            // original asked for "GET"; the JDK's HttpURLConnection appears to switch
+            // such a request to POST once the output stream is opened - confirm.
+            connection.setRequestMethod( "POST" );
+            connection.setRequestProperty( "Accept", "application/xml" );
+
+            connection.setUseCaches( false );
+            connection.setDoInput( true );
+            connection.setDoOutput( true );
+
+            //Send request
+            DataOutputStream wr = new DataOutputStream(
+                connection.getOutputStream() );
+            try
+            {
+                wr.writeBytes( urlParameters == null ? "" : urlParameters );
+                wr.flush();
+            } finally
+            {
+                wr.close();
+            }
+
+            //Get response
+            is = connection.getInputStream();
+
+            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+            // The response comes from the network: refuse DOCTYPE declarations so a
+            // hostile document cannot pull in external entities (XXE).
+            dbf.setFeature( "http://apache.org/xml/features/disallow-doctype-decl", true );
+            DocumentBuilder db = dbf.newDocumentBuilder();
+            return db.parse( is );
+
+        } catch ( Exception e )
+        {
+            e.printStackTrace();
+            return null;
+
+        } finally
+        {
+
+            // Release the response stream before dropping the connection.
+            if ( is != null )
+            {
+                try
+                {
+                    is.close();
+                } catch ( IOException ignored )
+                {
+                    // Nothing useful to do if closing the response stream fails.
+                }
+            }
+
+            if ( connection != null )
+            {
+                connection.disconnect();
+            }
+        }
+    }
+}
diff --git a/WikipediaCategoryProcessor/src/main/java/org/dbpedia/kasun/wikiquery/WikiQuery.java b/WikipediaCategoryProcessor/src/main/java/org/dbpedia/kasun/wikiquery/WikiQuery.java
@@ -0,0 +1,49 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+
+
+/** 
+ *   WikiQuery- Counts the revisions of a Wikipedia page via the MediaWiki API
+ *  
+ * 
+ *      Date             Author          Changes 
+ *      Sep 10, 2013     Kasun Perera    Created   
+ * 
+ */ 
+
+package org.dbpedia.kasun.wikiquery;
+
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+
+
+
+/**
+ * Command-line entry point that queries the MediaWiki API for a page's revisions
+ * (with rvstart=20130604000000) and prints their total count.
+ */
+public class WikiQuery {
+
+    /**
+     * @param args optional: {@code args[0]} is the numeric page id (default 83430)
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is unavailable
+     */
+    public static void main(String[] args ) throws UnsupportedEncodingException {
+        // Page id may be overridden on the command line; 83430 stays the default.
+        int pageId = ( args != null && args.length > 0 ) ? Integer.parseInt( args[0] ) : 83430;
+
+        String urlParameters = "fName=" + URLEncoder.encode("???", "UTF-8") + "&lName=" + URLEncoder.encode("???", "UTF-8");
+        // rvstart is the timestamp June 4th, 2013 00:00:00 UTC = 20130604000000
+        String url = "http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&pageids=" + pageId + "&rvlimit=max&rvstart=20130604000000";
+
+        int totalRevisions = ReadXMLFile.ReadFile( RevisionHistory.excutePost( url, urlParameters ), urlParameters, url );
+        System.out.println("totalRevisions "+ totalRevisions);
+    }
+
+}