Skip to content

Commit 483ffa8

Browse files
committed
Adding methods to query Wiki revision history
The ReadXMLFile class parses the output XML results and counts the number of revisions
1 parent 022a693 commit 483ffa8

File tree

3 files changed

+293
-0
lines changed

3 files changed

+293
-0
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* To change this template, choose Tools | Templates
3+
* and open the template in the editor.
4+
*/
5+
/**
6+
*
7+
* Date Author Changes Sep 10, 2013 Kasun Perera Created
8+
*
9+
*/
10+
package org.dbpedia.kasun.wikiquery;
11+
12+
13+
/**
14+
* TODO- describe the purpose of the class
15+
*
16+
*/
17+
import javax.xml.parsers.DocumentBuilderFactory;
18+
import javax.xml.parsers.DocumentBuilder;
19+
import org.w3c.dom.Document;
20+
import org.w3c.dom.NodeList;
21+
import org.w3c.dom.Node;
22+
import org.w3c.dom.Element;
23+
import java.io.File;
24+
import java.io.UnsupportedEncodingException;
25+
import java.net.URLEncoder;
26+
27+
public class ReadXMLFile
28+
{
29+
30+
public static void ReadFile( String filename )
31+
{
32+
//public static void ReadFile(File fXmlFile) {
33+
try
34+
{
35+
36+
File fXmlFile = new File( filename );
37+
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
38+
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
39+
Document doc = dBuilder.parse( fXmlFile );
40+
41+
//optional, but recommended
42+
//read this - http://stackoverflow.com/questions/13786607/normalization-in-dom-parsing-with-java-how-does-it-work
43+
doc.getDocumentElement().normalize();
44+
45+
System.out.println( "Root element :" + doc.getDocumentElement().getNodeName() );
46+
47+
NodeList nList = doc.getElementsByTagName( "rev" );
48+
49+
System.out.println( "----------------------------" );
50+
51+
for ( int temp = 0; temp < nList.getLength(); temp++ )
52+
{
53+
54+
Node nNode = nList.item( temp );
55+
56+
System.out.println( "Current Element :" + nNode.getNodeName() );
57+
58+
if ( nNode.getNodeType() == Node.ELEMENT_NODE )
59+
{
60+
61+
62+
Element eElement = (Element) nNode;
63+
64+
System.out.println( "Revision22222 id : " + eElement.getAttribute( "revid" ) );
65+
// System.out.println("First Name : " + eElement.getElementsByTagName("firstname").item(0).getTextContent());
66+
// System.out.println("Last Name : " + eElement.getElementsByTagName("lastname").item(0).getTextContent());
67+
// System.out.println("Nick Name : " + eElement.getElementsByTagName("nickname").item(0).getTextContent());
68+
// System.out.println("Salary : " + eElement.getElementsByTagName("salary").item(0).getTextContent());
69+
70+
}
71+
}
72+
} catch ( Exception e )
73+
{
74+
e.printStackTrace();
75+
}
76+
}
77+
78+
public static int ReadFile( Document doc ,String urlParameters, String url) throws UnsupportedEncodingException
79+
{
80+
81+
int numberOfRevisions=0;
82+
//public static void ReadFile(File fXmlFile) {
83+
try
84+
{
85+
doc.getDocumentElement().normalize();
86+
87+
// System.out.println( "Root element :" + doc.getDocumentElement().getNodeName() );
88+
89+
NodeList continueNodeList = doc.getElementsByTagName( "revisions" );
90+
if ( continueNodeList.getLength() > 0 )
91+
{
92+
Node continueNode = continueNodeList.item( 0 );
93+
94+
Element continueElement = (Element) continueNode;
95+
// String urlParameters = "fName=" + URLEncoder.encode( "???", "UTF-8" ) + "&lName=" + URLEncoder.encode( "???", "UTF-8" );
96+
// String url = "http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&titles=Mother&rvlimit=max&rvstart=20130604000000&rvcontinue="+continueElement.getAttribute( "rvcontinue" );
97+
98+
// System.out.println("Calling recursive function using rivision Id "+ continueElement.getAttribute( "rvcontinue" ));
99+
numberOfRevisions=ReadFile(RevisionHistory.excutePost( url+ "&rvcontinue="+continueElement.getAttribute( "rvcontinue" ), urlParameters ),urlParameters, url );
100+
101+
// System.out.println( "Continue revision Id : " + continueElement.getAttribute( "rvcontinue" ) );
102+
}
103+
104+
NodeList nList = doc.getElementsByTagName( "rev" );
105+
106+
// System.out.println( "number of nodes" + nList.getLength());
107+
/*
108+
for ( int temp = 0; temp < nList.getLength(); temp++ )
109+
{
110+
111+
Node nNode = nList.item( temp );
112+
113+
// System.out.println( "\nCurrent Element :" + nNode.getNodeName() + " count: " + temp );
114+
115+
if ( nNode.getNodeType() == Node.ELEMENT_NODE )
116+
{
117+
118+
Element eElement = (Element) nNode;
119+
120+
System.out.println( "Revision id : " + eElement.getAttribute( "revid" ) );
121+
122+
}
123+
}
124+
*/
125+
126+
return numberOfRevisions+ nList.getLength();
127+
} catch ( Exception e )
128+
{
129+
e.printStackTrace();
130+
return 0;
131+
132+
}
133+
}
134+
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* To change this template, choose Tools | Templates
3+
* and open the template in the editor.
4+
*/
5+
/**
6+
*
7+
* Date Author Changes Sep 10, 2013 Kasun Perera Created
8+
*
9+
*/
10+
package org.dbpedia.kasun.wikiquery;
11+
12+
13+
import java.io.*;
14+
import java.net.HttpURLConnection;
15+
import java.net.URL;
16+
import org.w3c.dom.Document;
17+
import javax.xml.parsers.DocumentBuilder;
18+
import javax.xml.parsers.DocumentBuilderFactory;
19+
20+
/**
 * Issues an HTTP request and returns the XML response as a DOM document.
 */
public class RevisionHistory
{

    /** Xerces/JAXP feature that rejects any document containing a DOCTYPE. */
    private static final String DISALLOW_DOCTYPE = "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Sends a request to {@code targetURL} and parses the response body as XML.
     *
     * When {@code urlParameters} is non-empty it is written as the request
     * body and the request is sent as a POST. The original code declared GET
     * but always wrote a body, which the JDK's HTTP implementation sends as a
     * POST anyway, so this keeps the effective behaviour for callers that pass
     * a body. When {@code urlParameters} is {@code null} or empty, a plain GET
     * without a body is sent.
     *
     * @param targetURL     absolute HTTP(S) URL to request
     * @param urlParameters form-encoded request body; may be {@code null} or
     *                      empty
     * @return the parsed response, or {@code null} if the request or the
     *         parsing fails for any reason (the stack trace is printed)
     */
    public static Document excutePost( String targetURL, String urlParameters )
    {
        HttpURLConnection connection = null;
        InputStream is = null;
        try
        {
            boolean hasBody = urlParameters != null && urlParameters.length() > 0;

            //Create connection
            connection = (HttpURLConnection) new URL( targetURL ).openConnection();
            connection.setRequestMethod( hasBody ? "POST" : "GET" );
            connection.setRequestProperty( "Accept", "application/xml" );
            connection.setUseCaches( false );
            connection.setDoInput( true );

            //Send request
            if ( hasBody )
            {
                connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
                connection.setDoOutput( true );

                DataOutputStream wr = new DataOutputStream( connection.getOutputStream() );
                try
                {
                    wr.writeBytes( urlParameters );
                    wr.flush();
                } finally
                {
                    // Close even when the write fails so the stream is not leaked.
                    wr.close();
                }
            }

            //Get Response
            is = connection.getInputStream();

            DocumentBuilder db = newDocumentBuilderFactory().newDocumentBuilder();
            return db.parse( is );

        } catch ( Exception e )
        {
            // Boundary catch: callers rely on null, not an exception, to signal failure.
            e.printStackTrace();
            return null;

        } finally
        {
            if ( is != null )
            {
                try
                {
                    is.close();
                } catch ( IOException ignored )
                {
                    // Nothing useful can be done if closing the response stream fails.
                }
            }
            if ( connection != null )
            {
                connection.disconnect();
            }
        }
    }

    /**
     * Creates a parser factory that refuses documents with a DOCTYPE, because
     * the XML comes from the network and DTDs enable external-entity attacks.
     * If the installed parser does not know the feature, the default
     * configuration is used instead.
     */
    private static DocumentBuilderFactory newDocumentBuilderFactory()
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try
        {
            dbf.setFeature( DISALLOW_DOCTYPE, true );
        } catch ( javax.xml.parsers.ParserConfigurationException unsupported )
        {
            // Hardening is best-effort: keep working with parsers lacking this feature.
            System.err.println( "XML parser does not support " + DISALLOW_DOCTYPE );
        }
        return dbf;
    }
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* To change this template, choose Tools | Templates
3+
* and open the template in the editor.
4+
*/
5+
6+
7+
8+
/**
9+
* KarshaAnnotate- Annotation tool for financial documents
10+
*
11+
*
12+
* Date Author Changes
13+
* Sep 10, 2013 Kasun Perera Created
14+
*
15+
*/
16+
17+
package org.dbpedia.kasun.wikiquery;
18+
19+
20+
import java.io.FileNotFoundException;
21+
import java.io.IOException;
22+
import java.io.UnsupportedEncodingException;
23+
import java.net.URLEncoder;
24+
25+
26+
27+
/**
28+
* TODO- describe the purpose of the class
29+
*
30+
*/
31+
public class WikiQuery {
32+
33+
public static void main(String[] args ) throws UnsupportedEncodingException {
34+
35+
int pageId=83430;
36+
37+
String urlParameters = "fName=" + URLEncoder.encode("???", "UTF-8") + "&lName=" + URLEncoder.encode("???", "UTF-8");
38+
//timestamp June 4th, 2013 00:00:00 UTC=20130604000000
39+
// String url="http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&titles=Mother&rvlimit=max&rvstart=20130604000000";
40+
String url="http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&pageids="+pageId+"&rvlimit=max&rvstart=20130604000000";
41+
42+
//pageid
43+
// RevisionHistory.excutePost( url, urlParameters );
44+
// ReadXMLFile.ReadFile( "C:\\Users\\lsf\\Documents\\NetBeansProjects\\WikipediaCategoryProcessor\\api.xml");
45+
int totalRevisions= ReadXMLFile.ReadFile(RevisionHistory.excutePost( url, urlParameters ),urlParameters,url);
46+
System.out.println("totalRevisions "+ totalRevisions);
47+
}
48+
49+
}

0 commit comments

Comments
 (0)