Skip to content

Commit 962abb4

Browse files
committed
Allow users to specify the URL template to use
1 parent 464bb36 commit 962abb4

File tree

4 files changed

+37
-20
lines changed

4 files changed

+37
-20
lines changed

bin/dump_restbase

100644 → 100755
File mode changed.

bin/dump_wiki

100644 → 100755
Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ var makeDump = require('../lib/htmldump');
88
var argParser = require('yargs')
99
.usage('Create a HTML dump in a subdir\n\n'
1010
+ '\nExample:\n$0 --domain en.wikipedia.org --ns 0 --apiURL http://en.wikipedia.org/w/api.php')
11-
.demand(['apiURL', 'domain', 'ns', 'host'])
11+
.demand(['apiURL', 'ns', 'host'])
1212
.options('h', {
1313
alias: 'help'
1414
})
15-
.alias('domain', 'prefix')
15+
.options('domain', {
16+
alias: 'prefix',
17+
default: ''
18+
})
1619
.options('d', {
1720
alias : 'saveDir',
1821
default : ''
@@ -36,10 +39,14 @@ var argParser = require('yargs')
3639
alias: 'concurrency',
3740
default: 50
3841
})
42+
.options('u', {
43+
alias: 'url',
44+
default: '{{host}}/{{domain}}/v1/page/html/{title}/{oldid}'
45+
})
3946
//.default('apiURL', 'http://en.wikipedia.org/w/api.php')
4047
//.default('prefix', 'en.wikipedia.org')
4148
//.default('ns', '0')
42-
.default('host', 'http://rest.wikimedia.org');
49+
//.default('host', 'http://rest.wikimedia.org');
4350

4451
var argv = argParser.argv;
4552
if (argv.h) {

lib/htmldump.js

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
require('core-js/shim');
44

55
var P = require('bluebird');
6+
var Template = require('swagger-router').Template;
67

78
var makeFileStore = require('./filestore');
89
var makeSQLiteStore = require('./sqlitestore');
@@ -21,6 +22,9 @@ process.on('SIGUSR2', function() {
2122
var preq = require('preq');
2223
var PromiseStream = require('./PromiseStream');
2324

25+
// the request template used for fetching each individual article
26+
var articleReqTpl;
27+
2428
function getArticles (options, res) {
2529
if (!res || res.next === 'finished') {
2630
// nothing more to do.
@@ -92,24 +96,12 @@ function dumpArticle (options, title, oldid) {
9296
if (options.verbose) {
9397
console.log('Dumping', title, oldid);
9498
}
95-
var url = options.host + '/' + options.prefix
96-
+ '/v1/page/html/' + encodeURIComponent(title) + '/' + oldid;
97-
return preq.get({
98-
uri: url,
99-
headers: {
100-
'user-agent': options.userAgent,
101-
'accept-encoding': 'gzip'
102-
},
103-
retries: 5,
104-
timeout: 60000,
105-
// Request a Buffer by default, don't decode to a String. This
106-
// saves CPU cycles, but also a lot of memory as large strings are
107-
// stored in the old space of the JS heap while Buffers are stored
108-
// outside the JS heap.
109-
encoding: null
110-
})
99+
return preq.get(articleReqTpl.expand({
100+
request: {
101+
params: Object.assign({title: title, oldid: oldid}, options)
102+
}
103+
}))
111104
.then(function(res) {
112-
//console.log('done', title);
113105
if (options.store) {
114106
return options.store.saveArticle(res.body, title, oldid);
115107
}
@@ -220,6 +212,23 @@ function makeDump (options) {
220212
storeSetup = makeSQLiteStore(options);
221213
}
222214

215+
// set up the article request template once on start-up
216+
articleReqTpl = new Template({
217+
method: 'get',
218+
uri: options.url,
219+
headers: {
220+
'user-agent': options.userAgent,
221+
'accept-encoding': 'gzip'
222+
},
223+
retries: 5,
224+
timeout: 60000,
225+
// Request a Buffer by default, don't decode to a String. This
226+
// saves CPU cycles, but also a lot of memory as large strings are
227+
// stored in the old space of the JS heap while Buffers are stored
228+
// outside the JS heap.
229+
encoding: null
230+
});
231+
223232
return storeSetup
224233
.then(function(store) {
225234
options.store = store;

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"core-js": "^1.0.0",
2424
"preq": "~0.3.1",
2525
"sqlite3": "^3.0.5",
26+
"swagger-router": "^0.5.6",
2627
"yargs": "~1.2.1"
2728
}
2829
}

0 commit comments

Comments
 (0)