Skip to content

Commit f25b6a2

Browse files
committed
Update htmlspider from production
1 parent 7a87313 commit f25b6a2

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

htmlspider.js

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ var fs = Promise.promisifyAll(require('fs'));
99
var PromiseStream = require('./PromiseStream');
1010

1111
// Article dump parallelism
12-
var maxConcurrency = 70;
12+
var maxConcurrency = 10;
1313

1414
function getArticles (apiURL, namespace, res) {
1515
var next = res.next || '';
@@ -23,7 +23,7 @@ function getArticles (apiURL, namespace, res) {
2323
+ namespace + '&format=json&gapcontinue=' + encodeURIComponent( next );
2424
//console.log(url);
2525

26-
return preq.get(url, { retries: 10 })
26+
return preq.get(url, { timeout: 60* 1000, retries: 5 })
2727
.then(function(res) {
2828
res = res.body;
2929
var articles = [];
@@ -69,11 +69,11 @@ function makeDump (apiURL, prefix, ns, host) {
6969
next: ''
7070
};
7171

72+
// XXX: abstract this into some kind of buffered 'spread' utility
7273
var articleStream = new PromiseStream(getArticles.bind(null, apiURL, ns),
73-
{next: ''}, maxConcurrency);
74+
{next: ''}, 6);
7475
var articles = [];
7576
var waiters = [];
76-
7777
function processArticles (newArticles) {
7878
articles = newArticles.articles;
7979
while(waiters.length && articles.length) {
@@ -109,7 +109,7 @@ function makeDump (apiURL, prefix, ns, host) {
109109
});
110110
}
111111

112-
var dumpStream = new PromiseStream(dumpOne, undefined, maxConcurrency, maxConcurrency);
112+
var dumpStream = new PromiseStream(dumpOne, undefined, 1, maxConcurrency);
113113

114114
function loop () {
115115
return dumpStream.next()

0 commit comments

Comments
 (0)