Skip to content

Commit a771af2

Browse files
committed
Check whether a revision already exists locally
1 parent 5e3e0dd commit a771af2

File tree

1 file changed

+53
-19
lines changed

1 file changed

+53
-19
lines changed

htmldumper.js

Lines changed: 53 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,28 @@ function getArticles (options, res) {
6464
});
6565
}
6666

67+
/**
 * Check whether a given article revision has already been dumped to disk.
 *
 * The revision is looked up at
 * `<options.saveDir>/<options.prefix>/<encodeURIComponent(title)>/<oldid>`.
 *
 * @param {Object} options - dump options; `saveDir` and `prefix` are read.
 * @param {string} title - article title; URI-encoded to form the directory name.
 * @param {number|string} oldid - revision id, appended as the file name.
 * @returns {Promise<boolean>} resolves to `true` when a regular file exists
 *   at that path, and to `false` when it does not or when the stat call
 *   fails.
 */
function checkArticle (options, title, oldid) {
    var dumpDir = options.saveDir + '/' + options.prefix;
    var dirName = dumpDir + '/' + encodeURIComponent(title);
    var fileName = dirName + '/' + oldid;
    // NOTE(review): fs.statAsync is presumably a promisified fs.stat set up
    // elsewhere in this file -- confirm.
    return fs.statAsync(fileName)
    .catch(function(e) {
        // A missing file or directory is the expected "not dumped yet" case.
        // Any other failure (permissions, I/O, ...) is still treated as
        // "not present" so the dump keeps going, but it is reported instead
        // of being swallowed silently.
        if (!e || (e.code !== 'ENOENT' && e.code !== 'ENOTDIR')) {
            console.error('Error checking', fileName, e);
        }
        return null;
    })
    .then(function(fileStats) {
        // Check if we already have this article revision
        if (fileStats && fileStats.isFile()) {
            // We already have the article, nothing to do.
            // XXX: Also track / check last-modified time for template
            // re-expansions without revisions change
            console.log('Exists:', title, oldid);
            return true;
        }
        return false;
    });
}
88+
6789
function saveArticle (options, body, title, oldid) {
6890
var dumpDir = options.saveDir + '/' + options.prefix;
6991
var dirName = dumpDir + '/' + encodeURIComponent(title);
@@ -89,25 +111,37 @@ function saveArticle (options, body, title, oldid) {
89111
}
90112

91113
/**
 * Fetch one article revision as HTML and, when a save directory is
 * configured, store it on disk -- unless that revision is already there.
 *
 * @param {Object} options - dump options; `host`, `prefix` and `saveDir`
 *   are read here.
 * @param {string} title - article title; URI-encoded into the request URL.
 * @param {number|string} oldid - revision id, appended to the request URL.
 * @returns {Promise} resolves with the result of `saveArticle` when the
 *   revision was fetched and saved, and with `undefined` when the revision
 *   already exists or no save directory is configured.
 */
function dumpArticle (options, title, oldid) {
    // Without a save directory there is nothing on disk to compare against,
    // so the revision is always treated as missing.
    var alreadyDumped = options.saveDir
        ? checkArticle(options, title, oldid)
        : Promise.resolve(false);

    return alreadyDumped.then(function(exists) {
        if (exists) {
            // Revision is already stored locally; skip the request.
            return;
        }

        console.log('Dumping', title, oldid);
        var requestOptions = {
            uri: 'http://' + options.host + '/' + options.prefix
                + '/v1/page/' + encodeURIComponent(title) + '/html/' + oldid,
            retries: 5,
            timeout: 60000,
            // Request a Buffer by default, don't decode to a String. This
            // saves CPU cycles, but also a lot of memory as large strings are
            // stored in the old space of the JS heap while Buffers are stored
            // outside the JS heap.
            encoding: null
        };

        return preq.get(requestOptions).then(function(res) {
            if (!options.saveDir) {
                return;
            }
            return saveArticle(options, res.body, title, oldid);
        });
    });
}
112146

113147
// Processes chunks of articles one by one

0 commit comments

Comments
 (0)