Skip to content

Commit af16683

Browse files
author
Marko Obrovac
committed
Parsoid: Create the modules for both variants
Using the Parsoid class, we create two new modules, one for the JS variant and one for the PHP variant. Each module declares a new class that inherits from Parsoid but sets up its own methods for making requests to Parsoid, as well as methods for obtaining the bucket URIs to use. Bug: T230791
1 parent 2e9a6e8 commit af16683

File tree

5 files changed

+230
-108
lines changed

5 files changed

+230
-108
lines changed

lib/parsoid.js

Lines changed: 30 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ const URI = HyperSwitch.URI;
66
const HTTPError = HyperSwitch.HTTPError;
77

88
const uuidv1 = require('uuid/v1');
9-
const uuidUtils = require('../lib/uuidUtils');
9+
const uuidUtils = require('./uuidUtils');
1010

11-
const mwUtil = require('../lib/mwUtil');
11+
const mwUtil = require('./mwUtil');
1212

1313
// Temporary work-around for Parsoid issue
1414
// https://phabricator.wikimedia.org/T93715
@@ -104,12 +104,15 @@ class ParsoidService {
104104

105105
_initOpts(opts = {}) {
106106
this.options = opts;
107-
this.parsoidHost = opts.parsoidHost;
107+
this.parsoidUri = opts.host || opts.parsoidHost;
108108
this.options.stash_ratelimit = opts.stash_ratelimit || 5;
109-
this.options.grace_ttl = opts.grace_ttl || 86400;
110109
this._blacklist = compileReRenderBlacklist(opts.rerenderBlacklist);
111-
if (!opts.parsoidHost) {
112-
throw new Error('Parsoid module: the option parsoidHost must be provided!');
110+
if (!this.parsoidUri) {
111+
throw new Error('Parsoid module: the option host must be provided!');
112+
}
113+
// remove the trailing slash, if any
114+
if (this.parsoidUri.slice(-1) === '/') {
115+
this.parsoidUri = this.parsoidUri.slice(0, -1);
113116
}
114117
}
115118

@@ -142,34 +145,6 @@ class ParsoidService {
142145
}
143146
}
144147

145-
/**
146-
* Get the URI of a bucket for the latest Parsoid content.
147-
* @param {string} domain the domain name.
148-
* @param {string} title the article title.
149-
* @return {HyperSwitch.URI}
150-
*/
151-
getLatestBucketURI(domain, title) {
152-
return new URI([
153-
domain, 'sys', 'key_value', 'parsoid', title
154-
]);
155-
}
156-
157-
/**
158-
* Get the URI of a bucket for stashing Parsoid content. Used both for stashing
159-
* original HTML/Data-Parsoid for normal edits as well as for stashing transforms.
160-
*
161-
* @param {string} domain the domain name.
162-
* @param {string} title the article title.
163-
* @param {number} revision the revision of the article.
164-
* @param {string} tid the TID of the content.
165-
* @return {HyperSwitch.URI}
166-
*/
167-
getStashBucketURI(domain, title, revision, tid) {
168-
return new URI([
169-
domain, 'sys', 'key_value', 'parsoid-stash', `${title}:${revision}:${tid}`
170-
]);
171-
}
172-
173148
/**
174149
* Get full content from the stash bucket.
175150
* @param {HyperSwitch} hyper the hyper object to route requests
@@ -183,7 +158,7 @@ class ParsoidService {
183158
*/
184159
_getStashedContent(hyper, domain, title, revision, tid) {
185160
return hyper.get({
186-
uri: this.getStashBucketURI(domain, title, revision, tid)
161+
uri: this._getStashBucketURI(domain, title, revision, tid)
187162
})
188163
.then((res) => {
189164
res = res.body;
@@ -198,7 +173,7 @@ class ParsoidService {
198173
const htmlResponse = parsoidResp.body.html;
199174
const etag = mwUtil.parseETag(parsoidResp.headers.etag);
200175
return hyper.put({
201-
uri: this.getStashBucketURI(rp.domain, rp.title, etag.rev, etag.tid),
176+
uri: this._getStashBucketURI(rp.domain, rp.title, etag.rev, etag.tid),
202177
// Note. The headers we are storing here are for the whole pagebundle response.
203178
// The individual components of the pagebundle contain their own headers that
204179
// are used to generate actual responses.
@@ -230,7 +205,7 @@ class ParsoidService {
230205
saveParsoidResultToLatest(hyper, domain, title, parsoidResp) {
231206
const dataParsoidResponse = parsoidResp.body['data-parsoid'];
232207
const htmlResponse = parsoidResp.body.html;
233-
return hyper.get({ uri: this.getLatestBucketURI(domain, title) })
208+
return hyper.get({ uri: this._getLatestBucketURI(domain, title) })
234209
.then((existingRes) => {
235210
// TODO: This is a race condition and we're doing a write after read
236211
// in a distributed concurrent environment. For revisions this should
@@ -244,7 +219,7 @@ class ParsoidService {
244219
return existingRes;
245220
})
246221
.catch({ status: 404 }, { status: 412 }, () => hyper.put({
247-
uri: this.getLatestBucketURI(domain, title),
222+
uri: this._getLatestBucketURI(domain, title),
248223
// Note. The headers we are storing here are for the whole pagebundle response.
249224
// The individual components of the pagebundle contain their own headers that
250225
// are used to generate actual responses.
@@ -270,7 +245,7 @@ class ParsoidService {
270245
const etag = mwUtil.makeETag(rp.revision, tid, 'stash');
271246
const wtType = req.original && req.original.headers['content-type'] || 'text/plain';
272247
return transformPromise.then((original) => hyper.put({
273-
uri: this.getStashBucketURI(rp.domain, rp.title, rp.revision, tid),
248+
uri: this._getStashBucketURI(rp.domain, rp.title, rp.revision, tid),
274249
headers: {
275250
'x-store-etag': etag,
276251
'content-type': 'application/octet-stream',
@@ -310,9 +285,9 @@ class ParsoidService {
310285
*/
311286
_getContentWithFallback(hyper, domain, title, revision, tid) {
312287
if (!revision && !tid) {
313-
return hyper.get({ uri: this.getLatestBucketURI(domain, title) });
288+
return hyper.get({ uri: this._getLatestBucketURI(domain, title) });
314289
} else if (!tid) {
315-
return hyper.get({ uri: this.getLatestBucketURI(domain, title) })
290+
return hyper.get({ uri: this._getLatestBucketURI(domain, title) })
316291
.then((res) => {
317292
const resEtag = mwUtil.parseETag(res.headers.etag);
318293
if (revision !== resEtag.rev) {
@@ -322,10 +297,10 @@ class ParsoidService {
322297
});
323298
} else {
324299
return hyper.get({
325-
uri: this.getStashBucketURI(domain, title, revision, tid)
300+
uri: this._getStashBucketURI(domain, title, revision, tid)
326301
})
327302
.catch({ status: 404 }, () =>
328-
hyper.get({ uri: this.getLatestBucketURI(domain, title) })
303+
hyper.get({ uri: this._getLatestBucketURI(domain, title) })
329304
.then((res) => {
330305
const resEtag = mwUtil.parseETag(res.headers.etag);
331306
if (revision !== resEtag.rev || tid !== resEtag.tid) {
@@ -339,12 +314,10 @@ class ParsoidService {
339314

340315
_getPageBundleFromParsoid(hyper, req) {
341316
const rp = req.params;
342-
const parsoidURI = `${this.parsoidHost}/${rp.domain}/v3/page/pagebundle/` +
343-
`${encodeURIComponent(rp.title)}/${rp.revision}`;
344-
return hyper.get({
345-
uri: new URI(parsoidURI),
346-
headers: req.headers
347-
});
317+
return hyper.get(this._getParsoidReq(
318+
req,
319+
`page/pagebundle/${encodeURIComponent(rp.title)}/${rp.revision}`
320+
));
348321
}
349322

350323
/**
@@ -654,18 +627,18 @@ class ParsoidService {
654627
parsoidExtraPath = `/${parsoidExtraPath}`;
655628
}
656629

657-
const parsoidReq = {
658-
uri: `${this.parsoidHost}/${rp.domain}/v3/transform/` +
659-
`${parsoidFrom}/to/${parsoidTo}${parsoidExtraPath}`,
660-
headers: {
630+
const parsoidReq = this._getParsoidReq(
631+
req,
632+
`transform/${parsoidFrom}/to/${parsoidTo}${parsoidExtraPath}`,
633+
{
661634
'content-type': 'application/json',
662635
'user-agent': req['user-agent'],
663636
'content-language': req.headers['content-language'],
664637
accept: req.headers.accept,
665638
'accept-language': req.headers['accept-language']
666639
},
667-
body: req.body
668-
};
640+
req.body
641+
);
669642

670643
const transformPromise = hyper.post(parsoidReq);
671644
if (req.body.stash && from === 'wikitext' && to === 'html') {
@@ -677,12 +650,11 @@ class ParsoidService {
677650

678651
getLintErrors(hyper, req) {
679652
const rp = req.params;
680-
let path = `${this.parsoidHost}/${rp.domain}/v3/transform/` +
681-
`wikitext/to/lint/${encodeURIComponent(rp.title)}`;
653+
let path = `transform/wikitext/to/lint/${encodeURIComponent(rp.title)}`;
682654
if (rp.revision) {
683655
path += `/${rp.revision}`;
684656
}
685-
return hyper.post({ uri: path });
657+
return hyper.post(this._getParsoidReq(req, path, {}));
686658
}
687659

688660
makeTransform(from, to) {

projects/sys/default.wmf.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ paths:
1111
- path: sys/page_save.js
1212
/parsoid:
1313
x-modules:
14-
- path: sys/parsoid.js
14+
- path: sys/parsoid-js.js
1515
options:
16-
parsoidHost: '{{options.parsoid.host}}'
16+
host: '{{options.parsoid.host}}'
1717
response_cache_control: '{{options.purged_cache_control}}'
1818
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
1919
# A list of pages that we don't currently want to re-render on

sys/parsoid-js.js

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
'use strict';
2+
3+
const HyperSwitch = require('hyperswitch');
4+
5+
const Parsoid = require('../lib/parsoid.js');
6+
const spec = HyperSwitch.utils.loadSpec(`${__dirname}/parsoid.yaml`);
7+
8+
const URI = HyperSwitch.URI;
9+
10+
class ParsoidJS extends Parsoid {
11+
12+
/**
13+
* Assembles the request that is to be used to call the Parsoid service
14+
*
15+
* @param {Object} req the original request received by the module
16+
* @param {string} path the path portion of the URI, without the domain or API version
17+
* @param {Object} [headers] the headers to send, defaults to req.headers
18+
* @param {Object} [body] the body of the request, defaults to undefined
19+
* @return {Object} the request object to send
20+
*/
21+
_getParsoidReq(req, path, headers, body) {
22+
return {
23+
uri: new URI(`${this.parsoidUri}/${req.params.domain}/v3/${path}`),
24+
headers: headers || req.headers,
25+
body
26+
};
27+
}
28+
29+
/**
30+
* Gets the URI of a bucket for the latest Parsoid content
31+
*
32+
* @param {string} domain the domain name
33+
* @param {string} title the article title
34+
* @return {HyperSwitch.URI}
35+
*/
36+
_getLatestBucketURI(domain, title) {
37+
return new URI([
38+
domain, 'sys', 'key_value', 'parsoid', title
39+
]);
40+
}
41+
42+
/**
43+
* Gets the URI of a bucket for stashing Parsoid content. Used both for stashing
44+
* original HTML/Data-Parsoid for normal edits as well as for stashing transforms
45+
*
46+
* @param {string} domain the domain name
47+
* @param {string} title the article title
48+
* @param {number} revision the revision of the article
49+
* @param {string} tid the TID of the content
50+
* @return {HyperSwitch.URI}
51+
*/
52+
_getStashBucketURI(domain, title, revision, tid) {
53+
return new URI([
54+
domain, 'sys', 'key_value', 'parsoid-stash', `${title}:${revision}:${tid}`
55+
]);
56+
}
57+
58+
}
59+
60+
module.exports = (options = {}) => {
61+
const ps = new ParsoidJS(options);
62+
return {
63+
spec,
64+
operations: {
65+
// Revision retrieval per format
66+
getHtml: ps.getFormat.bind(ps, 'html'),
67+
getDataParsoid: ps.getFormat.bind(ps, 'data-parsoid'),
68+
getLintErrors: ps.getLintErrors.bind(ps),
69+
// Transforms
70+
transformHtmlToHtml: ps.makeTransform('html', 'html'),
71+
transformHtmlToWikitext: ps.makeTransform('html', 'wikitext'),
72+
transformWikitextToHtml: ps.makeTransform('wikitext', 'html'),
73+
transformWikitextToLint: ps.makeTransform('wikitext', 'lint'),
74+
transformChangesToWikitext: ps.makeTransform('changes', 'wikitext')
75+
},
76+
// Dynamic resource dependencies, specific to implementation
77+
resources: [
78+
{
79+
uri: '/{domain}/sys/key_value/parsoid',
80+
headers: {
81+
'content-type': 'application/json'
82+
},
83+
body: {
84+
valueType: 'blob'
85+
}
86+
},
87+
{
88+
uri: '/{domain}/sys/key_value/parsoid-stash',
89+
headers: {
90+
'content-type': 'application/json'
91+
},
92+
body: {
93+
valueType: 'blob',
94+
default_time_to_live: options.grace_ttl || 86400
95+
}
96+
}
97+
]
98+
};
99+
};

0 commit comments

Comments
 (0)