Skip to content
Merged
Prev Previous commit
Next Next commit
Parsoid: Create the modules for both variants
Using the Parsoid class, we create two new modules for the JS and PHP
variants, respectively. Each module declares a new class that inherits
from Parsoid, but sets up its own methods for making requests to Parsoid
as well as methods for obtaining the bucket URIs to use.

Bug: T230791
  • Loading branch information
Marko Obrovac committed Oct 16, 2019
commit 56cbde4aa05c708d146b02d20c90aa8ce15e678f
88 changes: 30 additions & 58 deletions lib/parsoid.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ const URI = HyperSwitch.URI;
const HTTPError = HyperSwitch.HTTPError;

const uuidv1 = require('uuid/v1');
const uuidUtils = require('../lib/uuidUtils');
const uuidUtils = require('./uuidUtils');

const mwUtil = require('../lib/mwUtil');
const mwUtil = require('./mwUtil');

// Temporary work-around for Parsoid issue
// https://phabricator.wikimedia.org/T93715
Expand Down Expand Up @@ -104,12 +104,15 @@ class ParsoidService {

_initOpts(opts = {}) {
this.options = opts;
this.parsoidHost = opts.parsoidHost;
this.parsoidUri = opts.host || opts.parsoidHost;
this.options.stash_ratelimit = opts.stash_ratelimit || 5;
this.options.grace_ttl = opts.grace_ttl || 86400;
this._blacklist = compileReRenderBlacklist(opts.rerenderBlacklist);
if (!opts.parsoidHost) {
throw new Error('Parsoid module: the option parsoidHost must be provided!');
if (!this.parsoidUri) {
throw new Error('Parsoid module: the option host must be provided!');
}
// remove the trailing slash, if any
if (this.parsoidUri.slice(-1) === '/') {
this.parsoidUri = this.parsoidUri.slice(0, -1);
}
}

Expand Down Expand Up @@ -142,34 +145,6 @@ class ParsoidService {
}
}

/**
* Get the URI of a bucket for the latest Parsoid content.
* @param {string} domain the domain name.
* @param {string} title the article title.
* @return {HyperSwitch.URI}
*/
getLatestBucketURI(domain, title) {
return new URI([
domain, 'sys', 'key_value', 'parsoid', title
]);
}

/**
* Get the URI of a bucket for stashing Parsoid content. Used both for stashing
* original HTML/Data-Parsoid for normal edits as well as for stashing transforms.
*
* @param {string} domain the domain name.
* @param {string} title the article title.
* @param {number} revision the revision of the article.
* @param {string} tid the TID of the content.
* @return {HyperSwitch.URI}
*/
getStashBucketURI(domain, title, revision, tid) {
return new URI([
domain, 'sys', 'key_value', 'parsoid-stash', `${title}:${revision}:${tid}`
]);
}

/**
* Get full content from the stash bucket.
* @param {HyperSwitch} hyper the hyper object to route requests
Expand All @@ -183,7 +158,7 @@ class ParsoidService {
*/
_getStashedContent(hyper, domain, title, revision, tid) {
return hyper.get({
uri: this.getStashBucketURI(domain, title, revision, tid)
uri: this._getStashBucketURI(domain, title, revision, tid)
})
.then((res) => {
res = res.body;
Expand All @@ -198,7 +173,7 @@ class ParsoidService {
const htmlResponse = parsoidResp.body.html;
const etag = mwUtil.parseETag(parsoidResp.headers.etag);
return hyper.put({
uri: this.getStashBucketURI(rp.domain, rp.title, etag.rev, etag.tid),
uri: this._getStashBucketURI(rp.domain, rp.title, etag.rev, etag.tid),
// Note. The headers we are storing here are for the whole pagebundle response.
// The individual components of the pagebundle contain their own headers,
// which are used to generate actual responses.
Expand Down Expand Up @@ -230,7 +205,7 @@ class ParsoidService {
saveParsoidResultToLatest(hyper, domain, title, parsoidResp) {
const dataParsoidResponse = parsoidResp.body['data-parsoid'];
const htmlResponse = parsoidResp.body.html;
return hyper.get({ uri: this.getLatestBucketURI(domain, title) })
return hyper.get({ uri: this._getLatestBucketURI(domain, title) })
.then((existingRes) => {
// TODO: This is a race condition and we're doing a write after read
// in a distributed concurrent environment. For revisions this should
Expand All @@ -244,7 +219,7 @@ class ParsoidService {
return existingRes;
})
.catch({ status: 404 }, { status: 412 }, () => hyper.put({
uri: this.getLatestBucketURI(domain, title),
uri: this._getLatestBucketURI(domain, title),
// Note. The headers we are storing here are for the whole pagebundle response.
// The individual components of the pagebundle contain their own headers,
// which are used to generate actual responses.
Expand All @@ -270,7 +245,7 @@ class ParsoidService {
const etag = mwUtil.makeETag(rp.revision, tid, 'stash');
const wtType = req.original && req.original.headers['content-type'] || 'text/plain';
return transformPromise.then((original) => hyper.put({
uri: this.getStashBucketURI(rp.domain, rp.title, rp.revision, tid),
uri: this._getStashBucketURI(rp.domain, rp.title, rp.revision, tid),
headers: {
'x-store-etag': etag,
'content-type': 'application/octet-stream',
Expand Down Expand Up @@ -310,9 +285,9 @@ class ParsoidService {
*/
_getContentWithFallback(hyper, domain, title, revision, tid) {
if (!revision && !tid) {
return hyper.get({ uri: this.getLatestBucketURI(domain, title) });
return hyper.get({ uri: this._getLatestBucketURI(domain, title) });
} else if (!tid) {
return hyper.get({ uri: this.getLatestBucketURI(domain, title) })
return hyper.get({ uri: this._getLatestBucketURI(domain, title) })
.then((res) => {
const resEtag = mwUtil.parseETag(res.headers.etag);
if (revision !== resEtag.rev) {
Expand All @@ -322,10 +297,10 @@ class ParsoidService {
});
} else {
return hyper.get({
uri: this.getStashBucketURI(domain, title, revision, tid)
uri: this._getStashBucketURI(domain, title, revision, tid)
})
.catch({ status: 404 }, () =>
hyper.get({ uri: this.getLatestBucketURI(domain, title) })
hyper.get({ uri: this._getLatestBucketURI(domain, title) })
.then((res) => {
const resEtag = mwUtil.parseETag(res.headers.etag);
if (revision !== resEtag.rev || tid !== resEtag.tid) {
Expand All @@ -339,12 +314,10 @@ class ParsoidService {

_getPageBundleFromParsoid(hyper, req) {
const rp = req.params;
const parsoidURI = `${this.parsoidHost}/${rp.domain}/v3/page/pagebundle/` +
`${encodeURIComponent(rp.title)}/${rp.revision}`;
return hyper.get({
uri: new URI(parsoidURI),
headers: req.headers
});
return hyper.get(this._getParsoidReq(
req,
`page/pagebundle/${encodeURIComponent(rp.title)}/${rp.revision}`
));
}

/**
Expand Down Expand Up @@ -654,18 +627,18 @@ class ParsoidService {
parsoidExtraPath = `/${parsoidExtraPath}`;
}

const parsoidReq = {
uri: `${this.parsoidHost}/${rp.domain}/v3/transform/` +
`${parsoidFrom}/to/${parsoidTo}${parsoidExtraPath}`,
headers: {
const parsoidReq = this._getParsoidReq(
req,
`transform/${parsoidFrom}/to/${parsoidTo}${parsoidExtraPath}`,
{
'content-type': 'application/json',
'user-agent': req['user-agent'],
'content-language': req.headers['content-language'],
accept: req.headers.accept,
'accept-language': req.headers['accept-language']
},
body: req.body
};
req.body
);

const transformPromise = hyper.post(parsoidReq);
if (req.body.stash && from === 'wikitext' && to === 'html') {
Expand All @@ -677,12 +650,11 @@ class ParsoidService {

getLintErrors(hyper, req) {
const rp = req.params;
let path = `${this.parsoidHost}/${rp.domain}/v3/transform/` +
`wikitext/to/lint/${encodeURIComponent(rp.title)}`;
let path = `transform/wikitext/to/lint/${encodeURIComponent(rp.title)}`;
if (rp.revision) {
path += `/${rp.revision}`;
}
return hyper.post({ uri: path });
return hyper.post(this._getParsoidReq(req, path, {}));
}

makeTransform(from, to) {
Expand Down
4 changes: 2 additions & 2 deletions projects/sys/default.wmf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ paths:
- path: sys/page_save.js
/parsoid:
x-modules:
- path: sys/parsoid.js
- path: sys/parsoid-js.js
options:
parsoidHost: '{{options.parsoid.host}}'
host: '{{options.parsoid.host}}'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
# A list of pages that we don't currently want to re-render on
Expand Down
99 changes: 99 additions & 0 deletions sys/parsoid-js.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
'use strict';

const HyperSwitch = require('hyperswitch');

const Parsoid = require('../lib/parsoid.js');
const spec = HyperSwitch.utils.loadSpec(`${__dirname}/parsoid.yaml`);

const URI = HyperSwitch.URI;

class ParsoidJS extends Parsoid {

    /**
     * Builds the request object used to call the Parsoid service.
     *
     * The target URI is composed of the configured Parsoid URI, the domain
     * taken from the incoming request's parameters, the `v3` API version
     * segment and the supplied path.
     *
     * @param {Object} req the original request received by the module
     * @param {string} path the path portion of the URI, without the domain or API version
     * @param {Object} [headers] the headers to send; when falsy, req.headers is used
     * @param {Object} [body] the body of the request; undefined when omitted
     * @return {Object} the request object to send, with uri, headers and body
     */
    _getParsoidReq(req, path, headers, body) {
        const target = `${this.parsoidUri}/${req.params.domain}/v3/${path}`;
        const outgoingHeaders = headers || req.headers;
        return {
            uri: new URI(target),
            headers: outgoingHeaders,
            body
        };
    }

    /**
     * Returns the URI of the bucket entry holding the latest Parsoid content
     * for a page.
     *
     * @param {string} domain the domain name
     * @param {string} title the article title
     * @return {HyperSwitch.URI}
     */
    _getLatestBucketURI(domain, title) {
        const segments = [domain, 'sys', 'key_value', 'parsoid', title];
        return new URI(segments);
    }

    /**
     * Returns the URI of the bucket entry used for stashing Parsoid content.
     * The same bucket serves both the original HTML/Data-Parsoid stashed for
     * normal edits and stashed transforms.
     *
     * @param {string} domain the domain name
     * @param {string} title the article title
     * @param {number} revision the revision of the article
     * @param {string} tid the TID of the content
     * @return {HyperSwitch.URI}
     */
    _getStashBucketURI(domain, title, revision, tid) {
        // The entry key joins title, revision and TID with colons
        const stashKey = `${title}:${revision}:${tid}`;
        const segments = [domain, 'sys', 'key_value', 'parsoid-stash', stashKey];
        return new URI(segments);
    }

}

module.exports = (options = {}) => {
const ps = new ParsoidJS(options);
return {
spec,
operations: {
// Revision retrieval per format
getHtml: ps.getFormat.bind(ps, 'html'),
getDataParsoid: ps.getFormat.bind(ps, 'data-parsoid'),
getLintErrors: ps.getLintErrors.bind(ps),
// Transforms
transformHtmlToHtml: ps.makeTransform('html', 'html'),
transformHtmlToWikitext: ps.makeTransform('html', 'wikitext'),
transformWikitextToHtml: ps.makeTransform('wikitext', 'html'),
transformWikitextToLint: ps.makeTransform('wikitext', 'lint'),
transformChangesToWikitext: ps.makeTransform('changes', 'wikitext')
},
// Dynamic resource dependencies, specific to implementation
resources: [
{
uri: '/{domain}/sys/key_value/parsoid',
headers: {
'content-type': 'application/json'
},
body: {
valueType: 'blob'
}
},
{
uri: '/{domain}/sys/key_value/parsoid-stash',
headers: {
'content-type': 'application/json'
},
body: {
valueType: 'blob',
default_time_to_live: options.grace_ttl || 86400
}
}
]
};
};
Loading