Skip to content
Prev Previous commit
Next Next commit
Parsoid: Add the variant proxy
The proxy allows directing requests to either variant. It loads both
variants' modules internally and uses their operations to complete
requests. It is designed in such a way so as to allow an easy transition
between fully using JS to fully using PHP with no config changes. When
first introduced, its defaults emulate the JS-only scenario. Once the
switch is fully achieved, then simply changing `sys/parsoid.js` for
`sys/parsoid-php.js` in `projects/sys/default.wmf.yaml` with no config
change results in having a fully-functional Parsoid/PHP module. The
proxy can, thus, function properly with only one of the variant modules
loaded and configured.

In order to support the transition period, the proxy has three modes of
operation: single, mirror and split. In single mode, only one variant is
used, defined by the `default_variant` configuration value, defaulting
to `js`. This allows us to start using the proxy with no config changes.
In the final stages of the transition (before we remove the proxy), it
can be changed to `php` to only use the PHP variant. The mirror mode is
used to asynchronously mirror traffic to the PHP variant. Requests are
issued to both variants, but only the JS one is returned. The amount of
traffic to be mirrored can be tuned with the `percentage` configuration
parameter. The important caveat here is that only requests for
`/page/{format}` end points are mirrored - we cannot do so reliably for
transforms since they rely on stashed content, which is likely not to be
available for the PHP variant. Furthermore, when the proxy is configured
in mirror mode, dependency update events are emitted only for the JS
variant, so as to avoid duplicates. Finally, the split mode is used to
split the traffic between the two variants based on the request domain.
If one of the patterns given in the `pattern` configuration parameter is
matched, then the variant not defined in `default_variant` is used,
otherwise the default one is used. This mode supports the second stage
of the transition, where JS will be authoritative for the majority of
domains, while we will be slowly moving projects one by one (or group by
group) over to using Parsoid/PHP.

Apart from these modes, the proxy also supports clients directly telling
it which variant to use. If the incoming request has the
`PARSOID_VARIANT` cookie or the `X-Parsoid-Variant` header set, then the
request is sent directly to that variant regardless of the proxy's mode.
When deciding where to send the request, the proxy gives precedence to
the header in case both are set.

Bug: T230791
  • Loading branch information
Marko Obrovac committed Oct 16, 2019
commit eec926eabc5cf5d1f4978e09192ae1bc7b2e1cac
5 changes: 4 additions & 1 deletion lib/parsoid.js
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,10 @@ class ParsoidService {
}
})
.then(() => {
const dependencyUpdate = _dependenciesUpdate(hyper, req, newContent);
let dependencyUpdate = P.resolve();
if (!this.options.skip_updates) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unrelated I believe, but huge kudos for finding this :)

dependencyUpdate = _dependenciesUpdate(hyper, req, newContent);
}
if (mwUtil.isNoCacheRequest(req)) {
// Finish background updates before returning
return dependencyUpdate.thenReturn(res);
Expand Down
4 changes: 3 additions & 1 deletion projects/sys/default.wmf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ paths:
- path: sys/page_save.js
/parsoid:
x-modules:
- path: sys/parsoid-js.js
- path: sys/parsoid.js
options:
host: '{{options.parsoid.host}}'
php_host: '{{options.parsoid.php_host}}'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
proxy: '{{options.parsoid.proxy}}'
# A list of pages that we don't currently want to re-render on
# each edit. Most of these are huge bot-edited pages, which are
# rarely viewed in any case.
Expand Down
204 changes: 204 additions & 0 deletions sys/parsoid.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
'use strict';

const P = require('bluebird');
const HyperSwitch = require('hyperswitch');

const mwUtil = require('../lib/mwUtil');

const HTTPError = HyperSwitch.HTTPError;
const spec = HyperSwitch.utils.loadSpec(`${__dirname}/parsoid.yaml`);

// Names of the operations the proxy exposes. Each name is bound to
// ParsoidProxy#doRequest() by getOperations(), and is also used to build the
// error-only operation table for a variant that has no host configured.
const OPERATIONS = [
'getHtml',
'getDataParsoid',
'getLintErrors',
'transformHtmlToHtml',
'transformHtmlToWikitext',
'transformWikitextToHtml',
'transformWikitextToLint',
'transformChangesToWikitext'
];

// Returns the "other" Parsoid variant: 'php' when given 'js', and 'js' for
// any other value.
const invert = (v) => {
    if (v === 'js') {
        return 'php';
    }
    return 'js';
};

/**
 * Proxy sitting in front of the Parsoid/JS and Parsoid/PHP modules.
 *
 * It loads the module of every variant that has a host configured and
 * dispatches each operation to one of them depending on its mode:
 *  - single: only `default_variant` is used;
 *  - mirror: the default (js) variant answers the request, while a share of
 *    the non-transform requests is also sent to the other variant in the
 *    background;
 *  - split: requests for domains matching `pattern` go to the non-default
 *    variant, all others to the default one.
 * Regardless of the mode, a client can pin a variant with the
 * `X-Parsoid-Variant` header or the `PARSOID_VARIANT` cookie.
 */
class ParsoidProxy {

    /**
     * @param {Object} [opts] module options; `host` (or the `parsoidHost`
     *   alias) and `php_host` name the variants' back-ends, `proxy` holds
     *   the proxy-specific settings, everything else is handed to the
     *   variant modules untouched.
     * @throws {Error} when the configuration is invalid
     */
    constructor(opts = {}) {
        const modOpts = this._initOpts(opts);
        const jsOpts = Object.assign({}, modOpts);
        const phpOpts = Object.assign({}, modOpts);
        // each variant module only ever sees its own back-end as `host`
        delete jsOpts.php_host;
        phpOpts.host = phpOpts.php_host;
        delete phpOpts.php_host;
        this._initMods(jsOpts, phpOpts);
    }

    /**
     * Validates the proxy settings, stores them on the instance and returns
     * the options to be passed on to the variant modules.
     *
     * @param {Object} opts the raw module options
     * @return {Object} a copy of `opts` without `parsoidHost` and `proxy`
     * @throws {Error} when no host is given or a proxy setting is invalid
     */
    _initOpts(opts) {
        const retOpts = Object.assign({}, opts);
        retOpts.host = retOpts.host || retOpts.parsoidHost;
        if (!retOpts.host && !retOpts.php_host) {
            throw new Error('Parsoid proxy: no host option specified!');
        }
        this.options = retOpts.proxy || {};
        // possible values are 'js' and 'php'
        this.default_variant = this.options.default_variant || 'js';
        if (!['js', 'php'].includes(this.default_variant)) {
            throw new Error('Parsoid proxy: valid variants are js and php!');
        }
        // possible values are 'single', 'mirror' and 'split'
        this.mode = this.options.mode || 'single';
        if (!['single', 'mirror', 'split'].includes(this.mode)) {
            throw new Error('Parsoid proxy: valid modes are single, mirror and split!');
        }
        this.percentage = parseFloat(this.options.percentage || 0);
        if (isNaN(this.percentage) || this.percentage < 0 || this.percentage > 100) {
            throw new Error('Parsoid proxy: percentage must a number between 0 and 100!');
        }
        if (this.percentage === 0 && this.mode === 'mirror') {
            // a special case of mirror mode with 0% is in fact the single mode
            this.mode = 'single';
        }
        this.splitRegex = mwUtil.constructRegex(this.options.pattern);
        if (!this.splitRegex && this.mode === 'split') {
            // split mode with no pattern is single mode
            this.mode = 'single';
            this.splitRegex = /^$/;
        } else if (this.mode !== 'split') {
            // outside of split mode the pattern must never redirect a request
            this.splitRegex = /^$/;
        }
        this.resources = [];
        delete retOpts.parsoidHost;
        delete retOpts.proxy;
        return retOpts;
    }

    /**
     * Loads the variant modules and stores their operations in `this.mods`.
     *
     * @param {Object} jsOpts options for the JS variant
     * @param {Object} phpOpts options for the PHP variant
     * @throws {Error} when the mode needs both variants but only one host is
     *   configured, or when mirroring with php as the default variant
     */
    _initMods(jsOpts, phpOpts) {
        if (this.mode !== 'single' && (!jsOpts.host || !phpOpts.host)) {
            // mirror and split modes send traffic to both variants, so a
            // missing host on either side is a configuration error
            throw new Error('Parsoid proxy: expected both host and php_host options!');
        }
        if (!phpOpts.host && this.default_variant === 'php') {
            // only `host` was given but php is the wanted variant:
            // treat `host` as the PHP back-end and leave js unconfigured
            phpOpts.host = jsOpts.host;
            delete jsOpts.host;
        }
        if (this.mode === 'mirror') {
            if (this.default_variant === 'php') {
                throw new Error('Parsoid proxy: when mirroring, only js can be the default variant!');
            }
            // js is the default, so don't let php issue dependency update events
            phpOpts.skip_updates = true;
        }
        this.mods = {
            js: this._addMod('js', jsOpts),
            php: this._addMod('php', phpOpts)
        };
    }

    /**
     * Operation used for every end point of a variant without a host.
     *
     * @throws {HTTPError} always, with status 400
     */
    _backendNotSupported() {
        throw new HTTPError({
            status: 400,
            body: {
                type: 'bad_request',
                description: 'Parsoid variant not configured!'
            }
        });
    }

    /**
     * Loads the module of the given variant if it has a host configured.
     *
     * @param {string} variant 'js' or 'php'
     * @param {Object} opts the options for the variant's module
     * @return {Object} map of operation name to handler; when `opts.host` is
     *   not set, every handler throws a 400 HTTPError
     */
    _addMod(variant, opts) {
        if (opts.host) {
            const mod = require(`./parsoid-${variant}.js`)(opts);
            // we are interested only in the operations and resources
            this.resources = this.resources.concat(mod.resources);
            return mod.operations;
        }
        // return operations that error out if no host is specified
        const ret = {};
        OPERATIONS.forEach((o) => {
            ret[o] = this._backendNotSupported;
        });
        return ret;
    }

    /**
     * Determines whether the client explicitly asked for a variant. The
     * `x-parsoid-variant` header (of the root request first, then of the
     * current request) takes precedence over the `parsoid_variant` cookie of
     * the root request.
     *
     * @param {Object} hyper the HyperSwitch instance; `_rootReq` is read
     * @param {Object} req the current request
     * @return {string|undefined} 'js', 'php' or undefined if nothing was asked
     * @throws {HTTPError} 400 when the requested variant is not js or php
     */
    _getStickyVariant(hyper, req) {
        // _req() treats req.headers as optional, so do the same here instead
        // of failing with a TypeError on header-less requests
        const rootHeaders = (hyper._rootReq && hyper._rootReq.headers) || {};
        const reqHeaders = req.headers || {};
        let variant = rootHeaders['x-parsoid-variant'] || reqHeaders['x-parsoid-variant'];
        if (!variant && rootHeaders.cookie) {
            // anchor on a cookie boundary so that cookies whose name merely
            // ends in "parsoid_variant" are not picked up
            const match = /(?:^|;\s*)parsoid_variant=([^;]+)/i.exec(rootHeaders.cookie);
            if (match) {
                variant = match[1].trim();
            }
        }
        if (!variant) {
            return undefined;
        }
        variant = variant.toLowerCase();
        if (!['js', 'php'].includes(variant)) {
            throw new HTTPError({
                status: 400,
                body: {
                    type: 'bad_request',
                    description: `Parsoid variant ${variant} not configured!`
                }
            });
        }
        return variant;
    }

    /**
     * Runs an operation on the given variant and tags the response with the
     * `x-parsoid-variant` header.
     *
     * @param {string} variant 'js' or 'php'
     * @param {string} operation one of OPERATIONS
     * @param {Object} hyper the HyperSwitch instance
     * @param {Object} req the request; its headers are modified in place
     *   when `setHdr` is true
     * @param {boolean} [setHdr=true] whether to record the variant in the
     *   request's `x-parsoid-variant` header
     * @return {Promise<Object>} the variant's response
     */
    _req(variant, operation, hyper, req, setHdr = true) {
        if (setHdr) {
            req.headers = req.headers || {};
            req.headers['x-parsoid-variant'] = variant;
        }
        return this.mods[variant][operation](hyper, req)
        .then((res) => {
            res.headers = res.headers || {};
            res.headers['x-parsoid-variant'] = variant;
            return P.resolve(res);
        });
    }

    /**
     * Entry point for all operations: picks the variant and forwards the
     * request to it.
     *
     * @param {string} operation one of OPERATIONS
     * @param {Object} hyper the HyperSwitch instance
     * @param {Object} req the incoming request
     * @return {Promise<Object>} the response of the chosen variant
     * @throws {HTTPError} 400 when the client asked for an unknown variant
     */
    doRequest(operation, hyper, req) {
        const sticky = this._getStickyVariant(hyper, req);
        if (sticky) {
            // the variant has been set explicitly by the client, honour it
            return this._req(sticky, operation, hyper, req);
        }
        const variant = this.default_variant;
        // mirror mode works only for getFormat, since for mirroring
        // transforms we would need to be sure we have the php output
        // stashed
        if (this.mode === 'mirror' && !/transform/.test(operation)) {
            // Math.random() is uniform over [0, 1), so this mirrors exactly
            // `percentage` percent of the requests (and all of them at 100)
            if (Math.random() * 100 < this.percentage) {
                const mirrorVariant = invert(variant);
                // issue an async request to the second variant and
                // don't wait for the return value; P.try() also turns a
                // synchronous throw into a rejection, so a broken mirror
                // can never fail the request served to the client
                P.try(() => this._req(mirrorVariant, operation, hyper, req, false))
                .catch((e) => hyper.logger.log(`info/parsoidproxy/${mirrorVariant}`, e));
            }
        }
        // we can now safely check simply where to direct the request using
        // splitRegex because it won't match anything for any mode other than split
        const target = this.splitRegex.test(req.params.domain) ? invert(variant) : variant;
        return this._req(target, operation, hyper, req);
    }

    /**
     * @return {Object} map of every name in OPERATIONS to doRequest() bound
     *   to this instance and that operation
     */
    getOperations() {
        const ret = {};
        OPERATIONS.forEach((o) => {
            ret[o] = this.doRequest.bind(this, o);
        });
        return ret;
    }

}

module.exports = (options = {}) => {
const ps = new ParsoidProxy(options);
return {
spec,
operations: ps.getOperations(),
resources: ps.resources
};
};