Skip to content

Commit 81f6d4e

Browse files
author
Marko Obrovac
committed
Parsoid: Add the variant proxy
The proxy allows directing requests to either variant. It loads both variants' modules internally and uses their operations to complete requests. It is designed in such a way so as to allow an easy transition from fully using JS to fully using PHP with no config changes. When first introduced, its defaults emulate the JS-only scenario. Once the switch is fully achieved, then simply changing `sys/parsoid.js` for `sys/parsoid-php.js` in `projects/sys/default.wmf.yaml` with no config change results in having a fully-functional Parsoid/PHP module. The proxy can, thus, function properly with only one of the variant modules loaded and configured. In order to support the transition period, the proxy has three modes of operation: single, mirror and split. In single mode, only one variant is used, defined by the `default_variant` configuration value, defaulting to `js`. This allows us to start using the proxy with no config changes. In the final stages of the transition (before we remove the proxy), it can be changed to `php` to only use the PHP variant. The mirror mode is used to asynchronously mirror traffic to the PHP variant. Requests are issued to both variants, but only the JS one is returned. The amount of traffic to be mirrored can be tuned with the `percentage` configuration parameter. The important caveat here is that only requests for `/page/{format}` end points are mirrored - we cannot do so reliably for transforms since they rely on stashed content, which is likely not to be available for the PHP variant. Furthermore, when the proxy is configured in mirror mode, dependency update events are emitted only for the JS variant, so as to avoid duplicates. Finally, the split mode is used to split the traffic between the two variants based on the request domain. If one of the patterns given in the `pattern` configuration parameter is matched, then the variant not defined in `default_variant` is used, otherwise the default one is used. 
This mode supports the second stage of the transition, where JS will be authoritative for the majority of domains, while we will be slowly moving projects one by one (or group by group) over to using Parsoid/PHP. Apart from these modes, the proxy also supports clients directly telling it which variant to use. If the incoming request has the `PARSOID_VARIANT` cookie or the `X-Parsoid-Variant` header set, then the request is sent directly to that variant regardless of the proxy's mode. When deciding where to send the request, the proxy gives precedence to the header in case both are set. Bug: T230791
1 parent af16683 commit 81f6d4e

File tree

3 files changed

+211
-2
lines changed

3 files changed

+211
-2
lines changed

lib/parsoid.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,10 @@ class ParsoidService {
368368
}
369369
})
370370
.then(() => {
371-
const dependencyUpdate = _dependenciesUpdate(hyper, req, newContent);
371+
let dependencyUpdate = P.resolve();
372+
if (!this.options.skip_updates) {
373+
dependencyUpdate = _dependenciesUpdate(hyper, req, newContent);
374+
}
372375
if (mwUtil.isNoCacheRequest(req)) {
373376
// Finish background updates before returning
374377
return dependencyUpdate.thenReturn(res);

projects/sys/default.wmf.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ paths:
1111
- path: sys/page_save.js
1212
/parsoid:
1313
x-modules:
14-
- path: sys/parsoid-js.js
14+
- path: sys/parsoid.js
1515
options:
1616
host: '{{options.parsoid.host}}'
17+
php_host: '{{options.parsoid.php_host}}'
1718
response_cache_control: '{{options.purged_cache_control}}'
1819
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
20+
proxy: '{{options.parsoid.proxy}}'
1921
# A list of pages that we don't currently want to re-render on
2022
# each edit. Most of these are huge bot-edited pages, which are
2123
# rarely viewed in any case.

sys/parsoid.js

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
'use strict';
2+
3+
const P = require('bluebird');
4+
const HyperSwitch = require('hyperswitch');
5+
6+
const mwUtil = require('../lib/mwUtil');
7+
8+
const HTTPError = HyperSwitch.HTTPError;
9+
const spec = HyperSwitch.utils.loadSpec(`${__dirname}/parsoid.yaml`);
10+
11+
// Names of the operations the proxy exposes. Each one is looked up by this
// exact name on the selected variant's operations object, and the same list
// is used to build the "not configured" stubs for a variant without a host.
// Frozen so that the shared module-level list cannot be mutated by accident.
const OPERATIONS = Object.freeze([
    'getHtml',
    'getDataParsoid',
    'getLintErrors',
    'transformHtmlToHtml',
    'transformHtmlToWikitext',
    'transformWikitextToHtml',
    'transformWikitextToLint',
    'transformChangesToWikitext'
]);
21+
22+
// Maps a variant name to the other one: 'js' yields 'php', while any other
// value (including 'php') yields 'js'.
const invert = (v) => {
    if (v === 'js') {
        return 'php';
    }
    return 'js';
};
23+
24+
/**
 * Proxy that sits in front of the two Parsoid variant modules ('js' and
 * 'php') and decides, per request, which variant's operation to invoke.
 *
 * Three modes are supported:
 *  - single: every request goes to `default_variant`;
 *  - mirror: requests go to the default variant and, for a configurable
 *    percentage of non-transform operations, are also sent to the other
 *    variant in a fire-and-forget fashion;
 *  - split: requests whose domain matches the configured pattern go to the
 *    non-default variant, all others to the default one.
 *
 * Regardless of the mode, a client can pin the variant with the
 * `x-parsoid-variant` header or the `parsoid_variant` cookie.
 */
class ParsoidProxy {

    /**
     * @param {Object} [opts] module options; `host` (or the legacy
     *   `parsoidHost`) and `php_host` name the two back-ends and `proxy`
     *   holds the proxy settings (`default_variant`, `mode`, `percentage`,
     *   `pattern`). All remaining options are handed to both variant modules.
     * @throws {Error} if the configuration is invalid
     */
    constructor(opts = {}) {
        const modOpts = this._initOpts(opts);
        // each variant module gets its own copy with its own `host`
        const jsOpts = Object.assign({}, modOpts);
        const phpOpts = Object.assign({}, modOpts);
        delete jsOpts.php_host;
        phpOpts.host = phpOpts.php_host;
        delete phpOpts.php_host;
        this._initMods(jsOpts, phpOpts);
    }

    /**
     * Validates the proxy configuration and stores it on the instance
     * (`options`, `default_variant`, `mode`, `percentage`, `splitRegex`,
     * `resources`).
     *
     * @param {Object} opts the raw module options
     * @return {Object} a copy of `opts` without the `parsoidHost` and
     *   `proxy` keys, suitable as a base for the variant modules' options
     * @throws {Error} if no host at all is given, or if the variant, mode or
     *   percentage is invalid
     */
    _initOpts(opts) {
        const retOpts = Object.assign({}, opts);
        retOpts.host = retOpts.host || retOpts.parsoidHost;
        // A single configured host is enough here: _initMods() decides
        // whether the selected mode can work with only one variant.
        if (!retOpts.host && !retOpts.php_host) {
            throw new Error('Parsoid proxy: no host option specified!');
        }
        this.options = retOpts.proxy || {};
        // possible values are 'js' and 'php'
        this.default_variant = this.options.default_variant || 'js';
        if (!['js', 'php'].includes(this.default_variant)) {
            throw new Error('Parsoid proxy: valid variants are js and php!');
        }
        // possible values are 'single', 'mirror' and 'split'
        this.mode = this.options.mode || 'single';
        if (!['single', 'mirror', 'split'].includes(this.mode)) {
            throw new Error('Parsoid proxy: valid modes are single, mirror and split!');
        }
        // An absent percentage means 0; a value that is present but not
        // numeric is a configuration error rather than a silent 0.
        const rawPercentage = this.options.percentage;
        const hasPercentage = rawPercentage !== undefined && rawPercentage !== null &&
            rawPercentage !== '';
        this.percentage = hasPercentage ? parseFloat(rawPercentage) : 0;
        if (Number.isNaN(this.percentage) || this.percentage < 0 || this.percentage > 100) {
            throw new Error('Parsoid proxy: percentage must be a number between 0 and 100!');
        }
        if (this.percentage === 0 && this.mode === 'mirror') {
            // a special case of mirror mode with 0% is in fact the single mode
            this.mode = 'single';
        }
        this.splitRegex = mwUtil.constructRegex(this.options.pattern);
        if (!this.splitRegex && this.mode === 'split') {
            // split mode with no pattern is single mode
            this.mode = 'single';
            this.splitRegex = /^$/;
        } else if (this.mode !== 'split') {
            // outside of split mode the regex must never select the
            // non-default variant, so use one that matches only ''
            this.splitRegex = /^$/;
        }
        this.resources = [];
        delete retOpts.parsoidHost;
        delete retOpts.proxy;
        return retOpts;
    }

    /**
     * Instantiates the variant modules and stores their operations in
     * `this.mods.js` and `this.mods.php`.
     *
     * @param {Object} jsOpts options for the JS variant (`host` may be unset)
     * @param {Object} phpOpts options for the PHP variant (`host` may be unset)
     * @throws {Error} if the mode needs both variants but a host is missing,
     *   or if mirror mode is combined with 'php' as the default variant
     */
    _initMods(jsOpts, phpOpts) {
        if ((!jsOpts.host || !phpOpts.host) && this.mode !== 'single') {
            // one of the hosts was not provided but the config expects
            // both modules to be functional, so error out
            throw new Error('Parsoid proxy: expected both host and php_host options!');
        }
        if (!phpOpts.host && this.default_variant === 'php') {
            // only `host` was given and PHP is the default variant: treat
            // that host as the PHP back-end and leave JS unconfigured
            phpOpts.host = jsOpts.host;
            delete jsOpts.host;
        }
        if (this.mode === 'mirror') {
            if (this.default_variant === 'php') {
                throw new Error('Parsoid proxy: when mirroring, only js can be the default variant!');
            }
            // js is the default, so don't let php issue dependency update events
            phpOpts.skip_updates = true;
        }
        this.mods = {
            js: this._addMod('js', jsOpts),
            php: this._addMod('php', phpOpts)
        };
    }

    /**
     * Operation stub used for a variant that has no host configured.
     *
     * @throws {HTTPError} always, with status 400
     */
    _backendNotSupported() {
        throw new HTTPError({
            status: 400,
            body: {
                type: 'bad_request',
                description: 'Parsoid variant not configured!'
            }
        });
    }

    /**
     * Loads the module of the given variant, if it has a host configured,
     * and appends its resources to `this.resources`.
     *
     * @param {string} variant 'js' or 'php'; selects `./parsoid-<variant>.js`
     * @param {Object} opts the options to instantiate the module with
     * @return {Object} map of operation name to handler; when no host is
     *   configured every operation in OPERATIONS maps to a stub that fails
     *   with a 400 error
     */
    _addMod(variant, opts) {
        if (opts.host) {
            const mod = require(`./parsoid-${variant}.js`)(opts);
            // we are interested only in the operations and resources
            this.resources = this.resources.concat(mod.resources);
            return mod.operations;
        }
        // return operations that error out if no host is specified
        const ret = {};
        OPERATIONS.forEach((o) => {
            ret[o] = this._backendNotSupported;
        });
        return ret;
    }

    /**
     * Determines whether the client asked for a specific variant. The
     * `x-parsoid-variant` header (root request first, then the current
     * request) takes precedence over the `parsoid_variant` cookie of the
     * root request.
     *
     * @param {Object} hyper the HyperSwitch context; `_rootReq.headers` is read
     * @param {Object} req the current request; its headers may be unset
     * @return {string|undefined} 'js' or 'php', or undefined if no variant
     *   was requested
     * @throws {HTTPError} 400 if a variant other than js or php was requested
     */
    _getStickyVariant(hyper, req) {
        // Either header bag may be missing (e.g. for internal sub-requests)
        const rootHeaders = (hyper && hyper._rootReq && hyper._rootReq.headers) || {};
        const reqHeaders = (req && req.headers) || {};
        let variant = rootHeaders['x-parsoid-variant'] || reqHeaders['x-parsoid-variant'];
        if (!variant && rootHeaders.cookie) {
            // anchor on the start of a cookie pair so that cookies whose name
            // merely ends in "parsoid_variant" are not picked up
            const match = /(?:^|;\s*)parsoid_variant=([^;]+)/i.exec(rootHeaders.cookie);
            if (match) {
                variant = match[1];
            }
        }
        if (!variant) {
            return undefined;
        }
        variant = String(variant).trim().toLowerCase();
        if (!['js', 'php'].includes(variant)) {
            throw new HTTPError({
                status: 400,
                body: {
                    type: 'bad_request',
                    description: `Parsoid variant ${variant} not configured!`
                }
            });
        }
        return variant;
    }

    /**
     * Invokes an operation on the given variant and tags the response with
     * the `x-parsoid-variant` header.
     *
     * @param {string} variant 'js' or 'php'
     * @param {string} operation the name of the operation to invoke
     * @param {Object} hyper the HyperSwitch context
     * @param {Object} req the request; mutated when `setHdr` is true
     * @param {boolean} [setHdr=true] whether to also set the
     *   `x-parsoid-variant` header on the request itself
     * @return {Promise<Object>} the variant's response; rejected if the
     *   operation throws synchronously or returns a rejected promise
     */
    _req(variant, operation, hyper, req, setHdr = true) {
        if (setHdr) {
            req.headers = req.headers || {};
            req.headers['x-parsoid-variant'] = variant;
        }
        let pending;
        try {
            pending = this.mods[variant][operation](hyper, req);
        } catch (e) {
            // e.g. the "not configured" stub throws synchronously; surface
            // it as a rejection so that callers can rely on the promise
            return P.reject(e);
        }
        return P.resolve(pending).then((res) => {
            res.headers = res.headers || {};
            res.headers['x-parsoid-variant'] = variant;
            return res;
        });
    }

    /**
     * Entry point for all operations: selects the variant according to the
     * client's explicit choice or the configured mode and runs the operation.
     *
     * @param {string} operation the name of the operation to invoke
     * @param {Object} hyper the HyperSwitch context
     * @param {Object} req the request; `req.params.domain` drives split mode
     * @return {Promise<Object>} the response of the selected variant
     * @throws {HTTPError} 400 if the client requested an unknown variant
     */
    doRequest(operation, hyper, req) {
        let variant = this._getStickyVariant(hyper, req);
        if (variant) {
            // the variant has been set explicitly by the client, honour it
            return this._req(variant, operation, hyper, req);
        }
        variant = this.default_variant;
        // mirror mode works only for getFormat, since for mirroring
        // transforms we would need to be sure we have the php output
        // stashed
        if (this.mode === 'mirror' && !/transform/.test(operation)) {
            // Math.random() is uniform in [0, 100) after scaling, so this
            // mirrors exactly `percentage` percent of the requests
            if (Math.random() * 100 < this.percentage) {
                // issue an async request to the second variant and
                // don't wait for the return value
                // NOTE(review): the mirrored call shares `req` with the
                // primary call below, which sets the variant header on it -
                // confirm the variant modules do not depend on that header.
                this._req(invert(variant), operation, hyper, req, false)
                .catch((e) => hyper.logger.log(`info/parsoidproxy/${invert(variant)}`, e));
            }
        }
        // we can now safely check simply where to direct the request using
        // splitRegex because it won't match anything for any mode other than split
        variant = this.splitRegex.test(req.params.domain) ? invert(variant) : variant;
        return this._req(variant, operation, hyper, req);
    }

    /**
     * @return {Object} map of every name in OPERATIONS to a handler taking
     *   `(hyper, req)` that is routed through doRequest()
     */
    getOperations() {
        const ret = {};
        OPERATIONS.forEach((o) => {
            ret[o] = this.doRequest.bind(this, o);
        });
        return ret;
    }

}
196+
197+
module.exports = (options = {}) => {
198+
const ps = new ParsoidProxy(options);
199+
return {
200+
spec,
201+
operations: ps.getOperations(),
202+
resources: ps.resources
203+
};
204+
};

0 commit comments

Comments
 (0)