Skip to content

Commit d055b63

Browse files
committed
Changes for T230791 Have a Mechanism for Storing and Retrieving Parsoid HTML from JS and PHP
1 parent d08ad0e commit d055b63

File tree

8 files changed

+364
-8
lines changed

8 files changed

+364
-8
lines changed

config.example.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ services:
5252
# XXX Check the base RESTBase URI
5353
baseUriTemplate: "{{'http://{domain}:7231/{domain}/v1'}}"
5454
parsoid:
55-
# XXX Check Parsoid URL!
55+
# XXX Check Parsoid/JS URL!
5656
host: http://localhost:8142
57+
# XXX Check Parsoid/PHP URL!
58+
host_php: http://localhost:8142
5759
table:
5860
backend: sqlite
5961
dbname: db.sqlite3

config.frontend.test.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ default_project: &default_project
99
options: &default_options
1010
parsoid:
1111
host: https://parsoid-beta.wmflabs.org
12+
host_php: https://parsoid-beta.wmflabs.org
1213
grace_ttl: 1000000
1314
action:
1415
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"

config.fullstack.test.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ default_project: &default_project
88
options: &default_options
99
parsoid:
1010
host: https://parsoid-beta.wmflabs.org
11+
host_php: https://parsoid-beta.wmflabs.org
1112
grace_ttl: 1000000
1213
action:
1314
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"

projects/example.yaml

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,226 @@ paths:
5252
x-modules:
5353
- path: sys/page_save.js
5454
/parsoid:
55+
x-modules:
56+
- path: sys/parsoid_proxy.js
57+
options:
58+
parsoidHost: '{{options.parsoid.host}}'
59+
parsoidPhpHost: '{{options.parsoid.host_php}}'
60+
response_cache_control: '{{options.purged_cache_control}}'
61+
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
62+
/parsoidjs:
5563
x-modules:
5664
- path: sys/parsoid.js
5765
options:
5866
parsoidHost: '{{options.parsoid.host}}'
5967
response_cache_control: '{{options.purged_cache_control}}'
6068
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
69+
# A list of pages that we don't currently want to re-render on
70+
# each edit. Most of these are huge bot-edited pages, which are
71+
# rarely viewed in any case.
72+
rerenderBlacklist:
73+
# en wiki
74+
en.wikipedia.org:
75+
- 'User:B-bot/Event_log'
76+
- 'User:DeltaQuad/UAA/Wait'
77+
- 'User:JamesR/AdminStats'
78+
- 'User:Kudpung/Dashboard'
79+
# Various dashboards
80+
- 'User:Breawycker/Wikipedia'
81+
- 'User:Sonia/dashboard'
82+
- 'User:Ocaasi/dashboard'
83+
- 'User:Nolelover'
84+
- 'User:Calmer_Waters'
85+
- '/User%3ARedwolf24\//'
86+
- 'User:Technical_13/dashboard'
87+
- 'Template:Cratstats'
88+
# Cyberbot is creating 90% of null edits
89+
- '/^User:Cyberbot_I\//'
90+
- '/^User:Cyberbot_II\//'
91+
- '/^User:Cyberpower678\//'
92+
- '/^User:Darts170Darts170\//'
93+
- 'صارف:Cyberbot_I/Run/Adminstats'
94+
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
95+
- 'User:Pentjuuu!.!/sandbox'
96+
- 'User:AllyD/CSDlog'
97+
- 'User:Peter_I._Vardy/sandbox-13'
98+
- 'User:I_dream_of_horses/CSD_log'
99+
- 'User:MJ180MJ180/sandbox'
100+
- 'Talk:United_States_presidential_election,_2016'
101+
- 'Wikipedia:Reference_desk/Humanities'
102+
- 'Wikipedia:WikiProject_Deletion_sorting/People'
103+
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
104+
- 'Wikipedia:Articles_for_creation/Redirects'
105+
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
106+
# Wikipedia
107+
ca.wikipedia.org:
108+
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
109+
ceb.wikipedia.org:
110+
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
111+
- 'Gumagamit:Lsjbot/Kartrutor2'
112+
de.wikipedia.org:
113+
- '/The_Big_Bang_Theory\/Staffel/'
114+
- 'Wikipedia:Café'
115+
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
116+
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
117+
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
118+
- 'Benutzer:Wartungsstube/Berlin'
119+
- 'Benutzer:Wartungsstube/Musik'
120+
- 'Benutzer:Wartungsstube/Unternehmen'
121+
- 'Benutzer:Wartungsstube/Schifffahrt'
122+
- 'Benutzer:Verum/ege'
123+
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
124+
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
125+
es.wikipedia.org:
126+
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
127+
fr.wikipedia.org:
128+
- 'Utilisateur:ZéroBot/Log/Erreurs'
129+
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
130+
- '/^Utilisateur:[\s\S]+[Bb]rouillon'
131+
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
132+
- 'Projet:France/Annonces/Admissibilité'
133+
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
134+
hy.wikipedia.org:
135+
- "/Մասնակից:Omicroñ\\'R/"
136+
it.wikipedia.org:
137+
- 'Utente:Effems/Sandbox7'
138+
nl.wikipedia.org:
139+
- 'Gebruiker:Eg-T2g/Kladblok'
140+
pt.wikipedia.org:
141+
- 'Wikipédia:Pedidos/Bloqueio'
142+
ru.wikipedia.org:
143+
- 'Википедия:Форум/Технический'
144+
- 'Портал:Герпетология'
145+
sv.wikipedia.org:
146+
- 'Användare:Lsjbot/Anomalier-PRIVAT'
147+
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
148+
ur.wikipedia.org:
149+
- 'نام_مقامات_ایل'
150+
- 'نام_مقامات_ڈی'
151+
- 'نام_مقامات_جے'
152+
- 'نام_مقامات_جی'
153+
- 'نام_مقامات_ایچ'
154+
- 'نام_مقامات_ایم'
155+
- 'نام_مقامات_ایس'
156+
zh.wikipedia.org:
157+
- 'Wikipedia:互助客栈/条目探讨'
158+
- 'Draft:日本人工湖列表'
159+
# Wikisource
160+
pl.wikisource.org:
161+
- '/^Wśród_czarnych\//'
162+
# Wikimedia
163+
commons.wikimedia.org:
164+
- '/Commons:Featured_picture_candidates\//'
165+
- 'Commons:Quality_images/Subject/Places/Natural_structures'
166+
- '/Commons:Undeletion_requests\//'
167+
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
168+
- '/^(?:User|Benutzer):/'
169+
/parsoidphp:
170+
x-modules:
171+
- path: sys/parsoid.js
172+
options:
173+
parsoidHost: '{{options.parsoid.host_php}}'
174+
response_cache_control: '{{options.purged_cache_control}}'
175+
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
176+
# A list of pages that we don't currently want to re-render on
177+
# each edit. Most of these are huge bot-edited pages, which are
178+
# rarely viewed in any case.
179+
rerenderBlacklist:
180+
# en wiki
181+
en.wikipedia.org:
182+
- 'User:B-bot/Event_log'
183+
- 'User:DeltaQuad/UAA/Wait'
184+
- 'User:JamesR/AdminStats'
185+
- 'User:Kudpung/Dashboard'
186+
# Various dashboards
187+
- 'User:Breawycker/Wikipedia'
188+
- 'User:Sonia/dashboard'
189+
- 'User:Ocaasi/dashboard'
190+
- 'User:Nolelover'
191+
- 'User:Calmer_Waters'
192+
- '/User%3ARedwolf24\//'
193+
- 'User:Technical_13/dashboard'
194+
- 'Template:Cratstats'
195+
# Cyberbot is creating 90% of null edits
196+
- '/^User:Cyberbot_I\//'
197+
- '/^User:Cyberbot_II\//'
198+
- '/^User:Cyberpower678\//'
199+
- '/^User:Darts170Darts170\//'
200+
- 'صارف:Cyberbot_I/Run/Adminstats'
201+
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
202+
- 'User:Pentjuuu!.!/sandbox'
203+
- 'User:AllyD/CSDlog'
204+
- 'User:Peter_I._Vardy/sandbox-13'
205+
- 'User:I_dream_of_horses/CSD_log'
206+
- 'User:MJ180MJ180/sandbox'
207+
- 'Talk:United_States_presidential_election,_2016'
208+
- 'Wikipedia:Reference_desk/Humanities'
209+
- 'Wikipedia:WikiProject_Deletion_sorting/People'
210+
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
211+
- 'Wikipedia:Articles_for_creation/Redirects'
212+
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
213+
# Wikipedia
214+
ca.wikipedia.org:
215+
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
216+
ceb.wikipedia.org:
217+
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
218+
- 'Gumagamit:Lsjbot/Kartrutor2'
219+
de.wikipedia.org:
220+
- '/The_Big_Bang_Theory\/Staffel/'
221+
- 'Wikipedia:Café'
222+
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
223+
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
224+
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
225+
- 'Benutzer:Wartungsstube/Berlin'
226+
- 'Benutzer:Wartungsstube/Musik'
227+
- 'Benutzer:Wartungsstube/Unternehmen'
228+
- 'Benutzer:Wartungsstube/Schifffahrt'
229+
- 'Benutzer:Verum/ege'
230+
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
231+
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
232+
es.wikipedia.org:
233+
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
234+
fr.wikipedia.org:
235+
- 'Utilisateur:ZéroBot/Log/Erreurs'
236+
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
237+
- '/^Utilisateur:[\s\S]+[Bb]rouillon'
238+
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
239+
- 'Projet:France/Annonces/Admissibilité'
240+
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
241+
hy.wikipedia.org:
242+
- "/Մասնակից:Omicroñ\\'R/"
243+
it.wikipedia.org:
244+
- 'Utente:Effems/Sandbox7'
245+
nl.wikipedia.org:
246+
- 'Gebruiker:Eg-T2g/Kladblok'
247+
pt.wikipedia.org:
248+
- 'Wikipédia:Pedidos/Bloqueio'
249+
ru.wikipedia.org:
250+
- 'Википедия:Форум/Технический'
251+
- 'Портал:Герпетология'
252+
sv.wikipedia.org:
253+
- 'Användare:Lsjbot/Anomalier-PRIVAT'
254+
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
255+
ur.wikipedia.org:
256+
- 'نام_مقامات_ایل'
257+
- 'نام_مقامات_ڈی'
258+
- 'نام_مقامات_جے'
259+
- 'نام_مقامات_جی'
260+
- 'نام_مقامات_ایچ'
261+
- 'نام_مقامات_ایم'
262+
- 'نام_مقامات_ایس'
263+
zh.wikipedia.org:
264+
- 'Wikipedia:互助客栈/条目探讨'
265+
- 'Draft:日本人工湖列表'
266+
# Wikisource
267+
pl.wikisource.org:
268+
- '/^Wśród_czarnych\//'
269+
# Wikimedia
270+
commons.wikimedia.org:
271+
- '/Commons:Featured_picture_candidates\//'
272+
- 'Commons:Quality_images/Subject/Places/Natural_structures'
273+
- '/Commons:Undeletion_requests\//'
274+
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
275+
- '/^(?:User|Benutzer):/'
61276
options: '{{options}}'
62277

projects/sys/default.wmf.yaml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@ paths:
1111
- path: sys/page_save.js
1212
/parsoid:
1313
x-modules:
14-
- path: sys/parsoid.js
15-
options:
14+
- path: sys/parsoid_proxy.js
15+
/parsoidjs:
16+
x-modules:
17+
- path: sys/parsoid.js
18+
options: &parsoidopts
1619
parsoidHost: '{{options.parsoid.host}}'
20+
bucketName: 'parsoid'
1721
response_cache_control: '{{options.purged_cache_control}}'
1822
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
1923
# A list of pages that we don't currently want to re-render on
@@ -116,6 +120,13 @@ paths:
116120
- '/Commons:Undeletion_requests\//'
117121
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
118122
- '/^(?:User|Benutzer):/'
123+
/parsoidphp:
124+
x-modules:
125+
- path: sys/parsoid.js
126+
options:
127+
<<: *parsoidopts
128+
parsoidHost: '{{options.parsoid.host_php}}'
129+
bucketName: 'parsoidphp'
119130
/events:
120131
x-modules:
121132
- path: sys/events.js

sys/parsoid.js

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,11 @@ class ParsoidService {
125125
_initOpts(opts = {}) {
126126
this.options = opts;
127127
this.parsoidHost = opts.parsoidHost;
128+
this.bucketName = opts.bucketName || 'parsoid';
128129
this.options.stash_ratelimit = opts.stash_ratelimit || 5;
129130
this.options.grace_ttl = opts.grace_ttl || 86400;
130131
this._blacklist = compileReRenderBlacklist(opts.rerenderBlacklist);
131-
if (!opts.parsoidHost) {
132+
if (!this.parsoidHost) {
132133
throw new Error('Parsoid module: the option parsoidHost must be provided!');
133134
}
134135
}
@@ -170,7 +171,7 @@ class ParsoidService {
170171
*/
171172
getLatestBucketURI(domain, title) {
172173
return new URI([
173-
domain, 'sys', 'key_value', 'parsoid', title
174+
domain, 'sys', 'key_value', this.bucketName, title
174175
]);
175176
}
176177

@@ -186,7 +187,7 @@ class ParsoidService {
186187
*/
187188
getStashBucketURI(domain, title, revision, tid) {
188189
return new URI([
189-
domain, 'sys', 'key_value', 'parsoid-stash', `${title}:${revision}:${tid}`
190+
domain, 'sys', 'key_value', `${this.bucketName}-stash`, `${title}:${revision}:${tid}`
190191
]);
191192
}
192193

@@ -799,7 +800,7 @@ module.exports = (options) => {
799800
// Dynamic resource dependencies, specific to implementation
800801
resources: [
801802
{
802-
uri: '/{domain}/sys/key_value/parsoid',
803+
uri: `/{domain}/sys/key_value/${ps.bucketName}`,
803804
headers: {
804805
'content-type': 'application/json'
805806
},
@@ -808,7 +809,7 @@ module.exports = (options) => {
808809
}
809810
},
810811
{
811-
uri: '/{domain}/sys/key_value/parsoid-stash',
812+
uri: `/{domain}/sys/key_value/${ps.bucketName}-stash`,
812813
headers: {
813814
'content-type': 'application/json'
814815
},

sys/parsoid_proxy.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
'use strict';
2+
3+
/*
4+
* Simple proxy to route requests to the client-requested
5+
* Parsoid variant (JS or PHP) during the transition
6+
* period. Parsoid/JS is being phased out and replaced by
7+
* Parsoid/PHP.
8+
*/
9+
10+
const HyperSwitch = require('hyperswitch');
11+
const URI = HyperSwitch.URI;
12+
const mwUtil = require('../lib/mwUtil');
13+
const VARIANT_HDR_NAME = 'x-parsoid-variant';
14+
15+
module.exports = () => {
16+
return {
17+
spec: {
18+
paths: {
19+
'/{+path}': {
20+
all: {
21+
operationId: 'proxy_parsoid_variant'
22+
}
23+
}
24+
}
25+
},
26+
operations: {
27+
proxy_parsoid_variant: (hyper, req) => {
28+
const rootReqHeaders = hyper._rootReq.headers || {};
29+
if (!rootReqHeaders[VARIANT_HDR_NAME]) {
30+
rootReqHeaders[VARIANT_HDR_NAME] = 'JS';
31+
}
32+
const isPhpVariant = /PHP/i.test(rootReqHeaders[VARIANT_HDR_NAME]);
33+
34+
return hyper.request({
35+
method: req.method,
36+
uri: new URI(req.uri.toString().replace('/parsoid/',
37+
isPhpVariant ? '/parsoidphp/' : '/parsoidjs/')),
38+
headers: req.headers,
39+
body: req.body,
40+
query: req.query,
41+
params: req.params
42+
})
43+
.then((res) => {
44+
if (res) {
45+
res.headers = res.headers || {};
46+
res.headers[VARIANT_HDR_NAME] = rootReqHeaders[VARIANT_HDR_NAME];
47+
mwUtil.addVaryHeader(res, VARIANT_HDR_NAME);
48+
}
49+
return res;
50+
});
51+
}
52+
}
53+
};
54+
};

0 commit comments

Comments
 (0)