Skip to content

Commit 511f2ed

Browse files
committed
Changes for T230791: Have a Mechanism for Storing and Retrieving Parsoid HTML from JS and PHP
1 parent d08ad0e commit 511f2ed

File tree

8 files changed

+512
-4
lines changed

8 files changed

+512
-4
lines changed

config.example.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ services:
5252
# XXX Check the base RESTBase URI
5353
baseUriTemplate: "{{'http://{domain}:7231/{domain}/v1'}}"
5454
parsoid:
55-
# XXX Check Parsoid URL!
55+
# XXX Check Parsoid/JS URL!
5656
host: http://localhost:8142
57+
# XXX Check Parsoid/PHP URL!
58+
host_php: http://localhost:8142
5759
table:
5860
backend: sqlite
5961
dbname: db.sqlite3

config.frontend.test.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ default_project: &default_project
99
options: &default_options
1010
parsoid:
1111
host: https://parsoid-beta.wmflabs.org
12+
host_php: https://parsoid-beta.wmflabs.org
1213
grace_ttl: 1000000
1314
action:
1415
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"

config.fullstack.test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ default_project: &default_project
66
x-modules:
77
- path: projects/v1/default.wmf.yaml
88
options: &default_options
9+
block_external_reqs: true
910
parsoid:
1011
host: https://parsoid-beta.wmflabs.org
12+
host_php: https://parsoid-beta.wmflabs.org
1113
grace_ttl: 1000000
1214
action:
1315
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"

projects/example.yaml

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,226 @@ paths:
5252
x-modules:
5353
- path: sys/page_save.js
5454
/parsoid:
55+
x-modules:
56+
- path: sys/parsoid_proxy.js
57+
options:
58+
parsoidHost: '{{options.parsoid.host}}'
59+
parsoidPhpHost: '{{options.parsoid.host_php}}'
60+
response_cache_control: '{{options.purged_cache_control}}'
61+
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
62+
/parsoidjs:
5563
x-modules:
5664
- path: sys/parsoid.js
5765
options:
5866
parsoidHost: '{{options.parsoid.host}}'
5967
response_cache_control: '{{options.purged_cache_control}}'
6068
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
69+
# A list of pages that we don't currently want to re-render on
70+
# each edit. Most of these are huge bot-edited pages, which are
71+
# rarely viewed in any case.
72+
rerenderBlacklist:
73+
# en wiki
74+
en.wikipedia.org:
75+
- 'User:B-bot/Event_log'
76+
- 'User:DeltaQuad/UAA/Wait'
77+
- 'User:JamesR/AdminStats'
78+
- 'User:Kudpung/Dashboard'
79+
# Various dashboards
80+
- 'User:Breawycker/Wikipedia'
81+
- 'User:Sonia/dashboard'
82+
- 'User:Ocaasi/dashboard'
83+
- 'User:Nolelover'
84+
- 'User:Calmer_Waters'
85+
- '/User%3ARedwolf24\//'
86+
- 'User:Technical_13/dashboard'
87+
- 'Template:Cratstats'
88+
# Cyberbot is creating 90% of null edits
89+
- '/^User:Cyberbot_I\//'
90+
- '/^User:Cyberbot_II\//'
91+
- '/^User:Cyberpower678\//'
92+
- '/^User:Darts170Darts170\//'
93+
- 'صارف:Cyberbot_I/Run/Adminstats'
94+
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
95+
- 'User:Pentjuuu!.!/sandbox'
96+
- 'User:AllyD/CSDlog'
97+
- 'User:Peter_I._Vardy/sandbox-13'
98+
- 'User:I_dream_of_horses/CSD_log'
99+
- 'User:MJ180MJ180/sandbox'
100+
- 'Talk:United_States_presidential_election,_2016'
101+
- 'Wikipedia:Reference_desk/Humanities'
102+
- 'Wikipedia:WikiProject_Deletion_sorting/People'
103+
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
104+
- 'Wikipedia:Articles_for_creation/Redirects'
105+
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
106+
# Wikipedia
107+
ca.wikipedia.org:
108+
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
109+
ceb.wikipedia.org:
110+
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
111+
- 'Gumagamit:Lsjbot/Kartrutor2'
112+
de.wikipedia.org:
113+
- '/The_Big_Bang_Theory\/Staffel/'
114+
- 'Wikipedia:Café'
115+
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
116+
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
117+
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
118+
- 'Benutzer:Wartungsstube/Berlin'
119+
- 'Benutzer:Wartungsstube/Musik'
120+
- 'Benutzer:Wartungsstube/Unternehmen'
121+
- 'Benutzer:Wartungsstube/Schifffahrt'
122+
- 'Benutzer:Verum/ege'
123+
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
124+
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
125+
es.wikipedia.org:
126+
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
127+
fr.wikipedia.org:
128+
- 'Utilisateur:ZéroBot/Log/Erreurs'
129+
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
130+
- '/^Utilisateur:[\s\S]+[Bb]rouillon/'
131+
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
132+
- 'Projet:France/Annonces/Admissibilité'
133+
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
134+
hy.wikipedia.org:
135+
- "/Մասնակից:Omicroñ\\'R/"
136+
it.wikipedia.org:
137+
- 'Utente:Effems/Sandbox7'
138+
nl.wikipedia.org:
139+
- 'Gebruiker:Eg-T2g/Kladblok'
140+
pt.wikipedia.org:
141+
- 'Wikipédia:Pedidos/Bloqueio'
142+
ru.wikipedia.org:
143+
- 'Википедия:Форум/Технический'
144+
- 'Портал:Герпетология'
145+
sv.wikipedia.org:
146+
- 'Användare:Lsjbot/Anomalier-PRIVAT'
147+
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
148+
ur.wikipedia.org:
149+
- 'نام_مقامات_ایل'
150+
- 'نام_مقامات_ڈی'
151+
- 'نام_مقامات_جے'
152+
- 'نام_مقامات_جی'
153+
- 'نام_مقامات_ایچ'
154+
- 'نام_مقامات_ایم'
155+
- 'نام_مقامات_ایس'
156+
zh.wikipedia.org:
157+
- 'Wikipedia:互助客栈/条目探讨'
158+
- 'Draft:日本人工湖列表'
159+
# Wikisource
160+
pl.wikisource.org:
161+
- '/^Wśród_czarnych\//'
162+
# Wikimedia
163+
commons.wikimedia.org:
164+
- '/Commons:Featured_picture_candidates\//'
165+
- 'Commons:Quality_images/Subject/Places/Natural_structures'
166+
- '/Commons:Undeletion_requests\//'
167+
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
168+
- '/^(?:User|Benutzer):/'
169+
/parsoidphp:
170+
x-modules:
171+
- path: sys/parsoid.js
172+
options:
173+
parsoidHost: '{{options.parsoid.host_php}}'
174+
response_cache_control: '{{options.purged_cache_control}}'
175+
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
176+
# A list of pages that we don't currently want to re-render on
177+
# each edit. Most of these are huge bot-edited pages, which are
178+
# rarely viewed in any case.
179+
rerenderBlacklist:
180+
# en wiki
181+
en.wikipedia.org:
182+
- 'User:B-bot/Event_log'
183+
- 'User:DeltaQuad/UAA/Wait'
184+
- 'User:JamesR/AdminStats'
185+
- 'User:Kudpung/Dashboard'
186+
# Various dashboards
187+
- 'User:Breawycker/Wikipedia'
188+
- 'User:Sonia/dashboard'
189+
- 'User:Ocaasi/dashboard'
190+
- 'User:Nolelover'
191+
- 'User:Calmer_Waters'
192+
- '/User%3ARedwolf24\//'
193+
- 'User:Technical_13/dashboard'
194+
- 'Template:Cratstats'
195+
# Cyberbot is creating 90% of null edits
196+
- '/^User:Cyberbot_I\//'
197+
- '/^User:Cyberbot_II\//'
198+
- '/^User:Cyberpower678\//'
199+
- '/^User:Darts170Darts170\//'
200+
- 'صارف:Cyberbot_I/Run/Adminstats'
201+
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
202+
- 'User:Pentjuuu!.!/sandbox'
203+
- 'User:AllyD/CSDlog'
204+
- 'User:Peter_I._Vardy/sandbox-13'
205+
- 'User:I_dream_of_horses/CSD_log'
206+
- 'User:MJ180MJ180/sandbox'
207+
- 'Talk:United_States_presidential_election,_2016'
208+
- 'Wikipedia:Reference_desk/Humanities'
209+
- 'Wikipedia:WikiProject_Deletion_sorting/People'
210+
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
211+
- 'Wikipedia:Articles_for_creation/Redirects'
212+
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
213+
# Wikipedia
214+
ca.wikipedia.org:
215+
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
216+
ceb.wikipedia.org:
217+
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
218+
- 'Gumagamit:Lsjbot/Kartrutor2'
219+
de.wikipedia.org:
220+
- '/The_Big_Bang_Theory\/Staffel/'
221+
- 'Wikipedia:Café'
222+
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
223+
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
224+
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
225+
- 'Benutzer:Wartungsstube/Berlin'
226+
- 'Benutzer:Wartungsstube/Musik'
227+
- 'Benutzer:Wartungsstube/Unternehmen'
228+
- 'Benutzer:Wartungsstube/Schifffahrt'
229+
- 'Benutzer:Verum/ege'
230+
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
231+
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
232+
es.wikipedia.org:
233+
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
234+
fr.wikipedia.org:
235+
- 'Utilisateur:ZéroBot/Log/Erreurs'
236+
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
237+
- '/^Utilisateur:[\s\S]+[Bb]rouillon/'
238+
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
239+
- 'Projet:France/Annonces/Admissibilité'
240+
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
241+
hy.wikipedia.org:
242+
- "/Մասնակից:Omicroñ\\'R/"
243+
it.wikipedia.org:
244+
- 'Utente:Effems/Sandbox7'
245+
nl.wikipedia.org:
246+
- 'Gebruiker:Eg-T2g/Kladblok'
247+
pt.wikipedia.org:
248+
- 'Wikipédia:Pedidos/Bloqueio'
249+
ru.wikipedia.org:
250+
- 'Википедия:Форум/Технический'
251+
- 'Портал:Герпетология'
252+
sv.wikipedia.org:
253+
- 'Användare:Lsjbot/Anomalier-PRIVAT'
254+
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
255+
ur.wikipedia.org:
256+
- 'نام_مقامات_ایل'
257+
- 'نام_مقامات_ڈی'
258+
- 'نام_مقامات_جے'
259+
- 'نام_مقامات_جی'
260+
- 'نام_مقامات_ایچ'
261+
- 'نام_مقامات_ایم'
262+
- 'نام_مقامات_ایس'
263+
zh.wikipedia.org:
264+
- 'Wikipedia:互助客栈/条目探讨'
265+
- 'Draft:日本人工湖列表'
266+
# Wikisource
267+
pl.wikisource.org:
268+
- '/^Wśród_czarnych\//'
269+
# Wikimedia
270+
commons.wikimedia.org:
271+
- '/Commons:Featured_picture_candidates\//'
272+
- 'Commons:Quality_images/Subject/Places/Natural_structures'
273+
- '/Commons:Undeletion_requests\//'
274+
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
275+
- '/^(?:User|Benutzer):/'
61276
options: '{{options}}'
62277

projects/sys/default.wmf.yaml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,124 @@ paths:
1010
x-modules:
1111
- path: sys/page_save.js
1212
/parsoid:
13+
x-modules:
14+
- path: sys/parsoid_proxy.js
15+
options:
16+
parsoidPhpHost: '{{options.parsoid.host_php}}'
17+
/parsoidjs:
1318
x-modules:
1419
- path: sys/parsoid.js
1520
options:
1621
parsoidHost: '{{options.parsoid.host}}'
22+
bucketName: 'parsoid'
23+
response_cache_control: '{{options.purged_cache_control}}'
24+
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
25+
# A list of pages that we don't currently want to re-render on
26+
# each edit. Most of these are huge bot-edited pages, which are
27+
# rarely viewed in any case.
28+
rerenderBlacklist:
29+
# en wiki
30+
en.wikipedia.org:
31+
- 'User:B-bot/Event_log'
32+
- 'User:DeltaQuad/UAA/Wait'
33+
- 'User:JamesR/AdminStats'
34+
- 'User:Kudpung/Dashboard'
35+
# Various dashboards
36+
- 'User:Breawycker/Wikipedia'
37+
- 'User:Sonia/dashboard'
38+
- 'User:Ocaasi/dashboard'
39+
- 'User:Nolelover'
40+
- 'User:Calmer_Waters'
41+
- '/User%3ARedwolf24\//'
42+
- 'User:Technical_13/dashboard'
43+
- 'Template:Cratstats'
44+
# Cyberbot is creating 90% of null edits
45+
- '/^User:Cyberbot_I\//'
46+
- '/^User:Cyberbot_II\//'
47+
- '/^User:Cyberpower678\//'
48+
- '/^User:Darts170Darts170\//'
49+
- 'صارف:Cyberbot_I/Run/Adminstats'
50+
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
51+
- 'User:Pentjuuu!.!/sandbox'
52+
- 'User:AllyD/CSDlog'
53+
- 'User:Peter_I._Vardy/sandbox-13'
54+
- 'User:I_dream_of_horses/CSD_log'
55+
- 'User:MJ180MJ180/sandbox'
56+
- 'Talk:United_States_presidential_election,_2016'
57+
- 'Wikipedia:Reference_desk/Humanities'
58+
- 'Wikipedia:WikiProject_Deletion_sorting/People'
59+
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
60+
- 'Wikipedia:Articles_for_creation/Redirects'
61+
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
62+
# Wikipedia
63+
ca.wikipedia.org:
64+
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
65+
ceb.wikipedia.org:
66+
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
67+
- 'Gumagamit:Lsjbot/Kartrutor2'
68+
de.wikipedia.org:
69+
- '/The_Big_Bang_Theory\/Staffel/'
70+
- 'Wikipedia:Café'
71+
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
72+
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
73+
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
74+
- 'Benutzer:Wartungsstube/Berlin'
75+
- 'Benutzer:Wartungsstube/Musik'
76+
- 'Benutzer:Wartungsstube/Unternehmen'
77+
- 'Benutzer:Wartungsstube/Schifffahrt'
78+
- 'Benutzer:Verum/ege'
79+
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
80+
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
81+
es.wikipedia.org:
82+
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
83+
fr.wikipedia.org:
84+
- 'Utilisateur:ZéroBot/Log/Erreurs'
85+
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
86+
- '/^Utilisateur:[\s\S]+[Bb]rouillon/'
87+
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
88+
- 'Projet:France/Annonces/Admissibilité'
89+
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
90+
hy.wikipedia.org:
91+
- "/Մասնակից:Omicroñ\\'R/"
92+
it.wikipedia.org:
93+
- 'Utente:Effems/Sandbox7'
94+
nl.wikipedia.org:
95+
- 'Gebruiker:Eg-T2g/Kladblok'
96+
pt.wikipedia.org:
97+
- 'Wikipédia:Pedidos/Bloqueio'
98+
ru.wikipedia.org:
99+
- 'Википедия:Форум/Технический'
100+
- 'Портал:Герпетология'
101+
sv.wikipedia.org:
102+
- 'Användare:Lsjbot/Anomalier-PRIVAT'
103+
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
104+
ur.wikipedia.org:
105+
- 'نام_مقامات_ایل'
106+
- 'نام_مقامات_ڈی'
107+
- 'نام_مقامات_جے'
108+
- 'نام_مقامات_جی'
109+
- 'نام_مقامات_ایچ'
110+
- 'نام_مقامات_ایم'
111+
- 'نام_مقامات_ایس'
112+
zh.wikipedia.org:
113+
- 'Wikipedia:互助客栈/条目探讨'
114+
- 'Draft:日本人工湖列表'
115+
# Wikisource
116+
pl.wikisource.org:
117+
- '/^Wśród_czarnych\//'
118+
# Wikimedia
119+
commons.wikimedia.org:
120+
- '/Commons:Featured_picture_candidates\//'
121+
- 'Commons:Quality_images/Subject/Places/Natural_structures'
122+
- '/Commons:Undeletion_requests\//'
123+
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
124+
- '/^(?:User|Benutzer):/'
125+
/parsoidphp:
126+
x-modules:
127+
- path: sys/parsoid.js
128+
options:
129+
parsoidHost: '{{options.parsoid.host_php}}'
130+
bucketName: 'parsoidphp'
17131
response_cache_control: '{{options.purged_cache_control}}'
18132
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
19133
# A list of pages that we don't currently want to re-render on

0 commit comments

Comments
 (0)