Skip to content

Commit 6fc85aa

Browse files
committed
Preprocess: Fix filenames that contain special characters
1 parent 41eb3d8 commit 6fc85aa

File tree

3 files changed

+95
-40
lines changed

3 files changed

+95
-40
lines changed

preprocess.py

Lines changed: 87 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
import fnmatch
2121
import re
2222
import os
23+
import sys
24+
import shutil
25+
import urllib.parse
26+
from xml_utils import xml_escape, xml_unescape
2327

2428
# copy the source tree
2529
os.system('rm -rf output/reference')
@@ -66,18 +70,80 @@
6670
# remove what's left
6771
os.system('rm -r '+ path)
6872

69-
# find all html and css files
73+
74+
# find files that need to be renamed
75+
# Buckets of (directory, filename) pairs, filled by one walk over the tree:
#   files_rename_qs   - names containing '?' (minus the loader files below)
#   files_rename_quot - names containing '"'
#   files_loader      - names starting with 'load.php?'
files_rename_qs = []
files_rename_quot = []
files_loader = []

for root, _dirs, names in os.walk('output/reference/'):
    for glob, bucket in (('*[?]*', files_rename_qs),
                         ('*"*', files_rename_quot),
                         ('load.php[?]*', files_loader)):
        bucket.extend((root, name) for name in fnmatch.filter(names, glob))

# Every loader file also matched '*[?]*'; take it out of the query-string
# bucket so that it is only renamed once, by the loader-specific mapping.
# NOTE(review): a name containing both '?' and '"' ends up in two buckets, so
# the second rename would look for a path that no longer exists - confirm that
# such names cannot occur in the mirrored tree.
for entry in files_loader:
    files_rename_qs.remove(entry)
88+
89+
# strip query strings from filenames to support Windows filesystems
90+
# (old filename, new filename) pairs for every rename performed so far.
# Only bare filenames are stored, without the directory.
rename_map = []


def rename_file(root, fn, new_fn):
    """Move ``root/fn`` to ``root/new_fn`` and remember the rename.

    Parameters:
        root:   directory containing the file.
        fn:     current filename, relative to ``root``.
        new_fn: filename to give the file, relative to ``root``.

    The ``(fn, new_fn)`` pair is added to the module-level ``rename_map``.
    Identically named files in different directories yield the same pair, so
    the pair is recorded only once; duplicates would add nothing except extra
    passes for any code that iterates over ``rename_map``.

    Any exception raised by ``shutil.move`` (for example when the source file
    is missing) propagates to the caller, and nothing is recorded.
    """
    path = os.path.join(root, fn)
    new_path = os.path.join(root, new_fn)
    shutil.move(path, new_path)

    entry = (fn, new_fn)
    if entry not in rename_map:
        rename_map.append(entry)
96+
97+
# Drop everything from the first '?' onwards. The pattern is a raw string:
# '\?' inside a normal string literal is an invalid escape sequence.
for root, fn in files_rename_qs:
    rename_file(root, fn, re.sub(r'\?.*', '', fn))

# Replace every double quote with the '_q_' placeholder.
for root, fn in files_rename_quot:
    rename_file(root, fn, re.sub('"', '_q_', fn))
101+
102+
# map loader names to more recognizable names
103+
# Ordered (regex, replacement filename) table; the first regex found in the
# loader filename decides the new name.
loader_name_patterns = (
    ("modules=site&only=scripts", "site_scripts.js"),
    ("modules=site&only=styles", "site_modules.css"),
    ("modules=skins.*&only=scripts", "skin_scripts.js"),
    ("modules=startup&only=scripts", "startup_scripts.js"),
    ("modules=.*ext.*&only=styles", "ext.css"),
)

for root, fn in files_loader:
    for pattern, candidate in loader_name_patterns:
        if re.search(pattern, fn):
            new_fn = candidate
            break
    else:
        # Unknown loader file: report it and abort the whole run.
        print("Loader file " + fn + " does not match any known files")
        sys.exit(1)

    rename_file(root, fn, new_fn)
119+
120+
# find files that need to be preprocessed
70121
html_files = []
71-
css_files = []
72122
for root, dirnames, filenames in os.walk('output/reference/'):
73123
for filename in fnmatch.filter(filenames, '*.html'):
74124
html_files.append(os.path.join(root, filename))
75-
for filename in fnmatch.filter(filenames, '*.css'):
76-
css_files.append(os.path.join(root, filename))
77-
78125

79126
#temporary fix
80-
r3 = re.compile('<style[^<]*?<[^<]*?MediaWiki:Geshi\.css[^<]*?<\/style>', re.MULTILINE)
127+
r1 = re.compile('<style[^<]*?<[^<]*?MediaWiki:Geshi\.css[^<]*?<\/style>', re.MULTILINE)
128+
129+
# fix links to files in rename_map
130+
rlink = re.compile('((?:src|href)=")([^"]*)(")')
131+
132+
def rlink_fix(match):
    """Rewrite the target of one ``src="..."`` / ``href="..."`` attribute.

    Intended as the replacement callback for the ``rlink`` pattern: group 1 is
    the attribute name up to and including the opening quote, group 2 is the
    link target and group 3 is the closing quote.

    The target is XML-unescaped and percent-decoded, every filename recorded
    in ``rename_map`` is replaced by its new name, the ``mwiki`` directories
    are redirected to ``../common/``, and a query string following ``.php`` or
    ``.css`` is dropped. The result is percent-encoded and XML-escaped again.

    Absolute links (``http://``, ``https://``, ``ftp://``, ``//``,
    ``mailto:``) are returned untouched: percent-encoding them would turn
    ``http://`` into ``http%3A//`` and break the link.

    Returns the full replacement text for the match.
    """
    pre = match.group(1)
    target = match.group(2)
    post = match.group(3)

    target = xml_unescape(target)

    # Leave absolute links exactly as they were written in the page.
    if target.startswith(('http://', 'https://', 'ftp://', '//', 'mailto:')):
        return match.group(0)

    target = urllib.parse.unquote(target)
    for fn, new_fn in rename_map:
        target = target.replace(fn, new_fn)
    target = target.replace('../../upload.cppreference.com/mwiki/', '../common/')
    target = target.replace('../mwiki/', '../common/')
    target = re.sub(r'(\.php|\.css)\?.*', r'\1', target)

    # Keep '#' literal: with the default safe set it would become '%23' and
    # links to a section of a page would stop working.
    target = urllib.parse.quote(target, safe='/#')
    target = xml_escape(target)
    return pre + target + post
81147

82148
# clean the html files
83149
for fn in html_files:
@@ -86,36 +152,41 @@
86152
f.close()
87153

88154
text = r1.sub('', text);
89-
text = r2.sub('', text);
90-
text = r3.sub('', text);
155+
text = rlink.sub(rlink_fix, text)
91156

92157
f = open(fn, "w")
93158
f.write(text)
94159
f.close()
95160

96161
tmpfile = fn + '.tmp';
97-
os.system('xsltproc --novalid --html --encoding UTF-8 preprocess.xsl "' + fn + '" > "' + tmpfile + '"')
162+
ret = os.system('xsltproc --novalid --html --encoding UTF-8 preprocess.xsl "' + fn + '" > "' + tmpfile + '"')
163+
if ret != 0:
164+
print("FAIL: " + fn)
165+
continue
98166
os.system('mv "' + tmpfile + '" "' + fn + '"')
99167

100-
# append css modifications to the css files
168+
# append css modifications
101169

102170
f = open("preprocess-css.css", "r")
103171
css_app = f.read()
104172
f.close()
173+
f = open("output/reference/common/site_modules.css", "a")
174+
f.write(css_app)
175+
f.close()
105176

106-
for fn in css_files:
177+
# fix css files
178+
179+
for fn in [ "output/reference/common/site_modules.css",
180+
"output/reference/common/ext.css"]:
107181
f = open(fn, "r")
108182
text = f.read()
109183
f.close()
110184

185+
# note that query string is not used in css files
186+
111187
text = text.replace('../DejaVuSansMonoCondensed60.ttf', 'DejaVuSansMonoCondensed60.ttf')
112188
text = text.replace('../DejaVuSansMonoCondensed75.ttf', 'DejaVuSansMonoCondensed75.ttf')
113189

114-
if (re.search('DejaVuSansMonoCondensed60', text)):
115-
# assume this is minified MediaWiki:Common.css
116-
# append the modifications
117-
text += css_app
118-
119190
# QT Help viewer doesn't understand nth-child
120191
text = text.replace('nth-child(1)', 'first-child')
121192

preprocess.xsl

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -86,28 +86,4 @@
8686
<xsl:template match="/html/body/script[contains(@src, 'google-analytics.com/ga.js')]"/>
8787
<xsl:template match="/html/body/script[contains(text(),'pageTracker')]"/>
8888

89-
<!-- update links to resources: -->
90-
<xsl:template match="//@href | //@src">
91-
<xsl:variable name="fixed_url">
92-
<xsl:choose>
93-
<xsl:when test="contains(.,'../../upload.cppreference.com/mwiki/')">
94-
<xsl:value-of select="str:replace(.,'../../upload.cppreference.com/mwiki/','../common/')"/>
95-
</xsl:when>
96-
<xsl:otherwise>
97-
<xsl:value-of select="str:replace(.,'../mwiki/','../common/')"/>
98-
</xsl:otherwise>
99-
</xsl:choose>
100-
</xsl:variable>
101-
<xsl:attribute name="{name()}">
102-
<xsl:choose>
103-
<xsl:when test="contains($fixed_url, '.css?') or contains($fixed_url, '.php?')">
104-
<xsl:copy-of select="substring-before($fixed_url,'?')"/>
105-
</xsl:when>
106-
<xsl:otherwise>
107-
<xsl:copy-of select="$fixed_url"/>
108-
</xsl:otherwise>
109-
</xsl:choose>
110-
</xsl:attribute>
111-
</xsl:template>
112-
11389
</xsl:stylesheet>

xml_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,11 @@
2828

2929
def xml_escape(text):
3030
return "".join(xml_escape_table.get(c,c) for c in text)
31+
32+
def xml_unescape(text):
    """Return *text* with XML character and entity references decoded.

    Decodes the five predefined entities (``&quot;``, ``&apos;``, ``&gt;``,
    ``&lt;``, ``&amp;``) as well as decimal (``&#39;``) and hexadecimal
    (``&#x27;``) character references. Any other ``&name;`` sequence is left
    as it is, and so is a numeric reference that does not denote a usable
    code point (zero, a surrogate, or a value above U+10FFFF).

    All references are decoded in one pass, so text produced by a replacement
    is never decoded a second time: ``&amp;lt;`` becomes ``&lt;``, not ``<``.
    """
    # Imported here so the function does not depend on the module's imports.
    import re

    named = {"quot": '"', "apos": "'", "gt": ">", "lt": "<", "amp": "&"}

    def _decode(match):
        ref = match.group(1)
        if ref[0] != "#":
            return named[ref]
        try:
            code = int(ref[2:], 16) if ref[1] in "xX" else int(ref[1:])
        except ValueError:
            # Absurdly long digit strings can be rejected by int().
            return match.group(0)
        if code == 0 or 0xD800 <= code <= 0xDFFF or code > 0x10FFFF:
            return match.group(0)
        return chr(code)

    return re.sub(r"&(#[xX][0-9a-fA-F]+|#[0-9]+|quot|apos|gt|lt|amp);",
                  _decode, text)

0 commit comments

Comments
 (0)