Skip to content

Commit 899fd03

Browse files
committed
preprocess: Apply relative link fixes directly to attrs
1 parent de36eb5 commit 899fd03

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

preprocess.py

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -145,22 +145,16 @@ def find_html_files(root):
145145
html_files.append(os.path.join(dir, filename))
146146
return html_files
147147

148-
def rlink_fix(rename_map, match):
149-
pre = match.group(1)
150-
target = match.group(2)
151-
post = match.group(3)
152-
153-
target = xml_unescape(target)
148+
def fix_relative_link(rename_map, target):
    """Rewrite a relative link target to match renamed and relocated files.

    The target is percent-decoded, rewritten, and percent-encoded again:

    * every old file name listed in ``rename_map`` is replaced by its new
      name;
    * the ``../../upload.cppreference.com/mwiki/`` and ``../mwiki/``
      prefixes are redirected to ``../common/``;
    * the query string following a ``.php`` or ``.css`` name is dropped.

    Targets that carry a URL scheme or a network location (``http://...``,
    ``//host/...``, ``mailto:...``) are returned unchanged, because
    re-quoting them would percent-encode the ``:`` after the scheme and
    break the link.

    Args:
        rename_map: iterable of 3-tuples ``(dir, fn, new_fn)``; only ``fn``
            (old name) and ``new_fn`` (replacement) are used here.
        target: value of a ``src`` or ``href`` attribute.

    Returns:
        The rewritten, percent-encoded target with ``#`` left literal.
    """
    try:
        parts = urllib.parse.urlsplit(target)
    except ValueError:
        # urlsplit only rejects malformed network locations, so such a
        # target is not a relative link; leave it as it is.
        return target
    if parts.scheme or parts.netloc:
        return target

    target = urllib.parse.unquote(target)
    for _, fn, new_fn in rename_map:
        target = target.replace(fn, new_fn)
    target = target.replace('../../upload.cppreference.com/mwiki/', '../common/')
    target = target.replace('../mwiki/', '../common/')
    # Raw strings: '\.' in a plain literal is an invalid escape sequence.
    target = re.sub(r'(\.php|\.css)\?.*', r'\1', target)
    target = urllib.parse.quote(target)
    # quote() encodes '#'; restore it so fragment identifiers keep working.
    target = target.replace('%23', '#')
    return target
164158

165159
def has_class(el, classes_to_check):
166160
value = el.get('class')
@@ -233,15 +227,17 @@ def preprocess_html_file(root, fn, rename_map):
233227
elif el.text is not None and ('google-analytics.com/ga.js' in el.text or 'pageTracker' in el.text):
234228
el.getparent().remove(el)
235229

230+
# apply changes to links caused by file renames
231+
for el in html.xpath('//*[@src or @href]'):
232+
if el.get('src') is not None:
233+
el.set('src', fix_relative_link(rename_map, el.get('src')))
234+
elif el.get('href') is not None:
235+
el.set('href', fix_relative_link(rename_map, el.get('href')))
236+
236237
for err in parser.error_log:
237238
print("HTML WARN: {0}".format(err))
238239
text = etree.tostring(html, encoding=str, method="html")
239240

240-
# fix links to files in rename_map
241-
rlink = re.compile('((?:src|href)=")([^"]*)(")')
242-
243-
text = rlink.sub(lambda match: rlink_fix(rename_map, match), text)
244-
245241
f = open(fn, "w")
246242
f.write(text)
247243
f.close()

0 commit comments

Comments
 (0)