Skip to content

Commit 8b1e5fa

Browse files
authored
Merge pull request p12tic#65 from p12tic/concurrent-preprocess
Preprocess: Run HTML preprocessing concurrently
2 parents 7d59aee + 5c427aa commit 8b1e5fa

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

commands/preprocess.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
# along with this program. If not, see http://www.gnu.org/licenses/.
1919

2020
import fnmatch
21+
import io
2122
from lxml import etree
2223
import re
2324
import os
@@ -313,6 +314,7 @@ def remove_ads(html):
313314
def preprocess_html_file(root, fn, rename_map):
314315
parser = etree.HTMLParser()
315316
html = etree.parse(fn, parser)
317+
output = io.StringIO()
316318

317319
# remove external links to unused resources
318320
for el in html.xpath('/html/head/link'):
@@ -332,9 +334,10 @@ def preprocess_html_file(root, fn, rename_map):
332334
el.set('href', transform_link(rename_map, el.get('href'), fn, root))
333335

334336
for err in parser.error_log:
335-
print("HTML WARN: {0}".format(err))
337+
print("HTML WARN: {0}".format(err), file=output)
336338

337339
html.write(fn, encoding='utf-8', method='html')
340+
return output.getvalue()
338341

339342
def preprocess_css_file(fn):
340343
f = open(fn, "r", encoding='utf-8')

preprocess.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from commands import preprocess
2121
import argparse
22+
import concurrent.futures
2223
import os
2324
import shutil
2425

@@ -41,6 +42,18 @@ def main():
4142
preprocess.rename_files(rename_map)
4243

4344
# clean the html files
45+
file_list = preprocess.find_html_files(root)
46+
47+
with concurrent.futures.ProcessPoolExecutor() as executor:
48+
futures = [executor.submit(preprocess.preprocess_html_file, root, fn,
49+
rename_map)
50+
for fn in file_list]
51+
52+
for future in futures:
53+
output = future.result()
54+
if len(output) > 0:
55+
print(output)
56+
4457
for fn in preprocess.find_html_files(root):
4558
preprocess.preprocess_html_file(root, fn, rename_map)
4659

0 commit comments

Comments
 (0)