Skip to content

Commit a6b72dd

Browse files
authored
Merge pull request p12tic#62 from mokibit/fix-qch-css
Improve html presentation in qch file
2 parents 479e167 + aa3a0be commit a6b72dd

File tree

8 files changed

+2966
-306
lines changed

8 files changed

+2966
-306
lines changed

commands/preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def preprocess_html_file(root, fn, rename_map):
227227
for el in html.xpath('//*'):
228228
if has_class(el, ['noprint', 'editsection']):
229229
el.getparent().remove(el)
230-
if el.get('id') == 'toc':
230+
if el.get('id') in ['toc', 'catlinks']:
231231
el.getparent().remove(el)
232232

233233
# remove see also links between C and C++ documentations

commands/preprocess_cssless.py

Lines changed: 245 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,39 +26,266 @@
2626
import warnings
2727
import io
2828

29-
def preprocess_html_merge_css(src_path, dst_path):
29+
def preprocess_html_merge_cssless(src_path, dst_path):
    """Convert one HTML file into a form that renders without CSS support.

    The file at ``src_path`` is parsed, its CSS is merged into inline
    ``style`` attributes, and a series of tree rewrites replaces CSS-only
    layout with plain HTML constructs. The result is written to
    ``dst_path`` as UTF-8 HTML.

    Returns the text captured from the cssutils log during the CSS merge.
    """
    # NOTE(review): the file is read with the platform default encoding but
    # written back as UTF-8 -- confirm the inputs are always UTF-8.
    with open(src_path, 'r') as a_file:
        content = a_file.read()
    parser = etree.HTMLParser()
    root = etree.fromstring(content.strip(), parser)

    output = preprocess_html_merge_css(root, src_path)
    strip_style_tags(root)
    remove_display_none(root)
    convert_span_tables_to_tr_td(root)
    convert_inline_block_elements_to_table(root)
    convert_zero_td_width_to_nonzero(root)
    convert_font_size_property_to_pt(root, 16)
    convert_table_border_top_to_tr_background(root)

    # os.makedirs('') raises FileNotFoundError, so only create the directory
    # when dst_path actually has a directory component.
    head = os.path.dirname(dst_path)
    if head:
        os.makedirs(head, exist_ok=True)

    with open(dst_path, 'wb') as a_file:
        root.getroottree().write(a_file, pretty_print=True, method="html",
                                 encoding='utf-8')
    return output
52+
53+
def silence_cssutils_warnings():
    """Redirect the cssutils log into an in-memory buffer.

    Returns the ``io.StringIO`` that receives the formatted log records.
    """
    sink = io.StringIO()

    capture = logging.StreamHandler(stream=sink)
    capture.setFormatter(logging.Formatter('%(levelname)s, %(message)s'))

    # A standalone Logger instance (not obtained via getLogger).
    quiet_log = logging.Logger('ignore')
    quiet_log.addHandler(capture)
    cssutils.log.setLog(quiet_log)

    return sink
63+
64+
def preprocess_html_merge_css(root, src_path):
    """Run Premailer over ``root`` and return the captured cssutils log text.

    ``src_path`` is passed to Premailer as ``base_url``.
    """
    # cssutils_logging_handler of Premailer.__init__ is insufficient to silence
    # warnings to stderr in non-verbose mode
    captured = silence_cssutils_warnings()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        inliner = Premailer(root, base_url=src_path,
                            disable_link_rewrites=True, remove_classes=True)
        # Only rebinds the local name; the caller keeps using the element it
        # passed in.
        root = inliner.transform().getroot()

    return captured.getvalue()
5576

56-
# completely remove content of style tags and tags
57-
nondata_tags = ['style']
58-
strip_elements(root, *nondata_tags)
77+
def strip_style_tags(root):
    """Remove every ``style`` element, including its content, from ``root``."""
    style_tag = 'style'
    strip_elements(root, style_tag)
5979

60-
with open(dst_path, 'wb') as a_file:
61-
root.getroottree().write(a_file, pretty_print=True, method="html",
62-
encoding='utf-8')
80+
def needs_td_wrapper(element):
    """Return True unless a direct child declares a table-row/table-cell display.

    An element without children also yields True.
    """
    return not any(
        has_css_property_value(child, 'display', 'table-row') or
        has_css_property_value(child, 'display', 'table-cell')
        for child in element.getchildren()
    )
6389

64-
return output.getvalue()
90+
def remove_css_property(element, property_name):
    """Drop ``property_name`` from the element's inline ``style`` attribute.

    The ``style`` attribute itself is removed when nothing is left in it.
    """
    declarations = cssutils.parseStyle(element.get('style'))
    declarations.removeProperty(property_name)

    serialized = declarations.getCssText(separator='')
    element.set('style', serialized)
    if not len(element.get('style')):
        del element.attrib['style']
96+
97+
98+
def get_css_property_value(el, prop_name):
    """Return the text of ``prop_name`` from the inline style, or None."""
    declarations = cssutils.parseStyle(el.get('style'))
    css_value = declarations.getPropertyCSSValue(prop_name)
    return css_value.cssText if css_value else None
104+
105+
def has_css_property_value(el, prop_name, prop_value):
    """Return True when the inline style sets ``prop_name`` to ``prop_value``."""
    actual = get_css_property_value(el, prop_name)
    return bool(actual) and actual == prop_value
110+
111+
def set_css_property_value(el, prop_name, prop_value):
    """Set ``prop_name`` to ``prop_value`` in the element's inline style."""
    declarations = cssutils.parseStyle(el.get('style'))
    declarations.setProperty(prop_name, prop_value)
    serialized = declarations.getCssText(separator='')
    el.set('style', serialized)
115+
116+
def convert_display_property_to_html_tag(element, element_tag, display_value):
    """Rename ``element`` to ``element_tag`` if its display is ``display_value``.

    When the inline style declares ``display: <display_value>``, the tag is
    changed and the ``display`` property is removed from the style.

    Returns True when the element was renamed, False otherwise (always a
    bool; previously the non-matching path fell through and returned None).
    """
    if element.get('style') is None:
        return False
    if not has_css_property_value(element, 'display', display_value):
        return False

    element.tag = element_tag
    remove_css_property(element, 'display')
    return True
124+
125+
def convert_span_table_to_tr_td(table_el):
    """Turn an element styled as a CSS table into real table/tr/td markup.

    ``table_el`` becomes a ``table``. Each direct child with
    ``display: table-row`` becomes a ``tr``; its ``table-cell`` children
    become ``td``, or, when it has no row/cell children, its whole content
    is wrapped in a single new ``td``.
    """
    table_el.tag = 'table'
    remove_css_property(table_el, 'display')

    for row in table_el.getchildren():
        if not convert_display_property_to_html_tag(row, 'tr', 'table-row'):
            continue

        if not needs_td_wrapper(row):
            for cell in row:
                convert_display_property_to_html_tag(cell, 'td',
                                                     'table-cell')
            continue

        # No cell-like children: move everything into one wrapping cell.
        wrapper = etree.Element('td')
        for node in row.getchildren():
            row.remove(node)
            wrapper.append(node)
        row.append(wrapper)
        wrapper.text = row.text
        row.text = None
145+
146+
def wrap_element(el, tag_name, style):
    """Insert a new ``tag_name`` element in place of ``el`` and move ``el`` into it.

    The new element carries ``style`` as its ``style`` attribute.
    """
    wrapper = etree.Element(tag_name)
    wrapper.set('style', style)
    # Put the wrapper where el currently sits, then move el inside it.
    el.addprevious(wrapper)
    wrapper.insert(0, el)
151+
152+
def remove_display_none(root_el):
    """Remove every element whose inline style declares ``display: none``.

    Text that follows a removed element (its tail) is kept: lxml's
    ``remove()`` would otherwise discard the tail together with the
    element, silently dropping visible text.
    """
    for el in root_el.xpath('//*[contains(@style, "display")]'):
        if not has_css_property_value(el, 'display', 'none'):
            continue

        parent = el.getparent()
        if parent is None:
            # The document root cannot be detached from a parent.
            continue

        if el.tail:
            # Re-attach the tail to whatever precedes the element.
            previous = el.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + el.tail
            else:
                parent.text = (parent.text or '') + el.tail

        parent.remove(el)
156+
157+
def convert_span_tables_to_tr_td(root_el):
    """Convert every ``span`` styled as ``display: table`` into a real table.

    Returns ``root_el``.
    """
    # note that the following xpath expression matches only the prefix of the
    # CSS property value, so each candidate is re-checked for an exact match
    candidates = root_el.xpath('//span[contains(@style, "display:table")]')

    for span_el in candidates:
        if not has_css_property_value(span_el, 'display', 'table'):
            continue
        convert_span_table_to_tr_td(span_el)

    return root_el
170+
171+
def convert_inline_block_elements_to_table(root_el):
    """Lay out runs of adjacent inline-block/inline-table elements as a table row.

    Two or more consecutive sibling elements with ``display: inline-block``
    or ``display: inline-table`` are moved into the cells of a single-row
    table inserted where the run started.
    """
    def is_inline_box(node):
        return (has_css_property_value(node, 'display', 'inline-block') or
                has_css_property_value(node, 'display', 'inline-table'))

    for first in root_el.xpath('//*[contains(@style, "display")]'):
        if not is_inline_box(first):
            continue

        # collect the run of directly following inline-box siblings
        run = [first]
        sibling = first.getnext()
        while sibling is not None and is_inline_box(sibling):
            run.append(sibling)
            sibling = sibling.getnext()

        # only makes sense to put two or more to table
        if len(run) < 2:
            continue

        table_el = etree.Element('table')
        table_el.set('style', 'padding:0; margin:0; border:none;')
        run[0].addprevious(table_el)

        row_el = etree.SubElement(table_el, 'tr')
        for member in run:
            cell_el = etree.SubElement(row_el, 'td')
            member.getparent().remove(member)
            cell_el.append(member)
205+
206+
def get_table_rows(table_el):
    """Yield the ``th``/``tr`` children of a table.

    Direct children come first, followed by those nested one level down
    inside ``thead``, ``tbody`` or ``tfoot``.
    """
    row_tags = ['th', 'tr']
    yield from table_el.iterchildren(row_tags)
    for section in table_el.iterchildren(['thead', 'tbody', 'tfoot']):
        yield from section.iterchildren(row_tags)
212+
213+
def get_max_number_of_columns(table_el):
    """Return the largest number of ``td`` children found in any table row.

    Returns 0 for a table without rows. Only ``td`` children are counted.
    """
    cell_counts = (len(list(row.iterchildren('td')))
                   for row in get_table_rows(table_el))
    return max(cell_counts, default=0)
218+
219+
def clear_tr_border_top(tr_el):
    """Remove the ``border-top`` style property from every ``td`` of the row."""
    cells = tr_el.iterchildren('td')
    for cell in cells:
        remove_css_property(cell, 'border-top')
222+
223+
def has_tr_border_top(tr_el):
    """Return True when any ``td`` of the row declares a ``border-top``."""
    return any(
        get_css_property_value(cell, 'border-top') is not None
        for cell in tr_el.iterchildren('td')
    )
228+
229+
def has_table_border_top(table_el):
    """Return True when any row of the table has a cell with ``border-top``."""
    return any(has_tr_border_top(row) for row in get_table_rows(table_el))
234+
235+
def convert_table_border_top_to_tr_background(root_el):
    """Replace per-cell ``border-top`` with a thin coloured separator row.

    For each row that has a cell with ``border-top``, the property is
    removed from the row's cells and a one-cell row spanning the table's
    column count is inserted directly before it.
    """
    separator_style = ('height:1px; font-size:1px; '
                       'background-color: #ccc;')

    for table_el in root_el.iter('table'):
        if not has_table_border_top(table_el):
            continue

        column_count = get_max_number_of_columns(table_el)
        for row in get_table_rows(table_el):
            if not has_tr_border_top(row):
                continue

            # TODO: handle border properties
            clear_tr_border_top(row)

            separator_row = etree.Element('tr')
            separator_cell = etree.SubElement(separator_row, 'td')
            separator_cell.set('colspan', str(column_count))
            separator_cell.set('style', separator_style)
            row.addprevious(separator_row)
251+
252+
253+
def convert_zero_td_width_to_nonzero(root_el):
    """Replace zero-percent widths with a ``width="1px"`` attribute.

    Handles both an inline ``width: 0%`` style (which is removed from the
    style) and an existing ``width="0%"`` attribute.
    """
    for el in root_el.xpath('//*[contains(@style, "width")]'):
        if has_css_property_value(el, 'width', '0%'):
            el.attrib['width'] = "1px"
            remove_css_property(el, 'width')

    # Compare the whole attribute value: a substring test for "0%" would
    # also match "100%", "50%", "10%", ... and collapse those to 1px.
    for el in root_el.xpath('//*[@width]'):
        if el.get('width').strip() == '0%':
            el.attrib['width'] = "1px"
261+
262+
def apply_font_size(size, parent_size_pt):
    """Resolve a CSS ``font-size`` value to a number of points.

    ``em`` and ``%`` values are scaled by ``parent_size_pt``; ``pt`` and
    ``px`` values are returned as-is (pixels are treated as points). Units
    are matched case-insensitively.

    Anything that cannot be resolved -- keywords such as ``small``, an empty
    value, ``rem`` (the root size is not known here), or a non-numeric
    magnitude -- falls back to ``parent_size_pt`` instead of raising.
    """
    size = size.strip().lower()

    try:
        # "rem" also ends in "em"; its magnitude ("1.5r") fails float() and
        # therefore takes the fallback below.
        if size.endswith('em'):
            return float(size[:-2]) * parent_size_pt

        if size.endswith(('pt', 'px')):
            return float(size[:-2])

        if size.endswith('%'):
            return float(size[:-1]) / 100 * parent_size_pt
    except ValueError:
        pass

    return parent_size_pt
277+
278+
def convert_font_size_property_to_pt_recurse(el, parent_size_pt):
    """Rewrite every inline ``font-size`` under ``el`` as an absolute pt value.

    Each element's size is resolved against the size of its parent;
    elements without a ``font-size`` pass their parent's size on to their
    children. ``parent_size_pt`` is the size in effect outside ``el``.
    """
    # Explicit stack instead of recursion; children are pushed in reverse
    # so elements are still visited in document order.
    pending = [(el, parent_size_pt)]
    while pending:
        node, inherited_pt = pending.pop()

        declared = get_css_property_value(node, "font-size")
        if declared:
            node_pt = apply_font_size(declared, inherited_pt)
            set_css_property_value(node, "font-size", "{}pt".format(node_pt))
        else:
            node_pt = inherited_pt

        pending.extend((child, node_pt)
                       for child in reversed(node.getchildren()))
289+
290+
def convert_font_size_property_to_pt(root_el, default_size):
    """Convert all inline ``font-size`` values under ``root_el`` to points.

    ``default_size`` is the size assumed for ``root_el``'s parent.
    """
    convert_font_size_property_to_pt_recurse(el=root_el,
                                             parent_size_pt=default_size)

devhelp2qch.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from copy import deepcopy
2525
import sys
2626
import argparse
27+
from index_transform.devhelp_qch import convert_devhelp_to_qch
2728

2829
def main():
2930
parser = argparse.ArgumentParser(prog='devhelp2qch.py')
@@ -43,7 +44,7 @@ def main():
4344
file_tree = etree.parse(file_path, parser)
4445

4546
out_f = open(dst_path, 'wb')
46-
out_f.write(convert_devhelp_to_qch(in_tree.getroot(), file_tree.getroot(), v_folder)
47+
out_f.write(convert_devhelp_to_qch(in_tree.getroot(), file_tree.getroot(), v_folder))
4748
out_f.close()
4849

4950
if __name__ == "__main__":

preprocess_qch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def main():
5252
paths_list.append(tuple)
5353

5454
with concurrent.futures.ProcessPoolExecutor() as executor:
55-
futures = [ (executor.submit(preprocess_html_merge_css,
55+
futures = [ (executor.submit(preprocess_html_merge_cssless,
5656
src_path, dst_path), i)
5757
for i, (src_path, dst_path) in enumerate(paths_list) ]
5858

0 commit comments

Comments
 (0)