|
26 | 26 | import warnings |
27 | 27 | import io |
28 | 28 |
|
29 | | -def preprocess_html_merge_css(src_path, dst_path): |
def preprocess_html_merge_cssless(src_path, dst_path):
    """Convert the HTML file at src_path into a CSS-less variant at dst_path.

    The source file is parsed with lxml's HTML parser, its CSS is merged into
    the elements by preprocess_html_merge_css(), and the tree is then run
    through the conversion helpers of this module before being written to
    dst_path as UTF-8 encoded HTML. Missing parent directories of dst_path
    are created.

    Returns the string returned by preprocess_html_merge_css(), i.e. the
    cssutils log output captured during the CSS merge.
    """
    with open(src_path, 'r') as a_file:
        content = a_file.read()
    parser = etree.HTMLParser()
    stripped = content.strip()
    root = etree.fromstring(stripped, parser)

    output = preprocess_html_merge_css(root, src_path)
    strip_style_tags(root)
    remove_display_none(root)
    convert_span_tables_to_tr_td(root)
    convert_inline_block_elements_to_table(root)
    convert_zero_td_width_to_nonzero(root)
    # 16 is the font size assumed for elements without an explicit font-size
    convert_font_size_property_to_pt(root, 16)
    convert_table_border_top_to_tr_background(root)

    head = os.path.dirname(dst_path)
    # dirname() is '' for a bare file name and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    if head:
        os.makedirs(head, exist_ok=True)

    with open(dst_path, 'wb') as a_file:
        root.getroottree().write(a_file, pretty_print=True, method="html",
                                 encoding='utf-8')
    return output
| 52 | + |
def silence_cssutils_warnings():
    """Redirect cssutils logging into an in-memory buffer.

    A dedicated logger whose only handler writes to a StringIO is installed
    through cssutils.log.setLog(). The StringIO is returned so that the
    caller can read the messages that were logged.
    """
    captured = io.StringIO()

    buffer_handler = logging.StreamHandler(stream=captured)
    buffer_handler.setFormatter(
        logging.Formatter('%(levelname)s, %(message)s'))

    quiet_logger = logging.Logger('ignore')
    quiet_logger.addHandler(buffer_handler)
    cssutils.log.setLog(quiet_logger)

    return captured
| 63 | + |
def preprocess_html_merge_css(root, src_path):
    """Run Premailer over root to merge the document's CSS into it.

    src_path is passed to Premailer as the base URL. Python warnings are
    suppressed for the duration of the transformation.

    Returns the cssutils log output captured during the call as a string.
    """
    # cssutils_logging_handler of Premailer.__init__ is insufficient to
    # silence warnings to stderr in non-verbose mode, so the cssutils logger
    # is replaced before Premailer is used.
    log_buffer = silence_cssutils_warnings()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        inliner = Premailer(root, base_url=src_path,
                            disable_link_rewrites=True, remove_classes=True)
        # NOTE(review): the transformed root is not handed back to the
        # caller, which keeps working on the element it passed in. This
        # presumably relies on Premailer modifying that tree in place -
        # confirm.
        inliner.transform().getroot()

    return log_buffer.getvalue()
55 | 76 |
|
56 | | - # completely remove content of style tags and tags |
57 | | - nondata_tags = ['style'] |
58 | | - strip_elements(root, *nondata_tags) |
def strip_style_tags(root):
    """Strip all <style> elements from the tree at root via strip_elements()."""
    nondata_tags = ('style',)
    strip_elements(root, *nondata_tags)
59 | 79 |
|
60 | | - with open(dst_path, 'wb') as a_file: |
61 | | - root.getroottree().write(a_file, pretty_print=True, method="html", |
62 | | - encoding='utf-8') |
def needs_td_wrapper(element):
    """Tell whether the content of element must be wrapped into a <td>.

    Returns False as soon as a direct child carries 'display: table-row' or
    'display: table-cell'. Returns True otherwise, which includes the case
    of an element without any children.
    """
    return not any(
        has_css_property_value(child, 'display', 'table-row') or
        has_css_property_value(child, 'display', 'table-cell')
        for child in element.getchildren()
    )
63 | 89 |
|
64 | | - return output.getvalue() |
def remove_css_property(element, property_name):
    """Drop property_name from the inline style of element.

    The style attribute is rewritten without the property. When nothing is
    left in the declaration the style attribute itself is removed.
    """
    declaration = cssutils.parseStyle(element.get('style'))
    declaration.removeProperty(property_name)

    remaining = declaration.getCssText(separator='')
    if remaining:
        element.set('style', remaining)
    else:
        element.attrib.pop('style', None)
| 96 | + |
| 97 | + |
def get_css_property_value(el, prop_name):
    """Return the CSS text of prop_name from the inline style of el.

    Returns None when cssutils yields no (or a falsy) value for the property.
    """
    declaration = cssutils.parseStyle(el.get('style'))
    css_value = declaration.getPropertyCSSValue(prop_name)
    return css_value.cssText if css_value else None
| 104 | + |
def has_css_property_value(el, prop_name, prop_value):
    """Return True when the inline style of el sets prop_name to prop_value.

    The comparison is an exact string match against the value reported by
    get_css_property_value().
    """
    current = get_css_property_value(el, prop_name)
    return bool(current) and current == prop_value
| 110 | + |
def set_css_property_value(el, prop_name, prop_value):
    """Set prop_name to prop_value in the inline style of el.

    The existing style attribute is parsed, updated and written back.
    """
    declaration = cssutils.parseStyle(el.get('style'))
    declaration.setProperty(prop_name, prop_value)
    updated_style = declaration.getCssText(separator='')
    el.set('style', updated_style)
| 115 | + |
def convert_display_property_to_html_tag(element, element_tag, display_value):
    """Rename element to element_tag if its inline 'display' is display_value.

    On a match the tag of element is replaced by element_tag and the
    'display' property is removed from its inline style.

    Returns True when the element was renamed and False otherwise (no style
    attribute, or a different 'display' value).
    """
    if element.get('style') is None:
        return False

    if not has_css_property_value(element, 'display', display_value):
        # This path used to fall off the end of the function and return
        # None; return an explicit bool so all paths agree.
        return False

    element.tag = element_tag
    remove_css_property(element, 'display')
    return True
| 124 | + |
def convert_span_table_to_tr_td(table_el):
    """Turn an element styled as a CSS table into a real <table>.

    table_el itself becomes <table> and loses its 'display' property. Every
    direct child with 'display: table-row' becomes a <tr>. Inside such a row
    either the children with 'display: table-cell' become <td>, or - when
    needs_td_wrapper() says so - the whole row content (text and children)
    is moved into a single new <td>.
    """
    table_el.tag = 'table'
    remove_css_property(table_el, 'display')

    for row in table_el.getchildren():
        if not convert_display_property_to_html_tag(row, 'tr', 'table-row'):
            continue

        if not needs_td_wrapper(row):
            for cell in row:
                convert_display_property_to_html_tag(cell, 'td',
                                                     'table-cell')
            continue

        # No cell-like children: wrap everything the row holds into one <td>
        wrapper = etree.Element('td')
        for child in row.getchildren():
            row.remove(child)
            wrapper.append(child)
        row.append(wrapper)
        wrapper.text = row.text
        row.text = None
| 145 | + |
def wrap_element(el, tag_name, style):
    """Wrap el into a new tag_name element carrying the given inline style.

    The wrapper is inserted right before el and el is then moved into it as
    its first child.
    """
    wrapper = etree.Element(tag_name, attrib={'style': style})
    el.addprevious(wrapper)
    wrapper.insert(0, el)
| 151 | + |
def remove_display_none(root_el):
    """Remove every element whose inline style sets 'display: none'.

    Text that follows a removed element (its tail) is not part of the hidden
    element and is therefore kept: it is appended to the tail of the
    preceding sibling, or to the text of the parent when there is no
    preceding sibling. An element without a parent cannot be detached and is
    left in place.
    """
    for el in root_el.xpath('//*[contains(@style, "display")]'):
        if not has_css_property_value(el, 'display', 'none'):
            continue

        parent = el.getparent()
        if parent is None:
            continue

        # lxml removes the tail text together with the element, so hand the
        # tail over to whatever precedes the element before detaching it.
        if el.tail:
            previous = el.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + el.tail
            else:
                parent.text = (parent.text or '') + el.tail

        parent.remove(el)
| 156 | + |
def convert_span_tables_to_tr_td(root_el):
    """Convert all <span> elements with 'display: table' into real tables.

    Returns root_el.
    """
    # The XPath expression only matches the prefix of the CSS property value
    # (e.g. it also finds 'display:table-row'), so each candidate is checked
    # for the exact value afterwards.
    candidates = root_el.xpath('//span[contains(@style, "display:table")]')

    for span_el in candidates:
        if not has_css_property_value(span_el, 'display', 'table'):
            continue
        convert_span_table_to_tr_td(span_el)

    return root_el
| 170 | + |
def convert_inline_block_elements_to_table(root_el):
    """Lay out runs of adjacent inline-block elements as a one-row table.

    Two or more directly consecutive siblings with 'display: inline-block'
    or 'display: inline-table' are moved into the cells of a new single-row
    <table> that is inserted where the first element of the run was.
    """
    def is_inline_block(node):
        return (has_css_property_value(node, 'display', 'inline-block') or
                has_css_property_value(node, 'display', 'inline-table'))

    for first in root_el.xpath('//*[contains(@style, "display")]'):
        if not is_inline_block(first):
            continue

        # collect the run of inline-block siblings starting at `first`
        run = [first]
        sibling = first.getnext()
        while sibling is not None and is_inline_block(sibling):
            run.append(sibling)
            sibling = sibling.getnext()

        # a table only makes sense for two or more elements
        if len(run) < 2:
            continue

        table_el = etree.Element('table')
        table_el.set('style', 'padding:0; margin:0; border:none;')
        run[0].addprevious(table_el)

        row_el = etree.SubElement(table_el, 'tr')
        for member in run:
            cell_el = etree.SubElement(row_el, 'td')
            member.getparent().remove(member)
            cell_el.append(member)
| 205 | + |
def get_table_rows(table_el):
    """Yield the row elements of table_el.

    First the direct 'th'/'tr' children of the table are produced, then the
    'th'/'tr' children of each direct 'thead', 'tbody' and 'tfoot' child.
    """
    yield from table_el.iterchildren(['th', 'tr'])
    for section in table_el.iterchildren(['thead', 'tbody', 'tfoot']):
        yield from section.iterchildren(['th', 'tr'])
| 212 | + |
def get_max_number_of_columns(table_el):
    """Return the largest number of direct <td> children found in any row.

    Rows are taken from get_table_rows(); the result is 0 for a table
    without rows.
    """
    td_counts = (
        sum(1 for _ in row.iterchildren('td'))
        for row in get_table_rows(table_el)
    )
    return max(td_counts, default=0)
| 218 | + |
def clear_tr_border_top(tr_el):
    """Remove the 'border-top' property from every <td> child of tr_el."""
    cells = tr_el.iterchildren('td')
    for cell in cells:
        remove_css_property(cell, 'border-top')
| 222 | + |
def has_tr_border_top(tr_el):
    """Return True if any <td> child of tr_el has a 'border-top' property."""
    return any(
        get_css_property_value(cell, 'border-top') is not None
        for cell in tr_el.iterchildren('td')
    )
| 228 | + |
def has_table_border_top(table_el):
    """Return True if any row of table_el has a cell with 'border-top'."""
    return any(has_tr_border_top(row) for row in get_table_rows(table_el))
| 234 | + |
def convert_table_border_top_to_tr_background(root_el):
    """Replace 'border-top' on table cells by a thin separator row.

    For every table that has at least one cell with 'border-top', each row
    containing such a cell gets the property removed from its cells and a
    new 1px high row with a grey background, spanning all columns, inserted
    right before it.
    """
    for table_el in root_el.iter('table'):
        if not has_table_border_top(table_el):
            continue

        column_count = get_max_number_of_columns(table_el)
        for row in get_table_rows(table_el):
            if not has_tr_border_top(row):
                continue

            # TODO: handle border properties
            clear_tr_border_top(row)

            separator_row = etree.Element('tr')
            separator_cell = etree.SubElement(separator_row, 'td')
            separator_cell.set('colspan', str(column_count))
            separator_cell.set('style', 'height:1px; font-size:1px; '
                                        'background-color: #ccc;')
            row.addprevious(separator_row)
| 251 | + |
| 252 | + |
def convert_zero_td_width_to_nonzero(root_el):
    """Replace zero-percent widths by a width attribute of "1px".

    Elements whose inline style sets 'width: 0%' get the property removed
    and a width="1px" attribute instead. Elements whose width attribute is
    exactly "0%" have it rewritten to "1px".
    """
    for el in root_el.xpath('//*[contains(@style, "width")]'):
        if has_css_property_value(el, 'width', '0%'):
            el.attrib['width'] = "1px"
            remove_css_property(el, 'width')

    # Compare the whole attribute value: a substring test for "0%" would
    # also hit widths such as "10%", "50%" or "100%".
    for el in root_el.xpath('//*[normalize-space(@width)="0%"]'):
        el.attrib['width'] = "1px"
| 261 | + |
def apply_font_size(size, parent_size_pt):
    """Resolve a CSS font-size value against the font size of the parent.

    size is the textual CSS value, parent_size_pt the numeric size of the
    parent element.

    - 'em' and '%' values are scaled by parent_size_pt.
    - 'pt' and 'px' values are returned as the plain number (pixels are not
      converted, they are treated like points).
    - Anything else - keywords, unsupported units such as 'rem', an empty
      string or a malformed number - leaves the size unchanged, i.e.
      parent_size_pt is returned.

    Units are matched case-insensitively.
    """
    size = size.strip().lower()

    # 'rem' ends with 'em' but is relative to the root element, which is not
    # known here; treat it like any other unsupported unit.
    if size.endswith('rem'):
        return parent_size_pt

    try:
        if size.endswith('em'):
            return float(size[:-2]) * parent_size_pt

        if size.endswith(('pt', 'px')):
            return float(size[:-2])

        if size.endswith('%'):
            return float(size[:-1]) / 100 * parent_size_pt
    except ValueError:
        # the part in front of the unit is not a number (e.g. 'system')
        return parent_size_pt

    return parent_size_pt
| 277 | + |
def convert_font_size_property_to_pt_recurse(el, parent_size_pt):
    """Rewrite 'font-size' of el and all its descendants to 'pt' values.

    When el has a 'font-size' in its inline style, the value is resolved via
    apply_font_size() against parent_size_pt and written back with a 'pt'
    suffix. The resulting size (or parent_size_pt when el has no font-size)
    is the parent size used for the children of el.
    """
    declared_size = get_css_property_value(el, "font-size")

    el_size_pt = parent_size_pt
    if declared_size:
        el_size_pt = apply_font_size(declared_size, parent_size_pt)
        set_css_property_value(el, "font-size", "{}pt".format(el_size_pt))

    for child in el.getchildren():
        convert_font_size_property_to_pt_recurse(child, el_size_pt)
| 289 | + |
def convert_font_size_property_to_pt(root_el, default_size):
    """Convert all 'font-size' properties in the tree at root_el to 'pt'.

    default_size is the size assumed for root_el's parent, i.e. the value
    that relative sizes at the top of the tree are resolved against.
    """
    convert_font_size_property_to_pt_recurse(el=root_el,
                                             parent_size_pt=default_size)
0 commit comments