Mercurial > p > roundup > code
annotate roundup/cgi/TAL/HTMLTALParser.py @ 8264:09e8d1a4c796
docs: clarify wording, fix index, add superseder link
Make superseder, messages etc. properties index entries point to the
right place.
Link to description of using Superseder in the original overview.
fix bad wording on boolean properties.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Wed, 08 Jan 2025 11:39:54 -0500 |
| parents | 23b8e6067f7c |
| children |
| rev | line source |
|---|---|
| 1049 | 1 ############################################################################## |
| 2 # | |
| 3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors. | |
| 4 # All Rights Reserved. | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
5 # |
| 1049 | 6 # This software is subject to the provisions of the Zope Public License, |
| 7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | |
| 8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | |
| 9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
11 # FOR A PARTICULAR PURPOSE. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
12 # |
| 1049 | 13 ############################################################################## |
| 14 """ | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
15 Parse HTML and compile to TALInterpreter intermediate code. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
16 """ |
| 1049 | 17 |
| 18 import sys | |
| 19 | |
|
5388
d26921b851c3
Python 3 preparation: make relative imports explicit.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
20 from .TALGenerator import TALGenerator |
|
d26921b851c3
Python 3 preparation: make relative imports explicit.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
21 from .HTMLParser import HTMLParser, HTMLParseError |
|
d26921b851c3
Python 3 preparation: make relative imports explicit.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
22 from .TALDefs import \ |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
23 ZOPE_METAL_NS, ZOPE_TAL_NS, ZOPE_I18N_NS, METALError, TALError, I18NError |
| 1049 | 24 |
# Boolean HTML attributes that may appear in minimized form
# (e.g. <img ismap> instead of <img ismap="">).
# Source: http://www.w3.org/TR/xhtml1/#guidelines (C.10)
BOOLEAN_HTML_ATTRS = [
    "compact",
    "nowrap",
    "ismap",
    "declare",
    "noshade",
    "checked",
    "disabled",
    "readonly",
    "multiple",
    "selected",
    "noresize",
    "defer",
]

# HTML tags whose content model is empty; they are rendered in
# minimized form, e.g. <img />.
# Source: http://www.w3.org/TR/xhtml1/#dtds
EMPTY_HTML_TAGS = [
    "base",
    "meta",
    "link",
    "hr",
    "br",
    "param",
    "img",
    "area",
    "input",
    "col",
    "basefont",
    "isindex",
    "frame",
]

# Paragraph-level elements: opening one of these closes any open
# paragraph-level element.
PARA_LEVEL_HTML_TAGS = ["h1", "h2", "h3", "h4", "h5", "h6", "p"]

# Maps a start tag to the open tags it implicitly closes.
BLOCK_CLOSING_TAG_MAP = {
    "tr": ("tr", "td", "th"),
    "td": ("td", "th"),
    "th": ("td", "th"),
    "li": ("li",),
    "dd": ("dd", "dt"),
    "dt": ("dd", "dt"),
}

# Tags that denote larger sections than paragraphs.
# NOTE(review): "noframe" is not an HTML element name ("noframes" is);
# left unchanged here because altering it would change parsing behaviour.
BLOCK_LEVEL_HTML_TAGS = [
    "blockquote",
    "table",
    "tr",
    "th",
    "td",
    "thead",
    "tfoot",
    "tbody",
    "noframe",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "div",
]

# Tags whose implied close tag is emitted before any trailing whitespace
# rather than after it.
TIGHTEN_IMPLICIT_CLOSE_TAGS = PARA_LEVEL_HTML_TAGS + list(BLOCK_CLOSING_TAG_MAP)
| 1049 | 65 |
| 66 | |
class NestingError(HTMLParseError):
    """Raised when a close tag does not match the currently open tags.

    The offending close tag name is kept on the ``endtag`` attribute.
    """

    def __init__(self, tagstack, endtag, position=(None, None)):
        """Build the error message from the open tags and the close tag.

        tagstack -- sequence of currently open tag names (may be empty)
        endtag   -- name of the close tag that could not be matched
        position -- position value forwarded to HTMLParseError
        """
        self.endtag = endtag
        if not tagstack:
            message = 'No tags are open to match </%s>' % endtag
        elif len(tagstack) == 1:
            message = ('Open tag <%s> does not match close tag </%s>'
                       % (tagstack[0], endtag))
        else:
            open_tags = '>, <'.join(tagstack)
            message = ('Open tags <%s> do not match close tag </%s>'
                       % (open_tags, endtag))
        HTMLParseError.__init__(self, message, position)
| 82 | |
class EmptyTagError(NestingError):
    """Raised when an empty element is given an explicit end tag.

    The tag name is kept on the ``tag`` attribute.  The constructor calls
    HTMLParseError.__init__ directly, so NestingError.__init__ does not run
    and no ``endtag`` attribute is set.
    """

    def __init__(self, tag, position=(None, None)):
        """Record *tag* and build the "should be removed" message."""
        self.tag = tag
        HTMLParseError.__init__(
            self, 'Close tag </%s> should be removed' % tag, position)
| 90 | |
class OpenTagError(NestingError):
    """Raised when a tag is opened inside a tag that does not allow it.

    The tag name is kept on the ``tag`` attribute.  The constructor calls
    HTMLParseError.__init__ directly, so NestingError.__init__ does not run
    and no ``endtag`` attribute is set.
    """

    def __init__(self, tagstack, tag, position=(None, None)):
        """Record *tag* and report it against the innermost open tag.

        tagstack must be non-empty: its last entry is used in the message.
        """
        self.tag = tag
        innermost = tagstack[-1]
        HTMLParseError.__init__(
            self, 'Tag <%s> is not allowed in <%s>' % (tag, innermost),
            position)
| 98 | |
class HTMLTALParser(HTMLParser):
    """HTML parser that forwards parse events to a TAL code generator.

    Start tags, end tags and text are passed to ``self.gen``.  The parser
    tracks the open tags in ``self.tagstack`` so that it can emit implied
    end tags, and tracks ``xmlns:`` prefix declarations so that TAL, METAL
    and i18n attributes can be recognised under any prefix.
    """

    # External API

    def __init__(self, gen=None):
        """Create a parser.

        gen -- generator receiving the emit* calls; when None, a
               ``TALGenerator(xml=0)`` is created.
        """
        HTMLParser.__init__(self)
        if gen is None:
            gen = TALGenerator(xml=0)
        self.gen = gen
        # Names of the currently open tags, outermost first.
        self.tagstack = []
        # One saved prefix->URI mapping per open element (see scan_xmlns).
        self.nsstack = []
        # Prefix->URI mapping currently in effect.
        self.nsdict = {
            'tal': ZOPE_TAL_NS,
            'metal': ZOPE_METAL_NS,
            'i18n': ZOPE_I18N_NS,
        }

    def parseFile(self, file):
        """Read the file named *file* and parse its contents.

        A TALError raised while parsing is annotated with the file name
        (via ``setFile``) and re-raised.
        """
        # The context manager closes the file even when read() fails.
        with open(file) as f:
            data = f.read()
        try:
            self.parseString(data)
        except TALError as e:
            e.setFile(file)
            raise

    def parseString(self, data):
        """Parse *data* and implicitly close every tag left open."""
        self.feed(data)
        self.close()
        while self.tagstack:
            # implied=2 marks end tags generated at end of input.
            self.implied_endtag(self.tagstack[-1], 2)
        assert self.nsstack == [], self.nsstack

    def getCode(self):
        """Return whatever the generator's getCode() returns."""
        return self.gen.getCode()

    def getWarnings(self):
        """Return an empty tuple; this parser records no warnings."""
        return ()

    # Overriding HTMLParser methods

    def handle_starttag(self, tag, attrs):
        """Handle ``<tag ...>``: emit a start element and push the tag.

        Tags listed in EMPTY_HTML_TAGS are closed again immediately.
        Raises TALError when such a tag carries a tal:content attribute.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        (tag, attrlist, taldict, metaldict,
         i18ndict) = self.process_ns(tag, attrs)
        if tag in EMPTY_HTML_TAGS and taldict.get("content"):
            raise TALError(
                "empty HTML tags cannot use tal:content: %s" % repr(tag),
                self.getpos())
        self.tagstack.append(tag)
        self.gen.emitStartElement(tag, attrlist, taldict, metaldict, i18ndict,
                                  self.getpos())
        if tag in EMPTY_HTML_TAGS:
            self.implied_endtag(tag, -1)

    def handle_startendtag(self, tag, attrs):
        """Handle ``<tag ... />``.

        With a tal:content attribute the element is emitted as a separate
        start and end element; otherwise a single start element is emitted
        with ``isend=1``.  The tag is never pushed on the tag stack.
        Raises TALError when an EMPTY_HTML_TAGS tag carries tal:content.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        (tag, attrlist, taldict, metaldict,
         i18ndict) = self.process_ns(tag, attrs)
        if taldict.get("content"):
            if tag in EMPTY_HTML_TAGS:
                raise TALError(
                    "empty HTML tags cannot use tal:content: %s" % repr(tag),
                    self.getpos())
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      i18ndict, self.getpos())
            self.gen.emitEndElement(tag, implied=-1)
        else:
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      i18ndict, self.getpos(), isend=1)
        self.pop_xmlns()

    def handle_endtag(self, tag):
        """Handle ``</tag>``: close enclosed tags, then the tag itself.

        Raises EmptyTagError for tags in EMPTY_HTML_TAGS and NestingError
        (from close_enclosed_tags) when *tag* is not open.
        """
        if tag in EMPTY_HTML_TAGS:
            # </img> etc. in the source is an error
            raise EmptyTagError(tag, self.getpos())
        self.close_enclosed_tags(tag)
        self.gen.emitEndElement(tag)
        self.pop_xmlns()
        self.tagstack.pop()

    def close_para_tags(self, tag):
        """Implicitly close open tags that the start tag *tag* terminates.

        Raises OpenTagError when a paragraph- or block-level tag is opened
        inside an open paragraph-level tag other than <p>.
        """
        if tag in EMPTY_HTML_TAGS:
            return
        close_to = -1
        if tag in BLOCK_CLOSING_TAG_MAP:
            blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
            # Find the outermost closable tag that has no other
            # block-level tag opened after it.
            for i, t in enumerate(self.tagstack):
                if t in blocks_to_close:
                    if close_to == -1:
                        close_to = i
                elif t in BLOCK_LEVEL_HTML_TAGS:
                    close_to = -1
        elif tag in PARA_LEVEL_HTML_TAGS or tag in BLOCK_LEVEL_HTML_TAGS:
            # Walk inwards-to-outwards until a block-level tag is met.
            for i in range(len(self.tagstack) - 1, -1, -1):
                closetag = self.tagstack[i]
                if closetag in BLOCK_LEVEL_HTML_TAGS:
                    break
                if closetag in PARA_LEVEL_HTML_TAGS:
                    if closetag != "p":
                        raise OpenTagError(self.tagstack, tag, self.getpos())
                    close_to = i
        if close_to >= 0:
            while len(self.tagstack) > close_to:
                self.implied_endtag(self.tagstack[-1], 1)

    def close_enclosed_tags(self, tag):
        """Implicitly close every tag opened after the innermost *tag*.

        Raises NestingError when *tag* is not on the tag stack.
        """
        if tag not in self.tagstack:
            raise NestingError(self.tagstack, tag, self.getpos())
        while tag != self.tagstack[-1]:
            self.implied_endtag(self.tagstack[-1], 1)
        assert self.tagstack[-1] == tag

    def implied_endtag(self, tag, implied):
        """Emit an end element for the innermost open tag and pop it.

        tag     -- must equal the top of the tag stack
        implied -- -1 (passed for EMPTY_HTML_TAGS start tags), 1 (tag
                   closed because of another tag) or 2 (closed at the end
                   of the input); forwarded to the generator
        """
        assert tag == self.tagstack[-1]
        assert implied in (-1, 1, 2)
        isend = (implied < 0)
        if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
            # Pick out trailing whitespace from the program, and
            # insert the close tag before the whitespace.
            white = self.gen.unEmitWhitespace()
        else:
            white = None
        self.gen.emitEndElement(tag, isend=isend, implied=implied)
        if white:
            self.gen.emitRawText(white)
        self.tagstack.pop()
        self.pop_xmlns()

    def handle_charref(self, name):
        """Emit the character reference ``&#name;`` as raw text."""
        self.gen.emitRawText("&#%s;" % name)

    def handle_entityref(self, name):
        """Emit the entity reference ``&name;`` as raw text."""
        self.gen.emitRawText("&%s;" % name)

    def handle_data(self, data):
        """Emit character data as raw text."""
        self.gen.emitRawText(data)

    def handle_comment(self, data):
        """Emit the comment, re-wrapped in ``<!--`` and ``-->``."""
        self.gen.emitRawText("<!--%s-->" % data)

    def handle_decl(self, data):
        """Emit the declaration, re-wrapped in ``<!`` and ``>``."""
        self.gen.emitRawText("<!%s>" % data)

    def handle_pi(self, data):
        """Emit the processing instruction, re-wrapped in ``<?`` and ``>``."""
        self.gen.emitRawText("<?%s>" % data)

    # Internal thingies

    def scan_xmlns(self, attrs):
        """Save the current prefix mapping and apply ``xmlns:`` attributes.

        The current mapping is always pushed on ``self.nsstack`` so that
        every element has exactly one entry for pop_xmlns() to restore.
        """
        nsnew = {}
        for key, value in attrs:
            if key.startswith("xmlns:"):
                nsnew[key[6:]] = value
        self.nsstack.append(self.nsdict)
        if nsnew:
            # Copy before updating: the saved mapping must stay unchanged.
            self.nsdict = self.nsdict.copy()
            self.nsdict.update(nsnew)

    def pop_xmlns(self):
        """Restore the prefix mapping saved by the matching scan_xmlns()."""
        self.nsdict = self.nsstack.pop()

    def fixname(self, name):
        """Classify a tag or attribute name by namespace.

        Returns ``(name, base, ns)``:
        * ``(name, suffix, 'tal' | 'metal' | 'i18n')`` when the prefix of
          *name* maps to the corresponding Zope namespace URI;
        * ``(name, name, 'xmlns')`` for an ``xmlns:PREFIX`` name whose
          PREFIX maps to one of those URIs;
        * ``(name, name, 0)`` otherwise.
        """
        if ':' in name:
            prefix, suffix = name.split(':', 1)
            if prefix == 'xmlns':
                nsuri = self.nsdict.get(suffix)
                if nsuri in (ZOPE_TAL_NS, ZOPE_METAL_NS, ZOPE_I18N_NS):
                    return name, name, prefix
            else:
                nsuri = self.nsdict.get(prefix)
                if nsuri == ZOPE_TAL_NS:
                    return name, suffix, 'tal'
                elif nsuri == ZOPE_METAL_NS:
                    return name, suffix, 'metal'
                elif nsuri == ZOPE_I18N_NS:
                    return name, suffix, 'i18n'
        return name, name, 0

    def process_ns(self, name, attrs):
        """Sort the attributes of element *name* by namespace.

        Returns ``(name, attrlist, taldict, metaldict, i18ndict)``.
        attrlist holds every attribute in order: ``(key, value, ns)`` when
        a namespace applies, the original item otherwise.  The three dicts
        map un-prefixed attribute names to values.  When the element
        itself is in the tal or metal namespace, ``taldict['tal tag']``
        records which.

        Raises TALError, METALError or I18NError on a duplicate attribute
        within the respective namespace.
        """
        attrlist = []
        taldict = {}
        metaldict = {}
        i18ndict = {}
        name, _namebase, namens = self.fixname(name)
        for item in attrs:
            key, value = item
            key, keybase, keyns = self.fixname(key)
            ns = keyns or namens  # default to tag namespace
            if ns and ns != 'unknown':
                item = (key, value, ns)
            if ns == 'tal':
                if keybase in taldict:
                    raise TALError("duplicate TAL attribute " +
                                   repr(keybase), self.getpos())
                taldict[keybase] = value
            elif ns == 'metal':
                if keybase in metaldict:
                    raise METALError("duplicate METAL attribute " +
                                     repr(keybase), self.getpos())
                metaldict[keybase] = value
            elif ns == 'i18n':
                if keybase in i18ndict:
                    raise I18NError("duplicate i18n attribute " +
                                    repr(keybase), self.getpos())
                i18ndict[keybase] = value
            attrlist.append(item)
        if namens in ('metal', 'tal'):
            taldict['tal tag'] = namens
        return name, attrlist, taldict, metaldict, i18ndict
