Mercurial > p > roundup > code
annotate roundup/cgi/TAL/HTMLTALParser.py @ 4651:beb8d43f4d9d
issue2550765: Don't show links in calendar that will fail. Found and fixed by Cédric Krier.
| author | Bernhard Reiter <bernhard@intevation.de> |
|---|---|
| date | Wed, 01 Aug 2012 08:49:41 +0200 |
| parents | 8c2402a78bb0 |
| children | 198b6e810c67 |
| rev | line source |
|---|---|
| 1049 | 1 ############################################################################## |
| 2 # | |
| 3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors. | |
| 4 # All Rights Reserved. | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
5 # |
| 1049 | 6 # This software is subject to the provisions of the Zope Public License, |
| 7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | |
| 8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | |
| 9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
11 # FOR A PARTICULAR PURPOSE. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
12 # |
| 1049 | 13 ############################################################################## |
| 14 """ | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
15 Parse HTML and compile to TALInterpreter intermediate code. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
16 """ |
| 1049 | 17 |
| 18 import sys | |
| 19 | |
| 20 from TALGenerator import TALGenerator | |
| 21 from HTMLParser import HTMLParser, HTMLParseError | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
22 from TALDefs import \ |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
23 ZOPE_METAL_NS, ZOPE_TAL_NS, ZOPE_I18N_NS, METALError, TALError, I18NError |
| 1049 | 24 |
BOOLEAN_HTML_ATTRS = [
    # List of Boolean attributes in HTML that may be given in
    # minimized form (e.g. <img ismap> rather than <img ismap="">)
    # From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
    "compact", "nowrap", "ismap", "declare", "noshade", "checked",
    "disabled", "readonly", "multiple", "selected", "noresize",
    "defer"
    ]

EMPTY_HTML_TAGS = [
    # List of HTML tags with an empty content model; these are
    # rendered in minimized form, e.g. <img />.
    # From http://www.w3.org/TR/xhtml1/#dtds
    "base", "meta", "link", "hr", "br", "param", "img", "area",
    "input", "col", "basefont", "isindex", "frame",
    ]

PARA_LEVEL_HTML_TAGS = [
    # List of HTML elements that close open paragraph-level elements
    # and are themselves paragraph-level.
    "h1", "h2", "h3", "h4", "h5", "h6", "p",
    ]

# Maps a tag name to the open tags that the parser closes implicitly
# when that tag is opened (see HTMLTALParser.close_para_tags).
BLOCK_CLOSING_TAG_MAP = {
    "tr": ("tr", "td", "th"),
    "td": ("td", "th"),
    "th": ("td", "th"),
    "li": ("li",),
    "dd": ("dd", "dt"),
    "dt": ("dd", "dt"),
    }

BLOCK_LEVEL_HTML_TAGS = [
    # List of HTML tags that denote larger sections than paragraphs.
    "blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody",
    "noframe", "ul", "ol", "li", "dl", "dt", "dd", "div",
    ]

# Tags whose implied close tag is emitted *before* any trailing
# whitespace (see HTMLTALParser.implied_endtag).  The keys are wrapped
# in list() so the concatenation also works where dict.keys() returns
# a view rather than a list.
TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS
                               + list(BLOCK_CLOSING_TAG_MAP.keys()))
| 65 | |
| 66 | |
class NestingError(HTMLParseError):
    """Exception raised when elements aren't properly nested."""

    def __init__(self, tagstack, endtag, position=(None, None)):
        """Build the message from the open tags and the offending close tag.

        tagstack -- sequence of currently open tag names (may be empty)
        endtag   -- name of the close tag that could not be matched
        position -- position value handed on to HTMLParseError
        """
        self.endtag = endtag
        if not tagstack:
            text = 'No tags are open to match </%s>' % endtag
        elif len(tagstack) == 1:
            text = ('Open tag <%s> does not match close tag </%s>'
                    % (tagstack[0], endtag))
        else:
            open_tags = '>, <'.join(tagstack)
            text = ('Open tags <%s> do not match close tag </%s>'
                    % (open_tags, endtag))
        HTMLParseError.__init__(self, text, position)
| 82 | |
class EmptyTagError(NestingError):
    """Exception raised when empty elements have an end tag."""

    def __init__(self, tag, position=(None, None)):
        """Record the offending tag name and build the error message.

        Calls HTMLParseError.__init__ directly, so NestingError's own
        initialisation (which sets ``endtag``) is not run.
        """
        self.tag = tag
        HTMLParseError.__init__(
            self, 'Close tag </%s> should be removed' % tag, position)
| 90 | |
class OpenTagError(NestingError):
    """Exception raised when a tag is not allowed in another tag."""

    def __init__(self, tagstack, tag, position=(None, None)):
        """Report that *tag* may not appear inside the innermost open tag.

        tagstack -- non-empty sequence of open tag names; the last entry
                    is named in the message
        tag      -- name of the tag that is not allowed there
        position -- position value handed on to HTMLParseError
        """
        self.tag = tag
        innermost = tagstack[-1]
        text = 'Tag <%s> is not allowed in <%s>' % (tag, innermost)
        HTMLParseError.__init__(self, text, position)
| 98 | |
class HTMLTALParser(HTMLParser):
    """HTML parser that forwards what it parses to a TAL code generator.

    Instance state:
      gen      -- generator object receiving the emit* calls
      tagstack -- names of the currently open tags, innermost last
      nsstack  -- saved namespace dictionaries, one entry per open element
      nsdict   -- current mapping of namespace prefix -> namespace URI
    """

    # External API

    def __init__(self, gen=None):
        """Create a parser; a ``TALGenerator(xml=0)`` is used if *gen* is None."""
        HTMLParser.__init__(self)
        if gen is None:
            gen = TALGenerator(xml=0)
        self.gen = gen
        self.tagstack = []
        self.nsstack = []
        self.nsdict = {'tal': ZOPE_TAL_NS,
                       'metal': ZOPE_METAL_NS,
                       'i18n': ZOPE_I18N_NS,
                       }

    def parseFile(self, file):
        """Read the file named *file* and parse its contents.

        A TALError raised while parsing is tagged with the file name
        (via ``setFile``) and re-raised.
        """
        # The context manager closes the file even if read() fails.
        with open(file) as f:
            data = f.read()
        try:
            self.parseString(data)
        except TALError as e:
            e.setFile(file)
            raise

    def parseString(self, data):
        """Parse *data*, then implicitly close every tag still open."""
        self.feed(data)
        self.close()
        while self.tagstack:
            self.implied_endtag(self.tagstack[-1], 2)
        assert self.nsstack == [], self.nsstack

    def getCode(self):
        """Return whatever the generator's ``getCode()`` returns."""
        return self.gen.getCode()

    def getWarnings(self):
        """Return the parser warnings; this implementation has none."""
        return ()

    # Overriding HTMLParser methods

    def handle_starttag(self, tag, attrs):
        """Handle an opening tag.

        Raises TALError if an empty HTML tag carries tal:content.  Tags
        listed in EMPTY_HTML_TAGS are closed again immediately.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        tag, attrlist, taldict, metaldict, i18ndict \
             = self.process_ns(tag, attrs)
        if tag in EMPTY_HTML_TAGS and taldict.get("content"):
            raise TALError(
                "empty HTML tags cannot use tal:content: %s" % repr(tag),
                self.getpos())
        self.tagstack.append(tag)
        self.gen.emitStartElement(tag, attrlist, taldict, metaldict, i18ndict,
                                  self.getpos())
        if tag in EMPTY_HTML_TAGS:
            self.implied_endtag(tag, -1)

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closing tag such as ``<br />``.

        With tal:content the element is emitted as a start element plus
        an implied end element; otherwise as a single start element
        flagged ``isend=1``.  Raises TALError if an empty HTML tag
        carries tal:content.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        tag, attrlist, taldict, metaldict, i18ndict \
             = self.process_ns(tag, attrs)
        if taldict.get("content"):
            if tag in EMPTY_HTML_TAGS:
                raise TALError(
                    "empty HTML tags cannot use tal:content: %s" % repr(tag),
                    self.getpos())
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      i18ndict, self.getpos())
            self.gen.emitEndElement(tag, implied=-1)
        else:
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      i18ndict, self.getpos(), isend=1)
        # The tag is never pushed on tagstack, so only the namespace
        # scope opened by scan_xmlns() has to be undone.
        self.pop_xmlns()

    def handle_endtag(self, tag):
        """Handle an explicit close tag.

        Raises EmptyTagError for close tags of empty elements and
        NestingError (via close_enclosed_tags) if *tag* is not open.
        """
        if tag in EMPTY_HTML_TAGS:
            # </img> etc. in the source is an error
            raise EmptyTagError(tag, self.getpos())
        self.close_enclosed_tags(tag)
        self.gen.emitEndElement(tag)
        self.pop_xmlns()
        self.tagstack.pop()

    def close_para_tags(self, tag):
        """Implicitly close open tags that opening *tag* terminates.

        Raises OpenTagError if a paragraph- or block-level tag is opened
        inside an open paragraph-level tag other than <p>.
        """
        if tag in EMPTY_HTML_TAGS:
            return
        close_to = -1
        if tag in BLOCK_CLOSING_TAG_MAP:
            blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
            # Scan outermost -> innermost: remember the first closable
            # tag, but forget it whenever another block-level tag
            # follows it.
            for i, t in enumerate(self.tagstack):
                if t in blocks_to_close:
                    if close_to == -1:
                        close_to = i
                elif t in BLOCK_LEVEL_HTML_TAGS:
                    close_to = -1
        elif tag in PARA_LEVEL_HTML_TAGS or tag in BLOCK_LEVEL_HTML_TAGS:
            # Scan innermost -> outermost, stopping at the first
            # block-level tag.
            for i in range(len(self.tagstack) - 1, -1, -1):
                closetag = self.tagstack[i]
                if closetag in BLOCK_LEVEL_HTML_TAGS:
                    break
                if closetag in PARA_LEVEL_HTML_TAGS:
                    if closetag != "p":
                        raise OpenTagError(self.tagstack, tag, self.getpos())
                    close_to = i
        if close_to >= 0:
            while len(self.tagstack) > close_to:
                self.implied_endtag(self.tagstack[-1], 1)

    def close_enclosed_tags(self, tag):
        """Implicitly close every tag opened after the innermost *tag*.

        Raises NestingError if *tag* is not open at all.
        """
        if tag not in self.tagstack:
            raise NestingError(self.tagstack, tag, self.getpos())
        while tag != self.tagstack[-1]:
            self.implied_endtag(self.tagstack[-1], 1)
        assert self.tagstack[-1] == tag

    def implied_endtag(self, tag, implied):
        """Emit an end element for the innermost open tag and pop it.

        tag     -- must equal ``self.tagstack[-1]``
        implied -- one of -1, 1 or 2; passed through to the generator,
                   and a negative value also sets ``isend``
        """
        assert tag == self.tagstack[-1]
        assert implied in (-1, 1, 2)
        isend = (implied < 0)
        white = None
        if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
            # Pick out trailing whitespace from the program, and
            # insert the close tag before the whitespace.
            white = self.gen.unEmitWhitespace()
        self.gen.emitEndElement(tag, isend=isend, implied=implied)
        if white:
            self.gen.emitRawText(white)
        self.tagstack.pop()
        self.pop_xmlns()

    def handle_charref(self, name):
        """Pass a character reference through unchanged."""
        self.gen.emitRawText("&#%s;" % name)

    def handle_entityref(self, name):
        """Pass an entity reference through unchanged."""
        self.gen.emitRawText("&%s;" % name)

    def handle_data(self, data):
        """Pass character data through unchanged."""
        self.gen.emitRawText(data)

    def handle_comment(self, data):
        """Re-emit a comment with its delimiters."""
        self.gen.emitRawText("<!--%s-->" % data)

    def handle_decl(self, data):
        """Re-emit a declaration with its delimiters."""
        self.gen.emitRawText("<!%s>" % data)

    def handle_pi(self, data):
        """Re-emit a processing instruction with its opening delimiter."""
        self.gen.emitRawText("<?%s>" % data)

    # Internal thingies

    def scan_xmlns(self, attrs):
        """Open a namespace scope for an element.

        The current ``nsdict`` is always pushed on ``nsstack``.  If
        *attrs* contains ``xmlns:<prefix>`` attributes, ``nsdict`` is
        replaced by a copy extended with those declarations.
        """
        nsnew = {}
        for key, value in attrs:
            if key.startswith("xmlns:"):
                nsnew[key[6:]] = value
        self.nsstack.append(self.nsdict)
        if nsnew:
            # Copy so the dictionary saved on the stack stays untouched.
            self.nsdict = self.nsdict.copy()
            self.nsdict.update(nsnew)

    def pop_xmlns(self):
        """Restore the namespace dictionary saved by scan_xmlns()."""
        self.nsdict = self.nsstack.pop()

    def fixname(self, name):
        """Classify a possibly prefixed tag or attribute name.

        Returns ``(name, base, ns)``.  For a prefix bound to the TAL,
        METAL or I18N namespace, *base* is the part after the colon and
        *ns* is 'tal', 'metal' or 'i18n'.  For an ``xmlns:<prefix>``
        declaration of one of those namespaces, *base* is the full name
        and *ns* is 'xmlns'.  Otherwise *base* is the full name and *ns*
        is 0.
        """
        if ':' in name:
            prefix, suffix = name.split(':', 1)
            if prefix == 'xmlns':
                nsuri = self.nsdict.get(suffix)
                if nsuri in (ZOPE_TAL_NS, ZOPE_METAL_NS, ZOPE_I18N_NS):
                    return name, name, prefix
            else:
                nsuri = self.nsdict.get(prefix)
                if nsuri == ZOPE_TAL_NS:
                    return name, suffix, 'tal'
                elif nsuri == ZOPE_METAL_NS:
                    return name, suffix, 'metal'
                elif nsuri == ZOPE_I18N_NS:
                    return name, suffix, 'i18n'
        return name, name, 0

    def process_ns(self, name, attrs):
        """Sort an element's attributes by namespace.

        Returns ``(name, attrlist, taldict, metaldict, i18ndict)``.
        *attrlist* keeps every attribute in order; attributes with a
        recognised namespace become ``(key, value, ns)`` triples, the
        rest stay as given.  The dictionaries map the un-prefixed
        attribute name to its value.

        Raises TALError, METALError or I18NError on a duplicate
        attribute within the respective namespace.
        """
        attrlist = []
        taldict = {}
        metaldict = {}
        i18ndict = {}
        name, namebase, namens = self.fixname(name)
        for item in attrs:
            key, value = item
            key, keybase, keyns = self.fixname(key)
            ns = keyns or namens # default to tag namespace
            if ns and ns != 'unknown':
                item = (key, value, ns)
            if ns == 'tal':
                if keybase in taldict:
                    raise TALError("duplicate TAL attribute " +
                                   repr(keybase), self.getpos())
                taldict[keybase] = value
            elif ns == 'metal':
                if keybase in metaldict:
                    raise METALError("duplicate METAL attribute " +
                                     repr(keybase), self.getpos())
                metaldict[keybase] = value
            elif ns == 'i18n':
                if keybase in i18ndict:
                    raise I18NError("duplicate i18n attribute " +
                                    repr(keybase), self.getpos())
                i18ndict[keybase] = value
            attrlist.append(item)
        if namens in ('metal', 'tal'):
            # Mark elements whose own tag name is in the TAL/METAL namespace.
            taldict['tal tag'] = namens
        return name, attrlist, taldict, metaldict, i18ndict
