annotate TAL/HTMLTALParser.py @ 996:03cc9a57cb4c

missed one
author Richard Jones <richard@users.sourceforge.net>
date Fri, 30 Aug 2002 08:46:22 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
996
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
1 ##############################################################################
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
2 #
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors.
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
4 # All Rights Reserved.
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
5 #
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
6 # This software is subject to the provisions of the Zope Public License,
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
11 # FOR A PARTICULAR PURPOSE
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
12 #
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
13 ##############################################################################
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
14 """
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
15 Parse HTML and compile to TALInterpreter intermediate code.
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
16 """
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
17
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
18 import sys
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
19 import string
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
20
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
21 from TALGenerator import TALGenerator
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
22 from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
23 from HTMLParser import HTMLParser, HTMLParseError
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
24
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
# Lookup tables that drive the parser's implicit-close-tag handling.

BOOLEAN_HTML_ATTRS = [
    # List of Boolean attributes in HTML that may be given in
    # minimized form (e.g. <img ismap> rather than <img ismap="">)
    # From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
    "compact", "nowrap", "ismap", "declare", "noshade", "checked",
    "disabled", "readonly", "multiple", "selected", "noresize",
    "defer"
    ]

EMPTY_HTML_TAGS = [
    # List of HTML tags with an empty content model; these are
    # rendered in minimized form, e.g. <img />.
    # From http://www.w3.org/TR/xhtml1/#dtds
    "base", "meta", "link", "hr", "br", "param", "img", "area",
    "input", "col", "basefont", "isindex", "frame",
    ]

PARA_LEVEL_HTML_TAGS = [
    # List of HTML elements that close open paragraph-level elements
    # and are themselves paragraph-level.
    "h1", "h2", "h3", "h4", "h5", "h6", "p",
    ]

# Maps a start tag to the open tags that it implicitly closes.
BLOCK_CLOSING_TAG_MAP = {
    "tr": ("tr", "td", "th"),
    "td": ("td", "th"),
    "th": ("td", "th"),
    "li": ("li",),
    "dd": ("dd", "dt"),
    "dt": ("dd", "dt"),
    }

BLOCK_LEVEL_HTML_TAGS = [
    # List of HTML tags that denote larger sections than paragraphs.
    # NOTE(review): HTML 4 names the element "noframes"; "noframe" is
    # kept as-is here -- confirm whether it is a typo.
    "blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody",
    "noframe", "ul", "ol", "li", "dl", "dt", "dd", "div",
    ]

# Tags whose implied close tag is emitted before any trailing whitespace.
# keys() is wrapped in list() so the concatenation also works where
# keys() returns a view object instead of a list.
TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS
                               + list(BLOCK_CLOSING_TAG_MAP.keys()))
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
65
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
66
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
class NestingError(HTMLParseError):
    """Exception raised when elements aren't properly nested.

    The close tag that could not be matched is kept on the instance as
    the ``endtag`` attribute.
    """

    def __init__(self, tagstack, endtag, position=(None, None)):
        """Build the error message from the open tags and the close tag.

        tagstack -- sequence of the names of the currently open tags
                    (may be empty)
        endtag   -- name of the close tag that has no matching open tag
        position -- passed through unchanged to HTMLParseError.__init__
        """
        self.endtag = endtag
        if not tagstack:
            msg = 'No tags are open to match </%s>' % endtag
        elif len(tagstack) == 1:
            msg = ('Open tag <%s> does not match close tag </%s>'
                   % (tagstack[0], endtag))
        else:
            # The str.join method replaces the deprecated string.join()
            # function; the resulting text is the same.
            msg = ('Open tags <%s> do not match close tag </%s>'
                   % ('>, <'.join(tagstack), endtag))
        HTMLParseError.__init__(self, msg, position)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
82
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
class EmptyTagError(NestingError):
    """Exception raised when empty elements have an end tag."""

    def __init__(self, tag, position=(None, None)):
        """Record the offending tag name and set the error message.

        NestingError.__init__ is deliberately not called; the base
        HTMLParseError initializer is invoked directly.
        """
        self.tag = tag
        HTMLParseError.__init__(
            self, 'Close tag </%s> should be removed' % tag, position)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
90
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
class OpenTagError(NestingError):
    """Exception raised when a tag is not allowed in another tag."""

    def __init__(self, tagstack, tag, position=(None, None)):
        """Report that `tag` may not appear inside the innermost open tag.

        tagstack -- non-empty sequence of open tag names; its last item
                    is named in the message
        tag      -- the start tag that is not allowed there
        position -- passed through unchanged to HTMLParseError.__init__
        """
        self.tag = tag
        innermost = tagstack[-1]
        HTMLParseError.__init__(
            self, 'Tag <%s> is not allowed in <%s>' % (tag, innermost),
            position)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
98
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
99 class HTMLTALParser(HTMLParser):
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
100
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
101 # External API
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
102
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def __init__(self, gen=None):
    """Set up parser state.

    gen -- code generator to emit into; when None, a fresh
           TALGenerator(xml=0) is created.
    """
    HTMLParser.__init__(self)
    # Names of the currently open tags, innermost last.
    self.tagstack = []
    # Saved namespace dictionaries, one pushed per scan_xmlns() call.
    self.nsstack = []
    # Prefix -> namespace URI mapping currently in effect.
    self.nsdict = {'tal': ZOPE_TAL_NS, 'metal': ZOPE_METAL_NS}
    if gen is not None:
        self.gen = gen
    else:
        self.gen = TALGenerator(xml=0)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
111
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def parseFile(self, file):
    """Read the file named by `file` and parse its whole contents.

    The file is opened with open(file), read completely, closed, and
    the text is then handed to parseString().
    """
    f = open(file)
    try:
        data = f.read()
    finally:
        # Release the handle even when read() raises.
        f.close()
    self.parseString(data)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
117
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def parseString(self, data):
    """Parse the complete document text in `data`.

    After feeding and closing the underlying parser, every tag still
    on the tag stack is closed with implied_endtag(tag, 2).
    """
    self.feed(data)
    self.close()
    while 1:
        if not self.tagstack:
            break
        innermost = self.tagstack[-1]
        self.implied_endtag(innermost, 2)
    # Every pushed namespace scope must have been popped again.
    assert self.nsstack == [], self.nsstack
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
124
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def getCode(self):
    """Return whatever the generator's getCode() returns."""
    generator = self.gen
    return generator.getCode()
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
127
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def getWarnings(self):
    """Return the parser warnings; this is always an empty tuple."""
    return tuple()
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
130
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
131 # Overriding HTMLParser methods
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
132
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_starttag(self, tag, attrs):
    """Handle a start tag: push it and emit a start element.

    Open tags that `tag` implicitly closes are closed first.  Tags
    listed in EMPTY_HTML_TAGS are closed again immediately with
    implied_endtag(tag, -1).
    """
    self.close_para_tags(tag)
    self.scan_xmlns(attrs)
    tag, plain_attrs, tal_attrs, metal_attrs = self.process_ns(tag, attrs)
    self.tagstack.append(tag)
    position = self.getpos()
    self.gen.emitStartElement(tag, plain_attrs, tal_attrs, metal_attrs,
                              position)
    if tag in EMPTY_HTML_TAGS:
        self.implied_endtag(tag, -1)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
142
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_startendtag(self, tag, attrs):
    """Handle a self-closing tag such as <br/>.

    The tag is never pushed on the tag stack.  When the TAL attributes
    contain a true "content" entry, a separate start and end element
    are emitted; otherwise a single start element with isend=1.
    """
    self.close_para_tags(tag)
    self.scan_xmlns(attrs)
    tag, plain_attrs, tal_attrs, metal_attrs = self.process_ns(tag, attrs)
    if not tal_attrs.get("content"):
        self.gen.emitStartElement(tag, plain_attrs, tal_attrs, metal_attrs,
                                  self.getpos(), isend=1)
    else:
        self.gen.emitStartElement(tag, plain_attrs, tal_attrs, metal_attrs,
                                  self.getpos())
        self.gen.emitEndElement(tag, implied=-1)
    # Balance the namespace scope pushed by scan_xmlns() above.
    self.pop_xmlns()
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
155
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_endtag(self, tag):
    """Handle an explicit close tag.

    Raises EmptyTagError for tags listed in EMPTY_HTML_TAGS.  Otherwise
    any tags still open inside `tag` are closed first, then the end
    element is emitted and the tag and its namespace scope are popped.
    """
    tag_is_empty = tag in EMPTY_HTML_TAGS
    if tag_is_empty:
        # </img> etc. in the source is an error
        raise EmptyTagError(tag, self.getpos())
    self.close_enclosed_tags(tag)
    generator = self.gen
    generator.emitEndElement(tag)
    self.pop_xmlns()
    self.tagstack.pop()
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
164
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def close_para_tags(self, tag):
    """Implicitly close open tags that the start tag `tag` terminates.

    Nothing is done for tags listed in EMPTY_HTML_TAGS.  Raises
    OpenTagError when a paragraph- or block-level tag is opened inside
    an open paragraph-level tag other than "p" (for example a heading).
    """
    if tag in EMPTY_HTML_TAGS:
        return
    # Stack index down to which open tags get closed; -1 means none.
    close_to = -1
    # A single .get() replaces has_key() plus a second lookup; every
    # value in the map is a tuple, so None reliably means "not a key".
    blocks_to_close = BLOCK_CLOSING_TAG_MAP.get(tag)
    if blocks_to_close is not None:
        # Walk from the outermost tag inwards.  Remember the first tag
        # that `tag` closes; any other block-level tag seen afterwards
        # opens a new nesting level and resets the candidate.
        for i in range(len(self.tagstack)):
            t = self.tagstack[i]
            if t in blocks_to_close:
                if close_to == -1:
                    close_to = i
            elif t in BLOCK_LEVEL_HTML_TAGS:
                close_to = -1
    elif tag in PARA_LEVEL_HTML_TAGS or tag in BLOCK_LEVEL_HTML_TAGS:
        # Walk from the innermost tag outwards and stop at the first
        # block-level tag.
        i = len(self.tagstack) - 1
        while i >= 0:
            closetag = self.tagstack[i]
            if closetag in BLOCK_LEVEL_HTML_TAGS:
                break
            if closetag in PARA_LEVEL_HTML_TAGS:
                if closetag != "p":
                    raise OpenTagError(self.tagstack, tag, self.getpos())
                close_to = i
            i = i - 1
    if close_to >= 0:
        # Close everything from the top of the stack down to and
        # including index close_to.
        while len(self.tagstack) > close_to:
            self.implied_endtag(self.tagstack[-1], 1)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
192
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def close_enclosed_tags(self, tag):
    """Implicitly close every tag that is open inside `tag`.

    Raises NestingError when `tag` is not on the tag stack at all.  On
    return `tag` is the innermost open tag.
    """
    if tag not in self.tagstack:
        raise NestingError(self.tagstack, tag, self.getpos())
    innermost = self.tagstack[-1]
    while tag != innermost:
        self.implied_endtag(innermost, 1)
        innermost = self.tagstack[-1]
    assert self.tagstack[-1] == tag
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
199
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def implied_endtag(self, tag, implied):
    """Emit an end element for `tag` that has no explicit close tag.

    tag     -- must be the innermost open tag
    implied -- one of -1, 1 or 2; it is passed on to emitEndElement,
               and a negative value also sets its isend argument
    """
    assert tag == self.tagstack[-1]
    assert implied in (-1, 1, 2)
    white = None
    if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
        # Pick out trailing whitespace from the program, and
        # insert the close tag before the whitespace.
        white = self.gen.unEmitWhitespace()
    self.gen.emitEndElement(tag, isend=(implied < 0), implied=implied)
    if white:
        self.gen.emitRawText(white)
    self.tagstack.pop()
    self.pop_xmlns()
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
215
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_charref(self, name):
    """Emit a numeric character reference as raw text, re-wrapped."""
    reference = "&#%s;" % name
    self.gen.emitRawText(reference)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
218
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_entityref(self, name):
    """Emit a named entity reference as raw text, re-wrapped."""
    reference = "&%s;" % name
    self.gen.emitRawText(reference)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
221
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_data(self, data):
    """Emit character data to the generator unchanged."""
    generator = self.gen
    generator.emitRawText(data)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
224
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_comment(self, data):
    """Emit a comment as raw text, restoring its delimiters."""
    comment = "<!--%s-->" % data
    self.gen.emitRawText(comment)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
227
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_decl(self, data):
    """Emit a declaration as raw text, restoring its delimiters."""
    declaration = "<!%s>" % data
    self.gen.emitRawText(declaration)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
230
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def handle_pi(self, data):
    """Emit a processing instruction as raw text, re-wrapped."""
    instruction = "<?%s>" % data
    self.gen.emitRawText(instruction)
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
233
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
234 # Internal thingies
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
235
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def scan_xmlns(self, attrs):
    """Open a namespace scope for a tag with attribute pairs `attrs`.

    The current prefix mapping is always pushed on nsstack.  When the
    attributes declare prefixes ("xmlns:prefix"), a copy of the mapping
    extended with them becomes the current one.
    """
    declared = {}
    for attrname, attrvalue in attrs:
        if attrname[:6] == "xmlns:":
            declared[attrname[6:]] = attrvalue
    # Pushed unconditionally so that each call pairs with one pop_xmlns().
    self.nsstack.append(self.nsdict)
    if declared:
        extended = self.nsdict.copy()
        extended.update(declared)
        self.nsdict = extended
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
247
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def pop_xmlns(self):
    """Restore the prefix mapping saved by the matching scan_xmlns()."""
    saved = self.nsstack.pop()
    self.nsdict = saved
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
250
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def fixname(self, name):
    """Classify a tag or attribute name by its namespace prefix.

    Returns a 3-tuple (name, base name, namespace marker):

    - "prefix:suffix" with the prefix bound to the TAL namespace gives
      (name, suffix, 'tal'); bound to the METAL namespace gives
      (name, suffix, 'metal')
    - "xmlns:suffix" where suffix is bound to the TAL or METAL
      namespace gives (name, name, 'xmlns')
    - anything else gives (name, name, 0)
    """
    if ':' not in name:
        return name, name, 0
    # The str.split method replaces the deprecated string.split() function.
    prefix, suffix = name.split(':', 1)
    if prefix == 'xmlns':
        nsuri = self.nsdict.get(suffix)
        if nsuri in (ZOPE_TAL_NS, ZOPE_METAL_NS):
            return name, name, prefix
        return name, name, 0
    nsuri = self.nsdict.get(prefix)
    if nsuri == ZOPE_TAL_NS:
        return name, suffix, 'tal'
    if nsuri == ZOPE_METAL_NS:
        return name, suffix, 'metal'
    return name, name, 0
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
265
03cc9a57cb4c missed one
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def process_ns(self, name, attrs):
    """Sort a tag's attributes into plain, TAL and METAL groups.

    ``name`` is the tag name and ``attrs`` an iterable of
    ``(key, value)`` pairs.  Both the tag name and every attribute key
    are classified with ``self.fixname()``.  An attribute that has no
    namespace of its own takes the namespace of the tag.

    Returns ``(name, attrlist, taldict, metaldict)``:

    * ``attrlist`` holds every attribute in its original order; an
      attribute with a namespace becomes ``(key, value, ns)``, the
      others are passed through unchanged;
    * ``taldict`` maps TAL attribute base names to their values, plus
      the key ``'tal tag'`` (set to the tag's namespace) when the tag
      itself is in the TAL or METAL namespace;
    * ``metaldict`` maps METAL attribute base names to their values.

    Raises TALError / METALError when the same TAL / METAL attribute
    appears twice on the tag.
    """
    attrlist = []
    taldict = {}
    metaldict = {}
    name, _namebase, namens = self.fixname(name)
    for item in attrs:
        key, value = item
        key, keybase, keyns = self.fixname(key)
        ns = keyns or namens  # default to tag namespace
        if ns and ns != 'unknown':
            item = (key, value, ns)
        if ns == 'tal':
            # "in" and repr() replace has_key() and backticks, which
            # were removed in Python 3; results are identical.
            if keybase in taldict:
                raise TALError("duplicate TAL attribute " +
                               repr(keybase), self.getpos())
            taldict[keybase] = value
        elif ns == 'metal':
            if keybase in metaldict:
                raise METALError("duplicate METAL attribute " +
                                 repr(keybase), self.getpos())
            metaldict[keybase] = value
        attrlist.append(item)
    if namens in ('metal', 'tal'):
        taldict['tal tag'] = namens
    return name, attrlist, taldict, metaldict

Roundup Issue Tracker: http://roundup-tracker.org/