1049
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
1 ##############################################################################
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
2 #
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors.
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
4 # All Rights Reserved.
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
5 #
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
6 # This software is subject to the provisions of the Zope Public License,
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
11 # FOR A PARTICULAR PURPOSE
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
12 #
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
13 ##############################################################################
|
|
2005
|
14 """Parse HTML and compile to TALInterpreter intermediate code.
|
1049
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
15 """
|
|
2005
|
16 __docformat__ = 'restructuredtext'
|
1049
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
17
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
18 import sys
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
19 import string
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
20
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
21 from TALGenerator import TALGenerator
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
22 from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
23 from HTMLParser import HTMLParser, HTMLParseError
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
24
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
# Boolean attributes in HTML that may be given in minimized form
# (e.g. <img ismap> rather than <img ismap="">).
# From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
BOOLEAN_HTML_ATTRS = [
    "compact", "nowrap", "ismap", "declare", "noshade", "checked",
    "disabled", "readonly", "multiple", "selected", "noresize",
    "defer"
    ]

# HTML tags with an empty content model; these are rendered in
# minimized form, e.g. <img />.
# From http://www.w3.org/TR/xhtml1/#dtds
EMPTY_HTML_TAGS = [
    "base", "meta", "link", "hr", "br", "param", "img", "area",
    "input", "col", "basefont", "isindex", "frame",
    ]

# HTML elements that close open paragraph-level elements and are
# themselves paragraph-level.
PARA_LEVEL_HTML_TAGS = [
    "h1", "h2", "h3", "h4", "h5", "h6", "p",
    ]

# Maps a start tag to the open elements it implicitly closes.
BLOCK_CLOSING_TAG_MAP = {
    "tr": ("tr", "td", "th"),
    "td": ("td", "th"),
    "th": ("td", "th"),
    "li": ("li",),
    "dd": ("dd", "dt"),
    "dt": ("dd", "dt"),
    }

# HTML tags that denote larger sections than paragraphs.
# NOTE(review): "noframe" is not an HTML 4 element name (the element is
# "noframes"); left unchanged here -- confirm whether this is intentional.
BLOCK_LEVEL_HTML_TAGS = [
    "blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody",
    "noframe", "ul", "ol", "li", "dl", "dt", "dd", "div",
    ]

# Tags whose implied close tag is emitted before any trailing whitespace.
# list(...) is required because dict.keys() is not a list on Python 3 and
# cannot be concatenated to one; on Python 2 the result is unchanged.
TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS
                               + list(BLOCK_CLOSING_TAG_MAP))
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
65
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
66
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
class NestingError(HTMLParseError):
    """Exception raised when elements aren't properly nested.

    The offending close tag is kept on the instance as ``endtag``; the
    error message names the open tag(s) it failed to match.
    """

    def __init__(self, tagstack, endtag, position=(None, None)):
        """Build the error message from the open-tag stack.

        :param tagstack: sequence of currently open tag names (may be empty).
        :param endtag: name of the close tag that could not be matched.
        :param position: position value handed on unchanged to
            ``HTMLParseError.__init__``.
        """
        self.endtag = endtag
        if not tagstack:
            msg = 'No tags are open to match </%s>' % endtag
        elif len(tagstack) == 1:
            msg = ('Open tag <%s> does not match close tag </%s>'
                   % (tagstack[0], endtag))
        else:
            # str.join replaces string.join(), which no longer exists in
            # Python 3; the resulting text is identical.
            msg = ('Open tags <%s> do not match close tag </%s>'
                   % ('>, <'.join(tagstack), endtag))
        HTMLParseError.__init__(self, msg, position)
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
82
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
class EmptyTagError(NestingError):
    """Error for an explicit close tag on an element that must be empty.

    The tag name is kept on the instance as ``tag``.
    """

    def __init__(self, tag, position=(None, None)):
        """Record `tag` and initialise the base parse error.

        ``HTMLParseError.__init__`` is called directly (not
        ``NestingError.__init__``) because the message does not involve
        a tag stack.
        """
        self.tag = tag
        HTMLParseError.__init__(
            self, 'Close tag </%s> should be removed' % tag, position)
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
90
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
class OpenTagError(NestingError):
    """Error for a start tag that may not appear inside the open element.

    The rejected tag name is kept on the instance as ``tag``.
    """

    def __init__(self, tagstack, tag, position=(None, None)):
        """Record `tag` and report it against the innermost open element.

        `tagstack` must be non-empty: its last entry is named in the
        message as the enclosing element.
        """
        self.tag = tag
        enclosing = tagstack[-1]
        text = 'Tag <%s> is not allowed in <%s>' % (tag, enclosing)
        HTMLParseError.__init__(self, text, position)
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
98
|
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
class HTMLTALParser(HTMLParser):
    """HTML parser that feeds a TAL code generator.

    Parsing events are translated into calls on ``self.gen``
    (``emitStartElement``, ``emitEndElement``, ``emitRawText``), while
    the parser tracks the stack of open tags and the ``xmlns:``
    declarations in scope so that TAL and METAL attributes can be
    recognised.
    """

    # External API

    def __init__(self, gen=None):
        """Create a parser.

        :param gen: generator object receiving the emitted events; when
            omitted, ``TALGenerator(xml=0)`` is used.
        """
        HTMLParser.__init__(self)
        if gen is None:
            gen = TALGenerator(xml=0)
        self.gen = gen
        # Names of the elements currently open, outermost first.
        self.tagstack = []
        # Saved namespace dicts; one entry is pushed per start tag.
        self.nsstack = []
        # Prefix -> namespace URI mapping currently in effect.
        self.nsdict = {'tal': ZOPE_TAL_NS, 'metal': ZOPE_METAL_NS}

    def parseFile(self, file):
        """Read the file named `file` and parse its whole contents."""
        f = open(file)
        try:
            data = f.read()
        finally:
            # Close the handle even when read() raises.
            f.close()
        self.parseString(data)

    def parseString(self, data):
        """Parse `data` as a complete document.

        Elements still open once the input is exhausted are closed with
        ``implied=2``.
        """
        self.feed(data)
        self.close()
        while self.tagstack:
            self.implied_endtag(self.tagstack[-1], 2)
        assert self.nsstack == [], self.nsstack

    def getCode(self):
        """Return whatever the generator's ``getCode()`` returns."""
        return self.gen.getCode()

    def getWarnings(self):
        """Return the parser warnings; this parser always returns ``()``."""
        return ()

    # Overriding HTMLParser methods

    def handle_starttag(self, tag, attrs):
        """Handle a start tag: close implied elements, then open `tag`.

        Tags listed in ``EMPTY_HTML_TAGS`` are closed again immediately
        with ``implied=-1``.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        tag, attrlist, taldict, metaldict = self.process_ns(tag, attrs)
        self.tagstack.append(tag)
        self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                  self.getpos())
        if tag in EMPTY_HTML_TAGS:
            self.implied_endtag(tag, -1)

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closing tag such as ``<br />``.

        With a non-empty ``content`` entry in the TAL attributes, a
        separate start and end element are emitted; otherwise a single
        start element flagged ``isend=1`` is emitted.  The tag is never
        pushed on ``tagstack``.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        tag, attrlist, taldict, metaldict = self.process_ns(tag, attrs)
        if taldict.get("content"):
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      self.getpos())
            self.gen.emitEndElement(tag, implied=-1)
        else:
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      self.getpos(), isend=1)
        # Undo the namespace push done by scan_xmlns() above.
        self.pop_xmlns()

    def handle_endtag(self, tag):
        """Handle an explicit close tag.

        :raises EmptyTagError: if `tag` is in ``EMPTY_HTML_TAGS``.
        :raises NestingError: (via ``close_enclosed_tags``) if `tag` is
            not currently open.
        """
        if tag in EMPTY_HTML_TAGS:
            # </img> etc. in the source is an error
            raise EmptyTagError(tag, self.getpos())
        self.close_enclosed_tags(tag)
        self.gen.emitEndElement(tag)
        self.pop_xmlns()
        self.tagstack.pop()

    def close_para_tags(self, tag):
        """Implicitly close open elements that start tag `tag` terminates.

        * Tags in ``EMPTY_HTML_TAGS`` never close anything.
        * For a key of ``BLOCK_CLOSING_TAG_MAP`` the stack is scanned
          from the outermost element inwards: the first open element in
          the mapped set is remembered, and any later block-level
          element outside that set discards it again.
        * For other paragraph- or block-level tags the stack is scanned
          from the innermost element outwards up to the nearest
          block-level element; an open ``p`` found on the way is closed.

        Everything from the selected stack index to the top is closed
        with ``implied=1``.

        :raises OpenTagError: if an open paragraph-level element other
            than ``p`` is found in the second case.
        """
        if tag in EMPTY_HTML_TAGS:
            return
        close_to = -1
        if tag in BLOCK_CLOSING_TAG_MAP:
            blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
            for i, t in enumerate(self.tagstack):
                if t in blocks_to_close:
                    if close_to == -1:
                        close_to = i
                elif t in BLOCK_LEVEL_HTML_TAGS:
                    close_to = -1
        elif tag in PARA_LEVEL_HTML_TAGS or tag in BLOCK_LEVEL_HTML_TAGS:
            for i in range(len(self.tagstack) - 1, -1, -1):
                closetag = self.tagstack[i]
                if closetag in BLOCK_LEVEL_HTML_TAGS:
                    break
                if closetag in PARA_LEVEL_HTML_TAGS:
                    if closetag != "p":
                        raise OpenTagError(self.tagstack, tag, self.getpos())
                    close_to = i
        if close_to >= 0:
            while len(self.tagstack) > close_to:
                self.implied_endtag(self.tagstack[-1], 1)

    def close_enclosed_tags(self, tag):
        """Close every element opened inside the innermost open `tag`.

        On return `tag` is the top of ``tagstack``.

        :raises NestingError: if `tag` is not on ``tagstack`` at all.
        """
        if tag not in self.tagstack:
            raise NestingError(self.tagstack, tag, self.getpos())
        while tag != self.tagstack[-1]:
            self.implied_endtag(self.tagstack[-1], 1)
        assert self.tagstack[-1] == tag

    def implied_endtag(self, tag, implied):
        """Emit an end element for `tag` that was not written in the source.

        :param tag: must be the current top of ``tagstack``.
        :param implied: one of -1, 1 or 2.  Within this class -1 is used
            for empty-content tags, 1 when another tag forces the close
            and 2 for elements still open at the end of the input.  It
            is passed on to ``emitEndElement``; a negative value also
            sets ``isend``.
        """
        assert tag == self.tagstack[-1]
        assert implied in (-1, 1, 2)
        isend = (implied < 0)
        if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
            # Pick out trailing whitespace from the program, and
            # insert the close tag before the whitespace.
            white = self.gen.unEmitWhitespace()
        else:
            white = None
        self.gen.emitEndElement(tag, isend=isend, implied=implied)
        if white:
            self.gen.emitRawText(white)
        self.tagstack.pop()
        self.pop_xmlns()

    def handle_charref(self, name):
        """Re-emit a character reference as raw text ``&#name;``."""
        self.gen.emitRawText("&#%s;" % name)

    def handle_entityref(self, name):
        """Re-emit an entity reference as raw text ``&name;``."""
        self.gen.emitRawText("&%s;" % name)

    def handle_data(self, data):
        """Pass character data through to the generator as raw text."""
        self.gen.emitRawText(data)

    def handle_comment(self, data):
        """Re-emit a comment, restoring its ``<!--`` / ``-->`` markers."""
        self.gen.emitRawText("<!--%s-->" % data)

    def handle_decl(self, data):
        """Re-emit a declaration, restoring its ``<!`` / ``>`` markers."""
        self.gen.emitRawText("<!%s>" % data)

    def handle_pi(self, data):
        """Re-emit a processing instruction as ``<?data>``."""
        self.gen.emitRawText("<?%s>" % data)

    # Internal thingies

    def scan_xmlns(self, attrs):
        """Push the namespace scope for a new element.

        The current ``nsdict`` is always pushed on ``nsstack``.  If
        `attrs` contains ``xmlns:prefix`` attributes, ``nsdict`` is
        replaced by a copy extended with those declarations.
        """
        nsnew = {}
        for key, value in attrs:
            if key[:6] == "xmlns:":
                nsnew[key[6:]] = value
        # Pushed in both cases so that pop_xmlns() stays balanced.
        self.nsstack.append(self.nsdict)
        if nsnew:
            self.nsdict = self.nsdict.copy()
            self.nsdict.update(nsnew)

    def pop_xmlns(self):
        """Restore the namespace mapping saved by ``scan_xmlns``."""
        self.nsdict = self.nsstack.pop()

    def fixname(self, name):
        """Classify a tag or attribute name by namespace.

        :returns: a ``(name, base, ns)`` tuple.  `ns` is ``'tal'`` or
            ``'metal'`` (with `base` being the part after the colon)
            when the prefix maps to the corresponding namespace URI;
            ``'xmlns'`` for an ``xmlns:`` declaration of one of those
            two namespaces; otherwise ``0`` with `base` equal to `name`.
        """
        if ':' in name:
            prefix, suffix = name.split(':', 1)
            if prefix == 'xmlns':
                nsuri = self.nsdict.get(suffix)
                if nsuri in (ZOPE_TAL_NS, ZOPE_METAL_NS):
                    return name, name, prefix
            else:
                nsuri = self.nsdict.get(prefix)
                if nsuri == ZOPE_TAL_NS:
                    return name, suffix, 'tal'
                elif nsuri == ZOPE_METAL_NS:
                    return name, suffix, 'metal'
        return name, name, 0

    def process_ns(self, name, attrs):
        """Split an element's attributes into plain, TAL and METAL parts.

        :param name: tag name as seen by the parser.
        :param attrs: sequence of ``(key, value)`` pairs.
        :returns: ``(name, attrlist, taldict, metaldict)``.  `attrlist`
            holds every attribute in order, as the original pair or as
            a ``(key, value, ns)`` triple when a namespace applies.
            `taldict` and `metaldict` map un-prefixed attribute names to
            values.  If the tag itself is in the TAL or METAL namespace,
            ``taldict['tal tag']`` is set to that namespace.
        :raises TALError: on a duplicate TAL attribute.
        :raises METALError: on a duplicate METAL attribute.
        """
        attrlist = []
        taldict = {}
        metaldict = {}
        name, namebase, namens = self.fixname(name)
        for item in attrs:
            key, value = item
            key, keybase, keyns = self.fixname(key)
            ns = keyns or namens # default to tag namespace
            if ns and ns != 'unknown':
                item = (key, value, ns)
            if ns == 'tal':
                if keybase in taldict:
                    raise TALError("duplicate TAL attribute " +
                                   repr(keybase), self.getpos())
                taldict[keybase] = value
            elif ns == 'metal':
                if keybase in metaldict:
                    raise METALError("duplicate METAL attribute " +
                                     repr(keybase), self.getpos())
                metaldict[keybase] = value
            attrlist.append(item)
        if namens in ('metal', 'tal'):
            taldict['tal tag'] = namens
        return name, attrlist, taldict, metaldict
|