annotate roundup/cgi/TAL/markupbase.py @ 2119:cc4667ef3f12

Added the ability to toggle where error messages go. They either go to the user (default, for backwards compatibility), the dispatcher, or both. These are able to be toggled via settings in config.py. Please refer to upgrading.txt for more details. (And Richard, let me know if I've done anything wrong with this checkin. :))
author Eddie Parker <eparker@users.sourceforge.net>
date Thu, 25 Mar 2004 19:27:15 +0000
parents fc52d57c6c3e
children 8c2402a78bb0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2005
fc52d57c6c3e documentation cleanup
Richard Jones <richard@users.sourceforge.net>
parents: 1049
diff changeset
1 """Shared support for scanning document type declarations in HTML and XHTML.
fc52d57c6c3e documentation cleanup
Richard Jones <richard@users.sourceforge.net>
parents: 1049
diff changeset
2 """
fc52d57c6c3e documentation cleanup
Richard Jones <richard@users.sourceforge.net>
parents: 1049
diff changeset
3 __docformat__ = 'restructuredtext'
1049
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
4
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
5 import re
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
6 import string
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
7
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
8 _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
9 _declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
10
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
11 del re
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
12
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
13
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
14 class ParserBase:
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
15 """Parser base class which provides some common support methods used
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
16 by the SGML/HTML and XHTML parsers."""
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
17
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def reset(self):
    """Rewind the position tracker to the start of the input.

    After this call the parser reports line 1, offset 0.
    """
    self.lineno, self.offset = 1, 0
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
21
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def getpos(self):
    """Return the tracked position as a ``(lineno, offset)`` tuple."""
    position = (self.lineno, self.offset)
    return position
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
25
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
26 # Internal -- update line number and offset. This should be
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
27 # called for each piece of data exactly once, in order -- in other
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
28 # words the concatenation of all the input strings to this
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
29 # function should be exactly the entire input.
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def updatepos(self, i, j):
    """Advance the tracked position over ``self.rawdata[i:j]``.

    ``self.lineno`` grows by the number of newlines found in the slice.
    ``self.offset`` becomes the number of characters between the last
    newline in the slice and ``j``; when the slice holds no newline it
    simply grows by ``j - i``.

    Nothing is updated when ``i >= j``.  Always returns ``j``.
    """
    if i >= j:
        return j
    rawdata = self.rawdata
    # str methods replace the string-module functions, which no longer
    # exist in Python 3; the results are identical.
    nlines = rawdata.count("\n", i, j)
    if nlines:
        self.lineno = self.lineno + nlines
        pos = rawdata.rindex("\n", i, j)  # cannot fail: nlines > 0
        self.offset = j - (pos + 1)
    else:
        self.offset = self.offset + j - i
    return j
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
42
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
43 _decl_otherchars = ''
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
44
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
45 # Internal -- parse declaration (for use by subclasses).
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def parse_declaration(self, i):
    """Parse the markup declaration that starts at ``self.rawdata[i]``.

    ``rawdata[i:i+2]`` must be ``"<!"``; anything else trips the assertion.

    Returns the index just past the closing ``'>'`` once a complete
    declaration has been consumed, or -1 when the buffer ends first (or
    when the text after ``"<!"`` is a ``'-'``, i.e. the start of a
    comment, which is not handled here).

    A complete declaration is handed to ``self.handle_decl()`` when its
    name is ``doctype`` and to ``self.unknown_decl()`` otherwise; both
    receive the text between ``"<!"`` and ``'>'``.  Unexpected characters
    are reported through ``self.error()``.
    """
    # This is some sort of declaration; in "HTML as
    # deployed," this should only be the document type
    # declaration ("<!DOCTYPE html...>").
    rawdata = self.rawdata
    j = i + 2
    assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
    if rawdata[j:j+1] in ("-", ""):
        # Start of comment followed by buffer boundary,
        # or just a buffer boundary.
        return -1
    # in practice, this should look like: ((name|stringlit) S*)+ '>'
    n = len(rawdata)
    decltype, j = self._scan_name(j, i)
    if j < 0:
        return j
    if decltype == "doctype":
        self._decl_otherchars = ''
    while j < n:
        c = rawdata[j]
        if c == ">":
            # end of declaration syntax
            data = rawdata[i+2:j]
            if decltype == "doctype":
                self.handle_decl(data)
            else:
                self.unknown_decl(data)
            return j + 1
        if c in "\"'":
            m = _declstringlit_match(rawdata, j)
            if not m:
                return -1  # incomplete
            j = m.end()
        elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
            _name, j = self._scan_name(j, i)
        elif c in self._decl_otherchars:
            j = j + 1
        elif c == "[":
            if decltype == "doctype":
                j = self._parse_doctype_subset(j + 1, i)
            else:
                # NOTE(review): j is not advanced on this path, so the
                # loop only terminates if error() raises -- error() is
                # not defined in this class; confirm in the subclass.
                self.error("unexpected '[' char in declaration")
        else:
            # repr() replaces the backtick form, which Python 3 rejects.
            self.error(
                "unexpected %s char in declaration" % repr(rawdata[j]))
        if j < 0:
            return j
    return -1  # incomplete
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
95
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
96 # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
97 # returning the index just past any whitespace following the trailing ']'.
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def _parse_doctype_subset(self, i, declstartpos):
    """Scan past the internal subset of a ``<!DOCTYPE ... [ ... ]>``.

    ``i`` is where scanning starts (the caller passes the index just past
    the opening ``'['``).  ``declstartpos`` is the index of the enclosing
    declaration; it is only used to update the reported position before
    an error and is forwarded to the helper scanners.

    Returns the index of the ``'>'`` that follows the closing ``']'``
    (after skipping whitespace), or -1 when the buffer ends before the
    subset is complete.  Malformed input is reported via ``self.error()``.
    """
    # NOTE(review): several branches below keep going after error(); they
    # rely on error() raising -- it is not defined in this class, confirm
    # in the subclass.
    rawdata = self.rawdata
    n = len(rawdata)
    j = i
    while j < n:
        c = rawdata[j]
        if c == "<":
            s = rawdata[j:j+2]
            if s == "<":
                # end of buffer; incomplete
                return -1
            if s != "<!":
                self.updatepos(declstartpos, j + 1)
                self.error("unexpected char in internal subset (in %s)"
                           % repr(s))
            if (j + 4) > n:
                # end of buffer; incomplete (this also covers the case
                # where the buffer ends right after "<!")
                return -1
            if rawdata[j:j+4] == "<!--":
                j = self.parse_comment(j, report=0)
                if j < 0:
                    return j
                continue
            name, j = self._scan_name(j + 2, declstartpos)
            if j == -1:
                return -1
            if name not in ("attlist", "element", "entity", "notation"):
                self.updatepos(declstartpos, j + 2)
                self.error(
                    "unknown declaration %s in internal subset" % repr(name))
            # handle the individual names
            meth = getattr(self, "_parse_doctype_" + name)
            j = meth(j, declstartpos)
            if j < 0:
                return j
        elif c == "%":
            # parameter entity reference
            if (j + 1) == n:
                # end of buffer; incomplete
                return -1
            s, j = self._scan_name(j + 1, declstartpos)
            if j < 0:
                return j
            if rawdata[j] == ";":
                j = j + 1
        elif c == "]":
            j = j + 1
            while j < n and rawdata[j] in string.whitespace:
                j = j + 1
            if j < n:
                if rawdata[j] == ">":
                    return j
                self.updatepos(declstartpos, j)
                self.error("unexpected char after internal subset")
            else:
                return -1
        elif c in string.whitespace:
            j = j + 1
        else:
            self.updatepos(declstartpos, j)
            self.error("unexpected char %s in internal subset" % repr(c))
    # end of buffer reached
    return -1
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
164
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
165 # Internal -- scan past <!ELEMENT declarations
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def _parse_doctype_element(self, i, declstartpos):
    """Scan past an ``<!ELEMENT`` declaration in a DOCTYPE subset.

    ``i`` is the index of the element name; ``declstartpos`` is passed
    through to ``_scan_name`` for error reporting.

    Returns the index just past the declaration's closing ``'>'``, or -1
    when the name or the ``'>'`` is not yet in the buffer.
    """
    name, j = self._scan_name(i, declstartpos)
    if j == -1:
        return -1
    # style content model; just skip until '>'
    # A single find() replaces the slice-copy membership test followed by
    # a second scan with the Python-2-only string.find().
    end = self.rawdata.find(">", j)
    if end < 0:
        return -1
    return end + 1
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
176
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
177 # Internal -- scan past <!ATTLIST declarations
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def _parse_doctype_attlist(self, i, declstartpos):
    """Scan past an ``<!ATTLIST`` declaration in a DOCTYPE subset.

    ``i`` is the index of the element name; ``declstartpos`` is passed
    through to ``_scan_name`` for error reporting.

    Returns the index just past the declaration's closing ``'>'``, or -1
    when the buffer ends before the declaration is complete.
    """
    rawdata = self.rawdata
    n = len(rawdata)
    _, j = self._scan_name(i, declstartpos)
    if j < 0:
        return j
    c = rawdata[j:j+1]
    if c == "":
        return -1
    if c == ">":
        return j + 1
    while 1:
        # scan a series of attribute descriptions; simplified:
        #   name type [value] [#constraint]
        _, j = self._scan_name(j, declstartpos)
        if j < 0:
            return j
        c = rawdata[j:j+1]
        if c == "":
            return -1
        if c == "(":
            # an enumerated type; look for ')'
            close = rawdata.find(")", j)
            if close < 0:
                return -1
            j = close + 1
            # The bound on j matters: "" is "in" every string, so testing
            # rawdata[j:j+1] alone would spin forever at end of buffer.
            while j < n and rawdata[j] in string.whitespace:
                j = j + 1
            if j >= n:
                # end of buffer, incomplete
                return -1
        else:
            _, j = self._scan_name(j, declstartpos)
            if j < 0:
                return j
        c = rawdata[j:j+1]
        if not c:
            return -1
        if c in "'\"":
            m = _declstringlit_match(rawdata, j)
            if m:
                j = m.end()
            else:
                return -1
            c = rawdata[j:j+1]
            if not c:
                return -1
        if c == "#":
            if rawdata[j:] == "#":
                # end of buffer
                return -1
            _, j = self._scan_name(j + 1, declstartpos)
            if j < 0:
                return j
            c = rawdata[j:j+1]
            if not c:
                return -1
        if c == '>':
            # all done
            return j + 1
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
233
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
234 # Internal -- scan past <!NOTATION declarations
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def _parse_doctype_notation(self, i, declstartpos):
    """Scan past a ``<!NOTATION`` declaration in a DOCTYPE subset.

    ``i`` is the index of the notation name; ``declstartpos`` is handed
    to ``_scan_name`` for error reporting.

    Returns the index just past the closing ``'>'``, or a negative value
    when the buffer ends before the declaration is complete.
    """
    _, pos = self._scan_name(i, declstartpos)
    if pos < 0:
        return pos
    text = self.rawdata
    while True:
        ch = text[pos:pos+1]
        if ch == '>':
            return pos + 1
        if not ch:
            # ran off the end of the buffer; incomplete
            return -1
        if ch in "'\"":
            # quoted literal: skip it and any trailing whitespace
            literal = _declstringlit_match(text, pos)
            if not literal:
                return -1
            pos = literal.end()
            continue
        # anything else has to be a name token
        _, pos = self._scan_name(pos, declstartpos)
        if pos < 0:
            return pos
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
256
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
257 # Internal -- scan past <!ENTITY declarations
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def _parse_doctype_entity(self, i, declstartpos):
    """Scan past an ``<!ENTITY`` declaration in a DOCTYPE subset.

    ``i`` is the index of the first character after the keyword: either
    the entity name or a ``'%'`` introducing a parameter entity.
    ``declstartpos`` is handed to ``_scan_name`` for error reporting.

    Returns the index just past the closing ``'>'``, or a negative value
    when the buffer ends before the declaration is complete.
    """
    rawdata = self.rawdata
    pos = i
    if rawdata[i:i+1] == "%":
        # parameter entity: step over '%' and the whitespace after it
        pos = i + 1
        while True:
            ch = rawdata[pos:pos+1]
            if not ch:
                return -1
            if ch not in string.whitespace:
                break
            pos = pos + 1
    _, pos = self._scan_name(pos, declstartpos)
    if pos < 0:
        return pos
    while True:
        ch = self.rawdata[pos:pos+1]
        if not ch:
            return -1
        if ch == ">":
            return pos + 1
        if ch in "'\"":
            literal = _declstringlit_match(rawdata, pos)
            if not literal:
                return -1    # incomplete
            pos = literal.end()
            continue
        _, pos = self._scan_name(pos, declstartpos)
        if pos < 0:
            return pos
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
291
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
292 # Internal -- scan a name token and return the token and the new position,
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
293 # or (None, -1) if we've reached the end of the buffer.
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
def _scan_name(self, i, declstartpos):
    """Scan a name token starting at ``self.rawdata[i]``.

    Returns ``(name, end)``: the token lower-cased with surrounding
    whitespace stripped, and the index just past the token and any
    whitespace that follows it.

    Returns ``(None, -1)`` when ``i`` is at the end of the buffer, or
    when the token (with its trailing whitespace) runs up to the end of
    the buffer, since more of it may still arrive.

    When no name starts at ``i`` the reported position is updated with
    ``updatepos(declstartpos, i)`` and ``self.error()`` is called.
    """
    rawdata = self.rawdata
    n = len(rawdata)
    if i == n:
        return None, -1
    m = _declname_match(rawdata, i)
    if m:
        s = m.group()
        # str methods replace string.strip()/string.lower(), which no
        # longer exist in Python 3; the results are identical.
        name = s.strip()
        if (i + len(s)) == n:
            return None, -1  # end of buffer
        return name.lower(), m.end()
    else:
        self.updatepos(declstartpos, i)
        # NOTE(review): nothing is returned after this call, so callers
        # that unpack the result depend on error() raising.  error() is
        # not defined in this class -- confirm that the subclass version
        # raises and accepts the extra position argument.
        self.error("expected name token", self.getpos())

Roundup Issue Tracker: http://roundup-tracker.org/