annotate roundup/cgi/TAL/markupbase.py @ 2314:7c8d2e9a0566

fix DateHTMLProperty so local() can override user timezone [SF#953678]
author Richard Jones <richard@users.sourceforge.net>
date Sat, 15 May 2004 03:53:43 +0000
parents fc52d57c6c3e
children 8c2402a78bb0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2005
fc52d57c6c3e documentation cleanup
Richard Jones <richard@users.sourceforge.net>
parents: 1049
diff changeset
1 """Shared support for scanning document type declarations in HTML and XHTML.
fc52d57c6c3e documentation cleanup
Richard Jones <richard@users.sourceforge.net>
parents: 1049
diff changeset
2 """
fc52d57c6c3e documentation cleanup
Richard Jones <richard@users.sourceforge.net>
parents: 1049
diff changeset
3 __docformat__ = 'restructuredtext'
1049
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
4
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
5 import re
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
6 import string
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
7
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
# Matcher for a declaration name: one ASCII letter followed by any number of
# letters, digits, '-', '_' or '.', plus any whitespace that follows it.
# Bound ``match`` method; call as ``_declname_match(text, pos)``.
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
# Matcher for a single- or double-quoted string literal (no escape handling),
# plus any whitespace that follows the closing quote.
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match

# Only the two bound match methods above are kept; remove the module name
# from this namespace.
del re
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
12
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
13
Richard Jones <richard@users.sourceforge.net>
parents:
diff changeset
class ParserBase:
    """Parser base class which provides some common support methods used
    by the SGML/HTML and XHTML parsers.

    The methods here read the input buffer from ``self.rawdata`` and call
    ``self.error(message)``, ``self.handle_decl(data)``,
    ``self.unknown_decl(data)`` and ``self.parse_comment(i, report=0)``;
    none of those are defined in this class, so subclasses must supply them.

    Scanning methods return the index at which scanning should continue, or
    a negative value (-1) when the buffer ends before the construct is
    complete.
    """

    def reset(self):
        """Reset the position bookkeeping to line 1, offset 0."""
        self.lineno = 1
        self.offset = 0

    def getpos(self):
        """Return current line number and offset."""
        return self.lineno, self.offset

    # Internal -- update line number and offset.  This should be
    # called for each piece of data exactly once, in order -- in other
    # words the concatenation of all the input strings to this
    # function should be exactly the entire input.
    def updatepos(self, i, j):
        """Advance ``lineno``/``offset`` over ``rawdata[i:j]`` and return j.

        Nothing is updated when the range is empty (``i >= j``).
        """
        if i >= j:
            return j
        rawdata = self.rawdata
        nlines = rawdata.count("\n", i, j)
        if nlines:
            self.lineno = self.lineno + nlines
            # Cannot fail: at least one newline was just counted in [i, j).
            pos = rawdata.rindex("\n", i, j)
            self.offset = j - (pos + 1)
        else:
            self.offset = self.offset + j - i
        return j

    # Extra characters accepted (and skipped) inside a declaration; reset
    # to '' on the instance whenever a DOCTYPE declaration is parsed.
    _decl_otherchars = ''

    # Internal -- parse declaration (for use by subclasses).
    def parse_declaration(self, i):
        """Parse the ``<!...>`` declaration starting at ``rawdata[i]``.

        Calls ``handle_decl(data)`` for a DOCTYPE declaration and
        ``unknown_decl(data)`` for anything else, where *data* is the text
        between ``<!`` and the closing ``>``.

        Returns the index just past the closing ``>``, or -1 when the
        declaration is incomplete or is the start of a comment.
        """
        # This is some sort of declaration; in "HTML as
        # deployed," this should only be the document type
        # declaration ("<!DOCTYPE html...>").
        rawdata = self.rawdata
        j = i + 2
        assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
        if rawdata[j:j+1] in ("-", ""):
            # Start of comment followed by buffer boundary,
            # or just a buffer boundary.
            return -1
        # in practice, this should look like: ((name|stringlit) S*)+ '>'
        n = len(rawdata)
        decltype, j = self._scan_name(j, i)
        if j < 0:
            return j
        if decltype == "doctype":
            self._decl_otherchars = ''
        while j < n:
            c = rawdata[j]
            if c == ">":
                # end of declaration syntax
                data = rawdata[i+2:j]
                if decltype == "doctype":
                    self.handle_decl(data)
                else:
                    self.unknown_decl(data)
                return j + 1
            if c in "\"'":
                m = _declstringlit_match(rawdata, j)
                if not m:
                    return -1  # incomplete
                j = m.end()
            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
                # The name itself is not needed, only the new position.
                _name, j = self._scan_name(j, i)
            elif c in self._decl_otherchars:
                j = j + 1
            elif c == "[":
                if decltype == "doctype":
                    j = self._parse_doctype_subset(j + 1, i)
                else:
                    # NOTE(review): self.error() is presumably expected to
                    # raise; if it returned, j would not advance here.
                    self.error("unexpected '[' char in declaration")
            else:
                self.error(
                    "unexpected %s char in declaration" % repr(rawdata[j]))
            if j < 0:
                return j
        return -1  # incomplete

    # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
    # returning the index just past any whitespace following the trailing ']'.
    def _parse_doctype_subset(self, i, declstartpos):
        """Scan the internal subset that starts at ``rawdata[i]``.

        *declstartpos* is the index of the enclosing declaration and is only
        used to update the reported position before calling ``error``.

        Returns the index of the ``>`` that follows the closing ``]`` (and
        any whitespace after it), or -1 when the buffer ends first.
        """
        rawdata = self.rawdata
        n = len(rawdata)
        j = i
        while j < n:
            c = rawdata[j]
            if c == "<":
                s = rawdata[j:j+2]
                if s == "<":
                    # end of buffer; incomplete
                    return -1
                if s != "<!":
                    self.updatepos(declstartpos, j + 1)
                    self.error("unexpected char in internal subset (in %s)"
                               % repr(s))
                if (j + 4) > n:
                    # end of buffer; incomplete (too short to tell a
                    # comment from a declaration yet)
                    return -1
                if rawdata[j:j+4] == "<!--":
                    j = self.parse_comment(j, report=0)
                    if j < 0:
                        return j
                    continue
                name, j = self._scan_name(j + 2, declstartpos)
                if j == -1:
                    return -1
                if name not in ("attlist", "element", "entity", "notation"):
                    self.updatepos(declstartpos, j + 2)
                    self.error(
                        "unknown declaration %s in internal subset"
                        % repr(name))
                # handle the individual names
                meth = getattr(self, "_parse_doctype_" + name)
                j = meth(j, declstartpos)
                if j < 0:
                    return j
            elif c == "%":
                # parameter entity reference
                if (j + 1) == n:
                    # end of buffer; incomplete
                    return -1
                _ref, j = self._scan_name(j + 1, declstartpos)
                if j < 0:
                    return j
                # _scan_name never returns a position at the end of the
                # buffer, so rawdata[j] is safe here.
                if rawdata[j] == ";":
                    j = j + 1
            elif c == "]":
                j = j + 1
                while j < n and rawdata[j] in string.whitespace:
                    j = j + 1
                if j < n:
                    if rawdata[j] == ">":
                        return j
                    self.updatepos(declstartpos, j)
                    self.error("unexpected char after internal subset")
                else:
                    return -1
            elif c in string.whitespace:
                j = j + 1
            else:
                self.updatepos(declstartpos, j)
                self.error("unexpected char %s in internal subset" % repr(c))
        # end of buffer reached
        return -1

    # Internal -- scan past <!ELEMENT declarations
    def _parse_doctype_element(self, i, declstartpos):
        """Skip an ELEMENT declaration whose name starts at ``rawdata[i]``.

        Returns the index just past the next ``>``, or -1 if incomplete.
        """
        _name, j = self._scan_name(i, declstartpos)
        if j == -1:
            return -1
        # style content model; just skip until '>'
        end = self.rawdata.find(">", j)
        if end >= 0:
            return end + 1
        return -1

    # Internal -- scan past <!ATTLIST declarations
    def _parse_doctype_attlist(self, i, declstartpos):
        """Skip an ATTLIST declaration whose element name starts at i.

        Returns the index just past the closing ``>``, or -1 if the buffer
        ends before the declaration is complete.
        """
        rawdata = self.rawdata
        n = len(rawdata)
        _name, j = self._scan_name(i, declstartpos)
        if j < 0:
            # Incomplete element name.
            return -1
        c = rawdata[j:j+1]
        if c == "":
            return -1
        if c == ">":
            return j + 1
        while 1:
            # scan a series of attribute descriptions; simplified:
            #   name type [value] [#constraint]
            _name, j = self._scan_name(j, declstartpos)
            if j < 0:
                return j
            c = rawdata[j:j+1]
            if c == "":
                return -1
            if c == "(":
                # an enumerated type; look for ')'
                close = rawdata.find(")", j)
                if close < 0:
                    return -1
                j = close + 1
                # Bound the scan by n: an empty slice at the end of the
                # buffer would otherwise test as "in string.whitespace"
                # forever.
                while j < n and rawdata[j] in string.whitespace:
                    j = j + 1
                if j >= n:
                    # end of buffer, incomplete
                    return -1
            else:
                _name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return -1
            c = rawdata[j:j+1]
            if not c:
                return -1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if m:
                    j = m.end()
                else:
                    return -1
                c = rawdata[j:j+1]
                if not c:
                    return -1
            if c == "#":
                if rawdata[j:] == "#":
                    # end of buffer
                    return -1
                _name, j = self._scan_name(j + 1, declstartpos)
                if j < 0:
                    return j
                c = rawdata[j:j+1]
                if not c:
                    return -1
            if c == '>':
                # all done
                return j + 1

    # Internal -- scan past <!NOTATION declarations
    def _parse_doctype_notation(self, i, declstartpos):
        """Skip a NOTATION declaration whose name starts at ``rawdata[i]``.

        Returns the index just past the closing ``>``, or a negative value
        if the buffer ends first.
        """
        _name, j = self._scan_name(i, declstartpos)
        if j < 0:
            return j
        rawdata = self.rawdata
        while 1:
            c = rawdata[j:j+1]
            if not c:
                # end of buffer; incomplete
                return -1
            if c == '>':
                return j + 1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if not m:
                    return -1
                j = m.end()
            else:
                _name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return j

    # Internal -- scan past <!ENTITY declarations
    def _parse_doctype_entity(self, i, declstartpos):
        """Skip an ENTITY declaration whose body starts at ``rawdata[i]``.

        A leading ``%`` (parameter entity) and the whitespace after it are
        skipped before the entity name.  Returns the index just past the
        closing ``>``, or a negative value if the buffer ends first.
        """
        rawdata = self.rawdata
        if rawdata[i:i+1] == "%":
            j = i + 1
            while 1:
                c = rawdata[j:j+1]
                if not c:
                    return -1
                if c in string.whitespace:
                    j = j + 1
                else:
                    break
        else:
            j = i
        _name, j = self._scan_name(j, declstartpos)
        if j < 0:
            return j
        while 1:
            c = rawdata[j:j+1]
            if not c:
                return -1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if m:
                    j = m.end()
                else:
                    return -1  # incomplete
            elif c == ">":
                return j + 1
            else:
                _name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return j

    # Internal -- scan a name token; return the token and the new position,
    # or (None, -1) if we've reached the end of the buffer.
    def _scan_name(self, i, declstartpos):
        """Scan a name token starting at ``rawdata[i]``.

        Returns ``(name, pos)`` where *name* is the lower-cased token and
        *pos* is the index just past the token and its trailing whitespace.
        Returns ``(None, -1)`` when *i* is at the end of the buffer or the
        match runs up to the end of the buffer (the token may be
        incomplete).  If no name starts at *i*, the position is updated and
        ``self.error`` is called.
        """
        rawdata = self.rawdata
        n = len(rawdata)
        if i == n:
            return None, -1
        m = _declname_match(rawdata, i)
        if m:
            s = m.group()
            if (i + len(s)) == n:
                return None, -1  # end of buffer
            return s.strip().lower(), m.end()
        self.updatepos(declstartpos, i)
        # Pass only the message, like every other error() call in this
        # class; the position has just been recorded by updatepos().
        self.error("expected name token")

Roundup Issue Tracker: http://roundup-tracker.org/