Mercurial Repository: p/roundup/code: roundup/cgi/TAL/HTMLParser.py comparison

comparison roundup/cgi/TAL/HTMLParser.py @ 2348:8c2402a78bb0

beginning getting ZPT up to date: TAL first

author	Richard Jones <richard@users.sourceforge.net>
date	Fri, 21 May 2004 05:36:30 +0000
parents	fc52d57c6c3e
children	63868084b8bb

comparison

equal deleted inserted replaced

-:fbbda3b1816d
+:8c2402a78bb0
 """A parser for HTML and XHTML."""
-__docformat__ = 'restructuredtext'
 # This file is based on sgmllib.py, but the API is slightly different.
 # XXX There should be a way to distinguish between PCDATA (parsed
 # character data -- the normal case), RCDATA (replaceable character
 # data -- only char and entity references and end tags are special)
 # and CDATA (character data -- only end tags are special).
 import markupbase
 import re
-import string
 # Regular expressions used for parsing
 interesting_normal = re.compile('[&<]')
 interesting_cdata = re.compile(r'<(/|\Z)')
 # Now parse the data between i+1 and j into a tag and attrs
 attrs = []
 match = tagfind.match(rawdata, i+1)
 assert match, 'unexpected call to parse_starttag()'
 k = match.end()
-self.lasttag = tag = string.lower(rawdata[i+1:k])
+self.lasttag = tag = rawdata[i+1:k].lower()
 while k < endpos:
 m = attrfind.match(rawdata, k)
 if not m:
 break
 attrvalue = None
 elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
 attrvalue[:1] == '"' == attrvalue[-1:]:
 attrvalue = attrvalue[1:-1]
 attrvalue = self.unescape(attrvalue)
-attrs.append((string.lower(attrname), attrvalue))
+attrs.append((attrname.lower(), attrvalue))
 k = m.end()
-end = string.strip(rawdata[k:endpos])
+end = rawdata[k:endpos].strip()
 if end not in (">", "/>"):
 lineno, offset = self.getpos()
 if "\n" in self.__starttag_text:
-lineno = lineno + string.count(self.__starttag_text, "\n")
+lineno = lineno + self.__starttag_text.count("\n")
 offset = len(self.__starttag_text) \
-- string.rfind(self.__starttag_text, "\n")
+- self.__starttag_text.rfind("\n")
 else:
 offset = offset + len(self.__starttag_text)
 self.error("junk characters in start tag: %s"
 % `rawdata[k:endpos][:20]`)
 if end[-2:] == '/>':
 return -1
 j = match.end()
 match = endtagfind.match(rawdata, i) # </ + tag + >
 if not match:
 self.error("bad end tag: %s" % `rawdata[i:j]`)
-tag = string.lower(match.group(1))
+tag = match.group(1).lower()
 if (  self.cdata_endtag is not None
 and tag != self.cdata_endtag):
 # Should be a mismatched end tag, but we'll treat it
 # as text anyway, since most HTML authors aren't
 # interested in the finer points of syntax.
 # Internal -- helper to remove special character quoting
 def unescape(self, s):
 if '&' not in s:
 return s
-s = string.replace(s, "&lt;", "<")
+s = s.replace("&lt;", "<")
-s = string.replace(s, "&gt;", ">")
+s = s.replace("&gt;", ">")
-s = string.replace(s, "&apos;", "'")
+s = s.replace("&apos;", "'")
-s = string.replace(s, "&quot;", '"')
+s = s.replace("&quot;", '"')
-s = string.replace(s, "&amp;", "&") # Must be last
+s = s.replace("&amp;", "&") # Must be last
 return s

Mercurial > p > roundup > code

comparison roundup/cgi/TAL/HTMLParser.py @ 2348:8c2402a78bb0