Mercurial > p > roundup > code
view roundup/cgi/TAL/markupbase.py @ 5414:3fa026621f69
Python 3 preparation: comparisons.
Python 3 no longer has the cmp function, or cmp= arguments to sorting
functions / methods (key= must be used instead), and requires rich
comparison methods such as __lt__ to be defined instead of using
__cmp__. All of the comparison mechanisms supported in Python 3 are
also supported in Python 2.
This patch makes the corresponding changes in Roundup to use key
functions and rich comparison methods. In the case of the
JournalPassword and Permission classes, only __eq__ and __ne__ are
defined as I don't see ordered comparisons as useful there (and for
Permission, the old __cmp__ function didn't try to provide a valid
ordering). In the case of the Date class, I kept the __cmp__ method
and implemented the others in terms of it, to avoid excess
repetitiveness in duplicating implementation code for all six rich
comparison methods.
In roundup/admin.py, help_commands_html used operator.attrgetter to
produce the second argument of sorted() - which would be reasonable
for a key function, but the second argument is the cmp function in
Python 2, not a key function (and the key function must be a named
argument not a positional argument in Python 3). That function
appears to be completely unused, so I expect that code never worked.
This patch adds the missing key= to that sorted() call, but it would
also be reasonable to remove the unused function completely instead.
| author | Joseph Myers <jsm@polyomino.org.uk> |
|---|---|
| date | Wed, 25 Jul 2018 00:39:37 +0000 |
| parents | 12fe83f90f0d |
| children |
line wrap: on
line source
"""Shared support for scanning document type declarations in HTML and XHTML.""" import re, string _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match _declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match del re class ParserBase: """Parser base class which provides some common support methods used by the SGML/HTML and XHTML parsers.""" def reset(self): self.lineno = 1 self.offset = 0 def getpos(self): """Return current line number and offset.""" return self.lineno, self.offset def error(self, message): """Return an error, showing current line number and offset. Concrete subclasses *must* override this method. """ raise NotImplementedError # Internal -- update line number and offset. This should be # called for each piece of data exactly once, in order -- in other # words the concatenation of all the input strings to this # function should be exactly the entire input. def updatepos(self, i, j): if i >= j: return j rawdata = self.rawdata nlines = rawdata.count("\n", i, j) if nlines: self.lineno = self.lineno + nlines pos = rawdata.rindex("\n", i, j) # Should not fail self.offset = j-(pos+1) else: self.offset = self.offset + j-i return j _decl_otherchars = '' # Internal -- parse declaration (for use by subclasses). def parse_declaration(self, i): # This is some sort of declaration; in "HTML as # deployed," this should only be the document type # declaration ("<!DOCTYPE html...>"). rawdata = self.rawdata import sys j = i + 2 assert rawdata[i:j] == "<!", "unexpected call to parse_declaration" if rawdata[j:j+1] in ("-", ""): # Start of comment followed by buffer boundary, # or just a buffer boundary. return -1 # in practice, this should look like: ((name|stringlit) S*)+ '>' n = len(rawdata) decltype, j = self._scan_name(j, i) if j < 0: return j if decltype == "doctype": self._decl_otherchars = '' while j < n: c = rawdata[j] if c == ">": # end of declaration syntax data = rawdata[i+2:j] if decltype == "doctype": self.handle_decl(data) else: self.unknown_decl(data) return j + 1 if c in "\"'": m = _declstringlit_match(rawdata, j) if not m: return -1 # incomplete j = m.end() elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": name, j = self._scan_name(j, i) elif c in self._decl_otherchars: j = j + 1 elif c == "[": if decltype == "doctype": j = self._parse_doctype_subset(j + 1, i) else: self.error("unexpected '[' char in declaration") else: self.error( "unexpected %s char in declaration" % repr(rawdata[j])) if j < 0: return j return -1 # incomplete # Internal -- scan past the internal subset in a <!DOCTYPE declaration, # returning the index just past any whitespace following the trailing ']'. def _parse_doctype_subset(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) j = i while j < n: c = rawdata[j] if c == "<": s = rawdata[j:j+2] if s == "<": # end of buffer; incomplete return -1 if s != "<!": self.updatepos(declstartpos, j + 1) self.error("unexpected char in internal subset (in %s)" % repr(s)) if (j + 2) == n: # end of buffer; incomplete return -1 if (j + 4) > n: # end of buffer; incomplete return -1 if rawdata[j:j+4] == "<!--": j = self.parse_comment(j, report=0) if j < 0: return j continue name, j = self._scan_name(j + 2, declstartpos) if j == -1: return -1 if name not in ("attlist", "element", "entity", "notation"): self.updatepos(declstartpos, j + 2) self.error( "unknown declaration %s in internal subset" % repr(name)) # handle the individual names meth = getattr(self, "_parse_doctype_" + name) j = meth(j, declstartpos) if j < 0: return j elif c == "%": # parameter entity reference if (j + 1) == n: # end of buffer; incomplete return -1 s, j = self._scan_name(j + 1, declstartpos) if j < 0: return j if rawdata[j] == ";": j = j + 1 elif c == "]": j = j + 1 while j < n and rawdata[j] in string.whitespace: j = j + 1 if j < n: if rawdata[j] == ">": return j self.updatepos(declstartpos, j) self.error("unexpected char after internal subset") else: return -1 elif c in string.whitespace: j = j + 1 else: self.updatepos(declstartpos, j) self.error("unexpected char %s in internal subset" % repr(c)) # end of buffer reached return -1 # Internal -- scan past <!ELEMENT declarations def _parse_doctype_element(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) name, j = self._scan_name(i, declstartpos) if j == -1: return -1 # style content model; just skip until '>' if '>' in rawdata[j:]: return rawdata.find(">", j) + 1 return -1 # Internal -- scan past <!ATTLIST declarations def _parse_doctype_attlist(self, i, declstartpos): rawdata = self.rawdata name, j = self._scan_name(i, declstartpos) c = rawdata[j:j+1] if c == "": return -1 if c == ">": return j + 1 while 1: # scan a series of attribute descriptions; simplified: # name type [value] [#constraint] name, j = self._scan_name(j, declstartpos) if j < 0: return j c = rawdata[j:j+1] if c == "": return -1 if c == "(": # an enumerated type; look for ')' if ")" in rawdata[j:]: j = rawdata.find(")", j) + 1 else: return -1 while rawdata[j:j+1].isspace(): j = j + 1 if not rawdata[j:]: # end of buffer, incomplete return -1 else: name, j = self._scan_name(j, declstartpos) c = rawdata[j:j+1] if not c: return -1 if c in "'\"": m = _declstringlit_match(rawdata, j) if m: j = m.end() else: return -1 c = rawdata[j:j+1] if not c: return -1 if c == "#": if rawdata[j:] == "#": # end of buffer return -1 name, j = self._scan_name(j + 1, declstartpos) if j < 0: return j c = rawdata[j:j+1] if not c: return -1 if c == '>': # all done return j + 1 # Internal -- scan past <!NOTATION declarations def _parse_doctype_notation(self, i, declstartpos): name, j = self._scan_name(i, declstartpos) if j < 0: return j rawdata = self.rawdata while 1: c = rawdata[j:j+1] if not c: # end of buffer; incomplete return -1 if c == '>': return j + 1 if c in "'\"": m = _declstringlit_match(rawdata, j) if not m: return -1 j = m.end() else: name, j = self._scan_name(j, declstartpos) if j < 0: return j # Internal -- scan past <!ENTITY declarations def _parse_doctype_entity(self, i, declstartpos): rawdata = self.rawdata if rawdata[i:i+1] == "%": j = i + 1 while 1: c = rawdata[j:j+1] if not c: return -1 if c in string.whitespace: j = j + 1 else: break else: j = i name, j = self._scan_name(j, declstartpos) if j < 0: return j while 1: c = self.rawdata[j:j+1] if not c: return -1 if c in "'\"": m = _declstringlit_match(rawdata, j) if m: j = m.end() else: return -1 # incomplete elif c == ">": return j + 1 else: name, j = self._scan_name(j, declstartpos) if j < 0: return j # Internal -- scan a name token and the new position and the token, or # return -1 if we've reached the end of the buffer. def _scan_name(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) if i == n: return None, -1 m = _declname_match(rawdata, i) if m: s = m.group() name = s.strip() if (i + len(s)) == n: return None, -1 # end of buffer return name.lower(), m.end() else: self.updatepos(declstartpos, i) self.error("expected name token")
