Mercurial > p > roundup > code
view roundup/cgi/TAL/markupbase.py @ 7658:d30e534b078a
clarify doc on dispatcher_email config setting.
An issue was brought up on the mailing list.
https://sourceforge.net/p/roundup/mailman/message/43383465/
The description of dispatcher_email sounds like it should be sent
email on issue creation. That's not it's role. Try to make it's role
more obvious.
Fix config.ini and reference.txt description.
Add the newissuecopy.py detector to send email on the creation of an
issue
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Tue, 10 Oct 2023 20:33:22 -0400 |
| parents | 12fe83f90f0d |
| children |
line wrap: on
line source
"""Shared support for scanning document type declarations in HTML and XHTML.""" import re, string _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match _declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match del re class ParserBase: """Parser base class which provides some common support methods used by the SGML/HTML and XHTML parsers.""" def reset(self): self.lineno = 1 self.offset = 0 def getpos(self): """Return current line number and offset.""" return self.lineno, self.offset def error(self, message): """Return an error, showing current line number and offset. Concrete subclasses *must* override this method. """ raise NotImplementedError # Internal -- update line number and offset. This should be # called for each piece of data exactly once, in order -- in other # words the concatenation of all the input strings to this # function should be exactly the entire input. def updatepos(self, i, j): if i >= j: return j rawdata = self.rawdata nlines = rawdata.count("\n", i, j) if nlines: self.lineno = self.lineno + nlines pos = rawdata.rindex("\n", i, j) # Should not fail self.offset = j-(pos+1) else: self.offset = self.offset + j-i return j _decl_otherchars = '' # Internal -- parse declaration (for use by subclasses). def parse_declaration(self, i): # This is some sort of declaration; in "HTML as # deployed," this should only be the document type # declaration ("<!DOCTYPE html...>"). rawdata = self.rawdata import sys j = i + 2 assert rawdata[i:j] == "<!", "unexpected call to parse_declaration" if rawdata[j:j+1] in ("-", ""): # Start of comment followed by buffer boundary, # or just a buffer boundary. return -1 # in practice, this should look like: ((name|stringlit) S*)+ '>' n = len(rawdata) decltype, j = self._scan_name(j, i) if j < 0: return j if decltype == "doctype": self._decl_otherchars = '' while j < n: c = rawdata[j] if c == ">": # end of declaration syntax data = rawdata[i+2:j] if decltype == "doctype": self.handle_decl(data) else: self.unknown_decl(data) return j + 1 if c in "\"'": m = _declstringlit_match(rawdata, j) if not m: return -1 # incomplete j = m.end() elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": name, j = self._scan_name(j, i) elif c in self._decl_otherchars: j = j + 1 elif c == "[": if decltype == "doctype": j = self._parse_doctype_subset(j + 1, i) else: self.error("unexpected '[' char in declaration") else: self.error( "unexpected %s char in declaration" % repr(rawdata[j])) if j < 0: return j return -1 # incomplete # Internal -- scan past the internal subset in a <!DOCTYPE declaration, # returning the index just past any whitespace following the trailing ']'. def _parse_doctype_subset(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) j = i while j < n: c = rawdata[j] if c == "<": s = rawdata[j:j+2] if s == "<": # end of buffer; incomplete return -1 if s != "<!": self.updatepos(declstartpos, j + 1) self.error("unexpected char in internal subset (in %s)" % repr(s)) if (j + 2) == n: # end of buffer; incomplete return -1 if (j + 4) > n: # end of buffer; incomplete return -1 if rawdata[j:j+4] == "<!--": j = self.parse_comment(j, report=0) if j < 0: return j continue name, j = self._scan_name(j + 2, declstartpos) if j == -1: return -1 if name not in ("attlist", "element", "entity", "notation"): self.updatepos(declstartpos, j + 2) self.error( "unknown declaration %s in internal subset" % repr(name)) # handle the individual names meth = getattr(self, "_parse_doctype_" + name) j = meth(j, declstartpos) if j < 0: return j elif c == "%": # parameter entity reference if (j + 1) == n: # end of buffer; incomplete return -1 s, j = self._scan_name(j + 1, declstartpos) if j < 0: return j if rawdata[j] == ";": j = j + 1 elif c == "]": j = j + 1 while j < n and rawdata[j] in string.whitespace: j = j + 1 if j < n: if rawdata[j] == ">": return j self.updatepos(declstartpos, j) self.error("unexpected char after internal subset") else: return -1 elif c in string.whitespace: j = j + 1 else: self.updatepos(declstartpos, j) self.error("unexpected char %s in internal subset" % repr(c)) # end of buffer reached return -1 # Internal -- scan past <!ELEMENT declarations def _parse_doctype_element(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) name, j = self._scan_name(i, declstartpos) if j == -1: return -1 # style content model; just skip until '>' if '>' in rawdata[j:]: return rawdata.find(">", j) + 1 return -1 # Internal -- scan past <!ATTLIST declarations def _parse_doctype_attlist(self, i, declstartpos): rawdata = self.rawdata name, j = self._scan_name(i, declstartpos) c = rawdata[j:j+1] if c == "": return -1 if c == ">": return j + 1 while 1: # scan a series of attribute descriptions; simplified: # name type [value] [#constraint] name, j = self._scan_name(j, declstartpos) if j < 0: return j c = rawdata[j:j+1] if c == "": return -1 if c == "(": # an enumerated type; look for ')' if ")" in rawdata[j:]: j = rawdata.find(")", j) + 1 else: return -1 while rawdata[j:j+1].isspace(): j = j + 1 if not rawdata[j:]: # end of buffer, incomplete return -1 else: name, j = self._scan_name(j, declstartpos) c = rawdata[j:j+1] if not c: return -1 if c in "'\"": m = _declstringlit_match(rawdata, j) if m: j = m.end() else: return -1 c = rawdata[j:j+1] if not c: return -1 if c == "#": if rawdata[j:] == "#": # end of buffer return -1 name, j = self._scan_name(j + 1, declstartpos) if j < 0: return j c = rawdata[j:j+1] if not c: return -1 if c == '>': # all done return j + 1 # Internal -- scan past <!NOTATION declarations def _parse_doctype_notation(self, i, declstartpos): name, j = self._scan_name(i, declstartpos) if j < 0: return j rawdata = self.rawdata while 1: c = rawdata[j:j+1] if not c: # end of buffer; incomplete return -1 if c == '>': return j + 1 if c in "'\"": m = _declstringlit_match(rawdata, j) if not m: return -1 j = m.end() else: name, j = self._scan_name(j, declstartpos) if j < 0: return j # Internal -- scan past <!ENTITY declarations def _parse_doctype_entity(self, i, declstartpos): rawdata = self.rawdata if rawdata[i:i+1] == "%": j = i + 1 while 1: c = rawdata[j:j+1] if not c: return -1 if c in string.whitespace: j = j + 1 else: break else: j = i name, j = self._scan_name(j, declstartpos) if j < 0: return j while 1: c = self.rawdata[j:j+1] if not c: return -1 if c in "'\"": m = _declstringlit_match(rawdata, j) if m: j = m.end() else: return -1 # incomplete elif c == ">": return j + 1 else: name, j = self._scan_name(j, declstartpos) if j < 0: return j # Internal -- scan a name token and the new position and the token, or # return -1 if we've reached the end of the buffer. def _scan_name(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) if i == n: return None, -1 m = _declname_match(rawdata, i) if m: s = m.group() name = s.strip() if (i + len(s)) == n: return None, -1 # end of buffer return name.lower(), m.end() else: self.updatepos(declstartpos, i) self.error("expected name token")
