Mercurial > p > roundup > code
view roundup/cgi/TAL/HTMLTALParser.py @ 8411:ef1ea918b07a reauth-confirm_id
feat(security): Add user confirmation/reauth for sensitive changes
Auditors can raise Reauth(reason) exception to require the user to
enter a token (e.g. account password) to verify the user is performing
the change.
Naming is subject to change.
actions.py: New ReauthAction class handler and verifyPassword() method
for overriding if needed.
client.py: Handle Reauth exception by calling Client:reauth() method.
Default client:reauth method. Add 'reauth' action declaration.
exceptions.py: Define and document Reauth exception as a subclass of
RoundupCGIException.
templating.py: Define method utils.embed_form_fields().
The original form making a change to the database has a lot of form
fields. These need to be resubmitted to Roundup as part of the form
submission that verifies the user's password.
This method turns all non file form fields into type=hidden inputs.
It escapes the names and values to prevent XSS.
For file form fields, it base64 encodes the contents and puts them
in hidden pre blocks. The pre blocks have data attributes for the
filename, filetype and the original field name. (Note the original
field name is not used.)
This stops the file content data (maybe binary e.g. jpegs) from
breaking the html page. The reauth template runs JavaScript that
turns the encoded data inside the pre tags back into a file. Then
it adds a multiple file input control to the page and attaches all
the files to it. This file input is submitted with the rest of the
fields.
_generic.reauth.html (multiple tracker templates): Generates a form
with id=reauth_form to:
display any message from the Reauth exception to the user (e.g. why
user is asked to auth).
get the user's password
submit the form
embed all the form data that triggered the reauth
recreate any file data that was submitted as part of the form and
generate a new file input to push the data to the back end
It has the JavaScript routine (as an IIFE) that regenerates a file
input without user intervention.
All the TAL based tracker templates use the same form. There is also
one for the jinja2 template. The JavaScript for both is the same.
reference.txt: document embed_form_fields utility method.
upgrading.txt: initial upgrading docs.
TODO:
Finalize naming. I am leaning toward ConfirmID rather than Reauth.
Still looking for a standard name for this workflow.
Externalize the javascript in _generic.reauth.html to a seperate file
and use utils.readfile() to embed it or change the script to load it
from a @@file url.
Clean up upgrading.txt with just steps to implement and less feature
detail/internals.
Document internals/troubleshooting in reference.txt.
Add tests using live server.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Mon, 11 Aug 2025 14:01:12 -0400 |
| parents | 23b8e6067f7c |
| children |
line wrap: on
line source
############################################################################## # # Copyright (c) 2001, 2002 Zope Corporation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE. # ############################################################################## """ Parse HTML and compile to TALInterpreter intermediate code. """ import sys from .TALGenerator import TALGenerator from .HTMLParser import HTMLParser, HTMLParseError from .TALDefs import \ ZOPE_METAL_NS, ZOPE_TAL_NS, ZOPE_I18N_NS, METALError, TALError, I18NError BOOLEAN_HTML_ATTRS = [ # List of Boolean attributes in HTML that may be given in # minimized form (e.g. <img ismap> rather than <img ismap="">) # From http://www.w3.org/TR/xhtml1/#guidelines (C.10) "compact", "nowrap", "ismap", "declare", "noshade", "checked", "disabled", "readonly", "multiple", "selected", "noresize", "defer" ] EMPTY_HTML_TAGS = [ # List of HTML tags with an empty content model; these are # rendered in minimized form, e.g. <img />. # From http://www.w3.org/TR/xhtml1/#dtds "base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col", "basefont", "isindex", "frame", ] PARA_LEVEL_HTML_TAGS = [ # List of HTML elements that close open paragraph-level elements # and are themselves paragraph-level. "h1", "h2", "h3", "h4", "h5", "h6", "p", ] BLOCK_CLOSING_TAG_MAP = { "tr": ("tr", "td", "th"), "td": ("td", "th"), "th": ("td", "th"), "li": ("li",), "dd": ("dd", "dt"), "dt": ("dd", "dt"), } BLOCK_LEVEL_HTML_TAGS = [ # List of HTML tags that denote larger sections than paragraphs. "blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody", "noframe", "ul", "ol", "li", "dl", "dt", "dd", "div", ] TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS + list(BLOCK_CLOSING_TAG_MAP.keys())) class NestingError(HTMLParseError): """Exception raised when elements aren't properly nested.""" def __init__(self, tagstack, endtag, position=(None, None)): self.endtag = endtag if tagstack: if len(tagstack) == 1: msg = ('Open tag <%s> does not match close tag </%s>' % (tagstack[0], endtag)) else: msg = ('Open tags <%s> do not match close tag </%s>' % ('>, <'.join(tagstack), endtag)) else: msg = 'No tags are open to match </%s>' % endtag HTMLParseError.__init__(self, msg, position) class EmptyTagError(NestingError): """Exception raised when empty elements have an end tag.""" def __init__(self, tag, position=(None, None)): self.tag = tag msg = 'Close tag </%s> should be removed' % tag HTMLParseError.__init__(self, msg, position) class OpenTagError(NestingError): """Exception raised when a tag is not allowed in another tag.""" def __init__(self, tagstack, tag, position=(None, None)): self.tag = tag msg = 'Tag <%s> is not allowed in <%s>' % (tag, tagstack[-1]) HTMLParseError.__init__(self, msg, position) class HTMLTALParser(HTMLParser): # External API def __init__(self, gen=None): HTMLParser.__init__(self) if gen is None: gen = TALGenerator(xml=0) self.gen = gen self.tagstack = [] self.nsstack = [] self.nsdict = {'tal': ZOPE_TAL_NS, 'metal': ZOPE_METAL_NS, 'i18n': ZOPE_I18N_NS, } def parseFile(self, file): f = open(file) data = f.read() f.close() try: self.parseString(data) except TALError as e: e.setFile(file) raise def parseString(self, data): self.feed(data) self.close() while self.tagstack: self.implied_endtag(self.tagstack[-1], 2) assert self.nsstack == [], self.nsstack def getCode(self): return self.gen.getCode() def getWarnings(self): return () # Overriding HTMLParser methods def handle_starttag(self, tag, attrs): self.close_para_tags(tag) self.scan_xmlns(attrs) tag, attrlist, taldict, metaldict, i18ndict \ = self.process_ns(tag, attrs) if tag in EMPTY_HTML_TAGS and taldict.get("content"): raise TALError( "empty HTML tags cannot use tal:content: %s" % repr(tag), self.getpos()) self.tagstack.append(tag) self.gen.emitStartElement(tag, attrlist, taldict, metaldict, i18ndict, self.getpos()) if tag in EMPTY_HTML_TAGS: self.implied_endtag(tag, -1) def handle_startendtag(self, tag, attrs): self.close_para_tags(tag) self.scan_xmlns(attrs) tag, attrlist, taldict, metaldict, i18ndict \ = self.process_ns(tag, attrs) if taldict.get("content"): if tag in EMPTY_HTML_TAGS: raise TALError( "empty HTML tags cannot use tal:content: %s" % repr(tag), self.getpos()) self.gen.emitStartElement(tag, attrlist, taldict, metaldict, i18ndict, self.getpos()) self.gen.emitEndElement(tag, implied=-1) else: self.gen.emitStartElement(tag, attrlist, taldict, metaldict, i18ndict, self.getpos(), isend=1) self.pop_xmlns() def handle_endtag(self, tag): if tag in EMPTY_HTML_TAGS: # </img> etc. in the source is an error raise EmptyTagError(tag, self.getpos()) self.close_enclosed_tags(tag) self.gen.emitEndElement(tag) self.pop_xmlns() self.tagstack.pop() def close_para_tags(self, tag): if tag in EMPTY_HTML_TAGS: return close_to = -1 if tag in BLOCK_CLOSING_TAG_MAP: blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag] for i in range(len(self.tagstack)): t = self.tagstack[i] if t in blocks_to_close: if close_to == -1: close_to = i elif t in BLOCK_LEVEL_HTML_TAGS: close_to = -1 elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS: i = len(self.tagstack) - 1 while i >= 0: closetag = self.tagstack[i] if closetag in BLOCK_LEVEL_HTML_TAGS: break if closetag in PARA_LEVEL_HTML_TAGS: if closetag != "p": raise OpenTagError(self.tagstack, tag, self.getpos()) close_to = i i = i - 1 if close_to >= 0: while len(self.tagstack) > close_to: self.implied_endtag(self.tagstack[-1], 1) def close_enclosed_tags(self, tag): if tag not in self.tagstack: raise NestingError(self.tagstack, tag, self.getpos()) while tag != self.tagstack[-1]: self.implied_endtag(self.tagstack[-1], 1) assert self.tagstack[-1] == tag def implied_endtag(self, tag, implied): assert tag == self.tagstack[-1] assert implied in (-1, 1, 2) isend = (implied < 0) if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS: # Pick out trailing whitespace from the program, and # insert the close tag before the whitespace. white = self.gen.unEmitWhitespace() else: white = None self.gen.emitEndElement(tag, isend=isend, implied=implied) if white: self.gen.emitRawText(white) self.tagstack.pop() self.pop_xmlns() def handle_charref(self, name): self.gen.emitRawText("&#%s;" % name) def handle_entityref(self, name): self.gen.emitRawText("&%s;" % name) def handle_data(self, data): self.gen.emitRawText(data) def handle_comment(self, data): self.gen.emitRawText("<!--%s-->" % data) def handle_decl(self, data): self.gen.emitRawText("<!%s>" % data) def handle_pi(self, data): self.gen.emitRawText("<?%s>" % data) # Internal thingies def scan_xmlns(self, attrs): nsnew = {} for key, value in attrs: if key.startswith("xmlns:"): nsnew[key[6:]] = value if nsnew: self.nsstack.append(self.nsdict) self.nsdict = self.nsdict.copy() self.nsdict.update(nsnew) else: self.nsstack.append(self.nsdict) def pop_xmlns(self): self.nsdict = self.nsstack.pop() def fixname(self, name): if ':' in name: prefix, suffix = name.split(':', 1) if prefix == 'xmlns': nsuri = self.nsdict.get(suffix) if nsuri in (ZOPE_TAL_NS, ZOPE_METAL_NS, ZOPE_I18N_NS): return name, name, prefix else: nsuri = self.nsdict.get(prefix) if nsuri == ZOPE_TAL_NS: return name, suffix, 'tal' elif nsuri == ZOPE_METAL_NS: return name, suffix, 'metal' elif nsuri == ZOPE_I18N_NS: return name, suffix, 'i18n' return name, name, 0 def process_ns(self, name, attrs): attrlist = [] taldict = {} metaldict = {} i18ndict = {} name, namebase, namens = self.fixname(name) for item in attrs: key, value = item key, keybase, keyns = self.fixname(key) ns = keyns or namens # default to tag namespace if ns and ns != 'unknown': item = (key, value, ns) if ns == 'tal': if keybase in taldict: raise TALError("duplicate TAL attribute " + repr(keybase), self.getpos()) taldict[keybase] = value elif ns == 'metal': if keybase in metaldict: raise METALError("duplicate METAL attribute " + repr(keybase), self.getpos()) metaldict[keybase] = value elif ns == 'i18n': if keybase in i18ndict: raise I18NError("duplicate i18n attribute " + repr(keybase), self.getpos()) i18ndict[keybase] = value attrlist.append(item) if namens in ('metal', 'tal'): taldict['tal tag'] = namens return name, attrlist, taldict, metaldict, i18ndict
