view roundup/anypy/email_.py @ 5548:fea11d05110e

Avoid errors from selecting "no selection" on multilink (issue2550722). As discussed in issue 2550722 there are various cases where selecting "no selection" on a multilink can result in inappropriate errors from Roundup: * If selecting "no selection" produces a null edit (a value was set in the multilink in an edit with an error, then removed again, along with all other changes, in the next form submission), so the page is rendered from the form contents including the "-<id>" value for "no selection" for the multilink. * If creating an item with a nonempty value for a multilink has an error, and the resubmission changes that multilink to "no selection" (and this in turn has subcases, according to whether the creation then succeeds or fails on the resubmission, which need fixes in different places in the Roundup code). All of these cases have in common that it is expected and OK to have a "-<id>" value for a submission for a multilink when <id> is not set in that multilink in the database (because the original attempt to set <id> in that multilink had an error), so the hyperdb.py logic to give an error in that case is thus removed. In the subcase of the second case where the resubmission with "no selection" has an error, the templating code tries to produce a menu entry for the "-<id>" multilink value, which also results in an error, hence the templating.py change to ignore such values in the list for a multilink.
author Joseph Myers <jsm@polyomino.org.uk>
date Thu, 27 Sep 2018 11:33:01 +0000
parents 29346d92d80c
children cacef71b3a54
line wrap: on
line source

import re
import binascii
import email
from email import quoprimime, base64mime

if str == bytes:
    message_from_bytes = email.message_from_string
    message_from_binary_file = email.message_from_file
else:
    message_from_bytes = email.message_from_bytes
    message_from_binary_file = email.message_from_binary_file

## please import this file if you are using the email module

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)


# Fixed header parser, see my proposed patch and discussions:
# http://bugs.python.org/issue1079 "decode_header does not follow RFC 2047"
# http://bugs.python.org/issue1467619 "Header.decode_header eats up spaces"
# This implements the decode_header specific parts of my proposed patch
# backported to python2.X
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    import sys
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = base64mime.decode(encoded_string)
            except binascii.Error:
                raise email.errors.HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            pass
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            BSPACE = b' '
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed


Roundup Issue Tracker: http://roundup-tracker.org/