Mercurial > p > roundup > code
view roundup/anypy/email_.py @ 7800:2d4684e4702d
fix: enhancement to history command output and % template fix.
Rather than using the key field, use the label field for descriptions.
Call cls.labelprop(default_to_id=True) so it returns id rather than
the first sorted property name.
If labelprop() returns 'id' or 'title', we return nothing. 'id' means
there is no label set and no properties named 'name' or 'title'. So
have the caller do whatever it wants (prepend classname for example)
when there is no human readable name. This prevents %(name)s%(key)s
from producing: 23(23).
Also don't accept the 'title' property. Titles can be too
long. Arguably we could: '%(name)20s' to limit the title
length. However without ellipses or something truncating the title
might be confusing. So again pretend there is no human readable name.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Tue, 12 Mar 2024 11:52:17 -0400 |
| parents | 609c5fd638e8 |
| children |
line wrap: on
line source
import binascii import email import re from email import base64mime, quoprimime from email import charset as _charset if str is bytes: message_from_bytes = email.message_from_string message_from_binary_file = email.message_from_file else: message_from_bytes = email.message_from_bytes message_from_binary_file = email.message_from_binary_file ## please import this file if you are using the email module # Match encoded-word strings in the form =?charset?q?Hello_World?= ecre = re.compile(r''' =\? # literal =? (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset \? # literal ? (?P<encoding>[qb]) # either a "q" or a "b", case insensitive \? # literal ? (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) # Fixed header parser, see my proposed patch and discussions: # http://bugs.python.org/issue1079 "decode_header does not follow RFC 2047" # http://bugs.python.org/issue1467619 "Header.decode_header eats up spaces" # This implements the decode_header specific parts of my proposed patch # backported to python2.X def decode_header(header): """Decode a message header value without converting charset. Returns a list of (string, charset) pairs containing each of the decoded parts of the header. Charset is None for non-encoded parts of the header, otherwise a lower-case string containing the name of the character set specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): return [(_charset._encode(string, str(charset)), str(charset)) for string, charset in header._chunks] # If no encoding, just return the header with no charset. if not ecre.search(header): return [(header, None)] # First step is to parse all the encoded parts into triplets of the form # (encoded_string, encoding, charset). For unencoded strings, the last # two parts will be None. words = [] for line in header.splitlines(): parts = ecre.split(line) first = True while parts: unencoded = parts.pop(0) if first: unencoded = unencoded.lstrip() first = False if unencoded: words.append((unencoded, None, None)) if parts: charset = parts.pop(0).lower() encoding = parts.pop(0).lower() encoded = parts.pop(0) words.append((encoded, encoding, charset)) # Now loop over words and remove words that consist of whitespace # between two encoded strings. droplist = [] for n, w in enumerate(words): if n > 1 and w[1] and words[n - 2][1] and words[n - 1][0].isspace(): droplist.append(n - 1) for d in reversed(droplist): del words[d] # The next step is to decode each encoded word by applying the reverse # base64 or quopri transformation. decoded_words is now a list of the # form (decoded_word, charset). decoded_words = [] for encoded_string, encoding, charset in words: if encoding is None: # This is an unencoded word. decoded_words.append((encoded_string, charset)) elif encoding == 'q': word = quoprimime.header_decode(encoded_string) decoded_words.append((word, charset)) elif encoding == 'b': # Postel's law: add missing padding paderr = len(encoded_string) % 4 if paderr: encoded_string += '==='[:4 - paderr] # noqa: PLW2901 try: word = base64mime.decode(encoded_string) except binascii.Error: raise email.errors.HeaderParseError('Base64 decoding error') else: decoded_words.append((word, charset)) else: raise AssertionError('Unexpected encoding: ' + encoding) # Now convert all words to bytes and collapse consecutive runs of # similarly encoded words. collapsed = [] last_word = last_charset = None for word, charset in decoded_words: # ruff: noqa: PLW2901 - loop var word is overwritten if isinstance(word, str) and bytes is not str: word = bytes(word, 'raw-unicode-escape') # PLW2901 if last_word is None: last_word = word last_charset = charset elif charset != last_charset: collapsed.append((last_word, last_charset)) last_word = word last_charset = charset elif last_charset is None: BSPACE = b' ' last_word += BSPACE + word else: last_word += word collapsed.append((last_word, last_charset)) return collapsed
