Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,16 @@
NLSET = {'\n', '\r'}
SPECIALSNL = SPECIALS | NLSET


def make_quoted_pairs(value):
    """Escape dquote and backslash for use within a quoted-string.

    *value* is converted with str().  Every backslash is doubled and every
    double quote is preceded by a backslash.  The enclosing double quotes
    are NOT added; only the content is escaped.
    """
    # Backslashes must be escaped first; otherwise the backslashes inserted
    # in front of the double quotes would themselves get doubled.
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def quote_string(value):
    """Return str(value) rendered as a quoted-string.

    Backslashes and double quotes in the value are turned into quoted
    pairs by make_quoted_pairs(), and the result is wrapped in a pair of
    double quotes.
    """
    escaped = make_quoted_pairs(value)
    return f'"{escaped}"'


# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r'''
Expand Down Expand Up @@ -1012,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars):
a flag that is True iff there were any quoted printables decoded.

"""
if not value:
return '', '', False
fragment, *remainder = _wsp_splitter(value, 1)
vchars = []
escape = False
Expand Down Expand Up @@ -1045,7 +1055,7 @@ def get_fws(value):
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
return fws, newvalue

def get_encoded_word(value):
def get_encoded_word(value, terminal_type='vtext'):
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

"""
Expand Down Expand Up @@ -1084,7 +1094,7 @@ def get_encoded_word(value):
ew.append(token)
continue
chars, *remainder = _wsp_splitter(text, 1)
vtext = ValueTerminal(chars, 'vtext')
vtext = ValueTerminal(chars, terminal_type)
_validate_xtext(vtext)
ew.append(vtext)
text = ''.join(remainder)
Expand Down Expand Up @@ -1126,7 +1136,7 @@ def get_unstructured(value):
valid_ew = True
if value.startswith('=?'):
try:
token, value = get_encoded_word(value)
token, value = get_encoded_word(value, 'utext')
except _InvalidEwError:
valid_ew = False
except errors.HeaderParseError:
Expand Down Expand Up @@ -1155,7 +1165,7 @@ def get_unstructured(value):
# the parser to go in an infinite loop.
if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
vtext = ValueTerminal(tok, 'vtext')
vtext = ValueTerminal(tok, 'utext')
_validate_xtext(vtext)
unstructured.append(vtext)
value = ''.join(remainder)
Expand Down Expand Up @@ -1565,7 +1575,7 @@ def get_dtext(value):
def _check_for_early_dl_end(value, domain_literal):
if value:
return False
domain_literal.append(errors.InvalidHeaderDefect(
domain_literal.defects.append(errors.InvalidHeaderDefect(
"end of input inside domain-literal"))
domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
return True
Expand All @@ -1584,9 +1594,9 @@ def get_domain_literal(value):
raise errors.HeaderParseError("expected '[' at start of domain-literal "
"but found '{}'".format(value))
value = value[1:]
domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
if _check_for_early_dl_end(value, domain_literal):
return domain_literal, value
domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
if value[0] in WSP:
token, value = get_fws(value)
domain_literal.append(token)
Expand Down Expand Up @@ -2805,7 +2815,7 @@ def _refold_parse_tree(parse_tree, *, policy):
continue
tstr = str(part)
if not want_encoding:
if part.token_type == 'ptext':
if part.token_type in ('ptext', 'vtext'):
# Encode if tstr contains special characters.
want_encoding = not SPECIALSNL.isdisjoint(tstr)
else:
Expand Down Expand Up @@ -2905,6 +2915,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
if part.token_type == 'bare-quoted-string':
# To fold a quoted string we need to create a list of terminal
# tokens that will render the leading and trailing quotes
# and use quoted pairs in the value as appropriate.
newparts = (
[ValueTerminal('"', 'ptext')] +
[ValueTerminal(make_quoted_pairs(p), 'ptext')
for p in newparts] +
[ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
Expand Down
13 changes: 8 additions & 5 deletions Lib/email/_parseaddr.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ def _parsedate_tz(data):
return None
# Check for a yy specified in two-digit format, then convert it to the
# appropriate four-digit format, according to the POSIX standard. RFC 822
# calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
# mandates a 4-digit yy. For more information, see the documentation for
# calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already
# mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues
# this requirement. For more information, see the documentation for
# the time module.
if yy < 100:
# The year is between 1969 and 1999 (inclusive).
Expand Down Expand Up @@ -233,9 +234,11 @@ def __init__(self, field):
self.CR = '\r\n'
self.FWS = self.LWS + self.CR
self.atomends = self.specials + self.LWS + self.CR
# Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
# is obsolete syntax. RFC 2822 requires that we recognize obsolete
# syntax, so allow dots in phrases.
# Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle
# existing practice (periods in display names), even though it was not
# allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822)
# continues this requirement. We must recognize obsolete syntax, so
# allow dots in phrases.
self.phraseends = self.atomends.replace('.', '')
self.field = field
self.commentlist = []
Expand Down
2 changes: 1 addition & 1 deletion Lib/email/_policybase.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def _fold(self, name, value, sanitize):
h = value
if h is not None:
# The Header class interprets a value of None for maxlinelen as the
# default value of 78, as recommended by RFC 2822.
# default value of 78, as recommended by RFC 5322 section 2.1.1.
maxlinelen = 0
if self.max_line_length is not None:
maxlinelen = self.max_line_length
Expand Down
12 changes: 7 additions & 5 deletions Lib/email/contentmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import email.charset
import email.message
import email.errors
import sys
from email import quoprimime

class ContentManager:
Expand Down Expand Up @@ -142,22 +143,23 @@ def _encode_base64(data, max_line_length):


def _encode_text(string, charset, cte, policy):
# If max_line_length is 0 or None, there is no limit.
maxlen = policy.max_line_length or sys.maxsize
lines = string.encode(charset).splitlines()
linesep = policy.linesep.encode('ascii')
def embedded_body(lines): return linesep.join(lines) + linesep
def normal_body(lines): return b'\n'.join(lines) + b'\n'
if cte is None:
# Use heuristics to decide on the "best" encoding.
if max((len(x) for x in lines), default=0) <= policy.max_line_length:
if max(map(len, lines), default=0) <= maxlen:
try:
return '7bit', normal_body(lines).decode('ascii')
except UnicodeDecodeError:
pass
if policy.cte_type == '8bit':
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
sniff = embedded_body(lines[:10])
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
policy.max_line_length)
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen)
sniff_base64 = binascii.b2a_base64(sniff)
# This is a little unfair to qp; it includes lineseps, base64 doesn't.
if len(sniff_qp) > len(sniff_base64):
Expand All @@ -172,9 +174,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n'
data = normal_body(lines).decode('ascii', 'surrogateescape')
elif cte == 'quoted-printable':
data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
policy.max_line_length)
maxlen)
elif cte == 'base64':
data = _encode_base64(embedded_body(lines), policy.max_line_length)
data = _encode_base64(embedded_body(lines), maxlen)
else:
raise ValueError("Unknown content transfer encoding {}".format(cte))
return cte, data
Expand Down
4 changes: 2 additions & 2 deletions Lib/email/feedparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
# except controls, SP, and ":".
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
EMPTYSTRING = ''
Expand Down Expand Up @@ -294,7 +294,7 @@ def _parsegen(self):
return
if self._cur.get_content_maintype() == 'message':
# The message claims to be a message/* type, then what follows is
# another RFC 2822 message.
# another RFC 5322 message.
for retval in self._parsegen():
if retval is NeedMoreData:
yield NeedMoreData
Expand Down
2 changes: 1 addition & 1 deletion Lib/email/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
expanded to 8 spaces) than maxheaderlen, the header will split as
defined in the Header class. Set maxheaderlen to zero to disable
header wrapping. The default is 78, as recommended (but not required)
by RFC 2822.
by RFC 5322 section 2.1.1.

The policy keyword specifies a policy object that controls a number of
aspects of the generator's operation. If no policy is specified,
Expand Down
17 changes: 13 additions & 4 deletions Lib/email/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,22 @@
def decode_header(header):
"""Decode a message header value without converting charset.

Returns a list of (string, charset) pairs containing each of the decoded
parts of the header. Charset is None for non-encoded parts of the header,
otherwise a lower-case string containing the name of the character set
specified in the encoded string.
For historical reasons, this function may return either:

1. A list of length 1 containing a pair (str, None).
2. A list of (bytes, charset) pairs containing each of the decoded
parts of the header. Charset is None for non-encoded parts of the header,
otherwise a lower-case string containing the name of the character set
specified in the encoded string.

header may be a string that may or may not contain RFC2047 encoded words,
or it may be a Header object.

An email.errors.HeaderParseError may be raised when certain decoding errors
occur (e.g. a base64 decoding exception).

This function exists for backwards compatibility only. For new code, we
recommend using email.headerregistry.HeaderRegistry instead.
"""
# If it is a Header object, we can just return the encoded chunks.
if hasattr(header, '_chunks'):
Expand Down Expand Up @@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
This function takes one of those sequence of pairs and returns a Header
instance. Optional maxlinelen, header_name, and continuation_ws are as in
the Header constructor.

This function exists for backwards compatibility only, and is not
recommended for use in new code.
"""
h = Header(maxlinelen=maxlinelen, header_name=header_name,
continuation_ws=continuation_ws)
Expand Down
40 changes: 26 additions & 14 deletions Lib/email/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,19 +74,25 @@ def _parseparam(s):
# RDM This might be a Header, so for now stringify it.
s = ';' + str(s)
plist = []
while s[:1] == ';':
s = s[1:]
end = s.find(';')
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)
start = 0
while s.find(';', start) == start:
start += 1
end = s.find(';', start)
ind, diff = start, 0
while end > 0:
diff += s.count('"', ind, end) - s.count('\\"', ind, end)
if diff % 2 == 0:
break
end, ind = ind, s.find(';', end + 1)
if end < 0:
end = len(s)
f = s[:end]
if '=' in f:
i = f.index('=')
f = f[:i].strip().lower() + '=' + f[i+1:].strip()
i = s.find('=', start, end)
if i == -1:
f = s[start:end]
else:
f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip()
plist.append(f.strip())
s = s[end:]
start = end
return plist


Expand Down Expand Up @@ -135,7 +141,7 @@ def _decode_uu(encoded):
class Message:
"""Basic message object.

A message object is defined as something that has a bunch of RFC 2822
A message object is defined as something that has a bunch of RFC 5322
headers and a payload. It may optionally have an envelope header
(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
multipart or a message/rfc822), then the payload is a list of Message
Expand Down Expand Up @@ -286,8 +292,12 @@ def get_payload(self, i=None, decode=False):
if i is not None and not isinstance(self._payload, list):
raise TypeError('Expected list, got %s' % type(self._payload))
payload = self._payload
# cte might be a Header, so for now stringify it.
cte = str(self.get('content-transfer-encoding', '')).lower()
cte = self.get('content-transfer-encoding', '')
if hasattr(cte, 'cte'):
cte = cte.cte
else:
# cte might be a Header, so for now stringify it.
cte = str(cte).strip().lower()
# payload may be bytes here.
if not decode:
if isinstance(payload, str) and utils._has_surrogates(payload):
Expand All @@ -309,6 +319,8 @@ def get_payload(self, i=None, decode=False):
# If it does happen, turn the string into bytes in a way
# guaranteed not to fail.
bpayload = payload.encode('raw-unicode-escape')
else:
bpayload = payload
if cte == 'quoted-printable':
return quopri.decodestring(bpayload)
elif cte == 'base64':
Expand Down Expand Up @@ -560,7 +572,7 @@ def add_header(self, _name, _value, **_params):

msg.add_header('content-disposition', 'attachment', filename='bud.gif')
msg.add_header('content-disposition', 'attachment',
filename=('utf-8', '', Fußballer.ppt'))
filename=('utf-8', '', 'Fußballer.ppt'))
msg.add_header('content-disposition', 'attachment',
filename='Fußballer.ppt')
"""
Expand Down
14 changes: 7 additions & 7 deletions Lib/email/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""
"""A parser of RFC 5322 and MIME email messages."""

__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
'FeedParser', 'BytesFeedParser']
Expand All @@ -15,14 +15,14 @@

class Parser:
def __init__(self, _class=None, *, policy=compat32):
"""Parser of RFC 2822 and MIME email messages.
"""Parser of RFC 5322 and MIME email messages.

Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.

The string must be formatted as a block of RFC 2822 headers and header
continuation lines, optionally preceded by a `Unix-from' header. The
The string must be formatted as a block of RFC 5322 headers and header
continuation lines, optionally preceded by a 'Unix-from' header. The
header block is terminated either by the end of the string or by a
blank line.

Expand Down Expand Up @@ -75,14 +75,14 @@ def parsestr(self, text, headersonly=True):
class BytesParser:

def __init__(self, *args, **kw):
"""Parser of binary RFC 2822 and MIME email messages.
"""Parser of binary RFC 5322 and MIME email messages.

Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.

The input must be formatted as a block of RFC 2822 headers and header
continuation lines, optionally preceded by a `Unix-from' header. The
The input must be formatted as a block of RFC 5322 headers and header
continuation lines, optionally preceded by a 'Unix-from' header. The
header block is terminated either by the end of the input or by a
blank line.

Expand Down
10 changes: 8 additions & 2 deletions Lib/email/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,14 @@ def decode_params(params):
for name, continuations in rfc2231_params.items():
value = []
extended = False
# Sort by number
continuations.sort()
# Sort by number, treating None as 0 if there is no 0,
# and ignore it if there is already a 0.
has_zero = any(x[0] == 0 for x in continuations)
if has_zero:
continuations = [x for x in continuations if x[0] is not None]
else:
continuations = [(x[0] or 0, x[1], x[2]) for x in continuations]
continuations.sort(key=lambda x: x[0])
# And now append all values in numerical order, converting
# %-encodings for the encoded segments. If any of the
# continuation names ends in a *, then the entire string, after
Expand Down
Loading
Loading