Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3001,6 +3001,13 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
encoded_word = _ew.encode(leading_whitespace, charset=encode_as)
lines[-1] += encoded_word
leading_whitespace = ''
# The leading whitespace was encoded as its own encoded word and
# appended to the line, so recompute the space left on the line.
remaining_space = maxlen - len(lines[-1])
text_space = remaining_space - chrome_len
if text_space <= 0:
lines.append(' ')
Comment on lines +3006 to +3009

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Start a new line when no character fits after the whitespace word

When the recomputed `text_space` is positive but too small for the encoded representation of the first character, the shrinking loop below reduces `to_encode_word` to `''` and emits an invalid empty encoded-word. For example, folding the new regression input with `max_line_length=28` now produces `=?utf-8?q?_?==?utf-8?q??=`. RFC 2047 requires nonempty encoded text; detect the empty candidate and move the actual word to a new continuation line instead.

Useful? React with 👍 / 👎.

continue

to_encode_word = to_encode[:text_space]
encoded_word = _ew.encode(to_encode_word, charset=encode_as)
Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3123,6 +3123,21 @@ def test_unknown_after_unknown(self):
"=?unknown-8bit?q?=A4?="),
prefix + "=?unknown-8bit?q?=C2?=\n =?unknown-8bit?q?=A4?=\n")

def test_long_ew_after_encoded_continuation_whitespace(self):
    # Regression check: if a continuation line starts with an encoded
    # word, its leading whitespace is written out as a separate encoded
    # word.  That extra word uses up part of the line budget, so the
    # encoded word that follows has to be sized against what is left;
    # otherwise it would run past max_line_length.
    header_text = (
        'a b c d ä d .,ä, dc cbaö,ä.,baaöa üa.,ü,c,äöäa,bööüc üü')
    expected_folding = (
        'a b c d =?utf-8?b?w6QgZCAuLMOkLA==?= dc\n'
        ' =?utf-8?q?_?==?utf-8?b?Y2Jhw7Ysw6Qu?=\n'
        ' =?utf-8?b?LGJhYcO2YSDDvGEuLMO8LGMs?=\n'
        ' =?utf-8?b?w6TDtsOkYSxiw7bDtsO8YyA=?=\n'
        ' =?utf-8?q?=C3=BC=C3=BC?=\n'
    )
    narrow_policy = self.policy.clone(max_line_length=40)
    parsed = parser.get_unstructured(header_text)
    self._test(parsed, expected_folding, policy=narrow_policy)

# XXX Need test of an encoded word so long that it needs to be wrapped

def test_simple_address(self):
Expand Down
Loading