Skip to content

Commit

Permalink
gh-98188: Fix EmailMessage.get_payload to decode data when CTE value …
Browse files Browse the repository at this point in the history
…has extra text (#127547)

Up to this point message handling has been very strict with regard to content encoding values: mixed case was accepted, but trailing blanks or other text would cause decoding failure, even if the first token was a valid encoding.  By Postel's Rule we should go ahead and decode as long as we can recognize that first token.  We have not thought of any security or backward compatibility concerns with this fix.

This fix does introduce a new technique/pattern to the Message code: we look to see if the header has a 'cte' attribute, and if so we use that.  This effectively promotes the header API exposed by HeaderRegistry to an API that any header parser "should" support.  This seems like a reasonable thing to do.  It is not, however, a requirement, as the string value of the header is still used if there is no cte attribute.

The full fix (ignore any trailing blanks or blank-separated trailing text) applies only to the non-compat32 API.  compat32 is only fixed to the extent that it now ignores trailing spaces.  Note that the HeaderRegistry parsing still records a HeaderDefect if there is extra text.

Co-authored-by: Bénédikt Tran <[email protected]>
  • Loading branch information
RanKKI and picnixz authored Jan 6, 2025
1 parent 3b231be commit a62ba52
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 2 deletions.
8 changes: 6 additions & 2 deletions Lib/email/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,12 @@ def get_payload(self, i=None, decode=False):
if i is not None and not isinstance(self._payload, list):
raise TypeError('Expected list, got %s' % type(self._payload))
payload = self._payload
# cte might be a Header, so for now stringify it.
cte = str(self.get('content-transfer-encoding', '')).lower()
cte = self.get('content-transfer-encoding', '')
if hasattr(cte, 'cte'):
cte = cte.cte
else:
# cte might be a Header, so for now stringify it.
cte = str(cte).strip().lower()
# payload may be bytes here.
if not decode:
if isinstance(payload, str) and utils._has_surrogates(payload):
Expand Down
44 changes: 44 additions & 0 deletions Lib/test/test_email/test_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,16 @@ def test_unicode_body_defaults_to_utf8_encoding(self):
w4kgdGVzdGFiYwo=
"""))

def test_string_payload_with_base64_cte(self):
# Baseline case: a well-formed "base64" Content-Transfer-Encoding header
# parsed under email.policy.default.  get_payload(decode=True) must
# return the decoded bytes, and the header must carry no defects.
# NOTE(review): as rendered here the message text has no blank line
# between the header and the base64 body -- confirm against the
# original file that the separator was not lost.
msg = email.message_from_string(textwrap.dedent("""\
Content-Transfer-Encoding: base64
SGVsbG8uIFRlc3Rpbmc=
"""), policy=email.policy.default)
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])



# Test the email.encoders module
class TestEncoders(unittest.TestCase):
Expand Down Expand Up @@ -2352,6 +2362,40 @@ def test_missing_header_body_separator(self):
self.assertDefectsEqual(msg.defects,
[errors.MissingHeaderBodySeparatorDefect])

def test_string_payload_with_extra_space_after_cte(self):
# https://github.com/python/cpython/issues/98188
# The header value carries a trailing space ("base64 ").  Under
# email.policy.default the payload must still decode, and the trailing
# blank alone must not register a defect on the header.
cte = "base64 "
msg = email.message_from_string(textwrap.dedent(f"""\
Content-Transfer-Encoding: {cte}
SGVsbG8uIFRlc3Rpbmc=
"""), policy=email.policy.default)
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])

def test_string_payload_with_extra_text_after_cte(self):
# The header value has blank-separated junk after the encoding token
# ("base64 some text").  Under email.policy.default the payload must
# still decode, while the header itself is expected to record an
# InvalidHeaderDefect for the extra text.
msg = email.message_from_string(textwrap.dedent("""\
Content-Transfer-Encoding: base64 some text
SGVsbG8uIFRlc3Rpbmc=
"""), policy=email.policy.default)
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
cte = msg['content-transfer-encoding']
self.assertDefectsEqual(cte.defects, [email.errors.InvalidHeaderDefect])

def test_string_payload_with_extra_space_after_cte_compat32(self):
# Same trailing-space header value as the default-policy test, but
# parsed under email.policy.compat32.  The header lookup is expected to
# return the value unchanged (trailing space included), the payload
# must still decode, and the message must report no defects.
# NOTE(review): as rendered here the message text has no blank line
# between the header and the base64 body -- confirm against the
# original file, since msg.defects is asserted to be empty.
cte = "base64 "
msg = email.message_from_string(textwrap.dedent(f"""\
Content-Transfer-Encoding: {cte}
SGVsbG8uIFRlc3Rpbmc=
"""), policy=email.policy.compat32)
# NOTE(review): 'pasted_cte' is presumably a typo for 'parsed_cte'.
pasted_cte = msg['content-transfer-encoding']
self.assertEqual(pasted_cte, cte)
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
self.assertDefectsEqual(msg.defects, [])



# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_email/test_headerregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,11 @@ def cte_as_value(self,
'7bit',
[errors.InvalidHeaderDefect]),

'extra_space_after_cte': (
'base64 ',
'base64',
[]),

}


Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,7 @@ Gregor Lingl
Everett Lipman
Mirko Liss
Alexander Liu
Hui Liu
Yuan Liu
Nick Lockwood
Stephanie Lockwood
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix an issue in :meth:`email.message.Message.get_payload` where data
cannot be decoded if the Content Transfer Encoding mechanism contains
trailing whitespace or additional junk text. Patch by Hui Liu.

0 comments on commit a62ba52

Please sign in to comment.