From 802a56c87dfb64869f01ed251a592cf34b83b46c Mon Sep 17 00:00:00 2001 From: medmunds Date: Mon, 2 Apr 2018 15:50:00 -0700 Subject: [PATCH] Inbound: fix 8bit Unicode parsing as escape sequences on Python 3 Work around Python 3 email parser change that can turn Unicode characters into \u escape sequences when parsing a message (or attachment) that uses "Content-Transfer-Encoding: 8bit". --- anymail/inbound.py | 4 ++++ tests/test_inbound.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/anymail/inbound.py b/anymail/inbound.py index ec65215..24a8db0 100644 --- a/anymail/inbound.py +++ b/anymail/inbound.py @@ -199,6 +199,10 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas @classmethod def parse_raw_mime(cls, s): """Returns a new AnymailInboundMessage parsed from str s""" + if isinstance(s, six.text_type): + # Avoid Python 3.x issue https://bugs.python.org/issue18271 + # (See test_inbound: test_parse_raw_mime_8bit_utf8) + return cls.parse_raw_mime_bytes(s.encode('utf-8')) return EmailParser(cls).parsestr(s) @classmethod diff --git a/tests/test_inbound.py b/tests/test_inbound.py index 697ac87..5a4e0d8 100644 --- a/tests/test_inbound.py +++ b/tests/test_inbound.py @@ -7,6 +7,7 @@ from email.utils import collapse_rfc2231_value from textwrap import dedent from unittest import skipIf +from django.core.mail import SafeMIMEText from django.test import SimpleTestCase from anymail.inbound import AnymailInboundMessage @@ -166,6 +167,18 @@ class AnymailInboundMessageConstructionTests(SimpleTestCase): self.assertEqual(msg.get_content_bytes(), b'\xD8i estas retpo\xFEto.\r\n') self.assertEqual(msg.defects, []) + def test_parse_raw_mime_8bit_utf8(self): + # In some cases, the message below ends up with 'Content-Transfer-Encoding: 8bit', + # so needs to be parsed as bytes, not text (see https://bugs.python.org/issue18271). + # Message.as_string() returns str, which is bytes on Python 2 and text on Python 3. 
+ # (This might be a Django bug; plain old MIMEText avoids the problem by using + # 'Content-Transfer-Encoding: base64', which parses fine as text or bytes. + # Django <1.11 on Python 3 also used base64.) + # Either way, AnymailInboundMessage should try to sidestep the whole issue. + raw = SafeMIMEText("Unicode ✓", "plain", "utf-8").as_string() + msg = AnymailInboundMessage.parse_raw_mime(raw) + self.assertEqual(msg.text, "Unicode ✓") # *not* "Unicode \\u2713" + def test_parse_raw_mime_file_text(self): with open(sample_email_path(), mode="r") as fp: msg = AnymailInboundMessage.parse_raw_mime_file(fp)