Inbound: fix 8bit Unicode parsing as escape sequences on Python 3

Work around Python 3 email parser change that can turn Unicode
characters into \u escape sequences when parsing a message (or
attachment) that uses "Content-Transfer-Encoding: 8bit".
This commit is contained in:
medmunds
2018-04-02 15:50:00 -07:00
parent 008aef237e
commit 802a56c87d
2 changed files with 17 additions and 0 deletions

View File

@@ -199,6 +199,10 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas
@classmethod
def parse_raw_mime(cls, s):
"""Returns a new AnymailInboundMessage parsed from str s"""
if isinstance(s, six.text_type):
# Avoid Python 3.x issue https://bugs.python.org/issue18271
# (See test_inbound: test_parse_raw_mime_8bit_utf8)
return cls.parse_raw_mime_bytes(s.encode('utf-8'))
return EmailParser(cls).parsestr(s)
@classmethod

View File

@@ -7,6 +7,7 @@ from email.utils import collapse_rfc2231_value
from textwrap import dedent
from unittest import skipIf
from django.core.mail import SafeMIMEText
from django.test import SimpleTestCase
from anymail.inbound import AnymailInboundMessage
@@ -166,6 +167,18 @@ class AnymailInboundMessageConstructionTests(SimpleTestCase):
self.assertEqual(msg.get_content_bytes(), b'\xD8i estas retpo\xFEto.\r\n')
self.assertEqual(msg.defects, [])
def test_parse_raw_mime_8bit_utf8(self):
# In come cases, the message below ends up with 'Content-Transfer-Encoding: 8bit',
# so needs to be parsed as bytes, not text (see https://bugs.python.org/issue18271).
# Message.as_string() returns str, which is is bytes on Python 2 and text on Python 3.
# (This might be a Django bug; plain old MIMEText avoids the problem by using
# 'Content-Transfer-Encoding: base64', which parses fine as text or bytes.
# Django <1.11 on Python 3 also used base64.)
# Either way, AnymailInboundMessage should try to sidestep the whole issue.
raw = SafeMIMEText("Unicode ✓", "plain", "utf-8").as_string()
msg = AnymailInboundMessage.parse_raw_mime(raw)
self.assertEqual(msg.text, "Unicode ✓") # *not* "Unicode \\u2713"
def test_parse_raw_mime_file_text(self):
with open(sample_email_path(), mode="r") as fp:
msg = AnymailInboundMessage.parse_raw_mime_file(fp)