Inbound: fix 8bit Unicode parsing as escape sequences on Python 3

Work around Python 3 email parser change that can turn Unicode
characters into \u escape sequences when parsing a message (or
attachment) that uses "Content-Transfer-Encoding: 8bit".
This commit is contained in:
medmunds
2018-04-02 15:50:00 -07:00
parent 008aef237e
commit 802a56c87d
2 changed files with 17 additions and 0 deletions

View File

@@ -199,6 +199,10 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas
@classmethod @classmethod
def parse_raw_mime(cls, s): def parse_raw_mime(cls, s):
"""Returns a new AnymailInboundMessage parsed from str s""" """Returns a new AnymailInboundMessage parsed from str s"""
if isinstance(s, six.text_type):
# Avoid Python 3.x issue https://bugs.python.org/issue18271
# (See test_inbound: test_parse_raw_mime_8bit_utf8)
return cls.parse_raw_mime_bytes(s.encode('utf-8'))
return EmailParser(cls).parsestr(s) return EmailParser(cls).parsestr(s)
@classmethod @classmethod

View File

@@ -7,6 +7,7 @@ from email.utils import collapse_rfc2231_value
from textwrap import dedent from textwrap import dedent
from unittest import skipIf from unittest import skipIf
from django.core.mail import SafeMIMEText
from django.test import SimpleTestCase from django.test import SimpleTestCase
from anymail.inbound import AnymailInboundMessage from anymail.inbound import AnymailInboundMessage
@@ -166,6 +167,18 @@ class AnymailInboundMessageConstructionTests(SimpleTestCase):
self.assertEqual(msg.get_content_bytes(), b'\xD8i estas retpo\xFEto.\r\n') self.assertEqual(msg.get_content_bytes(), b'\xD8i estas retpo\xFEto.\r\n')
self.assertEqual(msg.defects, []) self.assertEqual(msg.defects, [])
def test_parse_raw_mime_8bit_utf8(self):
# In some cases, the message below ends up with 'Content-Transfer-Encoding: 8bit',
# so needs to be parsed as bytes, not text (see https://bugs.python.org/issue18271).
# Message.as_string() returns str, which is bytes on Python 2 and text on Python 3.
# (This might be a Django bug; plain old MIMEText avoids the problem by using
# 'Content-Transfer-Encoding: base64', which parses fine as text or bytes.
# Django <1.11 on Python 3 also used base64.)
# Either way, AnymailInboundMessage should try to sidestep the whole issue.
raw = SafeMIMEText("Unicode ✓", "plain", "utf-8").as_string()
msg = AnymailInboundMessage.parse_raw_mime(raw)
self.assertEqual(msg.text, "Unicode ✓") # *not* "Unicode \\u2713"
def test_parse_raw_mime_file_text(self): def test_parse_raw_mime_file_text(self):
with open(sample_email_path(), mode="r") as fp: with open(sample_email_path(), mode="r") as fp:
msg = AnymailInboundMessage.parse_raw_mime_file(fp) msg = AnymailInboundMessage.parse_raw_mime_file(fp)