mirror of
https://github.com/pacnpal/django-anymail.git
synced 2025-12-20 11:51:05 -05:00
Inbound: fix 8bit Unicode parsing as escape sequences on Python 3
Work around Python 3 email parser change that can turn Unicode characters into \u escape sequences when parsing a message (or attachment) that uses "Content-Transfer-Encoding: 8bit".
This commit is contained in:
@@ -199,6 +199,10 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas
|
|||||||
@classmethod
|
@classmethod
|
||||||
def parse_raw_mime(cls, s):
|
def parse_raw_mime(cls, s):
|
||||||
"""Returns a new AnymailInboundMessage parsed from str s"""
|
"""Returns a new AnymailInboundMessage parsed from str s"""
|
||||||
|
if isinstance(s, six.text_type):
|
||||||
|
# Avoid Python 3.x issue https://bugs.python.org/issue18271
|
||||||
|
# (See test_inbound: test_parse_raw_mime_8bit_utf8)
|
||||||
|
return cls.parse_raw_mime_bytes(s.encode('utf-8'))
|
||||||
return EmailParser(cls).parsestr(s)
|
return EmailParser(cls).parsestr(s)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from email.utils import collapse_rfc2231_value
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from unittest import skipIf
|
from unittest import skipIf
|
||||||
|
|
||||||
|
from django.core.mail import SafeMIMEText
|
||||||
from django.test import SimpleTestCase
|
from django.test import SimpleTestCase
|
||||||
|
|
||||||
from anymail.inbound import AnymailInboundMessage
|
from anymail.inbound import AnymailInboundMessage
|
||||||
@@ -166,6 +167,18 @@ class AnymailInboundMessageConstructionTests(SimpleTestCase):
|
|||||||
self.assertEqual(msg.get_content_bytes(), b'\xD8i estas retpo\xFEto.\r\n')
|
self.assertEqual(msg.get_content_bytes(), b'\xD8i estas retpo\xFEto.\r\n')
|
||||||
self.assertEqual(msg.defects, [])
|
self.assertEqual(msg.defects, [])
|
||||||
|
|
||||||
|
def test_parse_raw_mime_8bit_utf8(self):
|
||||||
|
# In some cases, the message below ends up with 'Content-Transfer-Encoding: 8bit',
|
||||||
|
# so needs to be parsed as bytes, not text (see https://bugs.python.org/issue18271).
|
||||||
|
# Message.as_string() returns str, which is bytes on Python 2 and text on Python 3.
|
||||||
|
# (This might be a Django bug; plain old MIMEText avoids the problem by using
|
||||||
|
# 'Content-Transfer-Encoding: base64', which parses fine as text or bytes.
|
||||||
|
# Django <1.11 on Python 3 also used base64.)
|
||||||
|
# Either way, AnymailInboundMessage should try to sidestep the whole issue.
|
||||||
|
raw = SafeMIMEText("Unicode ✓", "plain", "utf-8").as_string()
|
||||||
|
msg = AnymailInboundMessage.parse_raw_mime(raw)
|
||||||
|
self.assertEqual(msg.text, "Unicode ✓") # *not* "Unicode \\u2713"
|
||||||
|
|
||||||
def test_parse_raw_mime_file_text(self):
|
def test_parse_raw_mime_file_text(self):
|
||||||
with open(sample_email_path(), mode="r") as fp:
|
with open(sample_email_path(), mode="r") as fp:
|
||||||
msg = AnymailInboundMessage.parse_raw_mime_file(fp)
|
msg = AnymailInboundMessage.parse_raw_mime_file(fp)
|
||||||
|
|||||||
Reference in New Issue
Block a user