From eab11ed53e4094ba97e4365de518b3e5af918bb5 Mon Sep 17 00:00:00 2001 From: medmunds Date: Sat, 24 Mar 2018 17:46:18 -0700 Subject: [PATCH] Inbound: test parsing RFC2231 MIME header parameters And decide not to work around a Python 3.3 bug accessing MIME headers that have non-ASCII characters in params. The bug is fixed in the Python 3.4 email package (and didn't exist in Python 2.7). Python 3.3 was only supported with Django 1.8. --- tests/test_inbound.py | 33 ++++++++++++++++++++++++++++++++- tests/utils.py | 17 +++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/tests/test_inbound.py b/tests/test_inbound.py index 353673c..aff8215 100644 --- a/tests/test_inbound.py +++ b/tests/test_inbound.py @@ -2,12 +2,15 @@ from __future__ import unicode_literals from base64 import b64encode +from email.utils import collapse_rfc2231_value from textwrap import dedent +from unittest import skipIf from django.test import SimpleTestCase from anymail.inbound import AnymailInboundMessage -from .utils import SAMPLE_IMAGE_FILENAME, sample_image_content + +from .utils import SAMPLE_IMAGE_FILENAME, python_has_broken_mime_param_handling, sample_image_content SAMPLE_IMAGE_CONTENT = sample_image_content() @@ -458,3 +461,31 @@ class EmailParserWorkaroundTests(SimpleTestCase): # Replace illegal encodings (rather than causing error): self.assertEqual(msg["X-Broken"], "Not a char: \N{REPLACEMENT CHARACTER}.") + + @skipIf(python_has_broken_mime_param_handling(), + "This Python has a buggy email package that crashes on non-ASCII " + "characters in RFC2231-encoded MIME header parameters") + def test_parse_encoded_params(self): + raw = dedent("""\ + MIME-Version: 1.0 + Content-Type: multipart/mixed; boundary="this_is_a_boundary" + + --this_is_a_boundary + Content-Type: text/plain; charset="UTF-8" + + This is the body + + --this_is_a_boundary + Content-Type: text/plain; name*=us-ascii''TPS%20Report + Content-Disposition: attachment; + filename*=iso-8859-1''Une%20pi%E8ce%20jointe%2Etxt + + This is an attachment + --this_is_a_boundary-- + """) + msg = AnymailInboundMessage.parse_raw_mime(raw) + att = msg.attachments[0] + self.assertTrue(att.is_attachment()) + self.assertEqual(att.get_content_disposition(), "attachment") + self.assertEqual(collapse_rfc2231_value(att.get_param("Name", header="Content-Type")), "TPS Report") + self.assertEqual(att.get_filename(), "Une pièce jointe.txt") diff --git a/tests/utils.py b/tests/utils.py index 479a4e0..b833a18 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -222,3 +222,20 @@ class ClientWithCsrfChecks(Client): def __init__(self, **defaults): super(ClientWithCsrfChecks, self).__init__( enforce_csrf_checks=True, **defaults) + + +def python_has_broken_mime_param_handling(): + # In Python 3.3 (only), trying to access any parsed MIME header will crash if the header + # has parameters with non-ASCII characters. (Common in, e.g., attachment filenames.) + # The bug is somewhere within email._header_value_parser.parse_mime_parameters, and is too + # complicated to work around for an uncommon version combination (Django 1.8 on Python 3.3). + # If you run into it, please upgrade to (at least) Python 3.4. + try: + from email.policy import default + default.header_fetch_parse("Content-Type", "plain; name*=iso-8859-1''Une%20pi%E8ce") + except ImportError: + return False # Python 2 (or pre Python 3.3) -- bug doesn't apply + except UnicodeEncodeError: + return True # this is the bug + else: + return False # worked fine