From 3f63fdd7134e7f665e3298c6909094add9195fe2 Mon Sep 17 00:00:00 2001 From: medmunds Date: Wed, 10 Oct 2018 15:02:13 -0700 Subject: [PATCH] Mailgun: Fix lost attachments with non-ASCII filenames. Workaround requests/requests#4652 (urllib3/urllib3#303), where uploaded files in multipart/form-data are improperly given RFC 2231 encoded filenames. That format is not accepted by Mailgun's API (and is prohibited by RFC 7578), resulting in the attachments being silently dropped. Fix is to patch up the multipart/form-data before posting to remove the RFC 2231 encoding. Fixes #125 --- CHANGELOG.rst | 10 +++++++ anymail/backends/mailgun.py | 45 ++++++++++++++++++++++++++++ tests/test_mailgun_backend.py | 49 +++++++++++++++++++++++++++++-- tests/test_mailgun_integration.py | 8 +++-- 4 files changed, 107 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c039ea2..9c18a2d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -38,6 +38,14 @@ Features backends (all but Amazon SES and SparkPost). Because this can expose API keys and other sensitive info in log files, it should not be used in production. +Fixes +~~~~~ + +* **Mailgun:** Fix problem where attachments with non-ASCII filenames would be lost. + (Works around Requests/urllib3 issue encoding multipart/form-data filenames in a way + that isn't RFC 7578 compliant. Thanks to `@decibyte`_ for catching the problem and + `@costela`_ for identifying the related Mailgun API oddity.) + v4.2 ---- @@ -833,6 +841,8 @@ Features .. _#115: https://github.com/anymail/issues/115 .. _@calvin: https://github.com/calvin +.. _@costela: https://github.com/costela +.. _@decibyte: https://github.com/decibyte .. _@joshkersey: https://github.com/joshkersey .. _@Lekensteyn: https://github.com/Lekensteyn .. _@lewistaylor: https://github.com/lewistaylor diff --git a/anymail/backends/mailgun.py b/anymail/backends/mailgun.py index 3ba3eeb..7c2db20 100644 --- a/anymail/backends/mailgun.py +++ b/anymail/backends/mailgun.py @@ -1,4 +1,7 @@ from datetime import datetime +from email.utils import encode_rfc2231 + +from requests import Request from ..exceptions import AnymailRequestsAPIError, AnymailError from ..message import AnymailRecipientStatus @@ -78,6 +81,35 @@ class MailgunPayload(RequestsPayload): backend=self.backend, email_message=self.message, payload=self) return "%s/messages" % self.sender_domain + def get_request_params(self, api_url): + params = super(MailgunPayload, self).get_request_params(api_url) + non_ascii_filenames = [filename + for (field, (filename, content, mimetype)) in params["files"] + if filename is not None and not isascii(filename)] + if non_ascii_filenames: + # Workaround https://github.com/requests/requests/issues/4652: + # Mailgun expects RFC 7578 compliant multipart/form-data, and is confused + # by Requests/urllib3's improper use of RFC 2231 encoded filename parameters + # ("filename*=utf-8''...") in Content-Disposition headers. + # The workaround is to pre-generate the (non-compliant) form-data body, and + # replace 'filename*={RFC 2231 encoded}' with 'filename="{UTF-8 bytes}"'. + # Replace _only_ the filenames that will be problems (not all "filename*=...") + # to minimize potential side effects--e.g., in attached messages that might + # have their own attachments with (correctly) RFC 2231 encoded filenames. + prepared = Request(**params).prepare() + form_data = prepared.body # bytes + for filename in non_ascii_filenames: # text + rfc2231_filename = encode_rfc2231( # wants a str (text in PY3, bytes in PY2) + filename if isinstance(filename, str) else filename.encode("utf-8"), + charset="utf-8") + form_data = form_data.replace( + b'filename*=%s' % rfc2231_filename.encode("utf-8"), + b'filename="%s"' % filename.encode("utf-8")) + params["data"] = form_data + params["headers"]["Content-Type"] = prepared.headers["Content-Type"] # multipart/form-data; boundary=... + params["files"] = None # these are now in the form_data body + return params + def serialize_data(self): self.populate_recipient_variables() return self.data @@ -113,6 +145,7 @@ class MailgunPayload(RequestsPayload): def init_payload(self): self.data = {} # {field: [multiple, values]} self.files = [] # [(field, multiple), (field, values)] + self.headers = {} def set_from_email_list(self, emails): # Mailgun supports multiple From email addresses @@ -204,3 +237,15 @@ class MailgunPayload(RequestsPayload): # Allow override of sender_domain via esp_extra # (but pop it out of params to send to Mailgun) self.sender_domain = self.data.pop("sender_domain", self.sender_domain) + + +def isascii(s): + """Returns True if str s is entirely ASCII characters. + + (Compare to Python 3.7 `str.isascii()`.) + """ + try: + s.encode("ascii") + except UnicodeEncodeError: + return False + return True diff --git a/tests/test_mailgun_backend.py b/tests/test_mailgun_backend.py index 20f9e34..93da086 100644 --- a/tests/test_mailgun_backend.py +++ b/tests/test_mailgun_backend.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from datetime import date, datetime +from textwrap import dedent + try: from email import message_from_bytes except ImportError: @@ -171,9 +173,50 @@ class MailgunBackendStandardEmailTests(MailgunBackendMockAPITestCase): def test_unicode_attachment_correctly_decoded(self): self.message.attach(u"Une pièce jointe.html", u'

\u2019

', mimetype='text/html') self.message.send() - files = self.get_api_call_files() - attachments = [value for (field, value) in files if field == 'attachment'] - self.assertEqual(len(attachments), 1) + + # Verify the RFC 7578 compliance workaround has kicked in: + data = self.get_api_call_data().decode("utf-8") + self.assertNotIn("filename*=", data) # No RFC 2231 encoding + self.assertIn(u'Content-Disposition: form-data; name="attachment"; filename="Une pièce jointe.html"', data) + + files = self.get_api_call_files(required=False) + self.assertFalse(files) # files should have been moved to formdata body + + def test_rfc_7578_compliance(self): + # Check some corner cases in the workaround that undoes RFC 2231 multipart/form-data encoding... + self.message.subject = u"Testing for filename*=utf-8''problems" + self.message.body = u"The attached message should have an attachment named 'vedhæftet fil.txt'" + # A forwarded message with its own attachment: + forwarded_message = dedent("""\ + MIME-Version: 1.0 + From: sender@example.com + Subject: This is a test message + Content-Type: multipart/mixed; boundary="boundary" + + --boundary + Content-Type: text/plain + + This message has an attached file with a non-ASCII filename. + --boundary + Content-Type: text/plain; name*=utf-8''vedh%C3%A6ftet%20fil.txt + Content-Disposition: attachment; filename*=utf-8''vedh%C3%A6ftet%20fil.txt + + This is an attachment. + --boundary-- + """) + self.message.attach(u"besked med vedhæftede filer", forwarded_message, "message/rfc822") + self.message.send() + + data = self.get_api_call_data().decode("utf-8").replace("\r\n", "\n") + # Top-level attachment (in form-data) should have RFC 7578 filename (raw Unicode): + self.assertIn( + u'Content-Disposition: form-data; name="attachment"; filename="besked med vedhæftede filer"', data) + # Embedded message/rfc822 attachment should retain its RFC 2231 encoded filename: + self.assertIn("Content-Type: text/plain; name*=utf-8''vedh%C3%A6ftet%20fil.txt", data) + self.assertIn("Content-Disposition: attachment; filename*=utf-8''vedh%C3%A6ftet%20fil.txt", data) + # References to RFC 2231 in message text should remain intact: + self.assertIn("Testing for filename*=utf-8''problems", data) + self.assertIn(u"The attached message should have an attachment named 'vedhæftet fil.txt'", data) def test_embedded_images(self): image_filename = SAMPLE_IMAGE_FILENAME diff --git a/tests/test_mailgun_integration.py b/tests/test_mailgun_integration.py index e8b388d..6acbf59 100644 --- a/tests/test_mailgun_integration.py +++ b/tests/test_mailgun_integration.py @@ -1,3 +1,7 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + import os import logging import unittest @@ -116,7 +120,7 @@ class MailgunBackendIntegrationTests(SimpleTestCase, AnymailTestMixin): track_opens=True, ) message.attach("attachment1.txt", "Here is some\ntext for you", "text/plain") - message.attach("attachment2.csv", "ID,Name\n1,3", "text/csv") + message.attach("vedhæftet fil.csv", "ID,Name\n1,3", "text/csv") cid = message.attach_inline_image_file(sample_image_path(), domain=MAILGUN_TEST_DOMAIN) message.attach_alternative( "
This is the html body
" % cid, @@ -151,7 +155,7 @@ class MailgunBackendIntegrationTests(SimpleTestCase, AnymailTestMixin): self.assertEqual(len(attachments), 2) # because inline image shouldn't be an attachment self.assertEqual(attachments[0]["filename"], "attachment1.txt") self.assertEqual(attachments[0]["content-type"], "text/plain") - self.assertEqual(attachments[1]["filename"], "attachment2.csv") + self.assertEqual(attachments[1]["filename"], "vedhæftet fil.csv") self.assertEqual(attachments[1]["content-type"], "text/csv") # No other fields are verifiable from the event data.