diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c039ea2..9c18a2d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -38,6 +38,14 @@ Features backends (all but Amazon SES and SparkPost). Because this can expose API keys and other sensitive info in log files, it should not be used in production. +Fixes +~~~~~ + +* **Mailgun:** Fix problem where attachments with non-ASCII filenames would be lost. + (Works around Requests/urllib3 issue encoding multipart/form-data filenames in a way + that isn't RFC 7578 compliant. Thanks to `@decibyte`_ for catching the problem and + `@costela`_ for identifying the related Mailgun API oddity.) + v4.2 ---- @@ -833,6 +841,8 @@ Features .. _#115: https://github.com/anymail/issues/115 .. _@calvin: https://github.com/calvin +.. _@costela: https://github.com/costela +.. _@decibyte: https://github.com/decibyte .. _@joshkersey: https://github.com/joshkersey .. _@Lekensteyn: https://github.com/Lekensteyn .. _@lewistaylor: https://github.com/lewistaylor diff --git a/anymail/backends/mailgun.py b/anymail/backends/mailgun.py index 3ba3eeb..7c2db20 100644 --- a/anymail/backends/mailgun.py +++ b/anymail/backends/mailgun.py @@ -1,4 +1,7 @@ from datetime import datetime +from email.utils import encode_rfc2231 + +from requests import Request from ..exceptions import AnymailRequestsAPIError, AnymailError from ..message import AnymailRecipientStatus @@ -78,6 +81,35 @@ class MailgunPayload(RequestsPayload): backend=self.backend, email_message=self.message, payload=self) return "%s/messages" % self.sender_domain + def get_request_params(self, api_url): + params = super(MailgunPayload, self).get_request_params(api_url) + non_ascii_filenames = [filename + for (field, (filename, content, mimetype)) in params["files"] + if filename is not None and not isascii(filename)] + if non_ascii_filenames: + # Workaround https://github.com/requests/requests/issues/4652: + # Mailgun expects RFC 7578 compliant multipart/form-data, and is confused + # by Requests/urllib3's improper use of RFC 2231 encoded filename parameters + # ("filename*=utf-8''...") in Content-Disposition headers. + # The workaround is to pre-generate the (non-compliant) form-data body, and + # replace 'filename*={RFC 2231 encoded}' with 'filename="{UTF-8 bytes}"'. + # Replace _only_ the filenames that will be problems (not all "filename*=...") + # to minimize potential side effects--e.g., in attached messages that might + # have their own attachments with (correctly) RFC 2231 encoded filenames. + prepared = Request(**params).prepare() + form_data = prepared.body # bytes + for filename in non_ascii_filenames: # text + rfc2231_filename = encode_rfc2231( # wants a str (text in PY3, bytes in PY2) + filename if isinstance(filename, str) else filename.encode("utf-8"), + charset="utf-8") + form_data = form_data.replace( + b'filename*=%s' % rfc2231_filename.encode("utf-8"), + b'filename="%s"' % filename.encode("utf-8")) + params["data"] = form_data + params["headers"]["Content-Type"] = prepared.headers["Content-Type"] # multipart/form-data; boundary=... + params["files"] = None # these are now in the form_data body + return params + def serialize_data(self): self.populate_recipient_variables() return self.data @@ -113,6 +145,7 @@ class MailgunPayload(RequestsPayload): def init_payload(self): self.data = {} # {field: [multiple, values]} self.files = [] # [(field, multiple), (field, values)] + self.headers = {} def set_from_email_list(self, emails): # Mailgun supports multiple From email addresses @@ -204,3 +237,15 @@ class MailgunPayload(RequestsPayload): # Allow override of sender_domain via esp_extra # (but pop it out of params to send to Mailgun) self.sender_domain = self.data.pop("sender_domain", self.sender_domain) + + +def isascii(s): + """Returns True if str s is entirely ASCII characters. + + (Compare to Python 3.7 `str.isascii()`.) + """ + try: + s.encode("ascii") + except UnicodeEncodeError: + return False + return True diff --git a/tests/test_mailgun_backend.py b/tests/test_mailgun_backend.py index 20f9e34..93da086 100644 --- a/tests/test_mailgun_backend.py +++ b/tests/test_mailgun_backend.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from datetime import date, datetime +from textwrap import dedent + try: from email import message_from_bytes except ImportError: @@ -171,9 +173,50 @@ class MailgunBackendStandardEmailTests(MailgunBackendMockAPITestCase): def test_unicode_attachment_correctly_decoded(self): self.message.attach(u"Une pièce jointe.html", u'

\u2019

', mimetype='text/html') self.message.send() - files = self.get_api_call_files() - attachments = [value for (field, value) in files if field == 'attachment'] - self.assertEqual(len(attachments), 1) + + # Verify the RFC 7578 compliance workaround has kicked in: + data = self.get_api_call_data().decode("utf-8") + self.assertNotIn("filename*=", data) # No RFC 2231 encoding + self.assertIn(u'Content-Disposition: form-data; name="attachment"; filename="Une pièce jointe.html"', data) + + files = self.get_api_call_files(required=False) + self.assertFalse(files) # files should have been moved to formdata body + + def test_rfc_7578_compliance(self): + # Check some corner cases in the workaround that undoes RFC 2231 multipart/form-data encoding... + self.message.subject = u"Testing for filename*=utf-8''problems" + self.message.body = u"The attached message should have an attachment named 'vedhæftet fil.txt'" + # A forwarded message with its own attachment: + forwarded_message = dedent("""\ + MIME-Version: 1.0 + From: sender@example.com + Subject: This is a test message + Content-Type: multipart/mixed; boundary="boundary" + + --boundary + Content-Type: text/plain + + This message has an attached file with a non-ASCII filename. + --boundary + Content-Type: text/plain; name*=utf-8''vedh%C3%A6ftet%20fil.txt + Content-Disposition: attachment; filename*=utf-8''vedh%C3%A6ftet%20fil.txt + + This is an attachment. + --boundary-- + """) + self.message.attach(u"besked med vedhæftede filer", forwarded_message, "message/rfc822") + self.message.send() + + data = self.get_api_call_data().decode("utf-8").replace("\r\n", "\n") + # Top-level attachment (in form-data) should have RFC 7578 filename (raw Unicode): + self.assertIn( + u'Content-Disposition: form-data; name="attachment"; filename="besked med vedhæftede filer"', data) + # Embedded message/rfc822 attachment should retain its RFC 2231 encoded filename: + self.assertIn("Content-Type: text/plain; name*=utf-8''vedh%C3%A6ftet%20fil.txt", data) + self.assertIn("Content-Disposition: attachment; filename*=utf-8''vedh%C3%A6ftet%20fil.txt", data) + # References to RFC 2231 in message text should remain intact: + self.assertIn("Testing for filename*=utf-8''problems", data) + self.assertIn(u"The attached message should have an attachment named 'vedhæftet fil.txt'", data) def test_embedded_images(self): image_filename = SAMPLE_IMAGE_FILENAME diff --git a/tests/test_mailgun_integration.py b/tests/test_mailgun_integration.py index e8b388d..6acbf59 100644 --- a/tests/test_mailgun_integration.py +++ b/tests/test_mailgun_integration.py @@ -1,3 +1,7 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + import os import logging import unittest @@ -116,7 +120,7 @@ class MailgunBackendIntegrationTests(SimpleTestCase, AnymailTestMixin): track_opens=True, ) message.attach("attachment1.txt", "Here is some\ntext for you", "text/plain") - message.attach("attachment2.csv", "ID,Name\n1,3", "text/csv") + message.attach("vedhæftet fil.csv", "ID,Name\n1,3", "text/csv") cid = message.attach_inline_image_file(sample_image_path(), domain=MAILGUN_TEST_DOMAIN) message.attach_alternative( "
This is the html body
" % cid, @@ -151,7 +155,7 @@ class MailgunBackendIntegrationTests(SimpleTestCase, AnymailTestMixin): self.assertEqual(len(attachments), 2) # because inline image shouldn't be an attachment self.assertEqual(attachments[0]["filename"], "attachment1.txt") self.assertEqual(attachments[0]["content-type"], "text/plain") - self.assertEqual(attachments[1]["filename"], "attachment2.csv") + self.assertEqual(attachments[1]["filename"], "vedhæftet fil.csv") self.assertEqual(attachments[1]["content-type"], "text/csv") # No other fields are verifiable from the event data.