Mailgun: Fix lost attachments with non-ASCII filenames.

Workaround requests/requests#4652 (urllib3/urllib3#303), where
uploaded files in multipart/form-data are improperly given RFC 2231
encoded filenames. That format is not accepted by Mailgun's API (and is
prohibited by RFC 7578), resulting in the attachments being silently
dropped.

Fix is to patch up the multipart/form-data before posting to remove
the RFC 2231 encoding.

Fixes #125
This commit is contained in:
medmunds
2018-10-10 15:02:13 -07:00
parent ddafac9fbd
commit 3f63fdd713
4 changed files with 107 additions and 5 deletions

View File

@@ -38,6 +38,14 @@ Features
backends (all but Amazon SES and SparkPost). Because this can expose API keys and
other sensitive info in log files, it should not be used in production.
Fixes
~~~~~
* **Mailgun:** Fix problem where attachments with non-ASCII filenames would be lost.
(Works around Requests/urllib3 issue encoding multipart/form-data filenames in a way
that isn't RFC 7578 compliant. Thanks to `@decibyte`_ for catching the problem and
`@costela`_ for identifying the related Mailgun API oddity.)
v4.2
----
@@ -833,6 +841,8 @@ Features
.. _#115: https://github.com/anymail/issues/115
.. _@calvin: https://github.com/calvin
.. _@costela: https://github.com/costela
.. _@decibyte: https://github.com/decibyte
.. _@joshkersey: https://github.com/joshkersey
.. _@Lekensteyn: https://github.com/Lekensteyn
.. _@lewistaylor: https://github.com/lewistaylor

View File

@@ -1,4 +1,7 @@
from datetime import datetime
from email.utils import encode_rfc2231
from requests import Request
from ..exceptions import AnymailRequestsAPIError, AnymailError
from ..message import AnymailRecipientStatus
@@ -78,6 +81,35 @@ class MailgunPayload(RequestsPayload):
backend=self.backend, email_message=self.message, payload=self)
return "%s/messages" % self.sender_domain
def get_request_params(self, api_url):
params = super(MailgunPayload, self).get_request_params(api_url)
non_ascii_filenames = [filename
for (field, (filename, content, mimetype)) in params["files"]
if filename is not None and not isascii(filename)]
if non_ascii_filenames:
# Workaround https://github.com/requests/requests/issues/4652:
# Mailgun expects RFC 7578 compliant multipart/form-data, and is confused
# by Requests/urllib3's improper use of RFC 2231 encoded filename parameters
# ("filename*=utf-8''...") in Content-Disposition headers.
# The workaround is to pre-generate the (non-compliant) form-data body, and
# replace 'filename*={RFC 2231 encoded}' with 'filename="{UTF-8 bytes}"'.
# Replace _only_ the filenames that will be problems (not all "filename*=...")
# to minimize potential side effects--e.g., in attached messages that might
# have their own attachments with (correctly) RFC 2231 encoded filenames.
prepared = Request(**params).prepare()
form_data = prepared.body # bytes
for filename in non_ascii_filenames: # text
rfc2231_filename = encode_rfc2231( # wants a str (text in PY3, bytes in PY2)
filename if isinstance(filename, str) else filename.encode("utf-8"),
charset="utf-8")
form_data = form_data.replace(
b'filename*=%s' % rfc2231_filename.encode("utf-8"),
b'filename="%s"' % filename.encode("utf-8"))
params["data"] = form_data
params["headers"]["Content-Type"] = prepared.headers["Content-Type"] # multipart/form-data; boundary=...
params["files"] = None # these are now in the form_data body
return params
def serialize_data(self):
self.populate_recipient_variables()
return self.data
@@ -113,6 +145,7 @@ class MailgunPayload(RequestsPayload):
def init_payload(self):
self.data = {} # {field: [multiple, values]}
self.files = [] # [(field, multiple), (field, values)]
self.headers = {}
def set_from_email_list(self, emails):
# Mailgun supports multiple From email addresses
@@ -204,3 +237,15 @@ class MailgunPayload(RequestsPayload):
# Allow override of sender_domain via esp_extra
# (but pop it out of params to send to Mailgun)
self.sender_domain = self.data.pop("sender_domain", self.sender_domain)
def isascii(s):
"""Returns True if str s is entirely ASCII characters.
(Compare to Python 3.7 `str.isascii()`.)
"""
try:
s.encode("ascii")
except UnicodeEncodeError:
return False
return True

View File

@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
from datetime import date, datetime
from textwrap import dedent
try:
from email import message_from_bytes
except ImportError:
@@ -171,9 +173,50 @@ class MailgunBackendStandardEmailTests(MailgunBackendMockAPITestCase):
def test_unicode_attachment_correctly_decoded(self):
self.message.attach(u"Une pièce jointe.html", u'<p>\u2019</p>', mimetype='text/html')
self.message.send()
files = self.get_api_call_files()
attachments = [value for (field, value) in files if field == 'attachment']
self.assertEqual(len(attachments), 1)
# Verify the RFC 7578 compliance workaround has kicked in:
data = self.get_api_call_data().decode("utf-8")
self.assertNotIn("filename*=", data) # No RFC 2231 encoding
self.assertIn(u'Content-Disposition: form-data; name="attachment"; filename="Une pièce jointe.html"', data)
files = self.get_api_call_files(required=False)
self.assertFalse(files) # files should have been moved to formdata body
def test_rfc_7578_compliance(self):
# Check some corner cases in the workaround that undoes RFC 2231 multipart/form-data encoding...
self.message.subject = u"Testing for filename*=utf-8''problems"
self.message.body = u"The attached message should have an attachment named 'vedhæftet fil.txt'"
# A forwarded message with its own attachment:
forwarded_message = dedent("""\
MIME-Version: 1.0
From: sender@example.com
Subject: This is a test message
Content-Type: multipart/mixed; boundary="boundary"
--boundary
Content-Type: text/plain
This message has an attached file with a non-ASCII filename.
--boundary
Content-Type: text/plain; name*=utf-8''vedh%C3%A6ftet%20fil.txt
Content-Disposition: attachment; filename*=utf-8''vedh%C3%A6ftet%20fil.txt
This is an attachment.
--boundary--
""")
self.message.attach(u"besked med vedhæftede filer", forwarded_message, "message/rfc822")
self.message.send()
data = self.get_api_call_data().decode("utf-8").replace("\r\n", "\n")
# Top-level attachment (in form-data) should have RFC 7578 filename (raw Unicode):
self.assertIn(
u'Content-Disposition: form-data; name="attachment"; filename="besked med vedhæftede filer"', data)
# Embedded message/rfc822 attachment should retain its RFC 2231 encoded filename:
self.assertIn("Content-Type: text/plain; name*=utf-8''vedh%C3%A6ftet%20fil.txt", data)
self.assertIn("Content-Disposition: attachment; filename*=utf-8''vedh%C3%A6ftet%20fil.txt", data)
# References to RFC 2231 in message text should remain intact:
self.assertIn("Testing for filename*=utf-8''problems", data)
self.assertIn(u"The attached message should have an attachment named 'vedhæftet fil.txt'", data)
def test_embedded_images(self):
image_filename = SAMPLE_IMAGE_FILENAME

View File

@@ -1,3 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import logging
import unittest
@@ -116,7 +120,7 @@ class MailgunBackendIntegrationTests(SimpleTestCase, AnymailTestMixin):
track_opens=True,
)
message.attach("attachment1.txt", "Here is some\ntext for you", "text/plain")
message.attach("attachment2.csv", "ID,Name\n1,3", "text/csv")
message.attach("vedhæftet fil.csv", "ID,Name\n1,3", "text/csv")
cid = message.attach_inline_image_file(sample_image_path(), domain=MAILGUN_TEST_DOMAIN)
message.attach_alternative(
"<div>This is the <i>html</i> body <img src='cid:%s'></div>" % cid,
@@ -151,7 +155,7 @@ class MailgunBackendIntegrationTests(SimpleTestCase, AnymailTestMixin):
self.assertEqual(len(attachments), 2) # because inline image shouldn't be an attachment
self.assertEqual(attachments[0]["filename"], "attachment1.txt")
self.assertEqual(attachments[0]["content-type"], "text/plain")
self.assertEqual(attachments[1]["filename"], "attachment2.csv")
self.assertEqual(attachments[1]["filename"], "vedhæftet fil.csv")
self.assertEqual(attachments[1]["content-type"], "text/csv")
# No other fields are verifiable from the event data.