diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dcc0193..9f2afce 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -30,6 +30,23 @@ vNext *Unreleased changes in main branch* +Fixes +~~~~~ + +* **Mailgun and SendGrid inbound:** Work around a Django limitation that + drops attachments with certain filenames. The missing attachments + are now simply omitted from the resulting inbound message. (In earlier + releases, they would cause a MultiValueDictKeyError in Anymail's + inbound webhook.) + + Anymail documentation now recommends using Mailgun's and SendGrid's "raw MIME" + inbound options, which avoid the problem and preserve all attachments. + + See `Mailgun inbound `__ + and `SendGrid inbound `__ + for details. (Thanks to `@erikdrums`_ for reporting and helping investigate the problem.) + + Other ~~~~~ @@ -1299,6 +1316,7 @@ Features .. _@coupa-anya: https://github.com/coupa-anya .. _@decibyte: https://github.com/decibyte .. _@dominik-lekse: https://github.com/dominik-lekse +.. _@erikdrums: https://github.com/erikdrums .. _@ewingrj: https://github.com/ewingrj .. _@fdemmer: https://github.com/fdemmer .. _@Flexonze: https://github.com/Flexonze diff --git a/anymail/inbound.py b/anymail/inbound.py index c4f4b22..083aad1 100644 --- a/anymail/inbound.py +++ b/anymail/inbound.py @@ -99,7 +99,7 @@ class AnymailInboundMessage(Message): def inline_attachments(self): """dict of Content-ID: attachment (as MIMEPart objects)""" return {unquote(part['Content-ID']): part for part in self.walk() - if part.is_inline_attachment() and part['Content-ID']} + if part.is_inline_attachment() and part['Content-ID'] is not None} def get_address_header(self, header): """Return the value of header parsed into a (possibly-empty) list of EmailAddress objects""" @@ -299,11 +299,11 @@ class AnymailInboundMessage(Message): # some sort of lazy attachment where the content is only pulled in if/when # requested (and then use file.chunks() to minimize memory usage) return cls.construct_attachment( - content_type=file.content_type, + content_type=getattr(file, 'content_type', None), content=file.read(), - filename=file.name, + filename=getattr(file, 'name', None), content_id=content_id, - charset=file.charset) + charset=getattr(file, 'charset', None)) @classmethod def construct_attachment(cls, content_type, content, diff --git a/anymail/webhooks/mailgun.py b/anymail/webhooks/mailgun.py index 50c6d9a..0eb50e1 100644 --- a/anymail/webhooks/mailgun.py +++ b/anymail/webhooks/mailgun.py @@ -395,17 +395,27 @@ class MailgunInboundWebhookView(MailgunBaseWebhookView): except (KeyError, TypeError): attachments = None else: - # Load attachments from posted files: Mailgun file field names are 1-based - att_ids = ['attachment-%d' % i for i in range(1, attachment_count+1)] - att_cids = { # filename: content-id (invert content-id-map) - att_id: cid for cid, att_id - in json.loads(request.POST.get('content-id-map', '{}')).items() - } - attachments = [ - AnymailInboundMessage.construct_attachment_from_uploaded_file( - request.FILES[att_id], content_id=att_cids.get(att_id, None)) - for att_id in att_ids - ] + # Load attachments from posted files: attachment-1, attachment-2, etc. + # content-id-map is {content-id: attachment-id}, identifying which files are inline attachments. + # Invert it to {attachment-id: content-id}, while handling potentially duplicate content-ids. + field_to_content_id = json.loads( + request.POST.get('content-id-map', '{}'), + object_pairs_hook=lambda pairs: {att_id: cid for (cid, att_id) in pairs}) + attachments = [] + for n in range(1, attachment_count+1): + attachment_id = "attachment-%d" % n + try: + file = request.FILES[attachment_id] + except KeyError: + # Django's multipart/form-data handling drops FILES with certain + # filenames (for security) or with empty filenames (Django ticket 15879). + # (To avoid this problem, use Mailgun's "raw MIME" inbound option.) + pass + else: + content_id = field_to_content_id.get(attachment_id) + attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file( + file, content_id=content_id) + attachments.append(attachment) return AnymailInboundMessage.construct( headers=json.loads(request.POST['message-headers']), # includes From, To, Cc, Subject, etc. diff --git a/anymail/webhooks/sendgrid.py b/anymail/webhooks/sendgrid.py index a84434d..f48a52b 100644 --- a/anymail/webhooks/sendgrid.py +++ b/anymail/webhooks/sendgrid.py @@ -181,12 +181,22 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView): attachments = None else: # Load attachments from posted files - attachments = [ - AnymailInboundMessage.construct_attachment_from_uploaded_file( - request.FILES[att_id], - content_id=attachment_info[att_id].get("content-id", None)) - for att_id in sorted(attachment_info.keys()) - ] + attachments = [] + for attachment_id in sorted(attachment_info.keys()): + try: + file = request.FILES[attachment_id] + except KeyError: + # Django's multipart/form-data handling drops FILES with certain + # filenames (for security) or with empty filenames (Django ticket 15879). + # (To avoid this problem, enable SendGrid's "raw, full MIME" inbound option.) + pass + else: + # (This deliberately ignores attachment_info[attachment_id]["filename"], + # which has not passed through Django's filename sanitization.) + content_id = attachment_info[attachment_id].get("content-id") + attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file( + file, content_id=content_id) + attachments.append(attachment) default_charset = request.POST.encoding.lower() # (probably utf-8) text = request.POST.get('text') diff --git a/docs/esps/mailgun.rst b/docs/esps/mailgun.rst index 595c74b..28afad8 100644 --- a/docs/esps/mailgun.rst +++ b/docs/esps/mailgun.rst @@ -490,25 +490,22 @@ Inbound webhook --------------- If you want to receive email from Mailgun through Anymail's normalized :ref:`inbound ` -handling, follow Mailgun's `Receiving, Storing and Fowarding Messages`_ guide to set up -an inbound route that forwards to Anymail's inbound webhook. (You can configure routes -using Mailgun's API, or simply using the `Mailgun receiving config`_.) +handling, follow Mailgun's `Receiving, Forwarding and Storing Messages`_ guide to set up +an inbound route that forwards to Anymail's inbound webhook. Create an inbound route +in Mailgun's dashboard on the `Email Receiving panel`_, or use Mailgun's API. -The *action* for your route will be either: +Use this url as the route's "forward" destination: - :samp:`forward("https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound/")` - :samp:`forward("https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound_mime/")` + :samp:`https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound_mime/` - * *forward* is required to select Mailgun's "forward" action - (Anymail does not support using the "store" action) * *random:random* is an :setting:`ANYMAIL_WEBHOOK_SECRET` shared secret * *yoursite.example.com* is your Django site + * :samp:`mime` at the end tells Mailgun to supply the entire message in "raw MIME" format + (see note below) -Anymail accepts either of Mailgun's "fully-parsed" (.../inbound/) and "raw MIME" (.../inbound_mime/) -formats; the URL tells Mailgun which you want. Because Anymail handles parsing and normalizing the data, -both are equally easy to use. The raw MIME option will give the most accurate representation of *any* -received email (including complex forms like multi-message mailing list digests). The fully-parsed option -*may* use less memory while processing messages with many large attachments. +You must use Mailgun's "forward" route action; Anymail does not currently support "store and notify." +(For debugging, you might find it helpful to *also* enable the "store" route action to keep a copy +of inbound messages on Mailgun's servers, but Anymail's inbound webhook won't work as a store-notify url.) If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMessage.spam_detected` and :attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, you'll need to set your Mailgun @@ -519,11 +516,24 @@ Anymail will verify Mailgun inbound message events using your :setting:`MAILGUN_WEBHOOK_SIGNING_KEY ` Anymail setting. By default, Mailgun's webhook signature provides similar security to Anymail's shared webhook secret, so it's acceptable to omit the -:setting:`ANYMAIL_WEBHOOK_SECRET` setting (and "random:random@" portion of the -action) with Mailgun inbound routing. +:setting:`ANYMAIL_WEBHOOK_SECRET` setting (and :samp:`{random:random}@` portion of the +forwarding url) with Mailgun inbound routing. + +.. note:: + + Anymail also supports Mailgun's "fully-parsed" inbound message format, but the "raw MIME" + version is preferred to get the most accurate representation of any received email. + Using raw MIME also avoids a limitation in Django's :mimetype:`multipart/form-data` handling + that can strip attachments with certain filenames (and inline images without filenames). + + To use Mailgun's fully-parsed format, change :samp:`.../inbound_mime/` to just + :samp:`.../inbound/` at the end of the route forwarding url. + + .. versionchanged:: vNext + Using Mailgun's full-parsed (not raw MIME) inbound message format is no longer recommended. -.. _Receiving, Storing and Fowarding Messages: +.. _Receiving, Forwarding and Storing Messages: https://documentation.mailgun.com/en/latest/user_manual.html#receiving-forwarding-and-storing-messages -.. _Mailgun receiving config: https://app.mailgun.com/app/receiving/routes +.. _Email Receiving panel: https://app.mailgun.com/app/receiving/routes .. _Mailgun domains config: https://app.mailgun.com/app/sending/domains diff --git a/docs/esps/sendgrid.rst b/docs/esps/sendgrid.rst index 7282eab..6a7d827 100644 --- a/docs/esps/sendgrid.rst +++ b/docs/esps/sendgrid.rst @@ -428,17 +428,24 @@ The Destination URL setting will be: * *random:random* is an :setting:`ANYMAIL_WEBHOOK_SECRET` shared secret * *yoursite.example.com* is your Django site -Be sure the URL has a trailing slash. (SendGrid's inbound processing won't follow Django's +You should enable SendGrid's "POST the raw, full MIME message" checkbox (see note below). +And be sure the URL has a trailing slash. (SendGrid's inbound processing won't follow Django's :setting:`APPEND_SLASH` redirect.) If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMessage.spam_detected` and :attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, be sure to enable the "Check incoming emails for spam" checkbox. -In most cases, you should enable SendGrid's "POST the raw, full MIME message" checkbox. -Anymail should work either way (and you can change the option at any time), but enabling -raw MIME will give the most accurate representation of *any* received email (including -complex forms like multi-message mailing list digests). +.. note:: + + Anymail supports either option for SendGrid's "POST the raw, full MIME message" checkbox, but + enabling this setting is preferred to get the most accurate representation of any received email. + Using raw MIME also avoids a limitation in Django's :mimetype:`multipart/form-data` handling + that can strip attachments with certain filenames. + + .. versionchanged:: vNext + Leaving SendGrid's "full MIME" checkbox disabled is no longer recommended. + .. _Inbound Parse Webhook: https://sendgrid.com/docs/Classroom/Basics/Inbound_Parse_Webhook/setting_up_the_inbound_parse_webhook.html diff --git a/tests/test_mailgun_inbound.py b/tests/test_mailgun_inbound.py index dfc2b92..3ba7dce 100644 --- a/tests/test_mailgun_inbound.py +++ b/tests/test_mailgun_inbound.py @@ -15,7 +15,7 @@ from anymail.webhooks.mailgun import MailgunInboundWebhookView from .test_mailgun_webhooks import ( TEST_WEBHOOK_SIGNING_KEY, mailgun_sign_payload, mailgun_sign_legacy_payload, querydict_to_postdict) -from .utils import sample_image_content, sample_email_content +from .utils import sample_image_content, sample_email_content, encode_multipart, make_fileobj from .webhook_cases import WebhookTestCase @@ -104,6 +104,7 @@ class MailgunInboundTestCase(WebhookTestCase): att2.name = 'image.png' email_content = sample_email_content() att3 = BytesIO(email_content) + att3.name = '\\share\\mail\\forwarded.msg' att3.content_type = 'message/rfc822; charset="us-ascii"' raw_event = mailgun_sign_legacy_payload({ 'message-headers': '[]', @@ -125,6 +126,7 @@ class MailgunInboundTestCase(WebhookTestCase): self.assertEqual(attachments[0].get_filename(), 'test.txt') self.assertEqual(attachments[0].get_content_type(), 'text/plain') self.assertEqual(attachments[0].get_content_text(), 'test attachment') + self.assertEqual(attachments[1].get_filename(), 'forwarded.msg') # Django strips paths self.assertEqual(attachments[1].get_content_type(), 'message/rfc822') self.assertEqualIgnoringHeaderFolding(attachments[1].get_content_bytes(), email_content) @@ -135,6 +137,68 @@ class MailgunInboundTestCase(WebhookTestCase): self.assertEqual(inline.get_content_type(), 'image/png') self.assertEqual(inline.get_content_bytes(), image_content) + def test_filtered_attachment_filenames(self): + # Make sure the inbound webhook can deal with missing fields caused by + # Django's multipart/form-data filename filtering. (The attachments are lost, + # but shouldn't cause errors in the inbound webhook.) + filenames = [ + "", "path\\", "path/" + ".", "path\\.", "path/.", + "..", "path\\..", "path/..", + ] + num_attachments = len(filenames) + payload = { + "attachment-%d" % (i+1): make_fileobj("content", filename=filenames[i], content_type="text/pdf") + for i in range(num_attachments) + } + payload.update({ + 'message-headers': '[]', + 'attachment-count': str(num_attachments), + }) + + # Must do our own multipart/form-data encoding for empty filenames: + response = self.client.post('/anymail/mailgun/inbound/', + data=encode_multipart("BoUnDaRy", mailgun_sign_legacy_payload(payload)), + content_type="multipart/form-data; boundary=BoUnDaRy") + self.assertEqual(response.status_code, 200) + kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=MailgunInboundWebhookView, + event=ANY, esp_name='Mailgun') + + # Different Django releases strip different filename patterns. + # Just verify that at least some attachments got dropped (so the test is valid) + # without causing an error in the inbound webhook: + attachments = kwargs['event'].message.attachments + self.assertLess(len(attachments), num_attachments) + + def test_unusual_content_id_map(self): + # Under unknown conditions, Mailgun appears to generate a content-id-map with multiple + # empty keys (and possibly other duplicate keys). We still want to correctly identify + # inline attachments from it. + raw_event = mailgun_sign_legacy_payload({ + 'message-headers': '[]', + 'attachment-count': '4', + 'content-id-map': '{"": "attachment-1", "": "attachment-2",' + ' "": "attachment-3", "": "attachment-4"}', + 'attachment-1': make_fileobj("att1"), + 'attachment-2': make_fileobj("att2"), + 'attachment-3': make_fileobj("att3"), + 'attachment-4': make_fileobj("att4"), + }) + + response = self.client.post('/anymail/mailgun/inbound/', data=raw_event) + self.assertEqual(response.status_code, 200) + kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=MailgunInboundWebhookView, + event=ANY, esp_name='Mailgun') + event = kwargs['event'] + message = event.message + self.assertEqual(len(message.attachments), 0) # all inlines + inlines = [part for part in message.walk() if part.is_inline_attachment()] + self.assertEqual(len(inlines), 4) + self.assertEqual(inlines[0]["Content-ID"], "") + self.assertEqual(inlines[1]["Content-ID"], "") + self.assertEqual(inlines[2]["Content-ID"], "") + self.assertEqual(inlines[3]["Content-ID"], "") + def test_inbound_mime(self): # Mailgun provides the full, raw MIME message if the webhook url ends in 'mime' raw_event = mailgun_sign_legacy_payload({ diff --git a/tests/test_sendgrid_inbound.py b/tests/test_sendgrid_inbound.py index 68d0b98..5389959 100644 --- a/tests/test_sendgrid_inbound.py +++ b/tests/test_sendgrid_inbound.py @@ -9,7 +9,7 @@ from anymail.inbound import AnymailInboundMessage from anymail.signals import AnymailInboundEvent from anymail.webhooks.sendgrid import SendGridInboundWebhookView -from .utils import dedent_bytes, sample_image_content, sample_email_content +from .utils import dedent_bytes, sample_image_content, sample_email_content, encode_multipart, make_fileobj from .webhook_cases import WebhookTestCase @@ -96,15 +96,16 @@ class SendgridInboundTestCase(WebhookTestCase): att2.name = 'image.png' email_content = sample_email_content() att3 = BytesIO(email_content) + att3.name = '\\share\\mail\\forwarded.msg' att3.content_type = 'message/rfc822; charset="us-ascii"' raw_event = { 'headers': '', 'attachments': '3', 'attachment-info': json.dumps({ - "attachment3": {"filename": "", "name": "", "charset": "US-ASCII", "type": "message/rfc822"}, - "attachment2": {"filename": "image.png", "name": "image.png", "type": "image/png", - "content-id": "abc123"}, - "attachment1": {"filename": "test.txt", "name": "test.txt", "type": "text/plain"}, + "attachment3": {"filename": "\\share\\mail\\forwarded.msg", + "charset": "US-ASCII", "type": "message/rfc822"}, + "attachment2": {"filename": "image.png", "type": "image/png", "content-id": "abc123"}, + "attachment1": {"filename": "test.txt", "charset": "UTF-8", "type": "text/plain"}, }), 'content-ids': '{"abc123": "attachment2"}', 'attachment1': att1, @@ -123,6 +124,7 @@ class SendgridInboundTestCase(WebhookTestCase): self.assertEqual(attachments[0].get_filename(), 'test.txt') self.assertEqual(attachments[0].get_content_type(), 'text/plain') self.assertEqual(attachments[0].get_content_text(), 'test attachment') + self.assertEqual(attachments[1].get_filename(), 'forwarded.msg') # Django strips path self.assertEqual(attachments[1].get_content_type(), 'message/rfc822') self.assertEqualIgnoringHeaderFolding(attachments[1].get_content_bytes(), email_content) @@ -133,6 +135,45 @@ class SendgridInboundTestCase(WebhookTestCase): self.assertEqual(inline.get_content_type(), 'image/png') self.assertEqual(inline.get_content_bytes(), image_content) + def test_filtered_attachment_filenames(self): + # Make sure the inbound webhook can deal with missing fields caused by + # Django's multipart/form-data filename filtering. (The attachments are lost, + # but shouldn't cause errors in the inbound webhook.) + filenames = [ + "", "path\\", "path/" + ".", "path\\.", "path/.", + "..", "path\\..", "path/..", + ] + num_attachments = len(filenames) + payload = { + "attachment%d" % (i+1): make_fileobj("content", filename=filenames[i], content_type="text/pdf") + for i in range(num_attachments) + } + attachment_info = { + key: {"filename": value.name, "type": "text/pdf"} + for key, value in payload.items() + } + payload.update({ + 'headers': '', + 'attachments': str(num_attachments), + 'attachment-info': json.dumps(attachment_info), + }) + + # Must do our own form-data encoding to properly test empty attachment filenames. + # Must do our own multipart/form-data encoding for empty filenames: + response = self.client.post('/anymail/sendgrid/inbound/', + data=encode_multipart("BoUnDaRy", payload), + content_type="multipart/form-data; boundary=BoUnDaRy") + self.assertEqual(response.status_code, 200) + kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=SendGridInboundWebhookView, + event=ANY, esp_name='SendGrid') + + # Different Django releases strip different filename patterns. + # Just verify that at least some attachments got dropped (so the test is valid) + # without causing an error in the inbound webhook: + attachments = kwargs['event'].message.attachments + self.assertLess(len(attachments), num_attachments) + def test_inbound_mime(self): # SendGrid has an option to send the full, raw MIME message raw_event = { diff --git a/tests/utils.py b/tests/utils.py index eb282e6..d2da4cb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -5,12 +5,12 @@ import uuid import warnings from base64 import b64decode from contextlib import contextmanager -from io import StringIO +from io import BytesIO, StringIO from pathlib import Path from unittest import TestCase from unittest.util import safe_repr -from django.test import Client +import django.test.client def decode_att(att): @@ -177,7 +177,7 @@ class AnymailTestMixin(TestCase): sys.stdout = old_stdout -class ClientWithCsrfChecks(Client): +class ClientWithCsrfChecks(django.test.Client): """Django test Client that enforces CSRF checks https://docs.djangoproject.com/en/stable/ref/csrf/#testing @@ -227,3 +227,35 @@ def dedent_bytes(text): if margin: text = re.sub(b'(?m)^' + margin, b'', text) return text + + +def make_fileobj(content, filename=None, content_type=None, encoding=None): + """ + Returns a file-like object that can be used in Django test Client + post data to simulate an uploaded file. + """ + # The logic that unpacks this is in django.test.client.encode_file. + if isinstance(content, str): + content = content.encode(encoding or 'utf-8') + fileobj = BytesIO(content) + if filename is not None: + fileobj.name = filename + if content_type is not None: + fileobj.content_type = content_type + return fileobj + + +def encode_multipart(boundary, data): + """ + Version of :func:`django.test.client.encode_multipart` that allows + empty filenames. (The original function substitutes the field's + name if a file has an empty name.) + """ + # For simplicity, encode with the original function, and then + # replace any 'filename=""' with 'filename=""'. This isn't + # entirely robust, but is sufficient for testing use. + encoded = django.test.client.encode_multipart(boundary, data) + re_keys = r"|".join(re.escape(key) for key in data.keys()) + return re.sub( + rb'filename="(%s)"' % re_keys.encode("ascii"), + b'filename=""', encoded)