From 09f21a5c2d15fb6723cf632282d1927ba7fba977 Mon Sep 17 00:00:00 2001 From: medmunds Date: Tue, 10 May 2022 11:47:57 -0700 Subject: [PATCH] Mailgun/SendGrid inbound: workaround Django filename issue Workaround for Django multipart/form-data limitation where certain attachment filenames cause fields to be dropped or to end up in request.POST rather than request.FILES. Handle the MultiValueDictKeyError in inbound webhooks when this has occurred. Also update docs to recommend avoiding the problem by using Mailgun and SendGrid's "raw MIME" options. Also handle reported cases of empty, duplicate keys in Mailgun's content-id-map. Fixes #272 --- CHANGELOG.rst | 18 ++++++++++ anymail/inbound.py | 8 ++--- anymail/webhooks/mailgun.py | 32 +++++++++++------ anymail/webhooks/sendgrid.py | 22 ++++++++---- docs/esps/mailgun.rst | 44 ++++++++++++++--------- docs/esps/sendgrid.rst | 17 ++++++--- tests/test_mailgun_inbound.py | 66 +++++++++++++++++++++++++++++++++- tests/test_sendgrid_inbound.py | 51 +++++++++++++++++++++++--- tests/utils.py | 38 ++++++++++++++++++-- 9 files changed, 244 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dcc0193..9f2afce 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -30,6 +30,23 @@ vNext *Unreleased changes in main branch* +Fixes +~~~~~ + +* **Mailgun and SendGrid inbound:** Work around a Django limitation that + drops attachments with certain filenames. The missing attachments + are now simply omitted from the resulting inbound message. (In earlier + releases, they would cause a MultiValueDictKeyError in Anymail's + inbound webhook.) + + Anymail documentation now recommends using Mailgun's and SendGrid's "raw MIME" + inbound options, which avoid the problem and preserve all attachments. + + See `Mailgun inbound `__ + and `SendGrid inbound `__ + for details. (Thanks to `@erikdrums`_ for reporting and helping investigate the problem.) + + Other ~~~~~ @@ -1299,6 +1316,7 @@ Features .. _@coupa-anya: https://github.com/coupa-anya .. _@decibyte: https://github.com/decibyte .. _@dominik-lekse: https://github.com/dominik-lekse +.. _@erikdrums: https://github.com/erikdrums .. _@ewingrj: https://github.com/ewingrj .. _@fdemmer: https://github.com/fdemmer .. _@Flexonze: https://github.com/Flexonze diff --git a/anymail/inbound.py b/anymail/inbound.py index c4f4b22..083aad1 100644 --- a/anymail/inbound.py +++ b/anymail/inbound.py @@ -99,7 +99,7 @@ class AnymailInboundMessage(Message): def inline_attachments(self): """dict of Content-ID: attachment (as MIMEPart objects)""" return {unquote(part['Content-ID']): part for part in self.walk() - if part.is_inline_attachment() and part['Content-ID']} + if part.is_inline_attachment() and part['Content-ID'] is not None} def get_address_header(self, header): """Return the value of header parsed into a (possibly-empty) list of EmailAddress objects""" @@ -299,11 +299,11 @@ class AnymailInboundMessage(Message): # some sort of lazy attachment where the content is only pulled in if/when # requested (and then use file.chunks() to minimize memory usage) return cls.construct_attachment( - content_type=file.content_type, + content_type=getattr(file, 'content_type', None), content=file.read(), - filename=file.name, + filename=getattr(file, 'name', None), content_id=content_id, - charset=file.charset) + charset=getattr(file, 'charset', None)) @classmethod def construct_attachment(cls, content_type, content, diff --git a/anymail/webhooks/mailgun.py b/anymail/webhooks/mailgun.py index 50c6d9a..0eb50e1 100644 --- a/anymail/webhooks/mailgun.py +++ b/anymail/webhooks/mailgun.py @@ -395,17 +395,27 @@ class MailgunInboundWebhookView(MailgunBaseWebhookView): except (KeyError, TypeError): attachments = None else: - # Load attachments from posted files: Mailgun file field names are 1-based - att_ids = ['attachment-%d' % i for i in range(1, attachment_count+1)] - att_cids = { # filename: content-id (invert content-id-map) - att_id: cid for cid, att_id - in json.loads(request.POST.get('content-id-map', '{}')).items() - } - attachments = [ - AnymailInboundMessage.construct_attachment_from_uploaded_file( - request.FILES[att_id], content_id=att_cids.get(att_id, None)) - for att_id in att_ids - ] + # Load attachments from posted files: attachment-1, attachment-2, etc. + # content-id-map is {content-id: attachment-id}, identifying which files are inline attachments. + # Invert it to {attachment-id: content-id}, while handling potentially duplicate content-ids. + field_to_content_id = json.loads( + request.POST.get('content-id-map', '{}'), + object_pairs_hook=lambda pairs: {att_id: cid for (cid, att_id) in pairs}) + attachments = [] + for n in range(1, attachment_count+1): + attachment_id = "attachment-%d" % n + try: + file = request.FILES[attachment_id] + except KeyError: + # Django's multipart/form-data handling drops FILES with certain + # filenames (for security) or with empty filenames (Django ticket 15879). + # (To avoid this problem, use Mailgun's "raw MIME" inbound option.) + pass + else: + content_id = field_to_content_id.get(attachment_id) + attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file( + file, content_id=content_id) + attachments.append(attachment) return AnymailInboundMessage.construct( headers=json.loads(request.POST['message-headers']), # includes From, To, Cc, Subject, etc. diff --git a/anymail/webhooks/sendgrid.py b/anymail/webhooks/sendgrid.py index a84434d..f48a52b 100644 --- a/anymail/webhooks/sendgrid.py +++ b/anymail/webhooks/sendgrid.py @@ -181,12 +181,22 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView): attachments = None else: # Load attachments from posted files - attachments = [ - AnymailInboundMessage.construct_attachment_from_uploaded_file( - request.FILES[att_id], - content_id=attachment_info[att_id].get("content-id", None)) - for att_id in sorted(attachment_info.keys()) - ] + attachments = [] + for attachment_id in sorted(attachment_info.keys()): + try: + file = request.FILES[attachment_id] + except KeyError: + # Django's multipart/form-data handling drops FILES with certain + # filenames (for security) or with empty filenames (Django ticket 15879). + # (To avoid this problem, enable SendGrid's "raw, full MIME" inbound option.) + pass + else: + # (This deliberately ignores attachment_info[attachment_id]["filename"], + # which has not passed through Django's filename sanitization.) + content_id = attachment_info[attachment_id].get("content-id") + attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file( + file, content_id=content_id) + attachments.append(attachment) default_charset = request.POST.encoding.lower() # (probably utf-8) text = request.POST.get('text') diff --git a/docs/esps/mailgun.rst b/docs/esps/mailgun.rst index 595c74b..28afad8 100644 --- a/docs/esps/mailgun.rst +++ b/docs/esps/mailgun.rst @@ -490,25 +490,22 @@ Inbound webhook --------------- If you want to receive email from Mailgun through Anymail's normalized :ref:`inbound ` -handling, follow Mailgun's `Receiving, Storing and Fowarding Messages`_ guide to set up -an inbound route that forwards to Anymail's inbound webhook. (You can configure routes -using Mailgun's API, or simply using the `Mailgun receiving config`_.) +handling, follow Mailgun's `Receiving, Forwarding and Storing Messages`_ guide to set up +an inbound route that forwards to Anymail's inbound webhook. Create an inbound route +in Mailgun's dashboard on the `Email Receiving panel`_, or use Mailgun's API. -The *action* for your route will be either: +Use this url as the route's "forward" destination: - :samp:`forward("https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound/")` - :samp:`forward("https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound_mime/")` + :samp:`https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound_mime/` - * *forward* is required to select Mailgun's "forward" action - (Anymail does not support using the "store" action) * *random:random* is an :setting:`ANYMAIL_WEBHOOK_SECRET` shared secret * *yoursite.example.com* is your Django site + * :samp:`mime` at the end tells Mailgun to supply the entire message in "raw MIME" format + (see note below) -Anymail accepts either of Mailgun's "fully-parsed" (.../inbound/) and "raw MIME" (.../inbound_mime/) -formats; the URL tells Mailgun which you want. Because Anymail handles parsing and normalizing the data, -both are equally easy to use. The raw MIME option will give the most accurate representation of *any* -received email (including complex forms like multi-message mailing list digests). The fully-parsed option -*may* use less memory while processing messages with many large attachments. +You must use Mailgun's "forward" route action; Anymail does not currently support "store and notify." +(For debugging, you might find it helpful to *also* enable the "store" route action to keep a copy +of inbound messages on Mailgun's servers, but Anymail's inbound webhook won't work as a store-notify url.) If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMessage.spam_detected` and :attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, you'll need to set your Mailgun @@ -519,11 +516,24 @@ Anymail will verify Mailgun inbound message events using your :setting:`MAILGUN_WEBHOOK_SIGNING_KEY ` Anymail setting. By default, Mailgun's webhook signature provides similar security to Anymail's shared webhook secret, so it's acceptable to omit the -:setting:`ANYMAIL_WEBHOOK_SECRET` setting (and "random:random@" portion of the -action) with Mailgun inbound routing. +:setting:`ANYMAIL_WEBHOOK_SECRET` setting (and :samp:`{random:random}@` portion of the +forwarding url) with Mailgun inbound routing. + +.. note:: + + Anymail also supports Mailgun's "fully-parsed" inbound message format, but the "raw MIME" + version is preferred to get the most accurate representation of any received email. + Using raw MIME also avoids a limitation in Django's :mimetype:`multipart/form-data` handling + that can strip attachments with certain filenames (and inline images without filenames). + + To use Mailgun's fully-parsed format, change :samp:`.../inbound_mime/` to just + :samp:`.../inbound/` at the end of the route forwarding url. + + .. versionchanged:: vNext + Using Mailgun's full-parsed (not raw MIME) inbound message format is no longer recommended. -.. _Receiving, Storing and Fowarding Messages: +.. _Receiving, Forwarding and Storing Messages: https://documentation.mailgun.com/en/latest/user_manual.html#receiving-forwarding-and-storing-messages -.. _Mailgun receiving config: https://app.mailgun.com/app/receiving/routes +.. _Email Receiving panel: https://app.mailgun.com/app/receiving/routes .. _Mailgun domains config: https://app.mailgun.com/app/sending/domains diff --git a/docs/esps/sendgrid.rst b/docs/esps/sendgrid.rst index 7282eab..6a7d827 100644 --- a/docs/esps/sendgrid.rst +++ b/docs/esps/sendgrid.rst @@ -428,17 +428,24 @@ The Destination URL setting will be: * *random:random* is an :setting:`ANYMAIL_WEBHOOK_SECRET` shared secret * *yoursite.example.com* is your Django site -Be sure the URL has a trailing slash. (SendGrid's inbound processing won't follow Django's +You should enable SendGrid's "POST the raw, full MIME message" checkbox (see note below). +And be sure the URL has a trailing slash. (SendGrid's inbound processing won't follow Django's :setting:`APPEND_SLASH` redirect.) If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMessage.spam_detected` and :attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, be sure to enable the "Check incoming emails for spam" checkbox. -In most cases, you should enable SendGrid's "POST the raw, full MIME message" checkbox. -Anymail should work either way (and you can change the option at any time), but enabling -raw MIME will give the most accurate representation of *any* received email (including -complex forms like multi-message mailing list digests). +.. note:: + + Anymail supports either option for SendGrid's "POST the raw, full MIME message" checkbox, but + enabling this setting is preferred to get the most accurate representation of any received email. + Using raw MIME also avoids a limitation in Django's :mimetype:`multipart/form-data` handling + that can strip attachments with certain filenames. + + .. versionchanged:: vNext + Leaving SendGrid's "full MIME" checkbox disabled is no longer recommended. + .. _Inbound Parse Webhook: https://sendgrid.com/docs/Classroom/Basics/Inbound_Parse_Webhook/setting_up_the_inbound_parse_webhook.html diff --git a/tests/test_mailgun_inbound.py b/tests/test_mailgun_inbound.py index dfc2b92..3ba7dce 100644 --- a/tests/test_mailgun_inbound.py +++ b/tests/test_mailgun_inbound.py @@ -15,7 +15,7 @@ from anymail.webhooks.mailgun import MailgunInboundWebhookView from .test_mailgun_webhooks import ( TEST_WEBHOOK_SIGNING_KEY, mailgun_sign_payload, mailgun_sign_legacy_payload, querydict_to_postdict) -from .utils import sample_image_content, sample_email_content +from .utils import sample_image_content, sample_email_content, encode_multipart, make_fileobj from .webhook_cases import WebhookTestCase @@ -104,6 +104,7 @@ class MailgunInboundTestCase(WebhookTestCase): att2.name = 'image.png' email_content = sample_email_content() att3 = BytesIO(email_content) + att3.name = '\\share\\mail\\forwarded.msg' att3.content_type = 'message/rfc822; charset="us-ascii"' raw_event = mailgun_sign_legacy_payload({ 'message-headers': '[]', @@ -125,6 +126,7 @@ class MailgunInboundTestCase(WebhookTestCase): self.assertEqual(attachments[0].get_filename(), 'test.txt') self.assertEqual(attachments[0].get_content_type(), 'text/plain') self.assertEqual(attachments[0].get_content_text(), 'test attachment') + self.assertEqual(attachments[1].get_filename(), 'forwarded.msg') # Django strips paths self.assertEqual(attachments[1].get_content_type(), 'message/rfc822') self.assertEqualIgnoringHeaderFolding(attachments[1].get_content_bytes(), email_content) @@ -135,6 +137,68 @@ class MailgunInboundTestCase(WebhookTestCase): self.assertEqual(inline.get_content_type(), 'image/png') self.assertEqual(inline.get_content_bytes(), image_content) + def test_filtered_attachment_filenames(self): + # Make sure the inbound webhook can deal with missing fields caused by + # Django's multipart/form-data filename filtering. (The attachments are lost, + # but shouldn't cause errors in the inbound webhook.) + filenames = [ + "", "path\\", "path/" + ".", "path\\.", "path/.", + "..", "path\\..", "path/..", + ] + num_attachments = len(filenames) + payload = { + "attachment-%d" % (i+1): make_fileobj("content", filename=filenames[i], content_type="text/pdf") + for i in range(num_attachments) + } + payload.update({ + 'message-headers': '[]', + 'attachment-count': str(num_attachments), + }) + + # Must do our own multipart/form-data encoding for empty filenames: + response = self.client.post('/anymail/mailgun/inbound/', + data=encode_multipart("BoUnDaRy", mailgun_sign_legacy_payload(payload)), + content_type="multipart/form-data; boundary=BoUnDaRy") + self.assertEqual(response.status_code, 200) + kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=MailgunInboundWebhookView, + event=ANY, esp_name='Mailgun') + + # Different Django releases strip different filename patterns. + # Just verify that at least some attachments got dropped (so the test is valid) + # without causing an error in the inbound webhook: + attachments = kwargs['event'].message.attachments + self.assertLess(len(attachments), num_attachments) + + def test_unusual_content_id_map(self): + # Under unknown conditions, Mailgun appears to generate a content-id-map with multiple + # empty keys (and possibly other duplicate keys). We still want to correctly identify + # inline attachments from it. + raw_event = mailgun_sign_legacy_payload({ + 'message-headers': '[]', + 'attachment-count': '4', + 'content-id-map': '{"": "attachment-1", "": "attachment-2",' + ' "": "attachment-3", "": "attachment-4"}', + 'attachment-1': make_fileobj("att1"), + 'attachment-2': make_fileobj("att2"), + 'attachment-3': make_fileobj("att3"), + 'attachment-4': make_fileobj("att4"), + }) + + response = self.client.post('/anymail/mailgun/inbound/', data=raw_event) + self.assertEqual(response.status_code, 200) + kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=MailgunInboundWebhookView, + event=ANY, esp_name='Mailgun') + event = kwargs['event'] + message = event.message + self.assertEqual(len(message.attachments), 0) # all inlines + inlines = [part for part in message.walk() if part.is_inline_attachment()] + self.assertEqual(len(inlines), 4) + self.assertEqual(inlines[0]["Content-ID"], "") + self.assertEqual(inlines[1]["Content-ID"], "") + self.assertEqual(inlines[2]["Content-ID"], "") + self.assertEqual(inlines[3]["Content-ID"], "") + def test_inbound_mime(self): # Mailgun provides the full, raw MIME message if the webhook url ends in 'mime' raw_event = mailgun_sign_legacy_payload({ diff --git a/tests/test_sendgrid_inbound.py b/tests/test_sendgrid_inbound.py index 68d0b98..5389959 100644 --- a/tests/test_sendgrid_inbound.py +++ b/tests/test_sendgrid_inbound.py @@ -9,7 +9,7 @@ from anymail.inbound import AnymailInboundMessage from anymail.signals import AnymailInboundEvent from anymail.webhooks.sendgrid import SendGridInboundWebhookView -from .utils import dedent_bytes, sample_image_content, sample_email_content +from .utils import dedent_bytes, sample_image_content, sample_email_content, encode_multipart, make_fileobj from .webhook_cases import WebhookTestCase @@ -96,15 +96,16 @@ class SendgridInboundTestCase(WebhookTestCase): att2.name = 'image.png' email_content = sample_email_content() att3 = BytesIO(email_content) + att3.name = '\\share\\mail\\forwarded.msg' att3.content_type = 'message/rfc822; charset="us-ascii"' raw_event = { 'headers': '', 'attachments': '3', 'attachment-info': json.dumps({ - "attachment3": {"filename": "", "name": "", "charset": "US-ASCII", "type": "message/rfc822"}, - "attachment2": {"filename": "image.png", "name": "image.png", "type": "image/png", - "content-id": "abc123"}, - "attachment1": {"filename": "test.txt", "name": "test.txt", "type": "text/plain"}, + "attachment3": {"filename": "\\share\\mail\\forwarded.msg", + "charset": "US-ASCII", "type": "message/rfc822"}, + "attachment2": {"filename": "image.png", "type": "image/png", "content-id": "abc123"}, + "attachment1": {"filename": "test.txt", "charset": "UTF-8", "type": "text/plain"}, }), 'content-ids': '{"abc123": "attachment2"}', 'attachment1': att1, @@ -123,6 +124,7 @@ class SendgridInboundTestCase(WebhookTestCase): self.assertEqual(attachments[0].get_filename(), 'test.txt') self.assertEqual(attachments[0].get_content_type(), 'text/plain') self.assertEqual(attachments[0].get_content_text(), 'test attachment') + self.assertEqual(attachments[1].get_filename(), 'forwarded.msg') # Django strips path self.assertEqual(attachments[1].get_content_type(), 'message/rfc822') self.assertEqualIgnoringHeaderFolding(attachments[1].get_content_bytes(), email_content) @@ -133,6 +135,45 @@ class SendgridInboundTestCase(WebhookTestCase): self.assertEqual(inline.get_content_type(), 'image/png') self.assertEqual(inline.get_content_bytes(), image_content) + def test_filtered_attachment_filenames(self): + # Make sure the inbound webhook can deal with missing fields caused by + # Django's multipart/form-data filename filtering. (The attachments are lost, + # but shouldn't cause errors in the inbound webhook.) + filenames = [ + "", "path\\", "path/" + ".", "path\\.", "path/.", + "..", "path\\..", "path/..", + ] + num_attachments = len(filenames) + payload = { + "attachment%d" % (i+1): make_fileobj("content", filename=filenames[i], content_type="text/pdf") + for i in range(num_attachments) + } + attachment_info = { + key: {"filename": value.name, "type": "text/pdf"} + for key, value in payload.items() + } + payload.update({ + 'headers': '', + 'attachments': str(num_attachments), + 'attachment-info': json.dumps(attachment_info), + }) + + # Must do our own form-data encoding to properly test empty attachment filenames. + # Must do our own multipart/form-data encoding for empty filenames: + response = self.client.post('/anymail/sendgrid/inbound/', + data=encode_multipart("BoUnDaRy", payload), + content_type="multipart/form-data; boundary=BoUnDaRy") + self.assertEqual(response.status_code, 200) + kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=SendGridInboundWebhookView, + event=ANY, esp_name='SendGrid') + + # Different Django releases strip different filename patterns. + # Just verify that at least some attachments got dropped (so the test is valid) + # without causing an error in the inbound webhook: + attachments = kwargs['event'].message.attachments + self.assertLess(len(attachments), num_attachments) + def test_inbound_mime(self): # SendGrid has an option to send the full, raw MIME message raw_event = { diff --git a/tests/utils.py b/tests/utils.py index eb282e6..d2da4cb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -5,12 +5,12 @@ import uuid import warnings from base64 import b64decode from contextlib import contextmanager -from io import StringIO +from io import BytesIO, StringIO from pathlib import Path from unittest import TestCase from unittest.util import safe_repr -from django.test import Client +import django.test.client def decode_att(att): @@ -177,7 +177,7 @@ class AnymailTestMixin(TestCase): sys.stdout = old_stdout -class ClientWithCsrfChecks(Client): +class ClientWithCsrfChecks(django.test.Client): """Django test Client that enforces CSRF checks https://docs.djangoproject.com/en/stable/ref/csrf/#testing @@ -227,3 +227,35 @@ def dedent_bytes(text): if margin: text = re.sub(b'(?m)^' + margin, b'', text) return text + + +def make_fileobj(content, filename=None, content_type=None, encoding=None): + """ + Returns a file-like object that can be used in Django test Client + post data to simulate an uploaded file. + """ + # The logic that unpacks this is in django.test.client.encode_file. + if isinstance(content, str): + content = content.encode(encoding or 'utf-8') + fileobj = BytesIO(content) + if filename is not None: + fileobj.name = filename + if content_type is not None: + fileobj.content_type = content_type + return fileobj + + +def encode_multipart(boundary, data): + """ + Version of :func:`django.test.client.encode_multipart` that allows + empty filenames. (The original function substitutes the field's + name if a file has an empty name.) + """ + # For simplicity, encode with the original function, and then + # replace any 'filename=""' with 'filename=""'. This isn't + # entirely robust, but is sufficient for testing use. + encoded = django.test.client.encode_multipart(boundary, data) + re_keys = r"|".join(re.escape(key) for key in data.keys()) + return re.sub( + rb'filename="(%s)"' % re_keys.encode("ascii"), + b'filename=""', encoded)