Mailgun/SendGrid inbound: workaround Django filename issue

Workaround for Django multipart/form-data limitation
where certain attachment filenames cause fields to be dropped
or to end up in request.POST rather than request.FILES.

Handle the MultiValueDictKeyError in inbound webhooks when
this has occurred. Also update docs to recommend avoiding
the problem by using Mailgun and SendGrid's "raw MIME" options.

Also handle reported cases of empty, duplicate keys in Mailgun's
content-id-map.

Fixes #272
This commit is contained in:
medmunds
2022-05-10 11:47:57 -07:00
committed by Mike Edmunds
parent 6a2e30ba8f
commit 09f21a5c2d
9 changed files with 244 additions and 52 deletions

View File

@@ -30,6 +30,23 @@ vNext
*Unreleased changes in main branch*
Fixes
~~~~~
* **Mailgun and SendGrid inbound:** Work around a Django limitation that
drops attachments with certain filenames. The missing attachments
are now simply omitted from the resulting inbound message. (In earlier
releases, they would cause a MultiValueDictKeyError in Anymail's
inbound webhook.)
Anymail documentation now recommends using Mailgun's and SendGrid's "raw MIME"
inbound options, which avoid the problem and preserve all attachments.
See `Mailgun inbound <https://anymail.readthedocs.io/en/latest/esps/mailgun/#mailgun-inbound>`__
and `SendGrid inbound <https://anymail.readthedocs.io/en/latest/esps/sendgrid/#sendgrid-inbound>`__
for details. (Thanks to `@erikdrums`_ for reporting and helping investigate the problem.)
Other
~~~~~
@@ -1299,6 +1316,7 @@ Features
.. _@coupa-anya: https://github.com/coupa-anya
.. _@decibyte: https://github.com/decibyte
.. _@dominik-lekse: https://github.com/dominik-lekse
.. _@erikdrums: https://github.com/erikdrums
.. _@ewingrj: https://github.com/ewingrj
.. _@fdemmer: https://github.com/fdemmer
.. _@Flexonze: https://github.com/Flexonze

View File

@@ -99,7 +99,7 @@ class AnymailInboundMessage(Message):
def inline_attachments(self):
"""dict of Content-ID: attachment (as MIMEPart objects)"""
return {unquote(part['Content-ID']): part for part in self.walk()
if part.is_inline_attachment() and part['Content-ID']}
if part.is_inline_attachment() and part['Content-ID'] is not None}
def get_address_header(self, header):
"""Return the value of header parsed into a (possibly-empty) list of EmailAddress objects"""
@@ -299,11 +299,11 @@ class AnymailInboundMessage(Message):
# some sort of lazy attachment where the content is only pulled in if/when
# requested (and then use file.chunks() to minimize memory usage)
return cls.construct_attachment(
content_type=file.content_type,
content_type=getattr(file, 'content_type', None),
content=file.read(),
filename=file.name,
filename=getattr(file, 'name', None),
content_id=content_id,
charset=file.charset)
charset=getattr(file, 'charset', None))
@classmethod
def construct_attachment(cls, content_type, content,

View File

@@ -395,17 +395,27 @@ class MailgunInboundWebhookView(MailgunBaseWebhookView):
except (KeyError, TypeError):
attachments = None
else:
# Load attachments from posted files: Mailgun file field names are 1-based
att_ids = ['attachment-%d' % i for i in range(1, attachment_count+1)]
att_cids = { # filename: content-id (invert content-id-map)
att_id: cid for cid, att_id
in json.loads(request.POST.get('content-id-map', '{}')).items()
}
attachments = [
AnymailInboundMessage.construct_attachment_from_uploaded_file(
request.FILES[att_id], content_id=att_cids.get(att_id, None))
for att_id in att_ids
]
# Load attachments from posted files: attachment-1, attachment-2, etc.
# content-id-map is {content-id: attachment-id}, identifying which files are inline attachments.
# Invert it to {attachment-id: content-id}, while handling potentially duplicate content-ids.
field_to_content_id = json.loads(
request.POST.get('content-id-map', '{}'),
object_pairs_hook=lambda pairs: {att_id: cid for (cid, att_id) in pairs})
attachments = []
for n in range(1, attachment_count+1):
attachment_id = "attachment-%d" % n
try:
file = request.FILES[attachment_id]
except KeyError:
# Django's multipart/form-data handling drops FILES with certain
# filenames (for security) or with empty filenames (Django ticket 15879).
# (To avoid this problem, use Mailgun's "raw MIME" inbound option.)
pass
else:
content_id = field_to_content_id.get(attachment_id)
attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file(
file, content_id=content_id)
attachments.append(attachment)
return AnymailInboundMessage.construct(
headers=json.loads(request.POST['message-headers']), # includes From, To, Cc, Subject, etc.

View File

@@ -181,12 +181,22 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView):
attachments = None
else:
# Load attachments from posted files
attachments = [
AnymailInboundMessage.construct_attachment_from_uploaded_file(
request.FILES[att_id],
content_id=attachment_info[att_id].get("content-id", None))
for att_id in sorted(attachment_info.keys())
]
attachments = []
for attachment_id in sorted(attachment_info.keys()):
try:
file = request.FILES[attachment_id]
except KeyError:
# Django's multipart/form-data handling drops FILES with certain
# filenames (for security) or with empty filenames (Django ticket 15879).
# (To avoid this problem, enable SendGrid's "raw, full MIME" inbound option.)
pass
else:
# (This deliberately ignores attachment_info[attachment_id]["filename"],
# which has not passed through Django's filename sanitization.)
content_id = attachment_info[attachment_id].get("content-id")
attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file(
file, content_id=content_id)
attachments.append(attachment)
default_charset = request.POST.encoding.lower() # (probably utf-8)
text = request.POST.get('text')

View File

@@ -490,25 +490,22 @@ Inbound webhook
---------------
If you want to receive email from Mailgun through Anymail's normalized :ref:`inbound <inbound>`
handling, follow Mailgun's `Receiving, Storing and Fowarding Messages`_ guide to set up
an inbound route that forwards to Anymail's inbound webhook. (You can configure routes
using Mailgun's API, or simply using the `Mailgun receiving config`_.)
handling, follow Mailgun's `Receiving, Forwarding and Storing Messages`_ guide to set up
an inbound route that forwards to Anymail's inbound webhook. Create an inbound route
in Mailgun's dashboard on the `Email Receiving panel`_, or use Mailgun's API.
The *action* for your route will be either:
Use this url as the route's "forward" destination:
:samp:`forward("https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound/")`
:samp:`forward("https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound_mime/")`
:samp:`https://{random}:{random}@{yoursite.example.com}/anymail/mailgun/inbound_mime/`
* *forward* is required to select Mailgun's "forward" action
(Anymail does not support using the "store" action)
* *random:random* is an :setting:`ANYMAIL_WEBHOOK_SECRET` shared secret
* *yoursite.example.com* is your Django site
* :samp:`mime` at the end tells Mailgun to supply the entire message in "raw MIME" format
(see note below)
Anymail accepts either of Mailgun's "fully-parsed" (.../inbound/) and "raw MIME" (.../inbound_mime/)
formats; the URL tells Mailgun which you want. Because Anymail handles parsing and normalizing the data,
both are equally easy to use. The raw MIME option will give the most accurate representation of *any*
received email (including complex forms like multi-message mailing list digests). The fully-parsed option
*may* use less memory while processing messages with many large attachments.
You must use Mailgun's "forward" route action; Anymail does not currently support "store and notify."
(For debugging, you might find it helpful to *also* enable the "store" route action to keep a copy
of inbound messages on Mailgun's servers, but Anymail's inbound webhook won't work as a store-notify url.)
If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMessage.spam_detected` and
:attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, you'll need to set your Mailgun
@@ -519,11 +516,24 @@ Anymail will verify Mailgun inbound message events using your
:setting:`MAILGUN_WEBHOOK_SIGNING_KEY <ANYMAIL_MAILGUN_WEBHOOK_SIGNING_KEY>`
Anymail setting. By default, Mailgun's webhook signature provides similar security
to Anymail's shared webhook secret, so it's acceptable to omit the
:setting:`ANYMAIL_WEBHOOK_SECRET` setting (and "random:random@" portion of the
action) with Mailgun inbound routing.
:setting:`ANYMAIL_WEBHOOK_SECRET` setting (and :samp:`{random:random}@` portion of the
forwarding url) with Mailgun inbound routing.
.. note::
Anymail also supports Mailgun's "fully-parsed" inbound message format, but the "raw MIME"
version is preferred to get the most accurate representation of any received email.
Using raw MIME also avoids a limitation in Django's :mimetype:`multipart/form-data` handling
that can strip attachments with certain filenames (and inline images without filenames).
To use Mailgun's fully-parsed format, change :samp:`.../inbound_mime/` to just
:samp:`.../inbound/` at the end of the route forwarding url.
.. versionchanged:: vNext
Using Mailgun's full-parsed (not raw MIME) inbound message format is no longer recommended.
.. _Receiving, Storing and Fowarding Messages:
.. _Receiving, Forwarding and Storing Messages:
https://documentation.mailgun.com/en/latest/user_manual.html#receiving-forwarding-and-storing-messages
.. _Mailgun receiving config: https://app.mailgun.com/app/receiving/routes
.. _Email Receiving panel: https://app.mailgun.com/app/receiving/routes
.. _Mailgun domains config: https://app.mailgun.com/app/sending/domains

View File

@@ -428,17 +428,24 @@ The Destination URL setting will be:
* *random:random* is an :setting:`ANYMAIL_WEBHOOK_SECRET` shared secret
* *yoursite.example.com* is your Django site
Be sure the URL has a trailing slash. (SendGrid's inbound processing won't follow Django's
You should enable SendGrid's "POST the raw, full MIME message" checkbox (see note below).
And be sure the URL has a trailing slash. (SendGrid's inbound processing won't follow Django's
:setting:`APPEND_SLASH` redirect.)
If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMessage.spam_detected` and
:attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, be sure to enable the "Check
incoming emails for spam" checkbox.
In most cases, you should enable SendGrid's "POST the raw, full MIME message" checkbox.
Anymail should work either way (and you can change the option at any time), but enabling
raw MIME will give the most accurate representation of *any* received email (including
complex forms like multi-message mailing list digests).
.. note::
Anymail supports either option for SendGrid's "POST the raw, full MIME message" checkbox, but
enabling this setting is preferred to get the most accurate representation of any received email.
Using raw MIME also avoids a limitation in Django's :mimetype:`multipart/form-data` handling
that can strip attachments with certain filenames.
.. versionchanged:: vNext
Leaving SendGrid's "full MIME" checkbox disabled is no longer recommended.
.. _Inbound Parse Webhook:
https://sendgrid.com/docs/Classroom/Basics/Inbound_Parse_Webhook/setting_up_the_inbound_parse_webhook.html

View File

@@ -15,7 +15,7 @@ from anymail.webhooks.mailgun import MailgunInboundWebhookView
from .test_mailgun_webhooks import (
TEST_WEBHOOK_SIGNING_KEY, mailgun_sign_payload,
mailgun_sign_legacy_payload, querydict_to_postdict)
from .utils import sample_image_content, sample_email_content
from .utils import sample_image_content, sample_email_content, encode_multipart, make_fileobj
from .webhook_cases import WebhookTestCase
@@ -104,6 +104,7 @@ class MailgunInboundTestCase(WebhookTestCase):
att2.name = 'image.png'
email_content = sample_email_content()
att3 = BytesIO(email_content)
att3.name = '\\share\\mail\\forwarded.msg'
att3.content_type = 'message/rfc822; charset="us-ascii"'
raw_event = mailgun_sign_legacy_payload({
'message-headers': '[]',
@@ -125,6 +126,7 @@ class MailgunInboundTestCase(WebhookTestCase):
self.assertEqual(attachments[0].get_filename(), 'test.txt')
self.assertEqual(attachments[0].get_content_type(), 'text/plain')
self.assertEqual(attachments[0].get_content_text(), 'test attachment')
self.assertEqual(attachments[1].get_filename(), 'forwarded.msg') # Django strips paths
self.assertEqual(attachments[1].get_content_type(), 'message/rfc822')
self.assertEqualIgnoringHeaderFolding(attachments[1].get_content_bytes(), email_content)
@@ -135,6 +137,68 @@ class MailgunInboundTestCase(WebhookTestCase):
self.assertEqual(inline.get_content_type(), 'image/png')
self.assertEqual(inline.get_content_bytes(), image_content)
def test_filtered_attachment_filenames(self):
# Make sure the inbound webhook can deal with missing fields caused by
# Django's multipart/form-data filename filtering. (The attachments are lost,
# but shouldn't cause errors in the inbound webhook.)
filenames = [
"", "path\\", "path/"
".", "path\\.", "path/.",
"..", "path\\..", "path/..",
]
num_attachments = len(filenames)
payload = {
"attachment-%d" % (i+1): make_fileobj("content", filename=filenames[i], content_type="text/pdf")
for i in range(num_attachments)
}
payload.update({
'message-headers': '[]',
'attachment-count': str(num_attachments),
})
# Must do our own multipart/form-data encoding for empty filenames:
response = self.client.post('/anymail/mailgun/inbound/',
data=encode_multipart("BoUnDaRy", mailgun_sign_legacy_payload(payload)),
content_type="multipart/form-data; boundary=BoUnDaRy")
self.assertEqual(response.status_code, 200)
kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=MailgunInboundWebhookView,
event=ANY, esp_name='Mailgun')
# Different Django releases strip different filename patterns.
# Just verify that at least some attachments got dropped (so the test is valid)
# without causing an error in the inbound webhook:
attachments = kwargs['event'].message.attachments
self.assertLess(len(attachments), num_attachments)
def test_unusual_content_id_map(self):
# Under unknown conditions, Mailgun appears to generate a content-id-map with multiple
# empty keys (and possibly other duplicate keys). We still want to correctly identify
# inline attachments from it.
raw_event = mailgun_sign_legacy_payload({
'message-headers': '[]',
'attachment-count': '4',
'content-id-map': '{"": "attachment-1", "": "attachment-2",'
' "<abc>": "attachment-3", "<abc>": "attachment-4"}',
'attachment-1': make_fileobj("att1"),
'attachment-2': make_fileobj("att2"),
'attachment-3': make_fileobj("att3"),
'attachment-4': make_fileobj("att4"),
})
response = self.client.post('/anymail/mailgun/inbound/', data=raw_event)
self.assertEqual(response.status_code, 200)
kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=MailgunInboundWebhookView,
event=ANY, esp_name='Mailgun')
event = kwargs['event']
message = event.message
self.assertEqual(len(message.attachments), 0) # all inlines
inlines = [part for part in message.walk() if part.is_inline_attachment()]
self.assertEqual(len(inlines), 4)
self.assertEqual(inlines[0]["Content-ID"], "")
self.assertEqual(inlines[1]["Content-ID"], "")
self.assertEqual(inlines[2]["Content-ID"], "<abc>")
self.assertEqual(inlines[3]["Content-ID"], "<abc>")
def test_inbound_mime(self):
# Mailgun provides the full, raw MIME message if the webhook url ends in 'mime'
raw_event = mailgun_sign_legacy_payload({

View File

@@ -9,7 +9,7 @@ from anymail.inbound import AnymailInboundMessage
from anymail.signals import AnymailInboundEvent
from anymail.webhooks.sendgrid import SendGridInboundWebhookView
from .utils import dedent_bytes, sample_image_content, sample_email_content
from .utils import dedent_bytes, sample_image_content, sample_email_content, encode_multipart, make_fileobj
from .webhook_cases import WebhookTestCase
@@ -96,15 +96,16 @@ class SendgridInboundTestCase(WebhookTestCase):
att2.name = 'image.png'
email_content = sample_email_content()
att3 = BytesIO(email_content)
att3.name = '\\share\\mail\\forwarded.msg'
att3.content_type = 'message/rfc822; charset="us-ascii"'
raw_event = {
'headers': '',
'attachments': '3',
'attachment-info': json.dumps({
"attachment3": {"filename": "", "name": "", "charset": "US-ASCII", "type": "message/rfc822"},
"attachment2": {"filename": "image.png", "name": "image.png", "type": "image/png",
"content-id": "abc123"},
"attachment1": {"filename": "test.txt", "name": "test.txt", "type": "text/plain"},
"attachment3": {"filename": "\\share\\mail\\forwarded.msg",
"charset": "US-ASCII", "type": "message/rfc822"},
"attachment2": {"filename": "image.png", "type": "image/png", "content-id": "abc123"},
"attachment1": {"filename": "test.txt", "charset": "UTF-8", "type": "text/plain"},
}),
'content-ids': '{"abc123": "attachment2"}',
'attachment1': att1,
@@ -123,6 +124,7 @@ class SendgridInboundTestCase(WebhookTestCase):
self.assertEqual(attachments[0].get_filename(), 'test.txt')
self.assertEqual(attachments[0].get_content_type(), 'text/plain')
self.assertEqual(attachments[0].get_content_text(), 'test attachment')
self.assertEqual(attachments[1].get_filename(), 'forwarded.msg') # Django strips path
self.assertEqual(attachments[1].get_content_type(), 'message/rfc822')
self.assertEqualIgnoringHeaderFolding(attachments[1].get_content_bytes(), email_content)
@@ -133,6 +135,45 @@ class SendgridInboundTestCase(WebhookTestCase):
self.assertEqual(inline.get_content_type(), 'image/png')
self.assertEqual(inline.get_content_bytes(), image_content)
def test_filtered_attachment_filenames(self):
# Make sure the inbound webhook can deal with missing fields caused by
# Django's multipart/form-data filename filtering. (The attachments are lost,
# but shouldn't cause errors in the inbound webhook.)
filenames = [
"", "path\\", "path/"
".", "path\\.", "path/.",
"..", "path\\..", "path/..",
]
num_attachments = len(filenames)
payload = {
"attachment%d" % (i+1): make_fileobj("content", filename=filenames[i], content_type="text/pdf")
for i in range(num_attachments)
}
attachment_info = {
key: {"filename": value.name, "type": "text/pdf"}
for key, value in payload.items()
}
payload.update({
'headers': '',
'attachments': str(num_attachments),
'attachment-info': json.dumps(attachment_info),
})
# Must do our own form-data encoding to properly test empty attachment filenames.
# Must do our own multipart/form-data encoding for empty filenames:
response = self.client.post('/anymail/sendgrid/inbound/',
data=encode_multipart("BoUnDaRy", payload),
content_type="multipart/form-data; boundary=BoUnDaRy")
self.assertEqual(response.status_code, 200)
kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=SendGridInboundWebhookView,
event=ANY, esp_name='SendGrid')
# Different Django releases strip different filename patterns.
# Just verify that at least some attachments got dropped (so the test is valid)
# without causing an error in the inbound webhook:
attachments = kwargs['event'].message.attachments
self.assertLess(len(attachments), num_attachments)
def test_inbound_mime(self):
# SendGrid has an option to send the full, raw MIME message
raw_event = {

View File

@@ -5,12 +5,12 @@ import uuid
import warnings
from base64 import b64decode
from contextlib import contextmanager
from io import StringIO
from io import BytesIO, StringIO
from pathlib import Path
from unittest import TestCase
from unittest.util import safe_repr
from django.test import Client
import django.test.client
def decode_att(att):
@@ -177,7 +177,7 @@ class AnymailTestMixin(TestCase):
sys.stdout = old_stdout
class ClientWithCsrfChecks(Client):
class ClientWithCsrfChecks(django.test.Client):
"""Django test Client that enforces CSRF checks
https://docs.djangoproject.com/en/stable/ref/csrf/#testing
@@ -227,3 +227,35 @@ def dedent_bytes(text):
if margin:
text = re.sub(b'(?m)^' + margin, b'', text)
return text
def make_fileobj(content, filename=None, content_type=None, encoding=None):
"""
Returns a file-like object that can be used in Django test Client
post data to simulate an uploaded file.
"""
# The logic that unpacks this is in django.test.client.encode_file.
if isinstance(content, str):
content = content.encode(encoding or 'utf-8')
fileobj = BytesIO(content)
if filename is not None:
fileobj.name = filename
if content_type is not None:
fileobj.content_type = content_type
return fileobj
def encode_multipart(boundary, data):
"""
Version of :func:`django.test.client.encode_multipart` that allows
empty filenames. (The original function substitutes the field's
name if a file has an empty name.)
"""
# For simplicity, encode with the original function, and then
# replace any 'filename="<key>"' with 'filename=""'. This isn't
# entirely robust, but is sufficient for testing use.
encoded = django.test.client.encode_multipart(boundary, data)
re_keys = r"|".join(re.escape(key) for key in data.keys())
return re.sub(
rb'filename="(%s)"' % re_keys.encode("ascii"),
b'filename=""', encoded)