mirror of
https://github.com/pacnpal/django-anymail.git
synced 2025-12-20 03:41:05 -05:00
SendGrid: fix inbound webhook Unicode error when not utf-8
Fix a crash or text-mangling issue when an inbound message uses a charset other than utf-8 for its text or html body, and SendGrid's "post raw" inbound parse option is *not* enabled. Update docs to recommend "post raw" option. Fixes #187
This commit is contained in:
@@ -42,6 +42,11 @@ Fixes
|
|||||||
* **Mailjet:** Fix TypeError when sending to or from addresses with display names
|
* **Mailjet:** Fix TypeError when sending to or from addresses with display names
|
||||||
containing commas (introduced in Django 2.2.15, 3.0.9, and 3.1).
|
containing commas (introduced in Django 2.2.15, 3.0.9, and 3.1).
|
||||||
|
|
||||||
|
* **SendGrid:** Fix UnicodeError in inbound webhook, when receiving messages using
|
||||||
|
charsets other than utf-8, and *not* using SendGrid's "post raw" inbound parse
|
||||||
|
option. Also update docs to recommend "post raw" with SendGrid inbound. (Thanks to
|
||||||
|
`@tcourtqtm`_ for reporting the issue.)
|
||||||
|
|
||||||
|
|
||||||
Features
|
Features
|
||||||
~~~~~~~~
|
~~~~~~~~
|
||||||
@@ -1104,6 +1109,7 @@ Features
|
|||||||
.. _@sebbacon: https://github.com/sebbacon
|
.. _@sebbacon: https://github.com/sebbacon
|
||||||
.. _@swrobel: https://github.com/swrobel
|
.. _@swrobel: https://github.com/swrobel
|
||||||
.. _@Thorbenl: https://github.com/Thorbenl
|
.. _@Thorbenl: https://github.com/Thorbenl
|
||||||
|
.. _@tcourtqtm: https://github.com/tcourtqtm
|
||||||
.. _@varche1: https://github.com/varche1
|
.. _@varche1: https://github.com/varche1
|
||||||
.. _@vgrebenschikov: https://github.com/vgrebenschikov
|
.. _@vgrebenschikov: https://github.com/vgrebenschikov
|
||||||
.. _@yourcelf: https://github.com/yourcelf
|
.. _@yourcelf: https://github.com/yourcelf
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from datetime import datetime
|
|||||||
from django.utils.timezone import utc
|
from django.utils.timezone import utc
|
||||||
|
|
||||||
from .base import AnymailBaseWebhookView
|
from .base import AnymailBaseWebhookView
|
||||||
|
from .._email_compat import EmailBytesParser
|
||||||
from ..inbound import AnymailInboundMessage
|
from ..inbound import AnymailInboundMessage
|
||||||
from ..signals import inbound, tracking, AnymailInboundEvent, AnymailTrackingEvent, EventType, RejectReason
|
from ..signals import inbound, tracking, AnymailInboundEvent, AnymailTrackingEvent, EventType, RejectReason
|
||||||
|
|
||||||
@@ -131,6 +132,9 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView):
|
|||||||
# Inbound uses the entire Django request as esp_event, because we need POST and FILES.
|
# Inbound uses the entire Django request as esp_event, because we need POST and FILES.
|
||||||
# Note that request.POST is case-sensitive (unlike email.message.Message headers).
|
# Note that request.POST is case-sensitive (unlike email.message.Message headers).
|
||||||
esp_event = request
|
esp_event = request
|
||||||
|
# Must access body before any POST fields, or it won't be available if we need
|
||||||
|
# it later (see text_charset and html_charset handling below).
|
||||||
|
_ensure_body_is_available_later = request.body # noqa: F841
|
||||||
if 'headers' in request.POST:
|
if 'headers' in request.POST:
|
||||||
# Default (not "Send Raw") inbound fields
|
# Default (not "Send Raw") inbound fields
|
||||||
message = self.message_from_sendgrid_parsed(esp_event)
|
message = self.message_from_sendgrid_parsed(esp_event)
|
||||||
@@ -183,11 +187,33 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView):
|
|||||||
for att_id in sorted(attachment_info.keys())
|
for att_id in sorted(attachment_info.keys())
|
||||||
]
|
]
|
||||||
|
|
||||||
|
default_charset = request.POST.encoding.lower() # (probably utf-8)
|
||||||
|
text = request.POST.get('text')
|
||||||
|
text_charset = charsets.get('text', default_charset).lower()
|
||||||
|
html = request.POST.get('html')
|
||||||
|
html_charset = charsets.get('html', default_charset).lower()
|
||||||
|
if (text and text_charset != default_charset) or (html and html_charset != default_charset):
|
||||||
|
# Django has parsed text and/or html fields using the wrong charset.
|
||||||
|
# We need to re-parse the raw form data and decode each field separately,
|
||||||
|
# using the indicated charsets. The email package parses multipart/form-data
|
||||||
|
# retaining bytes content. (In theory, we could instead just change
|
||||||
|
# request.encoding and access the POST fields again, per Django docs,
|
||||||
|
# but that seems to have bugs around the cached request._files.)
|
||||||
|
raw_data = b"".join([
|
||||||
|
b"Content-Type: ", request.META['CONTENT_TYPE'].encode('ascii'),
|
||||||
|
b"\r\n\r\n",
|
||||||
|
request.body
|
||||||
|
])
|
||||||
|
parsed_parts = EmailBytesParser().parsebytes(raw_data).get_payload()
|
||||||
|
for part in parsed_parts:
|
||||||
|
name = part.get_param('name', header='content-disposition')
|
||||||
|
if name == 'text':
|
||||||
|
text = part.get_payload(decode=True).decode(text_charset)
|
||||||
|
elif name == 'html':
|
||||||
|
html = part.get_payload(decode=True).decode(html_charset)
|
||||||
|
# (subject, from, to, etc. are parsed from raw headers field,
|
||||||
|
# so no need to worry about their separate POST field charsets)
|
||||||
|
|
||||||
return AnymailInboundMessage.construct(
|
return AnymailInboundMessage.construct(
|
||||||
raw_headers=request.POST.get('headers', ""), # includes From, To, Cc, Subject, etc.
|
raw_headers=request.POST.get('headers', ""), # includes From, To, Cc, Subject, etc.
|
||||||
text=request.POST.get('text', None),
|
text=text, html=html, attachments=attachments)
|
||||||
text_charset=charsets.get('text', 'utf-8'),
|
|
||||||
html=request.POST.get('html', None),
|
|
||||||
html_charset=charsets.get('html', 'utf-8'),
|
|
||||||
attachments=attachments,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -426,10 +426,10 @@ If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMe
|
|||||||
:attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, be sure to enable the "Check
|
:attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, be sure to enable the "Check
|
||||||
incoming emails for spam" checkbox.
|
incoming emails for spam" checkbox.
|
||||||
|
|
||||||
You have a choice for SendGrid's "POST the raw, full MIME message" checkbox. Anymail will handle
|
In most cases, you should enable SendGrid's "POST the raw, full MIME message" checkbox.
|
||||||
either option (and you can change it at any time). Enabling raw MIME will give the most accurate
|
Anymail should work either way (and you can change the option at any time), but enabling
|
||||||
representation of *any* received email (including complex forms like multi-message mailing list
|
raw MIME will give the most accurate representation of *any* received email (including
|
||||||
digests). But disabling it *may* use less memory while processing messages with many large attachments.
|
complex forms like multi-message mailing list digests).
|
||||||
|
|
||||||
.. _Inbound Parse Webhook:
|
.. _Inbound Parse Webhook:
|
||||||
https://sendgrid.com/docs/Classroom/Basics/Inbound_Parse_Webhook/setting_up_the_inbound_parse_webhook.html
|
https://sendgrid.com/docs/Classroom/Basics/Inbound_Parse_Webhook/setting_up_the_inbound_parse_webhook.html
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
@@ -9,7 +11,7 @@ from anymail.inbound import AnymailInboundMessage
|
|||||||
from anymail.signals import AnymailInboundEvent
|
from anymail.signals import AnymailInboundEvent
|
||||||
from anymail.webhooks.sendgrid import SendGridInboundWebhookView
|
from anymail.webhooks.sendgrid import SendGridInboundWebhookView
|
||||||
|
|
||||||
from .utils import sample_image_content, sample_email_content
|
from .utils import dedent_bytes, sample_image_content, sample_email_content
|
||||||
from .webhook_cases import WebhookTestCase
|
from .webhook_cases import WebhookTestCase
|
||||||
|
|
||||||
|
|
||||||
@@ -183,3 +185,59 @@ class SendgridInboundTestCase(WebhookTestCase):
|
|||||||
self.assertEqual(message.subject, 'Raw MIME test')
|
self.assertEqual(message.subject, 'Raw MIME test')
|
||||||
self.assertEqual(message.text, u"It's a body\N{HORIZONTAL ELLIPSIS}\n")
|
self.assertEqual(message.text, u"It's a body\N{HORIZONTAL ELLIPSIS}\n")
|
||||||
self.assertEqual(message.html, u"""<div dir="ltr">It's a body\N{HORIZONTAL ELLIPSIS}</div>\n""")
|
self.assertEqual(message.html, u"""<div dir="ltr">It's a body\N{HORIZONTAL ELLIPSIS}</div>\n""")
|
||||||
|
|
||||||
|
def test_inbound_charsets(self):
|
||||||
|
# Captured (sanitized) from actual SendGrid inbound webhook payload 7/2020,
|
||||||
|
# using a test message constructed with a variety of charsets:
|
||||||
|
raw_post = dedent_bytes(b"""\
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="headers"
|
||||||
|
|
||||||
|
Date: Fri, 24 Jul 2020 16:43:46 UTC
|
||||||
|
To: =?utf-8?q?R=C3=A9cipiendaire_pr=C3=A9cieux?= <inbound@sg.example.com>
|
||||||
|
From: =?utf-8?q?Op=C3=A9rateur?= de test <sender@example.com>
|
||||||
|
Subject: =?cp850?q?Como_usted_pidi=A2?=
|
||||||
|
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="subject"
|
||||||
|
|
||||||
|
Como usted pidi\xa2
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="to"
|
||||||
|
|
||||||
|
R\xc3\xa9cipiendaire pr\xc3\xa9cieux <inbound@sg.example.com>
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="html"
|
||||||
|
|
||||||
|
<p>\xbfEsto se ve como esperabas?</p>
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="from"
|
||||||
|
|
||||||
|
Op\xc3\xa9rateur de test <sender@example.com>
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="text"
|
||||||
|
|
||||||
|
Test the ESP\x92s inbound charset handling\x85
|
||||||
|
--xYzZY
|
||||||
|
Content-Disposition: form-data; name="charsets"
|
||||||
|
|
||||||
|
{"to":"UTF-8","cc":"UTF-8","html":"iso-8859-1","subject":"cp850","from":"UTF-8","text":"windows-1252"}
|
||||||
|
--xYzZY--
|
||||||
|
""").replace(b"\n", b"\r\n")
|
||||||
|
|
||||||
|
response = self.client.post('/anymail/sendgrid/inbound/', data=raw_post,
|
||||||
|
content_type="multipart/form-data; boundary=xYzZY")
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
kwargs = self.assert_handler_called_once_with(self.inbound_handler, sender=SendGridInboundWebhookView,
|
||||||
|
event=ANY, esp_name='SendGrid')
|
||||||
|
event = kwargs['event']
|
||||||
|
message = event.message
|
||||||
|
|
||||||
|
self.assertEqual(message.from_email.display_name, u"Opérateur de test")
|
||||||
|
self.assertEqual(message.from_email.addr_spec, "sender@example.com")
|
||||||
|
self.assertEqual(len(message.to), 1)
|
||||||
|
self.assertEqual(message.to[0].display_name, u"Récipiendaire précieux")
|
||||||
|
self.assertEqual(message.to[0].addr_spec, "inbound@sg.example.com")
|
||||||
|
self.assertEqual(message.subject, u"Como usted pidió")
|
||||||
|
self.assertEqual(message.text, u"Test the ESP’s inbound charset handling…")
|
||||||
|
self.assertEqual(message.html, u"<p>¿Esto se ve como esperabas?</p>")
|
||||||
|
|||||||
@@ -324,3 +324,45 @@ class ClientWithCsrfChecks(Client):
|
|||||||
def __init__(self, **defaults):
|
def __init__(self, **defaults):
|
||||||
super(ClientWithCsrfChecks, self).__init__(
|
super(ClientWithCsrfChecks, self).__init__(
|
||||||
enforce_csrf_checks=True, **defaults)
|
enforce_csrf_checks=True, **defaults)
|
||||||
|
|
||||||
|
|
||||||
|
# Bytes-flavoured counterpart of textwrap.dedent
# https://stackoverflow.com/a/39841195/647002
_whitespace_only_re = re.compile(b'^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile(b'(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent_bytes(text):
    """Remove the common leading whitespace from every line of a bytes value.

    Works like textwrap.dedent, but on bytes: lines made up solely of
    spaces and tabs are first reduced to empty lines (and so do not
    influence the margin), then the longest run of spaces/tabs shared by
    all remaining lines is stripped from the start of each line.
    """
    # Blank out whitespace-only lines so they can't shrink the margin.
    normalized = _whitespace_only_re.sub(b'', text)

    # The margin is the longest common prefix of every line's indent.
    margin = None
    for indent in _leading_whitespace_re.findall(normalized):
        if margin is None:
            # First non-blank line: its indent is the starting candidate.
            margin = indent
            continue

        # Count how many leading bytes the candidate shares with this
        # line's indent, and trim the candidate down to that length.
        shared = 0
        for left, right in zip(margin, indent):
            if left != right:
                break
            shared += 1
        margin = margin[:shared]

    if not margin:
        # No non-blank lines, or no indentation in common.
        return normalized
    return re.sub(b'(?m)^' + margin, b'', normalized)
|
||||||
|
|||||||
Reference in New Issue
Block a user