mirror of
https://github.com/pacnpal/django-anymail.git
synced 2025-12-20 03:41:05 -05:00
SendGrid: fix inbound webhook Unicode error when not utf-8
Fix a crash or text-mangling issue when an inbound message uses a charset other than utf-8 for its text or html body, and SendGrid's "post raw" inbound parse option is *not* enabled. Update docs to recommend "post raw" option. Fixes #187
This commit is contained in:
@@ -42,6 +42,11 @@ Fixes
|
||||
* **Mailjet:** Fix TypeError when sending to or from addresses with display names
|
||||
containing commas (introduced in Django 2.2.15, 3.0.9, and 3.1).
|
||||
|
||||
* **SendGrid:** Fix UnicodeError in inbound webhook, when receiving messages using
|
||||
charsets other than utf-8, and *not* using SendGrid's "post raw" inbound parse
|
||||
option. Also update docs to recommend "post raw" with SendGrid inbound. (Thanks to
|
||||
`@tcourtqtm`_ for reporting the issue.)
|
||||
|
||||
|
||||
Features
|
||||
~~~~~~~~
|
||||
@@ -1104,6 +1109,7 @@ Features
|
||||
.. _@sebbacon: https://github.com/sebbacon
|
||||
.. _@swrobel: https://github.com/swrobel
|
||||
.. _@Thorbenl: https://github.com/Thorbenl
|
||||
.. _@tcourtqtm: https://github.com/tcourtqtm
|
||||
.. _@varche1: https://github.com/varche1
|
||||
.. _@vgrebenschikov: https://github.com/vgrebenschikov
|
||||
.. _@yourcelf: https://github.com/yourcelf
|
||||
|
||||
@@ -4,6 +4,7 @@ from datetime import datetime
|
||||
from django.utils.timezone import utc
|
||||
|
||||
from .base import AnymailBaseWebhookView
|
||||
from .._email_compat import EmailBytesParser
|
||||
from ..inbound import AnymailInboundMessage
|
||||
from ..signals import inbound, tracking, AnymailInboundEvent, AnymailTrackingEvent, EventType, RejectReason
|
||||
|
||||
@@ -131,6 +132,9 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView):
|
||||
# Inbound uses the entire Django request as esp_event, because we need POST and FILES.
|
||||
# Note that request.POST is case-sensitive (unlike email.message.Message headers).
|
||||
esp_event = request
|
||||
# Must access body before any POST fields, or it won't be available if we need
|
||||
# it later (see text_charset and html_charset handling below).
|
||||
_ensure_body_is_available_later = request.body # noqa: F841
|
||||
if 'headers' in request.POST:
|
||||
# Default (not "Send Raw") inbound fields
|
||||
message = self.message_from_sendgrid_parsed(esp_event)
|
||||
@@ -183,11 +187,33 @@ class SendGridInboundWebhookView(AnymailBaseWebhookView):
|
||||
for att_id in sorted(attachment_info.keys())
|
||||
]
|
||||
|
||||
default_charset = request.POST.encoding.lower() # (probably utf-8)
|
||||
text = request.POST.get('text')
|
||||
text_charset = charsets.get('text', default_charset).lower()
|
||||
html = request.POST.get('html')
|
||||
html_charset = charsets.get('html', default_charset).lower()
|
||||
if (text and text_charset != default_charset) or (html and html_charset != default_charset):
|
||||
# Django has parsed text and/or html fields using the wrong charset.
|
||||
# We need to re-parse the raw form data and decode each field separately,
|
||||
# using the indicated charsets. The email package parses multipart/form-data
|
||||
# retaining bytes content. (In theory, we could instead just change
|
||||
# request.encoding and access the POST fields again, per Django docs,
|
||||
# but that seems to have bugs around the cached request._files.)
|
||||
raw_data = b"".join([
|
||||
b"Content-Type: ", request.META['CONTENT_TYPE'].encode('ascii'),
|
||||
b"\r\n\r\n",
|
||||
request.body
|
||||
])
|
||||
parsed_parts = EmailBytesParser().parsebytes(raw_data).get_payload()
|
||||
for part in parsed_parts:
|
||||
name = part.get_param('name', header='content-disposition')
|
||||
if name == 'text':
|
||||
text = part.get_payload(decode=True).decode(text_charset)
|
||||
elif name == 'html':
|
||||
html = part.get_payload(decode=True).decode(html_charset)
|
||||
# (subject, from, to, etc. are parsed from raw headers field,
|
||||
# so no need to worry about their separate POST field charsets)
|
||||
|
||||
return AnymailInboundMessage.construct(
|
||||
raw_headers=request.POST.get('headers', ""), # includes From, To, Cc, Subject, etc.
|
||||
text=request.POST.get('text', None),
|
||||
text_charset=charsets.get('text', 'utf-8'),
|
||||
html=request.POST.get('html', None),
|
||||
html_charset=charsets.get('html', 'utf-8'),
|
||||
attachments=attachments,
|
||||
)
|
||||
text=text, html=html, attachments=attachments)
|
||||
|
||||
@@ -426,10 +426,10 @@ If you want to use Anymail's normalized :attr:`~anymail.inbound.AnymailInboundMe
|
||||
:attr:`~anymail.inbound.AnymailInboundMessage.spam_score` attributes, be sure to enable the "Check
|
||||
incoming emails for spam" checkbox.
|
||||
|
||||
You have a choice for SendGrid's "POST the raw, full MIME message" checkbox. Anymail will handle
|
||||
either option (and you can change it at any time). Enabling raw MIME will give the most accurate
|
||||
representation of *any* received email (including complex forms like multi-message mailing list
|
||||
digests). But disabling it *may* use less memory while processing messages with many large attachments.
|
||||
In most cases, you should enable SendGrid's "POST the raw, full MIME message" checkbox.
|
||||
Anymail should work either way (and you can change the option at any time), but enabling
|
||||
raw MIME will give the most accurate representation of *any* received email (including
|
||||
complex forms like multi-message mailing list digests).
|
||||
|
||||
.. _Inbound Parse Webhook:
|
||||
https://sendgrid.com/docs/Classroom/Basics/Inbound_Parse_Webhook/setting_up_the_inbound_parse_webhook.html
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
from textwrap import dedent
|
||||
|
||||
@@ -9,7 +11,7 @@ from anymail.inbound import AnymailInboundMessage
|
||||
from anymail.signals import AnymailInboundEvent
|
||||
from anymail.webhooks.sendgrid import SendGridInboundWebhookView
|
||||
|
||||
from .utils import sample_image_content, sample_email_content
|
||||
from .utils import dedent_bytes, sample_image_content, sample_email_content
|
||||
from .webhook_cases import WebhookTestCase
|
||||
|
||||
|
||||
@@ -183,3 +185,59 @@ class SendgridInboundTestCase(WebhookTestCase):
|
||||
self.assertEqual(message.subject, 'Raw MIME test')
|
||||
self.assertEqual(message.text, u"It's a body\N{HORIZONTAL ELLIPSIS}\n")
|
||||
self.assertEqual(message.html, u"""<div dir="ltr">It's a body\N{HORIZONTAL ELLIPSIS}</div>\n""")
|
||||
|
||||
def test_inbound_charsets(self):
    # Verifies that the text and html fields of a parsed (not "post raw")
    # SendGrid inbound POST are decoded with the charsets named in the
    # "charsets" field, and that addresses/subject come from the raw headers.
    #
    # Captured (sanitized) from actual SendGrid inbound webhook payload 7/2020,
    # using a test message constructed with a variety of charsets:
    form_body = dedent_bytes(b"""\
        --xYzZY
        Content-Disposition: form-data; name="headers"

        Date: Fri, 24 Jul 2020 16:43:46 UTC
        To: =?utf-8?q?R=C3=A9cipiendaire_pr=C3=A9cieux?= <inbound@sg.example.com>
        From: =?utf-8?q?Op=C3=A9rateur?= de test <sender@example.com>
        Subject: =?cp850?q?Como_usted_pidi=A2?=

        --xYzZY
        Content-Disposition: form-data; name="subject"

        Como usted pidi\xa2
        --xYzZY
        Content-Disposition: form-data; name="to"

        R\xc3\xa9cipiendaire pr\xc3\xa9cieux <inbound@sg.example.com>
        --xYzZY
        Content-Disposition: form-data; name="html"

        <p>\xbfEsto se ve como esperabas?</p>
        --xYzZY
        Content-Disposition: form-data; name="from"

        Op\xc3\xa9rateur de test <sender@example.com>
        --xYzZY
        Content-Disposition: form-data; name="text"

        Test the ESP\x92s inbound charset handling\x85
        --xYzZY
        Content-Disposition: form-data; name="charsets"

        {"to":"UTF-8","cc":"UTF-8","html":"iso-8859-1","subject":"cp850","from":"UTF-8","text":"windows-1252"}
        --xYzZY--
        """)
    # multipart/form-data requires CRLF line endings on the wire
    payload = form_body.replace(b"\n", b"\r\n")

    response = self.client.post(
        '/anymail/sendgrid/inbound/',
        data=payload,
        content_type="multipart/form-data; boundary=xYzZY")
    self.assertEqual(response.status_code, 200)

    handler_kwargs = self.assert_handler_called_once_with(
        self.inbound_handler, sender=SendGridInboundWebhookView,
        event=ANY, esp_name='SendGrid')
    message = handler_kwargs['event'].message

    # Sender, parsed from the raw "headers" field
    sender = message.from_email
    self.assertEqual(sender.display_name, u"Opérateur de test")
    self.assertEqual(sender.addr_spec, "sender@example.com")

    # Exactly one recipient, also parsed from the raw "headers" field
    self.assertEqual(len(message.to), 1)
    recipient = message.to[0]
    self.assertEqual(recipient.display_name, u"Récipiendaire précieux")
    self.assertEqual(recipient.addr_spec, "inbound@sg.example.com")

    # Subject (cp850), text (windows-1252) and html (iso-8859-1) each use
    # a different non-utf-8 charset in the payload above
    self.assertEqual(message.subject, u"Como usted pidió")
    self.assertEqual(message.text, u"Test the ESP’s inbound charset handling…")
    self.assertEqual(message.html, u"<p>¿Esto se ve como esperabas?</p>")
|
||||
|
||||
@@ -324,3 +324,45 @@ class ClientWithCsrfChecks(Client):
|
||||
def __init__(self, **defaults):
    # Always turn on CSRF enforcement; every other keyword argument is
    # forwarded untouched to the parent Client constructor.
    parent = super(ClientWithCsrfChecks, self)
    parent.__init__(enforce_csrf_checks=True, **defaults)
|
||||
|
||||
|
||||
# dedent for bytestrs
# https://stackoverflow.com/a/39841195/647002
_whitespace_only_re = re.compile(b'^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile(b'(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent_bytes(text):
    """textwrap.dedent, but for bytes

    Removes the longest run of leading spaces/tabs that is common to every
    non-blank line of `text`. Lines consisting only of spaces and tabs are
    first reduced to empty lines, so they never influence the margin.
    Returns the resulting bytes.
    """
    text = _whitespace_only_re.sub(b'', text)

    margin = None
    for indent in _leading_whitespace_re.findall(text):
        if margin is None or margin.startswith(indent):
            # First indented line seen, or a line no deeper than (and
            # consistent with) the current margin: it becomes the margin.
            margin = indent
        elif not indent.startswith(margin):
            # Neither indent is a prefix of the other (e.g., mixed tabs and
            # spaces): keep only the whitespace prefix they share.
            shared = 0
            for margin_char, indent_char in zip(margin, indent):
                if margin_char != indent_char:
                    break
                shared += 1
            margin = margin[:shared]
        # else: line is indented deeper than the margin; margin is unchanged

    if not margin:
        # No lines with content, or no common indentation to strip
        return text
    return re.compile(b'^' + margin, re.MULTILINE).sub(b'', text)
|
||||
|
||||
Reference in New Issue
Block a user