From 0ac248254e3d16e3bad839ba147152bd59acbb6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Martinez?= Date: Fri, 28 Jul 2023 00:10:58 +0200 Subject: [PATCH] Inbound: improve inline content handling * refactor: derive `AnymailInboundMessage` from `email.message.EmailMessage` rather than legacy Python 2.7 `email.message.Message` * feat(inbound): replace confusing `inline_attachments` with `content_id_map` and `inlines`; rename `is_inline_attachment` to `is_inline`; deprecate old names Closes #328 --------- Co-authored-by: Mike Edmunds --- CHANGELOG.rst | 28 +++++++++ anymail/inbound.py | 59 +++++++++++++------ docs/inbound.rst | 93 +++++++++++++++++++++--------- tests/test_amazon_ses_backend.py | 2 +- tests/test_amazon_ses_backendv1.py | 2 +- tests/test_inbound.py | 72 +++++++++++++++++++++-- tests/test_mailersend_inbound.py | 2 +- tests/test_mailgun_inbound.py | 4 +- tests/test_mailjet_inbound.py | 2 +- tests/test_postal_inbound.py | 2 +- tests/test_postmark_inbound.py | 2 +- tests/test_sendgrid_inbound.py | 2 +- tests/test_sparkpost_inbound.py | 2 +- 13 files changed, 212 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ee38612..7ec1607 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -30,6 +30,33 @@ vNext *Unreleased changes* +Features +~~~~~~~~ + +* **Inbound:** Improve `AnymailInboundMessage`'s handling of inline content: + + * Rename `inline_attachments` to `content_id_map`, more accurately reflecting its function. + * Add new `inlines` property that provides a complete list of inline content, + whether or not it includes a *Content-ID*. This is helpful for accessing + inline images that appear directly in a *multipart/mixed* body, such as those + created by the Apple Mail app. + * Rename `is_inline_attachment()` to just `is_inline()`. + + The renamed items are still available, but deprecated, under their old names. + See `docs `__. + (Thanks to `@martinezleoml`_.) + +* **Inbound:** `AnymailInboundMessage` now derives from Python's + `email.message.EmailMessage`, which provides improved compatibility with + email standards. (Thanks to `@martinezleoml`_.) + + +Deprecations +~~~~~~~~~~~~ + +* **Inbound:** `AnymailInboundMessage.inline_attachments` and `.is_inline_attachment()` + have been renamed---see above. + Other ~~~~~ @@ -1525,6 +1552,7 @@ Features .. _@Lekensteyn: https://github.com/Lekensteyn .. _@lewistaylor: https://github.com/lewistaylor .. _@mark-mishyn: https://github.com/mark-mishyn +.. _@martinezleoml: https://github.com/martinezleoml .. _@mbk-ok: https://github.com/mbk-ok .. _@mwheels: https://github.com/mwheels .. _@nuschk: https://github.com/nuschk diff --git a/anymail/inbound.py b/anymail/inbound.py index 97d7d54..061efe6 100644 --- a/anymail/inbound.py +++ b/anymail/inbound.py @@ -1,31 +1,33 @@ +import warnings from base64 import b64decode -from email.message import Message +from email.message import EmailMessage from email.parser import BytesParser, Parser from email.policy import default as default_policy from email.utils import unquote from django.core.files.uploadedfile import SimpleUploadedFile +from .exceptions import AnymailDeprecationWarning from .utils import angle_wrap, parse_address_list, parse_rfc2822date -class AnymailInboundMessage(Message): +class AnymailInboundMessage(EmailMessage): """ A normalized, parsed inbound email message. - A subclass of email.message.Message, with some additional - convenience properties, plus helpful methods backported - from Python 3.6+ email.message.EmailMessage (or really, MIMEPart) + A subclass of email.message.EmailMessage, with some additional + convenience properties. """ - # Why Python email.message.Message rather than django.core.mail.EmailMessage? + # Why Python email.message.EmailMessage rather than django.core.mail.EmailMessage? # Django's EmailMessage is really intended for constructing a (limited subset of) - # Message to send; Message is better designed for representing arbitrary messages: + # an EmailMessage to send; Python's EmailMessage is better designed for representing + # arbitrary messages: # - # * Message is easily parsed from raw mime (which is an inbound format provided - # by many ESPs), and can accurately represent any mime email received - # * Message can represent repeated header fields (e.g., "Received") which - # are common in inbound messages + # * Python's EmailMessage is easily parsed from raw mime (which is an inbound format + # provided by many ESPs), and can accurately represent any mime email received + # * Python's EmailMessage can represent repeated header fields (e.g., "Received") + # which are common in inbound messages # * Django's EmailMessage defaults a bunch of properties in ways that aren't helpful # (e.g., from_email from settings) @@ -103,13 +105,30 @@ class AnymailInboundMessage(Message): """list of attachments (as MIMEPart objects); excludes inlines""" return [part for part in self.walk() if part.is_attachment()] + @property + def inlines(self): + """list of inline parts (as MIMEPart objects)""" + return [part for part in self.walk() if part.is_inline()] + @property def inline_attachments(self): + """DEPRECATED: use content_id_map instead""" + warnings.warn( + "inline_attachments has been renamed to content_id_map and will be removed" + " in the near future.", + AnymailDeprecationWarning, + ) + + return self.content_id_map + + @property + def content_id_map(self): """dict of Content-ID: attachment (as MIMEPart objects)""" + return { unquote(part["Content-ID"]): part for part in self.walk() - if part.is_inline_attachment() and part["Content-ID"] is not None + if part.is_inline() and part["Content-ID"] is not None } def get_address_header(self, header): @@ -143,13 +162,19 @@ class AnymailInboundMessage(Message): return part.get_content_text() return None - # Hoisted from email.message.MIMEPart - def is_attachment(self): - return self.get_content_disposition() == "attachment" + def is_inline(self): + return self.get_content_disposition() == "inline" # New for Anymail def is_inline_attachment(self): - return self.get_content_disposition() == "inline" + """DEPRECATED: use in_inline instead""" + warnings.warn( + "is_inline_attachment has been renamed to is_inline and will be removed" + " in the near future.", + AnymailDeprecationWarning, + ) + + return self.is_inline() def get_content_bytes(self): """Return the raw payload bytes""" @@ -331,7 +356,7 @@ class AnymailInboundMessage(Message): if attachments is not None: for attachment in attachments: - if attachment.is_inline_attachment(): + if attachment.is_inline(): related.attach(attachment) else: msg.attach(attachment) diff --git a/docs/inbound.rst b/docs/inbound.rst index 58dfb4a..1898cc5 100644 --- a/docs/inbound.rst +++ b/docs/inbound.rst @@ -136,10 +136,17 @@ Normalized inbound message .. class:: anymail.inbound.AnymailInboundMessage The :attr:`~AnymailInboundEvent.message` attribute of an :class:`AnymailInboundEvent` - is an AnymailInboundMessage---an extension of Python's standard :class:`email.message.Message` + is an AnymailInboundMessage---an extension of Python's standard :class:`email.message.EmailMessage` with additional features to simplify inbound handling. - In addition to the base :class:`~email.message.Message` functionality, it includes these attributes: + .. versionchanged:: 10.1 + + Earlier releases extended Python's legacy :class:`email.message.Message` class. + :class:`~email.message.EmailMessage` is a superset that fixes bugs and improves + compatibility with email standards. + + In addition to the base :class:`~email.message.EmailMessage` functionality, + :class:`!AnymailInboundMessage` includes these attributes: .. attribute:: envelope_sender @@ -221,6 +228,10 @@ Normalized inbound message The message's plaintext message body as a `str`, or `None` if the message doesn't include a plaintext body. + For certain messages that are sent as plaintext with inline images + (such as those sometimes composed by the Apple Mail app), this will + include only the text before the first inline image. + .. attribute:: html The message's HTML message body as a `str`, or `None` if the @@ -228,17 +239,36 @@ Normalized inbound message .. attribute:: attachments - A `list` of all (non-inline) attachments to the message, or an empty list if there are - no attachments. See :ref:`inbound-attachments` below for the contents of each list item. + A `list` of all attachments to the message, or an empty list if there are + no attachments. See :ref:`inbound-attachments` below a description of the values. - .. attribute:: inline_attachments + If the inbound message includes an attached message, :attr:`!attachments` + will include the attached message and all of *its* attachments, recursively. + Consider Python's :meth:`~email.message.EmailMessage.iter_attachments` as an + alternative that doesn't descend into attached messages. - A `dict` mapping inline Content-ID references to attachment content. Each key is an + .. attribute:: inlines + + A `list` of all inline content parts in the message, or an empty list if none. + See :ref:`inbound-attachments` below for a description of the values. + + Like :attr:`attachments`, this will recursively descend into any attached messages. + + .. versionadded:: 10.1 + + .. attribute:: content_id_map + + A `dict` mapping inline Content-ID references to inline content. Each key is an "unquoted" cid without angle brackets. E.g., if the :attr:`html` body contains ````, you could get that inline image using - ``message.inline_attachments["abc123..."]``. + ``message.content_id_map["abc123..."]``. - The content of each attachment is described in :ref:`inbound-attachments` below. + The value of each item is described in :ref:`inbound-attachments` below. + + .. versionadded:: 10.1 + + This property was previously available as :attr:`!inline_attachments`. + The old name still works, but is deprecated. .. attribute:: spam_score @@ -267,38 +297,39 @@ Normalized inbound message .. rubric:: Other headers, complex messages, etc. - You can use all of Python's :class:`email.message.Message` features with an + You can use all of Python's :class:`email.message.EmailMessage` features with an AnymailInboundMessage. For example, you can access message headers using - Message's :meth:`mapping interface `: + EmailMessage's :meth:`mapping interface `: .. code-block:: python message['reply-to'] # the Reply-To header (header keys are case-insensitive) message.getall('DKIM-Signature') # list of all DKIM-Signature headers - And you can use Message methods like :meth:`~email.message.Message.walk` and - :meth:`~email.message.Message.get_content_type` to examine more-complex + And you can use Message methods like :meth:`~email.message.EmailMessage.walk` and + :meth:`~email.message.EmailMessage.get_content_type` to examine more-complex multipart MIME messages (digests, delivery reports, or whatever). .. _inbound-attachments: -Handling Inbound Attachments ----------------------------- +Attached and inline content +--------------------------- -Anymail converts each inbound attachment to a specialized MIME object with +Anymail converts each inbound attachment and inline content to a specialized MIME object with additional methods for handling attachments and integrating with Django. -The attachment objects in an AnymailInboundMessage's -:attr:`~AnymailInboundMessage.attachments` list and -:attr:`~AnymailInboundMessage.inline_attachments` dict +The objects in an AnymailInboundMessage's +:attr:`~anymail.inbound.AnymailInboundMessage.attachments`, +:attr:`~anymail.inbound.AnymailInboundMessage.inlines`, +and :attr:`~anymail.inbound.AnymailInboundMessage.content_id_map` have these methods: .. class:: AnymailInboundMessage .. method:: as_uploaded_file() - Returns the attachment converted to a Django :class:`~django.core.files.uploadedfile.UploadedFile` + Returns the content converted to a Django :class:`~django.core.files.uploadedfile.UploadedFile` object. This is suitable for assigning to a model's :class:`~django.db.models.FileField` or :class:`~django.db.models.ImageField`: @@ -322,9 +353,9 @@ have these methods: attachments are essentially user-uploaded content, so you should :ref:`never trust the sender `.) - See the Python docs for more info on :meth:`email.message.Message.get_content_type`, - :meth:`~email.message.Message.get_content_maintype`, and - :meth:`~email.message.Message.get_content_subtype`. + See the Python docs for more info on :meth:`email.message.EmailMessage.get_content_type`, + :meth:`~email.message.EmailMessage.get_content_maintype`, and + :meth:`~email.message.EmailMessage.get_content_subtype`. (Note that you *cannot* determine the attachment type using code like ``issubclass(attachment, email.mime.image.MIMEImage)``. You should instead use something @@ -341,13 +372,19 @@ have these methods: .. method:: is_attachment() - Returns `True` for a (non-inline) attachment, `False` otherwise. - - .. method:: is_inline_attachment() - - Returns `True` for an inline attachment (one with :mailheader:`Content-Disposition` "inline"), + Returns `True` for attachment content (with :mailheader:`Content-Disposition` "attachment"), `False` otherwise. + .. method:: is_inline() + + Returns `True` for inline content (with :mailheader:`Content-Disposition` "inline"), + `False` otherwise. + + .. versionchanged:: 10.1 + + This method was previously named :meth:`!is_inline_attachment`; + the old name still works, but is deprecated. + .. method:: get_content_disposition() Returns the lowercased value (without parameters) of the attachment's @@ -374,7 +411,7 @@ have these methods: An Anymail inbound attachment is actually just an :class:`AnymailInboundMessage` instance, following the Python email package's usual recursive representation of MIME messages. - All :class:`AnymailInboundMessage` and :class:`email.message.Message` functionality + All :class:`AnymailInboundMessage` and :class:`email.message.EmailMessage` functionality is available on attachment objects (though of course not all features are meaningful in all contexts). This can be helpful for, e.g., parsing email messages that are forwarded as attachments diff --git a/tests/test_amazon_ses_backend.py b/tests/test_amazon_ses_backend.py index 52f73cf..f85b0fe 100644 --- a/tests/test_amazon_ses_backend.py +++ b/tests/test_amazon_ses_backend.py @@ -267,7 +267,7 @@ class AmazonSESBackendStandardEmailTests(AmazonSESBackendMockAPITestCase): self.assertEqual(sent_message.html, html_content) - inlines = sent_message.inline_attachments + inlines = sent_message.content_id_map self.assertEqual(len(inlines), 1) self.assertEqual(inlines[cid].get_content_type(), "image/png") self.assertEqual(inlines[cid].get_filename(), image_filename) diff --git a/tests/test_amazon_ses_backendv1.py b/tests/test_amazon_ses_backendv1.py index ac78e23..46644c3 100644 --- a/tests/test_amazon_ses_backendv1.py +++ b/tests/test_amazon_ses_backendv1.py @@ -274,7 +274,7 @@ class AmazonSESBackendStandardEmailTests(AmazonSESBackendMockAPITestCase): self.assertEqual(sent_message.html, html_content) - inlines = sent_message.inline_attachments + inlines = sent_message.content_id_map self.assertEqual(len(inlines), 1) self.assertEqual(inlines[cid].get_content_type(), "image/png") self.assertEqual(inlines[cid].get_filename(), image_filename) diff --git a/tests/test_inbound.py b/tests/test_inbound.py index 0f0d75b..23e6ed7 100644 --- a/tests/test_inbound.py +++ b/tests/test_inbound.py @@ -6,6 +6,7 @@ from textwrap import dedent from django.core.mail import SafeMIMEText from django.test import SimpleTestCase +from anymail.exceptions import AnymailDeprecationWarning from anymail.inbound import AnymailInboundMessage from .utils import SAMPLE_IMAGE_FILENAME, sample_email_path, sample_image_content @@ -200,7 +201,7 @@ class AnymailInboundMessageConstructionTests(SimpleTestCase): content_id="inline-id", ) self.assertEqual(att.get_filename(), "Simulácia.txt") - self.assertTrue(att.is_inline_attachment()) + self.assertTrue(att.is_inline()) self.assertEqual(att.get_content_text(), "Unicode ✓") def test_parse_raw_mime(self): @@ -446,7 +447,7 @@ class AnymailInboundMessageConveniencePropTests(SimpleTestCase): # Default empty list self.assertEqual(AnymailInboundMessage().attachments, []) - def test_inline_attachments_prop(self): + def test_content_id_map_prop(self): att = AnymailInboundMessage.construct_attachment( "image/png", SAMPLE_IMAGE_CONTENT, @@ -455,10 +456,64 @@ class AnymailInboundMessageConveniencePropTests(SimpleTestCase): ) msg = AnymailInboundMessage.construct(attachments=[att]) - self.assertEqual(msg.inline_attachments, {"abc123": att}) + self.assertEqual(msg.content_id_map, {"abc123": att}) + + with self.assertWarnsMessage( + AnymailDeprecationWarning, + "inline_attachments has been renamed to content_id_map and will be removed " + "in the near future.", + ): + self.assertEqual(msg.inline_attachments, {"abc123": att}) # Default empty dict - self.assertEqual(AnymailInboundMessage().inline_attachments, {}) + self.assertEqual(AnymailInboundMessage().content_id_map, {}) + + def test_inlines_prop(self): + raw = dedent( + """\ + MIME-Version: 1.0 + Subject: Message with inline parts + Content-Type: multipart/mixed; boundary="boundary-orig" + + --boundary-orig + Content-Type: text/html; charset="UTF-8" + + Here is a message! + + --boundary-orig + Content-Type: image/png; name="sample_image.png" + Content-Disposition: inline + Content-ID: + Content-Transfer-Encoding: base64 + + {image_content_base64} + + --boundary-orig + Content-Type: image/png; name="sample_image_without_cid.png" + Content-Disposition: inline + Content-Transfer-Encoding: base64 + + {image_content_base64} + + --boundary-orig-- + """ + ).format(image_content_base64=b64encode(SAMPLE_IMAGE_CONTENT).decode("ascii")) + + msg = AnymailInboundMessage.parse_raw_mime(raw) + inlines = msg.inlines + + self.assertEqual(len(inlines), 2) + + self.assertEqual(inlines[0].get_content_type(), "image/png") + self.assertEqual(inlines[0].as_uploaded_file().name, "sample_image.png") + + self.assertEqual(inlines[1].get_content_type(), "image/png") + self.assertEqual( + inlines[1].as_uploaded_file().name, "sample_image_without_cid.png" + ) + + self.assertEqual(len(msg.content_id_map.items()), 1) + self.assertIn("abc123", msg.content_id_map) def test_attachment_as_uploaded_file(self): raw = dedent( @@ -609,9 +664,16 @@ class AnymailInboundMessageAttachedMessageTests(SimpleTestCase): orig_inline_att = orig_msg.get_payload(1) self.assertEqual(orig_inline_att.get_content_type(), "image/png") - self.assertTrue(orig_inline_att.is_inline_attachment()) + self.assertTrue(orig_inline_att.is_inline()) self.assertEqual(orig_inline_att.get_payload(decode=True), SAMPLE_IMAGE_CONTENT) + with self.assertWarnsMessage( + AnymailDeprecationWarning, + "is_inline_attachment has been renamed to is_inline and will be removed in " + "the near future.", + ): + self.assertTrue(orig_inline_att.is_inline_attachment()) + def test_construct_rfc822_attachment_from_data(self): # constructed message/rfc822 attachment should end up as parsed message # (same as if attachment was parsed from raw mime, as in previous test) diff --git a/tests/test_mailersend_inbound.py b/tests/test_mailersend_inbound.py index 413c7a9..f5de8c5 100644 --- a/tests/test_mailersend_inbound.py +++ b/tests/test_mailersend_inbound.py @@ -323,7 +323,7 @@ class MailerSendInboundTestCase(MailerSendWebhookTestCase): ], ) - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["ii_letc8ro50"] self.assertEqual(inline.get_filename(), "sample_image.png") diff --git a/tests/test_mailgun_inbound.py b/tests/test_mailgun_inbound.py index 958b49d..e825be4 100644 --- a/tests/test_mailgun_inbound.py +++ b/tests/test_mailgun_inbound.py @@ -182,7 +182,7 @@ class MailgunInboundTestCase(WebhookTestCase): attachments[1].get_content_bytes(), email_content ) - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["abc123"] self.assertEqual(inline.get_filename(), "image.png") @@ -266,7 +266,7 @@ class MailgunInboundTestCase(WebhookTestCase): event = kwargs["event"] message = event.message self.assertEqual(len(message.attachments), 0) # all inlines - inlines = [part for part in message.walk() if part.is_inline_attachment()] + inlines = [part for part in message.walk() if part.is_inline()] self.assertEqual(len(inlines), 4) self.assertEqual(inlines[0]["Content-ID"], "") self.assertEqual(inlines[1]["Content-ID"], "") diff --git a/tests/test_mailjet_inbound.py b/tests/test_mailjet_inbound.py index 00a0bda..0141684 100644 --- a/tests/test_mailjet_inbound.py +++ b/tests/test_mailjet_inbound.py @@ -205,7 +205,7 @@ class MailjetInboundTestCase(WebhookTestCase): attachments[1].get_content_bytes(), email_content ) - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["abc123"] self.assertEqual(inline.get_filename(), "image.png") diff --git a/tests/test_postal_inbound.py b/tests/test_postal_inbound.py index 42f8d4b..08bdef7 100644 --- a/tests/test_postal_inbound.py +++ b/tests/test_postal_inbound.py @@ -204,7 +204,7 @@ class PostalInboundTestCase(WebhookTestCase): attachments[1].get_content_bytes(), email_content ) - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["abc123"] self.assertEqual(inline.get_filename(), "image.png") diff --git a/tests/test_postmark_inbound.py b/tests/test_postmark_inbound.py index a88fe05..10e98fd 100644 --- a/tests/test_postmark_inbound.py +++ b/tests/test_postmark_inbound.py @@ -191,7 +191,7 @@ class PostmarkInboundTestCase(WebhookTestCase): attachments[1].get_content_bytes(), email_content ) - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["abc123"] self.assertEqual(inline.get_filename(), "image.png") diff --git a/tests/test_sendgrid_inbound.py b/tests/test_sendgrid_inbound.py index 8181d58..ff68b9a 100644 --- a/tests/test_sendgrid_inbound.py +++ b/tests/test_sendgrid_inbound.py @@ -170,7 +170,7 @@ class SendgridInboundTestCase(WebhookTestCase): attachments[1].get_content_bytes(), email_content ) - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["abc123"] self.assertEqual(inline.get_filename(), "image.png") diff --git a/tests/test_sparkpost_inbound.py b/tests/test_sparkpost_inbound.py index 1713d6f..c37648a 100644 --- a/tests/test_sparkpost_inbound.py +++ b/tests/test_sparkpost_inbound.py @@ -205,7 +205,7 @@ class SparkpostInboundTestCase(WebhookTestCase): self.assertEqual(att_message["Subject"], "Test email") self.assertEqual(att_message.text, "Hi Bob, This is a message. Thanks!\n") - inlines = message.inline_attachments + inlines = message.content_id_map self.assertEqual(len(inlines), 1) inline = inlines["abc123"] self.assertEqual(inline.get_filename(), "image.png")