Inbound: improve inline content handling

* refactor: derive `AnymailInboundMessage` from `email.message.EmailMessage`
  rather than legacy Python 2.7 `email.message.Message`

* feat(inbound): replace confusing `inline_attachments` with `content_id_map`
  and `inlines`; rename `is_inline_attachment` to `is_inline`; deprecate old names 

Closes #328

---------

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
This commit is contained in:
Léo Martinez
2023-07-28 00:10:58 +02:00
committed by GitHub
parent bc8ef9af0f
commit 0ac248254e
13 changed files with 212 additions and 60 deletions

View File

@@ -30,6 +30,33 @@ vNext
*Unreleased changes*
Features
~~~~~~~~
* **Inbound:** Improve `AnymailInboundMessage`'s handling of inline content:
* Rename `inline_attachments` to `content_id_map`, more accurately reflecting its function.
* Add new `inlines` property that provides a complete list of inline content,
whether or not it includes a *Content-ID*. This is helpful for accessing
inline images that appear directly in a *multipart/mixed* body, such as those
created by the Apple Mail app.
* Rename `is_inline_attachment()` to just `is_inline()`.
The renamed items are still available, but deprecated, under their old names.
See `docs <http://anymail.dev/en/latest/inbound/#anymail.inbound.AnymailInboundMessage>`__.
(Thanks to `@martinezleoml`_.)
* **Inbound:** `AnymailInboundMessage` now derives from Python's
`email.message.EmailMessage`, which provides improved compatibility with
email standards. (Thanks to `@martinezleoml`_.)
Deprecations
~~~~~~~~~~~~
* **Inbound:** `AnymailInboundMessage.inline_attachments` and `.is_inline_attachment()`
have been renamed---see above.
Other
~~~~~
@@ -1525,6 +1552,7 @@ Features
.. _@Lekensteyn: https://github.com/Lekensteyn
.. _@lewistaylor: https://github.com/lewistaylor
.. _@mark-mishyn: https://github.com/mark-mishyn
.. _@martinezleoml: https://github.com/martinezleoml
.. _@mbk-ok: https://github.com/mbk-ok
.. _@mwheels: https://github.com/mwheels
.. _@nuschk: https://github.com/nuschk

View File

@@ -1,31 +1,33 @@
import warnings
from base64 import b64decode
from email.message import Message
from email.message import EmailMessage
from email.parser import BytesParser, Parser
from email.policy import default as default_policy
from email.utils import unquote
from django.core.files.uploadedfile import SimpleUploadedFile
from .exceptions import AnymailDeprecationWarning
from .utils import angle_wrap, parse_address_list, parse_rfc2822date
class AnymailInboundMessage(Message):
class AnymailInboundMessage(EmailMessage):
"""
A normalized, parsed inbound email message.
A subclass of email.message.Message, with some additional
convenience properties, plus helpful methods backported
from Python 3.6+ email.message.EmailMessage (or really, MIMEPart)
A subclass of email.message.EmailMessage, with some additional
convenience properties.
"""
# Why Python email.message.Message rather than django.core.mail.EmailMessage?
# Why Python email.message.EmailMessage rather than django.core.mail.EmailMessage?
# Django's EmailMessage is really intended for constructing a (limited subset of)
# Message to send; Message is better designed for representing arbitrary messages:
# an EmailMessage to send; Python's EmailMessage is better designed for representing
# arbitrary messages:
#
# * Message is easily parsed from raw mime (which is an inbound format provided
# by many ESPs), and can accurately represent any mime email received
# * Message can represent repeated header fields (e.g., "Received") which
# are common in inbound messages
# * Python's EmailMessage is easily parsed from raw mime (which is an inbound format
# provided by many ESPs), and can accurately represent any mime email received
# * Python's EmailMessage can represent repeated header fields (e.g., "Received")
# which are common in inbound messages
# * Django's EmailMessage defaults a bunch of properties in ways that aren't helpful
# (e.g., from_email from settings)
@@ -103,13 +105,30 @@ class AnymailInboundMessage(Message):
"""list of attachments (as MIMEPart objects); excludes inlines"""
return [part for part in self.walk() if part.is_attachment()]
@property
def inlines(self):
"""list of inline parts (as MIMEPart objects)"""
return [part for part in self.walk() if part.is_inline()]
@property
def inline_attachments(self):
"""DEPRECATED: use content_id_map instead"""
warnings.warn(
"inline_attachments has been renamed to content_id_map and will be removed"
" in the near future.",
AnymailDeprecationWarning,
)
return self.content_id_map
@property
def content_id_map(self):
"""dict of Content-ID: attachment (as MIMEPart objects)"""
return {
unquote(part["Content-ID"]): part
for part in self.walk()
if part.is_inline_attachment() and part["Content-ID"] is not None
if part.is_inline() and part["Content-ID"] is not None
}
def get_address_header(self, header):
@@ -143,13 +162,19 @@ class AnymailInboundMessage(Message):
return part.get_content_text()
return None
# Hoisted from email.message.MIMEPart
def is_attachment(self):
return self.get_content_disposition() == "attachment"
def is_inline(self):
return self.get_content_disposition() == "inline"
# New for Anymail
def is_inline_attachment(self):
return self.get_content_disposition() == "inline"
"""DEPRECATED: use in_inline instead"""
warnings.warn(
"is_inline_attachment has been renamed to is_inline and will be removed"
" in the near future.",
AnymailDeprecationWarning,
)
return self.is_inline()
def get_content_bytes(self):
"""Return the raw payload bytes"""
@@ -331,7 +356,7 @@ class AnymailInboundMessage(Message):
if attachments is not None:
for attachment in attachments:
if attachment.is_inline_attachment():
if attachment.is_inline():
related.attach(attachment)
else:
msg.attach(attachment)

View File

@@ -136,10 +136,17 @@ Normalized inbound message
.. class:: anymail.inbound.AnymailInboundMessage
The :attr:`~AnymailInboundEvent.message` attribute of an :class:`AnymailInboundEvent`
is an AnymailInboundMessage---an extension of Python's standard :class:`email.message.Message`
is an AnymailInboundMessage---an extension of Python's standard :class:`email.message.EmailMessage`
with additional features to simplify inbound handling.
In addition to the base :class:`~email.message.Message` functionality, it includes these attributes:
.. versionchanged:: 10.1
Earlier releases extended Python's legacy :class:`email.message.Message` class.
:class:`~email.message.EmailMessage` is a superset that fixes bugs and improves
compatibility with email standards.
In addition to the base :class:`~email.message.EmailMessage` functionality,
:class:`!AnymailInboundMessage` includes these attributes:
.. attribute:: envelope_sender
@@ -221,6 +228,10 @@ Normalized inbound message
The message's plaintext message body as a `str`, or `None` if the
message doesn't include a plaintext body.
For certain messages that are sent as plaintext with inline images
(such as those sometimes composed by the Apple Mail app), this will
include only the text before the first inline image.
.. attribute:: html
The message's HTML message body as a `str`, or `None` if the
@@ -228,17 +239,36 @@ Normalized inbound message
.. attribute:: attachments
A `list` of all (non-inline) attachments to the message, or an empty list if there are
no attachments. See :ref:`inbound-attachments` below for the contents of each list item.
A `list` of all attachments to the message, or an empty list if there are
no attachments. See :ref:`inbound-attachments` below a description of the values.
.. attribute:: inline_attachments
If the inbound message includes an attached message, :attr:`!attachments`
will include the attached message and all of *its* attachments, recursively.
Consider Python's :meth:`~email.message.EmailMessage.iter_attachments` as an
alternative that doesn't descend into attached messages.
A `dict` mapping inline Content-ID references to attachment content. Each key is an
.. attribute:: inlines
A `list` of all inline content parts in the message, or an empty list if none.
See :ref:`inbound-attachments` below for a description of the values.
Like :attr:`attachments`, this will recursively descend into any attached messages.
.. versionadded:: 10.1
.. attribute:: content_id_map
A `dict` mapping inline Content-ID references to inline content. Each key is an
"unquoted" cid without angle brackets. E.g., if the :attr:`html` body contains
``<img src="cid:abc123...">``, you could get that inline image using
``message.inline_attachments["abc123..."]``.
``message.content_id_map["abc123..."]``.
The content of each attachment is described in :ref:`inbound-attachments` below.
The value of each item is described in :ref:`inbound-attachments` below.
.. versionadded:: 10.1
This property was previously available as :attr:`!inline_attachments`.
The old name still works, but is deprecated.
.. attribute:: spam_score
@@ -267,38 +297,39 @@ Normalized inbound message
.. rubric:: Other headers, complex messages, etc.
You can use all of Python's :class:`email.message.Message` features with an
You can use all of Python's :class:`email.message.EmailMessage` features with an
AnymailInboundMessage. For example, you can access message headers using
Message's :meth:`mapping interface <email.message.Message.__getitem__>`:
EmailMessage's :meth:`mapping interface <email.message.EmailMessage.__getitem__>`:
.. code-block:: python
message['reply-to'] # the Reply-To header (header keys are case-insensitive)
message.getall('DKIM-Signature') # list of all DKIM-Signature headers
And you can use Message methods like :meth:`~email.message.Message.walk` and
:meth:`~email.message.Message.get_content_type` to examine more-complex
And you can use Message methods like :meth:`~email.message.EmailMessage.walk` and
:meth:`~email.message.EmailMessage.get_content_type` to examine more-complex
multipart MIME messages (digests, delivery reports, or whatever).
.. _inbound-attachments:
Handling Inbound Attachments
----------------------------
Attached and inline content
---------------------------
Anymail converts each inbound attachment to a specialized MIME object with
Anymail converts each inbound attachment and inline content to a specialized MIME object with
additional methods for handling attachments and integrating with Django.
The attachment objects in an AnymailInboundMessage's
:attr:`~AnymailInboundMessage.attachments` list and
:attr:`~AnymailInboundMessage.inline_attachments` dict
The objects in an AnymailInboundMessage's
:attr:`~anymail.inbound.AnymailInboundMessage.attachments`,
:attr:`~anymail.inbound.AnymailInboundMessage.inlines`,
and :attr:`~anymail.inbound.AnymailInboundMessage.content_id_map`
have these methods:
.. class:: AnymailInboundMessage
.. method:: as_uploaded_file()
Returns the attachment converted to a Django :class:`~django.core.files.uploadedfile.UploadedFile`
Returns the content converted to a Django :class:`~django.core.files.uploadedfile.UploadedFile`
object. This is suitable for assigning to a model's :class:`~django.db.models.FileField`
or :class:`~django.db.models.ImageField`:
@@ -322,9 +353,9 @@ have these methods:
attachments are essentially user-uploaded content, so you should
:ref:`never trust the sender <inbound-security>`.)
See the Python docs for more info on :meth:`email.message.Message.get_content_type`,
:meth:`~email.message.Message.get_content_maintype`, and
:meth:`~email.message.Message.get_content_subtype`.
See the Python docs for more info on :meth:`email.message.EmailMessage.get_content_type`,
:meth:`~email.message.EmailMessage.get_content_maintype`, and
:meth:`~email.message.EmailMessage.get_content_subtype`.
(Note that you *cannot* determine the attachment type using code like
``issubclass(attachment, email.mime.image.MIMEImage)``. You should instead use something
@@ -341,13 +372,19 @@ have these methods:
.. method:: is_attachment()
Returns `True` for a (non-inline) attachment, `False` otherwise.
.. method:: is_inline_attachment()
Returns `True` for an inline attachment (one with :mailheader:`Content-Disposition` "inline"),
Returns `True` for attachment content (with :mailheader:`Content-Disposition` "attachment"),
`False` otherwise.
.. method:: is_inline()
Returns `True` for inline content (with :mailheader:`Content-Disposition` "inline"),
`False` otherwise.
.. versionchanged:: 10.1
This method was previously named :meth:`!is_inline_attachment`;
the old name still works, but is deprecated.
.. method:: get_content_disposition()
Returns the lowercased value (without parameters) of the attachment's
@@ -374,7 +411,7 @@ have these methods:
An Anymail inbound attachment is actually just an :class:`AnymailInboundMessage` instance,
following the Python email package's usual recursive representation of MIME messages.
All :class:`AnymailInboundMessage` and :class:`email.message.Message` functionality
All :class:`AnymailInboundMessage` and :class:`email.message.EmailMessage` functionality
is available on attachment objects (though of course not all features are meaningful in all contexts).
This can be helpful for, e.g., parsing email messages that are forwarded as attachments

View File

@@ -267,7 +267,7 @@ class AmazonSESBackendStandardEmailTests(AmazonSESBackendMockAPITestCase):
self.assertEqual(sent_message.html, html_content)
inlines = sent_message.inline_attachments
inlines = sent_message.content_id_map
self.assertEqual(len(inlines), 1)
self.assertEqual(inlines[cid].get_content_type(), "image/png")
self.assertEqual(inlines[cid].get_filename(), image_filename)

View File

@@ -274,7 +274,7 @@ class AmazonSESBackendStandardEmailTests(AmazonSESBackendMockAPITestCase):
self.assertEqual(sent_message.html, html_content)
inlines = sent_message.inline_attachments
inlines = sent_message.content_id_map
self.assertEqual(len(inlines), 1)
self.assertEqual(inlines[cid].get_content_type(), "image/png")
self.assertEqual(inlines[cid].get_filename(), image_filename)

View File

@@ -6,6 +6,7 @@ from textwrap import dedent
from django.core.mail import SafeMIMEText
from django.test import SimpleTestCase
from anymail.exceptions import AnymailDeprecationWarning
from anymail.inbound import AnymailInboundMessage
from .utils import SAMPLE_IMAGE_FILENAME, sample_email_path, sample_image_content
@@ -200,7 +201,7 @@ class AnymailInboundMessageConstructionTests(SimpleTestCase):
content_id="inline-id",
)
self.assertEqual(att.get_filename(), "Simulácia.txt")
self.assertTrue(att.is_inline_attachment())
self.assertTrue(att.is_inline())
self.assertEqual(att.get_content_text(), "Unicode ✓")
def test_parse_raw_mime(self):
@@ -446,7 +447,7 @@ class AnymailInboundMessageConveniencePropTests(SimpleTestCase):
# Default empty list
self.assertEqual(AnymailInboundMessage().attachments, [])
def test_inline_attachments_prop(self):
def test_content_id_map_prop(self):
att = AnymailInboundMessage.construct_attachment(
"image/png",
SAMPLE_IMAGE_CONTENT,
@@ -455,10 +456,64 @@ class AnymailInboundMessageConveniencePropTests(SimpleTestCase):
)
msg = AnymailInboundMessage.construct(attachments=[att])
self.assertEqual(msg.content_id_map, {"abc123": att})
with self.assertWarnsMessage(
AnymailDeprecationWarning,
"inline_attachments has been renamed to content_id_map and will be removed "
"in the near future.",
):
self.assertEqual(msg.inline_attachments, {"abc123": att})
# Default empty dict
self.assertEqual(AnymailInboundMessage().inline_attachments, {})
self.assertEqual(AnymailInboundMessage().content_id_map, {})
def test_inlines_prop(self):
raw = dedent(
"""\
MIME-Version: 1.0
Subject: Message with inline parts
Content-Type: multipart/mixed; boundary="boundary-orig"
--boundary-orig
Content-Type: text/html; charset="UTF-8"
<img src="cid:abc123"> Here is a message!
--boundary-orig
Content-Type: image/png; name="sample_image.png"
Content-Disposition: inline
Content-ID: <abc123>
Content-Transfer-Encoding: base64
{image_content_base64}
--boundary-orig
Content-Type: image/png; name="sample_image_without_cid.png"
Content-Disposition: inline
Content-Transfer-Encoding: base64
{image_content_base64}
--boundary-orig--
"""
).format(image_content_base64=b64encode(SAMPLE_IMAGE_CONTENT).decode("ascii"))
msg = AnymailInboundMessage.parse_raw_mime(raw)
inlines = msg.inlines
self.assertEqual(len(inlines), 2)
self.assertEqual(inlines[0].get_content_type(), "image/png")
self.assertEqual(inlines[0].as_uploaded_file().name, "sample_image.png")
self.assertEqual(inlines[1].get_content_type(), "image/png")
self.assertEqual(
inlines[1].as_uploaded_file().name, "sample_image_without_cid.png"
)
self.assertEqual(len(msg.content_id_map.items()), 1)
self.assertIn("abc123", msg.content_id_map)
def test_attachment_as_uploaded_file(self):
raw = dedent(
@@ -609,9 +664,16 @@ class AnymailInboundMessageAttachedMessageTests(SimpleTestCase):
orig_inline_att = orig_msg.get_payload(1)
self.assertEqual(orig_inline_att.get_content_type(), "image/png")
self.assertTrue(orig_inline_att.is_inline_attachment())
self.assertTrue(orig_inline_att.is_inline())
self.assertEqual(orig_inline_att.get_payload(decode=True), SAMPLE_IMAGE_CONTENT)
with self.assertWarnsMessage(
AnymailDeprecationWarning,
"is_inline_attachment has been renamed to is_inline and will be removed in "
"the near future.",
):
self.assertTrue(orig_inline_att.is_inline_attachment())
def test_construct_rfc822_attachment_from_data(self):
# constructed message/rfc822 attachment should end up as parsed message
# (same as if attachment was parsed from raw mime, as in previous test)

View File

@@ -323,7 +323,7 @@ class MailerSendInboundTestCase(MailerSendWebhookTestCase):
],
)
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["ii_letc8ro50"]
self.assertEqual(inline.get_filename(), "sample_image.png")

View File

@@ -182,7 +182,7 @@ class MailgunInboundTestCase(WebhookTestCase):
attachments[1].get_content_bytes(), email_content
)
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["abc123"]
self.assertEqual(inline.get_filename(), "image.png")
@@ -266,7 +266,7 @@ class MailgunInboundTestCase(WebhookTestCase):
event = kwargs["event"]
message = event.message
self.assertEqual(len(message.attachments), 0) # all inlines
inlines = [part for part in message.walk() if part.is_inline_attachment()]
inlines = [part for part in message.walk() if part.is_inline()]
self.assertEqual(len(inlines), 4)
self.assertEqual(inlines[0]["Content-ID"], "")
self.assertEqual(inlines[1]["Content-ID"], "")

View File

@@ -205,7 +205,7 @@ class MailjetInboundTestCase(WebhookTestCase):
attachments[1].get_content_bytes(), email_content
)
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["abc123"]
self.assertEqual(inline.get_filename(), "image.png")

View File

@@ -204,7 +204,7 @@ class PostalInboundTestCase(WebhookTestCase):
attachments[1].get_content_bytes(), email_content
)
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["abc123"]
self.assertEqual(inline.get_filename(), "image.png")

View File

@@ -191,7 +191,7 @@ class PostmarkInboundTestCase(WebhookTestCase):
attachments[1].get_content_bytes(), email_content
)
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["abc123"]
self.assertEqual(inline.get_filename(), "image.png")

View File

@@ -170,7 +170,7 @@ class SendgridInboundTestCase(WebhookTestCase):
attachments[1].get_content_bytes(), email_content
)
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["abc123"]
self.assertEqual(inline.get_filename(), "image.png")

View File

@@ -205,7 +205,7 @@ class SparkpostInboundTestCase(WebhookTestCase):
self.assertEqual(att_message["Subject"], "Test email")
self.assertEqual(att_message.text, "Hi Bob, This is a message. Thanks!\n")
inlines = message.inline_attachments
inlines = message.content_id_map
self.assertEqual(len(inlines), 1)
inline = inlines["abc123"]
self.assertEqual(inline.get_filename(), "image.png")