Files
django-anymail/anymail/webhooks/mailgun.py
Tim Schilling a4f50c4340 Confirm support for Django 4.1
Replaces deprecated `django.utils.timezone.utc` with
`datetime.timezone.utc` (available since Python 3.2).
2022-08-24 13:24:27 -07:00

426 lines
20 KiB
Python

import json
from datetime import datetime, timezone
import hashlib
import hmac
from django.utils.crypto import constant_time_compare
from .base import AnymailBaseWebhookView
from ..exceptions import AnymailConfigurationError, AnymailWebhookValidationFailure, AnymailInvalidAddress
from ..inbound import AnymailInboundMessage
from ..signals import inbound, tracking, AnymailInboundEvent, AnymailTrackingEvent, EventType, RejectReason
from ..utils import get_anymail_setting, combine, querydict_getfirst, parse_single_address, UNSET
class MailgunBaseWebhookView(AnymailBaseWebhookView):
"""Base view class for Mailgun webhooks"""
esp_name = "Mailgun"
warn_if_no_basic_auth = False # because we validate against signature
webhook_signing_key = None # (Declaring class attr allows override by kwargs in View.as_view.)
# The `api_key` attribute name is still allowed for compatibility with earlier Anymail releases.
api_key = None # (Declaring class attr allows override by kwargs in View.as_view.)
def __init__(self, **kwargs):
# webhook_signing_key: falls back to api_key if webhook_signing_key not provided
api_key = get_anymail_setting('api_key', esp_name=self.esp_name,
kwargs=kwargs, allow_bare=True, default=None)
webhook_signing_key = get_anymail_setting('webhook_signing_key', esp_name=self.esp_name,
kwargs=kwargs, default=UNSET if api_key is None else api_key)
self.webhook_signing_key = webhook_signing_key.encode('ascii') # hmac.new requires bytes key
super().__init__(**kwargs)
def validate_request(self, request):
super().validate_request(request) # first check basic auth if enabled
if request.content_type == "application/json":
# New-style webhook: json payload with separate signature block
try:
event = json.loads(request.body.decode('utf-8'))
signature_block = event['signature']
token = signature_block['token']
timestamp = signature_block['timestamp']
signature = signature_block['signature']
except (KeyError, ValueError, UnicodeDecodeError) as err:
raise AnymailWebhookValidationFailure(
"Mailgun webhook called with invalid payload format") from err
else:
# Legacy webhook: signature fields are interspersed with other POST data
try:
# Must use the *last* value of these fields if there are conflicting merged user-variables.
# (Fortunately, Django QueryDict is specced to return the last value.)
token = request.POST['token']
timestamp = request.POST['timestamp']
signature = request.POST['signature']
except KeyError as err:
raise AnymailWebhookValidationFailure(
"Mailgun webhook called without required security fields") from err
expected_signature = hmac.new(key=self.webhook_signing_key, msg='{}{}'.format(timestamp, token).encode('ascii'),
digestmod=hashlib.sha256).hexdigest()
if not constant_time_compare(signature, expected_signature):
raise AnymailWebhookValidationFailure("Mailgun webhook called with incorrect signature")
class MailgunTrackingWebhookView(MailgunBaseWebhookView):
"""Handler for Mailgun delivery and engagement tracking webhooks"""
signal = tracking
def parse_events(self, request):
if request.content_type == "application/json":
esp_event = json.loads(request.body.decode('utf-8'))
return [self.esp_to_anymail_event(esp_event)]
else:
return [self.mailgun_legacy_to_anymail_event(request.POST)]
event_types = {
# Map Mailgun event: Anymail normalized type
'accepted': EventType.QUEUED, # not delivered to webhooks (8/2018)
'rejected': EventType.REJECTED,
'delivered': EventType.DELIVERED,
'failed': EventType.BOUNCED,
'opened': EventType.OPENED,
'clicked': EventType.CLICKED,
'unsubscribed': EventType.UNSUBSCRIBED,
'complained': EventType.COMPLAINED,
}
reject_reasons = {
# Map Mailgun event_data.reason: Anymail normalized RejectReason
# (these appear in webhook doc examples, but aren't actually documented anywhere)
"bounce": RejectReason.BOUNCED,
"suppress-bounce": RejectReason.BOUNCED,
"generic": RejectReason.OTHER, # ??? appears to be used for any temporary failure?
}
severities = {
# Remap some event types based on "severity" payload field
(EventType.BOUNCED, 'temporary'): EventType.DEFERRED
}
def esp_to_anymail_event(self, esp_event):
event_data = esp_event.get('event-data', {})
event_type = self.event_types.get(event_data['event'], EventType.UNKNOWN)
event_type = self.severities.get((EventType.BOUNCED, event_data.get('severity')), event_type)
# Use signature.token for event_id, rather than event_data.id,
# because the latter is only "guaranteed to be unique within a day".
event_id = esp_event.get('signature', {}).get('token')
recipient = event_data.get('recipient')
try:
timestamp = datetime.fromtimestamp(float(event_data['timestamp']), tz=timezone.utc)
except KeyError:
timestamp = None
try:
message_id = event_data['message']['headers']['message-id']
except KeyError:
message_id = None
if message_id and not message_id.startswith('<'):
message_id = "<{}>".format(message_id)
metadata = event_data.get('user-variables', {})
tags = event_data.get('tags', [])
try:
delivery_status = event_data['delivery-status']
except KeyError:
description = None
mta_response = None
else:
description = delivery_status.get('description')
mta_response = delivery_status.get('message')
if 'reason' in event_data:
reject_reason = self.reject_reasons.get(event_data['reason'], RejectReason.OTHER)
else:
reject_reason = None
if event_type == EventType.REJECTED:
# This event has a somewhat different structure than the others...
description = description or event_data.get("reject", {}).get("reason")
reject_reason = reject_reason or RejectReason.OTHER
if not recipient:
try:
to_email = parse_single_address(
event_data["message"]["headers"]["to"])
except (AnymailInvalidAddress, KeyError):
pass
else:
recipient = to_email.addr_spec
return AnymailTrackingEvent(
event_type=event_type,
timestamp=timestamp,
message_id=message_id,
event_id=event_id,
recipient=recipient,
reject_reason=reject_reason,
description=description,
mta_response=mta_response,
tags=tags,
metadata=metadata,
click_url=event_data.get('url'),
user_agent=event_data.get('client-info', {}).get('user-agent'),
esp_event=esp_event,
)
# Legacy event handling
# (Prior to 2018-06-29, these were the only Mailgun events.)
legacy_event_types = {
# Map Mailgun event: Anymail normalized type
'delivered': EventType.DELIVERED,
'dropped': EventType.REJECTED,
'bounced': EventType.BOUNCED,
'complained': EventType.COMPLAINED,
'unsubscribed': EventType.UNSUBSCRIBED,
'opened': EventType.OPENED,
'clicked': EventType.CLICKED,
# Mailgun does not send events corresponding to QUEUED or DEFERRED
}
legacy_reject_reasons = {
# Map Mailgun (SMTP) error codes to Anymail normalized reject_reason.
# By default, we will treat anything 400-599 as REJECT_BOUNCED
# so only exceptions are listed here.
499: RejectReason.TIMED_OUT, # unable to connect to MX (also covers invalid recipients)
# These 6xx codes appear to be Mailgun extensions to SMTP
# (and don't seem to be documented anywhere):
605: RejectReason.BOUNCED, # previous bounce
607: RejectReason.SPAM, # previous spam complaint
}
def mailgun_legacy_to_anymail_event(self, esp_event):
# esp_event is a Django QueryDict (from request.POST),
# which has multi-valued fields, but is *not* case-insensitive.
# Because of the way Mailgun merges user-variables into the event,
# we must generally use the *first* value of any multi-valued field
# to avoid potential conflicting user-data.
esp_event.getfirst = querydict_getfirst.__get__(esp_event)
if 'event' not in esp_event and 'sender' in esp_event:
# Inbound events don't (currently) have an event field
raise AnymailConfigurationError(
"You seem to have set Mailgun's *inbound* route "
"to Anymail's Mailgun *tracking* webhook URL.")
event_type = self.legacy_event_types.get(esp_event.getfirst('event'), EventType.UNKNOWN)
timestamp = datetime.fromtimestamp(
int(esp_event['timestamp']), tz=timezone.utc) # use *last* value of timestamp
# Message-Id is not documented for every event, but seems to always be included.
# (It's sometimes spelled as 'message-id', lowercase, and missing the <angle-brackets>.)
message_id = esp_event.getfirst('Message-Id', None) or esp_event.getfirst('message-id', None)
if message_id and not message_id.startswith('<'):
message_id = "<{}>".format(message_id)
description = esp_event.getfirst('description', None)
mta_response = esp_event.getfirst('error', None) or esp_event.getfirst('notification', None)
reject_reason = None
try:
mta_status = int(esp_event.getfirst('code'))
except (KeyError, TypeError):
pass
except ValueError:
# RFC-3463 extended SMTP status code (class.subject.detail, where class is "2", "4" or "5")
try:
status_class = esp_event.getfirst('code').split('.')[0]
except (TypeError, IndexError):
# illegal SMTP status code format
pass
else:
reject_reason = RejectReason.BOUNCED if status_class in ("4", "5") else RejectReason.OTHER
else:
reject_reason = self.legacy_reject_reasons.get(
mta_status,
RejectReason.BOUNCED if 400 <= mta_status < 600
else RejectReason.OTHER)
metadata = self._extract_legacy_metadata(esp_event)
# tags are supposed to be in 'tag' fields, but are sometimes in undocumented X-Mailgun-Tag
tags = esp_event.getlist('tag', None) or esp_event.getlist('X-Mailgun-Tag', [])
return AnymailTrackingEvent(
event_type=event_type,
timestamp=timestamp,
message_id=message_id,
event_id=esp_event.get('token', None), # use *last* value of token
recipient=esp_event.getfirst('recipient', None),
reject_reason=reject_reason,
description=description,
mta_response=mta_response,
tags=tags,
metadata=metadata,
click_url=esp_event.getfirst('url', None),
user_agent=esp_event.getfirst('user-agent', None),
esp_event=esp_event,
)
def _extract_legacy_metadata(self, esp_event):
# Mailgun merges user-variables into the POST fields. If you know which user variable
# you want to retrieve--and it doesn't conflict with a Mailgun event field--that's fine.
# But if you want to extract all user-variables (like we do), it's more complicated...
event_type = esp_event.getfirst('event')
metadata = {}
if 'message-headers' in esp_event:
# For events where original message headers are available, it's most reliable
# to recover user-variables from the X-Mailgun-Variables header(s).
headers = json.loads(esp_event['message-headers'])
variables = [value for [field, value] in headers if field == 'X-Mailgun-Variables']
if len(variables) >= 1:
# Each X-Mailgun-Variables value is JSON. Parse and merge them all into single dict:
metadata = combine(*[json.loads(value) for value in variables])
elif event_type in self._known_legacy_event_fields:
# For other events, we must extract from the POST fields, ignoring known Mailgun
# event parameters, and treating all other values as user-variables.
known_fields = self._known_legacy_event_fields[event_type]
for field, values in esp_event.lists():
if field not in known_fields:
# Unknown fields are assumed to be user-variables. (There should really only be
# a single value, but just in case take the last one to match QueryDict semantics.)
metadata[field] = values[-1]
elif field == 'tag':
# There's no way to distinguish a user-variable named 'tag' from an actual tag,
# so don't treat this/these value(s) as metadata.
pass
elif len(values) == 1:
# This is an expected event parameter, and since there's only a single value
# it must be the event param, not metadata.
pass
else:
# This is an expected event parameter, but there are (at least) two values.
# One is the event param, and the other is a user-variable metadata value.
# Which is which depends on the field:
if field in {'signature', 'timestamp', 'token'}:
metadata[field] = values[0] # values = [user-variable, event-param]
else:
metadata[field] = values[-1] # values = [event-param, user-variable]
return metadata
_common_legacy_event_fields = {
# These fields are documented to appear in all Mailgun opened, clicked and unsubscribed events:
'event', 'recipient', 'domain', 'ip', 'country', 'region', 'city', 'user-agent', 'device-type',
'client-type', 'client-name', 'client-os', 'campaign-id', 'campaign-name', 'tag', 'mailing-list',
'timestamp', 'token', 'signature',
# Undocumented, but observed in actual events:
'body-plain', 'h', 'message-id',
}
_known_legacy_event_fields = {
# For all Mailgun event types that *don't* include message-headers,
# map Mailgun (not normalized) event type to set of expected event fields.
# Used for metadata extraction.
'clicked': _common_legacy_event_fields | {'url'},
'opened': _common_legacy_event_fields,
'unsubscribed': _common_legacy_event_fields,
}
class MailgunInboundWebhookView(MailgunBaseWebhookView):
"""Handler for Mailgun inbound (route forward-to-url) webhook"""
signal = inbound
def parse_events(self, request):
if request.content_type == "application/json":
esp_event = json.loads(request.body.decode('utf-8'))
event_type = esp_event.get('event-data', {}).get('event', '')
raise AnymailConfigurationError(
"You seem to have set Mailgun's *%s tracking* webhook "
"to Anymail's Mailgun *inbound* webhook URL. "
"(Or Mailgun has changed inbound events to use json.)"
% event_type)
return [self.esp_to_anymail_event(request)]
def esp_to_anymail_event(self, request):
# Inbound uses the entire Django request as esp_event, because we need POST and FILES.
# Note that request.POST is case-sensitive (unlike email.message.Message headers).
esp_event = request
if request.POST.get('event', 'inbound') != 'inbound':
# (Legacy) tracking event
raise AnymailConfigurationError(
"You seem to have set Mailgun's *%s tracking* webhook "
"to Anymail's Mailgun *inbound* webhook URL." % request.POST['event'])
if 'attachments' in request.POST:
# Inbound route used store() rather than forward().
# ("attachments" seems to be the only POST param that differs between
# store and forward; Anymail could support store by handling the JSON
# attachments param in message_from_mailgun_parsed.)
raise AnymailConfigurationError(
"You seem to have configured Mailgun's receiving route using the store()"
" action. Anymail's inbound webhook requires the forward() action.")
if 'body-mime' in request.POST:
# Raw-MIME
message = AnymailInboundMessage.parse_raw_mime(request.POST['body-mime'])
else:
# Fully-parsed
message = self.message_from_mailgun_parsed(request)
message.envelope_sender = request.POST.get('sender', None)
message.envelope_recipient = request.POST.get('recipient', None)
message.stripped_text = request.POST.get('stripped-text', None)
message.stripped_html = request.POST.get('stripped-html', None)
message.spam_detected = message.get('X-Mailgun-Sflag', 'No').lower() == 'yes'
try:
message.spam_score = float(message['X-Mailgun-Sscore'])
except (TypeError, ValueError):
pass
return AnymailInboundEvent(
event_type=EventType.INBOUND,
timestamp=datetime.fromtimestamp(int(request.POST['timestamp']), tz=timezone.utc),
event_id=request.POST.get('token', None),
esp_event=esp_event,
message=message,
)
def message_from_mailgun_parsed(self, request):
"""Construct a Message from Mailgun's "fully-parsed" fields"""
# Mailgun transcodes all fields to UTF-8 for "fully parsed" messages
try:
attachment_count = int(request.POST['attachment-count'])
except (KeyError, TypeError):
attachments = None
else:
# Load attachments from posted files: attachment-1, attachment-2, etc.
# content-id-map is {content-id: attachment-id}, identifying which files are inline attachments.
# Invert it to {attachment-id: content-id}, while handling potentially duplicate content-ids.
field_to_content_id = json.loads(
request.POST.get('content-id-map', '{}'),
object_pairs_hook=lambda pairs: {att_id: cid for (cid, att_id) in pairs})
attachments = []
for n in range(1, attachment_count+1):
attachment_id = "attachment-%d" % n
try:
file = request.FILES[attachment_id]
except KeyError:
# Django's multipart/form-data handling drops FILES with certain
# filenames (for security) or with empty filenames (Django ticket 15879).
# (To avoid this problem, use Mailgun's "raw MIME" inbound option.)
pass
else:
content_id = field_to_content_id.get(attachment_id)
attachment = AnymailInboundMessage.construct_attachment_from_uploaded_file(
file, content_id=content_id)
attachments.append(attachment)
return AnymailInboundMessage.construct(
headers=json.loads(request.POST['message-headers']), # includes From, To, Cc, Subject, etc.
text=request.POST.get('body-plain', None),
html=request.POST.get('body-html', None),
attachments=attachments,
)