diff --git a/anymail/utils.py b/anymail/utils.py index a0ccb5e..cabb251 100644 --- a/anymail/utils.py +++ b/anymail/utils.py @@ -398,6 +398,34 @@ def collect_all_methods(cls, method_name): return methods +def querydict_getfirst(qdict, field, default=UNSET): + """Like :func:`django.http.QueryDict.get`, but returns *first* value of multi-valued field. + + >>> from django.http import QueryDict + >>> q = QueryDict('a=1&a=2&a=3') + >>> querydict_getfirst(q, 'a') + '1' + >>> q.get('a') + '3' + >>> q['a'] + '3' + + You can bind this to a QueryDict instance using the "descriptor protocol": + >>> q.getfirst = querydict_getfirst.__get__(q) + >>> q.getfirst('a') + '1' + """ + # (Why not instead define a QueryDict subclass with this method? Because there's no simple way + # to efficiently initialize a QueryDict subclass with the contents of an existing instance.) + values = qdict.getlist(field) + if len(values) > 0: + return values[0] + elif default is not UNSET: + return default + else: + return qdict[field] # raise appropriate KeyError + + EPOCH = datetime(1970, 1, 1, tzinfo=utc) diff --git a/anymail/webhooks/mailgun.py b/anymail/webhooks/mailgun.py index cd3de0f..396a871 100644 --- a/anymail/webhooks/mailgun.py +++ b/anymail/webhooks/mailgun.py @@ -9,7 +9,7 @@ from django.utils.timezone import utc from .base import AnymailBaseWebhookView from ..exceptions import AnymailWebhookValidationFailure from ..signals import tracking, AnymailTrackingEvent, EventType, RejectReason -from ..utils import get_anymail_setting, combine +from ..utils import get_anymail_setting, combine, querydict_getfirst class MailgunBaseWebhookView(AnymailBaseWebhookView): @@ -28,6 +28,8 @@ class MailgunBaseWebhookView(AnymailBaseWebhookView): def validate_request(self, request): super(MailgunBaseWebhookView, self).validate_request(request) # first check basic auth if enabled try: + # Must use the *last* value of these fields if there are conflicting merged user-variables. + # (Fortunately, Django QueryDict is specced to return the last value.) token = request.POST['token'] timestamp = request.POST['timestamp'] signature = str(request.POST['signature']) # force to same type as hexdigest() (for python2) @@ -75,27 +77,31 @@ class MailgunTrackingWebhookView(MailgunBaseWebhookView): def esp_to_anymail_event(self, esp_event): # esp_event is a Django QueryDict (from request.POST), - # which has multi-valued fields, but is *not* case-insensitive + # which has multi-valued fields, but is *not* case-insensitive. + # Because of the way Mailgun merges user-variables into the event, + # we must generally use the *first* value of any multi-valued field + # to avoid potential conflicting user-data. + esp_event.getfirst = querydict_getfirst.__get__(esp_event) - event_type = self.event_types.get(esp_event['event'], EventType.UNKNOWN) - timestamp = datetime.fromtimestamp(int(esp_event['timestamp']), tz=utc) + event_type = self.event_types.get(esp_event.getfirst('event'), EventType.UNKNOWN) + timestamp = datetime.fromtimestamp(int(esp_event['timestamp']), tz=utc) # use *last* value of timestamp # Message-Id is not documented for every event, but seems to always be included. # (It's sometimes spelled as 'message-id', lowercase, and missing the .) - message_id = esp_event.get('Message-Id', esp_event.get('message-id', None)) + message_id = esp_event.getfirst('Message-Id', None) or esp_event.getfirst('message-id', None) if message_id and not message_id.startswith('<'): message_id = "<{}>".format(message_id) - description = esp_event.get('description', None) - mta_response = esp_event.get('error', esp_event.get('notification', None)) + description = esp_event.getfirst('description', None) + mta_response = esp_event.getfirst('error', None) or esp_event.getfirst('notification', None) reject_reason = None try: - mta_status = int(esp_event['code']) + mta_status = int(esp_event.getfirst('code')) except (KeyError, TypeError): pass except ValueError: # RFC-3463 extended SMTP status code (class.subject.detail, where class is "2", "4" or "5") try: - status_class = esp_event['code'].split('.')[0] + status_class = esp_event.getfirst('code').split('.')[0] except (TypeError, IndexError): # illegal SMTP status code format pass @@ -107,37 +113,84 @@ class MailgunTrackingWebhookView(MailgunBaseWebhookView): RejectReason.BOUNCED if 400 <= mta_status < 600 else RejectReason.OTHER) - # Mailgun merges metadata fields with the other event fields. - # However, it also includes the original message headers, - # which have the metadata separately as X-Mailgun-Variables. - try: - headers = json.loads(esp_event['message-headers']) - except (KeyError, ): - metadata = {} - else: - variables = [value for [field, value] in headers - if field == 'X-Mailgun-Variables'] - if len(variables) >= 1: - # Each X-Mailgun-Variables value is JSON. Parse and merge them all into single dict: - metadata = combine(*[json.loads(value) for value in variables]) - else: - metadata = {} + metadata = self._extract_metadata(esp_event) - # tags are sometimes delivered as X-Mailgun-Tag fields, sometimes as tag - tags = esp_event.getlist('tag', esp_event.getlist('X-Mailgun-Tag', [])) + # tags are supposed to be in 'tag' fields, but are sometimes in undocumented X-Mailgun-Tag + tags = esp_event.getlist('tag', None) or esp_event.getlist('X-Mailgun-Tag', []) return AnymailTrackingEvent( event_type=event_type, timestamp=timestamp, message_id=message_id, - event_id=esp_event.get('token', None), - recipient=esp_event.get('recipient', None), + event_id=esp_event.get('token', None), # use *last* value of token + recipient=esp_event.getfirst('recipient', None), reject_reason=reject_reason, description=description, mta_response=mta_response, tags=tags, metadata=metadata, - click_url=esp_event.get('url', None), - user_agent=esp_event.get('user-agent', None), + click_url=esp_event.getfirst('url', None), + user_agent=esp_event.getfirst('user-agent', None), esp_event=esp_event, ) + + def _extract_metadata(self, esp_event): + # Mailgun merges user-variables into the POST fields. If you know which user variable + # you want to retrieve--and it doesn't conflict with a Mailgun event field--that's fine. + # But if you want to extract all user-variables (like we do), it's more complicated... + event_type = esp_event.getfirst('event') + metadata = {} + + if 'message-headers' in esp_event: + # For events where original message headers are available, it's most reliable + # to recover user-variables from the X-Mailgun-Variables header(s). + headers = json.loads(esp_event['message-headers']) + variables = [value for [field, value] in headers if field == 'X-Mailgun-Variables'] + if len(variables) >= 1: + # Each X-Mailgun-Variables value is JSON. Parse and merge them all into single dict: + metadata = combine(*[json.loads(value) for value in variables]) + + elif event_type in self._known_event_fields: + # For other events, we must extract from the POST fields, ignoring known Mailgun + # event parameters, and treating all other values as user-variables. + known_fields = self._known_event_fields[event_type] + for field, values in esp_event.lists(): + if field not in known_fields: + # Unknown fields are assumed to be user-variables. (There should really only be + # a single value, but just in case take the last one to match QueryDict semantics.) + metadata[field] = values[-1] + elif field == 'tag': + # There's no way to distinguish a user-variable named 'tag' from an actual tag, + # so don't treat this/these value(s) as metadata. + pass + elif len(values) == 1: + # This is an expected event parameter, and since there's only a single value + # it must be the event param, not metadata. + pass + else: + # This is an expected event parameter, but there are (at least) two values. + # One is the event param, and the other is a user-variable metadata value. + # Which is which depends on the field: + if field in {'signature', 'timestamp', 'token'}: + metadata[field] = values[0] # values = [user-variable, event-param] + else: + metadata[field] = values[-1] # values = [event-param, user-variable] + + return metadata + + _common_event_fields = { + # These fields are documented to appear in all Mailgun opened, clicked and unsubscribed events: + 'event', 'recipient', 'domain', 'ip', 'country', 'region', 'city', 'user-agent', 'device-type', + 'client-type', 'client-name', 'client-os', 'campaign-id', 'campaign-name', 'tag', 'mailing-list', + 'timestamp', 'token', 'signature', + # Undocumented, but observed in actual events: + 'body-plain', 'h', 'message-id', + } + _known_event_fields = { + # For all Mailgun event types that *don't* include message-headers, + # map Mailgun (not normalized) event type to set of expected event fields. + # Used for metadata extraction. + 'clicked': _common_event_fields | {'url'}, + 'opened': _common_event_fields, + 'unsubscribed': _common_event_fields, + } diff --git a/docs/esps/mailgun.rst b/docs/esps/mailgun.rst index a9c1f0e..a5395d4 100644 --- a/docs/esps/mailgun.rst +++ b/docs/esps/mailgun.rst @@ -126,6 +126,20 @@ values directly to Mailgun. You can use any of the (non-file) parameters listed .. _Mailgun sending docs: https://documentation.mailgun.com/api-sending.html#sending +.. _mailgun-quirks: + +Limitations and quirks +---------------------- + +**Metadata keys and tracking webhooks** + Because of the way Mailgun supplies custom data (user-variables) to webhooks, + there are a few metadata keys that Anymail cannot reliably retrieve in some + tracking events. You should avoid using "body-plain", "h", "message-headers", + "message-id" or "tag" as :attr:`~anymail.message.AnymailMessage.metadata` keys + if you need to access that metadata from an opened, clicked, or unsubscribed + :ref:`tracking event ` handler. + + .. _mailgun-templates: Batch sending/merge and ESP templates diff --git a/tests/test_mailgun_webhooks.py b/tests/test_mailgun_webhooks.py index 7a5fc44..00977fb 100644 --- a/tests/test_mailgun_webhooks.py +++ b/tests/test_mailgun_webhooks.py @@ -225,14 +225,15 @@ class MailgunDeliveryTestCase(WebhookTestCase): self.assertEqual(event.reject_reason, "bounced") self.assertIn("RecipNotFound", event.mta_response) - def test_metadata(self): + def test_metadata_message_headers(self): # Metadata fields are interspersed with other data, but also in message-headers + # for delivered, bounced and dropped events raw_event = mailgun_sign({ 'event': 'delivered', 'message-headers': json.dumps([ ["X-Mailgun-Variables", "{\"custom1\": \"value1\", \"custom2\": \"{\\\"key\\\":\\\"value\\\"}\"}"], ]), - 'custom1': 'value', + 'custom1': 'value1', 'custom2': '{"key":"value"}', # you can store JSON, but you'll need to unpack it yourself }) self.client.post('/anymail/mailgun/tracking/', data=raw_event) @@ -240,6 +241,68 @@ class MailgunDeliveryTestCase(WebhookTestCase): event = kwargs['event'] self.assertEqual(event.metadata, {"custom1": "value1", "custom2": '{"key":"value"}'}) + def test_metadata_post_fields(self): + # Metadata fields are only interspersed with other event params + # for opened, clicked, unsubscribed events + raw_event = mailgun_sign({ + 'event': 'clicked', + 'custom1': 'value1', + 'custom2': '{"key":"value"}', # you can store JSON, but you'll need to unpack it yourself + }) + self.client.post('/anymail/mailgun/tracking/', data=raw_event) + kwargs = self.assert_handler_called_once_with(self.tracking_handler) + event = kwargs['event'] + self.assertEqual(event.metadata, {"custom1": "value1", "custom2": '{"key":"value"}'}) + + def test_metadata_key_conflicts(self): + # If you happen to name metadata (user-variable) keys the same as Mailgun + # event properties, Mailgun will include both in the webhook post. + # Make sure we don't confuse them. + metadata = { + "event": "metadata-event", + "recipient": "metadata-recipient", + "signature": "metadata-signature", + "timestamp": "metadata-timestamp", + "token": "metadata-token", + "ordinary field": "ordinary metadata value", + } + + raw_event = mailgun_sign({ + 'event': 'clicked', + 'recipient': 'actual-recipient@example.com', + 'token': 'actual-event-token', + 'timestamp': '1461261330', + 'url': 'http://clicked.example.com/actual/event/param', + 'h': "an (undocumented) Mailgun event param", + 'tag': ["actual-tag-1", "actual-tag-2"], + }) + + # Simulate how Mailgun merges user-variables fields into event: + for key in metadata.keys(): + if key in raw_event: + if key in {'signature', 'timestamp', 'token'}: + # For these fields, Mailgun's value appears after the metadata value + raw_event[key] = [metadata[key], raw_event[key]] + elif key == 'message-headers': + pass # Mailgun won't merge this field into the event + else: + # For all other fields, the defined event value comes first + raw_event[key] = [raw_event[key], metadata[key]] + else: + raw_event[key] = metadata[key] + + response = self.client.post('/anymail/mailgun/tracking/', data=raw_event) + self.assertEqual(response.status_code, 200) # if this fails, signature checking is using metadata values + + kwargs = self.assert_handler_called_once_with(self.tracking_handler) + event = kwargs['event'] + self.assertEqual(event.event_type, "clicked") + self.assertEqual(event.recipient, "actual-recipient@example.com") + self.assertEqual(event.timestamp.isoformat(), "2016-04-21T17:55:30+00:00") + self.assertEqual(event.event_id, "actual-event-token") + self.assertEqual(event.tags, ["actual-tag-1", "actual-tag-2"]) + self.assertEqual(event.metadata, metadata) + def test_tags(self): # Most events include multiple 'tag' fields for message's tags raw_event = mailgun_sign({ diff --git a/tests/test_utils.py b/tests/test_utils.py index b6c9f6e..507b355 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,6 +4,7 @@ import base64 from unittest import skipIf import six +from django.http import QueryDict from django.test import SimpleTestCase, RequestFactory, override_settings from django.utils.translation import ugettext_lazy @@ -22,7 +23,7 @@ from anymail.utils import ( parse_address_list, EmailAddress, is_lazy, force_non_lazy, force_non_lazy_dict, force_non_lazy_list, update_deep, - get_request_uri, get_request_basic_auth, parse_rfc2822date) + get_request_uri, get_request_basic_auth, parse_rfc2822date, querydict_getfirst) class ParseAddressListTests(SimpleTestCase): @@ -299,6 +300,21 @@ class RequestUtilsTests(SimpleTestCase): "https://user:pass@secret.example.com:8989/path/to/?query") +class QueryDictUtilsTests(SimpleTestCase): + def test_querydict_getfirst(self): + q = QueryDict("a=one&a=two&a=three") + q.getfirst = querydict_getfirst.__get__(q) + self.assertEqual(q.getfirst('a'), "one") + + # missing key exception: + with self.assertRaisesMessage(KeyError, "not a key"): + q.getfirst("not a key") + + # defaults: + self.assertEqual(q.getfirst('not a key', "beta"), "beta") + self.assertIsNone(q.getfirst('not a key', None)) + + class ParseRFC2822DateTests(SimpleTestCase): def test_with_timezones(self): dt = parse_rfc2822date("Tue, 24 Oct 2017 10:11:35 -0700")