mirror of
https://github.com/pacnpal/django-anymail.git
synced 2025-12-20 03:41:05 -05:00
Inbound: fix charset handling in .text, .html, .get_content_text()
Make `AnymailInboundMessage.text`, `.html` and `.get_content_text()` usually do the right thing for non-UTF-8 messages/attachments. Fixes an incorrect UnicodeDecodeError when receiving a message encoded in a non-UTF-8 charset (e.g., ISO-8859-1), and improves handling for inbound messages that were not properly encoded by the sender. * Decode using the message's (or attachment's) declared charset by default (rather than always defaulting to 'utf-8'); you can still override with `get_content_text(charset=...)`. * Add `errors` param to `get_content_text()`, defaulting to 'replace'. Mis-encoded messages will now use the Unicode replacement character rather than raising errors. (Use `get_content_text(errors='strict')` for the previous behavior.)
This commit is contained in:
@@ -199,9 +199,7 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas
|
||||
# should themselves be AnymailInboundMessage.
|
||||
for part in self.walk():
|
||||
if part.get_content_type() == content_type and not part.is_attachment():
|
||||
payload = part.get_payload(decode=True)
|
||||
if payload is not None:
|
||||
return payload.decode('utf-8')
|
||||
return part.get_content_text()
|
||||
return None
|
||||
|
||||
# Backport from Python 3.5 email.message.Message
|
||||
@@ -238,7 +236,7 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas
|
||||
"(perhaps you want as_bytes()?)")
|
||||
return self.get_payload(decode=True)
|
||||
|
||||
def get_content_text(self, charset='utf-8'):
|
||||
def get_content_text(self, charset=None, errors=None):
|
||||
"""Return the payload decoded to text"""
|
||||
maintype = self.get_content_maintype()
|
||||
if maintype == 'message':
|
||||
@@ -252,7 +250,13 @@ class AnymailInboundMessage(Message, object): # `object` ensures new-style clas
|
||||
# and it's not clear which one is the "content".
|
||||
raise ValueError("get_content_text() is not valid on multipart messages "
|
||||
"(perhaps you want as_string()?)")
|
||||
return self.get_payload(decode=True).decode(charset)
|
||||
else:
|
||||
payload = self.get_payload(decode=True)
|
||||
if payload is None:
|
||||
return payload
|
||||
charset = charset or self.get_content_charset('US-ASCII')
|
||||
errors = errors or 'replace'
|
||||
return payload.decode(charset, errors=errors)
|
||||
|
||||
def as_uploaded_file(self):
|
||||
"""Return the attachment converted to a Django UploadedFile"""
|
||||
|
||||
Reference in New Issue
Block a user