diff --git a/HISTORY.rst b/HISTORY.rst index b8061c8540..0c201ef1c6 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -20,6 +20,9 @@ trash. * Remove the INSTALLED_APPS setting. Replace it with the new COMMON_EXTRA_APPS and COMMON_DISABLED_APPS. +* Improve email metadata support. Can now work on + email with nested parts. Also the metadata.yaml + attachment no longer needs to be the first attachment. 3.2.3 (2019-06-21) ================== diff --git a/docs/releases/3.2.4.rst b/docs/releases/3.2.4.rst index 8abc58cbc3..25b0277f98 100644 --- a/docs/releases/3.2.4.rst +++ b/docs/releases/3.2.4.rst @@ -34,6 +34,9 @@ Changes trash. - Remove the INSTALLED_APPS setting. Replace it with the new COMMON_EXTRA_APPS and COMMON_DISABLED_APPS. +- Improve email metadata support. Can now work on + email with nested parts. Also the metadata.yaml + attachment no longer needs to be the first attachment. Removals -------- @@ -53,7 +56,7 @@ Remove deprecated requirements:: Type in the console:: - $ pip install mayan-edms==3.2.3 + $ pip install mayan-edms==3.2.4 the requirements will also be updated automatically. diff --git a/mayan/apps/sources/models/email_sources.py b/mayan/apps/sources/models/email_sources.py index ca8b13846b..4c07219088 100644 --- a/mayan/apps/sources/models/email_sources.py +++ b/mayan/apps/sources/models/email_sources.py @@ -16,6 +16,7 @@ from django.db import models from django.utils.encoding import force_bytes from django.utils.translation import ugettext_lazy as _ +from mayan.apps.documents.models import Document from mayan.apps.metadata.api import set_bulk_metadata from mayan.apps.metadata.models import MetadataType @@ -54,8 +55,7 @@ class EmailBaseModel(IntervalBaseModel): help_text=_( 'Name of the attachment that will contains the metadata type ' 'names and value pairs to be assigned to the rest of the ' - 'downloaded attachments. Note: This attachment has to be the ' - 'first attachment.' + 'downloaded attachments.' ), max_length=128, verbose_name=_('Metadata attachment name') ) subject_metadata_type = models.ForeignKey( @@ -85,52 +85,61 @@ class EmailBaseModel(IntervalBaseModel): verbose_name_plural = _('Email sources') @staticmethod - def process_message(source, message_text, message_properties=None): + def process_message(source, message_text): from flanker import mime - counter = 1 - message = mime.from_string(force_bytes(message_text)) metadata_dictionary = {} - if not message_properties: - message_properties = {} - - message_properties['Subject'] = message_properties.get( - 'Subject', message.headers.get('Subject') - ) - - message_properties['From'] = message_properties.get( - 'From', message.headers.get('From') - ) - - if source.subject_metadata_type: - metadata_dictionary[ - source.subject_metadata_type.name - ] = message_properties.get('Subject') + message = mime.from_string(force_bytes(message_text)) if source.from_metadata_type: metadata_dictionary[ source.from_metadata_type.name - ] = message_properties.get('From') + ] = message.headers.get('From') + + if source.subject_metadata_type: + metadata_dictionary[ + source.subject_metadata_type.name + ] = message.headers.get('Subject') + + document_ids, parts_metadata_dictionary = EmailBaseModel._process_message(source=source, message=message) + + metadata_dictionary.update(parts_metadata_dictionary) + + if metadata_dictionary: + for document in Document.objects.filter(id__in=document_ids): + set_bulk_metadata( + document=document, + metadata_dictionary=metadata_dictionary + ) + + @staticmethod + def _process_message(source, message): + counter = 1 + document_ids = [] + metadata_dictionary = {} # Messages are tree based, do nested processing of message parts until # a message with no children is found, then work out way up. if message.parts: for part in message.parts: - EmailBaseModel.process_message( - source=source, message_text=part.to_string(), - message_properties=message_properties + part_document_ids, part_metadata_dictionary = EmailBaseModel._process_message( + source=source, message=part, ) + + document_ids.extend(part_document_ids) + metadata_dictionary.update(part_metadata_dictionary) else: # Treat inlines as attachments, both are extracted and saved as # documents if message.is_attachment() or message.is_inline(): - # Reject zero length attachments if len(message.body) == 0: - return + return document_ids, metadata_dictionary label = message.detected_file_name or 'attachment-{}'.format(counter) + counter = counter + 1 + with ContentFile(content=message.body, name=label) as file_object: if label == source.metadata_attachment_name: metadata_dictionary = yaml.load( @@ -147,12 +156,10 @@ class EmailBaseModel(IntervalBaseModel): source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y ) ) - if metadata_dictionary: - for document in documents: - set_bulk_metadata( - document=document, - metadata_dictionary=metadata_dictionary - ) + + for document in documents: + document_ids.append(document.pk) + else: # If it is not an attachment then it should be a body message part. # Another option is to use message.is_body() @@ -168,12 +175,11 @@ class EmailBaseModel(IntervalBaseModel): expand=SOURCE_UNCOMPRESS_CHOICE_N, file_object=file_object ) - if metadata_dictionary: - for document in documents: - set_bulk_metadata( - document=document, - metadata_dictionary=metadata_dictionary - ) + + for document in documents: + document_ids.append(document.pk) + + return document_ids, metadata_dictionary def clean(self): if self.subject_metadata_type: diff --git a/mayan/apps/sources/tests/test_models.py b/mayan/apps/sources/tests/test_models.py index 0d7fcdc624..9288ff57be 100644 --- a/mayan/apps/sources/tests/test_models.py +++ b/mayan/apps/sources/tests/test_models.py @@ -6,7 +6,13 @@ import shutil import mock from pathlib2 import Path +import yaml +try: + from yaml import CSafeDumper as SafeDumper +except ImportError: + from yaml import SafeDumper +from django.core import mail from django.utils.encoding import force_text from mayan.apps.documents.models import Document @@ -190,6 +196,72 @@ class EmailBaseTestCase(GenericDocumentTestCase): # Only two attachments and a body document self.assertEqual(2, Document.objects.count()) + def test_metadata_yaml_attachment(self): + TEST_METADATA_VALUE_1 = 'test value 1' + TEST_METADATA_VALUE_2 = 'test value 2' + + test_metadata_type_1 = MetadataType.objects.create( + name='test_metadata_type_1' + ) + test_metadata_type_2 = MetadataType.objects.create( + name='test_metadata_type_2' + ) + self.test_document_type.metadata.create( + metadata_type=test_metadata_type_1 + ) + self.test_document_type.metadata.create( + metadata_type=test_metadata_type_2 + ) + + test_metadata_yaml = yaml.dump( + Dumper=SafeDumper, data={ + test_metadata_type_1.name: TEST_METADATA_VALUE_1, + test_metadata_type_2.name: TEST_METADATA_VALUE_2, + } + ) + + # Create email with a test attachment first, then the metadata.yaml + # attachment + with mail.get_connection( + backend='django.core.mail.backends.locmem.EmailBackend' + ) as connection: + email_message = mail.EmailMultiAlternatives( + body='test email body', connection=connection, + subject='test email subject', to=['test@example.com'], + ) + + email_message.attach( + filename='test_attachment', + content='test_content', + ) + + email_message.attach( + filename='metadata.yaml', + content=test_metadata_yaml, + ) + + email_message.send() + + self._create_email_source() + self.source.store_body = True + self.source.save() + + EmailBaseModel.process_message( + source=self.source, message_text=mail.outbox[0].message() + ) + + self.assertEqual(Document.objects.count(), 2) + + for document in Document.objects.all(): + self.assertEqual( + document.metadata.get(metadata_type=test_metadata_type_1).value, + TEST_METADATA_VALUE_1 + ) + self.assertEqual( + document.metadata.get(metadata_type=test_metadata_type_2).value, + TEST_METADATA_VALUE_2 + ) + class IMAPSourceTestCase(GenericDocumentTestCase): auto_upload_document = False