From 4d11234ba50fc9efb22e8ad37534255aad150627 Mon Sep 17 00:00:00 2001
From: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
Date: Thu, 7 Jun 2018 20:42:49 -0400
Subject: [PATCH] =?UTF-8?q?Fix=20error=20when=20trying=20to=20upload=20a?=
 =?UTF-8?q?=20document=20from=20and=20email=20account=20with=20'from'=20an?=
 =?UTF-8?q?d=20'subject'=20metadata.=20Fix=20typo=20on=20message.header=20?=
 =?UTF-8?q?get=20from=20'Suject'=20to=20'Subject'.=20On=20multi=20part=20e?=
 =?UTF-8?q?mails=20keep=20the=20original=20From=20and=20Subject=20properti?=
 =?UTF-8?q?es=20for=20all=20subsequent=20parts=20if=20the=20sub=20parts=20?=
 =?UTF-8?q?don't=20specify=20them.=20Fixes=20issue=20#481.=20Thanks=20to?=
 =?UTF-8?q?=20Robert=20Sch=C3=B6ftner=20@robert.schoeftner=20for=20the=20r?=
 =?UTF-8?q?eport=20and=20debug=20information.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
---
 HISTORY.rst                             |   4 +
 docs/releases/3.0.rst                   |   5 +-
 mayan/apps/sources/models.py            |  63 ++++++++++-----
 mayan/apps/sources/tests/literals.py    |   2 +
 mayan/apps/sources/tests/test_models.py | 100 +++++++++++++++++++++++-
 5 files changed, 154 insertions(+), 20 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index fd010e7f98..30d96d0f3d 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -170,6 +170,10 @@
 - Change the error log file handle class to RotatingFileHandle to avoid an indefinitely growing log file.
 - Disable embedded signatute verification during the perform upgrade command.
 - Replace the DOCUMENTS_LANGUAGE_CHOICES setting option. Replaced with the new DOCUMENTS_LANGUAGE_CODES.
+- Fix error when trying to upload a document from an email account with 'from' and 'subject' metadata.
+- Fix typo on message.headers get from 'Subjet' to 'Subject'.
+- On multi part emails keep the original From and Subject properties for all subsequent parts if the sub parts don't specify them. Fixes issue #481. Thanks to Robert Schöftner @robert.schoeftner for the report and debug information.
+
 
 
 2.7.3 (2017-09-11)
diff --git a/docs/releases/3.0.rst b/docs/releases/3.0.rst
index aaf2162ee4..ada943f87d 100644
--- a/docs/releases/3.0.rst
+++ b/docs/releases/3.0.rst
@@ -516,6 +516,9 @@ Other changes worth mentioning
   new DOCUMENTS_LANGUAGE_CODES.
 - Reduce default language code choice from 7800 to the top 100 spoken
   languages and related (https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers)
+- Fix error when trying to upload a document from an email account with 'from' and 'subject' metadata.
+- Fix typo on message.headers get from 'Subjet' to 'Subject'.
+- On multi part emails keep the original From and Subject properties for all subsequent parts if the sub parts don't specify them. Fixes issue #481. Thanks to Robert Schöftner @robert.schoeftner for the report and debug information.
 
 Removals
 --------
@@ -607,7 +610,7 @@ Bugs fixed or issues closed
 * `GitLab issue #467 <https://gitlab.com/mayan-edms/mayan-edms/issues/467>`_ mail attachments without content-disposition are lost
 * `GitLab issue #468 <https://gitlab.com/mayan-edms/mayan-edms/issues/468>`_ plain text e-mails without charset do not work
 * `GitLab issue #470 <https://gitlab.com/mayan-edms/mayan-edms/issues/470>`_ Enable Django variable for HTML encoded emails
-
+* `GitLab issue #481 <https://gitlab.com/mayan-edms/mayan-edms/issues/481>`_ IMAP sources with metadata not working in 3.0rc1
 * `GitHub issue #264 <https://github.com/mayan-edms/mayan-edms/issues/264>`_ migrate fails on document_states 0004_workflow_internal_name
 * `GitHub issue #269 <https://github.com/mayan-edms/mayan-edms/issues/269>`_ Lack of authentication for document previews
 
diff --git a/mayan/apps/sources/models.py b/mayan/apps/sources/models.py
index c6b6ee4de5..8fe8f78078 100644
--- a/mayan/apps/sources/models.py
+++ b/mayan/apps/sources/models.py
@@ -80,6 +80,7 @@ class Source(models.Model):
         Handle an upload request from a file object which may be an individual
         document or a compressed file containing multiple documents.
         """
+        documents = []
         if not document_type:
             document_type = self.document_type
 
@@ -94,16 +95,26 @@ class Source(models.Model):
                 compressed_file = CompressedFile(file_object)
                 for compressed_file_child in compressed_file.children():
                     kwargs.update({'label': force_text(compressed_file_child)})
-                    self.upload_document(
-                        file_object=File(compressed_file_child), **kwargs
+                    documents.append(
+                        self.upload_document(
+                            file_object=File(compressed_file_child), **kwargs
+                        )
                     )
                     compressed_file_child.close()
 
             except NotACompressedFile:
                 logging.debug('Exception: NotACompressedFile')
-                self.upload_document(file_object=file_object, **kwargs)
+                documents.append(
+                    self.upload_document(file_object=file_object, **kwargs)
+                )
         else:
-            self.upload_document(file_object=file_object, **kwargs)
+            documents.append(
+                self.upload_document(file_object=file_object, **kwargs)
+            )
+
+        # Return a list of newly created documents. Used by the email source
+        # to assign the from and subject metadata values.
+        return documents
 
     def get_upload_file_object(self, form_data):
         pass
@@ -549,28 +560,42 @@ class EmailBaseModel(IntervalBaseModel):
                 )
 
     @staticmethod
-    def process_message(source, message_text):
+    def process_message(source, message_text, message_properties=None):
         from flanker import mime
 
         counter = 1
         message = mime.from_string(force_str(message_text))
         metadata_dictionary = {}
 
+        if not message_properties:
+            message_properties = {}
+
+        message_properties['Subject'] = message_properties.get(
+            'Subject', message.headers.get('Subject')
+        )
+
+        message_properties['From'] = message_properties.get(
+            'From', message.headers.get('From')
+        )
+
         if source.subject_metadata_type:
             metadata_dictionary[
                 source.subject_metadata_type.name
-            ] = message.headers.get('Subjet')
+            ] = message_properties.get('Subject')
 
         if source.from_metadata_type:
             metadata_dictionary[
                 source.from_metadata_type.name
-            ] = message.headers.get('From')
+            ] = message_properties.get('From')
 
         # Messages are tree based, do nested processing of message parts until
         # a message with no children is found, then work out way up.
         if message.parts:
             for part in message.parts:
-                EmailBaseModel.process_message(source=source, message_text=part.to_string())
+                EmailBaseModel.process_message(
+                    source=source, message_text=part.to_string(),
+                    message_properties=message_properties
+                )
         else:
             # Treat inlines as attachments, both are extracted and saved as
             # documents
@@ -585,17 +610,18 @@ class EmailBaseModel(IntervalBaseModel):
                             'Got metadata dictionary: %s', metadata_dictionary
                         )
                     else:
-                        document = source.handle_upload(
+                        documents = source.handle_upload(
                             document_type=source.document_type,
                             file_object=file_object, expand=(
                                 source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
                             )
                         )
                         if metadata_dictionary:
-                            set_bulk_metadata(
-                                document=document,
-                                metadata_dictionary=metadata_dictionary
-                            )
+                            for document in documents:
+                                set_bulk_metadata(
+                                    document=document,
+                                    metadata_dictionary=metadata_dictionary
+                                )
             else:
                 # If it is not an attachment then it should be a body message part.
                 # Another option is to use message.is_body()
@@ -605,16 +631,17 @@ class EmailBaseModel(IntervalBaseModel):
                     label = 'email_body.txt'
 
                 with ContentFile(content=message.body, name=label) as file_object:
-                    document = source.handle_upload(
+                    documents = source.handle_upload(
                         document_type=source.document_type,
                         file_object=file_object,
                         expand=SOURCE_UNCOMPRESS_CHOICE_N
                     )
                     if metadata_dictionary:
-                        set_bulk_metadata(
-                            document=document,
-                            metadata_dictionary=metadata_dictionary
-                        )
+                        for document in documents:
+                            set_bulk_metadata(
+                                document=document,
+                                metadata_dictionary=metadata_dictionary
+                            )
 
 
 class IMAPEmail(EmailBaseModel):
diff --git a/mayan/apps/sources/tests/literals.py b/mayan/apps/sources/tests/literals.py
index e831359995..d9f6556d86 100644
--- a/mayan/apps/sources/tests/literals.py
+++ b/mayan/apps/sources/tests/literals.py
@@ -64,6 +64,8 @@ Content-Disposition: attachment; filename="=?UTF-8?B?QW1wZWxtw6RubmNoZW4udHh0?="
 SGFsbG8gQW1wZWxtw6RubmNoZW4hCg==
 
 --RS1tYWlsIENsaWVudA==--'''
+TEST_EMAIL_BASE64_FILENAME_FROM = 'noreply@example.com'
+TEST_EMAIL_BASE64_FILENAME_SUBJECT = 'Scan to E-mail Server Job'
 TEST_EMAIL_NO_CONTENT_TYPE = '''MIME-Version: 1.0
 Received: by 10.0.0.1 with HTTP; Mon, 9 Apr 2018 00:00:00 -0400 (AST)
 X-Originating-IP: [10.0.0.1]
diff --git a/mayan/apps/sources/tests/test_models.py b/mayan/apps/sources/tests/test_models.py
index 55dcd95a97..66ed8a8e27 100644
--- a/mayan/apps/sources/tests/test_models.py
+++ b/mayan/apps/sources/tests/test_models.py
@@ -14,14 +14,16 @@ from documents.tests import (
     TEST_NON_ASCII_DOCUMENT_FILENAME, TEST_NON_ASCII_DOCUMENT_PATH,
     TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH
 )
+from metadata.models import MetadataType
 
 from ..literals import SOURCE_UNCOMPRESS_CHOICE_Y
 from ..models import (
-    EmailBaseModel, POP3Email, WatchFolderSource, WebFormSource
+    EmailBaseModel, IMAPEmail, POP3Email, WatchFolderSource, WebFormSource
 )
 
 from .literals import (
     TEST_EMAIL_ATTACHMENT_AND_INLINE, TEST_EMAIL_BASE64_FILENAME,
+    TEST_EMAIL_BASE64_FILENAME_FROM, TEST_EMAIL_BASE64_FILENAME_SUBJECT,
     TEST_EMAIL_INLINE_IMAGE, TEST_EMAIL_NO_CONTENT_TYPE,
     TEST_EMAIL_NO_CONTENT_TYPE_STRING
 )
@@ -129,6 +131,102 @@ class EmailFilenameDecodingTestCase(BaseTestCase):
             ),
         )
 
+    def test_decode_email_and_store_from_and_subject_as_metadata(self):
+        metadata_from = MetadataType.objects.create(name='from')
+        metadata_subject = MetadataType.objects.create(name='subject')
+        self.document_type.metadata.create(metadata_type=metadata_from)
+        self.document_type.metadata.create(metadata_type=metadata_subject)
+
+        self._create_email_source()
+        self.source.from_metadata_type = metadata_from
+        self.source.subject_metadata_type = metadata_subject
+        self.source.save()
+
+        EmailBaseModel.process_message(
+            source=self.source, message_text=TEST_EMAIL_BASE64_FILENAME
+        )
+
+        document = Document.objects.first()
+
+        self.assertEqual(
+            document.label, 'Ampelm\xe4nnchen.txt'
+        )
+        self.assertEqual(
+            document.metadata.get(metadata_type=metadata_from).value,
+            TEST_EMAIL_BASE64_FILENAME_FROM
+        )
+        self.assertEqual(
+            document.metadata.get(metadata_type=metadata_subject).value,
+            TEST_EMAIL_BASE64_FILENAME_SUBJECT
+        )
+
+
+@override_settings(OCR_AUTO_OCR=False)
+class IMAPSourceTestCase(BaseTestCase):
+    class MockIMAP4_SSL(object):
+        #def dele(self, which):
+        #    return
+
+        #def getwelcome(self):
+        #    return
+
+        #def list(self, which=None):
+        #    return (None, ['1 test'])
+
+        #def pass_(self, password):
+        #    return
+
+        #def quit(self):
+        #    return
+
+        #def retr(self, which=None):
+        #    return (
+        #        1, [TEST_EMAIL_BASE64_FILENAME]
+        #    )
+
+        def fetch(self, message_set, message_parts):
+            return 'STATUS', '(1 BODY[{}])'.format(TEST_EMAIL_ATTACHMENT_AND_INLINE)
+            #status, data = mailbox.fetch(message_number, '(RFC822)')
+            #EmailBaseModel.process_message(
+            #    source=self, message_text=data[0][1]
+            #)
+            #mailbox.store(message_number, '+FLAGS', '\\Deleted')
+
+        def login(self, username, password):
+            return
+
+        def search(self, charset, *criterion):
+            return (None, ['1'])
+
+        def select(self, mailbox):
+            return
+
+        def user(self, username):
+            return
+
+    def setUp(self):
+        super(IMAPSourceTestCase, self).setUp()
+        self.document_type = DocumentType.objects.create(
+            label=TEST_DOCUMENT_TYPE_LABEL
+        )
+
+    def tearDown(self):
+        self.document_type.delete()
+        super(IMAPSourceTestCase, self).tearDown()
+
+    @mock.patch('imaplib.IMAP4_SSL')
+    def test_download_document(self, mock_imaplib):
+        mock_imaplib.return_value = IMAPSourceTestCase.MockIMAP4_SSL()
+        self.source = IMAPEmail.objects.create(
+            document_type=self.document_type, label='', host='', password='',
+            username=''
+        )
+
+        self.source.check_source()
+        self.assertEqual(
+            Document.objects.first().label, 'Ampelm\xe4nnchen.txt'
+        )
+
 
 @override_settings(OCR_AUTO_OCR=False)
 class POP3SourceTestCase(BaseTestCase):