Fix error when trying to upload a document from an email account with 'from' and 'subject' metadata. Fix typo in the message.headers.get() call from 'Subjet' to 'Subject'. On multi part emails keep the original From and Subject properties for all subsequent parts if the sub parts don't specify them. Fixes issue #481. Thanks to Robert Schöftner @robert.schoeftner for the report and debug information.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-06-07 20:42:49 -04:00
parent 76513e7b38
commit 4d11234ba5
5 changed files with 154 additions and 20 deletions

View File

@@ -170,6 +170,10 @@
- Change the error log file handler class to RotatingFileHandler to avoid an indefinitely growing log file.
- Disable embedded signature verification during the perform upgrade command.
- Replace the DOCUMENTS_LANGUAGE_CHOICES setting option. Replaced with the new DOCUMENTS_LANGUAGE_CODES.
- Fix error when trying to upload a document from an email account with 'from' and 'subject' metadata.
- Fix typo in the message.headers.get() call from 'Subjet' to 'Subject'.
- On multi part emails keep the original From and Subject properties for all subsequent parts if the sub parts don't specify them. Fixes issue #481. Thanks to Robert Schöftner @robert.schoeftner for the report and debug information.
2.7.3 (2017-09-11)

View File

@@ -516,6 +516,9 @@ Other changes worth mentioning
new DOCUMENTS_LANGUAGE_CODES.
- Reduce default language code choice from 7800 to the top 100 spoken
languages and related (https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers)
- Fix error when trying to upload a document from an email account with 'from' and 'subject' metadata.
- Fix typo in the message.headers.get() call from 'Subjet' to 'Subject'.
- On multi part emails keep the original From and Subject properties for all subsequent parts if the sub parts don't specify them. Fixes issue #481. Thanks to Robert Schöftner @robert.schoeftner for the report and debug information.
Removals
--------
@@ -607,7 +610,7 @@ Bugs fixed or issues closed
* `GitLab issue #467 <https://gitlab.com/mayan-edms/mayan-edms/issues/467>`_ mail attachments without content-disposition are lost
* `GitLab issue #468 <https://gitlab.com/mayan-edms/mayan-edms/issues/468>`_ plain text e-mails without charset do not work
* `GitLab issue #470 <https://gitlab.com/mayan-edms/mayan-edms/issues/470>`_ Enable Django variable for HTML encoded emails
* `GitLab issue #481 <https://gitlab.com/mayan-edms/mayan-edms/issues/481>`_ IMAP sources with metadata not working in 3.0rc1
* `GitHub issue #264 <https://github.com/mayan-edms/mayan-edms/issues/264>`_ migrate fails on document_states 0004_workflow_internal_name
* `GitHub issue #269 <https://github.com/mayan-edms/mayan-edms/issues/269>`_ Lack of authentication for document previews

View File

@@ -80,6 +80,7 @@ class Source(models.Model):
Handle an upload request from a file object which may be an individual
document or a compressed file containing multiple documents.
"""
documents = []
if not document_type:
document_type = self.document_type
@@ -94,16 +95,26 @@ class Source(models.Model):
compressed_file = CompressedFile(file_object)
for compressed_file_child in compressed_file.children():
kwargs.update({'label': force_text(compressed_file_child)})
self.upload_document(
file_object=File(compressed_file_child), **kwargs
documents.append(
self.upload_document(
file_object=File(compressed_file_child), **kwargs
)
)
compressed_file_child.close()
except NotACompressedFile:
logging.debug('Exception: NotACompressedFile')
self.upload_document(file_object=file_object, **kwargs)
documents.append(
self.upload_document(file_object=file_object, **kwargs)
)
else:
self.upload_document(file_object=file_object, **kwargs)
documents.append(
self.upload_document(file_object=file_object, **kwargs)
)
# Return a list of newly created documents. Used by the email source
# to assign the from and subject metadata values.
return documents
def get_upload_file_object(self, form_data):
pass
@@ -549,28 +560,42 @@ class EmailBaseModel(IntervalBaseModel):
)
@staticmethod
def process_message(source, message_text):
def process_message(source, message_text, message_properties=None):
from flanker import mime
counter = 1
message = mime.from_string(force_str(message_text))
metadata_dictionary = {}
if not message_properties:
message_properties = {}
message_properties['Subject'] = message_properties.get(
'Subject', message.headers.get('Subject')
)
message_properties['From'] = message_properties.get(
'From', message.headers.get('From')
)
if source.subject_metadata_type:
metadata_dictionary[
source.subject_metadata_type.name
] = message.headers.get('Subjet')
] = message_properties.get('Subject')
if source.from_metadata_type:
metadata_dictionary[
source.from_metadata_type.name
] = message.headers.get('From')
] = message_properties.get('From')
# Messages are tree based, do nested processing of message parts until
# a message with no children is found, then work our way up.
if message.parts:
for part in message.parts:
EmailBaseModel.process_message(source=source, message_text=part.to_string())
EmailBaseModel.process_message(
source=source, message_text=part.to_string(),
message_properties=message_properties
)
else:
# Treat inlines as attachments, both are extracted and saved as
# documents
@@ -585,17 +610,18 @@ class EmailBaseModel(IntervalBaseModel):
'Got metadata dictionary: %s', metadata_dictionary
)
else:
document = source.handle_upload(
documents = source.handle_upload(
document_type=source.document_type,
file_object=file_object, expand=(
source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
)
)
if metadata_dictionary:
set_bulk_metadata(
document=document,
metadata_dictionary=metadata_dictionary
)
for document in documents:
set_bulk_metadata(
document=document,
metadata_dictionary=metadata_dictionary
)
else:
# If it is not an attachment then it should be a body message part.
# Another option is to use message.is_body()
@@ -605,16 +631,17 @@ class EmailBaseModel(IntervalBaseModel):
label = 'email_body.txt'
with ContentFile(content=message.body, name=label) as file_object:
document = source.handle_upload(
documents = source.handle_upload(
document_type=source.document_type,
file_object=file_object,
expand=SOURCE_UNCOMPRESS_CHOICE_N
)
if metadata_dictionary:
set_bulk_metadata(
document=document,
metadata_dictionary=metadata_dictionary
)
for document in documents:
set_bulk_metadata(
document=document,
metadata_dictionary=metadata_dictionary
)
class IMAPEmail(EmailBaseModel):

View File

@@ -64,6 +64,8 @@ Content-Disposition: attachment; filename="=?UTF-8?B?QW1wZWxtw6RubmNoZW4udHh0?="
SGFsbG8gQW1wZWxtw6RubmNoZW4hCg==
--RS1tYWlsIENsaWVudA==--'''
TEST_EMAIL_BASE64_FILENAME_FROM = 'noreply@example.com'
TEST_EMAIL_BASE64_FILENAME_SUBJECT = 'Scan to E-mail Server Job'
TEST_EMAIL_NO_CONTENT_TYPE = '''MIME-Version: 1.0
Received: by 10.0.0.1 with HTTP; Mon, 9 Apr 2018 00:00:00 -0400 (AST)
X-Originating-IP: [10.0.0.1]

View File

@@ -14,14 +14,16 @@ from documents.tests import (
TEST_NON_ASCII_DOCUMENT_FILENAME, TEST_NON_ASCII_DOCUMENT_PATH,
TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH
)
from metadata.models import MetadataType
from ..literals import SOURCE_UNCOMPRESS_CHOICE_Y
from ..models import (
EmailBaseModel, POP3Email, WatchFolderSource, WebFormSource
EmailBaseModel, IMAPEmail, POP3Email, WatchFolderSource, WebFormSource
)
from .literals import (
TEST_EMAIL_ATTACHMENT_AND_INLINE, TEST_EMAIL_BASE64_FILENAME,
TEST_EMAIL_BASE64_FILENAME_FROM, TEST_EMAIL_BASE64_FILENAME_SUBJECT,
TEST_EMAIL_INLINE_IMAGE, TEST_EMAIL_NO_CONTENT_TYPE,
TEST_EMAIL_NO_CONTENT_TYPE_STRING
)
@@ -129,6 +131,102 @@ class EmailFilenameDecodingTestCase(BaseTestCase):
),
)
def test_decode_email_and_store_from_and_subject_as_metadata(self):
    """
    Process a test email through a source configured with 'from' and
    'subject' metadata types and verify that the resulting document
    stores the expected From and Subject values as metadata.
    """
    from_metadata_type = MetadataType.objects.create(name='from')
    subject_metadata_type = MetadataType.objects.create(name='subject')

    # Both metadata types must be enabled for the document type before
    # they can be assigned to a document of that type.
    for metadata_type in (from_metadata_type, subject_metadata_type):
        self.document_type.metadata.create(metadata_type=metadata_type)

    self._create_email_source()
    self.source.from_metadata_type = from_metadata_type
    self.source.subject_metadata_type = subject_metadata_type
    self.source.save()

    EmailBaseModel.process_message(
        message_text=TEST_EMAIL_BASE64_FILENAME, source=self.source
    )

    document = Document.objects.first()
    self.assertEqual(document.label, 'Ampelm\xe4nnchen.txt')

    expected_values = (
        (from_metadata_type, TEST_EMAIL_BASE64_FILENAME_FROM),
        (subject_metadata_type, TEST_EMAIL_BASE64_FILENAME_SUBJECT),
    )
    for metadata_type, expected_value in expected_values:
        self.assertEqual(
            document.metadata.get(metadata_type=metadata_type).value,
            expected_value
        )
@override_settings(OCR_AUTO_OCR=False)
class IMAPSourceTestCase(BaseTestCase):
    """
    Exercise the IMAP email source against an in-memory stand-in for
    ``imaplib.IMAP4_SSL`` so that the test needs no network access.
    """

    class MockIMAP4_SSL(object):
        """
        Minimal fake IMAP client. Each method accepts the arguments of the
        corresponding imaplib call and returns canned data.
        """

        def close(self):
            # NOTE(review): session teardown call; provided as a no-op in
            # case the source closes the mailbox after fetching.
            return

        def expunge(self):
            # NOTE(review): provided as a no-op in case the source purges
            # the messages it flagged as deleted.
            return

        def fetch(self, message_set, message_parts):
            # The consumer reads the raw message text from data[0][1], so
            # the data element has to be a list whose first item is an
            # (envelope, message) pair, which is the shape imaplib uses.
            # The message served is the one whose attachment file name the
            # test asserts on.
            return (
                'STATUS', [
                    ('1 (RFC822)', TEST_EMAIL_BASE64_FILENAME)
                ]
            )

        def login(self, username, password):
            return

        def logout(self):
            # NOTE(review): session teardown call; provided as a no-op.
            return

        def search(self, charset, *criterion):
            # A single message, number 1, matches every search.
            return (None, ['1'])

        def select(self, mailbox):
            return

        def store(self, message_set, command, flags):
            # Called by the consumer to flag a processed message as
            # deleted; nothing to record in the fake.
            return

        def user(self, username):
            # Not part of the imaplib API; kept so existing callers of the
            # mock keep working.
            return

    def setUp(self):
        super(IMAPSourceTestCase, self).setUp()
        self.document_type = DocumentType.objects.create(
            label=TEST_DOCUMENT_TYPE_LABEL
        )

    def tearDown(self):
        self.document_type.delete()
        super(IMAPSourceTestCase, self).tearDown()

    @mock.patch('imaplib.IMAP4_SSL')
    def test_download_document(self, mock_imaplib):
        """
        Checking the source must create a document named after the
        attachment of the message served by the fake IMAP client.
        """
        mock_imaplib.return_value = IMAPSourceTestCase.MockIMAP4_SSL()
        self.source = IMAPEmail.objects.create(
            document_type=self.document_type, label='', host='', password='',
            username=''
        )

        self.source.check_source()
        self.assertEqual(
            Document.objects.first().label, 'Ampelm\xe4nnchen.txt'
        )
@override_settings(OCR_AUTO_OCR=False)
class POP3SourceTestCase(BaseTestCase):