Improve email metadata support
The feature can now work on emails with nested parts. Also the metadata.yaml attachment no longer needs to be the first attachment. Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
This commit is contained in:
@@ -20,6 +20,9 @@
|
||||
trash.
|
||||
* Remove the INSTALLED_APPS setting. Replace it with
|
||||
the new COMMON_EXTRA_APPS and COMMON_DISABLED_APPS.
|
||||
* Improve email metadata support. Can now work on
|
||||
emails with nested parts. Also the metadata.yaml
|
||||
attachment no longer needs to be the first attachment.
|
||||
|
||||
3.2.3 (2019-06-21)
|
||||
==================
|
||||
|
||||
@@ -34,6 +34,9 @@ Changes
|
||||
trash.
|
||||
- Remove the INSTALLED_APPS setting. Replace it with
|
||||
the new COMMON_EXTRA_APPS and COMMON_DISABLED_APPS.
|
||||
- Improve email metadata support. Can now work on
|
||||
emails with nested parts. Also the metadata.yaml
|
||||
attachment no longer needs to be the first attachment.
|
||||
|
||||
Removals
|
||||
--------
|
||||
@@ -53,7 +56,7 @@ Remove deprecated requirements::
|
||||
|
||||
Type in the console::
|
||||
|
||||
$ pip install mayan-edms==3.2.3
|
||||
$ pip install mayan-edms==3.2.4
|
||||
|
||||
the requirements will also be updated automatically.
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from django.db import models
|
||||
from django.utils.encoding import force_bytes
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from mayan.apps.documents.models import Document
|
||||
from mayan.apps.metadata.api import set_bulk_metadata
|
||||
from mayan.apps.metadata.models import MetadataType
|
||||
|
||||
@@ -54,8 +55,7 @@ class EmailBaseModel(IntervalBaseModel):
|
||||
help_text=_(
|
||||
'Name of the attachment that will contains the metadata type '
|
||||
'names and value pairs to be assigned to the rest of the '
|
||||
'downloaded attachments. Note: This attachment has to be the '
|
||||
'first attachment.'
|
||||
'downloaded attachments.'
|
||||
), max_length=128, verbose_name=_('Metadata attachment name')
|
||||
)
|
||||
subject_metadata_type = models.ForeignKey(
|
||||
@@ -85,52 +85,61 @@ class EmailBaseModel(IntervalBaseModel):
|
||||
verbose_name_plural = _('Email sources')
|
||||
|
||||
@staticmethod
|
||||
def process_message(source, message_text, message_properties=None):
|
||||
def process_message(source, message_text):
|
||||
from flanker import mime
|
||||
|
||||
counter = 1
|
||||
message = mime.from_string(force_bytes(message_text))
|
||||
metadata_dictionary = {}
|
||||
|
||||
if not message_properties:
|
||||
message_properties = {}
|
||||
|
||||
message_properties['Subject'] = message_properties.get(
|
||||
'Subject', message.headers.get('Subject')
|
||||
)
|
||||
|
||||
message_properties['From'] = message_properties.get(
|
||||
'From', message.headers.get('From')
|
||||
)
|
||||
|
||||
if source.subject_metadata_type:
|
||||
metadata_dictionary[
|
||||
source.subject_metadata_type.name
|
||||
] = message_properties.get('Subject')
|
||||
message = mime.from_string(force_bytes(message_text))
|
||||
|
||||
if source.from_metadata_type:
|
||||
metadata_dictionary[
|
||||
source.from_metadata_type.name
|
||||
] = message_properties.get('From')
|
||||
] = message.headers.get('From')
|
||||
|
||||
if source.subject_metadata_type:
|
||||
metadata_dictionary[
|
||||
source.subject_metadata_type.name
|
||||
] = message.headers.get('Subject')
|
||||
|
||||
document_ids, parts_metadata_dictionary = EmailBaseModel._process_message(source=source, message=message)
|
||||
|
||||
metadata_dictionary.update(parts_metadata_dictionary)
|
||||
|
||||
if metadata_dictionary:
|
||||
for document in Document.objects.filter(id__in=document_ids):
|
||||
set_bulk_metadata(
|
||||
document=document,
|
||||
metadata_dictionary=metadata_dictionary
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _process_message(source, message):
|
||||
counter = 1
|
||||
document_ids = []
|
||||
metadata_dictionary = {}
|
||||
|
||||
# Messages are tree based, do nested processing of message parts until
|
||||
# a message with no children is found, then work our way up.
|
||||
if message.parts:
|
||||
for part in message.parts:
|
||||
EmailBaseModel.process_message(
|
||||
source=source, message_text=part.to_string(),
|
||||
message_properties=message_properties
|
||||
part_document_ids, part_metadata_dictionary = EmailBaseModel._process_message(
|
||||
source=source, message=part,
|
||||
)
|
||||
|
||||
document_ids.extend(part_document_ids)
|
||||
metadata_dictionary.update(part_metadata_dictionary)
|
||||
else:
|
||||
# Treat inlines as attachments, both are extracted and saved as
|
||||
# documents
|
||||
if message.is_attachment() or message.is_inline():
|
||||
|
||||
# Reject zero length attachments
|
||||
if len(message.body) == 0:
|
||||
return
|
||||
return document_ids, metadata_dictionary
|
||||
|
||||
label = message.detected_file_name or 'attachment-{}'.format(counter)
|
||||
counter = counter + 1
|
||||
|
||||
with ContentFile(content=message.body, name=label) as file_object:
|
||||
if label == source.metadata_attachment_name:
|
||||
metadata_dictionary = yaml.load(
|
||||
@@ -147,12 +156,10 @@ class EmailBaseModel(IntervalBaseModel):
|
||||
source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
|
||||
)
|
||||
)
|
||||
if metadata_dictionary:
|
||||
for document in documents:
|
||||
set_bulk_metadata(
|
||||
document=document,
|
||||
metadata_dictionary=metadata_dictionary
|
||||
)
|
||||
|
||||
for document in documents:
|
||||
document_ids.append(document.pk)
|
||||
|
||||
else:
|
||||
# If it is not an attachment then it should be a body message part.
|
||||
# Another option is to use message.is_body()
|
||||
@@ -168,12 +175,11 @@ class EmailBaseModel(IntervalBaseModel):
|
||||
expand=SOURCE_UNCOMPRESS_CHOICE_N,
|
||||
file_object=file_object
|
||||
)
|
||||
if metadata_dictionary:
|
||||
for document in documents:
|
||||
set_bulk_metadata(
|
||||
document=document,
|
||||
metadata_dictionary=metadata_dictionary
|
||||
)
|
||||
|
||||
for document in documents:
|
||||
document_ids.append(document.pk)
|
||||
|
||||
return document_ids, metadata_dictionary
|
||||
|
||||
def clean(self):
|
||||
if self.subject_metadata_type:
|
||||
|
||||
@@ -6,7 +6,13 @@ import shutil
|
||||
|
||||
import mock
|
||||
from pathlib2 import Path
|
||||
import yaml
|
||||
try:
|
||||
from yaml import CSafeDumper as SafeDumper
|
||||
except ImportError:
|
||||
from yaml import SafeDumper
|
||||
|
||||
from django.core import mail
|
||||
from django.utils.encoding import force_text
|
||||
|
||||
from mayan.apps.documents.models import Document
|
||||
@@ -190,6 +196,72 @@ class EmailBaseTestCase(GenericDocumentTestCase):
|
||||
# Only two attachments and a body document
|
||||
self.assertEqual(2, Document.objects.count())
|
||||
|
||||
def test_metadata_yaml_attachment(self):
|
||||
TEST_METADATA_VALUE_1 = 'test value 1'
|
||||
TEST_METADATA_VALUE_2 = 'test value 2'
|
||||
|
||||
test_metadata_type_1 = MetadataType.objects.create(
|
||||
name='test_metadata_type_1'
|
||||
)
|
||||
test_metadata_type_2 = MetadataType.objects.create(
|
||||
name='test_metadata_type_2'
|
||||
)
|
||||
self.test_document_type.metadata.create(
|
||||
metadata_type=test_metadata_type_1
|
||||
)
|
||||
self.test_document_type.metadata.create(
|
||||
metadata_type=test_metadata_type_2
|
||||
)
|
||||
|
||||
test_metadata_yaml = yaml.dump(
|
||||
Dumper=SafeDumper, data={
|
||||
test_metadata_type_1.name: TEST_METADATA_VALUE_1,
|
||||
test_metadata_type_2.name: TEST_METADATA_VALUE_2,
|
||||
}
|
||||
)
|
||||
|
||||
# Create email with a test attachment first, then the metadata.yaml
|
||||
# attachment
|
||||
with mail.get_connection(
|
||||
backend='django.core.mail.backends.locmem.EmailBackend'
|
||||
) as connection:
|
||||
email_message = mail.EmailMultiAlternatives(
|
||||
body='test email body', connection=connection,
|
||||
subject='test email subject', to=['test@example.com'],
|
||||
)
|
||||
|
||||
email_message.attach(
|
||||
filename='test_attachment',
|
||||
content='test_content',
|
||||
)
|
||||
|
||||
email_message.attach(
|
||||
filename='metadata.yaml',
|
||||
content=test_metadata_yaml,
|
||||
)
|
||||
|
||||
email_message.send()
|
||||
|
||||
self._create_email_source()
|
||||
self.source.store_body = True
|
||||
self.source.save()
|
||||
|
||||
EmailBaseModel.process_message(
|
||||
source=self.source, message_text=mail.outbox[0].message()
|
||||
)
|
||||
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
|
||||
for document in Document.objects.all():
|
||||
self.assertEqual(
|
||||
document.metadata.get(metadata_type=test_metadata_type_1).value,
|
||||
TEST_METADATA_VALUE_1
|
||||
)
|
||||
self.assertEqual(
|
||||
document.metadata.get(metadata_type=test_metadata_type_2).value,
|
||||
TEST_METADATA_VALUE_2
|
||||
)
|
||||
|
||||
|
||||
class IMAPSourceTestCase(GenericDocumentTestCase):
|
||||
auto_upload_document = False
|
||||
|
||||
Reference in New Issue
Block a user