def set_bulk_metadata(document, metadata_dictionary):
    """
    Assign several metadata values to a document in a single call.

    Only metadata types attached to the document's document type are
    applied. Any other name in the dictionary is skipped, including names
    that do not match a metadata type at all, so one bad key no longer
    aborts the caller.

    document: the document instance that receives the metadata.
    metadata_dictionary: mapping of metadata type name to the value to
    store for that type. ``None`` or an empty mapping is a no-op.
    """
    if not metadata_dictionary:
        return

    # Index the metadata types of the document type by name once, instead
    # of issuing one MetadataType lookup per dictionary entry.
    # NOTE(review): assumes metadata type names are unique -- confirm
    # against the MetadataType model.
    allowed_metadata_types = {}
    for document_type_metadata_type in document.document_type.metadata.all():
        metadata_type = document_type_metadata_type.metadata_type
        allowed_metadata_types[metadata_type.name] = metadata_type

    for metadata_type_name, value in metadata_dictionary.items():
        metadata_type = allowed_metadata_types.get(metadata_type_name)
        if metadata_type is None:
            # Unknown name, or a type not valid for this document type.
            continue

        # The value goes in ``defaults`` so it is not part of the lookup:
        # an existing entry for this type is found and updated rather than
        # a second entry being created next to it.
        document_metadata, created = DocumentMetadata.objects.get_or_create(
            document=document, metadata_type=metadata_type,
            defaults={'value': value}
        )
        if not created and document_metadata.value != value:
            document_metadata.value = value
            document_metadata.save()
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations


class Migration(migrations.Migration):
    """
    Add the ``metadata_attachment_name`` field to the email source base
    model.
    """

    dependencies = [
        ('sources', '0006_auto_20150708_0330'),
    ]

    operations = [
        migrations.AddField(
            model_name='emailbasemodel',
            name='metadata_attachment_name',
            # The field options mirror the model definition, help_text
            # included, so the migration state matches the model and no
            # follow-up AlterField is detected.
            field=models.CharField(
                default='metadata.yaml',
                help_text=(
                    'Name of the attachment that will contains the metadata type names '
                    'and value pairs to be assigned to the rest of the downloaded '
                    'attachments. Note: This attachment has to be the first attachment.'
                ),
                max_length=128,
                verbose_name='Metadata attachment name'
            ),
            preserve_default=True,
        ),
    ]
SOURCE_CHOICE_WEB_FORM, SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, - SOURCE_UNCOMPRESS_CHOICES, SOURCE_UNCOMPRESS_CHOICE_Y, - SOURCE_CHOICE_EMAIL_IMAP, SOURCE_CHOICE_EMAIL_POP3 + DEFAULT_METADATA_ATTACHMENT_NAME, SOURCE_CHOICES, SOURCE_CHOICE_STAGING, + SOURCE_CHOICE_WATCH, SOURCE_CHOICE_WEB_FORM, + SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, SOURCE_UNCOMPRESS_CHOICES, + SOURCE_UNCOMPRESS_CHOICE_Y, SOURCE_CHOICE_EMAIL_IMAP, + SOURCE_CHOICE_EMAIL_POP3 ) logger = logging.getLogger(__name__) @@ -52,7 +55,7 @@ class Source(models.Model): def fullname(self): return ' '.join([self.class_fullname(), '"%s"' % self.label]) - def upload_document(self, file_object, document_type, description=None, label=None, language=None, metadata_dict_list=None, user=None): + def upload_document(self, file_object, document_type, description=None, label=None, language=None, metadata_dict_list=None, metadata_dictionary=None, user=None): try: with transaction.atomic(): document = Document.objects.create( @@ -75,6 +78,12 @@ class Source(models.Model): metadata_dict_list, document, create=True ) + if metadata_dictionary: + set_bulk_metadata( + document=document, + metadata_dictionary=metadata_dictionary + ) + except Exception as exception: logger.critical( 'Unexpected exception while trying to create new document "%s" from source "%s"; %s', @@ -82,14 +91,15 @@ class Source(models.Model): ) raise - def handle_upload(self, file_object, description=None, document_type=None, expand=False, label=None, language=None, metadata_dict_list=None, user=None): + def handle_upload(self, file_object, description=None, document_type=None, expand=False, label=None, language=None, metadata_dict_list=None, metadata_dictionary=None, user=None): if not document_type: document_type = self.document_type kwargs = { 'description': description, 'document_type': document_type, 'label': label, 'language': language, - 'metadata_dict_list': metadata_dict_list, 'user': user + 'metadata_dict_list': metadata_dict_list, + 
'metadata_dictionary': metadata_dictionary, 'user': user } if expand: @@ -312,14 +322,23 @@ class EmailBaseModel(IntervalBaseModel): ) username = models.CharField(max_length=96, verbose_name=_('Username')) password = models.CharField(max_length=96, verbose_name=_('Password')) + metadata_attachment_name = models.CharField( + default=DEFAULT_METADATA_ATTACHMENT_NAME, + help_text=_( + 'Name of the attachment that will contains the metadata type names ' + 'and value pairs to be assigned to the rest of the downloaded ' + 'attachments. Note: This attachment has to be the first attachment.' + ), max_length=128, verbose_name=_('Metadata attachment name') + ) # From: http://bookmarks.honewatson.com/2009/08/11/python-gmail-imaplib-search-subject-get-attachments/ # TODO: Add lock to avoid running more than once concurrent same document download # TODO: Use message ID for lock @staticmethod def process_message(source, message): - email = message_from_string(message) counter = 1 + email = message_from_string(message) + metadata_dictionary = None for part in email.walk(): disposition = part.get('Content-Disposition', 'none') @@ -336,12 +355,20 @@ class EmailBaseModel(IntervalBaseModel): logger.debug('filename: %s', filename) - file_object = Attachment(part, name=filename) - source.handle_upload( - document_type=source.document_type, - file_object=file_object, label=filename, - expand=(source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y) - ) + with Attachment(part, name=filename) as file_object: + if filename == source.metadata_attachment_name: + metadata_dictionary = yaml.safe_load(file_object.read()) + logger.debug( + 'Got metadata dictionary: %s', metadata_dictionary + ) + else: + source.handle_upload( + document_type=source.document_type, + file_object=file_object, label=filename, + expand=( + source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y + ), metadata_dictionary=metadata_dictionary + ) class Meta: verbose_name = _('Email source')