Add support for including a metadata dictionary as an attachment for the POP3 and IMAP sources.
This commit is contained in:
@@ -119,3 +119,18 @@ def convert_dict_to_dict_list(dictionary):
|
|||||||
result.append({'id': metadata_type.pk, 'value': value})
|
result.append({'id': metadata_type.pk, 'value': value})
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def set_bulk_metadata(document, metadata_dictionary):
    """
    Assign several metadata values to a document in a single call.

    document: the document instance that will receive the metadata.
    metadata_dictionary: mapping of metadata type names to the value to
    store for each one. An empty or None mapping is a no-op.

    Only metadata types attached to the document's document type are
    applied. Any other name, including names that do not correspond to an
    existing metadata type, is skipped instead of aborting the whole
    operation.
    """
    if not metadata_dictionary:
        return

    # Index the metadata types valid for this document type by name once,
    # so each dictionary entry is resolved without an extra query and an
    # unknown name cannot raise DoesNotExist.
    valid_metadata_types = {}
    for document_type_metadata_type in document.document_type.metadata.all():
        metadata_type = document_type_metadata_type.metadata_type
        valid_metadata_types[metadata_type.name] = metadata_type

    for metadata_type_name, value in metadata_dictionary.items():
        metadata_type = valid_metadata_types.get(metadata_type_name)
        if metadata_type is None:
            # Not a metadata type of this document type: ignore it.
            continue

        # Look up by (document, metadata_type) only and pass the value as a
        # default. Including the value in the lookup would try to create a
        # second row when the document already holds a different value.
        DocumentMetadata.objects.update_or_create(
            document=document, metadata_type=metadata_type,
            defaults={'value': value}
        )
|
||||||
|
|||||||
@@ -94,7 +94,8 @@ class POP3EmailSetupForm(EmailSetupBaseForm):
|
|||||||
class Meta(EmailSetupBaseForm.Meta):
|
class Meta(EmailSetupBaseForm.Meta):
|
||||||
fields = (
|
fields = (
|
||||||
'label', 'enabled', 'interval', 'document_type', 'uncompress',
|
'label', 'enabled', 'interval', 'document_type', 'uncompress',
|
||||||
'host', 'ssl', 'port', 'username', 'password', 'timeout'
|
'host', 'ssl', 'port', 'username', 'password', 'timeout',
|
||||||
|
'metadata_attachment_name',
|
||||||
)
|
)
|
||||||
model = POP3Email
|
model = POP3Email
|
||||||
|
|
||||||
@@ -103,7 +104,8 @@ class IMAPEmailSetupForm(EmailSetupBaseForm):
|
|||||||
class Meta(EmailSetupBaseForm.Meta):
|
class Meta(EmailSetupBaseForm.Meta):
|
||||||
fields = (
|
fields = (
|
||||||
'label', 'enabled', 'interval', 'document_type', 'uncompress',
|
'label', 'enabled', 'interval', 'document_type', 'uncompress',
|
||||||
'host', 'ssl', 'port', 'username', 'password', 'mailbox'
|
'host', 'ssl', 'port', 'username', 'password', 'mailbox',
|
||||||
|
'metadata_attachment_name'
|
||||||
)
|
)
|
||||||
model = IMAPEmail
|
model = IMAPEmail
|
||||||
|
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ SOURCE_CHOICES = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
DEFAULT_INTERVAL = 600
|
DEFAULT_INTERVAL = 600
|
||||||
|
DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml'
|
||||||
DEFAULT_POP3_TIMEOUT = 60
|
DEFAULT_POP3_TIMEOUT = 60
|
||||||
DEFAULT_IMAP_MAILBOX = 'INBOX'
|
DEFAULT_IMAP_MAILBOX = 'INBOX'
|
||||||
DEFAULT_SOURCE_TASK_RETRY_DELAY = 10
|
DEFAULT_SOURCE_TASK_RETRY_DELAY = 10
|
||||||
|
|||||||
@@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import models, migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Add the ``metadata_attachment_name`` column to the ``emailbasemodel``
    model of the ``sources`` app.
    """

    # Must run after the previous migration of the sources app.
    dependencies = [
        ('sources', '0006_auto_20150708_0330'),
    ]

    operations = [
        # New CharField; existing rows receive the 'metadata.yaml' default.
        migrations.AddField(
            model_name='emailbasemodel',
            name='metadata_attachment_name',
            field=models.CharField(default='metadata.yaml', help_text='Name of the attachment that will contains the metadata types and values to be assigned to the rest of the downloaded attachments.', max_length=128, verbose_name='Metadata attachment name'),
            preserve_default=True,
        ),
    ]
|
||||||
@@ -8,6 +8,8 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import poplib
|
import poplib
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
from django.core.files import File
|
from django.core.files import File
|
||||||
from django.db import models, transaction
|
from django.db import models, transaction
|
||||||
from django.utils.encoding import python_2_unicode_compatible
|
from django.utils.encoding import python_2_unicode_compatible
|
||||||
@@ -21,15 +23,16 @@ from converter.models import Transformation
|
|||||||
from djcelery.models import PeriodicTask, IntervalSchedule
|
from djcelery.models import PeriodicTask, IntervalSchedule
|
||||||
from documents.models import Document, DocumentType
|
from documents.models import Document, DocumentType
|
||||||
from documents.settings import setting_language
|
from documents.settings import setting_language
|
||||||
from metadata.api import save_metadata_list
|
from metadata.api import save_metadata_list, set_bulk_metadata
|
||||||
|
|
||||||
from .classes import Attachment, SourceUploadedFile, StagingFile
|
from .classes import Attachment, SourceUploadedFile, StagingFile
|
||||||
from .literals import (
|
from .literals import (
|
||||||
DEFAULT_INTERVAL, DEFAULT_POP3_TIMEOUT, DEFAULT_IMAP_MAILBOX,
|
DEFAULT_INTERVAL, DEFAULT_POP3_TIMEOUT, DEFAULT_IMAP_MAILBOX,
|
||||||
SOURCE_CHOICES, SOURCE_CHOICE_STAGING, SOURCE_CHOICE_WATCH,
|
DEFAULT_METADATA_ATTACHMENT_NAME, SOURCE_CHOICES, SOURCE_CHOICE_STAGING,
|
||||||
SOURCE_CHOICE_WEB_FORM, SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES,
|
SOURCE_CHOICE_WATCH, SOURCE_CHOICE_WEB_FORM,
|
||||||
SOURCE_UNCOMPRESS_CHOICES, SOURCE_UNCOMPRESS_CHOICE_Y,
|
SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, SOURCE_UNCOMPRESS_CHOICES,
|
||||||
SOURCE_CHOICE_EMAIL_IMAP, SOURCE_CHOICE_EMAIL_POP3
|
SOURCE_UNCOMPRESS_CHOICE_Y, SOURCE_CHOICE_EMAIL_IMAP,
|
||||||
|
SOURCE_CHOICE_EMAIL_POP3
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -52,7 +55,7 @@ class Source(models.Model):
|
|||||||
def fullname(self):
|
def fullname(self):
|
||||||
return ' '.join([self.class_fullname(), '"%s"' % self.label])
|
return ' '.join([self.class_fullname(), '"%s"' % self.label])
|
||||||
|
|
||||||
def upload_document(self, file_object, document_type, description=None, label=None, language=None, metadata_dict_list=None, user=None):
|
def upload_document(self, file_object, document_type, description=None, label=None, language=None, metadata_dict_list=None, metadata_dictionary=None, user=None):
|
||||||
try:
|
try:
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
document = Document.objects.create(
|
document = Document.objects.create(
|
||||||
@@ -75,6 +78,12 @@ class Source(models.Model):
|
|||||||
metadata_dict_list, document, create=True
|
metadata_dict_list, document, create=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if metadata_dictionary:
|
||||||
|
set_bulk_metadata(
|
||||||
|
document=document,
|
||||||
|
metadata_dictionary=metadata_dictionary
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
logger.critical(
|
logger.critical(
|
||||||
'Unexpected exception while trying to create new document "%s" from source "%s"; %s',
|
'Unexpected exception while trying to create new document "%s" from source "%s"; %s',
|
||||||
@@ -82,14 +91,15 @@ class Source(models.Model):
|
|||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def handle_upload(self, file_object, description=None, document_type=None, expand=False, label=None, language=None, metadata_dict_list=None, user=None):
|
def handle_upload(self, file_object, description=None, document_type=None, expand=False, label=None, language=None, metadata_dict_list=None, metadata_dictionary=None, user=None):
|
||||||
if not document_type:
|
if not document_type:
|
||||||
document_type = self.document_type
|
document_type = self.document_type
|
||||||
|
|
||||||
kwargs = {
|
kwargs = {
|
||||||
'description': description, 'document_type': document_type,
|
'description': description, 'document_type': document_type,
|
||||||
'label': label, 'language': language,
|
'label': label, 'language': language,
|
||||||
'metadata_dict_list': metadata_dict_list, 'user': user
|
'metadata_dict_list': metadata_dict_list,
|
||||||
|
'metadata_dictionary': metadata_dictionary, 'user': user
|
||||||
}
|
}
|
||||||
|
|
||||||
if expand:
|
if expand:
|
||||||
@@ -312,14 +322,23 @@ class EmailBaseModel(IntervalBaseModel):
|
|||||||
)
|
)
|
||||||
username = models.CharField(max_length=96, verbose_name=_('Username'))
|
username = models.CharField(max_length=96, verbose_name=_('Username'))
|
||||||
password = models.CharField(max_length=96, verbose_name=_('Password'))
|
password = models.CharField(max_length=96, verbose_name=_('Password'))
|
||||||
|
metadata_attachment_name = models.CharField(
|
||||||
|
default=DEFAULT_METADATA_ATTACHMENT_NAME,
|
||||||
|
help_text=_(
|
||||||
|
'Name of the attachment that will contains the metadata type names '
|
||||||
|
'and value pairs to be assigned to the rest of the downloaded '
|
||||||
|
'attachments. Note: This attachment has to be the first attachment.'
|
||||||
|
), max_length=128, verbose_name=_('Metadata attachment name')
|
||||||
|
)
|
||||||
|
|
||||||
# From: http://bookmarks.honewatson.com/2009/08/11/python-gmail-imaplib-search-subject-get-attachments/
|
# From: http://bookmarks.honewatson.com/2009/08/11/python-gmail-imaplib-search-subject-get-attachments/
|
||||||
# TODO: Add lock to avoid running more than once concurrent same document download
|
# TODO: Add lock to avoid running more than once concurrent same document download
|
||||||
# TODO: Use message ID for lock
|
# TODO: Use message ID for lock
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def process_message(source, message):
|
def process_message(source, message):
|
||||||
email = message_from_string(message)
|
|
||||||
counter = 1
|
counter = 1
|
||||||
|
email = message_from_string(message)
|
||||||
|
metadata_dictionary = None
|
||||||
|
|
||||||
for part in email.walk():
|
for part in email.walk():
|
||||||
disposition = part.get('Content-Disposition', 'none')
|
disposition = part.get('Content-Disposition', 'none')
|
||||||
@@ -336,12 +355,20 @@ class EmailBaseModel(IntervalBaseModel):
|
|||||||
|
|
||||||
logger.debug('filename: %s', filename)
|
logger.debug('filename: %s', filename)
|
||||||
|
|
||||||
file_object = Attachment(part, name=filename)
|
with Attachment(part, name=filename) as file_object:
|
||||||
source.handle_upload(
|
if filename == source.metadata_attachment_name:
|
||||||
document_type=source.document_type,
|
metadata_dictionary = yaml.safe_load(file_object.read())
|
||||||
file_object=file_object, label=filename,
|
logger.debug(
|
||||||
expand=(source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y)
|
'Got metadata dictionary: %s', metadata_dictionary
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
source.handle_upload(
|
||||||
|
document_type=source.document_type,
|
||||||
|
file_object=file_object, label=filename,
|
||||||
|
expand=(
|
||||||
|
source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
|
||||||
|
), metadata_dictionary=metadata_dictionary
|
||||||
|
)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name = _('Email source')
|
verbose_name = _('Email source')
|
||||||
|
|||||||
Reference in New Issue
Block a user