Add support for storing the subject and "from" value of an email as metadata.

This commit is contained in:
Roberto Rosario
2015-09-30 20:20:27 -04:00
parent 6faf4b0f2a
commit 3d0c9485d0
4 changed files with 122 additions and 26 deletions

View File

@@ -9,12 +9,18 @@ from .models import (
@admin.register(IMAPEmail)
class IMAPEmailAdmin(admin.ModelAdmin):
list_display = ('label', 'enabled', 'uncompress', 'host', 'ssl', 'port', 'interval', 'document_type', 'metadata_attachment_name')
list_display = (
'label', 'enabled', 'uncompress', 'host', 'ssl', 'port', 'interval',
'document_type', 'metadata_attachment_name'
)
@admin.register(POP3Email)
class POP3EmailAdmin(admin.ModelAdmin):
list_display = ('label', 'enabled', 'uncompress', 'host', 'ssl', 'port', 'interval', 'document_type', 'metadata_attachment_name')
list_display = (
'label', 'enabled', 'uncompress', 'host', 'ssl', 'port', 'interval',
'document_type', 'metadata_attachment_name'
)
@admin.register(StagingFolderSource)

View File

@@ -85,31 +85,29 @@ class StagingFolderSetupForm(forms.ModelForm):
class EmailSetupBaseForm(forms.ModelForm):
class Meta:
fields = (
'label', 'enabled', 'interval', 'document_type', 'uncompress',
'host', 'ssl', 'port', 'username', 'password',
'metadata_attachment_name', 'subject_metadata_type',
'from_metadata_type'
)
widgets = {
'password': forms.widgets.PasswordInput(render_value=True)
}
class POP3EmailSetupForm(EmailSetupBaseForm):
class Meta(EmailSetupBaseForm.Meta):
fields = (
'label', 'enabled', 'interval', 'document_type', 'uncompress',
'host', 'ssl', 'port', 'username', 'password', 'timeout',
'metadata_attachment_name',
)
model = POP3Email
class IMAPEmailSetupForm(EmailSetupBaseForm):
class Meta(EmailSetupBaseForm.Meta):
fields = (
'label', 'enabled', 'interval', 'document_type', 'uncompress',
'host', 'ssl', 'port', 'username', 'password', 'mailbox',
'metadata_attachment_name'
)
fields = EmailSetupBaseForm.Meta.fields + ('mailbox',)
model = IMAPEmail
class POP3EmailSetupForm(EmailSetupBaseForm):
class Meta(EmailSetupBaseForm.Meta):
fields = EmailSetupBaseForm.Meta.fields + ('timeout',)
model = POP3Email
class WatchFolderSetupForm(forms.ModelForm):
class Meta:
fields = (

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
dependencies = [
('metadata', '0007_auto_20150918_0800'),
('sources', '0008_auto_20150815_0351'),
]
operations = [
migrations.AddField(
model_name='emailbasemodel',
name='from_metadata_type',
field=models.ForeignKey(related_name='email_from', blank=True, to='metadata.MetadataType', help_text='Select a metadata type valid for the document type selected in which to store the email\'s "from" value.', null=True, verbose_name='From metadata type'),
preserve_default=True,
),
migrations.AddField(
model_name='emailbasemodel',
name='subject_metadata_type',
field=models.ForeignKey(related_name='email_subject', blank=True, to='metadata.MetadataType', help_text="Select a metadata type valid for the document type selected in which to store the email's subject.", null=True, verbose_name='Subject metadata type'),
preserve_default=True,
),
]

View File

@@ -1,15 +1,17 @@
from __future__ import unicode_literals
from email.Utils import collapse_rfc2231_value
from email import message_from_string
import json
from email.Utils import collapse_rfc2231_value
from email.header import decode_header
import imaplib
import json
import logging
import os
import poplib
import yaml
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
from django.db import models, transaction
@@ -25,6 +27,7 @@ from djcelery.models import PeriodicTask, IntervalSchedule
from documents.models import Document, DocumentType
from documents.settings import setting_language
from metadata.api import save_metadata_list, set_bulk_metadata
from metadata.models import MetadataType
from .classes import Attachment, SourceUploadedFile, StagingFile
from .literals import (
@@ -331,11 +334,55 @@ class EmailBaseModel(IntervalBaseModel):
metadata_attachment_name = models.CharField(
default=DEFAULT_METADATA_ATTACHMENT_NAME,
help_text=_(
'Name of the attachment that will contains the metadata type names '
'and value pairs to be assigned to the rest of the downloaded '
'attachments. Note: This attachment has to be the first attachment.'
'Name of the attachment that will contains the metadata type '
'names and value pairs to be assigned to the rest of the '
'downloaded attachments. Note: This attachment has to be the '
'first attachment.'
), max_length=128, verbose_name=_('Metadata attachment name')
)
subject_metadata_type = models.ForeignKey(
MetadataType, blank=True, help_text=_(
'Select a metadata type valid for the document type selected in '
'which to store the email\'s subject.'
), null=True, related_name='email_subject',
verbose_name=_('Subject metadata type')
)
from_metadata_type = models.ForeignKey(
MetadataType, blank=True, help_text=_(
'Select a metadata type valid for the document type selected in '
'which to store the email\'s "from" value.'
), null=True, related_name='email_from',
verbose_name=_('From metadata type')
)
def clean(self):
if self.subject_metadata_type:
if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
{
'subject_metadata_type': _(
'Subject metadata type "%(metadata_type)s" is not '
'valid for the document type: %(document_type)s'
) % {
'metadata_type': self.subject_metadata_type,
'document_type': self.document_type
}
}
)
if self.from_metadata_type:
if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
{
'from_metadata_type': _(
'"From" metadata type "%(metadata_type)s" is not '
'valid for the document type: %(document_type)s'
) % {
'metadata_type': self.from_metadata_type,
'document_type': self.document_type
}
}
)
# From: http://bookmarks.honewatson.com/2009/08/11/
# python-gmail-imaplib-search-subject-get-attachments/
@@ -343,11 +390,30 @@ class EmailBaseModel(IntervalBaseModel):
# download
# TODO: Use message ID for lock
@staticmethod
def getheader(header_text, default='ascii'):
headers = decode_header(header_text)
header_sections = [
unicode(text, charset or default) for text, charset in headers
]
return ''.join(header_sections)
@staticmethod
def process_message(source, message):
counter = 1
email = message_from_string(message)
metadata_dictionary = None
metadata_dictionary = {}
if source.subject_metadata_type:
metadata_dictionary[
source.subject_metadata_type.name
] = EmailBaseModel.getheader(email['Subject'])
if source.from_metadata_type:
metadata_dictionary[
source.from_metadata_type.name
] = EmailBaseModel.getheader(email['From'])
for part in email.walk():
disposition = part.get('Content-Disposition', 'none')
@@ -452,9 +518,8 @@ class IMAPEmail(EmailBaseModel):
mailbox = models.CharField(
default=DEFAULT_IMAP_MAILBOX,
help_text=_(
'Mail from which to check for messages with attached documents.'
), max_length=64, verbose_name=_('Mailbox')
help_text=_('IMAP Mailbox from which to check for messages.'),
max_length=64, verbose_name=_('Mailbox')
)
# http://www.doughellmann.com/PyMOTW/imaplib/