Initial commit to support receiving documents via email

This commit is contained in:
Roberto Rosario
2014-10-20 02:22:19 -04:00
parent d6486b8a45
commit 59eb6202fe
5 changed files with 350 additions and 8 deletions

View File

@@ -10,6 +10,21 @@ from converter.api import convert
from mimetype.api import get_mimetype from mimetype.api import get_mimetype
class PseudoFile(File):
    """
    Wrap an already open, seekable file-like object and record its size.

    The ``File`` base class is imported outside of the visible code.
    """

    def __init__(self, file, name):
        self.name, self.file = name, file

        # Measure the stream by jumping to its end, then rewind it so that
        # consumers start reading from the first byte.
        stream = self.file
        stream.seek(0, os.SEEK_END)
        self.size = stream.tell()
        stream.seek(0)
class Attachment(File):
    """
    Expose the decoded payload of an email message part as a file object.

    ``part`` must provide ``get_payload(decode=True)``; ``name`` is used as
    the name of both this object and the wrapped ``PseudoFile``.
    """

    def __init__(self, part, name):
        self.name = name

        # Decode the transfer encoding and buffer the payload in memory so
        # that it can be read like a regular file.
        payload = part.get_payload(decode=True)
        buffered_payload = StringIO(payload)
        self.file = PseudoFile(buffered_payload, name=name)
class StagingFile(object): class StagingFile(object):
""" """
Simple class to extend the File class to add preview capabilities Simple class to extend the File class to add preview capabilities

View File

@@ -18,15 +18,25 @@ SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES = (
# Identifiers of the available source types.
SOURCE_CHOICE_WEB_FORM = 'webform'
SOURCE_CHOICE_STAGING = 'staging'
SOURCE_CHOICE_WATCH = 'watch'
SOURCE_CHOICE_EMAIL_POP3 = 'pop3'
SOURCE_CHOICE_EMAIL_IMAP = 'imap'

# (identifier, translatable singular label) pairs.
SOURCE_CHOICES = (
    (SOURCE_CHOICE_WEB_FORM, _(u'Web form')),
    (SOURCE_CHOICE_STAGING, _(u'Server staging folder')),
    (SOURCE_CHOICE_WATCH, _(u'Server watch folder')),
    (SOURCE_CHOICE_EMAIL_POP3, _(u'POP3 email')),
    (SOURCE_CHOICE_EMAIL_IMAP, _(u'IMAP email')),
)

# (identifier, translatable plural label) pairs.
# TODO: remove PLURALS
SOURCE_CHOICES_PLURAL = (
    (SOURCE_CHOICE_WEB_FORM, _(u'Web forms')),
    (SOURCE_CHOICE_STAGING, _(u'Server staging folders')),
    (SOURCE_CHOICE_WATCH, _(u'Server watch folders')),
    (SOURCE_CHOICE_EMAIL_POP3, _(u'POP3 emails')),
    (SOURCE_CHOICE_EMAIL_IMAP, _(u'IMAP emails')),
)

# Default values for the interval and email source settings.
DEFAULT_INTERVAL = 60
DEFAULT_POP3_TIMEOUT = 60
DEFAULT_IMAP_MAILBOX = 'INBOX'

View File

@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
from south.utils import datetime_utils as datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """
    South schema migration that adds the tables of the interval and email
    source models: IMAPEmail, IntervalBaseModel, POP3Email and
    EmailBaseModel.
    """

    def forwards(self, orm):
        """Create the four new tables and send their creation signals."""
        # NOTE(review): child tables are created before the parent tables
        # their one-to-one pointers reference (IMAPEmail and POP3Email before
        # EmailBaseModel). Presumably South defers the foreign key
        # constraints until the end of the migration -- confirm.

        # Adding model 'IMAPEmail'
        db.create_table(u'sources_imapemail', (
            (u'emailbasemodel_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.EmailBaseModel'], unique=True, primary_key=True)),
            ('mailbox', self.gf('django.db.models.fields.CharField')(default='INBOX', max_length=64)),
        ))
        db.send_create_signal(u'sources', ['IMAPEmail'])

        # Adding model 'IntervalBaseModel'
        db.create_table(u'sources_intervalbasemodel', (
            (u'outofprocesssource_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.OutOfProcessSource'], unique=True, primary_key=True)),
            ('interval', self.gf('django.db.models.fields.PositiveIntegerField')(default=60)),
            ('document_type', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.DocumentType'], null=True, blank=True)),
            ('uncompress', self.gf('django.db.models.fields.CharField')(max_length=1)),
        ))
        db.send_create_signal(u'sources', ['IntervalBaseModel'])

        # Adding model 'POP3Email'
        db.create_table(u'sources_pop3email', (
            (u'emailbasemodel_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.EmailBaseModel'], unique=True, primary_key=True)),
            ('timeout', self.gf('django.db.models.fields.PositiveIntegerField')(default=60)),
        ))
        db.send_create_signal(u'sources', ['POP3Email'])

        # Adding model 'EmailBaseModel'
        db.create_table(u'sources_emailbasemodel', (
            (u'intervalbasemodel_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.IntervalBaseModel'], unique=True, primary_key=True)),
            ('host', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('ssl', self.gf('django.db.models.fields.BooleanField')()),
            ('port', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)),
            ('username', self.gf('django.db.models.fields.CharField')(max_length=96)),
            ('password', self.gf('django.db.models.fields.CharField')(max_length=96)),
        ))
        db.send_create_signal(u'sources', ['EmailBaseModel'])

    def backwards(self, orm):
        """Drop the four tables created by ``forwards``."""
        # Deleting model 'IMAPEmail'
        db.delete_table(u'sources_imapemail')

        # Deleting model 'IntervalBaseModel'
        db.delete_table(u'sources_intervalbasemodel')

        # Deleting model 'POP3Email'
        db.delete_table(u'sources_pop3email')

        # Deleting model 'EmailBaseModel'
        db.delete_table(u'sources_emailbasemodel')

    # Frozen description of every model this migration refers to, keyed by
    # '<app label>.<model name>'. Each field maps to a
    # (field class path, positional arguments, keyword arguments) triple.
    models = {
        u'contenttypes.contenttype': {
            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        u'documents.documenttype': {
            'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'})
        },
        u'sources.emailbasemodel': {
            'Meta': {'ordering': "('title',)", 'object_name': 'EmailBaseModel', '_ormbases': [u'sources.IntervalBaseModel']},
            'host': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            u'intervalbasemodel_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.IntervalBaseModel']", 'unique': 'True', 'primary_key': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '96'}),
            'port': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'ssl': ('django.db.models.fields.BooleanField', [], {}),
            'username': ('django.db.models.fields.CharField', [], {'max_length': '96'})
        },
        u'sources.imapemail': {
            'Meta': {'ordering': "('title',)", 'object_name': 'IMAPEmail', '_ormbases': [u'sources.EmailBaseModel']},
            u'emailbasemodel_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.EmailBaseModel']", 'unique': 'True', 'primary_key': 'True'}),
            'mailbox': ('django.db.models.fields.CharField', [], {'default': "'INBOX'", 'max_length': '64'})
        },
        u'sources.interactivesource': {
            'Meta': {'ordering': "('title',)", 'object_name': 'InteractiveSource', '_ormbases': [u'sources.Source']},
            u'source_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.Source']", 'unique': 'True', 'primary_key': 'True'})
        },
        u'sources.intervalbasemodel': {
            'Meta': {'ordering': "('title',)", 'object_name': 'IntervalBaseModel', '_ormbases': [u'sources.OutOfProcessSource']},
            'document_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentType']", 'null': 'True', 'blank': 'True'}),
            'interval': ('django.db.models.fields.PositiveIntegerField', [], {'default': '60'}),
            u'outofprocesssource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.OutOfProcessSource']", 'unique': 'True', 'primary_key': 'True'}),
            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
        },
        u'sources.outofprocesssource': {
            'Meta': {'ordering': "('title',)", 'object_name': 'OutOfProcessSource', '_ormbases': [u'sources.Source']},
            u'source_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.Source']", 'unique': 'True', 'primary_key': 'True'})
        },
        u'sources.pop3email': {
            'Meta': {'ordering': "('title',)", 'object_name': 'POP3Email', '_ormbases': [u'sources.EmailBaseModel']},
            u'emailbasemodel_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.EmailBaseModel']", 'unique': 'True', 'primary_key': 'True'}),
            'timeout': ('django.db.models.fields.PositiveIntegerField', [], {'default': '60'})
        },
        u'sources.source': {
            'Meta': {'ordering': "('title',)", 'object_name': 'Source'},
            'blacklist': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'title': ('django.db.models.fields.CharField', [], {'max_length': '64'}),
            'whitelist': ('django.db.models.fields.TextField', [], {'blank': 'True'})
        },
        u'sources.sourcetransformation': {
            'Meta': {'ordering': "('order',)", 'object_name': 'SourceTransformation'},
            'arguments': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'object_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'order': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
            'transformation': ('django.db.models.fields.CharField', [], {'max_length': '128'})
        },
        u'sources.stagingfoldersource': {
            'Meta': {'ordering': "('title',)", 'object_name': 'StagingFolderSource', '_ormbases': [u'sources.InteractiveSource']},
            'delete_after_upload': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'folder_path': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            u'interactivesource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.InteractiveSource']", 'unique': 'True', 'primary_key': 'True'}),
            'preview_height': ('django.db.models.fields.IntegerField', [], {'null': 'True', 'blank': 'True'}),
            'preview_width': ('django.db.models.fields.IntegerField', [], {}),
            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
        },
        u'sources.watchfoldersource': {
            'Meta': {'ordering': "('title',)", 'object_name': 'WatchFolderSource', '_ormbases': [u'sources.OutOfProcessSource']},
            'delete_after_upload': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'folder_path': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'interval': ('django.db.models.fields.PositiveIntegerField', [], {}),
            u'outofprocesssource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.OutOfProcessSource']", 'unique': 'True', 'primary_key': 'True'}),
            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
        },
        u'sources.webformsource': {
            'Meta': {'ordering': "('title',)", 'object_name': 'WebFormSource', '_ormbases': [u'sources.InteractiveSource']},
            u'interactivesource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.InteractiveSource']", 'unique': 'True', 'primary_key': 'True'}),
            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
        }
    }

    # Apps whose complete set of models is frozen in ``models`` above.
    complete_apps = ['sources']

View File

@@ -1,6 +1,7 @@
from __future__ import absolute_import from __future__ import absolute_import
from ast import literal_eval from ast import literal_eval
import json
import logging import logging
import os import os
@@ -15,15 +16,18 @@ from model_utils.managers import InheritanceManager
from common.compressed_files import CompressedFile, NotACompressedFile from common.compressed_files import CompressedFile, NotACompressedFile
from converter.api import get_available_transformations_choices from converter.api import get_available_transformations_choices
from converter.literals import DIMENSION_SEPARATOR from converter.literals import DIMENSION_SEPARATOR
from documents.models import Document from djcelery.models import PeriodicTask, IntervalSchedule
from documents.models import Document, DocumentType
from metadata.api import save_metadata_list from metadata.api import save_metadata_list
from .classes import StagingFile from .classes import Attachment, StagingFile
from .literals import (SOURCE_CHOICES, SOURCE_CHOICES_PLURAL, from .literals import (DEFAULT_INTERVAL, DEFAULT_POP3_TIMEOUT,
SOURCE_CHOICE_STAGING, SOURCE_CHOICE_WATCH, DEFAULT_IMAP_MAILBOX, SOURCE_CHOICES,
SOURCE_CHOICE_WEB_FORM, SOURCE_CHOICES_PLURAL, SOURCE_CHOICE_STAGING,
SOURCE_CHOICE_WATCH, SOURCE_CHOICE_WEB_FORM,
SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES,
SOURCE_UNCOMPRESS_CHOICES) SOURCE_UNCOMPRESS_CHOICES, SOURCE_CHOICE_EMAIL_IMAP,
SOURCE_CHOICE_EMAIL_POP3)
from .managers import SourceTransformationManager from .managers import SourceTransformationManager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -190,8 +194,171 @@ class OutOfProcessSource(Source):
verbose_name_plural = _(u'Out of process') verbose_name_plural = _(u'Out of process')
class IntervalBaseModel(OutOfProcessSource):
    """
    Base model for out of process sources that are checked periodically.

    Every saved instance is paired with a django-celery ``PeriodicTask``
    (plus its own ``IntervalSchedule``) named ``check_interval_source-<pk>``.
    ``save()`` creates or updates that task and ``delete()`` removes it.
    """
    interval = models.PositiveIntegerField(default=DEFAULT_INTERVAL, verbose_name=_('Interval'), help_text=_('Interval in seconds between document downloads from this source.'))
    document_type = models.ForeignKey(DocumentType, null=True, blank=True, verbose_name=_('Document type'), help_text=_('Assign a document type to documents uploaded from this source.'))
    uncompress = models.CharField(max_length=1, choices=SOURCE_UNCOMPRESS_CHOICES, verbose_name=_('Uncompress'), help_text=_('Whether to expand or not, compressed archives.'))

    def save(self, *args, **kwargs):
        """
        Save the source and keep its periodic task in sync with ``interval``.

        The periodic task is created when it does not exist yet (new source,
        or an existing source whose task is missing); otherwise only its
        schedule is updated.
        """
        super(IntervalBaseModel, self).save(*args, **kwargs)
        periodic_task_name = 'check_interval_source-%i' % self.pk

        try:
            periodic_task = PeriodicTask.objects.get(name=periodic_task_name)
        except PeriodicTask.DoesNotExist:
            # ``interval`` is expressed in seconds (see its help text), so
            # the schedule period must be set explicitly.
            interval_instance = IntervalSchedule.objects.create(
                every=self.interval, period='seconds'
            )
            PeriodicTask.objects.create(
                name=periodic_task_name,
                interval=interval_instance,
                task='sources.tasks.task_check_interval_source',
                queue='mailing',
                # ``json.dumps`` returns the string the field stores
                # (``json.dump`` needs a file object). The payload is a
                # mapping, so it goes in ``kwargs`` and not in ``args``.
                kwargs=json.dumps({'source_id': self.pk})
            )
        else:
            interval_instance = periodic_task.interval
            interval_instance.every = self.interval
            interval_instance.period = 'seconds'
            interval_instance.save()
            # Re-save the task itself, as the original code did, so that the
            # schedule change is registered on the task row as well.
            periodic_task.save()

    def delete(self, *args, **kwargs):
        """Delete the source together with its periodic task and schedule."""
        # Build the task name first: Django clears the primary key of the
        # instance once it has been deleted.
        periodic_task_name = 'check_interval_source-%i' % self.pk

        super(IntervalBaseModel, self).delete(*args, **kwargs)

        try:
            periodic_task = PeriodicTask.objects.get(name=periodic_task_name)
        except PeriodicTask.DoesNotExist:
            # Nothing to clean up; do not fail the deletion of the source.
            logger.warning('Periodic task "%s" not found.' % periodic_task_name)
            return

        interval_instance = periodic_task.interval
        periodic_task.delete()
        interval_instance.delete()

    class Meta:
        verbose_name = _('Interval source')
        verbose_name_plural = _('Interval sources')
class EmailBaseModel(IntervalBaseModel):
    """
    Base model holding the connection settings shared by the email sources
    and the logic that turns message attachments into uploaded files.
    """
    host = models.CharField(max_length=128, verbose_name=_('Host'))
    ssl = models.BooleanField(verbose_name=_('SSL'))
    port = models.PositiveIntegerField(blank=True, null=True, verbose_name=_('Port'), help_text=_('Typical choices are 110 for POP3, 995 for POP3 over SSL, 143 for IMAP, 993 for IMAP over SSL.'))
    username = models.CharField(max_length=96, verbose_name=_('Username'))
    # NOTE(review): the password is kept as plain text in this field.
    password = models.CharField(max_length=96, verbose_name=_('Password'))

    # From: http://bookmarks.honewatson.com/2009/08/11/python-gmail-imaplib-search-subject-get-attachments/
    @staticmethod
    def process_message(source, message):
        """
        Upload every attachment found in a raw email message.

        ``source`` is the source instance the files are uploaded through
        (its ``upload_file``, ``uncompress`` and ``document_type`` members
        are used); ``message`` is the complete message as a string.
        Attachments without a filename are named ``attachment-<n>``.
        """
        # Imported here because none of these names are imported in the
        # visible part of the module.
        from email import message_from_string
        from email.utils import collapse_rfc2231_value

        # NOTE(review): assumes .literals defines SOURCE_UNCOMPRESS_CHOICE_Y
        # next to SOURCE_UNCOMPRESS_CHOICES -- confirm.
        from .literals import SOURCE_UNCOMPRESS_CHOICE_Y

        parsed_message = message_from_string(message)
        counter = 1

        for part in parsed_message.walk():
            disposition = part.get('Content-Disposition', 'none')
            logger.debug('Disposition: %s' % disposition)

            if disposition.startswith('attachment'):
                raw_filename = part.get_filename()

                if raw_filename:
                    filename = collapse_rfc2231_value(raw_filename)
                else:
                    # Only unnamed attachments consume a sequence number.
                    filename = _('attachment-%i') % counter
                    counter += 1

                logger.debug('filename: %s' % filename)

                document_file = Attachment(part, name=filename)
                source.upload_file(document_file, expand=(source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), document_type=source.document_type)

    class Meta:
        verbose_name = _('Email source')
        verbose_name_plural = _('Email sources')
class POP3Email(EmailBaseModel):
    """Email source that downloads attachments from a POP3 mailbox."""
    source_type = SOURCE_CHOICE_EMAIL_POP3

    timeout = models.PositiveIntegerField(default=DEFAULT_POP3_TIMEOUT, verbose_name=_('Timeout'))

    def fetch_mail(self):
        """
        Download every message, upload its attachments and delete it from
        the server.

        Errors are logged and not propagated, so a failing mailbox does not
        break the caller.
        """
        # Imported here because the module does not import poplib in its
        # visible import block.
        import poplib

        try:
            logger.debug('Starting POP3 email fetch')
            logger.debug('host: %s' % self.host)
            logger.debug('ssl: %s' % self.ssl)

            # ``port`` is optional; fall back to the protocol defaults
            # instead of handing ``None`` to poplib.
            if self.ssl:
                # The timeout is not applied to SSL connections, same as
                # before (POP3_SSL takes no timeout argument in Python 2).
                server = poplib.POP3_SSL(self.host, self.port or poplib.POP3_SSL_PORT)
            else:
                server = poplib.POP3(self.host, self.port or poplib.POP3_PORT, timeout=self.timeout)

            try:
                server.getwelcome()
                server.user(self.username)
                server.pass_(self.password)
                messages_info = server.list()

                logger.debug('messages_info:')
                logger.debug(messages_info)
                logger.debug('messages count: %s' % len(messages_info[1]))

                for message_info in messages_info[1]:
                    message_number, message_size = message_info.split()
                    logger.debug('message_number: %s' % message_number)
                    logger.debug('message_size: %s' % message_size)

                    complete_message = '\n'.join(server.retr(message_number)[1])

                    EmailBaseModel.process_message(source=self, message=complete_message)
                    server.dele(message_number)
            finally:
                # Always send QUIT: it closes the connection and commits the
                # deletions made so far, so messages that were already
                # imported are not downloaded again after a later failure.
                server.quit()
            #SourceLog.objects.save_status(source=self, status='Successful connection.')

        except Exception as exception:
            logger.exception('Unhandled exception: %s' % exception)
            #SourceLog.objects.save_status(source=self, status='Error: %s' % exc)

    class Meta:
        verbose_name = _('POP email')
        verbose_name_plural = _('POP email')
class IMAPEmail(EmailBaseModel):
    """Email source that downloads attachments from an IMAP mailbox."""
    source_type = SOURCE_CHOICE_EMAIL_IMAP

    mailbox = models.CharField(max_length=64, default=DEFAULT_IMAP_MAILBOX, verbose_name=_('Mailbox'), help_text=_('Mail from which to check for messages with attached documents.'))

    # http://www.doughellmann.com/PyMOTW/imaplib/
    def fetch_mail(self):
        """
        Upload the attachments of every message not flagged as deleted in
        ``self.mailbox``, then flag the message as deleted and expunge.

        Errors are logged and not propagated, so a failing mailbox does not
        break the caller.
        """
        # Imported here because the module does not import imaplib in its
        # visible import block.
        import imaplib

        try:
            logger.debug('Starting IMAP email fetch')
            logger.debug('host: %s' % self.host)
            logger.debug('ssl: %s' % self.ssl)

            # ``port`` is optional; fall back to the protocol defaults
            # instead of handing ``None`` to imaplib. The connection is named
            # ``server`` to avoid confusion with the ``mailbox`` field.
            if self.ssl:
                server = imaplib.IMAP4_SSL(self.host, self.port or imaplib.IMAP4_SSL_PORT)
            else:
                server = imaplib.IMAP4(self.host, self.port or imaplib.IMAP4_PORT)

            try:
                server.login(self.username, self.password)
                server.select(self.mailbox)

                status, data = server.search(None, 'NOT', 'DELETED')
                # The result list may be empty or hold an empty placeholder
                # when nothing matched.
                if data and data[0]:
                    messages_info = data[0].split()
                    logger.debug('messages count: %s' % len(messages_info))

                    for message_number in messages_info:
                        logger.debug('message_number: %s' % message_number)
                        status, data = server.fetch(message_number, '(RFC822)')
                        EmailBaseModel.process_message(source=self, message=data[0][1])
                        server.store(message_number, '+FLAGS', '\\Deleted')

                server.expunge()
                server.close()
            finally:
                # Always end the session so the connection is not leaked
                # when an error interrupts the fetch.
                server.logout()
            #SourceLog.objects.save_status(source=self, status='Successful connection.')

        except Exception as exception:
            logger.exception('Unhandled exception: %s' % exception)
            #SourceLog.objects.save_status(source=self, status='Error: %s' % exc)

    class Meta:
        verbose_name = _('IMAP email')
        verbose_name_plural = _('IMAP email')
class WatchFolderSource(OutOfProcessSource): class WatchFolderSource(OutOfProcessSource):
is_interactive = False
source_type = SOURCE_CHOICE_WATCH source_type = SOURCE_CHOICE_WATCH
folder_path = models.CharField(max_length=255, verbose_name=_(u'Folder path'), help_text=_(u'Server side filesystem path.')) folder_path = models.CharField(max_length=255, verbose_name=_(u'Folder path'), help_text=_(u'Server side filesystem path.'))

View File

@@ -49,6 +49,7 @@ INSTALLED_APPS = (
# 3rd party # 3rd party
'compressor', 'compressor',
'corsheaders', 'corsheaders',
'djcelery',
'filetransfers', 'filetransfers',
'mptt', 'mptt',
'rest_framework', 'rest_framework',
@@ -266,6 +267,7 @@ REST_FRAMEWORK = {
CELERY_TIMEZONE = 'UTC' CELERY_TIMEZONE = 'UTC'
CELERY_ENABLE_UTC = True CELERY_ENABLE_UTC = True
CELERY_ALWAYS_EAGER = True CELERY_ALWAYS_EAGER = True
CELERYBEAT_SCHEDULER = 'djcelery.schedulers.DatabaseScheduler'
# ------------ CORS ------------ # ------------ CORS ------------
CORS_ORIGIN_ALLOW_ALL = True CORS_ORIGIN_ALLOW_ALL = True
# ------ Django REST Swagger ----- # ------ Django REST Swagger -----