diff --git a/mayan/apps/sources/classes.py b/mayan/apps/sources/classes.py
index ab04b2d675..de8d8d5e82 100644
--- a/mayan/apps/sources/classes.py
+++ b/mayan/apps/sources/classes.py
@@ -10,6 +10,31 @@ from converter.api import convert
 from mimetype.api import get_mimetype
 
 
+class PseudoFile(File):
+    """
+    Wrap an in-memory file-like object, exposing its name and size.
+    """
+    def __init__(self, file, name):
+        self.name = name
+        self.file = file
+        # Measure the content by seeking to the end, then rewind for readers.
+        self.file.seek(0, os.SEEK_END)
+        self.size = self.file.tell()
+        self.file.seek(0)
+
+
+class Attachment(File):
+    """
+    File built from the decoded payload of an email message part.
+    """
+    def __init__(self, part, name):
+        self.name = name
+        # get_payload(decode=True) returns None for multipart parts (for
+        # example an attached message); treat that as empty content.
+        payload = part.get_payload(decode=True) or ''
+        self.file = PseudoFile(StringIO(payload), name=name)
+
+
 class StagingFile(object):
     """
     Simple class to extend the File class to add preview capabilities
diff --git a/mayan/apps/sources/literals.py b/mayan/apps/sources/literals.py
index 260c9160e4..e16b6d7db7 100644
--- a/mayan/apps/sources/literals.py
+++ b/mayan/apps/sources/literals.py
@@ -18,15 +18,29 @@ SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES = (
 SOURCE_CHOICE_WEB_FORM = 'webform'
 SOURCE_CHOICE_STAGING = 'staging'
 SOURCE_CHOICE_WATCH = 'watch'
+SOURCE_CHOICE_EMAIL_POP3 = 'pop3'
+SOURCE_CHOICE_EMAIL_IMAP = 'imap'
 
 SOURCE_CHOICES = (
     (SOURCE_CHOICE_WEB_FORM, _(u'Web form')),
     (SOURCE_CHOICE_STAGING, _(u'Server staging folder')),
     (SOURCE_CHOICE_WATCH, _(u'Server watch folder')),
+    (SOURCE_CHOICE_EMAIL_POP3, _(u'POP3 email')),
+    (SOURCE_CHOICE_EMAIL_IMAP, _(u'IMAP email')),
 )
 
+# TODO: remove PLURALS
 SOURCE_CHOICES_PLURAL = (
     (SOURCE_CHOICE_WEB_FORM, _(u'Web forms')),
     (SOURCE_CHOICE_STAGING, _(u'Server staging folders')),
     (SOURCE_CHOICE_WATCH, _(u'Server watch folders')),
+    (SOURCE_CHOICE_EMAIL_POP3, _(u'POP3 emails')),
+    (SOURCE_CHOICE_EMAIL_IMAP, _(u'IMAP emails')),
 )
+
+# Default polling interval, in seconds, for interval based sources.
+DEFAULT_INTERVAL = 60
+# Default connection timeout, in seconds, for POP3 sources.
+DEFAULT_POP3_TIMEOUT = 60
+# Default mailbox checked by IMAP sources.
+DEFAULT_IMAP_MAILBOX = 'INBOX'
diff --git a/mayan/apps/sources/migrations/0005_auto__add_imapemail__add_intervalbasemodel__add_pop3email__add_emailba.py b/mayan/apps/sources/migrations/0005_auto__add_imapemail__add_intervalbasemodel__add_pop3email__add_emailba.py
new file mode 100644
index 0000000000..24ec380847
--- /dev/null
+++ b/mayan/apps/sources/migrations/0005_auto__add_imapemail__add_intervalbasemodel__add_pop3email__add_emailba.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+from south.utils import datetime_utils as datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+
+class Migration(SchemaMigration):
+
+    def forwards(self, orm):
+        # Adding model 'IMAPEmail'
+        db.create_table(u'sources_imapemail', (
+            (u'emailbasemodel_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.EmailBaseModel'], unique=True, primary_key=True)),
+            ('mailbox', self.gf('django.db.models.fields.CharField')(default='INBOX', max_length=64)),
+        ))
+        db.send_create_signal(u'sources', ['IMAPEmail'])
+
+        # Adding model 'IntervalBaseModel'
+        db.create_table(u'sources_intervalbasemodel', (
+            (u'outofprocesssource_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.OutOfProcessSource'], unique=True, primary_key=True)),
+            ('interval', self.gf('django.db.models.fields.PositiveIntegerField')(default=60)),
+            ('document_type', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.DocumentType'], null=True, blank=True)),
+            ('uncompress', self.gf('django.db.models.fields.CharField')(max_length=1)),
+        ))
+        db.send_create_signal(u'sources', ['IntervalBaseModel'])
+
+        # Adding model 'POP3Email'
+        db.create_table(u'sources_pop3email', (
+            (u'emailbasemodel_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.EmailBaseModel'], unique=True, primary_key=True)),
+            ('timeout', self.gf('django.db.models.fields.PositiveIntegerField')(default=60)),
+        ))
+        db.send_create_signal(u'sources', ['POP3Email'])
+
+        # Adding model 'EmailBaseModel'
+        db.create_table(u'sources_emailbasemodel', (
+            (u'intervalbasemodel_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['sources.IntervalBaseModel'], unique=True, primary_key=True)),
+            ('host', self.gf('django.db.models.fields.CharField')(max_length=128)),
+            ('ssl', self.gf('django.db.models.fields.BooleanField')()),
+            ('port', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)),
+            ('username', self.gf('django.db.models.fields.CharField')(max_length=96)),
+            ('password', self.gf('django.db.models.fields.CharField')(max_length=96)),
+        ))
+        db.send_create_signal(u'sources', ['EmailBaseModel'])
+
+
+    def backwards(self, orm):
+        # Deleting model 'IMAPEmail'
+        db.delete_table(u'sources_imapemail')
+
+        # Deleting model 'IntervalBaseModel'
+        db.delete_table(u'sources_intervalbasemodel')
+
+        # Deleting model 'POP3Email'
+        db.delete_table(u'sources_pop3email')
+
+        # Deleting model 'EmailBaseModel'
+        db.delete_table(u'sources_emailbasemodel')
+
+
+    models = {
+        u'contenttypes.contenttype': {
+            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
+            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
+        },
+        u'documents.documenttype': {
+            'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'},
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'})
+        },
+        u'sources.emailbasemodel': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'EmailBaseModel', '_ormbases': [u'sources.IntervalBaseModel']},
+            'host': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
+            u'intervalbasemodel_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.IntervalBaseModel']", 'unique': 'True', 'primary_key': 'True'}),
+            'password': ('django.db.models.fields.CharField', [], {'max_length': '96'}),
+            'port': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'ssl': ('django.db.models.fields.BooleanField', [], {}),
+            'username': ('django.db.models.fields.CharField', [], {'max_length': '96'})
+        },
+        u'sources.imapemail': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'IMAPEmail', '_ormbases': [u'sources.EmailBaseModel']},
+            u'emailbasemodel_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.EmailBaseModel']", 'unique': 'True', 'primary_key': 'True'}),
+            'mailbox': ('django.db.models.fields.CharField', [], {'default': "'INBOX'", 'max_length': '64'})
+        },
+        u'sources.interactivesource': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'InteractiveSource', '_ormbases': [u'sources.Source']},
+            u'source_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.Source']", 'unique': 'True', 'primary_key': 'True'})
+        },
+        u'sources.intervalbasemodel': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'IntervalBaseModel', '_ormbases': [u'sources.OutOfProcessSource']},
+            'document_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentType']", 'null': 'True', 'blank': 'True'}),
+            'interval': ('django.db.models.fields.PositiveIntegerField', [], {'default': '60'}),
+            u'outofprocesssource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.OutOfProcessSource']", 'unique': 'True', 'primary_key': 'True'}),
+            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
+        },
+        u'sources.outofprocesssource': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'OutOfProcessSource', '_ormbases': [u'sources.Source']},
+            u'source_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.Source']", 'unique': 'True', 'primary_key': 'True'})
+        },
+        u'sources.pop3email': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'POP3Email', '_ormbases': [u'sources.EmailBaseModel']},
+            u'emailbasemodel_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.EmailBaseModel']", 'unique': 'True', 'primary_key': 'True'}),
+            'timeout': ('django.db.models.fields.PositiveIntegerField', [], {'default': '60'})
+        },
+        u'sources.source': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'Source'},
+            'blacklist': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
+            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'title': ('django.db.models.fields.CharField', [], {'max_length': '64'}),
+            'whitelist': ('django.db.models.fields.TextField', [], {'blank': 'True'})
+        },
+        u'sources.sourcetransformation': {
+            'Meta': {'ordering': "('order',)", 'object_name': 'SourceTransformation'},
+            'arguments': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'object_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
+            'order': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
+            'transformation': ('django.db.models.fields.CharField', [], {'max_length': '128'})
+        },
+        u'sources.stagingfoldersource': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'StagingFolderSource', '_ormbases': [u'sources.InteractiveSource']},
+            'delete_after_upload': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'folder_path': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
+            u'interactivesource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.InteractiveSource']", 'unique': 'True', 'primary_key': 'True'}),
+            'preview_height': ('django.db.models.fields.IntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'preview_width': ('django.db.models.fields.IntegerField', [], {}),
+            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
+        },
+        u'sources.watchfoldersource': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'WatchFolderSource', '_ormbases': [u'sources.OutOfProcessSource']},
+            'delete_after_upload': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'folder_path': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
+            'interval': ('django.db.models.fields.PositiveIntegerField', [], {}),
+            u'outofprocesssource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.OutOfProcessSource']", 'unique': 'True', 'primary_key': 'True'}),
+            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
+        },
+        u'sources.webformsource': {
+            'Meta': {'ordering': "('title',)", 'object_name': 'WebFormSource', '_ormbases': [u'sources.InteractiveSource']},
+            u'interactivesource_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': u"orm['sources.InteractiveSource']", 'unique': 'True', 'primary_key': 'True'}),
+            'uncompress': ('django.db.models.fields.CharField', [], {'max_length': '1'})
+        }
+    }
+
+    complete_apps = ['sources']
\ No newline at end of file
diff --git a/mayan/apps/sources/models.py b/mayan/apps/sources/models.py
index 07e516a3fc..6292575c3b 100644
--- a/mayan/apps/sources/models.py
+++ b/mayan/apps/sources/models.py
@@ -1,6 +1,11 @@
 from __future__ import absolute_import
 
 from ast import literal_eval
+from email import message_from_string
+from email.utils import collapse_rfc2231_value
+import imaplib
+import json
 import logging
 import os
+import poplib
 
@@ -15,15 +20,18 @@ from model_utils.managers import InheritanceManager
 from common.compressed_files import CompressedFile, NotACompressedFile
 from converter.api import get_available_transformations_choices
 from converter.literals import DIMENSION_SEPARATOR
-from documents.models import Document
+from djcelery.models import PeriodicTask, IntervalSchedule
+from documents.models import Document, DocumentType
 from metadata.api import save_metadata_list
 
-from .classes import StagingFile
-from .literals import (SOURCE_CHOICES, SOURCE_CHOICES_PLURAL,
-                       SOURCE_CHOICE_STAGING, SOURCE_CHOICE_WATCH,
-                       SOURCE_CHOICE_WEB_FORM,
+from .classes import Attachment, StagingFile
+from .literals import (DEFAULT_INTERVAL, DEFAULT_POP3_TIMEOUT,
+                       DEFAULT_IMAP_MAILBOX, SOURCE_CHOICES,
+                       SOURCE_CHOICES_PLURAL, SOURCE_CHOICE_STAGING,
+                       SOURCE_CHOICE_WATCH, SOURCE_CHOICE_WEB_FORM,
                        SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES,
-                       SOURCE_UNCOMPRESS_CHOICES)
+                       SOURCE_UNCOMPRESS_CHOICES, SOURCE_UNCOMPRESS_CHOICE_Y,
+                       SOURCE_CHOICE_EMAIL_IMAP, SOURCE_CHOICE_EMAIL_POP3)
 from .managers import SourceTransformationManager
 
 logger = logging.getLogger(__name__)
@@ -190,8 +198,216 @@ class OutOfProcessSource(Source):
         verbose_name_plural = _(u'Out of process')
 
 
+class IntervalBaseModel(OutOfProcessSource):
+    """
+    Base model for sources that are checked at a fixed interval.
+
+    Saving an instance creates (or updates) a djcelery periodic task named
+    'check_interval_source-<pk>'; deleting the instance removes that task
+    together with its interval schedule.
+    """
+    interval = models.PositiveIntegerField(default=DEFAULT_INTERVAL, verbose_name=_('Interval'), help_text=_('Interval in seconds between document downloads from this source.'))
+    document_type = models.ForeignKey(DocumentType, null=True, blank=True, verbose_name=_('Document type'), help_text=_('Assign a document type to documents uploaded from this source.'))
+    uncompress = models.CharField(max_length=1, choices=SOURCE_UNCOMPRESS_CHOICES, verbose_name=_('Uncompress'), help_text=_('Whether to expand or not, compressed archives.'))
+
+    def save(self, *args, **kwargs):
+        # Must be evaluated before saving: afterwards the instance has a pk.
+        new_source = not self.pk
+        super(IntervalBaseModel, self).save(*args, **kwargs)
+        periodic_task_name = 'check_interval_source-%i' % self.pk
+        if new_source:
+            # An IntervalSchedule needs a period unit; interval is in seconds.
+            interval_instance = IntervalSchedule.objects.create(every=self.interval, period='seconds')
+            PeriodicTask.objects.create(
+                name=periodic_task_name,
+                interval=interval_instance,
+                task='sources.tasks.task_check_interval_source',
+                queue='mailing',
+                # json.dumps() returns the encoded string (json.dump() needs
+                # a file object). The dictionary is stored as keyword
+                # arguments; `args` holds a JSON list of positional ones.
+                # NOTE(review): assumes the task accepts `source_id` as a
+                # keyword argument - confirm against sources/tasks.py.
+                kwargs=json.dumps({'source_id': self.pk})
+            )
+        else:
+            periodic_task = PeriodicTask.objects.get(name=periodic_task_name)
+            periodic_task.interval.every = self.interval
+            periodic_task.interval.save()
+            periodic_task.save()
+
+    def delete(self, *args, **kwargs):
+        # Build the task name first: Django sets the primary key of a
+        # deleted instance to None.
+        periodic_task_name = 'check_interval_source-%i' % self.pk
+        super(IntervalBaseModel, self).delete(*args, **kwargs)
+        periodic_task = PeriodicTask.objects.get(name=periodic_task_name)
+        interval_instance = periodic_task.interval
+        periodic_task.delete()
+        interval_instance.delete()
+
+    class Meta:
+        verbose_name = _('Interval source')
+        verbose_name_plural = _('Interval sources')
+
+
+class EmailBaseModel(IntervalBaseModel):
+    """
+    Connection settings shared by the POP3 and IMAP email sources.
+    """
+    host = models.CharField(max_length=128, verbose_name=_('Host'))
+    ssl = models.BooleanField(verbose_name=_('SSL'))
+    port = models.PositiveIntegerField(blank=True, null=True, verbose_name=_('Port'), help_text=_('Typical choices are 110 for POP3, 995 for POP3 over SSL, 143 for IMAP, 993 for IMAP over SSL.'))
+    username = models.CharField(max_length=96, verbose_name=_('Username'))
+    password = models.CharField(max_length=96, verbose_name=_('Password'))
+
+    # From: http://bookmarks.honewatson.com/2009/08/11/python-gmail-imaplib-search-subject-get-attachments/
+    @staticmethod
+    def process_message(source, message):
+        """
+        Upload every attachment of a raw email message through `source`.
+
+        Only parts whose Content-Disposition starts with 'attachment' are
+        uploaded; parts without a filename are named 'attachment-<n>'.
+        """
+        email = message_from_string(message)
+        counter = 1
+
+        for part in email.walk():
+            disposition = part.get('Content-Disposition', 'none')
+            logger.debug('Disposition: %s' % disposition)
+
+            if disposition.startswith('attachment'):
+                raw_filename = part.get_filename()
+
+                if raw_filename:
+                    filename = collapse_rfc2231_value(raw_filename)
+                else:
+                    filename = _('attachment-%i') % counter
+                    counter += 1
+
+                logger.debug('filename: %s' % filename)
+
+                document_file = Attachment(part, name=filename)
+                source.upload_file(document_file, expand=(source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), document_type=source.document_type)
+
+    class Meta:
+        verbose_name = _('Email source')
+        verbose_name_plural = _('Email sources')
+
+
+class POP3Email(EmailBaseModel):
+    """
+    Email source that downloads, then deletes, messages from a POP3 account.
+    """
+    source_type = SOURCE_CHOICE_EMAIL_POP3
+
+    timeout = models.PositiveIntegerField(default=DEFAULT_POP3_TIMEOUT, verbose_name=_('Timeout'))
+
+    def fetch_mail(self):
+        """
+        Upload the attachments of every message in the account.
+
+        Each message is deleted from the server after being processed. Any
+        exception is logged and not propagated.
+        """
+        try:
+            logger.debug('Starting POP3 email fetch')
+            logger.debug('host: %s' % self.host)
+            logger.debug('ssl: %s' % self.ssl)
+
+            # The port field is optional; fall back to the protocol default.
+            if self.ssl:
+                mailbox = poplib.POP3_SSL(self.host, self.port or poplib.POP3_SSL_PORT)
+            else:
+                mailbox = poplib.POP3(self.host, self.port or poplib.POP3_PORT, timeout=self.timeout)
+
+            mailbox.getwelcome()
+            mailbox.user(self.username)
+            mailbox.pass_(self.password)
+            messages_info = mailbox.list()
+
+            logger.debug('messages_info:')
+            logger.debug(messages_info)
+            logger.debug('messages count: %s' % len(messages_info[1]))
+
+            for message_info in messages_info[1]:
+                message_number, message_size = message_info.split()
+                logger.debug('message_number: %s' % message_number)
+                logger.debug('message_size: %s' % message_size)
+
+                complete_message = '\n'.join(mailbox.retr(message_number)[1])
+
+                EmailBaseModel.process_message(source=self, message=complete_message)
+                mailbox.dele(message_number)
+
+            mailbox.quit()
+            #SourceLog.objects.save_status(source=self, status='Successful connection.')
+
+        except Exception as exception:
+            logger.error('Unhandled exception: %s' % exception)
+            #SourceLog.objects.save_status(source=self, status='Error: %s' % exc)
+
+    class Meta:
+        verbose_name = _('POP email')
+        verbose_name_plural = _('POP email')
+
+
+class IMAPEmail(EmailBaseModel):
+    """
+    Email source that downloads, then deletes, messages from an IMAP mailbox.
+    """
+    source_type = SOURCE_CHOICE_EMAIL_IMAP
+
+    mailbox = models.CharField(max_length=64, default=DEFAULT_IMAP_MAILBOX, verbose_name=_('Mailbox'), help_text=_('Mail from which to check for messages with attached documents.'))
+
+    # http://www.doughellmann.com/PyMOTW/imaplib/
+    def fetch_mail(self):
+        """
+        Upload the attachments of every non deleted message in the mailbox.
+
+        Processed messages are flagged as deleted and expunged. Any exception
+        is logged and not propagated.
+        """
+        try:
+            logger.debug('Starting IMAP email fetch')
+            logger.debug('host: %s' % self.host)
+            logger.debug('ssl: %s' % self.ssl)
+
+            # The port field is optional; fall back to the protocol default.
+            if self.ssl:
+                mailbox = imaplib.IMAP4_SSL(self.host, self.port or imaplib.IMAP4_SSL_PORT)
+            else:
+                mailbox = imaplib.IMAP4(self.host, self.port or imaplib.IMAP4_PORT)
+
+            mailbox.login(self.username, self.password)
+            mailbox.select(self.mailbox)
+
+            status, data = mailbox.search(None, 'NOT', 'DELETED')
+            if data:
+                messages_info = data[0].split()
+                logger.debug('messages count: %s' % len(messages_info))
+
+                for message_number in messages_info:
+                    logger.debug('message_number: %s' % message_number)
+                    status, data = mailbox.fetch(message_number, '(RFC822)')
+                    EmailBaseModel.process_message(source=self, message=data[0][1])
+                    mailbox.store(message_number, '+FLAGS', '\\Deleted')
+
+            mailbox.expunge()
+            mailbox.close()
+            mailbox.logout()
+            #SourceLog.objects.save_status(source=self, status='Successful connection.')
+        except Exception as exception:
+            logger.error('Unhandled exception: %s' % exception)
+            #SourceLog.objects.save_status(source=self, status='Error: %s' % exc)
+
+    class Meta:
+        verbose_name = _('IMAP email')
+        verbose_name_plural = _('IMAP email')
+
+
 class WatchFolderSource(OutOfProcessSource):
-    is_interactive = False
     source_type = SOURCE_CHOICE_WATCH
 
     folder_path = models.CharField(max_length=255, verbose_name=_(u'Folder path'), help_text=_(u'Server side filesystem path.'))
diff --git a/mayan/settings/base.py b/mayan/settings/base.py
index 871ffc94b5..26c1dd29e9 100644
--- a/mayan/settings/base.py
+++ b/mayan/settings/base.py
@@ -49,6 +49,7 @@ INSTALLED_APPS = (
     # 3rd party
     'compressor',
     'corsheaders',
+    'djcelery',
     'filetransfers',
     'mptt',
     'rest_framework',
@@ -266,6 +267,7 @@ REST_FRAMEWORK = {
 CELERY_TIMEZONE = 'UTC'
 CELERY_ENABLE_UTC = True
 CELERY_ALWAYS_EAGER = True
+CELERYBEAT_SCHEDULER = 'djcelery.schedulers.DatabaseScheduler'
 # ------------ CORS ------------
 CORS_ORIGIN_ALLOW_ALL = True
 # ------ Django REST Swagger -----