from __future__ import unicode_literals import imaplib import logging import poplib import yaml try: from yaml import CSafeLoader as SafeLoader except ImportError: from yaml import SafeLoader from django.core.exceptions import ValidationError from django.core.files.base import ContentFile from django.db import models from django.utils.encoding import force_bytes from django.utils.translation import ugettext_lazy as _ from mayan.apps.metadata.api import set_bulk_metadata from mayan.apps.metadata.models import MetadataType from ..literals import ( DEFAULT_IMAP_MAILBOX, DEFAULT_METADATA_ATTACHMENT_NAME, DEFAULT_POP3_TIMEOUT, SOURCE_CHOICE_EMAIL_IMAP, SOURCE_CHOICE_EMAIL_POP3, SOURCE_UNCOMPRESS_CHOICE_N, SOURCE_UNCOMPRESS_CHOICE_Y, ) from .base import IntervalBaseModel __all__ = ('IMAPEmail', 'POP3Email') logger = logging.getLogger(__name__) class EmailBaseModel(IntervalBaseModel): """ POP3 email and IMAP email sources are non-interactive sources that periodically fetch emails from an email account using either the POP3 or IMAP email protocol. These sources are useful when users need to scan documents outside their office, they can photograph a paper document with their phones and send the image to a designated email that is setup as a Mayan POP3 or IMAP source. Mayan will periodically download the emails and process them as Mayan documents. """ host = models.CharField(max_length=128, verbose_name=_('Host')) ssl = models.BooleanField(default=True, verbose_name=_('SSL')) port = models.PositiveIntegerField(blank=True, null=True, help_text=_( 'Typical choices are 110 for POP3, 995 for POP3 over SSL, 143 for ' 'IMAP, 993 for IMAP over SSL.'), verbose_name=_('Port') ) username = models.CharField(max_length=96, verbose_name=_('Username')) password = models.CharField(max_length=96, verbose_name=_('Password')) metadata_attachment_name = models.CharField( default=DEFAULT_METADATA_ATTACHMENT_NAME, help_text=_( 'Name of the attachment that will contains the metadata type ' 'names and value pairs to be assigned to the rest of the ' 'downloaded attachments. Note: This attachment has to be the ' 'first attachment.' ), max_length=128, verbose_name=_('Metadata attachment name') ) subject_metadata_type = models.ForeignKey( blank=True, help_text=_( 'Select a metadata type valid for the document type selected in ' 'which to store the email\'s subject.' ), on_delete=models.CASCADE, null=True, related_name='email_subject', to=MetadataType, verbose_name=_('Subject metadata type') ) from_metadata_type = models.ForeignKey( blank=True, help_text=_( 'Select a metadata type valid for the document type selected in ' 'which to store the email\'s "from" value.' ), on_delete=models.CASCADE, null=True, related_name='email_from', to=MetadataType, verbose_name=_('From metadata type') ) store_body = models.BooleanField( default=True, help_text=_( 'Store the body of the email as a text document.' ), verbose_name=_('Store email body') ) objects = models.Manager() class Meta: verbose_name = _('Email source') verbose_name_plural = _('Email sources') @staticmethod def process_message(source, message_text, message_properties=None): from flanker import mime counter = 1 message = mime.from_string(force_bytes(message_text)) metadata_dictionary = {} if not message_properties: message_properties = {} message_properties['Subject'] = message_properties.get( 'Subject', message.headers.get('Subject') ) message_properties['From'] = message_properties.get( 'From', message.headers.get('From') ) if source.subject_metadata_type: metadata_dictionary[ source.subject_metadata_type.name ] = message_properties.get('Subject') if source.from_metadata_type: metadata_dictionary[ source.from_metadata_type.name ] = message_properties.get('From') # Messages are tree based, do nested processing of message parts until # a message with no children is found, then work out way up. if message.parts: for part in message.parts: EmailBaseModel.process_message( source=source, message_text=part.to_string(), message_properties=message_properties ) else: # Treat inlines as attachments, both are extracted and saved as # documents if message.is_attachment() or message.is_inline(): # Reject zero length attachments if len(message.body) == 0: return label = message.detected_file_name or 'attachment-{}'.format(counter) with ContentFile(content=message.body, name=label) as file_object: if label == source.metadata_attachment_name: metadata_dictionary = yaml.load( stream=file_object.read(), Loader=SafeLoader ) logger.debug( 'Got metadata dictionary: %s', metadata_dictionary ) else: documents = source.handle_upload( document_type=source.document_type, file_object=file_object, expand=( source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y ) ) if metadata_dictionary: for document in documents: set_bulk_metadata( document=document, metadata_dictionary=metadata_dictionary ) else: # If it is not an attachment then it should be a body message part. # Another option is to use message.is_body() if message.detected_content_type == 'text/html': label = 'email_body.html' else: label = 'email_body.txt' if source.store_body: with ContentFile(content=force_bytes(message.body), name=label) as file_object: documents = source.handle_upload( document_type=source.document_type, expand=SOURCE_UNCOMPRESS_CHOICE_N, file_object=file_object ) if metadata_dictionary: for document in documents: set_bulk_metadata( document=document, metadata_dictionary=metadata_dictionary ) def clean(self): if self.subject_metadata_type: if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): raise ValidationError( { 'subject_metadata_type': _( 'Subject metadata type "%(metadata_type)s" is not ' 'valid for the document type: %(document_type)s' ) % { 'metadata_type': self.subject_metadata_type, 'document_type': self.document_type } } ) if self.from_metadata_type: if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): raise ValidationError( { 'from_metadata_type': _( '"From" metadata type "%(metadata_type)s" is not ' 'valid for the document type: %(document_type)s' ) % { 'metadata_type': self.from_metadata_type, 'document_type': self.document_type } } ) class IMAPEmail(EmailBaseModel): source_type = SOURCE_CHOICE_EMAIL_IMAP mailbox = models.CharField( default=DEFAULT_IMAP_MAILBOX, help_text=_('IMAP Mailbox from which to check for messages.'), max_length=64, verbose_name=_('Mailbox') ) objects = models.Manager() class Meta: verbose_name = _('IMAP email') verbose_name_plural = _('IMAP email') # http://www.doughellmann.com/PyMOTW/imaplib/ def check_source(self, test=False): logger.debug(msg='Starting IMAP email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) if self.ssl: mailbox = imaplib.IMAP4_SSL(host=self.host, port=self.port) else: mailbox = imaplib.IMAP4(host=self.host, port=self.port) mailbox.login(user=self.username, password=self.password) mailbox.select(mailbox=self.mailbox) status, data = mailbox.search(None, 'NOT', 'DELETED') if data: messages_info = data[0].split() logger.debug('messages count: %s', len(messages_info)) for message_number in messages_info: logger.debug('message_number: %s', message_number) status, data = mailbox.fetch( message_set=message_number, message_parts='(RFC822)' ) EmailBaseModel.process_message( source=self, message_text=data[0][1] ) if not test: mailbox.store( message_set=message_number, command='+FLAGS', flag_list='\\Deleted' ) mailbox.expunge() mailbox.close() mailbox.logout() class POP3Email(EmailBaseModel): source_type = SOURCE_CHOICE_EMAIL_POP3 timeout = models.PositiveIntegerField( default=DEFAULT_POP3_TIMEOUT, verbose_name=_('Timeout') ) objects = models.Manager() class Meta: verbose_name = _('POP email') verbose_name_plural = _('POP email') def check_source(self, test=False): logger.debug(msg='Starting POP3 email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) if self.ssl: mailbox = poplib.POP3_SSL(host=self.host, post=self.port) else: mailbox = poplib.POP3( host=self.host, post=self.port, timeout=self.timeout ) mailbox.getwelcome() mailbox.user(username=self.username) mailbox.pass_(password=self.password) messages_info = mailbox.list() logger.debug(msg='messages_info:') logger.debug(msg=messages_info) logger.debug('messages count: %s', len(messages_info[1])) for message_info in messages_info[1]: message_number, message_size = message_info.split() logger.debug('message_number: %s', message_number) logger.debug('message_size: %s', message_size) complete_message = '\n'.join(mailbox.retr(message_number)[1]) EmailBaseModel.process_message( source=self, message_text=complete_message ) if not test: mailbox.dele(which=message_number) mailbox.quit()