Files
mayan-edms/apps/sources/models.py
2012-08-09 04:55:14 -04:00

603 lines
24 KiB
Python

from __future__ import absolute_import
from ast import literal_eval
import logging
import poplib
import imaplib
from email.Utils import collapse_rfc2231_value
from email import message_from_string
import os
import datetime
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import imagescanner
from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from django.core.exceptions import ValidationError
from django.db import transaction
from django.core.files import File
from converter.api import get_available_transformations_choices
from converter.literals import DIMENSION_SEPARATOR
from documents.models import Document, DocumentType
from documents.events import history_document_created
from metadata.api import save_metadata_list
from acls.utils import apply_default_acls
from .managers import SourceTransformationManager, SourceLogManager
from .literals import (SOURCE_CHOICES, SOURCE_CHOICES_PLURAL,
SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, SOURCE_CHOICE_WEB_FORM,
SOURCE_CHOICE_STAGING, SOURCE_ICON_DISK, SOURCE_ICON_DRIVE,
SOURCE_ICON_CHOICES, SOURCE_CHOICE_WATCH, SOURCE_UNCOMPRESS_CHOICES,
SOURCE_UNCOMPRESS_CHOICE_Y,
POP3_PORT, POP3_SSL_PORT,
SOURCE_CHOICE_POP3_EMAIL, DEFAULT_POP3_INTERVAL,
IMAP_PORT, IMAP_SSL_PORT,
SOURCE_CHOICE_IMAP_EMAIL, DEFAULT_IMAP_INTERVAL,
IMAP_DEFAULT_MAILBOX,
SOURCE_CHOICE_LOCAL_SCANNER, SOURCE_ICON_IMAGES,
DEFAULT_LOCAL_SCANNER_FILE_FORMAT)
from .compressed_file import CompressedFile, NotACompressedFile
from .conf.settings import POP3_TIMEOUT
#from . import sources_scheduler
logger = logging.getLogger(__name__)
class BaseModel(models.Model):
    """
    Abstract base class shared by every document source.

    Concrete subclasses are expected to define a ``source_type`` class
    attribute; it is used here to look up the human readable names and
    to build the internal name of the source.
    """
    title = models.CharField(max_length=64, verbose_name=_(u'title'))
    enabled = models.BooleanField(default=True, verbose_name=_(u'enabled'))
    whitelist = models.TextField(blank=True, verbose_name=_(u'whitelist'), editable=False)
    blacklist = models.TextField(blank=True, verbose_name=_(u'blacklist'), editable=False)
    #document_type = models.ForeignKey(DocumentType, blank=True, null=True, verbose_name=_(u'document type'), help_text=(u'Optional document type to be applied to documents uploaded from this source.'))

    @classmethod
    def class_fullname(cls):
        """
        Return the singular display name registered for this source type.
        """
        return unicode(dict(SOURCE_CHOICES).get(cls.source_type))

    @classmethod
    def class_fullname_plural(cls):
        """
        Return the plural display name registered for this source type.
        """
        return unicode(dict(SOURCE_CHOICES_PLURAL).get(cls.source_type))

    def __unicode__(self):
        return u'%s' % self.title

    def fullname(self):
        """
        Return the source type name followed by the quoted source title.
        """
        return u' '.join([self.class_fullname(), '"%s"' % self.title])

    def internal_name(self):
        """
        Return an identifier built from the source type and primary key.
        """
        return u'%s_%d' % (self.source_type, self.pk)

    def get_transformation_list(self):
        """
        Return the transformations stored for this source, as provided
        by the ``SourceTransformation.transformations`` manager.
        """
        return SourceTransformation.transformations.get_for_object_as_list(self)

    def upload_file(self, file_object, filename=None, use_file_name=False, document_type=None, expand=False, metadata_dict_list=None, user=None, document=None, new_version_data=None, command_line=False):
        """
        Upload ``file_object``, optionally expanding it as an archive.

        When ``expand`` is true the file is opened as a compressed file
        and every child is uploaded as a separate new document.  If the
        file turns out not to be an archive it is uploaded as a single
        document instead, unless ``command_line`` is true, in which case
        the ``NotACompressedFile`` exception is re-raised.

        Returns a dictionary whose ``is_compressed`` key is ``True`` or
        ``False`` when expansion was attempted and ``None`` otherwise.
        """
        is_compressed = None

        if expand:
            try:
                cf = CompressedFile(file_object)
                for count, fp in enumerate(cf.children(), 1):
                    if command_line:
                        print('Uploading file #%d: %s' % (count, fp))
                    self.upload_single_file(file_object=fp, filename=None, document_type=document_type, metadata_dict_list=metadata_dict_list, user=user)
                    fp.close()
            except NotACompressedFile:
                is_compressed = False
                # Use the module logger, like the rest of this file,
                # instead of the root logger.
                logger.debug('Exception: NotACompressedFile')
                if command_line:
                    raise
                self.upload_single_file(file_object=file_object, filename=filename, document_type=document_type, metadata_dict_list=metadata_dict_list, user=user)
            else:
                is_compressed = True
        else:
            self.upload_single_file(file_object, filename, use_file_name, document_type, metadata_dict_list, user, document, new_version_data)

        file_object.close()
        return {'is_compressed': is_compressed}

    @transaction.commit_on_success
    def upload_single_file(self, file_object, filename=None, use_file_name=False, document_type=None, metadata_dict_list=None, user=None, document=None, new_version_data=None):
        """
        Store ``file_object`` as a new document or as a new version.

        When ``document`` is not given a new ``Document`` is created,
        default ACLs are applied and the creation is recorded in the
        history.  When ``document`` is given a new version is added to
        it; the document is renamed to ``filename`` (or keeps its latest
        version filename) unless ``use_file_name`` is true.

        The source transformations are applied to the new version and
        ``metadata_dict_list`` is saved only for newly created documents.
        Any exception raised while creating the version is logged and
        re-raised after cleaning up.
        """
        new_document = not document

        if not document:
            document = Document()
            if document_type:
                document.document_type = document_type
            document.save()

            apply_default_acls(document, user)

            if user:
                document.add_as_recent_document_for_user(user)
                history_document_created.commit(source_object=document, data={'user': user})
            else:
                history_document_created.commit(source_object=document)
        else:
            if use_file_name:
                filename = None
            else:
                filename = filename if filename else document.latest_version.filename

        if not new_version_data:
            new_version_data = {}

        try:
            new_version = document.new_version(file=file_object, user=user, **new_version_data)
        except Exception as exc:
            logger.error('Unhandled exception: %s' % exc)
            # Don't leave the database in a broken state
            # Delete invalid documents with no version child
            # For databases that don't support transactions, delete the document
            if document.version_set.count() == 0:
                logger.debug('Empty document with no previous versions, deleting.')
                document.delete()
            # Rollback everything in case the database DOES support
            # transactions
            transaction.rollback()
            # Re-raise the error so that the view can capture
            # and display it
            raise

        if filename:
            document.rename(filename)

        transformations, errors = self.get_transformation_list()

        new_version.apply_default_transformations(transformations)
        #TODO: new HISTORY for version updates

        if metadata_dict_list and new_document:
            # Only do for new documents
            save_metadata_list(metadata_dict_list, document, create=True)

    class Meta:
        ordering = ('title',)
        abstract = True
class SourceLog(models.Model):
    """
    Status entry attached to an arbitrary source through a generic
    foreign key.
    """
    # Generic relation to the source instance this entry belongs to
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    source = generic.GenericForeignKey('content_type', 'object_id')

    creation_datetime = models.DateTimeField(verbose_name=_(u'date time'))
    status = models.TextField(verbose_name=_(u'status'))

    objects = SourceLogManager()

    def save(self, *args, **kwargs):
        """
        Stamp the entry with the current date and time the first time it
        is saved, then delegate to the regular model save.
        """
        is_unsaved = not self.pk
        if is_unsaved:
            self.creation_datetime = datetime.datetime.now()

        result = super(SourceLog, self).save(*args, **kwargs)
        return result

    class Meta:
        verbose_name = _(u'source log')
        verbose_name_plural = _(u'sources logs')
        get_latest_by = 'creation_datetime'
        ordering = ('creation_datetime',)
class InteractiveBaseModel(BaseModel):
    """
    Abstract base class for sources that carry a display icon.

    Subclasses are expected to define a ``default_icon`` class
    attribute, which is used when no icon has been selected.
    """
    icon = models.CharField(blank=True, null=True, max_length=24, choices=SOURCE_ICON_CHOICES, verbose_name=_(u'icon'), help_text=_(u'An icon to visually distinguish this source.'))

    def save(self, *args, **kwargs):
        """
        Fall back to the subclass default icon when none is set, then
        save the instance.
        """
        if not self.icon:
            self.icon = self.default_icon
        # Name this class in super() so that the next class in the MRO
        # (BaseModel) is not skipped if it ever defines its own save().
        super(InteractiveBaseModel, self).save(*args, **kwargs)

    class Meta(BaseModel.Meta):
        abstract = True
class PseudoFile(File):
    """
    File wrapper around an in-memory, seekable file-like object.

    The size is computed by seeking to the end of the wrapped object,
    which is then rewound to the beginning.
    """
    def __init__(self, file, name):
        self.name = name
        self.file = file

        # Measure the content length, then rewind so that readers start
        # from the first byte.
        wrapped = self.file
        wrapped.seek(0, os.SEEK_END)
        self.size = wrapped.tell()
        wrapped.seek(0)
class Attachment(File):
    """
    File built from the decoded payload of an email message part.
    """
    def __init__(self, part, name):
        self.name = name

        # Decode the transfer encoding of the part and expose the
        # content through an in-memory file.
        payload = part.get_payload(decode=True)
        buffer_ = StringIO(payload)
        self.file = PseudoFile(buffer_, name=name)
class IntervalBaseModel(BaseModel):
    """
    Abstract base class for non interactive sources that are checked
    periodically.
    """
    is_interactive = False

    interval = models.PositiveIntegerField(
        default=DEFAULT_POP3_INTERVAL,
        verbose_name=_(u'interval'),
        help_text=_(u'Interval in seconds between document downloads from this source.')
    )
    document_type = models.ForeignKey(
        DocumentType,
        null=True,
        blank=True,
        verbose_name=_(u'document type'),
        help_text=_(u'Assign a document type to documents uploaded from this source.')
    )
    uncompress = models.CharField(
        max_length=1,
        choices=SOURCE_UNCOMPRESS_CHOICES,
        verbose_name=_(u'uncompress'),
        help_text=_(u'Whether to expand or not, compressed archives.')
    )

    class Meta(BaseModel.Meta):
        verbose_name = _(u'interval source')
        verbose_name_plural = _(u'interval sources')
        abstract = True
class EmailBaseModel(IntervalBaseModel):
    """
    Abstract base class holding the connection settings and the message
    processing shared by the email sources.
    """
    host = models.CharField(max_length=64, verbose_name=_(u'host'))
    ssl = models.BooleanField(verbose_name=_(u'SSL'))
    port = models.PositiveIntegerField(
        blank=True,
        null=True,
        verbose_name=_(u'port'),
        help_text=_(u'Override the defaults values of %(normal_port)d and %(ssl_port)d for SSL, can be left blank otherwise.') % {'normal_port': POP3_PORT, 'ssl_port': POP3_SSL_PORT}
    )
    username = models.CharField(max_length=64, verbose_name=_(u'username'))
    password = models.CharField(max_length=64, verbose_name=_(u'password'))

    # From: http://bookmarks.honewatson.com/2009/08/11/python-gmail-imaplib-search-subject-get-attachments/
    @staticmethod
    def process_message(source, message):
        """
        Upload every attachment found in a raw email message.

        ``message`` is the complete message as a string.  Each part whose
        ``Content-Disposition`` header starts with ``attachment`` is
        uploaded through ``source.upload_file``.  Attachments without a
        filename are named using a running counter.
        """
        parsed_message = message_from_string(message)
        expand_archives = source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
        unnamed_index = 1

        for part in parsed_message.walk():
            disposition = part.get('Content-Disposition', 'none')
            logger.debug('Disposition: %s' % disposition)

            if not disposition.startswith('attachment'):
                # Only attachments are turned into documents
                continue

            raw_filename = part.get_filename()
            if raw_filename:
                filename = collapse_rfc2231_value(raw_filename)
            else:
                filename = _(u'attachment-%i') % unnamed_index
                unnamed_index += 1

            logger.debug('filename: %s' % filename)

            source.upload_file(
                Attachment(part, name=filename),
                expand=expand_archives,
                document_type=source.document_type
            )

    class Meta(IntervalBaseModel.Meta):
        verbose_name = _(u'email source')
        verbose_name_plural = _(u'email sources')
        abstract = True
class POP3Email(EmailBaseModel):
    """
    Email source that downloads attachments from a POP3 mailbox.
    """
    source_type = SOURCE_CHOICE_POP3_EMAIL

    def fetch_mail(self):
        """
        Download and process all messages if the interval has elapsed.

        The check runs when there is no previous log entry for this
        source or when at least ``interval`` seconds have passed since
        the latest one.  Every message is passed to
        ``EmailBaseModel.process_message`` and then marked for deletion.
        The outcome, success or error, is recorded with
        ``SourceLog.objects.save_status``; exceptions raised while
        talking to the server are logged and not propagated.
        """
        try:
            last_check = SourceLog.objects.get_latest_for(self)
        except SourceLog.DoesNotExist:
            # Trigger email fetch when there are no previous logs
            initial_trigger = True
            difference = datetime.timedelta(seconds=0)
        else:
            difference = datetime.datetime.now() - last_check
            initial_trigger = False

        if difference >= datetime.timedelta(seconds=self.interval) or initial_trigger:
            # Kept outside of the try block so that the cleanup code can
            # tell whether a connection was ever established.
            mailbox = None
            try:
                logger.debug('Starting POP3 email fetch')
                logger.debug('host: %s' % self.host)
                logger.debug('ssl: %s' % self.ssl)

                if self.ssl:
                    port = self.port or POP3_SSL_PORT
                    logger.debug('port: %d' % port)
                    mailbox = poplib.POP3_SSL(self.host, int(port))
                else:
                    port = self.port or POP3_PORT
                    logger.debug('port: %d' % port)
                    mailbox = poplib.POP3(self.host, int(port), timeout=POP3_TIMEOUT)

                mailbox.getwelcome()
                mailbox.user(self.username)
                mailbox.pass_(self.password)
                messages_info = mailbox.list()

                logger.debug('messages_info:')
                logger.debug(messages_info)
                logger.debug('messages count: %s' % len(messages_info[1]))

                for message_info in messages_info[1]:
                    message_number, message_size = message_info.split()
                    logger.debug('message_number: %s' % message_number)
                    logger.debug('message_size: %s' % message_size)

                    complete_message = '\n'.join(mailbox.retr(message_number)[1])

                    EmailBaseModel.process_message(source=self, message=complete_message)
                    mailbox.dele(message_number)

                mailbox.quit()
                # Closed cleanly, nothing left for the cleanup code to do
                mailbox = None
                SourceLog.objects.save_status(source=self, status='Successful connection.')
            except Exception as exc:
                logger.error('Unhandled exception: %s' % exc)
                SourceLog.objects.save_status(source=self, status='Error: %s' % exc)
            finally:
                if mailbox is not None:
                    # A failure happened after connecting: sign off so the
                    # connection is not leaked.  Signing off also commits
                    # the deletion of the messages that were already
                    # processed, so they are not downloaded again.
                    try:
                        mailbox.quit()
                    except Exception as exc:
                        logger.debug('Unable to close the POP3 connection: %s' % exc)

    class Meta(EmailBaseModel.Meta):
        verbose_name = _(u'POP email')
        verbose_name_plural = _(u'POP email')
class IMAPEmail(EmailBaseModel):
    """
    Email source that downloads attachments from an IMAP mailbox.
    """
    source_type = SOURCE_CHOICE_IMAP_EMAIL

    mailbox = models.CharField(max_length=64, blank=True, verbose_name=_(u'mailbox'), help_text=_(u'Mail from which to check for messages with attached documents. If none is specified, the default mailbox is %s') % IMAP_DEFAULT_MAILBOX)

    # http://www.doughellmann.com/PyMOTW/imaplib/
    def fetch_mail(self):
        """
        Download and process all messages if the interval has elapsed.

        The check runs when there is no previous log entry for this
        source or when at least ``interval`` seconds have passed since
        the latest one.  Every message not flagged as deleted is passed
        to ``EmailBaseModel.process_message`` and then flagged as
        deleted; the mailbox is expunged afterwards.  The outcome,
        success or error, is recorded with
        ``SourceLog.objects.save_status``; exceptions raised while
        talking to the server are logged and not propagated.
        """
        try:
            last_check = SourceLog.objects.get_latest_for(self)
        except SourceLog.DoesNotExist:
            # Trigger email fetch when there are no previous logs
            initial_trigger = True
            difference = datetime.timedelta(seconds=0)
        else:
            difference = datetime.datetime.now() - last_check
            initial_trigger = False

        if difference >= datetime.timedelta(seconds=self.interval) or initial_trigger:
            # Named ``connection`` to avoid confusion with the ``mailbox``
            # model field.  Kept outside of the try block so that the
            # cleanup code can tell whether a connection was established.
            connection = None
            try:
                logger.debug('Starting IMAP email fetch')
                logger.debug('host: %s' % self.host)
                logger.debug('ssl: %s' % self.ssl)

                if self.ssl:
                    port = self.port or IMAP_SSL_PORT
                    logger.debug('port: %d' % port)
                    connection = imaplib.IMAP4_SSL(self.host, int(port))
                else:
                    port = self.port or IMAP_PORT
                    logger.debug('port: %d' % port)
                    connection = imaplib.IMAP4(self.host, int(port))

                connection.login(self.username, self.password)
                connection.select(self.mailbox or IMAP_DEFAULT_MAILBOX)

                status, data = connection.search(None, 'NOT', 'DELETED')
                if data:
                    messages_info = data[0].split()
                    logger.debug('messages count: %s' % len(messages_info))

                    for message_number in messages_info:
                        logger.debug('message_number: %s' % message_number)
                        status, data = connection.fetch(message_number, '(RFC822)')
                        EmailBaseModel.process_message(source=self, message=data[0][1])
                        connection.store(message_number, '+FLAGS', '\\Deleted')

                connection.expunge()
                connection.close()
                connection.logout()
                # Closed cleanly, nothing left for the cleanup code to do
                connection = None
                SourceLog.objects.save_status(source=self, status='Successful connection.')
            except Exception as exc:
                logger.error('Unhandled exception: %s' % exc)
                SourceLog.objects.save_status(source=self, status='Error: %s' % exc)
            finally:
                if connection is not None:
                    # A failure happened after connecting: log out so the
                    # connection is not leaked.  Only logout is issued
                    # here; the expunge/close steps of the success path
                    # are deliberately not repeated after an error.
                    try:
                        connection.logout()
                    except Exception as exc:
                        logger.debug('Unable to close the IMAP connection: %s' % exc)

    class Meta(EmailBaseModel.Meta):
        verbose_name = _(u'IMAP email')
        verbose_name_plural = _(u'IMAP email')
class LocalScanner(InteractiveBaseModel):
    """
    Interactive source backed by a scanner device found through the
    ``imagescanner`` library.
    """
    # scanner device string to scanner instance cache dict
    _scanner_cache = {}

    class NoSuchScanner(Exception):
        pass

    is_interactive = True
    source_type = SOURCE_CHOICE_LOCAL_SCANNER
    default_icon = SOURCE_ICON_IMAGES

    scanner_device = models.CharField(max_length=255, verbose_name=_(u'scanner device'))
    scanner_description = models.CharField(max_length=255, verbose_name=_(u'scanner description'))

    @classmethod
    def get_scanners(cls):
        """
        List the available scanners and store each one, together with a
        description string, in the class level cache keyed by device.

        Returns the list of scanners reported by ``imagescanner``.
        """
        finder = imagescanner.ImageScanner(remote_search=False)
        found = finder.list_scanners()

        imagescanner.settings.LOGGING_LEVEL = logging.FATAL
        imagescanner.settings.ENABLE_NET_BACKEND = False
        imagescanner.settings.ENABLE_TEST_BACKEND = False

        for device in found:
            description = u'%s: %s - %s - %s <%s>' % (
                device.id, device.manufacturer, device.name,
                device.description, device._device
            )
            cache_key = unicode(device._device)
            LocalScanner._scanner_cache[cache_key] = {
                'scanner': device,
                'description': description
            }

        return found

    @classmethod
    def get_scanner(cls, device):
        """
        Return the cache entry for ``device`` or raise ``NoSuchScanner``.
        """
        try:
            entry = cls._scanner_cache[device]
        except KeyError:
            raise cls.NoSuchScanner
        return entry

    @classmethod
    def get_scanner_choices(cls, description_only=False):
        """
        Return the cached scanners as a list of descriptions or, by
        default, as a list of (device, description) tuples.
        """
        cached_entries = LocalScanner._scanner_cache.values()

        if description_only:
            return [entry['description'] for entry in cached_entries]

        return [
            (entry['scanner']._device, entry['description'])
            for entry in cached_entries
        ]

    def scanner(self, _fail=False):
        """
        Return the scanner instance cached for this source's device.

        On a cache miss the cache is refreshed once before giving up
        with ``NoSuchScanner``.
        """
        try:
            return LocalScanner._scanner_cache[self.scanner_device]['scanner']
        except KeyError:
            if _fail == False:
                # Refresh the cache before trying one last time
                LocalScanner.get_scanners()
                return self.scanner(_fail=True)

            raise self.__class__.NoSuchScanner

    def scan(self, as_image=False):
        """
        Scan a page and return it as an image object when ``as_image`` is
        true, or as a ``PseudoFile`` named after the current date and
        time otherwise.
        """
        image = self.scanner().scan()
        if as_image:
            return image

        output = StringIO()
        image.save(output, DEFAULT_LOCAL_SCANNER_FILE_FORMAT)

        timestamp = unicode(datetime.datetime.now())
        name = timestamp.replace(u'.', u'_').replace(u' ', u'_')
        return PseudoFile(output, name=name)
        # This code make new_version upload fail, use it for debugging
        #buf = StringIO()
        #buf.write(image.tostring())
        #buf.seek(0)
        #return buf

    class Meta(InteractiveBaseModel.Meta):
        verbose_name = _(u'local scanner')
        verbose_name_plural = _(u'local scanners')
class StagingFolder(InteractiveBaseModel):
    """
    Interactive source that reads files from a server side folder.
    """
    is_interactive = True
    source_type = SOURCE_CHOICE_STAGING
    default_icon = SOURCE_ICON_DRIVE

    folder_path = models.CharField(
        max_length=255,
        verbose_name=_(u'folder path'),
        help_text=_(u'Server side filesystem path.')
    )
    preview_width = models.IntegerField(
        verbose_name=_(u'preview width'),
        help_text=_(u'Width value to be passed to the converter backend.')
    )
    preview_height = models.IntegerField(
        blank=True,
        null=True,
        verbose_name=_(u'preview height'),
        help_text=_(u'Height value to be passed to the converter backend.')
    )
    uncompress = models.CharField(
        max_length=1,
        choices=SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES,
        verbose_name=_(u'uncompress'),
        help_text=_(u'Whether to expand or not compressed archives.')
    )
    delete_after_upload = models.BooleanField(
        default=True,
        verbose_name=_(u'delete after upload'),
        help_text=_(u'Delete the file after is has been successfully uploaded.')
    )

    def get_preview_size(self):
        """
        Return the preview dimensions joined by the converter dimension
        separator; the height is included only when it is set.
        """
        parts = [unicode(self.preview_width)]
        if self.preview_height:
            parts.append(unicode(self.preview_height))
        return DIMENSION_SEPARATOR.join(parts)

    class Meta(InteractiveBaseModel.Meta):
        verbose_name = _(u'staging folder')
        verbose_name_plural = _(u'staging folders')
"""
class SourceMetadata(models.Model):
content_type = models.ForeignKey(ContentType)
object_id = models.PositiveIntegerField()
content_object = generic.GenericForeignKey('content_type', 'object_id')
metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'))
value = models.CharField(max_length=256, blank=True, verbose_name=_(u'value'))
def __unicode__(self):
return self.source
class Meta:
verbose_name = _(u'source metadata')
verbose_name_plural = _(u'sources metadata')
"""
class WebForm(InteractiveBaseModel):
    """
    Interactive source for files submitted through a web form.
    """
    is_interactive = True
    source_type = SOURCE_CHOICE_WEB_FORM
    default_icon = SOURCE_ICON_DISK

    uncompress = models.CharField(
        max_length=1,
        choices=SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES,
        verbose_name=_(u'uncompress'),
        help_text=_(u'Whether to expand or not compressed archives.')
    )
    #Default path

    class Meta(InteractiveBaseModel.Meta):
        verbose_name = _(u'web form')
        verbose_name_plural = _(u'web forms')
class WatchFolder(BaseModel):
    """
    Non interactive source describing a server side folder that is
    checked at a fixed interval.  Scheduling is currently disabled.
    """
    is_interactive = False
    source_type = SOURCE_CHOICE_WATCH

    folder_path = models.CharField(
        max_length=255,
        verbose_name=_(u'folder path'),
        help_text=_(u'Server side filesystem path.')
    )
    uncompress = models.CharField(
        max_length=1,
        choices=SOURCE_UNCOMPRESS_CHOICES,
        verbose_name=_(u'uncompress'),
        help_text=_(u'Whether to expand or not compressed archives.')
    )
    delete_after_upload = models.BooleanField(
        default=True,
        verbose_name=_(u'delete after upload'),
        help_text=_(u'Delete the file after is has been successfully uploaded.')
    )
    interval = models.PositiveIntegerField(
        verbose_name=_(u'interval'),
        help_text=_(u'Inverval in seconds where the watch folder path is checked for new documents.')
    )

    def save(self, *args, **kwargs):
        """
        Save the instance and then call ``schedule``.
        """
        #if self.pk:
        #    remove_job(self.internal_name())
        super(WatchFolder, self).save(*args, **kwargs)
        self.schedule()

    def schedule(self):
        """
        Placeholder: the scheduler registration is commented out.
        """
        pass
        #if self.enabled:
        #    sources_scheduler.add_interval_job(self.internal_name(),
        #        title=self.fullname(), function=self.execute,
        #        seconds=self.interval, kwargs={'source_id': self.pk}
        #    )

    def execute(self, source_id):
        """
        Look up the watch folder with primary key ``source_id`` and
        print the internal name of this instance.
        """
        source = WatchFolder.objects.get(pk=source_id)
        # Computed but not used yet
        expand = source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
        print('execute: %s' % self.internal_name())

    class Meta(BaseModel.Meta):
        verbose_name = _(u'watch folder')
        verbose_name_plural = _(u'watch folders')
class ArgumentsValidator(object):
    """
    Validator that accepts only strings that ``literal_eval`` can parse.
    """
    message = _(u'Enter a valid value.')
    code = 'invalid'

    def __init__(self, message=None, code=None):
        # Only override the class level defaults that were provided
        overrides = (('message', message), ('code', code))
        for attribute, override in overrides:
            if override is not None:
                setattr(self, attribute, override)

    def __call__(self, value):
        """
        Validates that the input evaluates correctly.
        """
        stripped = value.strip()
        try:
            literal_eval(stripped)
        except (ValueError, SyntaxError):
            raise ValidationError(self.message, code=self.code)
class SourceTransformation(models.Model):
    """
    Model that stores the transformation and transformation arguments
    for a given document source
    """
    # Generic relation to the source the transformation applies to
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    order = models.PositiveIntegerField(
        default=0,
        blank=True,
        null=True,
        verbose_name=_(u'order'),
        db_index=True
    )
    transformation = models.CharField(
        choices=get_available_transformations_choices(),
        max_length=128,
        verbose_name=_(u'transformation')
    )
    arguments = models.TextField(
        blank=True,
        null=True,
        verbose_name=_(u'arguments'),
        help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}',
        validators=[ArgumentsValidator()]
    )

    objects = models.Manager()
    transformations = SourceTransformationManager()

    def __unicode__(self):
        #return u'"%s" for %s' % (self.get_transformation_display(), unicode(self.content_object))
        display_name = self.get_transformation_display()
        return display_name

    class Meta:
        ordering = ('order',)
        verbose_name = _(u'document source transformation')
        verbose_name_plural = _(u'document source transformations')
class OutOfProcess(BaseModel):
    """
    Non interactive source without any fields of its own.
    """
    is_interactive = False

    class Meta(BaseModel.Meta):
        verbose_name = _(u'out of process')
        verbose_name_plural = _(u'out of process')