Files
mayan-edms/mayan/apps/documents/models.py

687 lines
24 KiB
Python

from __future__ import absolute_import
from ast import literal_eval
import base64
import hashlib
import logging
import os
import tempfile
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.db import models
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.utils.translation import ugettext
from django.utils.translation import ugettext_lazy as _
from acls.utils import apply_default_acls
from common.settings import TEMPORARY_DIRECTORY
from converter.api import (convert, get_page_count,
get_available_transformations_choices)
from converter.exceptions import UnknownFileFormat
from converter.literals import (DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION,
DEFAULT_PAGE_NUMBER)
from history.api import create_history
from mimetype.api import get_mimetype
from .events import HISTORY_DOCUMENT_CREATED
from .exceptions import NewDocumentVersionNotAllowed
from .literals import (RELEASE_LEVEL_CHOICES, RELEASE_LEVEL_FINAL,
VERSION_UPDATE_MAJOR, VERSION_UPDATE_MICRO,
VERSION_UPDATE_MINOR)
from .managers import (DocumentPageTransformationManager, DocumentTypeManager,
RecentDocumentManager)
from .runtime import storage_backend
from .settings import (CACHE_PATH, CHECKSUM_FUNCTION, DISPLAY_SIZE,
UUID_FUNCTION, ZOOM_MAX_LEVEL, ZOOM_MIN_LEVEL)
def HASH_FUNCTION(x):
    """
    Document image cache name hash function: return the hexadecimal
    SHA-256 digest of ``x``
    """
    digest = hashlib.sha256(x)
    return digest.hexdigest()


logger = logging.getLogger(__name__)
def get_filename_from_uuid(instance, filename):
    """
    Remember the original filename of the uploaded file on the instance
    and hand back a freshly generated UUID to be used in its place
    """
    instance.filename = filename
    storage_name = UUID_FUNCTION()
    return storage_name
class DocumentType(models.Model):
    """
    A document type (or class of documents) to which a specific set of
    properties can be attached
    """
    name = models.CharField(
        max_length=32,
        unique=True,
        verbose_name=_(u'Name'),
    )

    objects = DocumentTypeManager()

    class Meta:
        ordering = ['name']
        verbose_name = _(u'Document type')
        verbose_name_plural = _(u'Documents types')

    def natural_key(self):
        # The unique name, wrapped in a one element tuple
        return (self.name,)

    def __unicode__(self):
        return self.name
class Document(models.Model):
    """
    Defines a single document with its fields and properties

    The file and the file related data are held by the document's
    versions; the file accessors of this class are proxies to the
    latest version.
    """
    # Cache of the latest DocumentVersion of each document, keyed by the
    # document's primary key and shared by every instance of the class
    _latest_versions = {}

    uuid = models.CharField(max_length=48, blank=True, editable=False)
    document_type = models.ForeignKey(DocumentType, verbose_name=_(u'Document type'), related_name='documents', null=True, blank=True)
    description = models.TextField(blank=True, null=True, verbose_name=_(u'Description'))
    date_added = models.DateTimeField(verbose_name=_(u'Added'), auto_now_add=True)

    class Meta:
        verbose_name = _(u'Document')
        verbose_name_plural = _(u'Documents')
        ordering = ['-date_added']

    @staticmethod
    def clear_image_cache():
        """
        Delete every regular file found directly inside CACHE_PATH
        """
        for the_file in os.listdir(CACHE_PATH):
            file_path = os.path.join(CACHE_PATH, the_file)
            if os.path.isfile(file_path):
                os.unlink(file_path)

    def __unicode__(self):
        try:
            return self.latest_version.filename
        except AttributeError:
            # Document has no version yet, let's return a place holder text
            return ugettext(u'Uninitialized document')

    @models.permalink
    def get_absolute_url(self):
        return ('documents:document_view_simple', [self.pk])

    def save(self, *args, **kwargs):
        """
        Save the document. When the document is being created, also
        generate its UUID, apply the default ACLs, add it to the recent
        documents of the user (if any) and record the creation event.

        An optional ``user`` keyword argument is consumed here and is not
        passed to the parent class' save method.
        """
        user = kwargs.pop('user', None)
        new_document = not self.pk
        if new_document:
            self.uuid = UUID_FUNCTION()

        super(Document, self).save(*args, **kwargs)

        if new_document:
            apply_default_acls(self, user)

            if user:
                self.add_as_recent_document_for_user(user)
                create_history(HISTORY_DOCUMENT_CREATED, self, {'user': user})
            else:
                create_history(HISTORY_DOCUMENT_CREATED, self)

    def get_cached_image_name(self, page, version):
        """
        Return a tuple of the cache file path of a page image and the
        list of transformations of that page.

        ``page`` is a page number and ``version`` the primary key of a
        DocumentVersion. The cache name is the hash of the version's
        checksum, the page number and the transformations.
        """
        document_version = DocumentVersion.objects.get(pk=version)
        document_page = document_version.pages.get(page_number=page)
        transformations, warnings = document_page.get_transformation_list()
        hash_value = HASH_FUNCTION(u''.join([document_version.checksum, unicode(page), unicode(transformations)]))
        return os.path.join(CACHE_PATH, hash_value), transformations

    def get_image_cache_name(self, page, version):
        """
        Return the path of the cached image of a page, calling the
        converter to produce it when it is not in the cache yet.

        ``version`` is the primary key of the DocumentVersion to render.
        """
        cache_file_path, transformations = self.get_cached_image_name(page, version)
        if os.path.exists(cache_file_path):
            return cache_file_path

        document_version = DocumentVersion.objects.get(pk=version)
        # Render the file and mimetype of the requested version. Going
        # through the document's proxy methods would always use the
        # latest version and store its image under the cache name of the
        # requested one.
        temporary_path = os.path.join(TEMPORARY_DIRECTORY, document_version.checksum)
        document_file = document_version.save_to_file(temporary_path)
        return convert(
            input_filepath=document_file, output_filepath=cache_file_path,
            page=page, transformations=transformations,
            mimetype=document_version.mimetype
        )

    def get_valid_image(self, size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, version=None):
        """
        Convert the cached image of a page using the given size, zoom
        and rotation and return the converter's result.

        The latest version is used when ``version`` is not given.
        """
        if not version:
            version = self.latest_version.pk
        image_cache_name = self.get_image_cache_name(page=page, version=version)
        logger.debug('image_cache_name: %s', image_cache_name)
        return convert(input_filepath=image_cache_name, cleanup_files=False, size=size, zoom=zoom, rotation=rotation)

    def get_image(self, size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None):
        """
        Return the image of a page, either as a file path or, when
        ``as_base64`` is true, as a base64 encoded ``data:`` URI.

        ``zoom`` is clamped to the ZOOM_MIN_LEVEL - ZOOM_MAX_LEVEL range
        and ``rotation`` is reduced modulo 360.
        """
        if zoom < ZOOM_MIN_LEVEL:
            zoom = ZOOM_MIN_LEVEL

        if zoom > ZOOM_MAX_LEVEL:
            zoom = ZOOM_MAX_LEVEL

        rotation = rotation % 360

        file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version)
        logger.debug('file_path: %s', file_path)

        if not as_base64:
            return file_path

        # The image is binary data: read it in binary mode. Each reader
        # gets its own handle and every handle is closed when done.
        with open(file_path, 'rb') as mimetype_source:
            mimetype = get_mimetype(mimetype_source, file_path, mimetype_only=True)[0]

        with open(file_path, 'rb') as image:
            base64_data = base64.b64encode(image.read())

        return u'data:%s;base64,%s' % (mimetype, base64_data)

    def invalidate_cached_image(self, page):
        """
        Remove the cached image of a page of the latest version; a
        missing cache file is not an error
        """
        try:
            os.unlink(self.get_cached_image_name(page, self.latest_version.pk)[0])
        except OSError:
            pass

    def add_as_recent_document_for_user(self, user):
        RecentDocument.objects.add_document_for_user(user, self)

    def delete(self, *args, **kwargs):
        """
        Delete every version one by one, so that each version's own
        delete method runs, and then delete the document
        """
        for version in self.versions.all():
            version.delete()
        return super(Document, self).delete(*args, **kwargs)

    @property
    def size(self):
        return self.latest_version.size

    def new_version(self, file, user=None, comment=None, version_update=None, release_level=None, serial=None):
        """
        Create, save and return a new version of the document from the
        given file.

        When ``version_update`` is given, the version numbers are
        calculated from the latest version, and ``release_level``,
        ``serial`` and ``comment`` are stored too. Those left as None are
        not passed to the model, so its field defaults apply.

        Raises NewDocumentVersionNotAllowed when new versions are not
        allowed for the user.
        """
        logger.debug('creating new document version')
        # TODO: move this restriction to a signal processor of the checkouts app
        if not self.is_new_versions_allowed(user=user):
            raise NewDocumentVersionNotAllowed

        version_kwargs = {'document': self, 'file': file}

        if version_update:
            new_version_dict = self.latest_version.get_new_version_dict(version_update)
            logger.debug('new_version_dict: %s', new_version_dict)
            version_kwargs.update(
                major=new_version_dict.get('major'),
                minor=new_version_dict.get('minor'),
                micro=new_version_dict.get('micro'),
            )
            # An explicit None would override the field's default
            optional_values = (
                ('release_level', release_level),
                ('serial', serial),
                ('comment', comment),
            )
            for field_name, value in optional_values:
                if value is not None:
                    version_kwargs[field_name] = value
        # NOTE(review): without version_update the comment, release level
        # and serial are not stored, as before - confirm this is intended

        new_version = DocumentVersion(**version_kwargs)
        new_version.save()

        logger.debug('new_version saved')

        self.__class__._latest_versions[self.pk] = new_version

        return new_version

    # Proxy methods
    def open(self, *args, **kwargs):
        """
        Return a file descriptor to a document's file irrespective of
        the storage backend
        """
        return self.latest_version.open(*args, **kwargs)

    def save_to_file(self, *args, **kwargs):
        return self.latest_version.save_to_file(*args, **kwargs)

    def exists(self):
        """
        Returns a boolean value that indicates if the document's
        latest version file exists in storage
        """
        return self.latest_version.exists()

    # Compatibility methods
    @property
    def file(self):
        return self.latest_version.file

    @property
    def file_mimetype(self):
        return self.latest_version.mimetype

    @property
    def file_mime_encoding(self):
        return self.latest_version.encoding

    @property
    def file_filename(self):
        return self.latest_version.filename

    @property
    def date_updated(self):
        return self.latest_version.timestamp

    @property
    def checksum(self):
        return self.latest_version.checksum

    @property
    def signature_state(self):
        return self.latest_version.signature_state

    @property
    def pages(self):
        try:
            return self.latest_version.pages
        except AttributeError:
            # Document has no version yet
            return 0

    @property
    def page_count(self):
        return self.latest_version.page_count

    @property
    def latest_version(self):
        """
        The version of the document with the most recent timestamp, or
        None when the document has no versions.

        The result is kept in the class level cache. An entry holding
        None is treated as missing, so a cache slot that was cleared by
        storing None is looked up again instead of being returned.
        """
        cache = self.__class__._latest_versions
        version = cache.get(self.pk)
        if version is None:
            version = self.versions.order_by('timestamp').last()
            cache[self.pk] = version
        return version

    def rename(self, new_name):
        version = self.latest_version
        return version.rename(new_name)

    @property
    def filename(self):
        return self.latest_version.filename

    @filename.setter
    def filename(self, value):
        version = self.latest_version
        version.filename = value
        return version.save()

    def document_save_to_temp_dir(self, filename, buffer_size=1024 * 1024):
        """
        Save a copy of the latest version's file into TEMPORARY_DIRECTORY
        under the given filename and return the resulting path
        """
        temporary_path = os.path.join(TEMPORARY_DIRECTORY, filename)
        return self.save_to_file(temporary_path, buffer_size)
class DocumentVersion(models.Model):
    """
    Model that describes a document version and its properties
    """
    # Registered hooks, keyed by their order; they run in ascending
    # order and registering an existing order replaces the previous hook
    _pre_open_hooks = {}
    _post_save_hooks = {}
    # Cache of page counts, keyed by document version primary key
    _page_counts = {}

    document = models.ForeignKey(Document, verbose_name=_(u'Document'), related_name='versions')
    major = models.PositiveIntegerField(verbose_name=_(u'Mayor'), default=1)
    minor = models.PositiveIntegerField(verbose_name=_(u'Minor'), default=0)
    micro = models.PositiveIntegerField(verbose_name=_(u'Micro'), default=0)
    release_level = models.PositiveIntegerField(choices=RELEASE_LEVEL_CHOICES, default=RELEASE_LEVEL_FINAL, verbose_name=_(u'Release level'))
    serial = models.PositiveIntegerField(verbose_name=_(u'Serial'), default=0)
    timestamp = models.DateTimeField(verbose_name=_(u'Timestamp'), auto_now_add=True)
    comment = models.TextField(blank=True, verbose_name=_(u'Comment'))

    # File related fields
    file = models.FileField(upload_to=get_filename_from_uuid, storage=storage_backend, verbose_name=_(u'File'))
    mimetype = models.CharField(max_length=255, null=True, blank=True, editable=False)
    encoding = models.CharField(max_length=64, null=True, blank=True, editable=False)
    filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
    checksum = models.TextField(blank=True, null=True, verbose_name=_(u'Checksum'), editable=False)

    class Meta:
        unique_together = ('document', 'major', 'minor', 'micro', 'release_level', 'serial')
        verbose_name = _(u'Document version')
        verbose_name_plural = _(u'Document version')

    @staticmethod
    def get_version_update_choices(document_version):
        """
        Return the (value, label) choices of the version update types,
        each label showing the version number that would result from
        applying that update to ``document_version``
        """
        return (
            (VERSION_UPDATE_MAJOR, _(u'Major %(major)i.%(minor)i, (new release)') % document_version.get_new_version_dict(VERSION_UPDATE_MAJOR)),
            (VERSION_UPDATE_MINOR, _(u'Minor %(major)i.%(minor)i, (some updates)') % document_version.get_new_version_dict(VERSION_UPDATE_MINOR)),
            (VERSION_UPDATE_MICRO, _(u'Micro %(major)i.%(minor)i.%(micro)i, (fixes)') % document_version.get_new_version_dict(VERSION_UPDATE_MICRO))
        )

    @classmethod
    def register_pre_open_hook(cls, order, func):
        """
        Register a callable that is called with the opened file and the
        document version each time a version is opened (unless opened
        raw); its return value replaces the opened file
        """
        cls._pre_open_hooks[order] = func

    @classmethod
    def register_post_save_hook(cls, order, func):
        """
        Register a callable that is called with the document version
        after each save
        """
        cls._post_save_hooks[order] = func

    def __unicode__(self):
        return self.get_formated_version()

    def get_new_version_dict(self, version_update_type):
        """
        Return a dictionary with the 'major', 'minor' and 'micro' numbers
        that result from applying the given update type to this version.

        Returns None when the update type is not one of the known ones.
        """
        logger.debug('version_update_type: %s', version_update_type)

        if version_update_type == VERSION_UPDATE_MAJOR:
            return {
                'major': self.major + 1,
                'minor': 0,
                'micro': 0,
            }
        elif version_update_type == VERSION_UPDATE_MINOR:
            return {
                'major': self.major,
                'minor': self.minor + 1,
                'micro': 0,
            }
        elif version_update_type == VERSION_UPDATE_MICRO:
            return {
                'major': self.major,
                'minor': self.minor,
                'micro': self.micro + 1,
            }

    def get_formated_version(self):
        """
        Return the formatted version information
        """
        vers = [u'%i.%i' % (self.major, self.minor), ]

        # The micro number is only shown when it is not zero
        if self.micro:
            vers.append(u'.%i' % self.micro)
        if self.release_level != RELEASE_LEVEL_FINAL:
            vers.append(u'%s%i' % (self.get_release_level_display(), self.serial))
        return u''.join(vers)

    def save(self, *args, **kwargs):
        """
        Overloaded save method that updates the document version's checksum,
        mimetype, page count and transformation when created

        An optional ``transformations`` keyword argument is consumed
        here. The post save hooks run after every save.
        """
        new_document = not self.pk

        transformations = kwargs.pop('transformations', None)
        super(DocumentVersion, self).save(*args, **kwargs)

        for key in sorted(DocumentVersion._post_save_hooks):
            DocumentVersion._post_save_hooks[key](self)

        if new_document:
            # Only do this for new documents
            self.update_checksum(save=False)
            self.update_mimetype(save=False)
            self.save()
            self.update_page_count(save=False)
            if transformations:
                self.apply_default_transformations(transformations)

    def update_checksum(self, save=True):
        """
        Open a document version's file and update the checksum field using the
        user provided checksum function
        """
        if self.exists():
            source = self.open()
            try:
                self.checksum = unicode(CHECKSUM_FUNCTION(source.read()))
            finally:
                # Release the descriptor even if reading or hashing fails
                source.close()

            if save:
                self.save()

    def update_page_count(self, save=True):
        """
        Detect the number of pages of the version's file, delete the
        pages in excess, create the missing ones and return the number
        of detected pages.

        When the converter does not know the file format the page count
        defaults to 1.
        """
        handle, filepath = tempfile.mkstemp()
        # Just need the filepath, close the file description
        os.close(handle)

        try:
            self.save_to_file(filepath)
            try:
                detected_pages = get_page_count(filepath)
            except UnknownFileFormat:
                # If converter backend doesn't understand the format,
                # use 1 as the total page count
                detected_pages = 1
                # NOTE(review): this model declares no ``description``
                # field, so this text is only set on the instance and not
                # stored by save() - confirm whether the parent
                # document's description was intended
                self.description = ugettext(u'This document\'s file format is not known, the page count has therefore defaulted to 1.')
                self.save()
        finally:
            # Remove the temporary copy even when an error is raised
            try:
                os.remove(filepath)
            except OSError:
                pass

        current_pages = self.pages.order_by('page_number',)
        if current_pages.count() > detected_pages:
            for page in current_pages[detected_pages:]:
                page.delete()

        for page_number in range(detected_pages):
            DocumentPage.objects.get_or_create(
                document_version=self, page_number=page_number + 1)

        if save:
            self.save()

        self.__class__._page_counts[self.pk] = self.pages.count()

        return detected_pages

    def apply_default_transformations(self, transformations):
        """
        Create the given transformations for every page of the version,
        but only if none of its pages has a transformation yet.

        ``transformations`` is an iterable of dictionaries with the
        'transformation' and 'arguments' keys.
        """
        pages = list(self.pages.all())

        # sum() returns 0 for a version without pages, a case in which
        # reduce() without an initial value raises TypeError
        existing_count = sum(
            page.documentpagetransformation_set.count() for page in pages
        )

        # Only apply default transformations on new documents
        if existing_count == 0:
            for transformation in transformations:
                for document_page in pages:
                    page_transformation = DocumentPageTransformation(
                        document_page=document_page,
                        order=0,
                        transformation=transformation.get('transformation'),
                        arguments=transformation.get('arguments')
                    )
                    page_transformation.save()

    def revert(self):
        """
        Delete the subsequent versions after this one
        """
        for version in self.document.versions.filter(timestamp__gt=self.timestamp):
            version.delete()

    def update_mimetype(self, save=True):
        """
        Read a document version's file and determine the mimetype by calling the
        get_mimetype wrapper

        If the detection fails the mimetype and the encoding are set to
        empty strings.
        """
        if self.exists():
            try:
                self.mimetype, self.encoding = get_mimetype(self.open(), self.filename)
            except Exception:
                # Best effort detection; unlike a bare except this lets
                # KeyboardInterrupt and SystemExit propagate
                logger.debug('unable to determine the mimetype', exc_info=True)
                self.mimetype = u''
                self.encoding = u''
            finally:
                if save:
                    self.save()

    def delete(self, *args, **kwargs):
        """
        Delete the version's file from storage, drop the cache entries
        that refer to this version and delete the version
        """
        self.file.storage.delete(self.file.path)

        # Invalidate page count cache. The entry is removed, not set to
        # None, because a key holding None still counts as cached for
        # lookups that test for the key's presence.
        self.__class__._page_counts.pop(self.pk, None)

        # Invalidate parent document's latest version cache
        Document._latest_versions.pop(self.document.pk, None)

        return super(DocumentVersion, self).delete(*args, **kwargs)

    def exists(self):
        """
        Returns a boolean value that indicates if the document's file
        exists in storage
        """
        return self.file.storage.exists(self.file.path)

    def open(self, raw=False):
        """
        Return a file descriptor to a document version's file irrespective of
        the storage backend

        Unless ``raw`` is true, the opened file is passed through the
        registered pre open hooks, in order.
        """
        if raw:
            return self.file.storage.open(self.file.path)
        else:
            result = self.file.storage.open(self.file.path)
            for key in sorted(DocumentVersion._pre_open_hooks):
                result = DocumentVersion._pre_open_hooks[key](result, self)

            return result

    def save_to_file(self, filepath, buffer_size=1024 * 1024):
        """
        Save a copy of the document from the document storage backend
        to the local filesystem

        The file is copied in chunks of ``buffer_size`` and ``filepath``
        is returned. Both descriptors are closed even if the copy fails.
        """
        input_descriptor = self.open()
        try:
            with open(filepath, 'wb') as output_descriptor:
                while True:
                    copy_buffer = input_descriptor.read(buffer_size)
                    if not copy_buffer:
                        break
                    output_descriptor.write(copy_buffer)
        finally:
            input_descriptor.close()

        return filepath

    @property
    def size(self):
        if self.exists():
            return self.file.storage.size(self.file.path)
        else:
            return None

    def rename(self, new_name):
        """
        Change the filename of the version and save it
        """
        new_filename, new_extension = os.path.splitext(new_name)
        name, extension = os.path.splitext(self.filename)

        # Preserve existing extension if new name doesn't have one
        if new_extension:
            extension = new_extension

        self.filename = u''.join([new_filename, extension])
        self.save()

    @property
    def page_count(self):
        """
        The number of pages of the version, taken from the class level
        cache; a missing entry or one holding None is counted again
        """
        cache = self.__class__._page_counts
        if cache.get(self.pk) is None:
            cache[self.pk] = self.pages.count()
        return cache[self.pk]
class DocumentTypeFilename(models.Model):
    """
    A filename made available to the documents of a given document type
    by the quick rename functionality
    """
    document_type = models.ForeignKey(
        DocumentType,
        verbose_name=_(u'Document type'),
    )
    filename = models.CharField(
        db_index=True,
        max_length=128,
        verbose_name=_(u'Filename'),
    )
    enabled = models.BooleanField(
        verbose_name=_(u'Enabled'),
        default=True,
    )

    class Meta:
        verbose_name = _(u'Document type quick rename filename')
        verbose_name_plural = _(u'Document types quick rename filenames')
        ordering = ['filename']

    def __unicode__(self):
        return self.filename
class DocumentPage(models.Model):
    """
    A single page of a document version, including its content
    """
    document_version = models.ForeignKey(
        DocumentVersion,
        related_name='pages',
        verbose_name=_(u'Document version'),
    )
    content = models.TextField(
        verbose_name=_(u'Content'), blank=True, null=True
    )
    page_label = models.CharField(
        verbose_name=_(u'Page label'), max_length=40, blank=True, null=True
    )
    page_number = models.PositiveIntegerField(
        verbose_name=_(u'Page number'),
        default=1,
        editable=False,
        db_index=True,
    )

    class Meta:
        verbose_name = _(u'Document page')
        verbose_name_plural = _(u'Document pages')
        ordering = ['page_number']

    def __unicode__(self):
        label_values = {
            'document': unicode(self.document),
            'page_num': self.page_number,
            'total_pages': self.document_version.pages.count()
        }
        return _(u'Page %(page_num)d out of %(total_pages)d of %(document)s') % label_values

    @models.permalink
    def get_absolute_url(self):
        return ('documents:document_page_view', [self.pk])

    # Compatibility methods
    @property
    def document(self):
        # The document that owns the version this page belongs to
        parent_version = self.document_version
        return parent_version.document

    @property
    def siblings(self):
        # Every page of the same document version, this one included
        return DocumentPage.objects.filter(
            document_version=self.document_version
        )
class ArgumentsValidator(object):
    """
    Validator that only accepts strings that ``ast.literal_eval`` is able
    to evaluate

    The default error message and error code can be overridden per
    instance through the ``message`` and ``code`` arguments.
    """
    message = _(u'Enter a valid value.')
    code = 'invalid'

    def __init__(self, message=None, code=None):
        if message is not None:
            self.message = message
        if code is not None:
            self.code = code

    def __call__(self, value):
        """
        Validates that the input evaluates correctly.

        Raises ValidationError, with the instance's message and code,
        when the stripped value cannot be evaluated.
        """
        value = value.strip()
        try:
            literal_eval(value)
        except (ValueError, SyntaxError, TypeError):
            # TypeError is raised for literals that parse but cannot be
            # built, such as a dictionary with an unhashable key:
            # {[1]: 2}
            raise ValidationError(self.message, code=self.code)
class DocumentPageTransformation(models.Model):
    """
    A transformation, and the arguments of the transformation, stored
    for a given document page
    """
    document_page = models.ForeignKey(
        DocumentPage,
        verbose_name=_(u'Document page'),
    )
    order = models.PositiveIntegerField(
        verbose_name=_(u'Order'),
        default=0,
        blank=True,
        null=True,
        db_index=True,
    )
    transformation = models.CharField(
        verbose_name=_(u'Transformation'),
        max_length=128,
        choices=get_available_transformations_choices(),
    )
    arguments = models.TextField(
        verbose_name=_(u'Arguments'),
        blank=True,
        null=True,
        help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}'),
        validators=[ArgumentsValidator()],
    )

    objects = DocumentPageTransformationManager()

    class Meta:
        verbose_name = _(u'Document page transformation')
        verbose_name_plural = _(u'Document page transformations')
        ordering = ('order',)

    def __unicode__(self):
        return self.get_transformation_display()
class RecentDocument(models.Model):
    """
    An entry of the list of the n documents most recently accessed or
    created by a given user
    """
    user = models.ForeignKey(
        User,
        editable=False,
        verbose_name=_(u'User'),
    )
    document = models.ForeignKey(
        Document,
        editable=False,
        verbose_name=_(u'Document'),
    )
    datetime_accessed = models.DateTimeField(
        auto_now=True,
        db_index=True,
        verbose_name=_(u'Accessed'),
    )

    objects = RecentDocumentManager()

    class Meta:
        verbose_name = _(u'Recent document')
        verbose_name_plural = _(u'Recent documents')
        ordering = ('-datetime_accessed',)

    def __unicode__(self):
        return unicode(self.document)
# Quick hack to break the circular dependency between DocumentPage and
# DocumentPageTransformation.
# Can be removed once the transformations are moved to the converter app
def _get_transformation_list(document_page):
    """
    Return the transformations of a document page as a list, as provided
    by the DocumentPageTransformation manager
    """
    manager = DocumentPageTransformation.objects
    return manager.get_for_document_page_as_list(document_page)


DocumentPage.add_to_class('get_transformation_list', _get_transformation_list)