Initial commit of the new document upload workflow, which allows document promises to be returned after a POST request (gh-issue #194). Add a new signal that is sent when a document has been uploaded and is finally ready (gh-issue #193 and gh-issue #213).

This commit is contained in:
Roberto Rosario
2015-07-05 03:33:27 -04:00
parent 8f30932b6f
commit 8cc05bbefb
14 changed files with 204 additions and 110 deletions

View File

@@ -8,12 +8,13 @@ from common import (
menu_setup, menu_tools menu_setup, menu_tools
) )
from documents.models import Document from documents.models import Document
from documents.signals import post_document_created
from metadata.models import DocumentMetadata from metadata.models import DocumentMetadata
from rest_api.classes import APIEndPoint from rest_api.classes import APIEndPoint
from .handlers import ( from .handlers import (
document_index_delete, document_metadata_index_update, document_created_index_update, document_index_delete,
document_metadata_index_post_delete document_metadata_index_update, document_metadata_index_post_delete
) )
from .links import ( from .links import (
link_document_index_list, link_index_main_menu, link_index_setup, link_document_index_list, link_index_main_menu, link_index_setup,
@@ -45,6 +46,7 @@ class DocumentIndexingApp(MayanAppConfig):
menu_setup.bind_links(links=[link_index_setup]) menu_setup.bind_links(links=[link_index_setup])
menu_tools.bind_links(links=[link_rebuild_index_instances]) menu_tools.bind_links(links=[link_rebuild_index_instances])
post_document_created.connect(document_created_index_update, dispatch_uid='document_created_index_update', sender=Document)
post_save.connect(document_metadata_index_update, dispatch_uid='document_metadata_index_update', sender=DocumentMetadata) post_save.connect(document_metadata_index_update, dispatch_uid='document_metadata_index_update', sender=DocumentMetadata)
post_delete.connect(document_index_delete, dispatch_uid='document_index_delete', sender=Document) post_delete.connect(document_index_delete, dispatch_uid='document_index_delete', sender=Document)
post_delete.connect(document_metadata_index_post_delete, dispatch_uid='document_metadata_index_post_delete', sender=DocumentMetadata) post_delete.connect(document_metadata_index_post_delete, dispatch_uid='document_metadata_index_post_delete', sender=DocumentMetadata)

View File

@@ -3,6 +3,10 @@ from __future__ import unicode_literals
from .tasks import task_delete_empty_index_nodes, task_index_document from .tasks import task_delete_empty_index_nodes, task_index_document
def document_created_index_update(sender, **kwargs):
    """
    Signal handler: queue the indexing task for the document carried in the
    signal's ``instance`` argument.
    """
    document = kwargs['instance']
    task_kwargs = dict(document_id=document.pk)
    task_index_document.apply_async(kwargs=task_kwargs, queue='indexing')
def document_index_delete(sender, **kwargs): def document_index_delete(sender, **kwargs):
task_delete_empty_index_nodes.apply_async(queue='indexing') task_delete_empty_index_nodes.apply_async(queue='indexing')

View File

@@ -38,7 +38,7 @@ from .serializers import (
from .settings import ( from .settings import (
setting_display_size, setting_zoom_max_level, setting_zoom_min_level setting_display_size, setting_zoom_max_level, setting_zoom_min_level
) )
from .tasks import task_get_document_page_image, task_new_document from .tasks import task_get_document_page_image
class APIDocumentListView(generics.ListAPIView): class APIDocumentListView(generics.ListAPIView):

View File

@@ -4,8 +4,6 @@ import logging
from django.db import models, transaction from django.db import models, transaction
from common.compressed_files import CompressedFile, NotACompressedFile
from .settings import setting_recent_count, setting_language from .settings import setting_recent_count, setting_language
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -44,39 +42,6 @@ class DocumentManager(models.Manager):
for document in self.model.objects.all(): for document in self.model.objects.all():
document.invalidate_cache() document.invalidate_cache()
@transaction.atomic
def new_document(self, document_type, file_object, label=None, command_line=False, description=None, expand=False, language=None, user=None):
versions_created = []
if expand:
try:
compressed_file = CompressedFile(file_object)
count = 1
for compressed_file_child in compressed_file.children():
if command_line:
print 'Uploading file #%d: %s' % (count, compressed_file_child)
versions_created.append(self.upload_single_document(document_type=document_type, file_object=compressed_file_child, description=description, label=unicode(compressed_file_child), language=language or setting_language.value, user=user))
compressed_file_child.close()
count += 1
except NotACompressedFile:
logging.debug('Exception: NotACompressedFile')
if command_line:
raise
versions_created.append(self.upload_single_document(document_type=document_type, file_object=file_object, description=description, label=label, language=language or setting_language.value, user=user))
else:
versions_created.append(self.upload_single_document(document_type=document_type, file_object=file_object, description=description, label=label, language=language or setting_language.value, user=user))
return versions_created
@transaction.atomic
def upload_single_document(self, document_type, file_object, label=None, description=None, language=None, user=None):
document = self.model(description=description, document_type=document_type, language=language, label=label or unicode(file_object))
document.save(user=user)
version = document.new_version(file_object=file_object, user=user)
document.set_document_type(document_type, force=True)
return version
class PassthroughManager(models.Manager): class PassthroughManager(models.Manager):
pass pass

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
    """
    Alter the ``deleted_date_time`` field of ``document`` and the trash and
    delete period/unit fields of ``documenttype``.
    """

    dependencies = [
        ('documents', '0011_auto_20150704_0508'),
    ]

    operations = [
        # Document: date and time at which the document was trashed
        migrations.AlterField(
            model_name='document',
            name='deleted_date_time',
            field=models.DateTimeField(
                null=True,
                verbose_name='Date and time trashed',
                blank=True
            ),
            preserve_default=True,
        ),
        # Document type: period and unit before trashed documents are deleted
        migrations.AlterField(
            model_name='documenttype',
            name='delete_time_period',
            field=models.PositiveIntegerField(
                default=30,
                help_text='Amount of time after which documents of this type in the trash will be deleted.',
                verbose_name='Delete time period'
            ),
            preserve_default=True,
        ),
        migrations.AlterField(
            model_name='documenttype',
            name='delete_time_unit',
            field=models.CharField(
                default='days',
                max_length=8,
                verbose_name='Delete time unit',
                choices=[('days', 'Days'), ('hours', 'Hours'), ('minutes', 'Minutes')]
            ),
            preserve_default=True,
        ),
        # Document type: period and unit before documents are moved to the trash
        migrations.AlterField(
            model_name='documenttype',
            name='trash_time_period',
            field=models.PositiveIntegerField(
                help_text='Amount of time after which documents of this type will be moved to the trash.',
                null=True,
                verbose_name='Trash time period',
                blank=True
            ),
            preserve_default=True,
        ),
        migrations.AlterField(
            model_name='documenttype',
            name='trash_time_unit',
            field=models.CharField(
                blank=True,
                max_length=8,
                null=True,
                verbose_name='Trash time unit',
                choices=[('days', 'Days'), ('hours', 'Hours'), ('minutes', 'Minutes')]
            ),
            preserve_default=True,
        ),
    ]

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
def make_existing_documents_not_stubs(apps, schema_editor):
    """
    Flag every document that already exists as a non-stub document.

    The ``is_stub`` column is added by this migration with a default of
    ``True``, so documents created before it would otherwise be flagged as
    stubs.

    ``apps`` is the historical app registry supplied by ``RunPython``;
    ``schema_editor`` is not used.
    """
    Document = apps.get_model('documents', 'Document')
    # One UPDATE statement instead of loading and saving each row in turn;
    # the end result in the table is the same.
    Document.objects.update(is_stub=False)
def _leave_documents_untouched(apps, schema_editor):
    """
    Reverse step of the data migration: intentionally does nothing, because
    unapplying the ``AddField`` operation removes the ``is_stub`` column.
    """


class Migration(migrations.Migration):
    """
    Add the ``is_stub`` field to ``document`` and flag the documents that
    already exist as non-stubs.
    """

    dependencies = [
        ('documents', '0012_auto_20150705_0347'),
    ]

    operations = [
        migrations.AddField(
            model_name='document',
            name='is_stub',
            field=models.BooleanField(default=True, verbose_name='Is stub?', editable=False),
            preserve_default=True,
        ),
        # Without reverse_code the migration could not be unapplied.
        migrations.RunPython(
            make_existing_documents_not_stubs,
            reverse_code=_leave_documents_untouched,
        ),
    ]

View File

@@ -13,7 +13,10 @@ from django.utils.encoding import python_2_unicode_compatible
from django.utils.timezone import now from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from celery.execute import send_task
from common.literals import TIME_DELTA_UNIT_CHOICES from common.literals import TIME_DELTA_UNIT_CHOICES
from common.models import SharedUploadedFile
from common.settings import setting_temporary_directory from common.settings import setting_temporary_directory
from common.utils import fs_cleanup from common.utils import fs_cleanup
from converter import ( from converter import (
@@ -39,7 +42,9 @@ from .settings import (
setting_cache_path, setting_display_size, setting_language, setting_cache_path, setting_display_size, setting_language,
setting_language_choices, setting_zoom_max_level, setting_zoom_min_level setting_language_choices, setting_zoom_max_level, setting_zoom_min_level
) )
from .signals import post_version_upload, post_document_type_change from .signals import (
post_document_created, post_document_type_change, post_version_upload
)
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() # document image cache name hash function HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() # document image cache name hash function
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -69,6 +74,28 @@ class DocumentType(models.Model):
def natural_key(self): def natural_key(self):
return (self.name,) return (self.name,)
def new_document(self, file_object, label=None, description=None, language=None, _user=None):
    """
    Create a document of this document type and request its first version.

    ``language`` falls back to ``setting_language.value`` and ``label`` to
    the text representation of ``file_object`` when they are not given.
    ``_user`` is forwarded to the document's ``save()`` and
    ``new_version()`` calls. Returns the created document.
    """
    document = self.documents.create(
        description=description,
        language=language or setting_language.value,
        label=label or unicode(file_object)
    )
    document.save(_user=_user)
    document.new_version(file_object=file_object, _user=_user)
    return document
@transaction.atomic
def upload_single_document(self, document_type, file_object, label=None, description=None, language=None, user=None):
    """
    Create a single document of ``document_type`` from ``file_object`` and
    request its first version.

    ``label`` falls back to the text representation of ``file_object``.
    ``user`` is passed on as the acting user.

    NOTE(review): ``Document.new_version()`` now only queues the upload and
    does not appear to return the version, so the value returned here is
    presumably ``None`` -- confirm before relying on it.
    """
    # This method was written for the document manager, where ``self.model``
    # is the Document class; a model instance has no such attribute, so the
    # Document class is referenced directly.
    document = Document(
        description=description, document_type=document_type,
        language=language, label=label or unicode(file_object)
    )
    # Document.save() and Document.new_version() receive the acting user
    # through the ``_user`` keyword, not ``user``.
    document.save(_user=user)
    version = document.new_version(file_object=file_object, _user=user)
    document.set_document_type(document_type, force=True)
    return version
class Meta: class Meta:
verbose_name = _('Document type') verbose_name = _('Document type')
verbose_name_plural = _('Documents types') verbose_name_plural = _('Documents types')
@@ -88,7 +115,8 @@ class Document(models.Model):
date_added = models.DateTimeField(verbose_name=_('Added'), auto_now_add=True) date_added = models.DateTimeField(verbose_name=_('Added'), auto_now_add=True)
language = models.CharField(choices=setting_language_choices.value, default=setting_language.value, max_length=8, verbose_name=_('Language')) language = models.CharField(choices=setting_language_choices.value, default=setting_language.value, max_length=8, verbose_name=_('Language'))
in_trash = models.BooleanField(default=False, editable=False, verbose_name=_('In trash?')) in_trash = models.BooleanField(default=False, editable=False, verbose_name=_('In trash?'))
deleted_date_time = models.DateTimeField(blank=True, editable=True, verbose_name=_('Date and time trashed')) deleted_date_time = models.DateTimeField(blank=True, editable=True, null=True, verbose_name=_('Date and time trashed'))
is_stub = models.BooleanField(default=True, editable=False, verbose_name=_('Is stub?'))
objects = DocumentManager() objects = DocumentManager()
passthrough = PassthroughManager() passthrough = PassthroughManager()
@@ -118,7 +146,7 @@ class Document(models.Model):
return reverse('documents:document_preview', args=[self.pk]) return reverse('documents:document_preview', args=[self.pk])
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
user = kwargs.pop('user', None) user = kwargs.pop('_user', None)
new_document = not self.pk new_document = not self.pk
super(Document, self).save(*args, **kwargs) super(Document, self).save(*args, **kwargs)
@@ -151,20 +179,24 @@ class Document(models.Model):
def size(self): def size(self):
return self.latest_version.size return self.latest_version.size
def new_version(self, file_object, user=None, comment=None): def new_version(self, file_object, comment=None, _user=None):
logger.info('Creating a new document version for document: %s', self) from .tasks import task_upload_new_version
new_version = DocumentVersion.objects.create( logger.info('Queueing creation of a new document version for document: %s', self)
document=self,
file=file_object,
comment=comment or '',
)
logger.info('New document version created for document: %s', self) shared_uploaded_file = SharedUploadedFile.objects.create(file=file_object)
event_document_new_version.commit(actor=user, target=self) if _user:
user_id = _user.pk
else:
user_id = None
return new_version task_upload_new_version.apply_async(kwargs=dict(
shared_uploaded_file_id=shared_uploaded_file.pk,
document_id=self.pk, user_id=user_id,
), queue='uploads')
logger.info('New document version queued for document: %s', self)
# Proxy methods # Proxy methods
def open(self, *args, **kwargs): def open(self, *args, **kwargs):
@@ -277,22 +309,41 @@ class DocumentVersion(models.Model):
Overloaded save method that updates the document version's checksum, Overloaded save method that updates the document version's checksum,
mimetype, and page count when created mimetype, and page count when created
""" """
user = kwargs.pop('_user', None)
new_document_version = not self.pk new_document_version = not self.pk
# Only do this for new documents
super(DocumentVersion, self).save(*args, **kwargs)
for key in sorted(DocumentVersion._post_save_hooks):
DocumentVersion._post_save_hooks[key](self)
if new_document_version: if new_document_version:
# Only do this for new documents logger.info('Creating new version for document: %s', self.document)
self.update_checksum(save=False)
self.update_mimetype(save=False)
self.save()
self.update_page_count(save=False)
post_version_upload.send(sender=self.__class__, instance=self) try:
with transaction.atomic():
super(DocumentVersion, self).save(*args, **kwargs)
for key in sorted(DocumentVersion._post_save_hooks):
DocumentVersion._post_save_hooks[key](self)
if new_document_version:
# Only do this for new documents
self.update_checksum(save=False)
self.update_mimetype(save=False)
self.save()
self.update_page_count(save=False)
logger.info('New document "%s" version created for document: %s', self, self.document)
self.document.is_stub = False
self.document.save()
except Exception as exception:
logger.error('Error creating new document version for document "%s"; %s', self.document, exception)
raise
else:
if new_document_version:
event_document_new_version.commit(actor=user, target=self.document)
post_version_upload.send(sender=self.__class__, instance=self)
if tuple(self.document.versions.all()) == (self,):
post_document_created.send(sender=self.document.__class__, instance=self.document)
def invalidate_cache(self): def invalidate_cache(self):
for page in self.pages.all(): for page in self.pages.all():

View File

@@ -4,3 +4,4 @@ from django.dispatch import Signal
post_version_upload = Signal(providing_args=['instance'], use_caching=True) post_version_upload = Signal(providing_args=['instance'], use_caching=True)
post_document_type_change = Signal(providing_args=['instance'], use_caching=True) post_document_type_change = Signal(providing_args=['instance'], use_caching=True)
post_document_created = Signal(providing_args=['instance'], use_caching=True)

View File

@@ -38,37 +38,12 @@ def task_update_page_count(version_id):
document_version.update_page_count() document_version.update_page_count()
@app.task(ignore_result=True)
def task_new_document(document_type_id, shared_uploaded_file_id, label, description=None, expand=False, language=None, user_id=None):
shared_uploaded_file = SharedUploadedFile.objects.get(pk=shared_uploaded_file_id)
document_type = DocumentType.objects.get(pk=document_type_id)
if user_id:
user = User.objects.get(pk=user_id)
else:
user = None
with File(file=shared_uploaded_file.file) as file_object:
Document.objects.new_document(document_type=document_type, expand=expand, file_object=file_object, label=label, description=description, language=language, user=user)
shared_uploaded_file.delete()
# TODO: Report/record how was file uploaded
# if result['is_compressed'] is None:
# messages.success(request, _('File uploaded successfully.'))
# if result['is_compressed'] is True:
# messages.success(request, _('File uncompressed successfully and uploaded as individual files.'))
# if result['is_compressed'] is False:
# messages.warning(request, _('File was not a compressed file, uploaded as it was.'))
@app.task(ignore_result=True) @app.task(ignore_result=True)
def task_upload_new_version(document_id, shared_uploaded_file_id, user_id, comment=None): def task_upload_new_version(document_id, shared_uploaded_file_id, user_id, comment=None):
shared_file = SharedUploadedFile.objects.get(pk=shared_uploaded_file_id)
document = Document.objects.get(pk=document_id) document = Document.objects.get(pk=document_id)
shared_file = SharedUploadedFile.objects.get(pk=shared_uploaded_file_id)
if user_id: if user_id:
user = User.objects.get(pk=user_id) user = User.objects.get(pk=user_id)
else: else:
@@ -76,7 +51,8 @@ def task_upload_new_version(document_id, shared_uploaded_file_id, user_id, comme
with File(file=shared_file.file) as file_object: with File(file=shared_file.file) as file_object:
try: try:
document.new_version(comment=comment, file_object=file_object, user=user) document_version = DocumentVersion(document=document, comment=comment or '', file=file_object)
document_version.save(_user=user)
except Warning as warning: except Warning as warning:
logger.info('Warning during attempt to create new document version for document:%s ; %s', document, warning) logger.info('Warning during attempt to create new document version for document:%s ; %s', document, warning)
finally: finally:

View File

@@ -75,7 +75,7 @@ class DocumentListView(SingleObjectListView):
return Document.objects.all() return Document.objects.all()
def get_queryset(self): def get_queryset(self):
self.queryset = self.get_document_queryset() self.queryset = self.get_document_queryset().filter(is_stub=False)
return super(DocumentListView, self).get_queryset() return super(DocumentListView, self).get_queryset()

View File

@@ -20,10 +20,6 @@ class Migration(migrations.Migration):
('ocr', '0002_documentpagecontent'), ('ocr', '0002_documentpagecontent'),
] ]
operations = [
]
operations = [ operations = [
migrations.RunPython(move_content_from_documents_to_ocr_app), migrations.RunPython(move_content_from_documents_to_ocr_app),
] ]

View File

@@ -10,11 +10,14 @@ from common.signals import post_initial_setup
from common.utils import encapsulate from common.utils import encapsulate
from converter.links import link_transformation_list from converter.links import link_transformation_list
from documents.models import Document from documents.models import Document
from documents.signals import post_version_upload
from navigation import SourceColumn from navigation import SourceColumn
from rest_api.classes import APIEndPoint from rest_api.classes import APIEndPoint
from .classes import StagingFile from .classes import StagingFile
from .handlers import create_default_document_source from .handlers import (
copy_transformations_to_version, create_default_document_source
)
from .links import ( from .links import (
link_document_create_multiple, link_document_create_siblings, link_document_create_multiple, link_document_create_siblings,
link_setup_sources, link_setup_source_create_imap_email, link_setup_sources, link_setup_source_create_imap_email,
@@ -50,3 +53,4 @@ class SourcesApp(MayanAppConfig):
menu_sidebar.bind_links(links=[link_upload_version], sources=['documents:document_version_list', 'documents:upload_version', 'documents:document_version_revert']) menu_sidebar.bind_links(links=[link_upload_version], sources=['documents:document_version_list', 'documents:upload_version', 'documents:document_version_revert'])
post_initial_setup.connect(create_default_document_source, dispatch_uid='create_default_document_source') post_initial_setup.connect(create_default_document_source, dispatch_uid='create_default_document_source')
post_version_upload.connect(copy_transformations_to_version, dispatch_uid='copy_transformations_to_version')

View File

@@ -2,9 +2,17 @@ from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from converter.models import Transformation
from .literals import SOURCE_UNCOMPRESS_CHOICE_ASK from .literals import SOURCE_UNCOMPRESS_CHOICE_ASK
from .models import WebFormSource from .models import WebFormSource
def create_default_document_source(sender, **kwargs): def create_default_document_source(sender, **kwargs):
WebFormSource.objects.create(title=_('Default'), uncompress=SOURCE_UNCOMPRESS_CHOICE_ASK) WebFormSource.objects.create(title=_('Default'), uncompress=SOURCE_UNCOMPRESS_CHOICE_ASK)
def copy_transformations_to_version(sender, **kwargs):
    """
    Signal handler: copy the transformations of a document to the pages of
    the document version carried in the signal's ``instance`` argument.
    """
    document_version = kwargs['instance']
    document = document_version.document
    version_pages = document_version.pages.all()
    Transformation.objects.copy(source=document, targets=version_pages)

View File

@@ -15,6 +15,7 @@ from django.utils.translation import ugettext_lazy as _
from model_utils.managers import InheritanceManager from model_utils.managers import InheritanceManager
from common.compressed_files import CompressedFile, NotACompressedFile
from converter.literals import DIMENSION_SEPARATOR from converter.literals import DIMENSION_SEPARATOR
from converter.models import Transformation from converter.models import Transformation
from djcelery.models import PeriodicTask, IntervalSchedule from djcelery.models import PeriodicTask, IntervalSchedule
@@ -50,22 +51,35 @@ class Source(models.Model):
def fullname(self): def fullname(self):
return ' '.join([self.class_fullname(), '"%s"' % self.title]) return ' '.join([self.class_fullname(), '"%s"' % self.title])
def upload_document(self, file_object, label, description=None, document_type=None, expand=False, language=None, metadata_dict_list=None, user=None): def _upload_document(self, document_type, file_object, label, language, user, description=None, metadata_dict_list=None):
new_versions = Document.objects.new_document( document = document_type.new_document(
description=description, file_object=file_object, label=label, description=description,
document_type=document_type or self.document_type, language=language, _user=user
expand=expand,
file_object=file_object,
label=label,
language=language,
user=user
) )
for new_version in new_versions: Transformation.objects.get_for_model(document).delete()
Transformation.objects.copy(source=Source.objects.get_subclass(pk=self.pk), targets=new_version.pages.all()) Transformation.objects.copy(source=Source.objects.get_subclass(pk=self.pk), targets=Document.objects.filter(pk=document.pk))
if metadata_dict_list: if metadata_dict_list:
save_metadata_list(metadata_dict_list, new_version.document, create=True) save_metadata_list(metadata_dict_list, document, create=True)
# TODO: Rename this method to 'handle_upload' or similar
def upload_document(self, file_object, label, description=None, document_type=None, expand=False, language=None, metadata_dict_list=None, user=None):
    """
    Upload ``file_object`` as one or more new documents.

    When ``expand`` is true the file is first opened as a compressed file
    and each of its children is uploaded as a separate document, labeled
    with the text representation of the child. If ``NotACompressedFile`` is
    raised, or ``expand`` is false, the file itself is uploaded as a single
    document using ``label``.

    ``document_type`` falls back to this source's ``document_type`` and
    ``language`` to ``setting_language.value`` when they are not given.
    """
    if not document_type:
        document_type = self.document_type

    # Values shared by every document created during this call
    shared_kwargs = dict(
        document_type=document_type, description=description,
        language=language or setting_language.value,
        metadata_dict_list=metadata_dict_list, user=user
    )

    if expand:
        try:
            compressed_file = CompressedFile(file_object)
            for compressed_file_child in compressed_file.children():
                try:
                    self._upload_document(
                        file_object=compressed_file_child,
                        label=unicode(compressed_file_child),
                        **shared_kwargs
                    )
                finally:
                    # Release the extracted child even if its upload fails
                    compressed_file_child.close()
        except NotACompressedFile:
            logging.debug('Exception: NotACompressedFile')
            self._upload_document(file_object=file_object, label=label, **shared_kwargs)
    else:
        self._upload_document(file_object=file_object, label=label, **shared_kwargs)
def get_upload_file_object(self, form_data): def get_upload_file_object(self, form_data):
pass pass