Initial commit of the new document upload workflow, which allows document promises to be returned after a POST request (gh-issue #194). Add a new signal that is sent when a document has been uploaded and is finally ready (gh-issue #193 and gh-issue #213).

This commit is contained in:
Roberto Rosario
2015-07-05 03:33:27 -04:00
parent 8f30932b6f
commit 8cc05bbefb
14 changed files with 204 additions and 110 deletions

View File

@@ -8,12 +8,13 @@ from common import (
menu_setup, menu_tools
)
from documents.models import Document
from documents.signals import post_document_created
from metadata.models import DocumentMetadata
from rest_api.classes import APIEndPoint
from .handlers import (
document_index_delete, document_metadata_index_update,
document_metadata_index_post_delete
document_created_index_update, document_index_delete,
document_metadata_index_update, document_metadata_index_post_delete
)
from .links import (
link_document_index_list, link_index_main_menu, link_index_setup,
@@ -45,6 +46,7 @@ class DocumentIndexingApp(MayanAppConfig):
menu_setup.bind_links(links=[link_index_setup])
menu_tools.bind_links(links=[link_rebuild_index_instances])
post_document_created.connect(document_created_index_update, dispatch_uid='document_created_index_update', sender=Document)
post_save.connect(document_metadata_index_update, dispatch_uid='document_metadata_index_update', sender=DocumentMetadata)
post_delete.connect(document_index_delete, dispatch_uid='document_index_delete', sender=Document)
post_delete.connect(document_metadata_index_post_delete, dispatch_uid='document_metadata_index_post_delete', sender=DocumentMetadata)

View File

@@ -3,6 +3,10 @@ from __future__ import unicode_literals
from .tasks import task_delete_empty_index_nodes, task_index_document
def document_created_index_update(sender, **kwargs):
    """
    Signal handler: queue an indexing task for a newly created document.

    The document is read from ``kwargs['instance']`` and its primary key is
    passed as ``document_id`` to ``task_index_document`` on the 'indexing'
    queue. ``sender`` is not used.
    """
    document = kwargs['instance']
    task_kwargs = dict(document_id=document.pk)
    task_index_document.apply_async(kwargs=task_kwargs, queue='indexing')
def document_index_delete(sender, **kwargs):
    """
    Signal handler: queue the task that deletes empty index nodes.

    Neither ``sender`` nor the extra signal arguments are used; the task is
    submitted without arguments to the 'indexing' queue.
    """
    task_delete_empty_index_nodes.apply_async(
        queue='indexing'
    )

View File

@@ -38,7 +38,7 @@ from .serializers import (
from .settings import (
setting_display_size, setting_zoom_max_level, setting_zoom_min_level
)
from .tasks import task_get_document_page_image, task_new_document
from .tasks import task_get_document_page_image
class APIDocumentListView(generics.ListAPIView):

View File

@@ -4,8 +4,6 @@ import logging
from django.db import models, transaction
from common.compressed_files import CompressedFile, NotACompressedFile
from .settings import setting_recent_count, setting_language
logger = logging.getLogger(__name__)
@@ -44,39 +42,6 @@ class DocumentManager(models.Manager):
for document in self.model.objects.all():
document.invalidate_cache()
@transaction.atomic
def new_document(self, document_type, file_object, label=None, command_line=False, description=None, expand=False, language=None, user=None):
versions_created = []
if expand:
try:
compressed_file = CompressedFile(file_object)
count = 1
for compressed_file_child in compressed_file.children():
if command_line:
print 'Uploading file #%d: %s' % (count, compressed_file_child)
versions_created.append(self.upload_single_document(document_type=document_type, file_object=compressed_file_child, description=description, label=unicode(compressed_file_child), language=language or setting_language.value, user=user))
compressed_file_child.close()
count += 1
except NotACompressedFile:
logging.debug('Exception: NotACompressedFile')
if command_line:
raise
versions_created.append(self.upload_single_document(document_type=document_type, file_object=file_object, description=description, label=label, language=language or setting_language.value, user=user))
else:
versions_created.append(self.upload_single_document(document_type=document_type, file_object=file_object, description=description, label=label, language=language or setting_language.value, user=user))
return versions_created
@transaction.atomic
def upload_single_document(self, document_type, file_object, label=None, description=None, language=None, user=None):
document = self.model(description=description, document_type=document_type, language=language, label=label or unicode(file_object))
document.save(user=user)
version = document.new_version(file_object=file_object, user=user)
document.set_document_type(document_type, force=True)
return version
class PassthroughManager(models.Manager):
pass

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
    """
    Schema migration for the documents app: alters the trash and delete
    related fields of ``document`` and ``documenttype``.
    """

    dependencies = [
        ('documents', '0011_auto_20150704_0508'),
    ]

    operations = [
        # document.deleted_date_time: nullable, blank allowed.
        migrations.AlterField(
            model_name='document',
            name='deleted_date_time',
            field=models.DateTimeField(
                null=True,
                verbose_name='Date and time trashed',
                blank=True
            ),
            preserve_default=True,
        ),
        # documenttype.delete_time_period: positive integer, default 30.
        migrations.AlterField(
            model_name='documenttype',
            name='delete_time_period',
            field=models.PositiveIntegerField(
                default=30,
                help_text='Amount of time after which documents of this type in the trash will be deleted.',
                verbose_name='Delete time period'
            ),
            preserve_default=True,
        ),
        # documenttype.delete_time_unit: days/hours/minutes, default 'days'.
        migrations.AlterField(
            model_name='documenttype',
            name='delete_time_unit',
            field=models.CharField(
                default='days',
                max_length=8,
                verbose_name='Delete time unit',
                choices=[
                    ('days', 'Days'),
                    ('hours', 'Hours'),
                    ('minutes', 'Minutes')
                ]
            ),
            preserve_default=True,
        ),
        # documenttype.trash_time_period: optional positive integer.
        migrations.AlterField(
            model_name='documenttype',
            name='trash_time_period',
            field=models.PositiveIntegerField(
                help_text='Amount of time after which documents of this type will be moved to the trash.',
                null=True,
                verbose_name='Trash time period',
                blank=True
            ),
            preserve_default=True,
        ),
        # documenttype.trash_time_unit: optional days/hours/minutes choice.
        migrations.AlterField(
            model_name='documenttype',
            name='trash_time_unit',
            field=models.CharField(
                blank=True,
                max_length=8,
                null=True,
                verbose_name='Trash time unit',
                choices=[
                    ('days', 'Days'),
                    ('hours', 'Hours'),
                    ('minutes', 'Minutes')
                ]
            ),
            preserve_default=True,
        ),
    ]

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
def make_existing_documents_not_stubs(apps, schema_editor):
    """
    Data migration step: flag every existing document as a non-stub.

    ``apps`` is the migration app registry used to look up the ``Document``
    model of the ``documents`` app; ``schema_editor`` is not used.
    """
    Document = apps.get_model('documents', 'Document')
    # One bulk UPDATE instead of loading and saving each row individually:
    # the only change being made is setting ``is_stub`` to False on every
    # document, so a single statement does the same work in one query.
    Document.objects.update(is_stub=False)
class Migration(migrations.Migration):
    """
    Adds the ``is_stub`` boolean to ``document`` (default True) and then runs
    ``make_existing_documents_not_stubs`` as a data step.
    """

    dependencies = [
        ('documents', '0012_auto_20150705_0347'),
    ]

    operations = [
        # Schema step: new non-editable flag, defaulting to True.
        migrations.AddField(
            model_name='document',
            name='is_stub',
            field=models.BooleanField(
                default=True,
                verbose_name='Is stub?',
                editable=False
            ),
            preserve_default=True,
        ),
        # Data step: executed after the column exists.
        migrations.RunPython(make_existing_documents_not_stubs),
    ]

View File

@@ -13,7 +13,10 @@ from django.utils.encoding import python_2_unicode_compatible
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
from celery.execute import send_task
from common.literals import TIME_DELTA_UNIT_CHOICES
from common.models import SharedUploadedFile
from common.settings import setting_temporary_directory
from common.utils import fs_cleanup
from converter import (
@@ -39,7 +42,9 @@ from .settings import (
setting_cache_path, setting_display_size, setting_language,
setting_language_choices, setting_zoom_max_level, setting_zoom_min_level
)
from .signals import post_version_upload, post_document_type_change
from .signals import (
post_document_created, post_document_type_change, post_version_upload
)
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() # document image cache name hash function
logger = logging.getLogger(__name__)
@@ -69,6 +74,28 @@ class DocumentType(models.Model):
def natural_key(self):
return (self.name,)
def new_document(self, file_object, label=None, description=None, language=None, _user=None):
    """
    Create a document of this document type from ``file_object``.

    Arguments:
        file_object: the uploaded file; passed on to
            ``Document.new_version()``.
        label: document label; defaults to ``unicode(file_object)``.
        description: optional document description.
        language: document language; defaults to ``setting_language.value``.
        _user: optional user performing the upload; forwarded to
            ``Document.save()`` and ``Document.new_version()``.

    Returns the new document instance.
    """
    if not language:
        language = setting_language.value

    if not label:
        label = unicode(file_object)

    # Build the instance unsaved and save it exactly once, with ``_user``.
    # ``Document.save()`` decides whether the document is new from
    # ``not self.pk``; creating the row through ``documents.create()`` first
    # made the row exist before ``_user`` was supplied, so the save that
    # received the user was never the "new document" save.
    document = self.documents.model(
        document_type=self, description=description, language=language,
        label=label
    )
    document.save(_user=_user)

    document.new_version(file_object=file_object, _user=_user)

    return document
@transaction.atomic
def upload_single_document(self, document_type, file_object, label=None, description=None, language=None, user=None):
    """
    Build and save a document of ``document_type`` for ``file_object``,
    request a new version for the file, force the document type and return
    the result of the ``new_version()`` call.

    The label falls back to ``unicode(file_object)`` when none is given.
    The whole method runs inside ``transaction.atomic``.

    NOTE(review): this body is the same as the ``DocumentManager`` method of
    the same name and looks like a carry-over; confirm it is still called.
    """
    # NOTE(review): relies on ``self.model`` -- presumably only available on
    # a manager; confirm this attribute exists on the enclosing class.
    document = self.model(description=description, document_type=document_type, language=language, label=label or unicode(file_object))
    # NOTE(review): ``Document.save()`` and ``Document.new_version()`` take a
    # ``_user`` keyword elsewhere in this change; confirm ``user=`` is still
    # accepted by both calls below.
    document.save(user=user)
    version = document.new_version(file_object=file_object, user=user)
    # Re-applies the document type with force=True (semantics of ``force``
    # are defined by ``set_document_type`` and are not visible here).
    document.set_document_type(document_type, force=True)
    return version
class Meta:
verbose_name = _('Document type')
verbose_name_plural = _('Documents types')
@@ -88,7 +115,8 @@ class Document(models.Model):
date_added = models.DateTimeField(verbose_name=_('Added'), auto_now_add=True)
language = models.CharField(choices=setting_language_choices.value, default=setting_language.value, max_length=8, verbose_name=_('Language'))
in_trash = models.BooleanField(default=False, editable=False, verbose_name=_('In trash?'))
deleted_date_time = models.DateTimeField(blank=True, editable=True, verbose_name=_('Date and time trashed'))
deleted_date_time = models.DateTimeField(blank=True, editable=True, null=True, verbose_name=_('Date and time trashed'))
is_stub = models.BooleanField(default=True, editable=False, verbose_name=_('Is stub?'))
objects = DocumentManager()
passthrough = PassthroughManager()
@@ -118,7 +146,7 @@ class Document(models.Model):
return reverse('documents:document_preview', args=[self.pk])
def save(self, *args, **kwargs):
user = kwargs.pop('user', None)
user = kwargs.pop('_user', None)
new_document = not self.pk
super(Document, self).save(*args, **kwargs)
@@ -151,20 +179,24 @@ class Document(models.Model):
def size(self):
return self.latest_version.size
def new_version(self, file_object, user=None, comment=None):
logger.info('Creating a new document version for document: %s', self)
def new_version(self, file_object, comment=None, _user=None):
from .tasks import task_upload_new_version
new_version = DocumentVersion.objects.create(
document=self,
file=file_object,
comment=comment or '',
)
logger.info('Queueing creation of a new document version for document: %s', self)
logger.info('New document version created for document: %s', self)
shared_uploaded_file = SharedUploadedFile.objects.create(file=file_object)
event_document_new_version.commit(actor=user, target=self)
if _user:
user_id = _user.pk
else:
user_id = None
return new_version
task_upload_new_version.apply_async(kwargs=dict(
shared_uploaded_file_id=shared_uploaded_file.pk,
document_id=self.pk, user_id=user_id,
), queue='uploads')
logger.info('New document version queued for document: %s', self)
# Proxy methods
def open(self, *args, **kwargs):
@@ -277,9 +309,15 @@ class DocumentVersion(models.Model):
Overloaded save method that updates the document version's checksum,
mimetype, and page count when created
"""
user = kwargs.pop('_user', None)
new_document_version = not self.pk
# Only do this for new documents
if new_document_version:
logger.info('Creating new version for document: %s', self.document)
try:
with transaction.atomic():
super(DocumentVersion, self).save(*args, **kwargs)
for key in sorted(DocumentVersion._post_save_hooks):
@@ -292,8 +330,21 @@ class DocumentVersion(models.Model):
self.save()
self.update_page_count(save=False)
logger.info('New document "%s" version created for document: %s', self, self.document)
self.document.is_stub = False
self.document.save()
except Exception as exception:
logger.error('Error creating new document version for document "%s"; %s', self.document, exception)
raise
else:
if new_document_version:
event_document_new_version.commit(actor=user, target=self.document)
post_version_upload.send(sender=self.__class__, instance=self)
if tuple(self.document.versions.all()) == (self,):
post_document_created.send(sender=self.document.__class__, instance=self.document)
def invalidate_cache(self):
for page in self.pages.all():
page.invalidate_cache()

View File

@@ -4,3 +4,4 @@ from django.dispatch import Signal
# Sent from DocumentVersion.save() with the saved DocumentVersion as
# ``instance``.
post_version_upload = Signal(providing_args=['instance'], use_caching=True)
# NOTE(review): emitter not visible here -- presumably sent when a document's
# type is changed; confirm against the caller.
post_document_type_change = Signal(providing_args=['instance'], use_caching=True)
# Sent from DocumentVersion.save() with the Document as ``instance`` when the
# version being saved is the only version of that document.
post_document_created = Signal(providing_args=['instance'], use_caching=True)

View File

@@ -38,37 +38,12 @@ def task_update_page_count(version_id):
document_version.update_page_count()
@app.task(ignore_result=True)
def task_new_document(document_type_id, shared_uploaded_file_id, label, description=None, expand=False, language=None, user_id=None):
shared_uploaded_file = SharedUploadedFile.objects.get(pk=shared_uploaded_file_id)
document_type = DocumentType.objects.get(pk=document_type_id)
if user_id:
user = User.objects.get(pk=user_id)
else:
user = None
with File(file=shared_uploaded_file.file) as file_object:
Document.objects.new_document(document_type=document_type, expand=expand, file_object=file_object, label=label, description=description, language=language, user=user)
shared_uploaded_file.delete()
# TODO: Report/record how was file uploaded
# if result['is_compressed'] is None:
# messages.success(request, _('File uploaded successfully.'))
# if result['is_compressed'] is True:
# messages.success(request, _('File uncompressed successfully and uploaded as individual files.'))
# if result['is_compressed'] is False:
# messages.warning(request, _('File was not a compressed file, uploaded as it was.'))
@app.task(ignore_result=True)
def task_upload_new_version(document_id, shared_uploaded_file_id, user_id, comment=None):
shared_file = SharedUploadedFile.objects.get(pk=shared_uploaded_file_id)
document = Document.objects.get(pk=document_id)
shared_file = SharedUploadedFile.objects.get(pk=shared_uploaded_file_id)
if user_id:
user = User.objects.get(pk=user_id)
else:
@@ -76,7 +51,8 @@ def task_upload_new_version(document_id, shared_uploaded_file_id, user_id, comme
with File(file=shared_file.file) as file_object:
try:
document.new_version(comment=comment, file_object=file_object, user=user)
document_version = DocumentVersion(document=document, comment=comment or '', file=file_object)
document_version.save(_user=user)
except Warning as warning:
logger.info('Warning during attempt to create new document version for document:%s ; %s', document, warning)
finally:

View File

@@ -75,7 +75,7 @@ class DocumentListView(SingleObjectListView):
return Document.objects.all()
def get_queryset(self):
self.queryset = self.get_document_queryset()
self.queryset = self.get_document_queryset().filter(is_stub=False)
return super(DocumentListView, self).get_queryset()

View File

@@ -20,10 +20,6 @@ class Migration(migrations.Migration):
('ocr', '0002_documentpagecontent'),
]
operations = [
]
operations = [
migrations.RunPython(move_content_from_documents_to_ocr_app),
]

View File

@@ -10,11 +10,14 @@ from common.signals import post_initial_setup
from common.utils import encapsulate
from converter.links import link_transformation_list
from documents.models import Document
from documents.signals import post_version_upload
from navigation import SourceColumn
from rest_api.classes import APIEndPoint
from .classes import StagingFile
from .handlers import create_default_document_source
from .handlers import (
copy_transformations_to_version, create_default_document_source
)
from .links import (
link_document_create_multiple, link_document_create_siblings,
link_setup_sources, link_setup_source_create_imap_email,
@@ -50,3 +53,4 @@ class SourcesApp(MayanAppConfig):
menu_sidebar.bind_links(links=[link_upload_version], sources=['documents:document_version_list', 'documents:upload_version', 'documents:document_version_revert'])
post_initial_setup.connect(create_default_document_source, dispatch_uid='create_default_document_source')
post_version_upload.connect(copy_transformations_to_version, dispatch_uid='copy_transformations_to_version')

View File

@@ -2,9 +2,17 @@ from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _
from converter.models import Transformation
from .literals import SOURCE_UNCOMPRESS_CHOICE_ASK
from .models import WebFormSource
def create_default_document_source(sender, **kwargs):
    """
    Signal handler: create a web form source titled 'Default' whose
    uncompress option is ``SOURCE_UNCOMPRESS_CHOICE_ASK``.

    ``sender`` and the extra signal arguments are not used.
    """
    default_source_fields = dict(
        title=_('Default'),
        uncompress=SOURCE_UNCOMPRESS_CHOICE_ASK
    )
    WebFormSource.objects.create(**default_source_fields)
def copy_transformations_to_version(sender, **kwargs):
    """
    Signal handler: copy the transformations of a version's document onto
    every page of that version.

    The version is read from ``kwargs['instance']``; ``sender`` is not used.
    """
    document_version = kwargs['instance']
    Transformation.objects.copy(
        source=document_version.document,
        targets=document_version.pages.all()
    )

View File

@@ -15,6 +15,7 @@ from django.utils.translation import ugettext_lazy as _
from model_utils.managers import InheritanceManager
from common.compressed_files import CompressedFile, NotACompressedFile
from converter.literals import DIMENSION_SEPARATOR
from converter.models import Transformation
from djcelery.models import PeriodicTask, IntervalSchedule
@@ -50,22 +51,35 @@ class Source(models.Model):
def fullname(self):
return ' '.join([self.class_fullname(), '"%s"' % self.title])
def upload_document(self, file_object, label, description=None, document_type=None, expand=False, language=None, metadata_dict_list=None, user=None):
new_versions = Document.objects.new_document(
description=description,
document_type=document_type or self.document_type,
expand=expand,
file_object=file_object,
label=label,
language=language,
user=user
def _upload_document(self, document_type, file_object, label, language, user, description=None, metadata_dict_list=None):
document = document_type.new_document(
file_object=file_object, label=label, description=description,
language=language, _user=user
)
for new_version in new_versions:
Transformation.objects.copy(source=Source.objects.get_subclass(pk=self.pk), targets=new_version.pages.all())
Transformation.objects.get_for_model(document).delete()
Transformation.objects.copy(source=Source.objects.get_subclass(pk=self.pk), targets=Document.objects.filter(pk=document.pk))
if metadata_dict_list:
save_metadata_list(metadata_dict_list, new_version.document, create=True)
save_metadata_list(metadata_dict_list, document, create=True)
# TODO: Rename this method to 'handle_upload' or similar
def upload_document(self, file_object, label, description=None, document_type=None, expand=False, language=None, metadata_dict_list=None, user=None):
    """
    Upload ``file_object`` through this source, optionally expanding it.

    Arguments:
        file_object: the file to upload.
        label: label used when the file is uploaded as a single document.
        description: optional description applied to every document.
        document_type: document type to use; defaults to
            ``self.document_type``.
        expand: when true, try to treat ``file_object`` as a compressed
            file and upload each child as its own document, labelled with
            ``unicode(child)``. If the file is not a compressed file it is
            uploaded as a single document instead.
        language: document language; defaults to ``setting_language.value``.
        metadata_dict_list: optional metadata passed to every upload.
        user: optional user performing the upload.

    Returns None.
    """
    # Arguments that are identical for every document created by this call.
    shared_kwargs = dict(
        document_type=document_type or self.document_type,
        description=description,
        language=language or setting_language.value,
        metadata_dict_list=metadata_dict_list,
        user=user
    )

    if expand:
        try:
            compressed_file = CompressedFile(file_object)
            for compressed_file_child in compressed_file.children():
                try:
                    self._upload_document(
                        file_object=compressed_file_child,
                        label=unicode(compressed_file_child),
                        **shared_kwargs
                    )
                finally:
                    # Close the child even when its upload fails so the
                    # handle is not leaked.
                    compressed_file_child.close()
        except NotACompressedFile:
            logging.debug('Exception: NotACompressedFile')
        else:
            # Every child was uploaded; nothing left to do.
            return

    # Either expansion was not requested or the file was not compressed.
    self._upload_document(file_object=file_object, label=label, **shared_kwargs)
def get_upload_file_object(self, form_data):
pass