diff --git a/HISTORY.rst b/HISTORY.rst index f7d96c8b8d..dee7cadd38 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -135,6 +135,8 @@ #360. - Add new dashboard item to display the total page count. - Show the document type being uploaded in the source view title. +- Setting SOURCE_SCANIMAGE_PATH is now SOURCES_SCANIMAGE_PATH. +- New queue: sources_fast. Used for staging file generation. 3.0.3 (2018-08-17) ================== diff --git a/mayan/apps/sources/api_views.py b/mayan/apps/sources/api_views.py index 7335ecb9c0..e55530230d 100644 --- a/mayan/apps/sources/api_views.py +++ b/mayan/apps/sources/api_views.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals from django.http import HttpResponse from django.shortcuts import get_object_or_404 -from converter.models import Transformation from rest_framework import generics from rest_framework.response import Response +from .literals import STAGING_FILE_IMAGE_TASK_TIMEOUT from .models import StagingFolderSource from .serializers import StagingFolderFileSerializer, StagingFolderSerializer +from .storages import storage_staging_file_image_cache +from .tasks import task_generate_staging_file_image class APIStagingSourceFileView(generics.GenericAPIView): @@ -56,20 +58,19 @@ class APIStagingSourceFileImageView(generics.RetrieveAPIView): return None def retrieve(self, request, *args, **kwargs): - staging_folder = get_object_or_404( - StagingFolderSource, pk=self.kwargs['staging_folder_pk'] - ) - staging_file = staging_folder.get_file( - encoded_filename=self.kwargs['encoded_filename'] + width = request.GET.get('width') + height = request.GET.get('height') + + task = task_generate_staging_file_image.apply_async( + kwargs=dict( + staging_folder_pk=self.kwargs['staging_folder_pk'], + encoded_filename=self.kwargs['encoded_filename'], + width=width, height=height + ) ) - size = request.GET.get('size') + cache_filename = task.get(timeout=STAGING_FILE_IMAGE_TASK_TIMEOUT) - return HttpResponse( - staging_file.get_image( - 
size=size, - transformations=Transformation.objects.get_for_model( - staging_folder, as_classes=True - ) - ), content_type='image' - ) + with storage_staging_file_image_cache.open(cache_filename) as file_object: + response = HttpResponse(file_object.read(), content_type='image') + return response diff --git a/mayan/apps/sources/apps.py b/mayan/apps/sources/apps.py index b392bc8152..21ee604a1f 100644 --- a/mayan/apps/sources/apps.py +++ b/mayan/apps/sources/apps.py @@ -93,6 +93,10 @@ class SourcesApp(MayanAppConfig): Queue( 'sources', Exchange('sources'), routing_key='sources' ), + Queue( + 'sources_fast', Exchange('sources_fast'), + routing_key='sources_fast', delivery_mode=1 + ), Queue( 'sources_periodic', Exchange('sources_periodic'), routing_key='sources_periodic', delivery_mode=1 @@ -105,6 +109,9 @@ class SourcesApp(MayanAppConfig): 'sources.tasks.task_check_interval_source': { 'queue': 'sources_periodic' }, + 'sources.tasks.task_generate_staging_file_image': { + 'queue': 'sources_fast' + }, 'sources.tasks.task_source_handle_upload': { 'queue': 'sources' }, diff --git a/mayan/apps/sources/classes.py b/mayan/apps/sources/classes.py index ea64144278..1605963b76 100644 --- a/mayan/apps/sources/classes.py +++ b/mayan/apps/sources/classes.py @@ -1,9 +1,12 @@ from __future__ import unicode_literals import base64 +import logging import os import time +from furl import furl + try: # Python 2 from urllib import unquote_plus @@ -13,11 +16,16 @@ except ImportError: from django.core.files import File +from django.core.files.base import ContentFile from django.urls import reverse from django.utils.encoding import force_text, python_2_unicode_compatible from converter import TransformationResize, converter_class +from .storages import storage_staging_file_image_cache + +logger = logging.getLogger(__name__) + class PseudoFile(File): def __init__(self, file, name): @@ -62,35 +70,111 @@ class StagingFile(object): file=open(self.get_full_path(), mode='rb'), name=self.filename ) 
- def get_api_image_url(self): - return reverse( + @property + def cache_filename(self): + return '{}{}'.format(self.staging_folder.pk, self.encoded_filename) + + def delete(self): + storage_staging_file_image_cache.delete(self.cache_filename) + os.unlink(self.get_full_path()) + + def generate_image(self, *args, **kwargs): + transformation_list = self.get_combined_transformation_list(*args, **kwargs) + + # Check if the transformed image is available + logger.debug('transformations cache filename: %s', self.cache_filename) + + if storage_staging_file_image_cache.exists(self.cache_filename): + logger.debug( + 'staging file cache file "%s" found', self.cache_filename + ) + else: + logger.debug( + 'staging file cache file "%s" not found', self.cache_filename + ) + image = self.get_image(transformations=transformation_list) + with storage_staging_file_image_cache.open(self.cache_filename, 'wb+') as file_object: + file_object.write(image.getvalue()) + + return self.cache_filename + + def get_api_image_url(self, *args, **kwargs): + final_url = furl() + final_url.args = kwargs + final_url.path = reverse( 'rest_api:stagingfolderfile-image-view', args=( self.staging_folder.pk, self.encoded_filename ) ) + return final_url.tostr() + + def get_combined_transformation_list(self, *args, **kwargs): + """ + Return a list of transformations containing the server side + staging file transformations as well as transformations created + from the arguments as transient interactive transformations.
+ """ + # Convert arguments into transformations + transformations = kwargs.get('transformations', []) + + # Always use the staging folder preview dimensions; any width or + # height keyword arguments are ignored + width = self.staging_folder.preview_width + height = self.staging_folder.preview_height + + # Build the combined transformation list + transformation_list = [] + + # Interactive transformations first, preview resize last + for transformation in transformations: + transformation_list.append(transformation) + + if width: + transformation_list.append( + TransformationResize(width=width, height=height) + ) + + return transformation_list + def get_date_time_created(self): return time.ctime(os.path.getctime(self.get_full_path())) def get_full_path(self): return os.path.join(self.staging_folder.folder_path, self.filename) - def get_image(self, size=None, as_base64=False, transformations=None): - converter = converter_class(file_object=open(self.get_full_path())) + def get_image(self, transformations=None): + cache_filename = self.cache_filename + file_object = None - if size: - converter.transform( - transformation=TransformationResize( - **dict(zip(('width', 'height'), (size.split('x')))) - ) + try: + file_object = open(self.get_full_path(), mode='rb') + converter = converter_class(file_object=file_object) + + page_image = converter.get_page() + + # Since open "wb+" doesn't create files, check if the file + # exists, if not then create it + if not storage_staging_file_image_cache.exists(cache_filename): + storage_staging_file_image_cache.save(name=cache_filename, content=ContentFile(content='')) + + with storage_staging_file_image_cache.open(cache_filename, 'wb+') as cache_file_object: + cache_file_object.write(page_image.getvalue()) + except Exception as exception: + # Cleanup in case of error + logger.error( + 'Error creating staging file cache "%s"; %s', + cache_filename, exception ) + storage_staging_file_image_cache.delete(cache_filename) + if file_object: + file_object.close() + raise - # Interactive transformations for
transformation in transformations: converter.transform(transformation=transformation) - return converter.get_page(as_base64=as_base64) - - def delete(self): - os.unlink(self.get_full_path()) + result = converter.get_page() + file_object.close() + return result diff --git a/mayan/apps/sources/literals.py b/mayan/apps/sources/literals.py index 25c8e3ba2b..ee01737193 100644 --- a/mayan/apps/sources/literals.py +++ b/mayan/apps/sources/literals.py @@ -2,6 +2,13 @@ from __future__ import unicode_literals from django.utils.translation import ugettext_lazy as _ +DEFAULT_IMAP_MAILBOX = 'INBOX' +DEFAULT_INTERVAL = 600 +DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml' +DEFAULT_POP3_TIMEOUT = 60 +DEFAULT_SOURCE_LOCK_EXPIRE = 600 +DEFAULT_SOURCE_TASK_RETRY_DELAY = 10 + SCANNER_SOURCE_FLATBED = 'flatbed' SCANNER_SOURCE_ADF = 'Automatic Document Feeder' @@ -58,10 +65,4 @@ SOURCE_CHOICES = ( (SOURCE_CHOICE_EMAIL_POP3, _('POP3 email')), (SOURCE_CHOICE_EMAIL_IMAP, _('IMAP email')), ) - -DEFAULT_SOURCE_LOCK_EXPIRE = 600 -DEFAULT_INTERVAL = 600 -DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml' -DEFAULT_POP3_TIMEOUT = 60 -DEFAULT_IMAP_MAILBOX = 'INBOX' -DEFAULT_SOURCE_TASK_RETRY_DELAY = 10 +STAGING_FILE_IMAGE_TASK_TIMEOUT = 120 diff --git a/mayan/apps/sources/queues.py b/mayan/apps/sources/queues.py index 9902b7a1b0..54afa4ea89 100644 --- a/mayan/apps/sources/queues.py +++ b/mayan/apps/sources/queues.py @@ -5,17 +5,23 @@ from django.utils.translation import ugettext_lazy as _ from task_manager.classes import CeleryQueue queue_sources = CeleryQueue( - name='sources', label=_('Sources'), transient=True + name='sources', label=_('Sources') ) queue_sources_periodic = CeleryQueue( - name='sources_periodic', label=_('Sources periodic') + name='sources_periodic', label=_('Sources periodic'), transient=True +) +queue_sources_fast = CeleryQueue( + name='sources_fast', label=_('Sources fast'), transient=True ) +queue_sources_fast.add_task_type( + 
name='sources.tasks.task_generate_staging_file_image', + label=_('Generate staging file image') +) queue_sources_periodic.add_task_type( name='sources.tasks.task_check_interval_source', label=_('Check interval source') ) - queue_sources.add_task_type( name='sources.tasks.task_source_handle_upload', label=_('Handle upload') diff --git a/mayan/apps/sources/settings.py b/mayan/apps/sources/settings.py index 6a57bb2c93..dcefcd8c88 100644 --- a/mayan/apps/sources/settings.py +++ b/mayan/apps/sources/settings.py @@ -1,5 +1,8 @@ from __future__ import unicode_literals +import os + +from django.conf import settings from django.utils.translation import ugettext_lazy as _ from smart_settings import Namespace @@ -7,9 +10,24 @@ from smart_settings import Namespace namespace = Namespace(name='sources', label=_('Sources')) setting_scanimage_path = namespace.add_setting( - global_name='SOURCE_SCANIMAGE_PATH', default='/usr/bin/scanimage', + global_name='SOURCES_SCANIMAGE_PATH', default='/usr/bin/scanimage', help_text=_( 'File path to the scanimage program used to control image scanners.' ), is_path=True ) +setting_staging_file_image_cache_storage = namespace.add_setting( + global_name='SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND', + default='django.core.files.storage.FileSystemStorage', help_text=_( + 'Path to the Storage subclass to use when storing the cached ' + 'staging_file image files.' + ), quoted=True +) +setting_staging_file_image_cache_storage_arguments = namespace.add_setting( + global_name='SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND_ARGUMENTS', + default='{{location: {}}}'.format( + os.path.join(settings.MEDIA_ROOT, 'staging_file_cache') + ), help_text=_( + 'Arguments to pass to the SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND.' 
+ ), quoted=True, +) diff --git a/mayan/apps/sources/storages.py b/mayan/apps/sources/storages.py new file mode 100644 index 0000000000..e3bedf4311 --- /dev/null +++ b/mayan/apps/sources/storages.py @@ -0,0 +1,18 @@ +from __future__ import unicode_literals + +import yaml + +from django.utils.module_loading import import_string + +from .settings import ( + setting_staging_file_image_cache_storage, + setting_staging_file_image_cache_storage_arguments, +) + +storage_staging_file_image_cache = import_string( + dotted_path=setting_staging_file_image_cache_storage.value +)( + **yaml.safe_load( + setting_staging_file_image_cache_storage_arguments.value or '{}' + ) +) diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index 6237bac1ab..89e27930e2 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -197,3 +197,14 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s task_upload_document.delay( shared_uploaded_file_id=shared_upload.pk, **kwargs ) + + +@app.task() +def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): + StagingFolderSource = apps.get_model( + app_label='sources', model_name='StagingFolderSource' + ) + staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) + staging_file = staging_folder.get_file(encoded_filename=encoded_filename) + + return staging_file.generate_image(*args, **kwargs)