From 9bc16aabc3a1c67cd6f99fc09af7f66e2ee81050 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 12 Sep 2018 03:35:33 -0400 Subject: [PATCH 1/2] Initial commit to support staging file background image generation and caching. Signed-off-by: Roberto Rosario --- HISTORY.rst | 2 + mayan/apps/sources/api_views.py | 34 +++++--- mayan/apps/sources/apps.py | 7 ++ mayan/apps/sources/classes.py | 137 ++++++++++++++++++++++++++++---- mayan/apps/sources/literals.py | 15 ++-- mayan/apps/sources/queues.py | 12 ++- mayan/apps/sources/settings.py | 27 ++++++- mayan/apps/sources/storages.py | 18 +++++ mayan/apps/sources/tasks.py | 12 +++ 9 files changed, 225 insertions(+), 39 deletions(-) create mode 100644 mayan/apps/sources/storages.py diff --git a/HISTORY.rst b/HISTORY.rst index f7d96c8b8d..dee7cadd38 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -135,6 +135,8 @@ #360. - Add new dashboard item to display the total page count. - Show the document type being uploaded in the source view title. +- Setting SOURCE_SCANIMAGE_PATH is now SOURCES_SCANIMAGE_PATH. +- New queue: sources_fast. Used for staging file generation. 
3.0.3 (2018-08-17) ================== diff --git a/mayan/apps/sources/api_views.py b/mayan/apps/sources/api_views.py index 7335ecb9c0..2ce6085962 100644 --- a/mayan/apps/sources/api_views.py +++ b/mayan/apps/sources/api_views.py @@ -2,13 +2,18 @@ from __future__ import unicode_literals from django.http import HttpResponse from django.shortcuts import get_object_or_404 +from django.views.decorators.cache import cache_control, patch_cache_control from converter.models import Transformation from rest_framework import generics from rest_framework.response import Response +from .literals import STAGING_FILE_IMAGE_TASK_TIMEOUT from .models import StagingFolderSource from .serializers import StagingFolderFileSerializer, StagingFolderSerializer +from .settings import settings_staging_file_image_cache_time +from .storages import storage_staging_file_image_cache +from .tasks import task_generate_staging_file_image class APIStagingSourceFileView(generics.GenericAPIView): @@ -56,20 +61,23 @@ class APIStagingSourceFileImageView(generics.RetrieveAPIView): return None def retrieve(self, request, *args, **kwargs): - staging_folder = get_object_or_404( - StagingFolderSource, pk=self.kwargs['staging_folder_pk'] - ) - staging_file = staging_folder.get_file( - encoded_filename=self.kwargs['encoded_filename'] + width = request.GET.get('width') + height = request.GET.get('height') + + task = task_generate_staging_file_image.apply_async( + kwargs=dict( + staging_folder_pk=self.kwargs['staging_folder_pk'], + encoded_filename=self.kwargs['encoded_filename'], + width=width, height=height + ) ) - size = request.GET.get('size') + cache_filename = task.get(timeout=STAGING_FILE_IMAGE_TASK_TIMEOUT) - return HttpResponse( - staging_file.get_image( - size=size, - transformations=Transformation.objects.get_for_model( - staging_folder, as_classes=True + with storage_staging_file_image_cache.open(cache_filename) as file_object: + response = HttpResponse(file_object.read(), content_type='image') + if 
'_hash' in request.GET: + patch_cache_control( + response, max_age=settings_staging_file_image_cache_time.value ) - ), content_type='image' - ) + return response diff --git a/mayan/apps/sources/apps.py b/mayan/apps/sources/apps.py index b392bc8152..21ee604a1f 100644 --- a/mayan/apps/sources/apps.py +++ b/mayan/apps/sources/apps.py @@ -93,6 +93,10 @@ class SourcesApp(MayanAppConfig): Queue( 'sources', Exchange('sources'), routing_key='sources' ), + Queue( + 'sources_fast', Exchange('sources_fast'), + routing_key='sources_fast', delivery_mode=1 + ), Queue( 'sources_periodic', Exchange('sources_periodic'), routing_key='sources_periodic', delivery_mode=1 @@ -105,6 +109,9 @@ class SourcesApp(MayanAppConfig): 'sources.tasks.task_check_interval_source': { 'queue': 'sources_periodic' }, + 'sources.tasks.task_generate_staging_file_image': { + 'queue': 'sources_fast' + }, 'sources.tasks.task_source_handle_upload': { 'queue': 'sources' }, diff --git a/mayan/apps/sources/classes.py b/mayan/apps/sources/classes.py index ea64144278..25f0254a79 100644 --- a/mayan/apps/sources/classes.py +++ b/mayan/apps/sources/classes.py @@ -1,9 +1,12 @@ from __future__ import unicode_literals import base64 +import logging import os import time +from furl import furl + try: # Python 2 from urllib import unquote_plus @@ -13,10 +16,16 @@ except ImportError: from django.core.files import File +from django.core.files.base import ContentFile from django.urls import reverse from django.utils.encoding import force_text, python_2_unicode_compatible -from converter import TransformationResize, converter_class +from converter import BaseTransformation, TransformationResize, converter_class +from converter.literals import DEFAULT_ROTATION, DEFAULT_ZOOM_LEVEL + +from .storages import storage_staging_file_image_cache + +logger = logging.getLogger(__name__) class PseudoFile(File): @@ -62,13 +71,86 @@ class StagingFile(object): file=open(self.get_full_path(), mode='rb'), name=self.filename ) - def 
get_api_image_url(self): - return reverse( + @property + def cache_filename(self): + return '{}{}'.format(self.staging_folder.pk, self.encoded_filename) + + def delete(self): + #TODO: delete cached files + os.unlink(self.get_full_path()) + + def generate_image(self, *args, **kwargs): + transformation_list = self.get_combined_transformation_list(*args, **kwargs) + + cache_filename = '{}-{}'.format( + self.cache_filename, BaseTransformation.combine(transformation_list) + ) + + # Check is transformed image is available + logger.debug('transformations cache filename: %s', cache_filename) + + if storage_staging_file_image_cache.exists(cache_filename): + logger.debug( + 'transformations cache file "%s" found', cache_filename + ) + else: + logger.debug( + 'transformations cache file "%s" not found', cache_filename + ) + image = self.get_image(transformations=transformation_list) + with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object: + file_object.write(image.getvalue()) + + #self.cached_images.create(filename=cache_filename) + + return cache_filename + + def get_api_image_url(self, *args, **kwargs): + transformations_hash = BaseTransformation.combine( + self.get_combined_transformation_list(*args, **kwargs) + ) + + kwargs.pop('transformations', None) + + final_url = furl() + final_url.args = kwargs + final_url.path = reverse( 'rest_api:stagingfolderfile-image-view', args=( self.staging_folder.pk, self.encoded_filename ) ) + final_url.args['_hash'] = transformations_hash + + return final_url.tostr() + + def get_combined_transformation_list(self, *args, **kwargs): + """ + Return a list of transformations containing the server side + staging file transformation as well as transformations created + from the arguments as transient interactive transformations. 
+ """ + # Convert arguments into transformations + transformations = kwargs.get('transformations', []) + + # Set sensible defaults if the argument is not specified or if the + # argument is None + width = kwargs.get('width', self.staging_folder.preview_width) or self.staging_folder.preview_width + height = kwargs.get('height', self.staging_folder.preview_height) or self.staging_folder.preview_height + + # Generate transformation hash + transformation_list = [] + + # Interactive transformations second + for transformation in transformations: + transformation_list.append(transformation) + + if width: + transformation_list.append( + TransformationResize(width=width, height=height) + ) + + return transformation_list def get_date_time_created(self): return time.ctime(os.path.getctime(self.get_full_path())) @@ -76,21 +158,46 @@ class StagingFile(object): def get_full_path(self): return os.path.join(self.staging_folder.folder_path, self.filename) - def get_image(self, size=None, as_base64=False, transformations=None): - converter = converter_class(file_object=open(self.get_full_path())) + def get_image(self, transformations=None): + cache_filename = self.cache_filename + file_object = None + logger.debug('Page cache filename: %s', cache_filename) - if size: - converter.transform( - transformation=TransformationResize( - **dict(zip(('width', 'height'), (size.split('x')))) + if storage_staging_file_image_cache.exists(cache_filename): + logger.debug('Page cache file "%s" found', cache_filename) + file_object = storage_staging_file_image_cache.open(cache_filename) + converter = converter_class(file_object=file_object) + + converter.seek(0) + else: + logger.debug('Page cache file "%s" not found', cache_filename) + try: + file_object = open(self.get_full_path()) + converter = converter_class(file_object=file_object) + + page_image = converter.get_page() + + # Since open "wb+" doesn't create files, check if the file + # exists, if not then create it + if not 
storage_staging_file_image_cache.exists(cache_filename): + storage_staging_file_image_cache.save(name=cache_filename, content=ContentFile(content='')) + + with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object: + file_object.write(page_image.getvalue()) + except Exception as exception: + # Cleanup in case of error + logger.error( + 'Error creating page cache file "%s"; %s', + cache_filename, exception ) - ) + storage_staging_file_image_cache.delete(cache_filename) + if file_object: + file_object.close() + raise - # Interactive transformations for transformation in transformations: converter.transform(transformation=transformation) - return converter.get_page(as_base64=as_base64) - - def delete(self): - os.unlink(self.get_full_path()) + result = converter.get_page() + file_object.close() + return result diff --git a/mayan/apps/sources/literals.py b/mayan/apps/sources/literals.py index 25c8e3ba2b..ee01737193 100644 --- a/mayan/apps/sources/literals.py +++ b/mayan/apps/sources/literals.py @@ -2,6 +2,13 @@ from __future__ import unicode_literals from django.utils.translation import ugettext_lazy as _ +DEFAULT_IMAP_MAILBOX = 'INBOX' +DEFAULT_INTERVAL = 600 +DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml' +DEFAULT_POP3_TIMEOUT = 60 +DEFAULT_SOURCE_LOCK_EXPIRE = 600 +DEFAULT_SOURCE_TASK_RETRY_DELAY = 10 + SCANNER_SOURCE_FLATBED = 'flatbed' SCANNER_SOURCE_ADF = 'Automatic Document Feeder' @@ -58,10 +65,4 @@ SOURCE_CHOICES = ( (SOURCE_CHOICE_EMAIL_POP3, _('POP3 email')), (SOURCE_CHOICE_EMAIL_IMAP, _('IMAP email')), ) - -DEFAULT_SOURCE_LOCK_EXPIRE = 600 -DEFAULT_INTERVAL = 600 -DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml' -DEFAULT_POP3_TIMEOUT = 60 -DEFAULT_IMAP_MAILBOX = 'INBOX' -DEFAULT_SOURCE_TASK_RETRY_DELAY = 10 +STAGING_FILE_IMAGE_TASK_TIMEOUT = 120 diff --git a/mayan/apps/sources/queues.py b/mayan/apps/sources/queues.py index 9902b7a1b0..54afa4ea89 100644 --- a/mayan/apps/sources/queues.py +++ b/mayan/apps/sources/queues.py @@ -5,17 
+5,23 @@ from django.utils.translation import ugettext_lazy as _ from task_manager.classes import CeleryQueue queue_sources = CeleryQueue( - name='sources', label=_('Sources'), transient=True + name='sources', label=_('Sources') ) queue_sources_periodic = CeleryQueue( - name='sources_periodic', label=_('Sources periodic') + name='sources_periodic', label=_('Sources periodic'), transient=True +) +queue_sources_fast = CeleryQueue( + name='sources_fast', label=_('Sources fast'), transient=True ) +queue_sources_fast.add_task_type( + name='sources.tasks.task_generate_staging_file_image', + label=_('Generate staging file image') +) queue_sources_periodic.add_task_type( name='sources.tasks.task_check_interval_source', label=_('Check interval source') ) - queue_sources.add_task_type( name='sources.tasks.task_source_handle_upload', label=_('Handle upload') diff --git a/mayan/apps/sources/settings.py b/mayan/apps/sources/settings.py index 6a57bb2c93..7924450812 100644 --- a/mayan/apps/sources/settings.py +++ b/mayan/apps/sources/settings.py @@ -1,5 +1,8 @@ from __future__ import unicode_literals +import os + +from django.conf import settings from django.utils.translation import ugettext_lazy as _ from smart_settings import Namespace @@ -7,9 +10,31 @@ from smart_settings import Namespace namespace = Namespace(name='sources', label=_('Sources')) setting_scanimage_path = namespace.add_setting( - global_name='SOURCE_SCANIMAGE_PATH', default='/usr/bin/scanimage', + global_name='SOURCES_SCANIMAGE_PATH', default='/usr/bin/scanimage', help_text=_( 'File path to the scanimage program used to control image scanners.' ), is_path=True ) +setting_staging_file_image_cache_storage = namespace.add_setting( + global_name='SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND', + default='django.core.files.storage.FileSystemStorage', help_text=_( + 'Path to the Storage subclass to use when storing the cached ' + 'staging_file image files.' 
+ ), quoted=True +) +setting_staging_file_image_cache_storage_arguments = namespace.add_setting( + global_name='SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND_ARGUMENTS', + default='{{location: {}}}'.format( + os.path.join(settings.MEDIA_ROOT, 'staging_file_cache') + ), help_text=_( + 'Arguments to pass to the SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND.' + ), quoted=True, +) +settings_staging_file_image_cache_time = namespace.add_setting( + global_name='SOURCES_STAGING_FILE_IMAGE_CACHE_TIME', default='31556926', + help_text=_( + 'Time in seconds that the browser should cache the supplied staging ' + 'file images. The default of 31559626 seconds corresponde to 1 year.' + ) +) diff --git a/mayan/apps/sources/storages.py b/mayan/apps/sources/storages.py new file mode 100644 index 0000000000..e3bedf4311 --- /dev/null +++ b/mayan/apps/sources/storages.py @@ -0,0 +1,18 @@ +from __future__ import unicode_literals + +import yaml + +from django.utils.module_loading import import_string + +from .settings import ( + setting_staging_file_image_cache_storage, + setting_staging_file_image_cache_storage_arguments, +) + +storage_staging_file_image_cache = import_string( + dotted_path=setting_staging_file_image_cache_storage.value +)( + **yaml.safe_load( + setting_staging_file_image_cache_storage_arguments.value or '{}' + ) +) diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index 6237bac1ab..747e58dcaf 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -14,6 +14,7 @@ from common.exceptions import NoMIMETypeMatch from lock_manager import LockError from lock_manager.runtime import locking_backend +from .classes import StagingFile from .literals import ( DEFAULT_SOURCE_LOCK_EXPIRE, DEFAULT_SOURCE_TASK_RETRY_DELAY ) @@ -197,3 +198,14 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s task_upload_document.delay( shared_uploaded_file_id=shared_upload.pk, **kwargs ) + + +@app.task() +def 
task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): + StagingFolderSource = apps.get_model( + app_label='sources', model_name='StagingFolderSource' + ) + staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) + staging_file = staging_folder.get_file(encoded_filename=encoded_filename) + + return staging_file.generate_image(*args, **kwargs) From a85d33fd8e1d94d03796ead47b283cc495d45363 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 12 Sep 2018 13:32:41 -0400 Subject: [PATCH 2/2] Complete staging file caching refactor. Signed-off-by: Roberto Rosario --- mayan/apps/sources/api_views.py | 7 --- mayan/apps/sources/classes.py | 81 ++++++++++++--------------------- mayan/apps/sources/settings.py | 7 --- mayan/apps/sources/tasks.py | 1 - 4 files changed, 29 insertions(+), 67 deletions(-) diff --git a/mayan/apps/sources/api_views.py b/mayan/apps/sources/api_views.py index 2ce6085962..e55530230d 100644 --- a/mayan/apps/sources/api_views.py +++ b/mayan/apps/sources/api_views.py @@ -2,16 +2,13 @@ from __future__ import unicode_literals from django.http import HttpResponse from django.shortcuts import get_object_or_404 -from django.views.decorators.cache import cache_control, patch_cache_control -from converter.models import Transformation from rest_framework import generics from rest_framework.response import Response from .literals import STAGING_FILE_IMAGE_TASK_TIMEOUT from .models import StagingFolderSource from .serializers import StagingFolderFileSerializer, StagingFolderSerializer -from .settings import settings_staging_file_image_cache_time from .storages import storage_staging_file_image_cache from .tasks import task_generate_staging_file_image @@ -76,8 +73,4 @@ class APIStagingSourceFileImageView(generics.RetrieveAPIView): with storage_staging_file_image_cache.open(cache_filename) as file_object: response = HttpResponse(file_object.read(), content_type='image') - if '_hash' in request.GET: - 
patch_cache_control( - response, max_age=settings_staging_file_image_cache_time.value - ) return response diff --git a/mayan/apps/sources/classes.py b/mayan/apps/sources/classes.py index 25f0254a79..1605963b76 100644 --- a/mayan/apps/sources/classes.py +++ b/mayan/apps/sources/classes.py @@ -20,8 +20,7 @@ from django.core.files.base import ContentFile from django.urls import reverse from django.utils.encoding import force_text, python_2_unicode_compatible -from converter import BaseTransformation, TransformationResize, converter_class -from converter.literals import DEFAULT_ROTATION, DEFAULT_ZOOM_LEVEL +from converter import TransformationResize, converter_class from .storages import storage_staging_file_image_cache @@ -76,42 +75,30 @@ class StagingFile(object): return '{}{}'.format(self.staging_folder.pk, self.encoded_filename) def delete(self): - #TODO: delete cached files + storage_staging_file_image_cache.delete(self.cache_filename) os.unlink(self.get_full_path()) def generate_image(self, *args, **kwargs): transformation_list = self.get_combined_transformation_list(*args, **kwargs) - cache_filename = '{}-{}'.format( - self.cache_filename, BaseTransformation.combine(transformation_list) - ) - # Check is transformed image is available - logger.debug('transformations cache filename: %s', cache_filename) + logger.debug('transformations cache filename: %s', self.cache_filename) - if storage_staging_file_image_cache.exists(cache_filename): + if storage_staging_file_image_cache.exists(self.cache_filename): logger.debug( - 'transformations cache file "%s" found', cache_filename + 'staging file cache file "%s" found', self.cache_filename ) else: logger.debug( - 'transformations cache file "%s" not found', cache_filename + 'staging file cache file "%s" not found', self.cache_filename ) image = self.get_image(transformations=transformation_list) - with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object: + with 
storage_staging_file_image_cache.open(self.cache_filename, 'wb+') as file_object: file_object.write(image.getvalue()) - #self.cached_images.create(filename=cache_filename) - - return cache_filename + return self.cache_filename def get_api_image_url(self, *args, **kwargs): - transformations_hash = BaseTransformation.combine( - self.get_combined_transformation_list(*args, **kwargs) - ) - - kwargs.pop('transformations', None) - final_url = furl() final_url.args = kwargs final_url.path = reverse( @@ -120,7 +107,6 @@ class StagingFile(object): self.encoded_filename ) ) - final_url.args['_hash'] = transformations_hash return final_url.tostr() @@ -135,8 +121,8 @@ class StagingFile(object): # Set sensible defaults if the argument is not specified or if the # argument is None - width = kwargs.get('width', self.staging_folder.preview_width) or self.staging_folder.preview_width - height = kwargs.get('height', self.staging_folder.preview_height) or self.staging_folder.preview_height + width = self.staging_folder.preview_width + height = self.staging_folder.preview_height # Generate transformation hash transformation_list = [] @@ -161,39 +147,30 @@ class StagingFile(object): def get_image(self, transformations=None): cache_filename = self.cache_filename file_object = None - logger.debug('Page cache filename: %s', cache_filename) - if storage_staging_file_image_cache.exists(cache_filename): - logger.debug('Page cache file "%s" found', cache_filename) - file_object = storage_staging_file_image_cache.open(cache_filename) + try: + file_object = open(self.get_full_path()) converter = converter_class(file_object=file_object) - converter.seek(0) - else: - logger.debug('Page cache file "%s" not found', cache_filename) - try: - file_object = open(self.get_full_path()) - converter = converter_class(file_object=file_object) + page_image = converter.get_page() - page_image = converter.get_page() + # Since open "wb+" doesn't create files, check if the file + # exists, if not then create it 
+ if not storage_staging_file_image_cache.exists(cache_filename): + storage_staging_file_image_cache.save(name=cache_filename, content=ContentFile(content='')) - # Since open "wb+" doesn't create files, check if the file - # exists, if not then create it - if not storage_staging_file_image_cache.exists(cache_filename): - storage_staging_file_image_cache.save(name=cache_filename, content=ContentFile(content='')) - - with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object: - file_object.write(page_image.getvalue()) - except Exception as exception: - # Cleanup in case of error - logger.error( - 'Error creating page cache file "%s"; %s', - cache_filename, exception - ) - storage_staging_file_image_cache.delete(cache_filename) - if file_object: - file_object.close() - raise + with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object: + file_object.write(page_image.getvalue()) + except Exception as exception: + # Cleanup in case of error + logger.error( + 'Error creating staging file cache "%s"; %s', + cache_filename, exception + ) + storage_staging_file_image_cache.delete(cache_filename) + if file_object: + file_object.close() + raise for transformation in transformations: converter.transform(transformation=transformation) diff --git a/mayan/apps/sources/settings.py b/mayan/apps/sources/settings.py index 7924450812..dcefcd8c88 100644 --- a/mayan/apps/sources/settings.py +++ b/mayan/apps/sources/settings.py @@ -31,10 +31,3 @@ setting_staging_file_image_cache_storage_arguments = namespace.add_setting( 'Arguments to pass to the SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND.' ), quoted=True, ) -settings_staging_file_image_cache_time = namespace.add_setting( - global_name='SOURCES_STAGING_FILE_IMAGE_CACHE_TIME', default='31556926', - help_text=_( - 'Time in seconds that the browser should cache the supplied staging ' - 'file images. The default of 31559626 seconds corresponde to 1 year.' 
- ) -) diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index 747e58dcaf..89e27930e2 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -14,7 +14,6 @@ from common.exceptions import NoMIMETypeMatch from lock_manager import LockError from lock_manager.runtime import locking_backend -from .classes import StagingFile from .literals import ( DEFAULT_SOURCE_LOCK_EXPIRE, DEFAULT_SOURCE_TASK_RETRY_DELAY )