Merge branch 'feature/staging_file_caching' into versions/next

This commit is contained in:
Roberto Rosario
2018-09-12 13:34:14 -04:00
9 changed files with 188 additions and 40 deletions

View File

@@ -135,6 +135,8 @@
#360.
- Add new dashboard item to display the total page count.
- Show the document type being uploaded in the source view title.
- Setting SOURCE_SCANIMAGE_PATH is now SOURCES_SCANIMAGE_PATH.
- New queue: sources_fast. Used for staging file generation.
3.0.3 (2018-08-17)
==================

View File

@@ -3,12 +3,14 @@ from __future__ import unicode_literals
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from converter.models import Transformation
from rest_framework import generics
from rest_framework.response import Response
from .literals import STAGING_FILE_IMAGE_TASK_TIMEOUT
from .models import StagingFolderSource
from .serializers import StagingFolderFileSerializer, StagingFolderSerializer
from .storages import storage_staging_file_image_cache
from .tasks import task_generate_staging_file_image
class APIStagingSourceFileView(generics.GenericAPIView):
@@ -56,20 +58,19 @@ class APIStagingSourceFileImageView(generics.RetrieveAPIView):
return None
def retrieve(self, request, *args, **kwargs):
staging_folder = get_object_or_404(
StagingFolderSource, pk=self.kwargs['staging_folder_pk']
)
staging_file = staging_folder.get_file(
encoded_filename=self.kwargs['encoded_filename']
width = request.GET.get('width')
height = request.GET.get('height')
task = task_generate_staging_file_image.apply_async(
kwargs=dict(
staging_folder_pk=self.kwargs['staging_folder_pk'],
encoded_filename=self.kwargs['encoded_filename'],
width=width, height=height
)
)
size = request.GET.get('size')
cache_filename = task.get(timeout=STAGING_FILE_IMAGE_TASK_TIMEOUT)
return HttpResponse(
staging_file.get_image(
size=size,
transformations=Transformation.objects.get_for_model(
staging_folder, as_classes=True
)
), content_type='image'
)
with storage_staging_file_image_cache.open(cache_filename) as file_object:
response = HttpResponse(file_object.read(), content_type='image')
return response

View File

@@ -93,6 +93,10 @@ class SourcesApp(MayanAppConfig):
Queue(
'sources', Exchange('sources'), routing_key='sources'
),
Queue(
'sources_fast', Exchange('sources_fast'),
routing_key='sources_fast', delivery_mode=1
),
Queue(
'sources_periodic', Exchange('sources_periodic'),
routing_key='sources_periodic', delivery_mode=1
@@ -105,6 +109,9 @@ class SourcesApp(MayanAppConfig):
'sources.tasks.task_check_interval_source': {
'queue': 'sources_periodic'
},
'sources.tasks.task_generate_staging_file_image': {
'queue': 'sources_fast'
},
'sources.tasks.task_source_handle_upload': {
'queue': 'sources'
},

View File

@@ -1,9 +1,12 @@
from __future__ import unicode_literals
import base64
import logging
import os
import time
from furl import furl
try:
# Python 2
from urllib import unquote_plus
@@ -13,11 +16,16 @@ except ImportError:
from django.core.files import File
from django.core.files.base import ContentFile
from django.urls import reverse
from django.utils.encoding import force_text, python_2_unicode_compatible
from converter import TransformationResize, converter_class
from .storages import storage_staging_file_image_cache
logger = logging.getLogger(__name__)
class PseudoFile(File):
def __init__(self, file, name):
@@ -62,35 +70,111 @@ class StagingFile(object):
file=open(self.get_full_path(), mode='rb'), name=self.filename
)
def get_api_image_url(self):
return reverse(
@property
def cache_filename(self):
    """
    Name of the file holding this staging file's generated image in the
    staging file image cache storage.

    The name is the staging folder primary key and the encoded filename
    joined by a dash. Without a separator the pair is ambiguous when the
    primary key is numeric and the encoded filename starts with a digit:
    folder 1 + "2abc" and folder 12 + "abc" would both yield "12abc" and
    two different staging files would share a single cache entry.
    """
    return '{}-{}'.format(self.staging_folder.pk, self.encoded_filename)
def delete(self):
    """
    Delete the cached image of this staging file from the staging file
    image cache storage, then unlink the staging file itself.
    """
    cached_image_name = self.cache_filename
    storage_staging_file_image_cache.delete(cached_image_name)

    staging_file_path = self.get_full_path()
    os.unlink(staging_file_path)
def generate_image(self, *args, **kwargs):
    """
    Make sure an image of this staging file exists in the staging file
    image cache storage and return the name of the cache file.

    All positional and keyword arguments are forwarded unchanged to
    get_combined_transformation_list(). The image is only generated when
    no file named after this staging file's cache filename exists in the
    cache storage.

    Any exception raised while writing the cache file is re-raised after
    the incomplete cache file has been removed.
    """
    transformation_list = self.get_combined_transformation_list(*args, **kwargs)
    cache_filename = self.cache_filename

    # Check if the transformed image is available
    logger.debug('transformations cache filename: %s', cache_filename)

    if storage_staging_file_image_cache.exists(cache_filename):
        logger.debug(
            'staging file cache file "%s" found', cache_filename
        )
        return cache_filename

    logger.debug(
        'staging file cache file "%s" not found', cache_filename
    )
    image = self.get_image(transformations=transformation_list)

    try:
        with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object:
            file_object.write(image.getvalue())
    except Exception as exception:
        # The existence of the cache file is the only validity check
        # performed above, so a partially written file must not be left
        # behind or it would be served as a valid image from now on.
        logger.error(
            'Error creating staging file cache "%s"; %s',
            cache_filename, exception
        )
        storage_staging_file_image_cache.delete(cache_filename)
        raise

    return cache_filename
def get_api_image_url(self, *args, **kwargs):
    """
    Return, as a string, the URL of the REST API image view for this
    staging file. The keyword arguments are assigned as the arguments of
    the resulting URL; positional arguments are accepted but not used.
    """
    image_view_path = reverse(
        'rest_api:stagingfolderfile-image-view',
        args=(self.staging_folder.pk, self.encoded_filename)
    )

    url = furl()
    url.args = kwargs
    url.path = image_view_path

    return url.tostr()
def get_combined_transformation_list(self, *args, **kwargs):
    """
    Return a new list of transformations made of the transformations
    passed in the ``transformations`` keyword argument, in their original
    order, followed by a resize to the staging folder's preview width and
    height when the staging folder defines a preview width.

    A missing ``transformations`` argument and an explicit
    ``transformations=None`` are both treated as "no transformations".
    Positional arguments and any other keyword arguments are accepted but
    not used.
    """
    transformations = kwargs.get('transformations')
    if transformations is None:
        # Callers may forward None explicitly; iterating it would raise
        # a TypeError.
        transformations = []

    # The preview size always comes from the staging folder configuration
    width = self.staging_folder.preview_width
    height = self.staging_folder.preview_height

    # Caller supplied transformations first, copied so that the caller's
    # sequence is never modified
    transformation_list = list(transformations)

    # Preview resize last
    if width:
        transformation_list.append(
            TransformationResize(width=width, height=height)
        )

    return transformation_list
def get_date_time_created(self):
    """
    Return the staging file's ctime, as reported by os.path.getctime(),
    formatted as a string by time.ctime().
    """
    ctime_timestamp = os.path.getctime(self.get_full_path())
    return time.ctime(ctime_timestamp)
def get_full_path(self):
    """
    Return the path of the staging file: the staging folder's folder
    path joined with this file's filename.
    """
    folder_path = self.staging_folder.folder_path
    return os.path.join(folder_path, self.filename)
def get_image(self, size=None, as_base64=False, transformations=None):
converter = converter_class(file_object=open(self.get_full_path()))
def get_image(self, transformations=None):
cache_filename = self.cache_filename
file_object = None
if size:
converter.transform(
transformation=TransformationResize(
**dict(zip(('width', 'height'), (size.split('x'))))
)
try:
file_object = open(self.get_full_path())
converter = converter_class(file_object=file_object)
page_image = converter.get_page()
# Since open "wb+" doesn't create files, check if the file
# exists, if not then create it
if not storage_staging_file_image_cache.exists(cache_filename):
storage_staging_file_image_cache.save(name=cache_filename, content=ContentFile(content=''))
with storage_staging_file_image_cache.open(cache_filename, 'wb+') as file_object:
file_object.write(page_image.getvalue())
except Exception as exception:
# Cleanup in case of error
logger.error(
'Error creating staging file cache "%s"; %s',
cache_filename, exception
)
storage_staging_file_image_cache.delete(cache_filename)
if file_object:
file_object.close()
raise
# Interactive transformations
for transformation in transformations:
converter.transform(transformation=transformation)
return converter.get_page(as_base64=as_base64)
def delete(self):
os.unlink(self.get_full_path())
result = converter.get_page()
file_object.close()
return result

View File

@@ -2,6 +2,13 @@ from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _
DEFAULT_IMAP_MAILBOX = 'INBOX'
DEFAULT_INTERVAL = 600
DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml'
DEFAULT_POP3_TIMEOUT = 60
DEFAULT_SOURCE_LOCK_EXPIRE = 600
DEFAULT_SOURCE_TASK_RETRY_DELAY = 10
SCANNER_SOURCE_FLATBED = 'flatbed'
SCANNER_SOURCE_ADF = 'Automatic Document Feeder'
@@ -58,10 +65,4 @@ SOURCE_CHOICES = (
(SOURCE_CHOICE_EMAIL_POP3, _('POP3 email')),
(SOURCE_CHOICE_EMAIL_IMAP, _('IMAP email')),
)
DEFAULT_SOURCE_LOCK_EXPIRE = 600
DEFAULT_INTERVAL = 600
DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml'
DEFAULT_POP3_TIMEOUT = 60
DEFAULT_IMAP_MAILBOX = 'INBOX'
DEFAULT_SOURCE_TASK_RETRY_DELAY = 10
STAGING_FILE_IMAGE_TASK_TIMEOUT = 120

View File

@@ -5,17 +5,23 @@ from django.utils.translation import ugettext_lazy as _
from task_manager.classes import CeleryQueue
queue_sources = CeleryQueue(
name='sources', label=_('Sources'), transient=True
name='sources', label=_('Sources')
)
queue_sources_periodic = CeleryQueue(
name='sources_periodic', label=_('Sources periodic')
name='sources_periodic', label=_('Sources periodic'), transient=True
)
# Transient queue that receives the staging file image generation task.
queue_sources_fast = CeleryQueue(
    label=_('Sources fast'), name='sources_fast', transient=True
)

queue_sources_fast.add_task_type(
    label=_('Generate staging file image'),
    name='sources.tasks.task_generate_staging_file_image'
)

queue_sources_periodic.add_task_type(
    label=_('Check interval source'),
    name='sources.tasks.task_check_interval_source'
)
queue_sources.add_task_type(
name='sources.tasks.task_source_handle_upload',
label=_('Handle upload')

View File

@@ -1,5 +1,8 @@
from __future__ import unicode_literals
import os
from django.conf import settings
from django.utils.translation import ugettext_lazy as _
from smart_settings import Namespace
@@ -7,9 +10,24 @@ from smart_settings import Namespace
namespace = Namespace(name='sources', label=_('Sources'))
setting_scanimage_path = namespace.add_setting(
global_name='SOURCE_SCANIMAGE_PATH', default='/usr/bin/scanimage',
global_name='SOURCES_SCANIMAGE_PATH', default='/usr/bin/scanimage',
help_text=_(
'File path to the scanimage program used to control image scanners.'
),
is_path=True
)
# Dotted path of the storage class used for the cached staging file
# images. Defaults to Django's file system storage.
setting_staging_file_image_cache_storage = namespace.add_setting(
    default='django.core.files.storage.FileSystemStorage',
    global_name='SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND',
    help_text=_(
        'Path to the Storage subclass to use when storing the cached '
        'staging_file image files.'
    ),
    quoted=True
)
# Arguments for the storage class above, kept as a text value. The default
# points the storage location at the "staging_file_cache" directory
# inside MEDIA_ROOT.
setting_staging_file_image_cache_storage_arguments = namespace.add_setting(
    default='{{location: {}}}'.format(
        os.path.join(settings.MEDIA_ROOT, 'staging_file_cache')
    ),
    global_name='SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND_ARGUMENTS',
    help_text=_(
        'Arguments to pass to the SOURCES_STAGING_FILE_CACHE_STORAGE_BACKEND.'
    ),
    quoted=True,
)

View File

@@ -0,0 +1,18 @@
from __future__ import unicode_literals
import yaml
from django.utils.module_loading import import_string
from .settings import (
setting_staging_file_image_cache_storage,
setting_staging_file_image_cache_storage_arguments,
)
# Storage instance used to read and write the cached staging file images.
# The class is imported from the dotted path held by the backend setting
# and instantiated with the keyword arguments parsed, as YAML, from the
# backend arguments setting.
storage_staging_file_image_cache = import_string(
    dotted_path=setting_staging_file_image_cache_storage.value
)(
    # safe_load() returns None for a value that parses to a YAML null or
    # to no document at all (for example only a comment); fall back to no
    # arguments instead of failing on **None at import time.
    **(
        yaml.safe_load(
            setting_staging_file_image_cache_storage_arguments.value or '{}'
        ) or {}
    )
)

View File

@@ -197,3 +197,14 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s
task_upload_document.delay(
shared_uploaded_file_id=shared_upload.pk, **kwargs
)
@app.task()
def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs):
    """
    Generate the image of a staging file and return the value returned
    by the staging file's generate_image() method.

    The staging folder is looked up by primary key and the staging file
    is obtained from it using the encoded filename. Any additional
    positional and keyword arguments are forwarded to generate_image().
    """
    # The model is resolved through the app registry at call time
    staging_folder_model = apps.get_model(
        app_label='sources', model_name='StagingFolderSource'
    )

    staging_file = staging_folder_model.objects.get(
        pk=staging_folder_pk
    ).get_file(encoded_filename=encoded_filename)

    return staging_file.generate_image(*args, **kwargs)