From f4752a3f3fcaab6512a19d6aa85bed57f1d00794 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sat, 6 Jun 2015 06:26:44 -0400 Subject: [PATCH] Further converter refactor and initial move from document-centric to page-centric image generation. Issue #93. --- mayan/apps/converter/backends/python.py | 78 +++++---------- mayan/apps/converter/classes.py | 127 ++++-------------------- mayan/apps/documents/api_views.py | 6 +- mayan/apps/documents/models.py | 49 ++++++--- mayan/apps/documents/tasks.py | 9 +- mayan/apps/documents/views.py | 15 ++- mayan/apps/documents/widgets.py | 28 +++--- 7 files changed, 105 insertions(+), 207 deletions(-) diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py index ec96980d07..1c7dc44658 100644 --- a/mayan/apps/converter/backends/python.py +++ b/mayan/apps/converter/backends/python.py @@ -40,21 +40,17 @@ class Python(ConverterBase): def get_page_count(self, file_object, mimetype=None): page_count = 1 - #file_object, input_filepath = mkstemp() - #file_object.write(input_data) - if not mimetype: - #mimetype, encoding = get_mimetype(file_description=open(input_filepath, 'rb'), filepath=None, mimetype_only=True) mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True) else: encoding = None if mimetype == 'application/pdf': # If file is a PDF open it with slate to determine the page count - #with open(input_filepath) as fd: try: pages = slate.PDF(file_object) - except: + except Exception as exception: + logger.error('slate exception; %s', exception) return 1 # TODO: Maybe return UnknownFileFormat to display proper unknwon file format message in document description else: @@ -63,7 +59,6 @@ class Python(ConverterBase): file_object.seek(0) try: - #im = Image.fromarray(input_data) image = Image.open(file_object) except IOError: # cannot identify image file raise UnknownFileFormat @@ -81,47 +76,23 @@ class Python(ConverterBase): return page_count def convert(self, file_object, mimetype=None, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER): - - #tmpfile = None - #mimetype = kwargs.get('mimetype', None) - if not mimetype: mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True) - ##try: - print "MIME!", mimetype if mimetype == 'application/pdf' and pdftoppm: image_buffer = io.BytesIO() new_file_object, input_filepath = tempfile.mkstemp() os.write(new_file_object, file_object.read()) - #file_object.seek(0) - #new_file_object.seek(0) os.close(new_file_object) - - pdftoppm(input_filepath, f=page, l=page, _out=image_buffer) image_buffer.seek(0) image = Image.open(image_buffer) - # TODO: remove input_filepath + fs_cleanup(input_filepath) else: image = Image.open(file_object) - - - ##except Exception as exception: - ## logger.error('Error converting image; %s', exception) - ## # Python Imaging Library doesn't recognize it as an image - ## raise ConvertError - ##except IOError: # cannot identify image file - ## raise UnknownFileFormat - - - #finally: - # if tmpfile: - # fs_cleanup(tmpfile) - current_page = 0 try: while current_page == page - 1: @@ -132,36 +103,35 @@ class Python(ConverterBase): # end of sequence pass - ''' - try: - if transformations: - aspect = 1.0 * im.size[0] / im.size[1] - for transformation in transformations: - arguments = transformation.get('arguments') - if transformation['transformation'] == TRANSFORMATION_RESIZE: - width = int(arguments.get('width', 0)) - height = int(arguments.get('height', 1.0 * width * aspect)) - im = self.resize(im, (width, height)) - elif transformation['transformation'] == TRANSFORMATION_ZOOM: - decimal_value = float(arguments.get('percent', 100)) / 100 - im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1])) - elif transformation['transformation'] == TRANSFORMATION_ROTATE: - # PIL counter degress counter-clockwise, reverse them - im = im.rotate(360 - arguments.get('degrees', 0)) - except: - # Ignore all transformation error - pass - ''' - if image.mode not in ('L', 'RGB'): image = image.convert('RGB') - output = StringIO() image.save(output, format=output_format) return output + ''' + try: + if transformations: + aspect = 1.0 * im.size[0] / im.size[1] + for transformation in transformations: + arguments = transformation.get('arguments') + if transformation['transformation'] == TRANSFORMATION_RESIZE: + width = int(arguments.get('width', 0)) + height = int(arguments.get('height', 1.0 * width * aspect)) + im = self.resize(im, (width, height)) + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + decimal_value = float(arguments.get('percent', 100)) / 100 + im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1])) + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + # PIL counter degress counter-clockwise, reverse them + im = im.rotate(360 - arguments.get('degrees', 0)) + except: + # Ignore all transformation error + pass + ''' + # From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python def resize(self, img, box, fit=False, out=None): """ diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py index b5893299b2..1412f28e3d 100644 --- a/mayan/apps/converter/classes.py +++ b/mayan/apps/converter/classes.py @@ -19,7 +19,6 @@ from .literals import ( TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR ) from .office_converter import OfficeConverter -from .runtime import backend, office_converter from .settings import GRAPHICS_BACKEND, LIBREOFFICE_PATH CONVERTER_OFFICE_FILE_MIMETYPES = [ @@ -68,7 +67,6 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [ 'text/plain', 'text/rtf', ] - logger = logging.getLogger(__name__) @@ -109,23 +107,6 @@ class TransformationScale(BaseTransformation): class Converter(object): - """ - def cache_cleanup(input_filepath, *args, **kwargs): - try: - os.remove(create_image_cache_filename(input_filepath, *args, **kwargs)) - except OSError: - pass - """ - - """ - def create_image_cache_filename(input_filepath, *args, **kwargs): - if input_filepath: - hash_value = HASH_FUNCTION(''.join([HASH_FUNCTION(smart_str(input_filepath)), unicode(args), unicode(kwargs)])) - return os.path.join(TEMPORARY_DIRECTORY, hash_value) - else: - return None - """ - @staticmethod def soffice(file_object): @@ -159,8 +140,7 @@ class Converter(object): readline = proc.stderr.readline() logger.debug('stderr: %s', readline) if return_code != 0: - #raise OfficeBackendError(readline) - raise Exception(readline) + raise OfficeBackendError(readline) filename, extension = os.path.splitext(os.path.basename(input_filepath)) logger.debug('filename: %s', filename) @@ -169,106 +149,35 @@ class Converter(object): converted_output = os.path.join(TEMPORARY_DIRECTORY, os.path.extsep.join([filename, 'pdf'])) logger.debug('converted_output: %s', converted_output) - return open(converted_output) - #os.rename(converted_output, output_filepath) - # TODO: remove temp file - + return converted_output def __init__(self, file_object, mime_type=None): self.file_object = file_object self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0] - - if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES: - if os.path.exists(LIBREOFFICE_PATH): - #file_object, filename = mkstemp() - - # Cache results of conversion - #output_filepath = os.path.join(TEMPORARY_DIRECTORY, ''.join([self.input_filepath, CACHED_FILE_SUFFIX])) - - result = Converter.soffice(file_object) - file_object.close() - self.file_object = result - self.mime_type = 'application/pdf' - - #try: - # self.backend.convert(self.input_filepath, self.output_filepath) - # self.exists = True - #except OfficeBackendError as exception: - # # convert exception so that at least the mime type icon is displayed - # raise UnknownFileFormat(exception) - #else: - # result = office_converter.convert(self.file_object, mimetype=mime_type) - # self.file_object.close() - # self.file_object = result - else: - # TODO: NO LIBREOFFICE ERROR - pass - + self.temporary_files = [] def transform(self, transformations, page=DEFAULT_PAGE_NUMBER): pass - def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):#, *args, **kwargs): - #size = kwargs.get('size') - #file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT) - #zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL) - #rotation = kwargs.get('rotation', DEFAULT_ROTATION) - #page = kwargs.get('page', DEFAULT_PAGE_NUMBER) - #transformations = kwargs.get('transformations', []) + def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER): + if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES: + if os.path.exists(LIBREOFFICE_PATH): + converted_output = Converter.soffice(self.file_object) + self.file_object.close() + self.file_object = open(converted_output) + self.mime_type = 'application/pdf' + self.temporary_file.append(converted_output) + else: + # TODO: NO LIBREOFFICE FOUND ERROR + pass - #if transformations is None: - # transformations = [] - - #if output_filepath is None: - # output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs) - - #if os.path.exists(output_filepath): - # return output_filepath - - ''' - if office_converter: - try: - office_converter.convert(input_filepath, mimetype=mimetype) - if office_converter.exists: - input_filepath = office_converter.output_filepath - mimetype = 'application/pdf' - else: - # Recycle the already detected mimetype - mimetype = office_converter.mimetype - - except OfficeConversionError: - raise UnknownFileFormat('office converter exception') - - if size: - transformations.append( - { - 'transformation': TRANSFORMATION_RESIZE, - 'arguments': dict(zip(['width', 'height'], size.split(DIMENSION_SEPARATOR))) - } - ) - - if zoom != 100: - transformations.append( - { - 'transformation': TRANSFORMATION_ZOOM, - 'arguments': {'percent': zoom} - } - ) - - if rotation != 0 and rotation != 360: - transformations.append( - { - 'transformation': TRANSFORMATION_ROTATE, - 'arguments': {'degrees': rotation} - } - ) - ''' + for temporary_file in self.temporary_files: + fs_cleanup(temporary_file) return backend.convert(file_object=self.file_object, mimetype=self.mime_type, output_format=output_format, page=page) - def get_page_count(self): - return backend.get_page_count(file_object) - + def get_page_count(self): + return backend.get_page_count(file_object) ''' diff --git a/mayan/apps/documents/api_views.py b/mayan/apps/documents/api_views.py index d9a9fe4685..f352da7a44 100644 --- a/mayan/apps/documents/api_views.py +++ b/mayan/apps/documents/api_views.py @@ -36,7 +36,7 @@ from .serializers import ( RecentDocumentSerializer ) from .settings import DISPLAY_SIZE, ZOOM_MAX_LEVEL, ZOOM_MIN_LEVEL -from .tasks import task_get_document_image, task_new_document +from .tasks import task_get_document_page_image, task_new_document class APIDocumentListView(generics.ListAPIView): @@ -202,8 +202,10 @@ class APIDocumentImageView(generics.GenericAPIView): rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360 + document_page = document.pages.get(page_number=page) + try: - task = task_get_document_image.apply_async(kwargs=dict(document_id=document.pk, size=size, page=page, zoom=zoom, rotation=rotation, as_base64=True, version=version), queue='converter') + task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=True, version=version), queue='converter') return Response({ 'status': 'success', 'data': task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT) diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py index 654874a7be..71feb15e1f 100644 --- a/mayan/apps/documents/models.py +++ b/mayan/apps/documents/models.py @@ -17,6 +17,7 @@ from django.utils.translation import ugettext_lazy as _ from acls.utils import apply_default_acls from common.settings import TEMPORARY_DIRECTORY +from common.utils import fs_cleanup from converter.classes import Converter from converter.exceptions import UnknownFileFormat from converter.literals import ( @@ -526,7 +527,15 @@ class DocumentPage(models.Model): def document(self): return self.document_version.document - def get_image(self, *args, **kargs): + def get_uuid(self): + return 'page-cache-{}'.format(self.pk) + + def get_cache_filename(self): + return os.path.join(CACHE_PATH, self.get_uuid()) + + def get_image(self, *args, **kwargs): + transformations = kwargs.pop('transformations', []) + #size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None): #if zoom < ZOOM_MIN_LEVEL: # zoom = ZOOM_MIN_LEVEL @@ -538,20 +547,34 @@ class DocumentPage(models.Model): #file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version) #logger.debug('file_path: %s', file_path) + as_base64 = kwargs.pop('as_base64', False) - converter = Converter(file_object=self.document_version.open()) - data = converter.convert(page=self.page_number) - #print "data!!!!", data.getvalue() - ##, *args, **kwargs): - return 'data:%s;base64,%s' % ('PNG', base64.b64encode(data.getvalue())) + cache_filename = self.get_cache_filename() - #if as_base64: - # with open(file_path, 'r') as file_object: - # #mimetype = get_mimetype(file_object=file_object, mimetype_only=True)[0] - # base64_data = base64.b64encode(file_object.read()) - # return 'data:%s;base64,%s' % (mimetype, base64_data) - #else: - # return file_path + if os.path.exists(cache_filename) and 0: + with open(cache_filename) as file_object: + data = file_object.read() + + if as_base64: + return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data)) + else: + return data + else: + try: + converter = Converter(file_object=self.document_version.open()) + image_buffer = converter.convert(page=self.page_number, output_format='PNG') + with open(cache_filename, 'wb+') as file_object: + file_object.write(image_buffer.getvalue()) + except: + fs_cleanup(cache_filename) + raise + else: + data = image_buffer.getvalue() + image_buffer.close() + if as_base64: + return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data)) + else: + return data def argument_validator(value): diff --git a/mayan/apps/documents/tasks.py b/mayan/apps/documents/tasks.py index da9cef3fe5..26a06a23c3 100644 --- a/mayan/apps/documents/tasks.py +++ b/mayan/apps/documents/tasks.py @@ -9,16 +9,15 @@ from mayan.celery import app from common.models import SharedUploadedFile -from .models import Document, DocumentType, DocumentVersion +from .models import Document, DocumentPage, DocumentType, DocumentVersion logger = logging.getLogger(__name__) @app.task(compression='zlib') -def task_get_document_image(document_id, *args, **kwargs): - document = Document.objects.get(pk=document_id) - first_page = document.latest_version.pages.first() - return first_page.get_image(*args, **kwargs) +def task_get_document_page_image(document_page_id, *args, **kwargs): + document_page = DocumentPage.objects.get(pk=document_page_id) + return document_page.get_image(*args, **kwargs) @app.task(ignore_result=True) diff --git a/mayan/apps/documents/views.py b/mayan/apps/documents/views.py index 6c506abd10..3e01408aa1 100644 --- a/mayan/apps/documents/views.py +++ b/mayan/apps/documents/views.py @@ -57,7 +57,8 @@ from .settings import ( ZOOM_MAX_LEVEL, ZOOM_MIN_LEVEL ) from .tasks import ( - task_clear_image_cache, task_get_document_image, task_update_page_count + task_clear_image_cache, task_get_document_page_image, + task_update_page_count ) from .utils import parse_range @@ -366,17 +367,15 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE): rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360 - task = task_get_document_image.apply_async(kwargs=dict(document_id=document.pk, size=size, page=page, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter') + document_page = document.pages.get(page_number=page) + + task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter') data = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT) - response = HttpResponse(data, content_type='data/PNG') - #response['Content-Disposition'] = 'attachment; filename="somefilename.pdf"' - + response = HttpResponse(data, content_type='image') return response - #print 'data!!!!!!!!!!!', task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT) - #re - + # TODO: remove sendfile #return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE) diff --git a/mayan/apps/documents/widgets.py b/mayan/apps/documents/widgets.py index 72b5858413..aba59d75d3 100644 --- a/mayan/apps/documents/widgets.py +++ b/mayan/apps/documents/widgets.py @@ -24,7 +24,7 @@ class DocumentPageImageWidget(forms.widgets.Widget): if value: output = [] output.append('
') - output.append(document_html_widget(value.document, page=value.page_number, zoom=zoom, rotation=rotation, image_class='lazy-load-interactive', nolazyload=False, size=DISPLAY_SIZE)) + output.append(document_html_widget(value, zoom=zoom, rotation=rotation, image_class='lazy-load-interactive', nolazyload=False, size=DISPLAY_SIZE)) output.append('
') return mark_safe(''.join(output)) else: @@ -46,21 +46,16 @@ class DocumentPagesCarouselWidget(forms.widgets.Widget): document_pages = [] total_pages = 0 - # Reuse expensive values - latest_version_pk = value.latest_version.pk - for page in document_pages: output.append('