From f4752a3f3fcaab6512a19d6aa85bed57f1d00794 Mon Sep 17 00:00:00 2001
From: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
Date: Sat, 6 Jun 2015 06:26:44 -0400
Subject: [PATCH] Further converter refactor and initial move from
 document-centric to page-centric image generation. Issue #93.

---
 mayan/apps/converter/backends/python.py |  78 +++++----------
 mayan/apps/converter/classes.py         | 127 ++++--------------------
 mayan/apps/documents/api_views.py       |   6 +-
 mayan/apps/documents/models.py          |  49 ++++++---
 mayan/apps/documents/tasks.py           |   9 +-
 mayan/apps/documents/views.py           |  15 ++-
 mayan/apps/documents/widgets.py         |  28 +++---
 7 files changed, 105 insertions(+), 207 deletions(-)

diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py
index ec96980d07..1c7dc44658 100644
--- a/mayan/apps/converter/backends/python.py
+++ b/mayan/apps/converter/backends/python.py
@@ -40,21 +40,17 @@ class Python(ConverterBase):
     def get_page_count(self, file_object, mimetype=None):
         page_count = 1
 
-        #file_object, input_filepath = mkstemp()
-        #file_object.write(input_data)
-
         if not mimetype:
-            #mimetype, encoding = get_mimetype(file_description=open(input_filepath, 'rb'), filepath=None, mimetype_only=True)
             mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
         else:
             encoding = None
 
         if mimetype == 'application/pdf':
             # If file is a PDF open it with slate to determine the page count
-            #with open(input_filepath) as fd:
             try:
                 pages = slate.PDF(file_object)
-            except:
+            except Exception as exception:
+                logger.error('slate exception; %s', exception)
                 return 1
                 # TODO: Maybe return UnknownFileFormat to display proper unknwon file format message in document description
             else:
@@ -63,7 +59,6 @@ class Python(ConverterBase):
                 file_object.seek(0)
 
         try:
-            #im = Image.fromarray(input_data)
             image = Image.open(file_object)
         except IOError:  # cannot identify image file
             raise UnknownFileFormat
@@ -81,47 +76,23 @@ class Python(ConverterBase):
         return page_count
 
     def convert(self, file_object, mimetype=None, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
-
-        #tmpfile = None
-        #mimetype = kwargs.get('mimetype', None)
-
         if not mimetype:
             mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
 
-        ##try:
-        print "MIME!", mimetype
         if mimetype == 'application/pdf' and pdftoppm:
             image_buffer = io.BytesIO()
 
             new_file_object, input_filepath = tempfile.mkstemp()
             os.write(new_file_object, file_object.read())
-            #file_object.seek(0)
-            #new_file_object.seek(0)
             os.close(new_file_object)
 
-
-
             pdftoppm(input_filepath, f=page, l=page, _out=image_buffer)
             image_buffer.seek(0)
             image = Image.open(image_buffer)
-            # TODO: remove input_filepath
+            fs_cleanup(input_filepath)
         else:
             image = Image.open(file_object)
 
-
-
-        ##except Exception as exception:
-        ##    logger.error('Error converting image; %s', exception)
-        ##    # Python Imaging Library doesn't recognize it as an image
-        ##    raise ConvertError
-        ##except IOError:  # cannot identify image file
-        ##    raise UnknownFileFormat
-
-
-        #finally:
-        #    if tmpfile:
-        #        fs_cleanup(tmpfile)
-
         current_page = 0
         try:
             while current_page == page - 1:
@@ -132,36 +103,35 @@ class Python(ConverterBase):
             # end of sequence
             pass
 
-        '''
-        try:
-            if transformations:
-                aspect = 1.0 * im.size[0] / im.size[1]
-                for transformation in transformations:
-                    arguments = transformation.get('arguments')
-                    if transformation['transformation'] == TRANSFORMATION_RESIZE:
-                        width = int(arguments.get('width', 0))
-                        height = int(arguments.get('height', 1.0 * width * aspect))
-                        im = self.resize(im, (width, height))
-                    elif transformation['transformation'] == TRANSFORMATION_ZOOM:
-                        decimal_value = float(arguments.get('percent', 100)) / 100
-                        im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1]))
-                    elif transformation['transformation'] == TRANSFORMATION_ROTATE:
-                        # PIL counter degress counter-clockwise, reverse them
-                        im = im.rotate(360 - arguments.get('degrees', 0))
-        except:
-            # Ignore all transformation error
-            pass
-        '''
-
         if image.mode not in ('L', 'RGB'):
             image = image.convert('RGB')
 
-
         output = StringIO()
         image.save(output, format=output_format)
 
         return output
 
+    '''
+    try:
+        if transformations:
+            aspect = 1.0 * im.size[0] / im.size[1]
+            for transformation in transformations:
+                arguments = transformation.get('arguments')
+                if transformation['transformation'] == TRANSFORMATION_RESIZE:
+                    width = int(arguments.get('width', 0))
+                    height = int(arguments.get('height', 1.0 * width * aspect))
+                    im = self.resize(im, (width, height))
+                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
+                    decimal_value = float(arguments.get('percent', 100)) / 100
+                    im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1]))
+                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                    # PIL counts degrees counter-clockwise, reverse them
+                    im = im.rotate(360 - arguments.get('degrees', 0))
+    except:
+        # Ignore all transformation error
+        pass
+    '''
+
     # From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
     def resize(self, img, box, fit=False, out=None):
         """
diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py
index b5893299b2..1412f28e3d 100644
--- a/mayan/apps/converter/classes.py
+++ b/mayan/apps/converter/classes.py
@@ -19,7 +19,6 @@ from .literals import (
     TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR
 )
 from .office_converter import OfficeConverter
-from .runtime import backend, office_converter
 from .settings import GRAPHICS_BACKEND, LIBREOFFICE_PATH
 
 CONVERTER_OFFICE_FILE_MIMETYPES = [
@@ -68,7 +67,6 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [
     'text/plain',
     'text/rtf',
 ]
-
 logger = logging.getLogger(__name__)
 
 
@@ -109,23 +107,6 @@ class TransformationScale(BaseTransformation):
 
 
 class Converter(object):
-    """
-    def cache_cleanup(input_filepath, *args, **kwargs):
-        try:
-            os.remove(create_image_cache_filename(input_filepath, *args, **kwargs))
-        except OSError:
-            pass
-    """
-
-    """
-    def create_image_cache_filename(input_filepath, *args, **kwargs):
-        if input_filepath:
-            hash_value = HASH_FUNCTION(''.join([HASH_FUNCTION(smart_str(input_filepath)), unicode(args), unicode(kwargs)]))
-            return os.path.join(TEMPORARY_DIRECTORY, hash_value)
-        else:
-            return None
-    """
-
 
     @staticmethod
     def soffice(file_object):
@@ -159,8 +140,7 @@ class Converter(object):
         readline = proc.stderr.readline()
         logger.debug('stderr: %s', readline)
         if return_code != 0:
-            #raise OfficeBackendError(readline)
-            raise Exception(readline)
+            raise OfficeBackendError(readline)
 
         filename, extension = os.path.splitext(os.path.basename(input_filepath))
         logger.debug('filename: %s', filename)
@@ -169,106 +149,35 @@ class Converter(object):
         converted_output = os.path.join(TEMPORARY_DIRECTORY, os.path.extsep.join([filename, 'pdf']))
         logger.debug('converted_output: %s', converted_output)
 
-        return open(converted_output)
-        #os.rename(converted_output, output_filepath)
-        # TODO: remove temp file
-
+        return converted_output
 
     def __init__(self, file_object, mime_type=None):
         self.file_object = file_object
         self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0]
-
-        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
-            if os.path.exists(LIBREOFFICE_PATH):
-                #file_object, filename = mkstemp()
-
-                # Cache results of conversion
-                #output_filepath = os.path.join(TEMPORARY_DIRECTORY, ''.join([self.input_filepath, CACHED_FILE_SUFFIX]))
-
-                result = Converter.soffice(file_object)
-                file_object.close()
-                self.file_object = result
-                self.mime_type = 'application/pdf'
-
-                #try:
-                #    self.backend.convert(self.input_filepath, self.output_filepath)
-                #    self.exists = True
-                #except OfficeBackendError as exception:
-                #    # convert exception so that at least the mime type icon is displayed
-                #    raise UnknownFileFormat(exception)
-                #else:
-                #    result = office_converter.convert(self.file_object, mimetype=mime_type)
-                #    self.file_object.close()
-                #    self.file_object = result
-            else:
-                # TODO: NO LIBREOFFICE ERROR
-                pass
-
+        self.temporary_files = []
 
     def transform(self, transformations, page=DEFAULT_PAGE_NUMBER):
         pass
 
-    def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):#, *args, **kwargs):
-        #size = kwargs.get('size')
-        #file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
-        #zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
-        #rotation = kwargs.get('rotation', DEFAULT_ROTATION)
-        #page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
-        #transformations = kwargs.get('transformations', [])
+    def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
+        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
+            if os.path.exists(LIBREOFFICE_PATH):
+                converted_output = Converter.soffice(self.file_object)
+                self.file_object.close()
+                self.file_object = open(converted_output, 'rb')
+                self.mime_type = 'application/pdf'
+                self.temporary_files.append(converted_output)
+            else:
+                # TODO: NO LIBREOFFICE FOUND ERROR
+                pass
 
-        #if transformations is None:
-        #    transformations = []
-
-        #if output_filepath is None:
-        #    output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
-
-        #if os.path.exists(output_filepath):
-        #    return output_filepath
-
-        '''
-        if office_converter:
-            try:
-                office_converter.convert(input_filepath, mimetype=mimetype)
-                if office_converter.exists:
-                    input_filepath = office_converter.output_filepath
-                    mimetype = 'application/pdf'
-                else:
-                    # Recycle the already detected mimetype
-                    mimetype = office_converter.mimetype
-
-            except OfficeConversionError:
-                raise UnknownFileFormat('office converter exception')
-
-        if size:
-            transformations.append(
-                {
-                    'transformation': TRANSFORMATION_RESIZE,
-                    'arguments': dict(zip(['width', 'height'], size.split(DIMENSION_SEPARATOR)))
-                }
-            )
-
-        if zoom != 100:
-            transformations.append(
-                {
-                    'transformation': TRANSFORMATION_ZOOM,
-                    'arguments': {'percent': zoom}
-                }
-            )
-
-        if rotation != 0 and rotation != 360:
-            transformations.append(
-                {
-                    'transformation': TRANSFORMATION_ROTATE,
-                    'arguments': {'degrees': rotation}
-                }
-            )
-        '''
+        for temporary_file in self.temporary_files:
+            fs_cleanup(temporary_file)
 
         return backend.convert(file_object=self.file_object, mimetype=self.mime_type, output_format=output_format, page=page)
 
-        def get_page_count(self):
-            return backend.get_page_count(file_object)
-
+    def get_page_count(self):
+        return backend.get_page_count(self.file_object)
 
 
 '''
diff --git a/mayan/apps/documents/api_views.py b/mayan/apps/documents/api_views.py
index d9a9fe4685..f352da7a44 100644
--- a/mayan/apps/documents/api_views.py
+++ b/mayan/apps/documents/api_views.py
@@ -36,7 +36,7 @@ from .serializers import (
     RecentDocumentSerializer
 )
 from .settings import DISPLAY_SIZE, ZOOM_MAX_LEVEL, ZOOM_MIN_LEVEL
-from .tasks import task_get_document_image, task_new_document
+from .tasks import task_get_document_page_image, task_new_document
 
 
 class APIDocumentListView(generics.ListAPIView):
@@ -202,8 +202,10 @@ class APIDocumentImageView(generics.GenericAPIView):
 
         rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360
 
+        document_page = document.pages.get(page_number=page)
+
         try:
-            task = task_get_document_image.apply_async(kwargs=dict(document_id=document.pk, size=size, page=page, zoom=zoom, rotation=rotation, as_base64=True, version=version), queue='converter')
+            task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=True, version=version), queue='converter')
             return Response({
                 'status': 'success',
                 'data': task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)
diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py
index 654874a7be..71feb15e1f 100644
--- a/mayan/apps/documents/models.py
+++ b/mayan/apps/documents/models.py
@@ -17,6 +17,7 @@ from django.utils.translation import ugettext_lazy as _
 
 from acls.utils import apply_default_acls
 from common.settings import TEMPORARY_DIRECTORY
+from common.utils import fs_cleanup
 from converter.classes import Converter
 from converter.exceptions import UnknownFileFormat
 from converter.literals import (
@@ -526,7 +527,15 @@ class DocumentPage(models.Model):
     def document(self):
         return self.document_version.document
 
-    def get_image(self, *args, **kargs):
+    def get_uuid(self):
+        return 'page-cache-{}'.format(self.pk)
+
+    def get_cache_filename(self):
+        return os.path.join(CACHE_PATH, self.get_uuid())
+
+    def get_image(self, *args, **kwargs):
+        transformations = kwargs.pop('transformations', [])
+
         #size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None):
         #if zoom < ZOOM_MIN_LEVEL:
         #    zoom = ZOOM_MIN_LEVEL
@@ -538,20 +547,34 @@ class DocumentPage(models.Model):
 
         #file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version)
         #logger.debug('file_path: %s', file_path)
+        as_base64 = kwargs.pop('as_base64', False)
 
-        converter = Converter(file_object=self.document_version.open())
-        data = converter.convert(page=self.page_number)
-        #print "data!!!!", data.getvalue()
-        ##, *args, **kwargs):
-        return 'data:%s;base64,%s' % ('PNG', base64.b64encode(data.getvalue()))
+        cache_filename = self.get_cache_filename()
 
-        #if as_base64:
-        #    with open(file_path, 'r') as file_object:
-        #        #mimetype = get_mimetype(file_object=file_object, mimetype_only=True)[0]
-        #        base64_data = base64.b64encode(file_object.read())
-        #        return 'data:%s;base64,%s' % (mimetype, base64_data)
-        #else:
-        #    return file_path
+        if os.path.exists(cache_filename) and 0:
+            with open(cache_filename, 'rb') as file_object:
+                data = file_object.read()
+
+            if as_base64:
+                return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data))
+            else:
+                return data
+        else:
+            try:
+                converter = Converter(file_object=self.document_version.open())
+                image_buffer = converter.convert(page=self.page_number, output_format='PNG')
+                with open(cache_filename, 'wb+') as file_object:
+                    file_object.write(image_buffer.getvalue())
+            except:
+                fs_cleanup(cache_filename)
+                raise
+            else:
+                data = image_buffer.getvalue()
+                image_buffer.close()
+                if as_base64:
+                    return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data))
+                else:
+                    return data
 
 
 def argument_validator(value):
diff --git a/mayan/apps/documents/tasks.py b/mayan/apps/documents/tasks.py
index da9cef3fe5..26a06a23c3 100644
--- a/mayan/apps/documents/tasks.py
+++ b/mayan/apps/documents/tasks.py
@@ -9,16 +9,15 @@ from mayan.celery import app
 
 from common.models import SharedUploadedFile
 
-from .models import Document, DocumentType, DocumentVersion
+from .models import Document, DocumentPage, DocumentType, DocumentVersion
 
 logger = logging.getLogger(__name__)
 
 
 @app.task(compression='zlib')
-def task_get_document_image(document_id, *args, **kwargs):
-    document = Document.objects.get(pk=document_id)
-    first_page = document.latest_version.pages.first()
-    return first_page.get_image(*args, **kwargs)
+def task_get_document_page_image(document_page_id, *args, **kwargs):
+    document_page = DocumentPage.objects.get(pk=document_page_id)
+    return document_page.get_image(*args, **kwargs)
 
 
 @app.task(ignore_result=True)
diff --git a/mayan/apps/documents/views.py b/mayan/apps/documents/views.py
index 6c506abd10..3e01408aa1 100644
--- a/mayan/apps/documents/views.py
+++ b/mayan/apps/documents/views.py
@@ -57,7 +57,8 @@ from .settings import (
     ZOOM_MAX_LEVEL, ZOOM_MIN_LEVEL
 )
 from .tasks import (
-    task_clear_image_cache, task_get_document_image, task_update_page_count
+    task_clear_image_cache, task_get_document_page_image,
+    task_update_page_count
 )
 from .utils import parse_range
 
@@ -366,17 +367,15 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE):
 
     rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360
 
-    task = task_get_document_image.apply_async(kwargs=dict(document_id=document.pk, size=size, page=page, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter')
+    document_page = document.pages.get(page_number=page)
+
+    task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter')
     data = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)
 
-    response = HttpResponse(data, content_type='data/PNG')
-    #response['Content-Disposition'] = 'attachment; filename="somefilename.pdf"'
-
+    response = HttpResponse(data, content_type='image/png')
     return response
 
-    #print 'data!!!!!!!!!!!', task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)
-    #re
-
+    # TODO: remove sendfile
     #return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE)
 
 
diff --git a/mayan/apps/documents/widgets.py b/mayan/apps/documents/widgets.py
index 72b5858413..aba59d75d3 100644
--- a/mayan/apps/documents/widgets.py
+++ b/mayan/apps/documents/widgets.py
@@ -24,7 +24,7 @@ class DocumentPageImageWidget(forms.widgets.Widget):
         if value:
             output = []
             output.append('<div class="full-height scrollable mayan-page-wrapper-interactive" data-height-difference=230>')
-            output.append(document_html_widget(value.document, page=value.page_number, zoom=zoom, rotation=rotation, image_class='lazy-load-interactive', nolazyload=False, size=DISPLAY_SIZE))
+            output.append(document_html_widget(value, zoom=zoom, rotation=rotation, image_class='lazy-load-interactive', nolazyload=False, size=DISPLAY_SIZE))
             output.append('</div>')
             return mark_safe(''.join(output))
         else:
@@ -46,21 +46,16 @@ class DocumentPagesCarouselWidget(forms.widgets.Widget):
             document_pages = []
             total_pages = 0
 
-        # Reuse expensive values
-        latest_version_pk = value.latest_version.pk
-
         for page in document_pages:
             output.append('<div class="carousel-item">')
             output.append(
                 document_html_widget(
-                    page.document,
+                    page,
                     click_view='documents:document_page_view',
                     click_view_arguments=[page.pk],
-                    page=page.page_number,
                     fancybox_class='',
                     image_class='lazy-load-carousel',
                     size=DISPLAY_SIZE,
-                    version=latest_version_pk,
                     post_load_class='lazy-load-carousel-loaded',
                 )
             )
@@ -73,29 +68,25 @@ class DocumentPagesCarouselWidget(forms.widgets.Widget):
 
 
 def document_thumbnail(document, **kwargs):
-    return document_html_widget(document, click_view='documents:document_display', **kwargs)
+    return document_html_widget(document.latest_version.pages.first(), click_view='documents:document_display', **kwargs)
 
 
 def document_link(document):
     return mark_safe('<a href="%s">%s</a>' % (document.get_absolute_url(), document))
 
 
-def document_html_widget(document, click_view=None, click_view_arguments=None, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, gallery_name=None, fancybox_class='fancybox', version=None, image_class='lazy-load', title=None, size=THUMBNAIL_SIZE, nolazyload=False, post_load_class=None):
+def document_html_widget(document_page, click_view=None, click_view_arguments=None, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, gallery_name=None, fancybox_class='fancybox', image_class='lazy-load', title=None, size=THUMBNAIL_SIZE, nolazyload=False, post_load_class=None):
     result = []
 
     alt_text = _('Document page image')
 
-    if not version:
-        try:
-            version = document.latest_version.pk
-        except AttributeError:
-            version = None
+    document = document_page.document
+    page = document_page.page_number
 
     query_dict = {
         'page': page,
         'zoom': zoom,
         'rotation': rotation,
-        'version': version,
         'size': size,
     }
 
@@ -116,7 +107,12 @@ def document_html_widget(document, click_view=None, click_view_arguments=None, p
         title_template = ''
 
     if click_view:
-        result.append('<a %s class="%s" href="%s" %s>' % (gallery_template, fancybox_class, '%s?%s' % (reverse(click_view, args=click_view_arguments or [document.pk]), query_string), title_template))
+        result.append('<a {gallery_template} class="{fancybox_class}" href="{image_data}" {title_template}>'.format(
+            gallery_template=gallery_template,
+            fancybox_class=fancybox_class,
+            image_data='%s?%s' % (reverse(click_view, args=click_view_arguments or [document.pk]), query_string),
+            title_template=title_template
+        ))
 
     if nolazyload:
         result.append('<img class="img-nolazyload" src="%s" alt="%s" />' % (preview_view, alt_text))