Implement class-based converter transformations

2015-06-07 05:29:31 -04:00
parent f4752a3f3f
commit b256758db2
10 changed files with 212 additions and 231 deletions
--- a/mayan/apps/converter/__init__.py
+++ b/mayan/apps/converter/__init__.py
@@ -0,0 +1,4 @@
+from .classes import (
+    TransformationResize, TransformationRotate, TransformationZoom
+)
+from .runtime import converter_class
--- a/mayan/apps/converter/api.py
+++ b/mayan/apps/converter/api.py
@@ -15,7 +15,6 @@ from .literals import (
    DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE,
    TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR
 )
-from .runtime import backend, office_converter

 HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()

--- a/mayan/apps/converter/backends/__init__.py
+++ b/mayan/apps/converter/backends/__init__.py
@@ -1,16 +1 @@
-from __future__ import unicode_literals

-
-class ConverterBase(object):
-    """
-    Base class that all backend classes must inherit
-    """
-
-    def convert(self, input_data, ):
-        raise NotImplementedError()
-
-    def transform(self, input_data, transformations):
-        raise NotImplementedError()
-
-    def get_page_count(self, input_data):
-        raise NotImplementedError()
--- a/mayan/apps/converter/backends/graphicsmagick.py
+++ b/mayan/apps/converter/backends/graphicsmagick.py
@@ -2,7 +2,7 @@ from __future__ import unicode_literals

 import subprocess

-from . import ConverterBase
+from ..classes import ConverterBase
 from ..exceptions import ConvertError, IdentifyError, UnknownFileFormat
 from ..literals import (
    TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM
--- a/mayan/apps/converter/backends/imagemagick.py
+++ b/mayan/apps/converter/backends/imagemagick.py
@@ -2,7 +2,7 @@ from __future__ import unicode_literals

 import subprocess

-from . import ConverterBase
+from ..classes import ConverterBase
 from ..exceptions import ConvertError, IdentifyError, UnknownFileFormat
 from ..literals import (
    DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, DIMENSION_SEPARATOR,
--- a/mayan/apps/converter/backends/python.py
+++ b/mayan/apps/converter/backends/python.py
@@ -17,7 +17,7 @@ import sh
 from common.utils import fs_cleanup
 from mimetype.api import get_mimetype

-from . import ConverterBase
+from ..classes import ConverterBase
 from ..exceptions import ConvertError, UnknownFileFormat
 from ..literals import (
    DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, TRANSFORMATION_RESIZE,
@@ -37,18 +37,38 @@ logger = logging.getLogger(__name__)


 class Python(ConverterBase):
-    def get_page_count(self, file_object, mimetype=None):
+
+    def convert(self, *args, **kwargs):
+        super(Python, self).convert(*args, **kwargs)
+
+        if self.mime_type == 'application/pdf' and pdftoppm:
+
+            new_file_object, input_filepath = tempfile.mkstemp()
+
+            if self.soffice_file_object:
+                os.write(new_file_object, self.soffice_file_object.read())
+                self.soffice_file_object.close()
+            else:
+                os.write(new_file_object, self.file_object.read())
+                self.file_object.seek(0)
+
+            os.close(new_file_object)
+
+            image_buffer = io.BytesIO()
+            try:
+                pdftoppm(input_filepath, f=self.page_number + 1, l=self.page_number + 1, _out=image_buffer)
+                image_buffer.seek(0)
+                return Image.open(image_buffer)
+            finally:
+                fs_cleanup(input_filepath)
+
+    def get_page_count(self):
        page_count = 1

-        if not mimetype:
-            mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
-        else:
-            encoding = None
-
-        if mimetype == 'application/pdf':
+        if self.mime_type == 'application/pdf':
            # If file is a PDF open it with slate to determine the page count
            try:
-                pages = slate.PDF(file_object)
+                pages = slate.PDF(self.file_object)
            except Exception as exception:
                logger.error('slate exception; %s', exception)
                return 1
@@ -56,120 +76,22 @@ class Python(ConverterBase):
            else:
                return len(pages)
            finally:
-                file_object.seek(0)
+                self.file_object.seek(0)

        try:
-            image = Image.open(file_object)
-        except IOError:  # cannot identify image file
-            raise UnknownFileFormat
+            image = Image.open(self.file_object)
        finally:
-            file_object.seek(0)
+            self.file_object.seek(0)

        try:
            while True:
                image.seek(image.tell() + 1)
                page_count += 1
-                # do something to im
-        except EOFError:
-            pass  # end of sequence
-
-        return page_count
-
-    def convert(self, file_object, mimetype=None, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
-        if not mimetype:
-            mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
-
-        if mimetype == 'application/pdf' and pdftoppm:
-            image_buffer = io.BytesIO()
-
-            new_file_object, input_filepath = tempfile.mkstemp()
-            os.write(new_file_object, file_object.read())
-            os.close(new_file_object)
-
-            pdftoppm(input_filepath, f=page, l=page, _out=image_buffer)
-            image_buffer.seek(0)
-            image = Image.open(image_buffer)
-            fs_cleanup(input_filepath)
-        else:
-            image = Image.open(file_object)
-
-        current_page = 0
-        try:
-            while current_page == page - 1:
-                image.seek(image.tell() + 1)
-                current_page += 1
-                # do something to im
        except EOFError:
            # end of sequence
            pass

-        if image.mode not in ('L', 'RGB'):
-            image = image.convert('RGB')
+        return page_count

-        output = StringIO()
-        image.save(output, format=output_format)

-        return output

-    '''
-    try:
-        if transformations:
-            aspect = 1.0 * im.size[0] / im.size[1]
-            for transformation in transformations:
-                arguments = transformation.get('arguments')
-                if transformation['transformation'] == TRANSFORMATION_RESIZE:
-                    width = int(arguments.get('width', 0))
-                    height = int(arguments.get('height', 1.0 * width * aspect))
-                    im = self.resize(im, (width, height))
-                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
-                    decimal_value = float(arguments.get('percent', 100)) / 100
-                    im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1]))
-                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
-                    # PIL counter degress counter-clockwise, reverse them
-                    im = im.rotate(360 - arguments.get('degrees', 0))
-    except:
-        # Ignore all transformation error
-        pass
-    '''
-
-    # From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
-    def resize(self, img, box, fit=False, out=None):
-        """
-        Downsample the image.
-        @param img: Image -  an Image-object
-        @param box: tuple(x, y) - the bounding box of the result image
-        @param fit: boolean - crop the image to fill the box
-        @param out: file-like-object - save the image into the output stream
-        """
-        # preresize image with factor 2, 4, 8 and fast algorithm
-        factor = 1
-        while img.size[0] / factor > 2 * box[0] and img.size[1] * 2 / factor > 2 * box[1]:
-            factor *= 2
-        if factor > 1:
-            img.thumbnail((img.size[0] / factor, img.size[1] / factor), Image.NEAREST)
-
-        # calculate the cropping box and get the cropped part
-        if fit:
-            x1 = y1 = 0
-            x2, y2 = img.size
-            wRatio = 1.0 * x2 / box[0]
-            hRatio = 1.0 * y2 / box[1]
-            if hRatio > wRatio:
-                y1 = y2 / 2 - box[1] * wRatio / 2
-                y2 = y2 / 2 + box[1] * wRatio / 2
-            else:
-                x1 = x2 / 2 - box[0] * hRatio / 2
-                x2 = x2 / 2 + box[0] * hRatio / 2
-            img = img.crop((x1, y1, x2, y2))
-
-        # Resize the image with best quality algorithm ANTI-ALIAS
-        img.thumbnail(box, Image.ANTIALIAS)
-
-        if out:
-            # save it into a file-like object
-            img.save(out, 'JPEG', quality=75)
-        else:
-            return img
-
-        # if isinstance(self.regex, basestring):
-        #    self.regex = re.compile(regex)
--- a/mayan/apps/converter/classes.py
+++ b/mayan/apps/converter/classes.py
@@ -1,12 +1,21 @@
 from __future__ import unicode_literals

 import logging
+import io
 import os
 import subprocess
 from tempfile import mkstemp

+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+from PIL import Image
+
 from django.utils.encoding import smart_str
 from django.utils.module_loading import import_string
+from django.utils.translation import ugettext_lazy as _

 from common.settings import TEMPORARY_DIRECTORY
 from common.utils import fs_cleanup
@@ -18,7 +27,6 @@ from .literals import (
    DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE,
    TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR
 )
-from .office_converter import OfficeConverter
 from .settings import GRAPHICS_BACKEND, LIBREOFFICE_PATH

 CONVERTER_OFFICE_FILE_MIMETYPES = [
@@ -70,44 +78,7 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [
 logger = logging.getLogger(__name__)


-logger.debug('initializing office backend')
-try:
-    office_converter = OfficeConverter()
-except OfficeBackendError as exception:
-    logger.error('error initializing office backend; %s', exception)
-    office_converter = None
-else:
-    logger.debug('office_backend initialized')
-
-backend = import_string(GRAPHICS_BACKEND)()
-
-
-class BaseTransformation(object):
-    name = 'base_transformation'
-    arguments = ()
-
-    def __init__(self, **kwargs):
-        for argument_name in self.arguments:
-            setattr(self, argument_name, kwargs.get(argument_name))
-
-
-class TransformationResize(BaseTransformation):
-    name = 'resize'
-    arguments = ('width', 'height')
-
-
-class TransformationRotate(BaseTransformation):
-    name = 'rotate'
-    arguments = ('degrees',)
-
-
-class TransformationScale(BaseTransformation):
-    name = 'scale'
-    arguments = ('percent',)
-
-
-class Converter(object):
-
+class ConverterBase(object):
    @staticmethod
    def soffice(file_object):
        """
@@ -154,38 +125,140 @@ class Converter(object):
    def __init__(self, file_object, mime_type=None):
        self.file_object = file_object
        self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0]
-        self.temporary_files = []
+        self.soffice_file_object = None

-    def transform(self, transformations, page=DEFAULT_PAGE_NUMBER):
-        pass
+    def seek(self, page_number):
+        # page_number is zero-based (the first page is 0)
+        self.file_object.seek(0)
+
+        try:
+            self.image = Image.open(self.file_object)
+        except IOError:
+            # Cannot identify image file
+            self.image = self.convert(page_number=page_number)
+        else:
+            self.image.seek(page_number)
+            self.image.load()
+
+    def get_page(self, output_format=DEFAULT_FILE_FORMAT):
+        if not self.image:
+            self.seek(1)
+
+        image_buffer = StringIO()
+        self.image.save(image_buffer, format=output_format)
+        image_buffer.seek(0)
+
+        return image_buffer
+
+    def convert(self, page_number=DEFAULT_PAGE_NUMBER):
+        self.page_number = page_number
+
+        self.mime_type = 'application/pdf'

-    def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
            if os.path.exists(LIBREOFFICE_PATH):
+                if not self.soffice_file_object:
                    converted_output = Converter.soffice(self.file_object)
-                self.file_object.close()
-                self.file_object = open(converted_output)
+                    self.file_object.seek(0)
+                    self.soffice_file_object = open(converted_output)
                    self.mime_type = 'application/pdf'
-                self.temporary_file.append(converted_output)
+                    fs_cleanup(converted_output)
+                else:
+                    self.soffice_file_object.seek(0)
            else:
                # TODO: NO LIBREOFFICE FOUND ERROR
                pass

-        for temporary_file in self.temporary_files:
-            fs_cleanup(temporary_file)
+    def transform(self, transformation):
+        self.image = transformation.execute_on(self.image)

-        return backend.convert(file_object=self.file_object, mimetype=self.mime_type, output_format=output_format, page=page)
+    def transform_many(self, transformations):
+        for transformation in transformations:
+            self.image = transformation.execute_on(self.image)

    def get_page_count(self):
-        return backend.get_page_count(file_object)
+        raise NotImplementedError()


-'''
-def get_available_transformations_choices():
-    result = []
-    for transformation in backend.get_available_transformations():
-        result.append((transformation, TRANSFORMATION_CHOICES[transformation]['label']))
+class BaseTransformation(object):
+    name = 'base_transformation'
+    arguments = ()

-    return result
-'''
+    _registry = {}

+    @classmethod
+    def get_transformations_classes(cls):
+        return map(lambda name: getattr(cls, name), filter(lambda entry: entry.startswith('Transform'), dir(cls)))
+
+    @classmethod
+    def get_transformations_choices(cls):
+        return [(transformation.name, transformation.label) for transformation in cls.get_transformations_classes()]
+
+    def __init__(self, **kwargs):
+        for argument_name in self.arguments:
+            setattr(self, argument_name, kwargs.get(argument_name))
+
+    def execute_on(self, image):
+        self.image = image
+        self.aspect = 1.0 * image.size[0] / image.size[1]
+
+
+class TransformationResize(BaseTransformation):
+    name = 'resize'
+    arguments = ('width', 'height')
+    label = _('Resize')
+
+    def execute_on(self, *args, **kwargs):
+        super(TransformationResize, self).execute_on(*args, **kwargs)
+        fit = False
+
+        width = int(self.width)
+        height = int(self.height or 1.0 * width * self.aspect)
+
+        factor = 1
+        while self.image.size[0] / factor > 2 * width and self.image.size[1] * 2 / factor > 2 * height:
+            factor *= 2
+        if factor > 1:
+            self.image.thumbnail((self.image.size[0] / factor, self.image.size[1] / factor), Image.NEAREST)
+
+        # calculate the cropping box and get the cropped part
+        if fit:
+            x1 = y1 = 0
+            x2, y2 = self.image.size
+            wRatio = 1.0 * x2 / width
+            hRatio = 1.0 * y2 / height
+            if hRatio > wRatio:
+                y1 = y2 / 2 - height * wRatio / 2
+                y2 = y2 / 2 + height * wRatio / 2
+            else:
+                x1 = x2 / 2 - width * hRatio / 2
+                x2 = x2 / 2 + width * hRatio / 2
+            self.image = self.image.crop((x1, y1, x2, y2))
+
+        # Resize the image with the best-quality (ANTIALIAS) filter
+        self.image.thumbnail((width, height), Image.ANTIALIAS)
+
+        return self.image
+
+
+class TransformationRotate(BaseTransformation):
+    name = 'rotate'
+    arguments = ('degrees',)
+    label = _('Rotate')
+
+    def execute_on(self, *args, **kwargs):
+        super(TransformationRotate, self).execute_on(*args, **kwargs)
+
+        return self.image.rotate(360 - self.degrees)
+
+
+class TransformationZoom(BaseTransformation):
+    name = 'zoom'
+    arguments = ('percent',)
+    label = _('Zoom')
+
+    def execute_on(self, *args, **kwargs):
+        super(TransformationZoom, self).execute_on(*args, **kwargs)
+
+        decimal_value = float(self.percent) / 100
+        return self.image.resize((int(self.image.size[0] * decimal_value), int(self.image.size[1] * decimal_value)), Image.ANTIALIAS)
--- a/mayan/apps/converter/runtime.py
+++ b/mayan/apps/converter/runtime.py
@@ -4,20 +4,7 @@ import logging

 from django.utils.module_loading import import_string

-from .exceptions import OfficeBackendError
-from .office_converter import OfficeConverter
 from .settings import GRAPHICS_BACKEND

 logger = logging.getLogger(__name__)
-
-logger.debug('initializing office backend')
-try:
-    office_converter = OfficeConverter()
-except OfficeBackendError as exception:
-    logger.error('error initializing office backend; %s', exception)
-    office_converter = None
-else:
-    logger.debug('office_backend initialized')
-
-
-backend = import_string(GRAPHICS_BACKEND)()
+backend = converter_class = import_string(GRAPHICS_BACKEND)
--- a/mayan/apps/documents/models.py
+++ b/mayan/apps/documents/models.py
@@ -18,7 +18,9 @@ from django.utils.translation import ugettext_lazy as _
 from acls.utils import apply_default_acls
 from common.settings import TEMPORARY_DIRECTORY
 from common.utils import fs_cleanup
-from converter.classes import Converter
+from converter import (
+    converter_class, TransformationResize, TransformationRotate, TransformationZoom
+)
 from converter.exceptions import UnknownFileFormat
 from converter.literals import (
    DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, DEFAULT_PAGE_NUMBER
@@ -355,7 +357,7 @@ class DocumentVersion(models.Model):
        #self.save_to_file(filepath)
        try:
            with self.open() as file_object:
-                converter = Converter(file_object=file_object, mimetype=self.mimetype)
+                converter = converter_class(file_object=file_object, mimetype=self.mimetype)
                detected_pages = converter.get_page_count()
        except UnknownFileFormat:
            # If converter backend doesn't understand the format,
@@ -536,45 +538,53 @@ class DocumentPage(models.Model):
    def get_image(self, *args, **kwargs):
        transformations = kwargs.pop('transformations', [])

-        #size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None):
-        #if zoom < ZOOM_MIN_LEVEL:
-        #    zoom = ZOOM_MIN_LEVEL
+        size = kwargs.pop('size', DISPLAY_SIZE)
+        rotation = kwargs.pop('rotation', DEFAULT_ROTATION)
+        zoom_level = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL)

-        #if zoom > ZOOM_MAX_LEVEL:
-        #    zoom = ZOOM_MAX_LEVEL
+        if zoom_level < ZOOM_MIN_LEVEL:
+            zoom_level = ZOOM_MIN_LEVEL

-        #rotation = rotation % 360
+        if zoom_level > ZOOM_MAX_LEVEL:
+            zoom_level = ZOOM_MAX_LEVEL
+
+        rotation = rotation % 360

-        #file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version)
-        #logger.debug('file_path: %s', file_path)
        as_base64 = kwargs.pop('as_base64', False)

        cache_filename = self.get_cache_filename()

-        if os.path.exists(cache_filename) and 0:
-            with open(cache_filename) as file_object:
-                data = file_object.read()
+        if os.path.exists(cache_filename):
+            converter = converter_class(file_object=open(cache_filename))

-            if as_base64:
-                return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data))
-            else:
-                return data
+            converter.seek(0)
        else:
            try:
-                converter = Converter(file_object=self.document_version.open())
-                image_buffer = converter.convert(page=self.page_number, output_format='PNG')
+                converter = converter_class(file_object=self.document_version.open())
+                converter.seek(page_number=self.page_number - 1)
+
+                page_image = converter.get_page()
                with open(cache_filename, 'wb+') as file_object:
-                    file_object.write(image_buffer.getvalue())
+                    file_object.write(page_image.getvalue())
            except:
                fs_cleanup(cache_filename)
                raise
-            else:
-                data = image_buffer.getvalue()
-                image_buffer.close()
+
+        if rotation:
+            converter.transform(transformation=TransformationRotate(degrees=rotation))
+
+        if size:
+            converter.transform(transformation=TransformationResize(**dict(zip(('width', 'height'), (size.split('x'))))))
+
+        if zoom_level:
+            converter.transform(transformation=TransformationZoom(percent=zoom_level))
+
+        page_image = converter.get_page()
+
        if as_base64:
-                    return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data))
+            return 'data:%s;base64,%s' % ('image/png', base64.b64encode(page_image.getvalue()))
        else:
-                    return data
+            return page_image


 def argument_validator(value):
--- a/mayan/apps/documents/views.py
+++ b/mayan/apps/documents/views.py
@@ -372,10 +372,11 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE):
    task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter')
    data = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)

-    response = HttpResponse(data, content_type='image')
-    return response
+    return HttpResponse(data, content_type='image')

    # TODO: remove sendfile
+    # TODO: test whether the Celery result store can hold binary blobs;
+    # otherwise switch to full base64 in JS
    #return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE)