Unify the way backends are defined and loaded, unify the fs_cleanup function

2014-07-01 00:22:31 -04:00
parent 0255ea67b1
commit a9390d55ba
21 changed files with 208 additions and 235 deletions
--- a/mayan/apps/converter/backends/python.py
+++ b/mayan/apps/converter/backends/python.py
@@ -0,0 +1,199 @@
+from __future__ import absolute_import
+
+import os
+import tempfile
+
+import slate
+from PIL import Image
+
+# ghostscript is optional: USE_GHOSTSCRIPT records whether it could be
+# imported so the backend can fall back to PIL-only behavior without it.
+try:
+    import ghostscript
+    USE_GHOSTSCRIPT = True
+except (ImportError, RuntimeError):
+    # ImportError: the ghostscript Python package is not installed.
+    # RuntimeError: raised while importing the package itself (presumably
+    # when the native Ghostscript library cannot be located - confirm
+    # against the python-ghostscript version in use).
+    USE_GHOSTSCRIPT = False
+
+from common.utils import fs_cleanup
+from mimetype.api import get_mimetype
+
+from . import ConverterBase
+from ..exceptions import UnknownFileFormat
+from ..literals import (TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
+    TRANSFORMATION_ZOOM, DEFAULT_PAGE_NUMBER, DEFAULT_FILE_FORMAT)
+
+Image.init()
+
+
+class Python(ConverterBase):
+    """
+    Converter backend that relies on the Python Imaging Library, on slate
+    to count PDF pages and, when it could be imported, on ghostscript to
+    rasterize PDF pages.
+    """
+
+    def get_page_count(self, input_filepath):
+        """
+        Return the number of pages (or image frames) found in a file.
+
+        PDF files are counted with slate; a PDF that slate fails to parse
+        is reported as having a single page.  Any other file is opened
+        with PIL and its frames are counted.
+
+        Raises UnknownFileFormat when PIL cannot identify the file.
+        """
+        # Close the descriptor handed to the MIME type detection instead of
+        # leaving it open until it is garbage collected
+        with open(input_filepath, 'rb') as descriptor:
+            mimetype, encoding = get_mimetype(descriptor, input_filepath, mimetype_only=True)
+
+        if mimetype == 'application/pdf':
+            # If file is a PDF open it with slate to determine the page
+            # count.  PDF content is binary, so open it in binary mode.
+            with open(input_filepath, 'rb') as fd:
+                try:
+                    pages = slate.PDF(fd)
+                except Exception:
+                    # TODO: Maybe raise UnknownFileFormat to display a
+                    # proper unknown file format message in the document
+                    # description
+                    return 1
+            return len(pages)
+
+        try:
+            im = Image.open(input_filepath)
+        except IOError:  # cannot identify image file
+            raise UnknownFileFormat
+
+        # Step through the frames one by one until PIL signals that there
+        # are no more of them
+        page_count = 1
+        try:
+            while True:
+                im.seek(im.tell() + 1)
+                page_count += 1
+        except EOFError:
+            pass  # end of sequence
+
+        return page_count
+
+    def convert_file(self, input_filepath, output_filepath, transformations=None, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT, **kwargs):
+        """
+        Render one page of input_filepath into output_filepath.
+
+        @param input_filepath: path of the file to convert
+        @param output_filepath: path where the resulting image is saved
+        @param transformations: optional list of dictionaries with the keys
+            'transformation' (one of the TRANSFORMATION_* literals) and
+            'arguments' (dictionary of arguments for that transformation)
+        @param page: 1-based number of the page to render
+        @param file_format: PIL format name used to save the result
+        @param kwargs: may carry 'mimetype' to skip the MIME type detection
+
+        Raises UnknownFileFormat when PIL cannot open the file.  Errors
+        raised while applying transformations are ignored on purpose.
+        """
+        tmpfile = None
+        mimetype = kwargs.get('mimetype', None)
+        if not mimetype:
+            # Close the descriptor once the MIME type has been determined
+            with open(input_filepath, 'rb') as descriptor:
+                mimetype, encoding = get_mimetype(descriptor, input_filepath, mimetype_only=True)
+
+        # The temporary file is created inside the try block so that it is
+        # removed even when ghostscript itself fails
+        try:
+            if mimetype == 'application/pdf' and USE_GHOSTSCRIPT:
+                # If file is a PDF render the requested page with
+                # ghostscript (JPEG device) into a temporary file
+                fd, tmpfile = tempfile.mkstemp()
+                os.close(fd)
+                args = [
+                    'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH',
+                    '-dNOPAUSE', '-dNOPROMPT',
+                    '-dFirstPage=%d' % page, '-dLastPage=%d' % page,
+                    '-sDEVICE=jpeg', '-dJPEGQ=95',
+                    '-r150', '-sOutputFile=%s' % tmpfile,
+                    '-f%s' % input_filepath,
+                    # NOTE(review): the value is 60000000, not 30MB as the
+                    # previous comment claimed; also confirm ghostscript
+                    # accepts '-c' and its program as a single argument
+                    '-c "60000000 setvmthreshold"',
+                    '-dNOGC',  # No garbage collection
+                    '-dMaxBitmap=500000000',
+                    '-dAlignToPixels=0',
+                    '-dGridFitTT=0',
+                    '-dTextAlphaBits=4',
+                    '-dGraphicsAlphaBits=4',
+                ]
+
+                ghostscript.Ghostscript(*args)
+                # The temporary file holds only the requested page, so no
+                # frame seeking must happen below
+                page = 1
+                input_filepath = tmpfile
+
+            try:
+                im = Image.open(input_filepath)
+            except Exception:
+                # Python Imaging Library doesn't recognize it as an image
+                raise UnknownFileFormat
+        finally:
+            # NOTE(review): the temporary file is removed while PIL may
+            # still read from it lazily; this relies on the open descriptor
+            # outliving the unlink - confirm for the target platforms
+            if tmpfile:
+                fs_cleanup(tmpfile)
+
+        # Advance frame by frame until the requested (1-based) page is the
+        # current one; nothing is done for the first page
+        current_page = 0
+        try:
+            while current_page < page - 1:
+                im.seek(im.tell() + 1)
+                current_page += 1
+        except EOFError:
+            # end of sequence
+            pass
+
+        try:
+            if transformations:
+                # width / height ratio of the page
+                aspect = 1.0 * im.size[0] / im.size[1]
+                for transformation in transformations:
+                    arguments = transformation.get('arguments')
+                    if transformation['transformation'] == TRANSFORMATION_RESIZE:
+                        width = int(arguments.get('width', 0))
+                        # Without an explicit height keep the proportions:
+                        # height = width / (width / height ratio)
+                        height = int(arguments.get('height', 1.0 * width / aspect))
+                        im = self.resize(im, (width, height))
+                    elif transformation['transformation'] == TRANSFORMATION_ZOOM:
+                        decimal_value = float(arguments.get('percent', 100)) / 100
+                        im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1]))
+                    elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                        # PIL counts degrees counter-clockwise, reverse them
+                        im = im.rotate(360 - arguments.get('degrees', 0))
+        except Exception:
+            # Ignore all transformation errors (deliberate best effort)
+            pass
+
+        if im.mode not in ('L', 'RGB'):
+            im = im.convert('RGB')
+
+        im.save(output_filepath, format=file_format)
+
+    def get_format_list(self):
+        """
+        Introspect PIL's internal registry to obtain a list of the
+        supported file types.  'GBR' is reported as 'GBR_PIL', and 'PDF'
+        and 'PS' are added when ghostscript could be imported.
+        """
+        formats = [
+            'GBR_PIL' if format_name == 'GBR' else format_name
+            for format_name in Image.ID
+        ]
+
+        if USE_GHOSTSCRIPT:
+            formats.extend(['PDF', 'PS'])
+
+        return formats
+
+    def get_available_transformations(self):
+        """
+        Return the list of transformations convert_file knows how to apply.
+        """
+        return [
+            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
+            TRANSFORMATION_ZOOM
+        ]
+
+    # From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
+    def resize(self, img, box, fit=False, out=None):
+        """
+        Downsample the image.  The image passed in is shrunk in place by
+        the thumbnail calls.
+        @param img: Image -  an Image-object
+        @param box: tuple(x, y) - the bounding box of the result image
+        @param fit: boolean - crop the image to fill the box
+        @param out: file-like-object - save the image into the output stream
+        @return: the resized image when out is not given, otherwise None
+        """
+        # preresize image with factor 2, 4, 8 and fast algorithm; floor
+        # division keeps the sizes integral regardless of Python version
+        factor = 1
+        while img.size[0] // factor > 2 * box[0] and img.size[1] * 2 // factor > 2 * box[1]:
+            factor *= 2
+        if factor > 1:
+            img.thumbnail((img.size[0] // factor, img.size[1] // factor), Image.NEAREST)
+
+        # calculate the cropping box and get the cropped part
+        if fit:
+            x1 = y1 = 0
+            x2, y2 = img.size
+            wRatio = 1.0 * x2 / box[0]
+            hRatio = 1.0 * y2 / box[1]
+            if hRatio > wRatio:
+                # Too tall for the box: trim top and bottom around the center
+                y1 = y2 // 2 - box[1] * wRatio / 2
+                y2 = y2 // 2 + box[1] * wRatio / 2
+            else:
+                # Too wide for the box: trim left and right around the center
+                x1 = x2 // 2 - box[0] * hRatio / 2
+                x2 = x2 // 2 + box[0] * hRatio / 2
+            img = img.crop((x1, y1, x2, y2))
+
+        # Resize the image with best quality algorithm ANTI-ALIAS
+        img.thumbnail(box, Image.ANTIALIAS)
+
+        if out:
+            # save it into a file-like object
+            img.save(out, 'JPEG', quality=75)
+        else:
+            return img
+
+        # if isinstance(self.regex, basestring):
+        #    self.regex = re.compile(regex)