Added PDF file support to the python converter backend via ghostscript

2011-07-19 20:55:08 -04:00
parent 57fed7608a
commit 8a017e2af0
7 changed files with 137 additions and 55 deletions
--- a/apps/converter/backends/python/base.py
+++ b/apps/converter/backends/python/base.py
@@ -1,8 +1,14 @@
+import tempfile
+import os
+
 import slate
 from PIL import Image
+import ghostscript

 from django.utils.translation import ugettext_lazy as _

+from common.utils import get_mimetype
+
 from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS
 from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
 from converter.backends import ConverterBase
@@ -10,7 +16,7 @@ from converter.literals import TRANSFORMATION_RESIZE, \
    TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM
 from converter.literals import QUALITY_DEFAULT, DEFAULT_PAGE_NUMBER, \
    DEFAULT_FILE_FORMAT
-from converter.utils import get_mimetype
+from converter.utils import cleanup


 class ConverterClass(ConverterBase):
@@ -43,10 +49,44 @@ class ConverterClass(ConverterBase):
        return page_count
    
    def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
+        tmpfile = None
+        mimetype, encoding = get_mimetype(input_filepath)
+        if mimetype == 'application/pdf':
+            # If the file is a PDF, render the requested page with ghostscript
+            # into a temporary JPEG file
+            first_page_tmpl = '-dFirstPage=%d' % page
+            last_page_tmpl = '-dLastPage=%d' % page
+            fd, tmpfile = tempfile.mkstemp()
+            os.close(fd)
+            output_file_tmpl = '-sOutputFile=%s' % tmpfile
+            input_file_tmpl = '-f%s' % input_filepath
+            args = [
+                'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH',
+                '-dNOPAUSE', '-dNOPROMPT', 
+                first_page_tmpl, last_page_tmpl,
+                '-sDEVICE=jpeg', '-dJPEGQ=75',
+                '-r300', output_file_tmpl,
+                input_file_tmpl,
+                '-c "60000000 setvmthreshold"',  # use 30MB
+                '-dNOGC',  # No garbage collection
+                '-dMaxBitmap=500000000',
+                '-dAlignToPixels=0',
+                '-dGridFitTT=0',
+                '-dTextAlphaBits=4',
+                '-dGraphicsAlphaBits=4',                
+            ] 
+
+            ghostscript.Ghostscript(*args)
+            page = 1 # Don't execute the following while loop
+            input_filepath = tmpfile    
+
        try:
            im = Image.open(input_filepath)
        except Exception: # Python Imaging Library doesn't recognize it as an image
            raise UnknownFormat
+        finally:
+            if tmpfile:
+                cleanup(tmpfile)
        
        current_page = 0
        try:
@@ -58,12 +98,12 @@ class ConverterClass(ConverterBase):
            pass # end of sequence        

        if transformations:
+            aspect = 1.0 * im.size[0] / im.size[1]
            for transformation in transformations:
-                aspect = 1.0 * im.size[1] / im.size[0]
                if transformation['transformation'] == TRANSFORMATION_RESIZE:
                    width = int(transformation['arguments']['width'])
                    height = int(transformation['arguments'].get('height', 1.0 * width * aspect))
-                    im = im.resize((width, height), Image.ANTIALIAS)
+                    im = self.resize(im, (width, height))
                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
                    decimal_value = float(transformation['arguments']['percent']) / 100
                    im = im.transform((im.size[0] * decimal_value, im.size[1] * decimal_value), Image.EXTENT, (0, 0, im.size[0], im.size[1])) 
@@ -73,6 +113,7 @@ class ConverterClass(ConverterBase):

        if im.mode not in ('L', 'RGB'):
            im = im.convert('RGB')
+            
        im.save(output_filepath, format=file_format)

    def get_format_list(self):
@@ -91,3 +132,41 @@ class ConverterClass(ConverterBase):
            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \
            TRANSFORMATION_ZOOM
        ]
+
+    # From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
+    def resize(self, img, box, fit=False, out=None):
+        '''Downsample the image so that it fits inside box.
+        Image.thumbnail() works in place, so the img passed in may be modified.
+        @param img: Image -  an Image-object
+        @param box: tuple(x, y) - the bounding box of the result image
+        @param fit: boolean - crop the image to fill the box
+        @param out: file-like-object - save the image into the output stream
+        @return: the resized Image, or None when it was saved into out
+        '''
+        # Pre-shrink by 2, 4, 8... (fast NEAREST) while both sides exceed 2x box
+        factor = 1
+        while img.size[0] // factor > 2 * box[0] and img.size[1] // factor > 2 * box[1]:
+            factor *= 2
+        if factor > 1:
+            img.thumbnail((img.size[0] // factor, img.size[1] // factor), Image.NEAREST)
+
+        # Calculate the cropping box and get the cropped part
+        if fit:
+            x1 = y1 = 0
+            x2, y2 = img.size
+            wRatio = 1.0 * x2/box[0]
+            hRatio = 1.0 * y2/box[1]
+            if hRatio > wRatio:
+                y1 = y2/2-box[1]*wRatio/2
+                y2 = y2/2+box[1]*wRatio/2
+            else:
+                x1 = x2/2-box[0]*hRatio/2
+                x2 = x2/2+box[0]*hRatio/2
+            img = img.crop((x1,y1,x2,y2))
+
+        # Final resize with the best quality filter (ANTIALIAS)
+        img.thumbnail(box, Image.ANTIALIAS)
+
+        if not out:
+            return img
+        img.save(out, "JPEG", quality=75)  # JPEG bytes go to out; returns None