From ed11e242ca8800e4fad5f234fca234755bcba213 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 18 Nov 2014 00:35:56 -0400 Subject: [PATCH] Issue #23, Experiment with pdftoppm --- mayan/apps/converter/backends/python.py | 59 ++++++++++--------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py index 57ab6bbab9..c6dad6bb1a 100644 --- a/mayan/apps/converter/backends/python.py +++ b/mayan/apps/converter/backends/python.py @@ -1,16 +1,24 @@ from __future__ import absolute_import +import logging import os import tempfile +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + import slate from PIL import Image +import sh try: - import ghostscript - USE_GHOSTSCRIPT = True -except RuntimeError: - USE_GHOSTSCRIPT = False + pdftoppm = sh.Command('/usr/bin/pdftoppm') +except sh.CommandNotFound: + pdftoppm = None +else: + pdftoppm = pdftoppm.bake('-png') from common.utils import fs_cleanup from mimetype.api import get_mimetype @@ -22,6 +30,7 @@ from ..literals import (DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, TRANSFORMATION_ZOOM) Image.init() +logger = logging.getLogger(__name__) class Python(ConverterBase): @@ -46,7 +55,7 @@ class Python(ConverterBase): raise UnknownFileFormat try: - while 1: + while True: im.seek(im.tell() + 1) page_count += 1 # do something to im @@ -61,38 +70,16 @@ class Python(ConverterBase): if not mimetype: mimetype, encoding = get_mimetype(open(input_filepath, 'rb'), input_filepath, mimetype_only=True) - if mimetype == 'application/pdf' and USE_GHOSTSCRIPT: - # If file is a PDF open it with ghostscript and convert it to - # TIFF - first_page_tmpl = '-dFirstPage=%d' % page - last_page_tmpl = '-dLastPage=%d' % page - fd, tmpfile = tempfile.mkstemp() - os.close(fd) - output_file_tmpl = '-sOutputFile=%s' % tmpfile - input_file_tmpl = '-f%s' % input_filepath - args = [ - 'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH', - '-dNOPAUSE', '-dNOPROMPT', - first_page_tmpl, last_page_tmpl, - '-sDEVICE=jpeg', '-dJPEGQ=95', - '-r150', output_file_tmpl, - input_file_tmpl, - '-c "60000000 setvmthreshold"', # use 30MB - '-dNOGC', # No garbage collection - '-dMaxBitmap=500000000', - '-dAlignToPixels=0', - '-dGridFitTT=0', - '-dTextAlphaBits=4', - '-dGraphicsAlphaBits=4', - ] - - ghostscript.Ghostscript(*args) - page = 1 # Don't execute the following while loop - input_filepath = tmpfile - try: - im = Image.open(input_filepath) - except Exception: + if mimetype == 'application/pdf' and pdftoppm: + image_buffer = StringIO() + pdftoppm(input_filepath, f=page, l=page, _out=image_buffer) + image_buffer.seek(0) + im = Image.open(image_buffer) + else: + im = Image.open(input_filepath) + except Exception as exception: + logger.error('Error converting image; %s', exception) # Python Imaging Library doesn't recognize it as an image raise UnknownFileFormat finally: