Added GraphicsMagick support by means of user-selectable graphics conversion backends

This commit is contained in:
Roberto Rosario
2011-03-31 12:23:58 -04:00
parent c1bb3a90bb
commit 18b9964786
10 changed files with 150 additions and 69 deletions

View File

@@ -4,16 +4,22 @@ import subprocess
import tempfile
import shutil
from django.utils.importlib import import_module
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.template.defaultfilters import slugify
from converter.conf.settings import CONVERT_PATH
from converter.conf.settings import UNPAPER_PATH
from converter.conf.settings import IDENTIFY_PATH
from converter.conf.settings import OCR_OPTIONS
from converter.conf.settings import DEFAULT_OPTIONS
from converter.conf.settings import LOW_QUALITY_OPTIONS
from converter.conf.settings import HIGH_QUALITY_OPTIONS
from converter.conf.settings import GRAPHICS_BACKEND
from exceptions import ConvertError, UnknownFormat, UnpaperError, \
IdentifyError, UnkownConvertError
#from converter.conf.settings import UNOCONV_PATH
from common import TEMPORARY_DIRECTORY
from converter import TRANFORMATION_CHOICES
@@ -26,28 +32,19 @@ QUALITY_HIGH = 'quality_high'
QUALITY_SETTINGS = {QUALITY_DEFAULT:DEFAULT_OPTIONS, QUALITY_LOW:LOW_QUALITY_OPTIONS,
QUALITY_HIGH:HIGH_QUALITY_OPTIONS}
CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format'
def _lazy_load(fn):
_cached = []
def _decorated():
if not _cached:
_cached.append(fn())
return _cached[0]
return _decorated
class ConvertError(Exception):
pass
class UnknownFormat(ConvertError):
pass
class UnpaperError(ConvertError):
pass
@_lazy_load
def _get_backend():
return import_module(GRAPHICS_BACKEND)
class IdentifyError(ConvertError):
pass
class UnkownConvertError(ConvertError):
pass
backend = _get_backend()
def cleanup(filename):
''' tries to remove the given filename. Ignores non-existent files '''
@@ -56,26 +53,6 @@ def cleanup(filename):
except OSError:
pass
#TODO: Timeout & kill child
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
command = []
command.append(CONVERT_PATH)
command.extend(shlex.split(str(QUALITY_SETTINGS[quality])))
command.append(input_filepath)
if arguments:
command.extend(shlex.split(str(arguments)))
command.append(output_filepath)
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
#Got an error from convert program
error_line = proc.stderr.readline()
if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
#Try to determine from error message which class of error is it
raise UnknownFormat
else:
raise ConvertError(error_line)
def execute_unpaper(input_filepath, output_filepath):
command = []
command.append(UNPAPER_PATH)
@@ -97,17 +74,7 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
shutil.copyfileobj(proc.stdout, output)
return (proc.wait(), proc.stderr.read())
"""
def execute_identify(input_filepath, arguments):
command = []
command.append(IDENTIFY_PATH)
command.extend(shlex.split(str(arguments)))
command.append(input_filepath)
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
raise IdentifyError(proc.stderr.readline())
return proc.stdout.read()
def cache_cleanup(input_filepath, size, page=0, format='jpg'):
filepath = create_image_cache_filename(input_filepath, size, page, format)
@@ -159,7 +126,7 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
try:
input_arg = '%s[%s]' % (input_filepath, page)
extra_options += ' -resize %s' % size
execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality)
backend.execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality)
finally:
if cleanup_files:
cleanup(input_filepath)
@@ -170,7 +137,7 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
def get_page_count(input_filepath):
try:
return int(execute_identify(input_filepath, '-format %n'))
return int(backend.execute_identify(input_filepath, '-format %n'))
except Exception, e:
#TODO: send to other page number identifying program
return 1
@@ -211,13 +178,13 @@ def convert_document_for_ocr(document, page=0, format='tif'):
tranformation_string = ' '.join(transformation_list)
try:
#Apply default transformations
execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file)
backend.execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file)
#Do OCR operations
execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
backend.execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
# Process by unpaper
execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file)
# Convert to tif
execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
backend.execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
finally:
cleanup(transformation_output_file)
cleanup(unpaper_input_file)

View File

View File

View File

@@ -0,0 +1,42 @@
import shlex
import subprocess
from converter.conf.settings import GM_PATH
from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, UnpaperError, \
IdentifyError, UnkownConvertError
CONVERTER_ERROR_STRING_NO_DECODER = 'No decode delegate for this image format'
def execute_identify(input_filepath, arguments):
    """
    Run GraphicsMagick's ``gm identify`` on a file and return its output.

    input_filepath -- path of the file to inspect; passed as the last
                      command line argument.
    arguments      -- command line options as a single string; it is
                      tokenized with shlex before being handed to the
                      program.

    Returns whatever the program wrote to its standard output.
    Raises IdentifyError, carrying the first line of the program's standard
    error, when the program exits with a non-zero status.
    """
    command = [GM_PATH, u'identify']
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child to exit.
    # Calling wait() first can deadlock: the child blocks once it fills a
    # pipe buffer that nobody is reading.
    stdout_data, stderr_data = proc.communicate()
    if proc.returncode != 0:
        # Report only the first line of the error output, as before
        error_lines = stderr_data.splitlines(True)
        if error_lines:
            error_line = error_lines[0]
        else:
            error_line = stderr_data
        raise IdentifyError(error_line)
    return stdout_data
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
    """
    Convert a file with GraphicsMagick's ``gm convert``.

    input_filepath  -- source file specification, placed after the quality
                       options on the command line.
    output_filepath -- destination file specification, placed last.
    quality         -- key into QUALITY_SETTINGS selecting the option string
                       that is placed before the input file.
    arguments       -- optional extra options as a single string, placed
                       between the input and the output file.

    Raises UnknownFormat when the program fails and the first line of its
    standard error contains the "no decode delegate" message, and
    ConvertError (carrying that first line) for any other failure.
    """
    command = [GM_PATH, u'convert']
    command.extend(shlex.split(str(QUALITY_SETTINGS[quality])))
    command.append(input_filepath)
    if arguments:
        command.extend(shlex.split(str(arguments)))
    command.append(output_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child to exit.
    # Calling wait() first can deadlock: the child blocks once it fills a
    # pipe buffer that nobody is reading.
    stdout_data, stderr_data = proc.communicate()
    if proc.returncode != 0:
        # Got an error from the convert program; inspect only the first
        # line of its error output, as before
        error_lines = stderr_data.splitlines(True)
        if error_lines:
            error_line = error_lines[0]
        else:
            error_line = stderr_data
        if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
            # Try to determine from the error message which class of error it is
            raise UnknownFormat
        else:
            raise ConvertError(error_line)

View File

@@ -0,0 +1,41 @@
import shlex
import subprocess
from converter.conf.settings import IM_IDENTIFY_PATH
from converter.conf.settings import IM_CONVERT_PATH
from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, UnpaperError, \
IdentifyError, UnkownConvertError
CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format'
def execute_identify(input_filepath, arguments):
    """
    Run ImageMagick's ``identify`` program on a file and return its output.

    input_filepath -- path of the file to inspect; passed as the last
                      command line argument.
    arguments      -- command line options as a single string; it is
                      tokenized with shlex before being handed to the
                      program.

    Returns whatever the program wrote to its standard output.
    Raises IdentifyError, carrying the first line of the program's standard
    error, when the program exits with a non-zero status.
    """
    command = [IM_IDENTIFY_PATH]
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child to exit.
    # Calling wait() first can deadlock: the child blocks once it fills a
    # pipe buffer that nobody is reading.
    stdout_data, stderr_data = proc.communicate()
    if proc.returncode != 0:
        # Report only the first line of the error output, as before
        error_lines = stderr_data.splitlines(True)
        if error_lines:
            error_line = error_lines[0]
        else:
            error_line = stderr_data
        raise IdentifyError(error_line)
    return stdout_data
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
    """
    Convert a file with ImageMagick's ``convert`` program.

    input_filepath  -- source file specification, placed after the quality
                       options on the command line.
    output_filepath -- destination file specification, placed last.
    quality         -- key into QUALITY_SETTINGS selecting the option string
                       that is placed before the input file.
    arguments       -- optional extra options as a single string, placed
                       between the input and the output file.

    Raises UnknownFormat when the program fails and the first line of its
    standard error contains the "no decode delegate" message, and
    ConvertError (carrying that first line) for any other failure.
    """
    command = [IM_CONVERT_PATH]
    command.extend(shlex.split(str(QUALITY_SETTINGS[quality])))
    command.append(input_filepath)
    if arguments:
        command.extend(shlex.split(str(arguments)))
    command.append(output_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child to exit.
    # Calling wait() first can deadlock: the child blocks once it fills a
    # pipe buffer that nobody is reading.
    stdout_data, stderr_data = proc.communicate()
    if proc.returncode != 0:
        # Got an error from the convert program; inspect only the first
        # line of its error output, as before
        error_lines = stderr_data.splitlines(True)
        if error_lines:
            error_line = error_lines[0]
        else:
            error_line = stderr_data
        if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
            # Try to determine from the error message which class of error it is
            raise UnknownFormat
        else:
            raise ConvertError(error_line)

View File

@@ -2,9 +2,12 @@ from django.conf import settings
from django.utils.translation import ugettext_lazy as _
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
# Path to ImageMagick's convert program; overridable through the
# CONVERTER_IM_CONVERT_PATH project setting.
IM_CONVERT_PATH = getattr(settings, 'CONVERTER_IM_CONVERT_PATH', u'/usr/bin/convert')
IM_IDENTIFY_PATH = getattr(settings, 'CONVERTER_IM_IDENTIFY_PATH', u'/usr/bin/identify')
UNPAPER_PATH = getattr(settings, 'CONVERTER_UNPAPER_PATH', u'/usr/bin/unpaper')
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
GM_PATH = getattr(settings, 'CONVERTER_GM_PATH', u'/usr/bin/gm')
GRAPHICS_BACKEND = getattr(settings, 'CONVERTER_GRAPHICS_BACKEND', u'converter.backends.imagemagick')
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
@@ -12,7 +15,9 @@ HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-de
setting_description = {
'CONVERTER_CONVERT_PATH':_(u'File path to imagemagick\'s convert program.'),
'CONVERTER_IDENTIFY_PATH':_(u'File path to imagemagick\'s identify program.'),
'CONVERTER_IM_CONVERT_PATH':_(u'File path to imagemagick\'s convert program.'),
'CONVERTER_IM_IDENTIFY_PATH':_(u'File path to imagemagick\'s identify program.'),
'CONVERTER_GM_PATH':_(u'File path to graphicsmagick\'s program.'),
'CONVERTER_UNPAPER_PATH':_(u'File path to unpaper program.'),
'CONVERTER_GRAPHICS_BACKEND':_(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.'),
}

View File

@@ -0,0 +1,18 @@
class ConvertError(Exception):
    """Base class of every exception defined in this module."""
    pass


class UnknownFormat(ConvertError):
    """
    The conversion program reported that it has no decoder for the input
    file's image format.
    """
    pass


class UnpaperError(ConvertError):
    """Failure related to the unpaper step (presumably raised by its caller)."""
    pass


class IdentifyError(ConvertError):
    """The identify program exited with a non-zero status."""
    pass


class UnkownConvertError(ConvertError):
    """
    Conversion failure of an undetermined kind.

    The class name is misspelled, but existing code imports it under this
    name, so it is kept as is.
    """
    pass


# Correctly spelled alias for new code; both names refer to the same class
UnknownConvertError = UnkownConvertError

View File

@@ -17,8 +17,8 @@ from django.core.files.uploadedfile import SimpleUploadedFile
import sendfile
from common.utils import pretty_size
from converter.api import convert, in_image_cache, QUALITY_DEFAULT, \
UnkownConvertError, UnknownFormat
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
from converter.exceptions import UnkownConvertError, UnknownFormat
from converter import TRANFORMATION_CHOICES
from filetransfers.api import serve_file
from filesystem_serving.api import document_create_fs_links, document_delete_fs_links

View File

@@ -61,15 +61,22 @@ def check_settings(request):
'description':common_settings.setting_description},
# Converter
{'name':'CONVERTER_CONVERT_PATH',
'value':converter_settings.CONVERT_PATH, 'exists':True,
{'name':'CONVERTER_IM_CONVERT_PATH',
'value':converter_settings.IM_CONVERT_PATH, 'exists':True,
'description':converter_settings.setting_description},
{'name':'CONVERTER_UNPAPER_PATH',
'value':converter_settings.UNPAPER_PATH, 'exists':True,
'description':converter_settings.setting_description},
{'name':'CONVERTER_IDENTIFY_PATH',
'value':converter_settings.IDENTIFY_PATH, 'exists':True,
{'name':'CONVERTER_IM_IDENTIFY_PATH',
'value':converter_settings.IM_IDENTIFY_PATH, 'exists':True,
'description':converter_settings.setting_description},
{'name':'CONVERTER_GM_PATH',
'value':converter_settings.GM_PATH, 'exists':True,
'description':converter_settings.setting_description},
{'name':'CONVERTER_GRAPHICS_BACKEND',
'value':converter_settings.GRAPHICS_BACKEND,
'description':converter_settings.setting_description},
{'name':'CONVERTER_OCR_OPTIONS', 'value':converter_settings.OCR_OPTIONS},
{'name':'CONVERTER_DEFAULT_OPTIONS', 'value':converter_settings.DEFAULT_OPTIONS},
{'name':'CONVERTER_LOW_QUALITY_OPTIONS', 'value':converter_settings.LOW_QUALITY_OPTIONS},

View File

@@ -212,10 +212,11 @@ TEMPLATE_CONTEXT_PROCESSORS = (
#CONVERTER_DEFAULT_OPTIONS = u''
#CONVERTER_LOW_QUALITY_OPTIONS = u''
#CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400'
#CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
#CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify'
#CONVERTER_IM_CONVERT_PATH = u'/usr/bin/convert'
#CONVERTER_IM_IDENTIFY_PATH = u'/usr/bin/identify'
#CONVERTER_UNPAPER_PATH = u'/usr/bin/unpaper'
#CONVERTER_GRAPHICS_BACKEND = u'converter.backends.imagemagick'
# OCR
#OCR_TESSERACT_PATH = u'/usr/bin/tesseract'