Added GraphicsMagick support by means of user-selectable graphics conversion backends
This commit is contained in:
@@ -4,16 +4,22 @@ import subprocess
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
from django.utils.importlib import import_module
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
from django.template.defaultfilters import slugify
|
||||
|
||||
from converter.conf.settings import CONVERT_PATH
|
||||
|
||||
from converter.conf.settings import UNPAPER_PATH
|
||||
from converter.conf.settings import IDENTIFY_PATH
|
||||
from converter.conf.settings import OCR_OPTIONS
|
||||
from converter.conf.settings import DEFAULT_OPTIONS
|
||||
from converter.conf.settings import LOW_QUALITY_OPTIONS
|
||||
from converter.conf.settings import HIGH_QUALITY_OPTIONS
|
||||
from converter.conf.settings import GRAPHICS_BACKEND
|
||||
|
||||
from exceptions import ConvertError, UnknownFormat, UnpaperError, \
|
||||
IdentifyError, UnkownConvertError
|
||||
|
||||
#from converter.conf.settings import UNOCONV_PATH
|
||||
from common import TEMPORARY_DIRECTORY
|
||||
from converter import TRANFORMATION_CHOICES
|
||||
@@ -26,28 +32,19 @@ QUALITY_HIGH = 'quality_high'
|
||||
QUALITY_SETTINGS = {QUALITY_DEFAULT:DEFAULT_OPTIONS, QUALITY_LOW:LOW_QUALITY_OPTIONS,
|
||||
QUALITY_HIGH:HIGH_QUALITY_OPTIONS}
|
||||
|
||||
CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format'
|
||||
def _lazy_load(fn):
|
||||
_cached = []
|
||||
def _decorated():
|
||||
if not _cached:
|
||||
_cached.append(fn())
|
||||
return _cached[0]
|
||||
return _decorated
|
||||
|
||||
|
||||
class ConvertError(Exception):
    """Base class for converter errors.

    execute_convert raises it with the first stderr line when the convert
    program exits with a non-zero return code.
    """
|
||||
|
||||
|
||||
class UnknownFormat(ConvertError):
    """Raised when the convert program reports that it has no decode delegate
    for the image format."""
|
||||
|
||||
|
||||
class UnpaperError(ConvertError):
    """Error associated with the unpaper program.

    NOTE(review): no raise site is visible in this section; confirm against
    execute_unpaper.
    """
|
||||
@_lazy_load
def _get_backend():
    """Import and return the module named by the GRAPHICS_BACKEND setting."""
    backend_module = import_module(GRAPHICS_BACKEND)
    return backend_module
|
||||
|
||||
|
||||
class IdentifyError(ConvertError):
    """Raised by execute_identify when the identify program exits with a
    non-zero return code."""
|
||||
|
||||
|
||||
class UnkownConvertError(ConvertError):
    """Conversion error of undetermined cause.

    NOTE(review): no raise site is visible in this section. The misspelled
    class name is kept because other modules import it under this name.
    """
|
||||
|
||||
# Load the module named by GRAPHICS_BACKEND once, at import time; the
# conversion helpers in this module call its execute_convert and
# execute_identify functions.
backend = _get_backend()
|
||||
|
||||
def cleanup(filename):
|
||||
''' tries to remove the given filename. Ignores non-existent files '''
|
||||
@@ -56,26 +53,6 @@ def cleanup(filename):
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
#TODO: Timeout & kill child
|
||||
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
    """Run the convert program (CONVERT_PATH) on ``input_filepath``.

    The command line is built as: program, the options registered in
    QUALITY_SETTINGS for ``quality``, the input path, the optional extra
    ``arguments`` (split with shlex), and finally the output path.

    Raises:
        UnknownFormat: the process failed and the first stderr line contains
            CONVERTER_ERROR_STRING_NO_DECODER.
        ConvertError: the process failed for any other reason; the message
            is the first stderr line.
    """
    command = [CONVERT_PATH]
    command.extend(shlex.split(str(QUALITY_SETTINGS[quality])))
    command.append(input_filepath)
    if arguments:
        command.extend(shlex.split(str(arguments)))
    command.append(output_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; calling
    # wait() first can deadlock once the child fills a pipe buffer.
    _, errors = proc.communicate()
    if proc.returncode != 0:
        # Only the first stderr line is inspected and reported.
        error_line = errors.splitlines(True)[0] if errors else errors
        if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
            # Use the error message to determine which class of error this is.
            raise UnknownFormat
        raise ConvertError(error_line)
|
||||
|
||||
def execute_unpaper(input_filepath, output_filepath):
|
||||
command = []
|
||||
command.append(UNPAPER_PATH)
|
||||
@@ -97,17 +74,7 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
|
||||
shutil.copyfileobj(proc.stdout, output)
|
||||
return (proc.wait(), proc.stderr.read())
|
||||
"""
|
||||
def execute_identify(input_filepath, arguments):
    """Run the identify program (IDENTIFY_PATH) and return its stdout.

    ``arguments`` is split with shlex and placed before ``input_filepath``
    on the command line.

    Raises:
        IdentifyError: the process exited with a non-zero return code; the
            message is the first line written to stderr.
    """
    command = [IDENTIFY_PATH]
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; calling
    # wait() first can deadlock once the child fills a pipe buffer.
    output, errors = proc.communicate()
    if proc.returncode != 0:
        raise IdentifyError(errors.splitlines(True)[0] if errors else errors)
    return output
|
||||
|
||||
def cache_cleanup(input_filepath, size, page=0, format='jpg'):
|
||||
filepath = create_image_cache_filename(input_filepath, size, page, format)
|
||||
@@ -159,7 +126,7 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
|
||||
try:
|
||||
input_arg = '%s[%s]' % (input_filepath, page)
|
||||
extra_options += ' -resize %s' % size
|
||||
execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality)
|
||||
backend.execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality)
|
||||
finally:
|
||||
if cleanup_files:
|
||||
cleanup(input_filepath)
|
||||
@@ -170,7 +137,7 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
|
||||
|
||||
def get_page_count(input_filepath):
|
||||
try:
|
||||
return int(execute_identify(input_filepath, '-format %n'))
|
||||
return int(backend.execute_identify(input_filepath, '-format %n'))
|
||||
except Exception, e:
|
||||
#TODO: send to other page number identifying program
|
||||
return 1
|
||||
@@ -211,13 +178,13 @@ def convert_document_for_ocr(document, page=0, format='tif'):
|
||||
tranformation_string = ' '.join(transformation_list)
|
||||
try:
|
||||
#Apply default transformations
|
||||
execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file)
|
||||
backend.execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file)
|
||||
#Do OCR operations
|
||||
execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
|
||||
backend.execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
|
||||
# Process by unpaper
|
||||
execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file)
|
||||
# Convert to tif
|
||||
execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
|
||||
backend.execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
|
||||
finally:
|
||||
cleanup(transformation_output_file)
|
||||
cleanup(unpaper_input_file)
|
||||
|
||||
0
apps/converter/backends/__init__.py
Normal file
0
apps/converter/backends/__init__.py
Normal file
0
apps/converter/backends/base.py
Normal file
0
apps/converter/backends/base.py
Normal file
42
apps/converter/backends/graphicsmagick.py
Normal file
42
apps/converter/backends/graphicsmagick.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from converter.conf.settings import GM_PATH
|
||||
from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
|
||||
from converter.exceptions import ConvertError, UnknownFormat, UnpaperError, \
|
||||
IdentifyError, UnkownConvertError
|
||||
|
||||
CONVERTER_ERROR_STRING_NO_DECODER = 'No decode delegate for this image format'
|
||||
|
||||
def execute_identify(input_filepath, arguments):
    """Run ``gm identify`` (GM_PATH) and return what it wrote to stdout.

    ``arguments`` is split with shlex and placed before ``input_filepath``
    on the command line.

    Raises:
        IdentifyError: the process exited with a non-zero return code; the
            message is the first line written to stderr.
    """
    command = [GM_PATH, u'identify']
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; calling
    # wait() first can deadlock once the child fills a pipe buffer.
    output, errors = proc.communicate()
    if proc.returncode != 0:
        raise IdentifyError(errors.splitlines(True)[0] if errors else errors)
    return output
|
||||
|
||||
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
    """Run ``gm convert`` (GM_PATH) on ``input_filepath``.

    The command line is built as: ``gm convert``, the options registered in
    QUALITY_SETTINGS for ``quality``, the input path, the optional extra
    ``arguments`` (split with shlex), and finally the output path.

    Raises:
        UnknownFormat: the process failed and the first stderr line contains
            CONVERTER_ERROR_STRING_NO_DECODER.
        ConvertError: the process failed for any other reason; the message
            is the first stderr line.
    """
    command = [GM_PATH, u'convert']
    command.extend(shlex.split(str(QUALITY_SETTINGS[quality])))
    command.append(input_filepath)
    if arguments:
        command.extend(shlex.split(str(arguments)))
    command.append(output_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; calling
    # wait() first can deadlock once the child fills a pipe buffer.
    _, errors = proc.communicate()
    if proc.returncode != 0:
        # Only the first stderr line is inspected and reported.
        error_line = errors.splitlines(True)[0] if errors else errors
        if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
            # Use the error message to determine which class of error this is.
            raise UnknownFormat
        raise ConvertError(error_line)
|
||||
41
apps/converter/backends/imagemagick.py
Normal file
41
apps/converter/backends/imagemagick.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from converter.conf.settings import IM_IDENTIFY_PATH
|
||||
from converter.conf.settings import IM_CONVERT_PATH
|
||||
from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
|
||||
from converter.exceptions import ConvertError, UnknownFormat, UnpaperError, \
|
||||
IdentifyError, UnkownConvertError
|
||||
|
||||
CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format'
|
||||
|
||||
def execute_identify(input_filepath, arguments):
    """Run ImageMagick's identify (IM_IDENTIFY_PATH) and return its stdout.

    ``arguments`` is split with shlex and placed before ``input_filepath``
    on the command line.

    Raises:
        IdentifyError: the process exited with a non-zero return code; the
            message is the first line written to stderr.
    """
    command = [IM_IDENTIFY_PATH]
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; calling
    # wait() first can deadlock once the child fills a pipe buffer.
    output, errors = proc.communicate()
    if proc.returncode != 0:
        raise IdentifyError(errors.splitlines(True)[0] if errors else errors)
    return output
|
||||
|
||||
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
    """Run ImageMagick's convert (IM_CONVERT_PATH) on ``input_filepath``.

    The command line is built as: program, the options registered in
    QUALITY_SETTINGS for ``quality``, the input path, the optional extra
    ``arguments`` (split with shlex), and finally the output path.

    Raises:
        UnknownFormat: the process failed and the first stderr line contains
            CONVERTER_ERROR_STRING_NO_DECODER.
        ConvertError: the process failed for any other reason; the message
            is the first stderr line.
    """
    command = [IM_CONVERT_PATH]
    command.extend(shlex.split(str(QUALITY_SETTINGS[quality])))
    command.append(input_filepath)
    if arguments:
        command.extend(shlex.split(str(arguments)))
    command.append(output_filepath)

    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; calling
    # wait() first can deadlock once the child fills a pipe buffer.
    _, errors = proc.communicate()
    if proc.returncode != 0:
        # Only the first stderr line is inspected and reported.
        error_line = errors.splitlines(True)[0] if errors else errors
        if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
            # Use the error message to determine which class of error this is.
            raise UnknownFormat
        raise ConvertError(error_line)
|
||||
@@ -2,9 +2,12 @@ from django.conf import settings
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
|
||||
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
|
||||
# Read the documented setting name CONVERTER_IM_CONVERT_PATH. The misspelled
# 'CONVERTER_IM_ONVERT_PATH' is still honoured as a fallback so deployments
# that configured the old key keep working.
IM_CONVERT_PATH = getattr(settings, 'CONVERTER_IM_CONVERT_PATH', getattr(settings, 'CONVERTER_IM_ONVERT_PATH', u'/usr/bin/convert'))
|
||||
IM_IDENTIFY_PATH = getattr(settings, 'CONVERTER_IM_IDENTIFY_PATH', u'/usr/bin/identify')
|
||||
UNPAPER_PATH = getattr(settings, 'CONVERTER_UNPAPER_PATH', u'/usr/bin/unpaper')
|
||||
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
|
||||
GM_PATH = getattr(settings, 'CONVERTER_GM_PATH', u'/usr/bin/gm')
|
||||
GRAPHICS_BACKEND = getattr(settings, 'CONVERTER_GRAPHICS_BACKEND', u'converter.backends.imagemagick')
|
||||
|
||||
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
|
||||
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
|
||||
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
|
||||
@@ -12,7 +15,9 @@ HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-de
|
||||
|
||||
|
||||
setting_description = {
|
||||
'CONVERTER_CONVERT_PATH':_(u'File path to imagemagick\'s convert program.'),
|
||||
'CONVERTER_IDENTIFY_PATH':_(u'File path to imagemagick\'s identify program.'),
|
||||
'CONVERTER_IM_CONVERT_PATH':_(u'File path to imagemagick\'s convert program.'),
|
||||
'CONVERTER_IM_IDENTIFY_PATH':_(u'File path to imagemagick\'s identify program.'),
|
||||
'CONVERTER_GM_PATH':_(u'File path to graphicsmagick\'s program.'),
|
||||
'CONVERTER_UNPAPER_PATH':_(u'File path to unpaper program.'),
|
||||
'CONVERTER_GRAPHICS_BACKEND':_(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.'),
|
||||
}
|
||||
|
||||
18
apps/converter/exceptions.py
Normal file
18
apps/converter/exceptions.py
Normal file
@@ -0,0 +1,18 @@
|
||||
class ConvertError(Exception):
    """Base class for converter errors.

    The backends' execute_convert functions raise it with the first stderr
    line when the convert program exits with a non-zero return code.
    """
|
||||
|
||||
|
||||
class UnknownFormat(ConvertError):
    """Raised when the convert program reports that it has no decode delegate
    for the image format."""
|
||||
|
||||
|
||||
class UnpaperError(ConvertError):
    """Error associated with the unpaper program.

    NOTE(review): no raise site is visible in this section; confirm against
    execute_unpaper.
    """
|
||||
|
||||
|
||||
class IdentifyError(ConvertError):
    """Raised by the backends' execute_identify functions when the identify
    program exits with a non-zero return code."""
|
||||
|
||||
|
||||
class UnkownConvertError(ConvertError):
    """Conversion error of undetermined cause.

    NOTE(review): no raise site is visible in this section. The misspelled
    class name is kept because other modules import it under this name.
    """
|
||||
@@ -17,8 +17,8 @@ from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
|
||||
import sendfile
|
||||
from common.utils import pretty_size
|
||||
from converter.api import convert, in_image_cache, QUALITY_DEFAULT, \
|
||||
UnkownConvertError, UnknownFormat
|
||||
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
|
||||
from converter.exceptions import UnkownConvertError, UnknownFormat
|
||||
from converter import TRANFORMATION_CHOICES
|
||||
from filetransfers.api import serve_file
|
||||
from filesystem_serving.api import document_create_fs_links, document_delete_fs_links
|
||||
|
||||
@@ -61,15 +61,22 @@ def check_settings(request):
|
||||
'description':common_settings.setting_description},
|
||||
|
||||
# Converter
|
||||
{'name':'CONVERTER_CONVERT_PATH',
|
||||
'value':converter_settings.CONVERT_PATH, 'exists':True,
|
||||
{'name':'CONVERTER_IM_CONVERT_PATH',
|
||||
'value':converter_settings.IM_CONVERT_PATH, 'exists':True,
|
||||
'description':converter_settings.setting_description},
|
||||
{'name':'CONVERTER_UNPAPER_PATH',
|
||||
'value':converter_settings.UNPAPER_PATH, 'exists':True,
|
||||
'description':converter_settings.setting_description},
|
||||
{'name':'CONVERTER_IDENTIFY_PATH',
|
||||
'value':converter_settings.IDENTIFY_PATH, 'exists':True,
|
||||
{'name':'CONVERTER_IM_IDENTIFY_PATH',
|
||||
'value':converter_settings.IM_IDENTIFY_PATH, 'exists':True,
|
||||
'description':converter_settings.setting_description},
|
||||
{'name':'CONVERTER_GM_PATH',
|
||||
'value':converter_settings.GM_PATH, 'exists':True,
|
||||
'description':converter_settings.setting_description},
|
||||
{'name':'CONVERTER_GRAPHICS_BACKEND',
|
||||
'value':converter_settings.GRAPHICS_BACKEND,
|
||||
'description':converter_settings.setting_description},
|
||||
|
||||
{'name':'CONVERTER_OCR_OPTIONS', 'value':converter_settings.OCR_OPTIONS},
|
||||
{'name':'CONVERTER_DEFAULT_OPTIONS', 'value':converter_settings.DEFAULT_OPTIONS},
|
||||
{'name':'CONVERTER_LOW_QUALITY_OPTIONS', 'value':converter_settings.LOW_QUALITY_OPTIONS},
|
||||
|
||||
@@ -212,10 +212,11 @@ TEMPLATE_CONTEXT_PROCESSORS = (
|
||||
#CONVERTER_DEFAULT_OPTIONS = u''
|
||||
#CONVERTER_LOW_QUALITY_OPTIONS = u''
|
||||
#CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400'
|
||||
#CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
|
||||
#CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
|
||||
#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify'
|
||||
#CONVERTER_IM_CONVERT_PATH = u'/usr/bin/convert'
|
||||
#CONVERTER_IM_IDENTIFY_PATH = u'/usr/bin/identify'
|
||||
#CONVERTER_UNPAPER_PATH = u'/usr/bin/unpaper'
|
||||
#CONVERTER_GRAPHICS_BACKEND = u'converter.backends.imagemagick'
|
||||
|
||||
# OCR
|
||||
#OCR_TESSERACT_PATH = u'/usr/bin/tesseract'
|
||||
|
||||
Reference in New Issue
Block a user