diff --git a/apps/converter/api.py b/apps/converter/api.py index 6fbc0d840b..4347bae6f6 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -4,16 +4,22 @@ import subprocess import tempfile import shutil +from django.utils.importlib import import_module +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured from django.template.defaultfilters import slugify -from converter.conf.settings import CONVERT_PATH + from converter.conf.settings import UNPAPER_PATH -from converter.conf.settings import IDENTIFY_PATH from converter.conf.settings import OCR_OPTIONS from converter.conf.settings import DEFAULT_OPTIONS from converter.conf.settings import LOW_QUALITY_OPTIONS from converter.conf.settings import HIGH_QUALITY_OPTIONS +from converter.conf.settings import GRAPHICS_BACKEND +from exceptions import ConvertError, UnknownFormat, UnpaperError, \ + IdentifyError, UnkownConvertError + #from converter.conf.settings import UNOCONV_PATH from common import TEMPORARY_DIRECTORY from converter import TRANFORMATION_CHOICES @@ -26,28 +32,19 @@ QUALITY_HIGH = 'quality_high' QUALITY_SETTINGS = {QUALITY_DEFAULT:DEFAULT_OPTIONS, QUALITY_LOW:LOW_QUALITY_OPTIONS, QUALITY_HIGH:HIGH_QUALITY_OPTIONS} -CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format' +def _lazy_load(fn): + _cached = [] + def _decorated(): + if not _cached: + _cached.append(fn()) + return _cached[0] + return _decorated - -class ConvertError(Exception): - pass - - -class UnknownFormat(ConvertError): - pass - - -class UnpaperError(ConvertError): - pass +@_lazy_load +def _get_backend(): + return import_module(GRAPHICS_BACKEND) - -class IdentifyError(ConvertError): - pass - - -class UnkownConvertError(ConvertError): - pass - +backend = _get_backend() def cleanup(filename): ''' tries to remove the given filename. 
Ignores non-existent files ''' @@ -56,26 +53,6 @@ def cleanup(filename): except OSError: pass -#TODO: Timeout & kill child -def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): - command = [] - command.append(CONVERT_PATH) - command.extend(shlex.split(str(QUALITY_SETTINGS[quality]))) - command.append(input_filepath) - if arguments: - command.extend(shlex.split(str(arguments))) - command.append(output_filepath) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - #Got an error from convert program - error_line = proc.stderr.readline() - if CONVERTER_ERROR_STRING_NO_DECODER in error_line: - #Try to determine from error message which class of error is it - raise UnknownFormat - else: - raise ConvertError(error_line) - def execute_unpaper(input_filepath, output_filepath): command = [] command.append(UNPAPER_PATH) @@ -97,17 +74,7 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''): shutil.copyfileobj(proc.stdout, output) return (proc.wait(), proc.stderr.read()) """ -def execute_identify(input_filepath, arguments): - command = [] - command.append(IDENTIFY_PATH) - command.extend(shlex.split(str(arguments))) - command.append(input_filepath) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise IdentifyError(proc.stderr.readline()) - return proc.stdout.read() def cache_cleanup(input_filepath, size, page=0, format='jpg'): filepath = create_image_cache_filename(input_filepath, size, page, format) @@ -159,7 +126,7 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f try: input_arg = '%s[%s]' % (input_filepath, page) extra_options += ' -resize %s' % size - execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), 
quality=quality) + backend.execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality) finally: if cleanup_files: cleanup(input_filepath) @@ -170,7 +137,7 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f def get_page_count(input_filepath): try: - return int(execute_identify(input_filepath, '-format %n')) + return int(backend.execute_identify(input_filepath, '-format %n')) except Exception, e: #TODO: send to other page number identifying program return 1 @@ -211,13 +178,13 @@ def convert_document_for_ocr(document, page=0, format='tif'): tranformation_string = ' '.join(transformation_list) try: #Apply default transformations - execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file) + backend.execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file) #Do OCR operations - execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) + backend.execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) # Process by unpaper execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file) # Convert to tif - execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file) + backend.execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file) finally: cleanup(transformation_output_file) cleanup(unpaper_input_file) diff --git a/apps/converter/backends/__init__.py b/apps/converter/backends/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/converter/backends/base.py b/apps/converter/backends/base.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/apps/converter/backends/graphicsmagick.py b/apps/converter/backends/graphicsmagick.py new file mode 100644 index 0000000000..d70a3b9844 --- /dev/null +++ b/apps/converter/backends/graphicsmagick.py @@ -0,0 +1,42 @@ +import shlex +import subprocess + +from converter.conf.settings import GM_PATH +from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS +from converter.exceptions import ConvertError, UnknownFormat, UnpaperError, \ + IdentifyError, UnkownConvertError + +CONVERTER_ERROR_STRING_NO_DECODER = 'No decode delegate for this image format' + +def execute_identify(input_filepath, arguments): + command = [] + command.append(GM_PATH) + command.append(u'identify') + command.extend(shlex.split(str(arguments))) + command.append(input_filepath) + + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise IdentifyError(proc.stderr.readline()) + return proc.stdout.read() + +def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): + command = [] + command.append(GM_PATH) + command.append(u'convert') + command.extend(shlex.split(str(QUALITY_SETTINGS[quality]))) + command.append(input_filepath) + if arguments: + command.extend(shlex.split(str(arguments))) + command.append(output_filepath) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + #Got an error from convert program + error_line = proc.stderr.readline() + if CONVERTER_ERROR_STRING_NO_DECODER in error_line: + #Try to determine from error message which class of error is it + raise UnknownFormat + else: + raise ConvertError(error_line) diff --git a/apps/converter/backends/imagemagick.py b/apps/converter/backends/imagemagick.py new file mode 100644 index 0000000000..12a8df04e5 --- /dev/null +++ b/apps/converter/backends/imagemagick.py @@ -0,0 +1,41 @@ +import shlex +import 
subprocess + +from converter.conf.settings import IM_IDENTIFY_PATH +from converter.conf.settings import IM_CONVERT_PATH +from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS +from converter.exceptions import ConvertError, UnknownFormat, UnpaperError, \ + IdentifyError, UnkownConvertError + +CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format' + +def execute_identify(input_filepath, arguments): + command = [] + command.append(IM_IDENTIFY_PATH) + command.extend(shlex.split(str(arguments))) + command.append(input_filepath) + + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise IdentifyError(proc.stderr.readline()) + return proc.stdout.read() + +def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): + command = [] + command.append(IM_CONVERT_PATH) + command.extend(shlex.split(str(QUALITY_SETTINGS[quality]))) + command.append(input_filepath) + if arguments: + command.extend(shlex.split(str(arguments))) + command.append(output_filepath) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + #Got an error from convert program + error_line = proc.stderr.readline() + if CONVERTER_ERROR_STRING_NO_DECODER in error_line: + #Try to determine from error message which class of error is it + raise UnknownFormat + else: + raise ConvertError(error_line) diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py index 9ac3689d70..e44dc940d9 100644 --- a/apps/converter/conf/settings.py +++ b/apps/converter/conf/settings.py @@ -2,9 +2,12 @@ from django.conf import settings from django.utils.translation import ugettext_lazy as _ -CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert') +IM_CONVERT_PATH = getattr(settings, 'CONVERTER_IM_CONVERT_PATH', u'/usr/bin/convert') 
+IM_IDENTIFY_PATH = getattr(settings, 'CONVERTER_IM_IDENTIFY_PATH', u'/usr/bin/identify') UNPAPER_PATH = getattr(settings, 'CONVERTER_UNPAPER_PATH', u'/usr/bin/unpaper') -IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify') +GM_PATH = getattr(settings, 'CONVERTER_GM_PATH', u'/usr/bin/gm') +GRAPHICS_BACKEND = getattr(settings, 'CONVERTER_GRAPHICS_BACKEND', u'converter.backends.imagemagick') + OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200') DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'') LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'') @@ -12,7 +15,9 @@ HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-de setting_description = { - 'CONVERTER_CONVERT_PATH':_(u'File path to imagemagick\'s convert program.'), - 'CONVERTER_IDENTIFY_PATH':_(u'File path to imagemagick\'s identify program.'), + 'CONVERTER_IM_CONVERT_PATH':_(u'File path to imagemagick\'s convert program.'), + 'CONVERTER_IM_IDENTIFY_PATH':_(u'File path to imagemagick\'s identify program.'), + 'CONVERTER_GM_PATH':_(u'File path to graphicsmagick\'s program.'), 'CONVERTER_UNPAPER_PATH':_(u'File path to unpaper program.'), + 'CONVERTER_GRAPHICS_BACKEND':_(u'Graphics conversion backend to use. 
Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.'), } diff --git a/apps/converter/exceptions.py b/apps/converter/exceptions.py new file mode 100644 index 0000000000..2a57f39d21 --- /dev/null +++ b/apps/converter/exceptions.py @@ -0,0 +1,18 @@ +class ConvertError(Exception): + pass + + +class UnknownFormat(ConvertError): + pass + + +class UnpaperError(ConvertError): + pass + + +class IdentifyError(ConvertError): + pass + + +class UnkownConvertError(ConvertError): + pass diff --git a/apps/documents/views.py b/apps/documents/views.py index 2595659c15..50501db5ab 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -17,8 +17,8 @@ from django.core.files.uploadedfile import SimpleUploadedFile import sendfile from common.utils import pretty_size -from converter.api import convert, in_image_cache, QUALITY_DEFAULT, \ - UnkownConvertError, UnknownFormat +from converter.api import convert, in_image_cache, QUALITY_DEFAULT +from converter.exceptions import UnkownConvertError, UnknownFormat from converter import TRANFORMATION_CHOICES from filetransfers.api import serve_file from filesystem_serving.api import document_create_fs_links, document_delete_fs_links diff --git a/apps/main/views.py b/apps/main/views.py index d79d4d4193..06c83b7a44 100644 --- a/apps/main/views.py +++ b/apps/main/views.py @@ -61,15 +61,22 @@ def check_settings(request): 'description':common_settings.setting_description}, # Converter - {'name':'CONVERTER_CONVERT_PATH', - 'value':converter_settings.CONVERT_PATH, 'exists':True, + {'name':'CONVERTER_IM_CONVERT_PATH', + 'value':converter_settings.IM_CONVERT_PATH, 'exists':True, 'description':converter_settings.setting_description}, {'name':'CONVERTER_UNPAPER_PATH', 'value':converter_settings.UNPAPER_PATH, 'exists':True, 'description':converter_settings.setting_description}, - {'name':'CONVERTER_IDENTIFY_PATH', - 'value':converter_settings.IDENTIFY_PATH, 'exists':True, + {'name':'CONVERTER_IM_IDENTIFY_PATH', + 
'value':converter_settings.IM_IDENTIFY_PATH, 'exists':True, 'description':converter_settings.setting_description}, + {'name':'CONVERTER_GM_PATH', + 'value':converter_settings.GM_PATH, 'exists':True, + 'description':converter_settings.setting_description}, + {'name':'CONVERTER_GRAPHICS_BACKEND', + 'value':converter_settings.GRAPHICS_BACKEND, + 'description':converter_settings.setting_description}, + {'name':'CONVERTER_OCR_OPTIONS', 'value':converter_settings.OCR_OPTIONS}, {'name':'CONVERTER_DEFAULT_OPTIONS', 'value':converter_settings.DEFAULT_OPTIONS}, {'name':'CONVERTER_LOW_QUALITY_OPTIONS', 'value':converter_settings.LOW_QUALITY_OPTIONS}, diff --git a/settings.py b/settings.py index c6f9f71ed6..af91f835ea 100644 --- a/settings.py +++ b/settings.py @@ -212,10 +212,11 @@ TEMPLATE_CONTEXT_PROCESSORS = ( #CONVERTER_DEFAULT_OPTIONS = u'' #CONVERTER_LOW_QUALITY_OPTIONS = u'' #CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400' -#CONVERTER_CONVERT_PATH = u'/usr/bin/convert' #CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200' -#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify' +#CONVERTER_IM_CONVERT_PATH = u'/usr/bin/convert' +#CONVERTER_IM_IDENTIFY_PATH = u'/usr/bin/identify' #CONVERTER_UNPAPER_PATH = u'/usr/bin/unpaper' +#CONVERTER_GRAPHICS_BACKEND = u'converter.backends.imagemagick' # OCR #OCR_TESSERACT_PATH = u'/usr/bin/tesseract'