diff --git a/apps/converter/api.py b/apps/converter/api.py index 933962446d..395838ffba 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -26,10 +26,16 @@ QUALITY_HIGH = 'quality_high' QUALITY_SETTINGS = {QUALITY_DEFAULT:DEFAULT_OPTIONS, QUALITY_LOW:LOW_QUALITY_OPTIONS, QUALITY_HIGH:HIGH_QUALITY_OPTIONS} +CONVERTER_ERROR_STRING_NO_DECODER = 'no decode delegate for this image format' + + class ConvertError(Exception): - def __init__(self, status, message): - self.status = status - self.message = message + pass + + +class UnknownFormat(Exception): + pass + def cleanup(filename): ''' tries to remove the given filename. Ignores non-existent files ''' @@ -38,17 +44,6 @@ def cleanup(filename): except OSError: pass -def get_errors(error_string): - ''' - returns all lines in the error_string that start with the string "error" - - ''' - lines = error_string.splitlines() - return lines[0] - #error_lines = (line for line in lines if line.find('error') >= 0) - #return '\n'.join(error_lines) - - #TODO: Timeout & kill child def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): command = [] @@ -58,10 +53,17 @@ def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, ar if arguments: command.extend(shlex.split(str(arguments))) command.append(output_filepath) - proc = subprocess.Popen(command, stderr=subprocess.PIPE) - return (proc.wait(), proc.stderr.read()) - - + proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + #Got an error from convert program + error_line = proc.stderr.readline() + if CONVERTER_ERROR_STRING_NO_DECODER in error_line: + #Try to determine from error message which class of error is it + raise UnknownFormat + else: + raise ConvertError(error_line) + def execute_unpaper(input_filepath, output_filepath): command = [] command.append(UNPAPER_PATH) @@ -71,7 +73,6 @@ def execute_unpaper(input_filepath, output_filepath): proc = subprocess.Popen(command, stderr=subprocess.PIPE) return (proc.wait(), proc.stderr.read()) - def execute_unoconv(input_filepath, output_filepath, arguments=''): command = [UNOCONV_PATH] command.extend(['--stdout']) @@ -82,7 +83,6 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''): shutil.copyfileobj(proc.stdout, output) return (proc.wait(), proc.stderr.read()) - def execute_identify(input_filepath, arguments): command = [] command.append(IDENTIFY_PATH) @@ -92,14 +92,12 @@ def execute_identify(input_filepath, arguments): proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return (proc.wait(), proc.stderr.read(), proc.stdout.read()) - def cache_cleanup(input_filepath, size, page=0, format='jpg'): filepath = create_image_cache_filename(input_filepath, size, page, format) try: os.remove(filepath) except OSError: pass - def create_image_cache_filename(input_filepath, quality=QUALITY_DEFAULT, extra_options='', *args, **kwargs): if input_filepath: @@ -125,7 +123,6 @@ def in_image_cache(input_filepath, size, page=0, format='jpg', quality=QUALITY_D else: return None - def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, format='jpg', extra_options='', mimetype=None, extension=None, cleanup_files=True): unoconv_output = None output_filepath = create_image_cache_filename(input_filepath, size=size, page=page, format=format, quality=quality, extra_options=extra_options) @@ -142,36 +139,29 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f cleanup(input_filepath) input_filepath = unoconv_output ''' - #TODO: Check mimetype and use corresponding utility try: input_arg = '%s[%s]' % (input_filepath, page) extra_options += ' -resize %s' % size - status, error_string = execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality) - if status: - errors = get_errors(error_string) - raise ConvertError(status, errors) + execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath='%s:%s' % (format, output_filepath), quality=quality) finally: if cleanup_files: cleanup(input_filepath) if unoconv_output: cleanup(unoconv_output) - return output_filepath - + + return output_filepath def get_page_count(input_filepath): try: status, error_string, output = execute_identify(input_filepath, '-format %n') if status: - errors = get_errors(error_string) return 1 - #raise ConvertError(status, errors) finally: if output: return int(output) else: return 1 -#TODO: slugify OCR_OPTIONS and add to file name to cache def convert_document_for_ocr(document, page=0, format='tif'): #Extract document file input_filepath = document_save_to_temp_dir(document, document.uuid) @@ -208,25 +198,13 @@ def convert_document_for_ocr(document, page=0, format='tif'): tranformation_string = ' '.join(transformation_list) try: #Apply default transformations - status, error_string = execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file) - if status: - errors = get_errors(error_string) - raise ConvertError(status, errors) + execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=tranformation_string, output_filepath=transformation_output_file) #Do OCR operations - status, error_string = execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) - if status: - errors = get_errors(error_string) - raise ConvertError(status, errors) + execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) # Process by unpaper status, error_string = execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file) - if status: - errors = get_errors(error_string) - raise ConvertError(status, errors) # Convert to tif - status, error_string = execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file) - if status: - errors = get_errors(error_string) - raise ConvertError(status, errors) + execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file) finally: cleanup(transformation_output_file) cleanup(unpaper_input_file) diff --git a/apps/documents/views.py b/apps/documents/views.py index 05b8699201..01d7ddb97f 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -16,7 +16,8 @@ from django.core.exceptions import ObjectDoesNotExist from django.core.files.uploadedfile import SimpleUploadedFile from common.utils import pretty_size -from converter.api import convert, in_image_cache, QUALITY_DEFAULT +from converter.api import convert, in_image_cache, QUALITY_DEFAULT, \ + ConvertError, UnknownFormat from converter import TRANFORMATION_CHOICES from filetransfers.api import serve_file from filesystem_serving.api import document_create_fs_links, document_delete_fs_links @@ -509,6 +510,18 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ filepath = document_save_to_temp_dir(document, filename=document.checksum) output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1) return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg') + except ConvertError, e: + if request.user.is_staff or request.user.is_superuser: + messages.error(request, e) + if size == THUMBNAIL_SIZE: + return serve_file(request, File(file=open('%simages/picture_error.png' % settings.MEDIA_ROOT, 'r'))) + else: + return serve_file(request, File(file=open('%simages/1297211435_error.png' % settings.MEDIA_ROOT, 'r'))) + except UnknownFormat: + if size == THUMBNAIL_SIZE: + return serve_file(request, File(file=open('%simages/1299549572_unknown2.png' % settings.MEDIA_ROOT, 'r'))) + else: + return serve_file(request, File(file=open('%simages/1299549805_unknown.png' % settings.MEDIA_ROOT, 'r'))) except Exception, e: if request.user.is_staff or request.user.is_superuser: messages.error(request, e) diff --git a/docs/CREDITS b/docs/CREDITS index 7161417ef0..df0cded20d 100644 --- a/docs/CREDITS +++ b/docs/CREDITS @@ -108,3 +108,11 @@ PyMongo - is a Python distribution containing tools for working with GridFS - is a storage specification for large objects in MongoDB Copyright 10gen http://www.mongodb.org/display/DOCS/GridFS+Specification + +Image 1299549572_unknown2.png + Everaldo Coelho + http://www.everaldo.com/ + +Image 1299549805_unknown.png + Oxygen Team + http://www.oxygen-icons.org/ diff --git a/docs/Changelog.txt b/docs/Changelog.txt index f18cec4a91..86bc9f2109 100644 --- a/docs/Changelog.txt +++ b/docs/Changelog.txt @@ -1,3 +1,6 @@ +2011-Mar-07 +* Converter now differentiates between unknown file format and convert errors + 2011-Mar-06 * Fixed duplicated document search * Optimized document duplicate search diff --git a/site_media/images/1299549572_unknown2.png b/site_media/images/1299549572_unknown2.png new file mode 100644 index 0000000000..06d0b1cee9 Binary files /dev/null and b/site_media/images/1299549572_unknown2.png differ diff --git a/site_media/images/1299549805_unknown.png b/site_media/images/1299549805_unknown.png new file mode 100644 index 0000000000..d97079861c Binary files /dev/null and b/site_media/images/1299549805_unknown.png differ