diff --git a/apps/converter/api.py b/apps/converter/api.py index 478783b299..32d92b5664 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -1,5 +1,6 @@ import os import subprocess +import hashlib from django.utils.importlib import import_module from django.template.defaultfilters import slugify @@ -22,6 +23,7 @@ from converter.literals import TRANSFORMATION_RESIZE, \ TRANSFORMATION_ZOOM from converter.literals import DIMENSION_SEPARATOR +HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() CONVERTER_OFFICE_FILE_EXTENSIONS = [ u'ods', u'docx', u'doc' @@ -75,19 +77,11 @@ def cache_cleanup(input_filepath, *args, **kwargs): def create_image_cache_filename(input_filepath, *args, **kwargs): if input_filepath: - temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) - temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) - - final_filepath = [] - [final_filepath.append(str(arg)) for arg in args] - final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()]) - - temp_path += slugify(u'_'.join(final_filepath)) - - return temp_path + hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)])) + return os.path.join(TEMPORARY_DIRECTORY, hash_value) else: return None - + def convert_office_document(input_filepath): if os.path.exists(UNOCONV_PATH): @@ -104,21 +98,21 @@ def convert_document(document, *args, **kwargs): return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs) -def convert(input_filepath, *args, **kwargs): +def convert(input_filepath, cleanup_files=True, *args, **kwargs): size = kwargs.get('size') file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT) zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL) rotation = kwargs.get('rotation', DEFAULT_ROTATION) page = kwargs.get('page', DEFAULT_PAGE_NUMBER) - cleanup_files = kwargs.get('cleanup_files', True) quality = kwargs.get('quality', QUALITY_DEFAULT) transformations = 
kwargs.get('transformations', []) unoconv_output = None output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs) - #if os.path.exists(output_filepath): - # return output_filepath + print 'output_filepath', output_filepath + if os.path.exists(output_filepath): + return output_filepath path, extension = os.path.splitext(input_filepath) if extension[1:].lower() in CONVERTER_OFFICE_FILE_EXTENSIONS: @@ -128,8 +122,6 @@ def convert(input_filepath, *args, **kwargs): input_filepath = result extra_options = u'' - #TODO: not here in the backend - input_arg = u'%s[%s]' % (input_filepath, page-1) transformations.append( { 'transformation': TRANSFORMATION_RESIZE, @@ -154,7 +146,7 @@ def convert(input_filepath, *args, **kwargs): ) try: - backend.convert_file(input_filepath=input_arg, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality, transformations=transformations) + backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format) finally: if cleanup_files: cleanup(input_filepath) @@ -189,14 +181,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEF unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep) convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format) - input_arg = u'%s[%s]' % (input_filepath, page-1) - try: - document_page = document.documentpage_set.get(page_number=page + 1) + document_page = document.documentpage_set.get(page_number=page) transformation_string, warnings = document_page.get_transformation_string() #Apply default transformations - backend.convert_file(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file) + backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, arguments=transformation_string, 
output_filepath=transformation_output_file) #Do OCR operations backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) # Process by unpaper diff --git a/apps/converter/backends/graphicsmagick/base.py b/apps/converter/backends/graphicsmagick/base.py index 8cb0f3fb55..c8b479a0aa 100644 --- a/apps/converter/backends/graphicsmagick/base.py +++ b/apps/converter/backends/graphicsmagick/base.py @@ -11,7 +11,8 @@ from converter.backends import ConverterBase from converter.literals import TRANSFORMATION_RESIZE, \ TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ TRANSFORMATION_ZOOM -from converter.literals import DIMENSION_SEPARATOR +from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \ + DEFAULT_FILE_FORMAT CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format' CONVERTER_ERROR_STARTS_WITH = u'starts with' @@ -32,10 +33,12 @@ class ConverterClass(ConverterBase): return proc.stdout.read() - def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT): + def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + print 'convert_file' arguments = [] if transformations: for transformation in transformations: + print 'transformation: %s' % transformation if transformation['transformation'] == TRANSFORMATION_RESIZE: dimensions = [] dimensions.append(unicode(transformation['arguments']['width'])) @@ -46,21 +49,31 @@ class ConverterClass(ConverterBase): elif transformation['transformation'] == TRANSFORMATION_ZOOM: arguments.append(u'-resize') - arguments.append(u'%d%%' % transformation['arguments']['zoom']) + arguments.append(u'%d%%' % transformation['arguments']['percent']) elif transformation['transformation'] == TRANSFORMATION_ROTATE: + print 'Do rotate' arguments.append(u'-rotate') arguments.append(u'%s' % 
transformation['arguments']['degrees']) print 'arguments: %s' % arguments - #if format == u'jpg': - # extra_options += u' -quality 85' + if file_format == u'jpg': + arguments.append(u'-quality') + arguments.append(u'85') + + + # Graphicsmagick page number is 0-based + input_arg = u'%s[%d]' % (input_filepath, page - 1) + + # Specify the file format next to the output filename + output_filepath = u'%s:%s' % (file_format, output_filepath) + command = [] command.append(unicode(GM_PATH)) command.append(u'convert') command.extend(unicode(QUALITY_SETTINGS[quality]).split()) command.extend(unicode(GM_SETTINGS).split()) - command.append(unicode(input_filepath)) + command.append(unicode(input_arg)) if arguments: command.extend(arguments) command.append(unicode(output_filepath)) @@ -115,10 +128,3 @@ class ConverterClass(ConverterBase): except: #TODO: send to other page number identifying program return 1 - - - def _get_transformation_string(): - pass - #'command_line': u'-rotate %(degrees)d' - # } - #} diff --git a/apps/converter/backends/imagemagick/base.py b/apps/converter/backends/imagemagick/base.py index cd5b1ba53e..11822db515 100644 --- a/apps/converter/backends/imagemagick/base.py +++ b/apps/converter/backends/imagemagick/base.py @@ -31,15 +31,42 @@ class ConverterClass(ConverterBase): return proc.stdout.read() - def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): - #if format == u'jpg': - # extra_options += u' -quality 85' + def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + arguments = [] + if transformations: + for transformation in transformations: + if transformation['transformation'] == TRANSFORMATION_RESIZE: + dimensions = [] + dimensions.append(unicode(transformation['arguments']['width'])) + if 'height' in transformation['arguments']: + dimensions.append(unicode(transformation['arguments']['height'])) + 
arguments.append(u'-resize') + arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions)) + + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + arguments.append(u'-resize') + arguments.append(u'%d%%' % transformation['arguments']['percent']) + + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + arguments.append(u'-rotate') + arguments.append(u'%s' % transformation['arguments']['degrees']) + + if file_format == u'jpg': + arguments.append(u'-quality') + arguments.append(u'85') + + # Imagemagick page number is 0-based + input_arg = u'%s[%d]' % (input_filepath, page - 1) + + # Specify the file format next to the output filename + output_filepath = u'%s:%s' % (file_format, output_filepath) + command = [] command.append(unicode(IM_CONVERT_PATH)) command.extend(unicode(QUALITY_SETTINGS[quality]).split()) - command.append(unicode(input_filepath)) + command.append(unicode(input_arg)) if arguments: - command.extend(unicode(arguments).split()) + command.extend(arguments) command.append(unicode(output_filepath)) proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return_code = proc.wait() diff --git a/apps/documents/managers.py b/apps/documents/managers.py index d63b2d644f..ef87c929fe 100644 --- a/apps/documents/managers.py +++ b/apps/documents/managers.py @@ -20,4 +20,17 @@ class DocumentPageTransformationManager(models.Manager): return self.model.objects.filter(document_page=document_page) def get_for_document_page_as_list(self, document_page): - return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments')]) + warnings = [] + transformations = [] + for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 
'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/documents/views.py b/apps/documents/views.py index dcd383006d..c727407b88 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -20,10 +20,11 @@ from common.widgets import two_state_template from common.literals import PAGE_SIZE_DIMENSIONS, \ PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE from common.conf.settings import DEFAULT_PAPER_SIZE -from converter.api import convert_document, QUALITY_DEFAULT +from converter.api import convert_document from converter.exceptions import UnkownConvertError, UnknownFormat -from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \ - DEFAULT_FILE_FORMAT, QUALITY_PRINT +from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \ + DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \ + DEFAULT_PAGE_NUMBER from filetransfers.api import serve_file from grouping.utils import get_document_group_subtemplate from metadata.api import save_metadata_list, \ @@ -285,39 +286,15 @@ def document_edit(request, document_id): 'object': document, }, context_instance=RequestContext(request)) -''' -def calculate_converter_arguments(document, *args, **kwargs): - size = kwargs.pop('size', PREVIEW_SIZE) - quality = kwargs.pop('quality', QUALITY_DEFAULT) - page = kwargs.pop('page', 1) - file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT) - zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL) - rotation = kwargs.pop('rotation', DEFAULT_ROTATION) - - document_page = DocumentPage.objects.get(document=document, page_number=page) - transformation_string, warnings = document_page.get_transformation_string() - - arguments = { - 'size': size, - 'file_format': file_format, - 'quality': quality, - 'extra_options': transformation_string, - 'page': page - 1, - 'zoom': zoom, - 'rotation': rotation - } - - return arguments, warnings -''' def get_document_image(request, 
document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT): check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW]) document = get_object_or_404(Document, pk=document_id) - page = int(request.GET.get('page', 1)) + page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER)) - zoom = int(request.GET.get('zoom', 100)) + zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL)) if zoom < ZOOM_MIN_LEVEL: zoom = ZOOM_MIN_LEVEL @@ -325,18 +302,16 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ if zoom > ZOOM_MAX_LEVEL: zoom = ZOOM_MAX_LEVEL - rotation = int(request.GET.get('rotation', 0)) % 360 + rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360 - #arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation) - - #if warnings and (request.user.is_staff or request.user.is_superuser): - # for warning in warnings: - # messages.warning(request, _(u'Page transformation error: %s') % warning) - - transformations = DocumentPageTransformation.objects.get_for_document_page_as_list(document) + document_page = get_object_or_404(document.documentpage_set, page_number=page) + transformations, warnings = DocumentPageTransformation.objects.get_for_document_page_as_list(document_page) + if warnings and (request.user.is_staff or request.user.is_superuser): + for warning in warnings: + messages.warning(request, _(u'Page transformation error: %s') % warning) + try: - #output_file = convert_document(document, **arguments) output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations) except UnkownConvertError, e: if request.user.is_staff or request.user.is_superuser: @@ -595,13 +570,13 @@ def document_page_view(request, document_page_id): document_page = get_object_or_404(DocumentPage, pk=document_page_id) - zoom = 
int(request.GET.get('zoom', 100)) - rotation = int(request.GET.get('rotation', 0)) + zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL)) + rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation) base_title = _(u'details for: %s') % document_page - if zoom != 100: + if zoom != DEFAULT_ZOOM_LEVEL: zoom_text = u'(%d%%)' % zoom else: zoom_text = u'' diff --git a/apps/sources/managers.py b/apps/sources/managers.py index 1fd2d38d21..aacccb31e8 100644 --- a/apps/sources/managers.py +++ b/apps/sources/managers.py @@ -7,5 +7,21 @@ class SourceTransformationManager(models.Manager): ct = ContentType.objects.get_for_model(obj) return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk) + #def get_for_object_as_list(self, obj): + # return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_object(obj).values('transformation', 'arguments')]) + def get_for_object_as_list(self, obj): - return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_object(obj).values('transformation', 'arguments')]) + warnings = [] + transformations = [] + for transformation in self.get_for_object(obj).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/sources/views.py b/apps/sources/views.py index 5748f7ffd8..2e2b42ed01 100644 --- a/apps/sources/views.py +++ b/apps/sources/views.py @@ -285,9 +285,11 @@ def staging_file_preview(request, source_type, source_id, staging_file_id): staging_folder = get_object_or_404(StagingFolder, pk=source_id) StagingFile = 
create_staging_file_class(request, staging_folder.folder_path) try: + transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder) + output_file, errors = StagingFile.get(staging_file_id).preview( preview_size=staging_folder.get_preview_size(), - transformations=SourceTransformation.objects.get_for_object_as_list(staging_folder) + transformations=transformations ) if errors and (request.user.is_staff or request.user.is_superuser): for error in errors: @@ -321,9 +323,10 @@ def staging_file_delete(request, source_type, source_id, staging_file_id): if request.method == 'POST': try: + transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder) staging_file.delete( preview_size=staging_folder.get_preview_size(), - transformations=SourceTransformation.objects.get_for_object_as_list(staging_folder) + transformations=transformations ) messages.success(request, _(u'Staging file delete successfully.')) except Exception, e: @@ -516,12 +519,16 @@ def setup_source_transformation_edit(request, transformation_id): if form.is_valid(): try: # Test the validity of the argument field - eval(form.cleaned_data['arguments']) - form.save() - messages.success(request, _(u'Source transformation edited successfully')) - return HttpResponseRedirect(next) - except Exception, e: - messages.error(request, _(u'Error editing source transformation; %s') % e) + eval(form.cleaned_data['arguments'], {}) + except Exception: + messages.error(request, _(u'Source transformation argument error.')) + else: + try: + form.save() + messages.success(request, _(u'Source transformation edited successfully')) + return HttpResponseRedirect(next) + except Exception, e: + messages.error(request, _(u'Error editing source transformation; %s') % e) else: form = SourceTransformationForm(instance=source_transformation) @@ -607,14 +614,18 @@ def setup_source_transformation_create(request, source_type, source_id): if form.is_valid(): try: # Test the validity 
of the 
argument field - eval(form.cleaned_data['arguments']) - source_tranformation = form.save(commit=False) - source_tranformation.content_object = source - source_tranformation.save() - messages.success(request, _(u'Source transformation created successfully')) - return HttpResponseRedirect(redirect_view) - except Exception, e: - messages.error(request, _(u'Error creating source transformation; %s') % e) + eval(form.cleaned_data['arguments'], {}) + except Exception: + messages.error(request, _(u'Source transformation argument error.')) + else: + try: + source_tranformation = form.save(commit=False) + source_tranformation.content_object = source + source_tranformation.save() + messages.success(request, _(u'Source transformation created successfully')) + return HttpResponseRedirect(redirect_view) + except Exception, e: + messages.error(request, _(u'Error creating source transformation; %s') % e) else: form = SourceTransformationForm_create()