Source, document page and thumbnails working; new document transformations and OCR not yet converted

2011-07-15 20:25:49 -04:00
parent 743ae0fce0
commit 389253385c
7 changed files with 137 additions and 99 deletions
--- a/apps/converter/api.py
+++ b/apps/converter/api.py
@@ -1,5 +1,6 @@
 import os
 import subprocess
+import hashlib

 from django.utils.importlib import import_module
 from django.template.defaultfilters import slugify
@@ -22,6 +23,7 @@ from converter.literals import TRANSFORMATION_RESIZE, \
    TRANSFORMATION_ZOOM
 from converter.literals import DIMENSION_SEPARATOR    

+HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
    
 CONVERTER_OFFICE_FILE_EXTENSIONS = [
    u'ods', u'docx', u'doc'
@@ -75,19 +77,11 @@ def cache_cleanup(input_filepath, *args, **kwargs):

 def create_image_cache_filename(input_filepath, *args, **kwargs):
    if input_filepath:
-        temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
-        temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
-
-        final_filepath = []
-        [final_filepath.append(str(arg)) for arg in args]
-        final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()])
-
-        temp_path += slugify(u'_'.join(final_filepath))
-
-        return temp_path
+        hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)]))
+        return os.path.join(TEMPORARY_DIRECTORY, hash_value)
    else:
        return None
-
+        

 def convert_office_document(input_filepath):
    if os.path.exists(UNOCONV_PATH):
@@ -104,21 +98,21 @@ def convert_document(document, *args, **kwargs):
    return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs)


-def convert(input_filepath, *args, **kwargs):
+def convert(input_filepath, cleanup_files=True, *args, **kwargs):
    size = kwargs.get('size')
    file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
    zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
    rotation = kwargs.get('rotation', DEFAULT_ROTATION)
    page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
-    cleanup_files = kwargs.get('cleanup_files', True)
    quality = kwargs.get('quality', QUALITY_DEFAULT)
    transformations = kwargs.get('transformations', [])

    unoconv_output = None

    output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
-    #if os.path.exists(output_filepath):
-    #    return output_filepath
+    print 'output_filepath', output_filepath
+    if os.path.exists(output_filepath):
+        return output_filepath

    path, extension = os.path.splitext(input_filepath)
    if extension[1:].lower() in CONVERTER_OFFICE_FILE_EXTENSIONS:
@@ -128,8 +122,6 @@ def convert(input_filepath, *args, **kwargs):
            input_filepath = result
            extra_options = u''

-    #TODO: not here in the backend
-    input_arg = u'%s[%s]' % (input_filepath, page-1)
    transformations.append(
        {
            'transformation': TRANSFORMATION_RESIZE,
@@ -154,7 +146,7 @@ def convert(input_filepath, *args, **kwargs):
        )           

    try:
-        backend.convert_file(input_filepath=input_arg, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality, transformations=transformations)
+        backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format)
    finally:
        if cleanup_files:
            cleanup(input_filepath)
@@ -189,14 +181,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEF
    unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep)
    convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format)

-    input_arg = u'%s[%s]' % (input_filepath, page-1)
-
    try:
-        document_page = document.documentpage_set.get(page_number=page + 1)
+        document_page = document.documentpage_set.get(page_number=page)
        transformation_string, warnings = document_page.get_transformation_string()

        #Apply default transformations
-        backend.convert_file(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
+        backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
        #Do OCR operations
        backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
        # Process by unpaper
--- a/apps/converter/backends/graphicsmagick/base.py
+++ b/apps/converter/backends/graphicsmagick/base.py
@@ -11,7 +11,8 @@ from converter.backends import ConverterBase
 from converter.literals import TRANSFORMATION_RESIZE, \
    TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
    TRANSFORMATION_ZOOM
-from converter.literals import DIMENSION_SEPARATOR    
+from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
+    DEFAULT_FILE_FORMAT

 CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
 CONVERTER_ERROR_STARTS_WITH = u'starts with'
@@ -32,10 +33,12 @@ class ConverterClass(ConverterBase):
        return proc.stdout.read()


-    def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT):
+    def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
+        print 'convert_file'
        arguments = []
        if transformations:
            for transformation in transformations:
+                print 'transformation: %s' % transformation
                if transformation['transformation'] == TRANSFORMATION_RESIZE:
                    dimensions = []
                    dimensions.append(unicode(transformation['arguments']['width']))
@@ -46,21 +49,31 @@ class ConverterClass(ConverterBase):

                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
                    arguments.append(u'-resize')
-                    arguments.append(u'%d%%' % transformation['arguments']['zoom'])
+                    arguments.append(u'%d%%' % transformation['arguments']['percent'])
                    
                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                    print 'Do rotate'
                    arguments.append(u'-rotate')
                    arguments.append(u'%s' % transformation['arguments']['degrees'])
                
        print 'arguments: %s' % arguments
-        #if format == u'jpg':
-        #    extra_options += u' -quality 85'
+        if format == u'jpg':
+            arguments.append(u'-quality')
+            arguments.append(u'85')
+
+        
+        # GraphicsMagick page numbers are 0-based
+        input_arg = u'%s[%d]' % (input_filepath, page - 1)
+        
+        # Specify the file format next to the output filename
+        output_filepath = u'%s:%s' % (file_format, output_filepath)
+        
        command = []
        command.append(unicode(GM_PATH))
        command.append(u'convert')
        command.extend(unicode(QUALITY_SETTINGS[quality]).split())
        command.extend(unicode(GM_SETTINGS).split())
-        command.append(unicode(input_filepath))
+        command.append(unicode(input_arg))
        if arguments:
            command.extend(arguments)
        command.append(unicode(output_filepath))
@@ -115,10 +128,3 @@ class ConverterClass(ConverterBase):
        except:
            #TODO: send to other page number identifying program
            return 1
-                
-
-    def _get_transformation_string():
-        pass
-        #'command_line': u'-rotate %(degrees)d'
-        #    }
-        #}
--- a/apps/converter/backends/imagemagick/base.py
+++ b/apps/converter/backends/imagemagick/base.py
@@ -31,15 +31,42 @@ class ConverterClass(ConverterBase):
        return proc.stdout.read()


-    def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
-        #if format == u'jpg':
-        #    extra_options += u' -quality 85'        
+    def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
+        arguments = []
+        if transformations:
+            for transformation in transformations:
+                if transformation['transformation'] == TRANSFORMATION_RESIZE:
+                    dimensions = []
+                    dimensions.append(unicode(transformation['arguments']['width']))
+                    if 'height' in transformation['arguments']:
+                        dimensions.append(unicode(transformation['arguments']['height']))                    
+                    arguments.append(u'-resize')
+                    arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))
+
+                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
+                    arguments.append(u'-resize')
+                    arguments.append(u'%d%%' % transformation['arguments']['percent'])
+                    
+                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                    arguments.append(u'-rotate')
+                    arguments.append(u'%s' % transformation['arguments']['degrees'])
+                    
+        if format == u'jpg':
+            arguments.append(u'-quality')
+            arguments.append(u'85')
+        
+        # ImageMagick page numbers are 0-based
+        input_arg = u'%s[%d]' % (input_filepath, page - 1)
+
+        # Specify the file format next to the output filename
+        output_filepath = u'%s:%s' % (file_format, output_filepath)
+                  
        command = []
        command.append(unicode(IM_CONVERT_PATH))
        command.extend(unicode(QUALITY_SETTINGS[quality]).split())
-        command.append(unicode(input_filepath))
+        command.append(unicode(input_arg))
        if arguments:
-            command.extend(unicode(arguments).split())
+            command.extend(arguments)
        command.append(unicode(output_filepath))
        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        return_code = proc.wait()
--- a/apps/documents/managers.py
+++ b/apps/documents/managers.py
@@ -20,4 +20,17 @@ class DocumentPageTransformationManager(models.Manager):
        return self.model.objects.filter(document_page=document_page)

    def get_for_document_page_as_list(self, document_page):
-        return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments')])
+        warnings = []
+        transformations = []
+        for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments'):
+            try:
+                transformations.append(
+                    {
+                        'transformation': transformation['transformation'],
+                        'arguments': eval(transformation['arguments'], {})
+                    }
+                )
+            except Exception, e:
+                warnings.append(e)
+        
+        return transformations, warnings  
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -20,10 +20,11 @@ from common.widgets import two_state_template
 from common.literals import PAGE_SIZE_DIMENSIONS, \
    PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE
 from common.conf.settings import DEFAULT_PAPER_SIZE
-from converter.api import convert_document, QUALITY_DEFAULT
+from converter.api import convert_document
 from converter.exceptions import UnkownConvertError, UnknownFormat
-from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
-    DEFAULT_FILE_FORMAT, QUALITY_PRINT
+from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
+    DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \
+    DEFAULT_PAGE_NUMBER
 from filetransfers.api import serve_file
 from grouping.utils import get_document_group_subtemplate
 from metadata.api import save_metadata_list, \
@@ -285,39 +286,15 @@ def document_edit(request, document_id):
        'object': document,
    }, context_instance=RequestContext(request))

-'''
-def calculate_converter_arguments(document, *args, **kwargs):
-    size = kwargs.pop('size', PREVIEW_SIZE)
-    quality = kwargs.pop('quality', QUALITY_DEFAULT)
-    page = kwargs.pop('page', 1)
-    file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT)
-    zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL)
-    rotation = kwargs.pop('rotation', DEFAULT_ROTATION)
-
-    document_page = DocumentPage.objects.get(document=document, page_number=page)
-    transformation_string, warnings = document_page.get_transformation_string()
-
-    arguments = {
-        'size': size,
-        'file_format': file_format,
-        'quality': quality,
-        'extra_options': transformation_string,
-        'page': page - 1,
-        'zoom': zoom,
-        'rotation': rotation
-    }
-
-    return arguments, warnings
-'''

 def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT):
    check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])

    document = get_object_or_404(Document, pk=document_id)

-    page = int(request.GET.get('page', 1))
+    page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER))

-    zoom = int(request.GET.get('zoom', 100))
+    zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))

    if zoom < ZOOM_MIN_LEVEL:
        zoom = ZOOM_MIN_LEVEL
@@ -325,18 +302,16 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
    if zoom > ZOOM_MAX_LEVEL:
        zoom = ZOOM_MAX_LEVEL

-    rotation = int(request.GET.get('rotation', 0)) % 360
+    rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360

-    #arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation)
-
-    #if warnings and (request.user.is_staff or request.user.is_superuser):
-    #    for warning in warnings:
-    #        messages.warning(request, _(u'Page transformation error: %s') % warning)
-
-    transformations = DocumentPageTransformation.objects.get_for_document_page_as_list(document)
+    document_page = get_object_or_404(document.documentpage_set, page_number=page)
+    transformations, warnings = DocumentPageTransformation.objects.get_for_document_page_as_list(document_page)

+    if warnings and (request.user.is_staff or request.user.is_superuser):
+        for warning in warnings:
+            messages.warning(request, _(u'Page transformation error: %s') % warning)
+            
    try:
-        #output_file = convert_document(document, **arguments)
        output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations)
    except UnkownConvertError, e:
        if request.user.is_staff or request.user.is_superuser:
@@ -595,13 +570,13 @@ def document_page_view(request, document_page_id):

    document_page = get_object_or_404(DocumentPage, pk=document_page_id)

-    zoom = int(request.GET.get('zoom', 100))
-    rotation = int(request.GET.get('rotation', 0))
+    zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
+    rotation = int(request.GET.get('rotation', DEFAULT_ROTATION))
    document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation)

    base_title = _(u'details for: %s') % document_page

-    if zoom != 100:
+    if zoom != DEFAULT_ZOOM_LEVEL:
        zoom_text = u'(%d%%)' % zoom
    else:
        zoom_text = u''
--- a/apps/sources/managers.py
+++ b/apps/sources/managers.py
@@ -7,5 +7,21 @@ class SourceTransformationManager(models.Manager):
        ct = ContentType.objects.get_for_model(obj)
        return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk)

+    #def get_for_object_as_list(self, obj):
+    #    return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_object(obj).values('transformation', 'arguments')])
+
    def get_for_object_as_list(self, obj):
-        return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_object(obj).values('transformation', 'arguments')])
+        warnings = []
+        transformations = []
+        for transformation in self.get_for_object(obj).values('transformation', 'arguments'):
+            try:
+                transformations.append(
+                    {
+                        'transformation': transformation['transformation'],
+                        'arguments': eval(transformation['arguments'], {})
+                    }
+                )
+            except Exception, e:
+                warnings.append(e)
+        
+        return transformations, warnings  
--- a/apps/sources/views.py
+++ b/apps/sources/views.py
@@ -285,9 +285,11 @@ def staging_file_preview(request, source_type, source_id, staging_file_id):
    staging_folder = get_object_or_404(StagingFolder, pk=source_id)
    StagingFile = create_staging_file_class(request, staging_folder.folder_path)
    try:
+        transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
+        
        output_file, errors = StagingFile.get(staging_file_id).preview(
            preview_size=staging_folder.get_preview_size(),
-            transformations=SourceTransformation.objects.get_for_object_as_list(staging_folder)
+            transformations=transformations
        )
        if errors and (request.user.is_staff or request.user.is_superuser):
            for error in errors:
@@ -321,9 +323,10 @@ def staging_file_delete(request, source_type, source_id, staging_file_id):

    if request.method == 'POST':
        try:
+            transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
            staging_file.delete(
                preview_size=staging_folder.get_preview_size(),
-                transformations=SourceTransformation.objects.get_for_object_as_list(staging_folder)
+                transformations=transformations
            )
            messages.success(request, _(u'Staging file delete successfully.'))
        except Exception, e:
@@ -516,12 +519,16 @@ def setup_source_transformation_edit(request, transformation_id):
        if form.is_valid():
            try:
                # Test the validity of the argument field
-                eval(form.cleaned_data['arguments'])                
-                form.save()
-                messages.success(request, _(u'Source transformation edited successfully'))
-                return HttpResponseRedirect(next)
-            except Exception, e:
-                messages.error(request, _(u'Error editing source transformation; %s') % e)
+                eval(form.cleaned_data['arguments'], {})
+            except:
+                messages.error(request, _(u'Source transformation argument error.'))
+            else:
+                try:
+                    form.save()
+                    messages.success(request, _(u'Source transformation edited successfully'))
+                    return HttpResponseRedirect(next)
+                except Exception, e:
+                    messages.error(request, _(u'Error editing source transformation; %s') % e)
    else:
        form = SourceTransformationForm(instance=source_transformation)

@@ -607,14 +614,18 @@ def setup_source_transformation_create(request, source_type, source_id):
        if form.is_valid():
            try:
                # Test the validity of the argument field
-                eval(form.cleaned_data['arguments'])
-                source_tranformation = form.save(commit=False)
-                source_tranformation.content_object = source
-                source_tranformation.save()
-                messages.success(request, _(u'Source transformation created successfully'))
-                return HttpResponseRedirect(redirect_view)
-            except Exception, e:
-                messages.error(request, _(u'Error creating source transformation; %s') % e)
+                eval(form.cleaned_data['arguments'], {})
+            except:
+                messages.error(request, _(u'Source transformation argument error.'))
+            else:            
+                try:
+                    source_tranformation = form.save(commit=False)
+                    source_tranformation.content_object = source
+                    source_tranformation.save()
+                    messages.success(request, _(u'Source transformation created successfully'))
+                    return HttpResponseRedirect(redirect_view)
+                except Exception, e:
+                    messages.error(request, _(u'Error creating source transformation; %s') % e)
    else:
        form = SourceTransformationForm_create()