diff --git a/apps/converter/__init__.py b/apps/converter/__init__.py index 6ab5029f01..ffaef00c09 100644 --- a/apps/converter/__init__.py +++ b/apps/converter/__init__.py @@ -1,7 +1,16 @@ from django.utils.translation import ugettext_lazy as _ +from django.core.exceptions import ImproperlyConfigured from navigation.api import register_sidebar_template +from converter.utils import load_backend +from converter.conf.settings import GRAPHICS_BACKEND + formats_list = {'text': _('file formats'), 'view': 'formats_list', 'famfam': 'pictures'} register_sidebar_template(['formats_list'], 'converter_file_formats_help.html') + +try: + backend = load_backend().ConverterClass() +except ImproperlyConfigured: + raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND) diff --git a/apps/converter/api.py b/apps/converter/api.py index 9de0ed737e..f5d5203bae 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -1,9 +1,6 @@ import os import subprocess - -from django.utils.importlib import import_module -from django.template.defaultfilters import slugify -from django.core.exceptions import ImproperlyConfigured +import hashlib from common import TEMPORARY_DIRECTORY from documents.utils import document_save_to_temp_dir @@ -12,21 +9,23 @@ from converter.conf.settings import UNPAPER_PATH from converter.conf.settings import OCR_OPTIONS from converter.conf.settings import UNOCONV_PATH from converter.exceptions import UnpaperError, OfficeConversionError -from converter.utils import load_backend -from converter.literals import DEFAULT_PAGE_INDEX_NUMBER, \ +from converter.literals import DEFAULT_PAGE_NUMBER, \ DEFAULT_OCR_FILE_FORMAT, QUALITY_DEFAULT, DEFAULT_ZOOM_LEVEL, \ - DEFAULT_ROTATION, DEFAULT_FILE_FORMAT, QUALITY_PRINT + DEFAULT_ROTATION, DEFAULT_FILE_FORMAT, QUALITY_HIGH +from converter import backend +from converter.literals import TRANSFORMATION_CHOICES +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ + TRANSFORMATION_ZOOM +from converter.literals import DIMENSION_SEPARATOR + +HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() + CONVERTER_OFFICE_FILE_EXTENSIONS = [ u'ods', u'docx', u'doc' ] -try: - backend = load_backend().ConverterClass() -except ImproperlyConfigured: - raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND) - - def cleanup(filename): """ Tries to remove the given filename. Ignores non-existent files @@ -75,19 +74,11 @@ def cache_cleanup(input_filepath, *args, **kwargs): def create_image_cache_filename(input_filepath, *args, **kwargs): if input_filepath: - temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) - temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) - - final_filepath = [] - [final_filepath.append(str(arg)) for arg in args] - final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()]) - - temp_path += slugify(u'_'.join(final_filepath)) - - return temp_path + hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)])) + return os.path.join(TEMPORARY_DIRECTORY, hash_value) else: return None - + def convert_office_document(input_filepath): if os.path.exists(UNOCONV_PATH): @@ -104,15 +95,14 @@ def convert_document(document, *args, **kwargs): return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs) -def convert(input_filepath, *args, **kwargs): +def convert(input_filepath, cleanup_files=True, *args, **kwargs): size = kwargs.get('size') file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT) - extra_options = kwargs.get('extra_options', u'') zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL) rotation = kwargs.get('rotation', DEFAULT_ROTATION) - page = kwargs.get('page', DEFAULT_PAGE_INDEX_NUMBER) - cleanup_files = kwargs.get('cleanup_files', True) + page = kwargs.get('page', DEFAULT_PAGE_NUMBER) quality = kwargs.get('quality', QUALITY_DEFAULT) + transformations = kwargs.get('transformations', []) unoconv_output = None @@ -126,20 +116,32 @@ def convert(input_filepath, *args, **kwargs): if result: unoconv_output = result input_filepath = result - extra_options = u'' - input_arg = u'%s[%s]' % (input_filepath, page) - extra_options += u' -resize %s' % size + transformations.append( + { + 'transformation': TRANSFORMATION_RESIZE, + 'arguments': dict(zip([u'width', u'height'], size.split(DIMENSION_SEPARATOR))) + } + ) + if zoom != 100: - extra_options += u' -resize %d%% ' % zoom + transformations.append( + { + 'transformation': TRANSFORMATION_ZOOM, + 'arguments': {'percent': zoom} + } + ) if rotation != 0 and rotation != 360: - extra_options += u' -rotate %d ' % rotation + transformations.append( + { + 'transformation': TRANSFORMATION_ROTATE, + 'arguments': {'degrees': rotation} + } + ) - if format == u'jpg': - extra_options += u' -quality 85' try: - backend.convert_file(input_filepath=input_arg, arguments=extra_options, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality) + backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format) finally: if cleanup_files: cleanup(input_filepath) @@ -150,11 +152,7 @@ def convert(input_filepath, *args, **kwargs): def get_page_count(input_filepath): - try: - return len(backend.identify_file(unicode(input_filepath)).splitlines()) - except: - #TODO: send to other page number identifying program - return 1 + return backend.get_page_count(input_filepath) def get_document_dimensions(document, *args, **kwargs): @@ -166,7 +164,7 @@ def get_document_dimensions(document, *args, **kwargs): return [0, 0] -def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT): +def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT): #Extract document file input_filepath = document_save_to_temp_dir(document, document.uuid) @@ -178,14 +176,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_form unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep) convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format) - input_arg = u'%s[%s]' % (input_filepath, page) - try: - document_page = document.documentpage_set.get(page_number=page + 1) + document_page = document.documentpage_set.get(page_number=page) transformation_string, warnings = document_page.get_transformation_string() #Apply default transformations - backend.convert_file(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file) + backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file) #Do OCR operations backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) # Process by unpaper @@ -198,3 +194,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_form cleanup(unpaper_output_file) return convert_output_file + + +def get_available_transformations_choices(): + result = [] + for transformation in backend.get_available_transformations(): + transformation_template = u'%s %s' % (TRANSFORMATION_CHOICES[transformation]['label'], u','.join(['<%s>' % argument['name'] if argument['required'] else '[%s]' % argument['name'] for argument in TRANSFORMATION_CHOICES[transformation]['arguments']])) + result.append([transformation, transformation_template]) + + return result diff --git a/apps/converter/backends/__init__.py b/apps/converter/backends/__init__.py index 1d81dd8149..0b42ec89c2 100644 --- a/apps/converter/backends/__init__.py +++ b/apps/converter/backends/__init__.py @@ -21,9 +21,6 @@ class ConverterBase(object): def get_available_transformations(self): raise NotImplementedError("Your %s class has not defined a get_available_transformations() method, which is required." % self.__class__.__name__) - def get_available_transformations_labels(self): - return ([(name, data['label']) for name, data in self.get_available_transformations().items()]) - def get_transformation_string(self, transformation_list): transformations = [] warnings = [] @@ -41,3 +38,5 @@ class ConverterBase(object): return u' '.join(transformations), warnings + def get_page_count(self): + raise NotImplementedError("Your %s class has not defined a get_page_count() method, which is required." % self.__class__.__name__) diff --git a/apps/converter/backends/graphicsmagick/base.py b/apps/converter/backends/graphicsmagick/base.py index 5570650038..9ec5f6a951 100644 --- a/apps/converter/backends/graphicsmagick/base.py +++ b/apps/converter/backends/graphicsmagick/base.py @@ -1,13 +1,16 @@ import subprocess import re -from django.utils.translation import ugettext_lazy as _ - from converter.conf.settings import GM_PATH from converter.conf.settings import GM_SETTINGS from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS from converter.exceptions import ConvertError, UnknownFormat, IdentifyError from converter.backends import ConverterBase +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ + TRANSFORMATION_ZOOM +from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \ + DEFAULT_FILE_FORMAT CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format' CONVERTER_ERROR_STARTS_WITH = u'starts with' @@ -27,16 +30,44 @@ class ConverterClass(ConverterBase): raise IdentifyError(proc.stderr.readline()) return proc.stdout.read() + def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + arguments = [] + if transformations: + for transformation in transformations: + if transformation['transformation'] == TRANSFORMATION_RESIZE: + dimensions = [] + dimensions.append(unicode(transformation['arguments']['width'])) + if 'height' in transformation['arguments']: + dimensions.append(unicode(transformation['arguments']['height'])) + arguments.append(u'-resize') + arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions)) + + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + arguments.append(u'-resize') + arguments.append(u'%d%%' % transformation['arguments']['percent']) + + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + arguments.append(u'-rotate') + arguments.append(u'%s' % transformation['arguments']['degrees']) + + if format == u'jpg': + arguments.append(u'-quality') + arguments.append(u'85') + + # Graphicsmagick page number is 0 base + input_arg = u'%s[%d]' % (input_filepath, page - 1) + + # Specify the file format next to the output filename + output_filepath = u'%s:%s' % (file_format, output_filepath) - def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): command = [] command.append(unicode(GM_PATH)) command.append(u'convert') command.extend(unicode(QUALITY_SETTINGS[quality]).split()) command.extend(unicode(GM_SETTINGS).split()) - command.append(unicode(input_filepath)) + command.append(unicode(input_arg)) if arguments: - command.extend(unicode(arguments).split()) + command.extend(arguments) command.append(unicode(output_filepath)) proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return_code = proc.wait() @@ -49,13 +80,12 @@ class ConverterClass(ConverterBase): else: raise ConvertError(error_line) - def get_format_list(self): """ Call GraphicsMagick to parse all of it's supported file formats, and return a list of the names and descriptions """ - format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*') + format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*') formats = [] command = [] command.append(unicode(GM_PATH)) @@ -66,20 +96,23 @@ class ConverterClass(ConverterBase): return_code = proc.wait() if return_code != 0: raise ConvertError(proc.stderr.readline()) - + for line in proc.stdout.readlines(): fields = format_regex.findall(line) if fields: formats.append((fields[0][0], fields[0][3])) - + return formats - def get_available_transformations(self): - return { - 'rotate': { - 'label': _(u'Rotate [degrees]'), - 'arguments': [{'name': 'degrees'}], - 'command_line': u'-rotate %(degrees)d' - } - } + return [ + TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \ + TRANSFORMATION_DENSITY, TRANSFORMATION_ZOOM + ] + + def get_page_count(self, input_filepath): + try: + return len(self.identify_file(unicode(input_filepath)).splitlines()) + except: + #TODO: send to other page number identifying program + return 1 diff --git a/apps/converter/backends/imagemagick/base.py b/apps/converter/backends/imagemagick/base.py index e2b8c40fdd..11822db515 100644 --- a/apps/converter/backends/imagemagick/base.py +++ b/apps/converter/backends/imagemagick/base.py @@ -9,7 +9,10 @@ from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS from converter.exceptions import ConvertError, UnknownFormat, \ IdentifyError from converter.backends import ConverterBase - +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ + TRANSFORMATION_ZOOM + CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format' @@ -28,13 +31,42 @@ class ConverterClass(ConverterBase): return proc.stdout.read() - def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): + def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + arguments = [] + if transformations: + for transformation in transformations: + if transformation['transformation'] == TRANSFORMATION_RESIZE: + dimensions = [] + dimensions.append(unicode(transformation['arguments']['width'])) + if 'height' in transformation['arguments']: + dimensions.append(unicode(transformation['arguments']['height'])) + arguments.append(u'-resize') + arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions)) + + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + arguments.append(u'-resize') + arguments.append(u'%d%%' % transformation['arguments']['percent']) + + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + arguments.append(u'-rotate') + arguments.append(u'%s' % transformation['arguments']['degrees']) + + if format == u'jpg': + arguments.append(u'-quality') + arguments.append(u'85') + + # Imagemagick page number is 0 base + input_arg = u'%s[%d]' % (input_filepath, page - 1) + + # Specify the file format next to the output filename + output_filepath = u'%s:%s' % (file_format, output_filepath) + command = [] command.append(unicode(IM_CONVERT_PATH)) command.extend(unicode(QUALITY_SETTINGS[quality]).split()) - command.append(unicode(input_filepath)) + command.append(unicode(input_arg)) if arguments: - command.extend(unicode(arguments).split()) + command.extend(arguments) command.append(unicode(output_filepath)) proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return_code = proc.wait() @@ -73,10 +105,15 @@ class ConverterClass(ConverterBase): def get_available_transformations(self): - return { - 'rotate': { - 'label': _(u'Rotate [degrees]'), - 'arguments': [{'name': 'degrees'}], - 'command_line': u'-rotate %(degrees)d' - } - } + return [ + TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \ + TRANSFORMATION_DENSITY, TRANSFORMATION_ZOOM + ] + + + def get_page_count(self, input_filepath): + try: + return len(self.identify_file(unicode(input_filepath)).splitlines()) + except: + #TODO: send to other page number identifying program + return 1 diff --git a/apps/converter/backends/python/__init__.py b/apps/converter/backends/python/__init__.py new file mode 100644 index 0000000000..dfeca950f1 --- /dev/null +++ b/apps/converter/backends/python/__init__.py @@ -0,0 +1,3 @@ +from PIL import Image + +Image.init() diff --git a/apps/converter/backends/python/base.py b/apps/converter/backends/python/base.py new file mode 100644 index 0000000000..4d776454e4 --- /dev/null +++ b/apps/converter/backends/python/base.py @@ -0,0 +1,80 @@ +from PIL import Image + +from django.utils.translation import ugettext_lazy as _ + +from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS +from converter.exceptions import ConvertError, UnknownFormat, IdentifyError +from converter.backends import ConverterBase +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE + +class ConverterClass(ConverterBase): + def identify_file(self, input_filepath, arguments=None): + pass + + + def get_page_count(self, input_filepath): + page_count = 1 + im = Image.open(input_filepath) + + try: + while 1: + im.seek(im.tell()+1) + page_count += 1 + # do something to im + except EOFError: + pass # end of sequence + + return page_count + + + def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): + im = Image.open(input_filepath) + outfile, format = output_filepath.split(u':') + im.save(outfile, format) + ''' + command = [] + command.append(unicode(GM_PATH)) + command.append(u'convert') + command.extend(unicode(QUALITY_SETTINGS[quality]).split()) + command.extend(unicode(GM_SETTINGS).split()) + command.append(unicode(input_filepath)) + if arguments: + command.extend(unicode(arguments).split()) + command.append(unicode(output_filepath)) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + #Got an error from convert program + error_line = proc.stderr.readline() + if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line): + #Try to determine from error message which class of error is it + raise UnknownFormat + else: + raise ConvertError(error_line) + ''' + + def get_format_list(self): + """ + Introspect PIL's internal registry to obtain a list of the + supported file types + """ + formats = [] + for format_name in Image.ID: + formats.append((format_name, u'')) + + return formats + + + def get_available_transformations(self): + return [ + TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE + ] + + + def get_page_count(self, input_filepath): + try: + return len(self.identify_file(unicode(input_filepath)).splitlines()) + except: + #TODO: send to other page number identifying program + return 1 diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py index f73c0f2b64..fcaa1ec9b0 100644 --- a/apps/converter/conf/settings.py +++ b/apps/converter/conf/settings.py @@ -12,7 +12,7 @@ register_settings( {'name': u'UNPAPER_PATH', 'global_name': u'CONVERTER_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, {'name': u'GM_PATH', 'global_name': u'CONVERTER_GM_PATH', 'default': u'/usr/bin/gm', 'description': _(u'File path to graphicsmagick\'s program.'), 'exists': True}, {'name': u'GM_SETTINGS', 'global_name': u'CONVERTER_GM_SETTINGS', 'default': u''}, - {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.imagemagick', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.')}, + {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')}, {'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True}, {'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'}, {'name': u'DEFAULT_OPTIONS', 'global_name': u'CONVERTER_DEFAULT_OPTIONS', 'default': u''}, diff --git a/apps/converter/literals.py b/apps/converter/literals.py index 403400d229..7671fddf8a 100644 --- a/apps/converter/literals.py +++ b/apps/converter/literals.py @@ -1,3 +1,5 @@ +from django.utils.translation import ugettext_lazy as _ + from converter.conf.settings import DEFAULT_OPTIONS from converter.conf.settings import LOW_QUALITY_OPTIONS from converter.conf.settings import HIGH_QUALITY_OPTIONS @@ -5,7 +7,7 @@ from converter.conf.settings import PRINT_QUALITY_OPTIONS DEFAULT_ZOOM_LEVEL = 100 DEFAULT_ROTATION = 0 -DEFAULT_PAGE_INDEX_NUMBER = 0 +DEFAULT_PAGE_NUMBER = 1 DEFAULT_FILE_FORMAT = u'jpg' DEFAULT_OCR_FILE_FORMAT = u'tif' @@ -20,3 +22,43 @@ QUALITY_SETTINGS = { QUALITY_HIGH: HIGH_QUALITY_OPTIONS, QUALITY_PRINT: PRINT_QUALITY_OPTIONS } + +DIMENSION_SEPARATOR = u'x' + +TRANSFORMATION_RESIZE = u'resize' +TRANSFORMATION_ROTATE = u'rotate' +TRANSFORMATION_DENSITY = u'density' +TRANSFORMATION_ZOOM = u'zoom' + +TRANSFORMATION_CHOICES = { + TRANSFORMATION_RESIZE: { + 'label': _(u'Resize'), + 'description': _(u'Resize.'), + 'arguments': [ + {'name': 'width', 'label': _(u'width'), 'required': True}, + {'name': 'height', 'label': _(u'height'), 'required': False}, + ] + }, + TRANSFORMATION_ROTATE: { + 'label': _(u'Rotate'), + 'description': _(u'Rotate by n degress.'), + 'arguments': [ + {'name': 'degrees', 'label': _(u'degrees'), 'required': True} + ] + }, + TRANSFORMATION_DENSITY: { + 'label': _(u'Density'), + 'description': _(u'Change the resolution (ie: DPI) without resizing.'), + 'arguments': [ + {'name': 'width', 'label': _(u'width'), 'required': True}, + {'name': 'height', 'label': _(u'height'), 'required': False}, + ] + }, + TRANSFORMATION_ZOOM: { + 'label': _(u'Zoom'), + 'description': _(u'Zoom by n percent.'), + 'arguments': [ + {'name': 'percent', 'label': _(u'percent'), 'required': True} + ] + }, +} diff --git a/apps/converter/views.py b/apps/converter/views.py index ad95783539..ef7173f908 100644 --- a/apps/converter/views.py +++ b/apps/converter/views.py @@ -1,38 +1,18 @@ from django.utils.translation import ugettext_lazy as _ from django.shortcuts import render_to_response from django.template import RequestContext -from django.utils.importlib import import_module + +from converter import backend from converter.conf.settings import GRAPHICS_BACKEND - -def _lazy_load(fn): - _cached = [] - - def _decorated(): - if not _cached: - _cached.append(fn()) - return _cached[0] - return _decorated - - -@_lazy_load -def _get_backend(): - return import_module(GRAPHICS_BACKEND) - -try: - backend = _get_backend() -except ImportError: - raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND) - - def formats_list(request): #check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW]) context = { 'title': _(u'suported file formats'), 'hide_object': True, - 'object_list': backend.get_format_list(), + 'object_list': sorted(backend.get_format_list()), 'extra_columns': [ { 'name': _(u'name'), diff --git a/apps/documents/managers.py b/apps/documents/managers.py index 3b007a936e..ef87c929fe 100644 --- a/apps/documents/managers.py +++ b/apps/documents/managers.py @@ -13,3 +13,24 @@ class RecentDocumentManager(models.Manager): to_delete = self.model.objects.filter(user=user)[RECENT_COUNT:] for recent_to_delete in to_delete: recent_to_delete.delete() + + +class DocumentPageTransformationManager(models.Manager): + def get_for_document_page(self, document_page): + return self.model.objects.filter(document_page=document_page) + + def get_for_document_page_as_list(self, document_page): + warnings = [] + transformations = [] + for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/documents/models.py b/apps/documents/models.py index e0df918fc1..c058bdb694 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -12,12 +12,13 @@ from python_magic import magic from taggit.managers import TaggableManager from dynamic_search.api import register from converter.api import get_page_count -from converter.api import backend +from converter.api import get_available_transformations_choices from documents.conf.settings import CHECKSUM_FUNCTION from documents.conf.settings import UUID_FUNCTION from documents.conf.settings import STORAGE_BACKEND -from documents.managers import RecentDocumentManager +from documents.managers import RecentDocumentManager, \ + DocumentPageTransformationManager def get_filename_from_uuid(instance, filename): @@ -89,7 +90,7 @@ class Document(models.Model): mimetype, page count and transformation when originally created """ new_document = not self.pk - + transformations = kwargs.pop('transformations', None) super(Document, self).save(*args, **kwargs) if new_document: @@ -98,7 +99,8 @@ class Document(models.Model): self.update_mimetype(save=False) self.save() self.update_page_count(save=False) - self.apply_default_transformations() + if transformations: + self.apply_default_transformations(transformations) @models.permalink def get_absolute_url(self): @@ -199,21 +201,21 @@ class Document(models.Model): exists in storage """ return self.file.storage.exists(self.file.path) + - def apply_default_transformations(self): + def apply_default_transformations(self, transformations): #Only apply default transformations on new documents - if DEFAULT_TRANSFORMATIONS and reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0: - for transformation in DEFAULT_TRANSFORMATIONS: - if 'name' in transformation: - for document_page in self.documentpage_set.all(): - page_transformation = DocumentPageTransformation( - document_page=document_page, - order=0, - transformation=transformation['name']) - if 'arguments' in transformation: - page_transformation.arguments = transformation['arguments'] + if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0: + for transformation in transformations: + for document_page in self.documentpage_set.all(): + page_transformation = DocumentPageTransformation( + document_page=document_page, + order=0, + transformation=transformation.get('transformation'), + arguments=transformation.get('arguments') + ) - page_transformation.save() + page_transformation.save() class DocumentTypeFilename(models.Model): @@ -259,9 +261,6 @@ class DocumentPage(models.Model): def get_absolute_url(self): return ('document_page_view', [self.pk]) - def get_transformation_string(self): - return backend.get_transformation_string(self.documentpagetransformation_set.values('transformation', 'arguments')) - class DocumentPageTransformation(models.Model): """ @@ -270,9 +269,11 @@ class DocumentPageTransformation(models.Model): """ document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page')) order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True) - transformation = models.CharField(choices=backend.get_available_transformations_labels(), max_length=128, verbose_name=_(u'transformation')) + transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation')) arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}')) + objects = DocumentPageTransformationManager() + def __unicode__(self): return u'"%s" for %s' % (self.get_transformation_display(), unicode(self.document_page)) diff --git a/apps/documents/views.py b/apps/documents/views.py index 4a3247c576..c727407b88 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -20,10 +20,11 @@ from common.widgets import two_state_template from common.literals import PAGE_SIZE_DIMENSIONS, \ PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE from common.conf.settings import DEFAULT_PAPER_SIZE -from converter.api import convert_document, QUALITY_DEFAULT +from converter.api import convert_document from converter.exceptions import UnkownConvertError, UnknownFormat -from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \ - DEFAULT_FILE_FORMAT, QUALITY_PRINT +from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \ + DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \ + DEFAULT_PAGE_NUMBER from filetransfers.api import serve_file from grouping.utils import get_document_group_subtemplate from metadata.api import save_metadata_list, \ @@ -286,38 +287,14 @@ def document_edit(request, document_id): }, context_instance=RequestContext(request)) -def calculate_converter_arguments(document, *args, **kwargs): - size = kwargs.pop('size', PREVIEW_SIZE) - quality = kwargs.pop('quality', QUALITY_DEFAULT) - page = kwargs.pop('page', 1) - file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT) - zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL) - rotation = kwargs.pop('rotation', DEFAULT_ROTATION) - - document_page = DocumentPage.objects.get(document=document, page_number=page) - transformation_string, warnings = document_page.get_transformation_string() - - arguments = { - 'size': size, - 'file_format': file_format, - 'quality': quality, - 'extra_options': transformation_string, - 'page': page - 1, - 'zoom': zoom, - 'rotation': rotation - } - - return arguments, warnings - - def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT): check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW]) document = get_object_or_404(Document, pk=document_id) - page = int(request.GET.get('page', 1)) + page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER)) - zoom = int(request.GET.get('zoom', 100)) + zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL)) if zoom < ZOOM_MIN_LEVEL: zoom = ZOOM_MIN_LEVEL @@ -325,16 +302,17 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ if zoom > ZOOM_MAX_LEVEL: zoom = ZOOM_MAX_LEVEL - rotation = int(request.GET.get('rotation', 0)) % 360 + rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360 - arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation) + document_page = get_object_or_404(document.documentpage_set, page_number=page) + transformations, warnings = DocumentPageTransformation.objects.get_for_document_page_as_list(document_page) if warnings and (request.user.is_staff or request.user.is_superuser): for warning in warnings: messages.warning(request, _(u'Page transformation error: %s') % warning) - + try: - output_file = convert_document(document, **arguments) + output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations) except UnkownConvertError, e: if request.user.is_staff or request.user.is_superuser: messages.error(request, e) @@ -592,13 +570,13 @@ def document_page_view(request, document_page_id): document_page = get_object_or_404(DocumentPage, pk=document_page_id) - zoom = int(request.GET.get('zoom', 100)) - rotation = int(request.GET.get('rotation', 0)) + zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL)) + rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation) base_title = _(u'details for: %s') % document_page - if zoom != 100: + if zoom != DEFAULT_ZOOM_LEVEL: zoom_text = u'(%d%%)' % zoom else: zoom_text = u'' diff --git a/apps/ocr/views.py b/apps/ocr/views.py index eb4f3a945d..1dbff3e278 100644 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -6,9 +6,7 @@ from django.shortcuts import render_to_response, get_object_or_404 from django.template import RequestContext from django.contrib import messages from django.views.generic.list_detail import object_list -from django.core.urlresolvers import reverse from django.utils.translation import ugettext_lazy as _ -from django.conf import settings from celery.task.control import inspect from permissions.api import check_permissions diff --git a/apps/sources/managers.py b/apps/sources/managers.py index aee45cf4c1..f45e06e340 100644 --- a/apps/sources/managers.py +++ b/apps/sources/managers.py @@ -6,3 +6,19 @@ class SourceTransformationManager(models.Manager): def get_for_object(self, obj): ct = ContentType.objects.get_for_model(obj) return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk) + + def get_for_object_as_list(self, obj): + warnings = [] + transformations = [] + for transformation in self.get_for_object(obj).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/sources/models.py b/apps/sources/models.py index ffd4211fe6..795f269132 100644 --- a/apps/sources/models.py +++ b/apps/sources/models.py @@ -6,7 +6,8 @@ from django.contrib.contenttypes import generic from documents.models import DocumentType from documents.managers import RecentDocumentManager from metadata.models import MetadataType -from converter.api import backend +from converter.api import get_available_transformations_choices +from converter.literals import DIMENSION_SEPARATOR from sources.managers import SourceTransformationManager @@ -118,7 +119,7 @@ class StagingFolder(InteractiveBaseModel): if self.preview_height: dimensions.append(unicode(self.preview_height)) - return u'x'.join(dimensions) + return DIMENSION_SEPARATOR.join(dimensions) class Meta(InteractiveBaseModel.Meta): verbose_name = _(u'staging folder') @@ -162,8 +163,8 @@ class SourceTransformation(models.Model): object_id = models.PositiveIntegerField() content_object = generic.GenericForeignKey('content_type', 'object_id') order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True) - transformation = models.CharField(choices=backend.get_available_transformations_labels(), max_length=128, verbose_name=_(u'transformation')) - arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}')) + transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation')) + arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}') objects = SourceTransformationManager() diff --git a/apps/sources/staging.py b/apps/sources/staging.py index a608f1b30f..4a80435a3a 100644 --- a/apps/sources/staging.py +++ b/apps/sources/staging.py @@ -11,7 +11,6 @@ from django.utils.translation import ugettext_lazy as _ from converter.api import convert, cache_cleanup DEFAULT_STAGING_DIRECTORY = u'/tmp' -#from documents.conf.settings import DEFAULT_TRANSFORMATIONS HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() #TODO: Do benchmarks @@ -106,16 +105,15 @@ class StagingFile(object): def upload(self): """ Return a StagingFile encapsulated in a File class instance to - allow for easier upload a staging files + allow for easier upload of staging files """ try: return File(file(self.filepath, 'rb'), name=self.filename) except Exception, exc: raise Exception(ugettext(u'Unable to upload staging file: %s') % exc) - def delete(self, preview_size): - # tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS) - cache_cleanup(self.filepath, size=preview_size)# , extra_options=tranformation_string) + def delete(self, preview_size, transformations): + cache_cleanup(self.filepath, size=preview_size, transformations=transformations) try: os.unlink(self.filepath) except OSError, exc: @@ -124,24 +122,7 @@ class StagingFile(object): else: raise OSError(ugettext(u'Unable to delete staging file: %s') % exc) - def preview(self, preview_size): + def preview(self, preview_size, transformations): errors = [] - # tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS) - # output_file = convert(self.filepath, size=STAGING_FILES_PREVIEW_SIZE, extra_options=tranformation_string, cleanup_files=False) - output_file = convert(self.filepath, size=preview_size, cleanup_files=False) + output_file = convert(self.filepath, size=preview_size, cleanup_files=False, transformations=transformations) return output_file, errors - - -def get_transformation_string(transformations): - transformation_list = [] - errors = [] - #for transformation in transformations: - # try: - # if transformation['name'] in TRANFORMATION_CHOICES: - # output = TRANFORMATION_CHOICES[transformation['name']] % eval(transformation['arguments']) - # transformation_list.append(output) - # except Exception, e: - # errors.append(e) - - #tranformation_string = ' '.join(transformation_list) - return tranformation_string, errors diff --git a/apps/sources/views.py b/apps/sources/views.py index 6a48dc1e34..05766d78a9 100644 --- a/apps/sources/views.py +++ b/apps/sources/views.py @@ -129,9 +129,13 @@ def upload_interactive(request, source_type=None, source_id=None): expand = True else: expand = False - if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type)): + + transformations, errors = SourceTransformation.objects.get_for_object_as_list(web_form) + + if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type=document_type, transformations=transformations)): instance = form.save() instance.save() + instance.apply_default_transformations(transformations) if document_type: instance.document_type = document_type _handle_save_document(request, instance, form) @@ -174,16 +178,18 @@ def upload_interactive(request, source_type=None, source_id=None): expand = True else: expand = False - if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type)): + transformations, errors = SourceTransformation.objects.get_for_object_as_list(staging_folder) + if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type=document_type, transformations=transformations)): document = Document(file=staging_file.upload()) if document_type: document.document_type = document_type document.save() + document.apply_default_transformations(transformations) _handle_save_document(request, document, form) messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename) if staging_folder.delete_after_upload: - staging_file.delete(staging_folder.get_preview_size()) + staging_file.delete(preview_size=staging_folder.get_preview_size(), transformations=transformations) messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename) except Exception, e: messages.error(request, e) @@ -260,7 +266,7 @@ def _handle_save_document(request, document, form=None): create_history(HISTORY_DOCUMENT_CREATED, document, {'user': request.user}) -def _handle_zip_file(request, uploaded_file, document_type=None): +def _handle_zip_file(request, uploaded_file, document_type=None, transformations=None): filename = getattr(uploaded_file, 'filename', getattr(uploaded_file, 'name', '')) if filename.lower().endswith('zip'): zfobj = zipfile.ZipFile(uploaded_file) @@ -285,7 +291,12 @@ def staging_file_preview(request, source_type, source_id, staging_file_id): staging_folder = get_object_or_404(StagingFolder, pk=source_id) StagingFile = create_staging_file_class(request, staging_folder.folder_path) try: - output_file, errors = StagingFile.get(staging_file_id).preview(staging_folder.get_preview_size()) + transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder) + + output_file, errors = StagingFile.get(staging_file_id).preview( + preview_size=staging_folder.get_preview_size(), + transformations=transformations + ) if errors and (request.user.is_staff or request.user.is_superuser): for error in errors: messages.warning(request, _(u'Staging file transformation error: %(error)s') % { @@ -313,15 +324,19 @@ def staging_file_delete(request, source_type, source_id, staging_file_id): StagingFile = create_staging_file_class(request, staging_folder.folder_path) staging_file = StagingFile.get(staging_file_id) - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', '/'))) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', '/'))) if request.method == 'POST': try: - staging_file.delete(staging_folder.get_preview_size()) + transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder) + staging_file.delete( + preview_size=staging_folder.get_preview_size(), + transformations=transformations + ) messages.success(request, _(u'Staging file delete successfully.')) except Exception, e: - messages.error(request, e) + messages.error(request, _(u'Staging file delete error; %s.') % e) return HttpResponseRedirect(next) results = get_active_tab_links() @@ -509,11 +524,17 @@ def setup_source_transformation_edit(request, transformation_id): form = SourceTransformationForm(instance=source_transformation, data=request.POST) if form.is_valid(): try: - form.save() - messages.success(request, _(u'Source transformation edited successfully')) - return HttpResponseRedirect(next) - except Exception, e: - messages.error(request, _(u'Error editing source transformation; %s') % e) + # Test the validity of the argument field + eval(form.cleaned_data['arguments'], {}) + except: + messages.error(request, _(u'Source transformation argument error.')) + else: + try: + form.save() + messages.success(request, _(u'Source transformation edited successfully')) + return HttpResponseRedirect(next) + except Exception, e: + messages.error(request, _(u'Error editing source transformation; %s') % e) else: form = SourceTransformationForm(instance=source_transformation) @@ -598,13 +619,19 @@ def setup_source_transformation_create(request, source_type, source_id): form = SourceTransformationForm_create(request.POST) if form.is_valid(): try: - source_tranformation = form.save(commit=False) - source_tranformation.content_object = source - source_tranformation.save() - messages.success(request, _(u'Source transformation created successfully')) - return HttpResponseRedirect(redirect_view) - except Exception, e: - messages.error(request, _(u'Error creating source transformation; %s') % e) + # Test the validity of the argument field + eval(form.cleaned_data['arguments'], {}) + except: + messages.error(request, _(u'Source transformation argument error.')) + else: + try: + source_tranformation = form.save(commit=False) + source_tranformation.content_object = source + source_tranformation.save() + messages.success(request, _(u'Source transformation created successfully')) + return HttpResponseRedirect(redirect_view) + except Exception, e: + messages.error(request, _(u'Error creating source transformation; %s') % e) else: form = SourceTransformationForm_create()