diff --git a/README.md b/README.md index 2e9f754566..662d33ab2a 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Open source, Django based document manager with custom metadata indexing, file s [Website](http://bit.ly/mayan-edms) -Requirements +Basic requirements --- Python: @@ -15,6 +15,21 @@ Python: * django-filetransfers - File upload/download abstraction * celery- asynchronous task queue/job queue based on distributed message passing * django-celery - celery Django integration +* django-mptt - Utilities for implementing a modified pre-order traversal tree in Django +* python-magic - A Python wrapper for libmagic +* django-taggit - Simple tagging for Django +* slate - The simplest way to extract text from PDFs in Python + + +Execute `pip install -r requirements/production.txt` to install the Python/Django dependencies automatically. + +Executables: + +* tesseract-ocr - An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google. +* unpaper - post-processing scanned and photocopied book pages + +Optional requirements +--- For the GridFS storage backend: @@ -22,13 +37,12 @@ For the GridFS storage backend: * GridFS - a storage specification for large objects in MongoDB * MongoDB - a scalable, open source, document-oriented database -Or execute pip install -r requirements/production.txt to install the dependencies automatically. +Libraries: -Executables: +* libmagic - MIME detection library; if it is not installed, Mayan will fall back to Python's simpler built-in mimetypes library + +Mayan can switch between different image conversion backends; at the moment these two are supported: -* libmagic - MIME detection library -* tesseract-ocr - An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google. 
-* unpaper - post-processing scanned and photocopied book pages * ImageMagick - Convert, Edit, Or Compose Bitmap Images * GraphicMagick - Robust collection of tools and libraries to read, write, and manipulate an image. diff --git a/apps/converter/__init__.py b/apps/converter/__init__.py index 331738373a..ffaef00c09 100644 --- a/apps/converter/__init__.py +++ b/apps/converter/__init__.py @@ -1,11 +1,16 @@ from django.utils.translation import ugettext_lazy as _ +from django.core.exceptions import ImproperlyConfigured from navigation.api import register_sidebar_template -TRANFORMATION_CHOICES = { - u'rotate': u'-rotate %(degrees)d' -} +from converter.utils import load_backend +from converter.conf.settings import GRAPHICS_BACKEND formats_list = {'text': _('file formats'), 'view': 'formats_list', 'famfam': 'pictures'} register_sidebar_template(['formats_list'], 'converter_file_formats_help.html') + +try: + backend = load_backend().ConverterClass() +except ImproperlyConfigured: + raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND) diff --git a/apps/converter/api.py b/apps/converter/api.py index d7595de8c3..665a980c27 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -1,66 +1,29 @@ import os import subprocess - -from django.utils.importlib import import_module -from django.template.defaultfilters import slugify - -from converter.conf.settings import UNPAPER_PATH -from converter.conf.settings import OCR_OPTIONS -from converter.conf.settings import DEFAULT_OPTIONS -from converter.conf.settings import LOW_QUALITY_OPTIONS -from converter.conf.settings import HIGH_QUALITY_OPTIONS -from converter.conf.settings import PRINT_QUALITY_OPTIONS -from converter.conf.settings import GRAPHICS_BACKEND -from converter.conf.settings import UNOCONV_PATH - -from converter.exceptions import UnpaperError, OfficeConversionError +import hashlib from common import TEMPORARY_DIRECTORY from documents.utils import document_save_to_temp_dir 
-DEFAULT_ZOOM_LEVEL = 100 -DEFAULT_ROTATION = 0 -DEFAULT_PAGE_INDEX_NUMBER = 0 -DEFAULT_FILE_FORMAT = u'jpg' -DEFAULT_OCR_FILE_FORMAT = u'tif' +from converter.conf.settings import UNOCONV_PATH +from converter.exceptions import UnpaperError, OfficeConversionError +from converter.literals import DEFAULT_PAGE_NUMBER, \ + DEFAULT_OCR_FILE_FORMAT, QUALITY_DEFAULT, DEFAULT_ZOOM_LEVEL, \ + DEFAULT_ROTATION, DEFAULT_FILE_FORMAT, QUALITY_HIGH -QUALITY_DEFAULT = u'quality_default' -QUALITY_LOW = u'quality_low' -QUALITY_HIGH = u'quality_high' -QUALITY_PRINT = u'quality_print' - -QUALITY_SETTINGS = { - QUALITY_DEFAULT: DEFAULT_OPTIONS, - QUALITY_LOW: LOW_QUALITY_OPTIONS, - QUALITY_HIGH: HIGH_QUALITY_OPTIONS, - QUALITY_PRINT: PRINT_QUALITY_OPTIONS -} +from converter import backend +from converter.literals import TRANSFORMATION_CHOICES +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ + TRANSFORMATION_ZOOM +from converter.literals import DIMENSION_SEPARATOR +HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() + CONVERTER_OFFICE_FILE_EXTENSIONS = [ u'ods', u'docx', u'doc' ] - -def _lazy_load(fn): - _cached = [] - - def _decorated(): - if not _cached: - _cached.append(fn()) - return _cached[0] - return _decorated - - -@_lazy_load -def _get_backend(): - return import_module(GRAPHICS_BACKEND) - -try: - backend = _get_backend() -except ImportError: - raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND) - - def cleanup(filename): """ Tries to remove the given filename. 
Ignores non-existent files @@ -71,21 +34,6 @@ def cleanup(filename): pass -def execute_unpaper(input_filepath, output_filepath): - """ - Executes the program unpaper using subprocess's Popen - """ - command = [] - command.append(UNPAPER_PATH) - command.append(u'--overwrite') - command.append(input_filepath) - command.append(output_filepath) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise UnpaperError(proc.stderr.readline()) - - def execute_unoconv(input_filepath, arguments=''): """ Executes the program unoconv using subprocess's Popen @@ -109,19 +57,11 @@ def cache_cleanup(input_filepath, *args, **kwargs): def create_image_cache_filename(input_filepath, *args, **kwargs): if input_filepath: - temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) - temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) - - final_filepath = [] - [final_filepath.append(str(arg)) for arg in args] - final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()]) - - temp_path += slugify(u'_'.join(final_filepath)) - - return temp_path + hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)])) + return os.path.join(TEMPORARY_DIRECTORY, hash_value) else: return None - + def convert_office_document(input_filepath): if os.path.exists(UNOCONV_PATH): @@ -138,15 +78,14 @@ def convert_document(document, *args, **kwargs): return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs) -def convert(input_filepath, *args, **kwargs): +def convert(input_filepath, cleanup_files=True, *args, **kwargs): size = kwargs.get('size') file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT) - extra_options = kwargs.get('extra_options', u'') zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL) rotation = kwargs.get('rotation', DEFAULT_ROTATION) - page = kwargs.get('page', DEFAULT_PAGE_INDEX_NUMBER) - cleanup_files = 
kwargs.get('cleanup_files', True) + page = kwargs.get('page', DEFAULT_PAGE_NUMBER) quality = kwargs.get('quality', QUALITY_DEFAULT) + transformations = kwargs.get('transformations', []) unoconv_output = None @@ -160,20 +99,32 @@ def convert(input_filepath, *args, **kwargs): if result: unoconv_output = result input_filepath = result - extra_options = u'' - input_arg = u'%s[%s]' % (input_filepath, page) - extra_options += u' -resize %s' % size + transformations.append( + { + 'transformation': TRANSFORMATION_RESIZE, + 'arguments': dict(zip([u'width', u'height'], size.split(DIMENSION_SEPARATOR))) + } + ) + if zoom != 100: - extra_options += u' -resize %d%% ' % zoom + transformations.append( + { + 'transformation': TRANSFORMATION_ZOOM, + 'arguments': {'percent': zoom} + } + ) if rotation != 0 and rotation != 360: - extra_options += u' -rotate %d ' % rotation + transformations.append( + { + 'transformation': TRANSFORMATION_ROTATE, + 'arguments': {'degrees': rotation} + } + ) - if format == u'jpg': - extra_options += u' -quality 85' try: - backend.execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality) + backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format) finally: if cleanup_files: cleanup(input_filepath) @@ -184,51 +135,22 @@ def convert(input_filepath, *args, **kwargs): def get_page_count(input_filepath): - try: - return len(backend.execute_identify(unicode(input_filepath)).splitlines()) - except: - #TODO: send to other page number identifying program - return 1 + return backend.get_page_count(input_filepath) def get_document_dimensions(document, *args, **kwargs): document_filepath = create_image_cache_filename(document.checksum, *args, **kwargs) if os.path.exists(document_filepath): options = [u'-format', u'%w %h'] - return [int(dimension) for dimension in 
backend.execute_identify(unicode(document_filepath), options).split()] + return [int(dimension) for dimension in backend.identify_file(unicode(document_filepath), options).split()] else: return [0, 0] -def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT): - #Extract document file - input_filepath = document_save_to_temp_dir(document, document.uuid) - - #Convert for OCR - temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) - temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) - transformation_output_file = u'%s_trans%s%s%s' % (temp_path, page, os.extsep, file_format) - unpaper_input_file = u'%s_unpaper_in%s%spnm' % (temp_path, page, os.extsep) - unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep) - convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format) - - input_arg = u'%s[%s]' % (input_filepath, page) - - try: - document_page = document.documentpage_set.get(page_number=page + 1) - transformation_string, warnings = document_page.get_transformation_string() - - #Apply default transformations - backend.execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file) - #Do OCR operations - backend.execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) - # Process by unpaper - execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file) - # Convert to tif - backend.execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file) - finally: - cleanup(transformation_output_file) - cleanup(unpaper_input_file) - cleanup(unpaper_output_file) - - return convert_output_file +def get_available_transformations_choices(): + result = [] + for transformation in backend.get_available_transformations(): + transformation_template = u'%s %s' % 
(TRANSFORMATION_CHOICES[transformation]['label'], u','.join(['<%s>' % argument['name'] if argument['required'] else '[%s]' % argument['name'] for argument in TRANSFORMATION_CHOICES[transformation]['arguments']])) + result.append([transformation, transformation_template]) + + return result diff --git a/apps/converter/backends/__init__.py b/apps/converter/backends/__init__.py index e69de29bb2..a98881632b 100644 --- a/apps/converter/backends/__init__.py +++ b/apps/converter/backends/__init__.py @@ -0,0 +1,18 @@ +class ConverterBase(object): + """ + Base class that all backend classes must inherit + """ + def convert_file(self, input_filepath, *args, **kwargs): + raise NotImplementedError("Your %s class has not defined a convert_file() method, which is required." % self.__class__.__name__) + + def convert_document(self, document, *args, **kwargs): + raise NotImplementedError("Your %s class has not defined a convert_document() method, which is required." % self.__class__.__name__) + + def get_format_list(self): + raise NotImplementedError("Your %s class has not defined a get_format_list() method, which is required." % self.__class__.__name__) + + def get_available_transformations(self): + raise NotImplementedError("Your %s class has not defined a get_available_transformations() method, which is required." % self.__class__.__name__) + + def get_page_count(self): + raise NotImplementedError("Your %s class has not defined a get_page_count() method, which is required." 
% self.__class__.__name__) diff --git a/apps/converter/backends/graphicsmagick.py b/apps/converter/backends/graphicsmagick.py deleted file mode 100644 index 360a24a58b..0000000000 --- a/apps/converter/backends/graphicsmagick.py +++ /dev/null @@ -1,71 +0,0 @@ -import subprocess -import re - -from converter.conf.settings import GM_PATH -from converter.conf.settings import GM_SETTINGS -from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS -from converter.exceptions import ConvertError, UnknownFormat, IdentifyError - -CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format' -CONVERTER_ERROR_STARTS_WITH = u'starts with' - - -def execute_identify(input_filepath, arguments=None): - command = [] - command.append(unicode(GM_PATH)) - command.append(u'identify') - if arguments: - command.extend(arguments) - command.append(unicode(input_filepath)) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise IdentifyError(proc.stderr.readline()) - return proc.stdout.read() - - -def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): - command = [] - command.append(unicode(GM_PATH)) - command.append(u'convert') - command.extend(unicode(QUALITY_SETTINGS[quality]).split()) - command.extend(unicode(GM_SETTINGS).split()) - command.append(unicode(input_filepath)) - if arguments: - command.extend(unicode(arguments).split()) - command.append(unicode(output_filepath)) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - #Got an error from convert program - error_line = proc.stderr.readline() - if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line): - #Try to determine from error message which class of error is it - raise UnknownFormat - else: - raise ConvertError(error_line) - - -def 
get_format_list(): - """ - Call GraphicsMagick to parse all of it's supported file formats, and - return a list of the names and descriptions - """ - format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*') - formats = [] - command = [] - command.append(unicode(GM_PATH)) - command.append(u'convert') - command.append(u'-list') - command.append(u'formats') - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise ConvertError(proc.stderr.readline()) - - for line in proc.stdout.readlines(): - fields = format_regex.findall(line) - if fields: - formats.append((fields[0][0], fields[0][3])) - - return formats diff --git a/apps/converter/backends/base.py b/apps/converter/backends/graphicsmagick/__init__.py similarity index 100% rename from apps/converter/backends/base.py rename to apps/converter/backends/graphicsmagick/__init__.py diff --git a/apps/converter/backends/graphicsmagick/base.py b/apps/converter/backends/graphicsmagick/base.py new file mode 100644 index 0000000000..54ebbaaa95 --- /dev/null +++ b/apps/converter/backends/graphicsmagick/base.py @@ -0,0 +1,119 @@ +import subprocess +import re + +from converter.conf.settings import GM_PATH +from converter.conf.settings import GM_SETTINGS +from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS +from converter.exceptions import ConvertError, UnknownFormat, \ + IdentifyError +from converter.backends import ConverterBase +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ + TRANSFORMATION_ZOOM +from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \ + DEFAULT_FILE_FORMAT + +CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format' +CONVERTER_ERROR_STARTS_WITH = u'starts with' + + +class ConverterClass(ConverterBase): + def identify_file(self, input_filepath, arguments=None): + command = [] + 
command.append(unicode(GM_PATH)) + command.append(u'identify') + if arguments: + command.extend(arguments) + command.append(unicode(input_filepath)) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise IdentifyError(proc.stderr.readline()) + return proc.stdout.read() + + def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + arguments = [] + if transformations: + for transformation in transformations: + if transformation['transformation'] == TRANSFORMATION_RESIZE: + dimensions = [] + dimensions.append(unicode(transformation['arguments']['width'])) + if 'height' in transformation['arguments']: + dimensions.append(unicode(transformation['arguments']['height'])) + arguments.append(u'-resize') + arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions)) + + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + arguments.append(u'-resize') + arguments.append(u'%d%%' % transformation['arguments']['percent']) + + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + arguments.append(u'-rotate') + arguments.append(u'%s' % transformation['arguments']['degrees']) + + if format == u'jpeg': + arguments.append(u'-quality') + arguments.append(u'85') + + # Graphicsmagick page number is 0 base + input_arg = u'%s[%d]' % (input_filepath, page - 1) + + # Specify the file format next to the output filename + output_filepath = u'%s:%s' % (file_format, output_filepath) + + command = [] + command.append(unicode(GM_PATH)) + command.append(u'convert') + command.extend(unicode(QUALITY_SETTINGS[quality]).split()) + command.extend(unicode(GM_SETTINGS).split()) + command.append(unicode(input_arg)) + if arguments: + command.extend(arguments) + command.append(unicode(output_filepath)) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, 
stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + #Got an error from convert program + error_line = proc.stderr.readline() + if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line): + #Try to determine from error message which class of error is it + raise UnknownFormat + else: + raise ConvertError(error_line) + + def get_format_list(self): + """ + Call GraphicsMagick to parse all of it's supported file formats, and + return a list of the names and descriptions + """ + format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*') + formats = [] + command = [] + command.append(unicode(GM_PATH)) + command.append(u'convert') + command.append(u'-list') + command.append(u'formats') + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise ConvertError(proc.stderr.readline()) + + for line in proc.stdout.readlines(): + fields = format_regex.findall(line) + if fields: + formats.append((fields[0][0], fields[0][3])) + + return formats + + def get_available_transformations(self): + return [ + TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \ + TRANSFORMATION_ZOOM + ] + + def get_page_count(self, input_filepath): + try: + return len(self.identify_file(unicode(input_filepath)).splitlines()) + except: + #TODO: send to other page number identifying program + return 1 diff --git a/apps/converter/backends/imagemagick.py b/apps/converter/backends/imagemagick.py deleted file mode 100644 index 4542ebdeba..0000000000 --- a/apps/converter/backends/imagemagick.py +++ /dev/null @@ -1,68 +0,0 @@ -import subprocess -import re - -from converter.conf.settings import IM_IDENTIFY_PATH -from converter.conf.settings import IM_CONVERT_PATH -from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS -from converter.exceptions import ConvertError, UnknownFormat, \ - IdentifyError - -CONVERTER_ERROR_STRING_NO_DECODER 
= u'no decode delegate for this image format' - - -def execute_identify(input_filepath, arguments=None): - command = [] - command.append(unicode(IM_IDENTIFY_PATH)) - if arguments: - command.extend(arguments) - command.append(unicode(input_filepath)) - - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise IdentifyError(proc.stderr.readline()) - return proc.stdout.read() - - -def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None): - command = [] - command.append(unicode(IM_CONVERT_PATH)) - command.extend(unicode(QUALITY_SETTINGS[quality]).split()) - command.append(unicode(input_filepath)) - if arguments: - command.extend(unicode(arguments).split()) - command.append(unicode(output_filepath)) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - #Got an error from convert program - error_line = proc.stderr.readline() - if CONVERTER_ERROR_STRING_NO_DECODER in error_line: - #Try to determine from error message which class of error is it - raise UnknownFormat - else: - raise ConvertError(error_line) - - -def get_format_list(): - """ - Call ImageMagick to parse all of it's supported file formats, and - return a list of the names and descriptions - """ - format_regex = re.compile(' *([A-Z0-9]+)[*]? 
+([A-Z0-9]+) +([rw\-+]+) *(.*).*') - formats = [] - command = [] - command.append(unicode(IM_CONVERT_PATH)) - command.append(u'-list') - command.append(u'format') - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise ConvertError(proc.stderr.readline()) - - for line in proc.stdout.readlines(): - fields = format_regex.findall(line) - if fields: - formats.append((fields[0][0], fields[0][3])) - - return formats diff --git a/apps/converter/backends/imagemagick/__init__.py b/apps/converter/backends/imagemagick/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/converter/backends/imagemagick/base.py b/apps/converter/backends/imagemagick/base.py new file mode 100644 index 0000000000..4f924316ed --- /dev/null +++ b/apps/converter/backends/imagemagick/base.py @@ -0,0 +1,118 @@ +import subprocess +import re + +from converter.conf.settings import IM_IDENTIFY_PATH +from converter.conf.settings import IM_CONVERT_PATH +from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS +from converter.exceptions import ConvertError, UnknownFormat, \ + IdentifyError +from converter.backends import ConverterBase +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \ + TRANSFORMATION_ZOOM +from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \ + DEFAULT_FILE_FORMAT + +CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format' + + +class ConverterClass(ConverterBase): + def identify_file(self, input_filepath, arguments=None): + command = [] + command.append(unicode(IM_IDENTIFY_PATH)) + if arguments: + command.extend(arguments) + command.append(unicode(input_filepath)) + + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise IdentifyError(proc.stderr.readline()) 
+ return proc.stdout.read() + + def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + arguments = [] + if transformations: + for transformation in transformations: + if transformation['transformation'] == TRANSFORMATION_RESIZE: + dimensions = [] + dimensions.append(unicode(transformation['arguments']['width'])) + if 'height' in transformation['arguments']: + dimensions.append(unicode(transformation['arguments']['height'])) + arguments.append(u'-resize') + arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions)) + + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + arguments.append(u'-resize') + arguments.append(u'%d%%' % transformation['arguments']['percent']) + + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + arguments.append(u'-rotate') + arguments.append(u'%s' % transformation['arguments']['degrees']) + + if format == u'jpeg': + arguments.append(u'-quality') + arguments.append(u'85') + + # Imagemagick page number is 0 base + input_arg = u'%s[%d]' % (input_filepath, page - 1) + + # Specify the file format next to the output filename + output_filepath = u'%s:%s' % (file_format, output_filepath) + + command = [] + command.append(unicode(IM_CONVERT_PATH)) + command.extend(unicode(QUALITY_SETTINGS[quality]).split()) + command.append(unicode(input_arg)) + if arguments: + command.extend(arguments) + command.append(unicode(output_filepath)) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + #Got an error from convert program + error_line = proc.stderr.readline() + if CONVERTER_ERROR_STRING_NO_DECODER in error_line: + #Try to determine from error message which class of error is it + raise UnknownFormat + else: + raise ConvertError(error_line) + + + def get_format_list(self): + """ + Call ImageMagick to parse all of it's 
supported file formats, and + return a list of the names and descriptions + """ + format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*') + formats = [] + command = [] + command.append(unicode(IM_CONVERT_PATH)) + command.append(u'-list') + command.append(u'format') + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise ConvertError(proc.stderr.readline()) + + for line in proc.stdout.readlines(): + fields = format_regex.findall(line) + if fields: + formats.append((fields[0][0], fields[0][3])) + + return formats + + + def get_available_transformations(self): + return [ + TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \ + TRANSFORMATION_ZOOM + ] + + + def get_page_count(self, input_filepath): + try: + return len(self.identify_file(unicode(input_filepath)).splitlines()) + except: + #TODO: send to other page number identifying program + return 1 diff --git a/apps/converter/backends/python/__init__.py b/apps/converter/backends/python/__init__.py new file mode 100644 index 0000000000..dfeca950f1 --- /dev/null +++ b/apps/converter/backends/python/__init__.py @@ -0,0 +1,3 @@ +from PIL import Image + +Image.init() diff --git a/apps/converter/backends/python/base.py b/apps/converter/backends/python/base.py new file mode 100644 index 0000000000..25448346ff --- /dev/null +++ b/apps/converter/backends/python/base.py @@ -0,0 +1,93 @@ +import slate +from PIL import Image + +from django.utils.translation import ugettext_lazy as _ + +from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS +from converter.exceptions import ConvertError, UnknownFormat, IdentifyError +from converter.backends import ConverterBase +from converter.literals import TRANSFORMATION_RESIZE, \ + TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM +from converter.literals import QUALITY_DEFAULT, DEFAULT_PAGE_NUMBER, \ + DEFAULT_FILE_FORMAT +from converter.utils import get_mimetype + + 
+class ConverterClass(ConverterBase): + def get_page_count(self, input_filepath): + page_count = 1 + + mimetype, encoding = get_mimetype(input_filepath) + if mimetype == 'application/pdf': + # If file is a PDF open it with slate to determine the page + # count + with open(input_filepath) as fd: + pages = slate.PDF(fd) + return len(pages) + + try: + im = Image.open(input_filepath) + except IOError: #cannot identify image file + # Return a page count of 1, to atleast allow the document + # to be created + return 1 + + try: + while 1: + im.seek(im.tell()+1) + page_count += 1 + # do something to im + except EOFError: + pass # end of sequence + + return page_count + + def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT): + try: + im = Image.open(input_filepath) + except Exception: # Python Imaging Library doesn't recognize it as an image + raise UnknownFormat + + current_page = 0 + try: + while current_page == page - 1: + im.seek(im.tell() + 1) + current_page += 1 + # do something to im + except EOFError: + pass # end of sequence + + if transformations: + for transformation in transformations: + aspect = 1.0 * im.size[1] / im.size[0] + if transformation['transformation'] == TRANSFORMATION_RESIZE: + width = int(transformation['arguments']['width']) + height = int(transformation['arguments'].get('height', 1.0 * width * aspect)) + im = im.resize((width, height), Image.ANTIALIAS) + elif transformation['transformation'] == TRANSFORMATION_ZOOM: + decimal_value = float(transformation['arguments']['percent']) / 100 + im = im.transform((im.size[0] * decimal_value, im.size[1] * decimal_value), Image.EXTENT, (0, 0, im.size[0], im.size[1])) + elif transformation['transformation'] == TRANSFORMATION_ROTATE: + # PIL counter degress counter-clockwise, reverse them + im = im.rotate(360 - transformation['arguments']['degrees']) + + if im.mode not in ('L', 'RGB'): + im = 
im.convert('RGB') + im.save(output_filepath, format=file_format) + + def get_format_list(self): + """ + Introspect PIL's internal registry to obtain a list of the + supported file types + """ + formats = [] + for format_name in Image.ID: + formats.append((format_name, u'')) + + return formats + + def get_available_transformations(self): + return [ + TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \ + TRANSFORMATION_ZOOM + ] diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py index f73c0f2b64..95aee33b92 100644 --- a/apps/converter/conf/settings.py +++ b/apps/converter/conf/settings.py @@ -9,12 +9,11 @@ register_settings( settings=[ {'name': u'IM_CONVERT_PATH', 'global_name': u'CONVERTER_IM_CONVERT_PATH', 'default': u'/usr/bin/convert', 'description': _(u'File path to imagemagick\'s convert program.'), 'exists': True}, {'name': u'IM_IDENTIFY_PATH', 'global_name': u'CONVERTER_IM_IDENTIFY_PATH', 'default': u'/usr/bin/identify', 'description': _(u'File path to imagemagick\'s identify program.'), 'exists': True}, - {'name': u'UNPAPER_PATH', 'global_name': u'CONVERTER_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, {'name': u'GM_PATH', 'global_name': u'CONVERTER_GM_PATH', 'default': u'/usr/bin/gm', 'description': _(u'File path to graphicsmagick\'s program.'), 'exists': True}, {'name': u'GM_SETTINGS', 'global_name': u'CONVERTER_GM_SETTINGS', 'default': u''}, - {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.imagemagick', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.')}, + {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. 
Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')}, {'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True}, - {'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'}, + #{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'}, {'name': u'DEFAULT_OPTIONS', 'global_name': u'CONVERTER_DEFAULT_OPTIONS', 'default': u''}, {'name': u'LOW_QUALITY_OPTIONS', 'global_name': u'CONVERTER_LOW_QUALITY_OPTIONS', 'default': u''}, {'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'}, diff --git a/apps/converter/literals.py b/apps/converter/literals.py new file mode 100644 index 0000000000..cb5af4c06c --- /dev/null +++ b/apps/converter/literals.py @@ -0,0 +1,64 @@ +from django.utils.translation import ugettext_lazy as _ + +from converter.conf.settings import DEFAULT_OPTIONS +from converter.conf.settings import LOW_QUALITY_OPTIONS +from converter.conf.settings import HIGH_QUALITY_OPTIONS +from converter.conf.settings import PRINT_QUALITY_OPTIONS + +DEFAULT_ZOOM_LEVEL = 100 +DEFAULT_ROTATION = 0 +DEFAULT_PAGE_NUMBER = 1 +DEFAULT_FILE_FORMAT = u'jpeg' +DEFAULT_OCR_FILE_FORMAT = u'tif' + +QUALITY_DEFAULT = u'quality_default' +QUALITY_LOW = u'quality_low' +QUALITY_HIGH = u'quality_high' +QUALITY_PRINT = u'quality_print' + +QUALITY_SETTINGS = { + QUALITY_DEFAULT: DEFAULT_OPTIONS, + QUALITY_LOW: LOW_QUALITY_OPTIONS, + QUALITY_HIGH: HIGH_QUALITY_OPTIONS, + QUALITY_PRINT: PRINT_QUALITY_OPTIONS +} + +DIMENSION_SEPARATOR = u'x' + +TRANSFORMATION_RESIZE = u'resize' +TRANSFORMATION_ROTATE = u'rotate' +TRANSFORMATION_DENSITY = u'density' +TRANSFORMATION_ZOOM = u'zoom' + +TRANSFORMATION_CHOICES = { + TRANSFORMATION_RESIZE: { + 'label': _(u'Resize'), + 'description': _(u'Resize.'), + 
'arguments': [ + {'name': 'width', 'label': _(u'width'), 'required': True}, + {'name': 'height', 'label': _(u'height'), 'required': False}, + ] + }, + TRANSFORMATION_ROTATE: { + 'label': _(u'Rotate'), + 'description': _(u'Rotate by n degress.'), + 'arguments': [ + {'name': 'degrees', 'label': _(u'degrees'), 'required': True} + ] + }, + TRANSFORMATION_DENSITY: { + 'label': _(u'Density'), + 'description': _(u'Change the resolution (ie: DPI) without resizing.'), + 'arguments': [ + {'name': 'width', 'label': _(u'width'), 'required': True}, + {'name': 'height', 'label': _(u'height'), 'required': False}, + ] + }, + TRANSFORMATION_ZOOM: { + 'label': _(u'Zoom'), + 'description': _(u'Zoom by n percent.'), + 'arguments': [ + {'name': 'percent', 'label': _(u'percent'), 'required': True} + ] + }, +} diff --git a/apps/converter/utils.py b/apps/converter/utils.py index c5a4e7e55b..4653b6dc9d 100644 --- a/apps/converter/utils.py +++ b/apps/converter/utils.py @@ -1,6 +1,18 @@ +import os + +from django.core.exceptions import ImproperlyConfigured +from django.utils.importlib import import_module + +try: + from python_magic import magic + USE_PYTHON_MAGIC = True +except: + import mimetypes + mimetypes.init() + USE_PYTHON_MAGIC = False + + #http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python - - def copyfile(source, dest, buffer_size=1024 * 1024): """ Copy a file from source to dest. 
source and dest @@ -21,3 +33,79 @@ def copyfile(source, dest, buffer_size=1024 * 1024): source.close() dest.close() + + +def _lazy_load(fn): + _cached = [] + + def _decorated(): + if not _cached: + _cached.append(fn()) + return _cached[0] + return _decorated + + +@_lazy_load +def load_backend(): + from converter.conf.settings import GRAPHICS_BACKEND as backend_name + + try: + module = import_module('.base', 'converter.backends.%s' % backend_name) + import warnings + warnings.warn( + "Short names for CONVERTER_BACKEND are deprecated; prepend with 'converter.backends.'", + PendingDeprecationWarning + ) + return module + except ImportError, e: + # Look for a fully qualified converter backend name + try: + return import_module('.base', backend_name) + except ImportError, e_user: + # The converter backend wasn't found. Display a helpful error message + # listing all possible (built-in) converter backends. + backend_dir = os.path.join(os.path.dirname(__file__), 'backends') + try: + available_backends = [f for f in os.listdir(backend_dir) + if os.path.isdir(os.path.join(backend_dir, f)) + and not f.startswith('.')] + except EnvironmentError: + available_backends = [] + available_backends.sort() + if backend_name not in available_backends: + error_msg = ("%r isn't an available converter backend. \n" + + "Try using converter.backends.XXX, where XXX is one of:\n %s\n" + + "Error was: %s") % \ + (backend_name, ", ".join(map(repr, available_backends)), e_user) + raise ImproperlyConfigured(error_msg) + else: + raise # If there's some other error, this must be an error in Mayan itself. 
+ + +def get_mimetype(filepath): + """ + Determine a file's mimetype by calling the system's libmagic + library via python-magic or fallback to use python's mimetypes + library + """ + file_mimetype = u'' + file_mime_encoding = u'' + + if USE_PYTHON_MAGIC: + if os.path.exists(filepath): + try: + source = open(filepath, 'r') + mime = magic.Magic(mime=True) + file_mimetype = mime.from_buffer(source.read()) + source.seek(0) + mime_encoding = magic.Magic(mime_encoding=True) + file_mime_encoding = mime_encoding.from_buffer(source.read()) + finally: + if source: + source.close() + else: + path, filename = os.path.split(filepath) + file_mimetype, file_mime_encoding = mimetypes.guess_type(filename) + + return file_mimetype, file_mime_encoding + diff --git a/apps/converter/views.py b/apps/converter/views.py index ad95783539..ef7173f908 100644 --- a/apps/converter/views.py +++ b/apps/converter/views.py @@ -1,38 +1,18 @@ from django.utils.translation import ugettext_lazy as _ from django.shortcuts import render_to_response from django.template import RequestContext -from django.utils.importlib import import_module + +from converter import backend from converter.conf.settings import GRAPHICS_BACKEND - -def _lazy_load(fn): - _cached = [] - - def _decorated(): - if not _cached: - _cached.append(fn()) - return _cached[0] - return _decorated - - -@_lazy_load -def _get_backend(): - return import_module(GRAPHICS_BACKEND) - -try: - backend = _get_backend() -except ImportError: - raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND) - - def formats_list(request): #check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW]) context = { 'title': _(u'suported file formats'), 'hide_object': True, - 'object_list': backend.get_format_list(), + 'object_list': sorted(backend.get_format_list()), 'extra_columns': [ { 'name': _(u'name'), diff --git a/apps/documents/conf/settings.py b/apps/documents/conf/settings.py index 7a253f52de..4c7749624c 100644 --- 
a/apps/documents/conf/settings.py +++ b/apps/documents/conf/settings.py @@ -18,10 +18,6 @@ def default_uuid(): """unicode(uuid.uuid4())""" return unicode(uuid.uuid4()) -available_transformations = { - 'rotate': {'label': _(u'Rotate [degrees]'), 'arguments': [{'name': 'degrees'}]} -} - register_settings( namespace=u'documents', module=u'documents.conf.settings', @@ -31,8 +27,6 @@ register_settings( {'name': u'UUID_FUNCTION', 'global_name': u'DOCUMENTS_UUID_FUNCTION', 'default': default_uuid}, # Storage {'name': u'STORAGE_BACKEND', 'global_name': u'DOCUMENTS_STORAGE_BACKEND', 'default': FileBasedStorage}, - # Transformations - {'name': u'AVAILABLE_TRANSFORMATIONS', 'global_name': u'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', 'default': available_transformations}, # Usage {'name': u'PREVIEW_SIZE', 'global_name': u'DOCUMENTS_PREVIEW_SIZE', 'default': u'640x480'}, {'name': u'PRINT_SIZE', 'global_name': u'DOCUMENTS_PRINT_SIZE', 'default': u'1400'}, diff --git a/apps/documents/managers.py b/apps/documents/managers.py index 3b007a936e..ef87c929fe 100644 --- a/apps/documents/managers.py +++ b/apps/documents/managers.py @@ -13,3 +13,24 @@ class RecentDocumentManager(models.Manager): to_delete = self.model.objects.filter(user=user)[RECENT_COUNT:] for recent_to_delete in to_delete: recent_to_delete.delete() + + +class DocumentPageTransformationManager(models.Manager): + def get_for_document_page(self, document_page): + return self.model.objects.filter(document_page=document_page) + + def get_for_document_page_as_list(self, document_page): + warnings = [] + transformations = [] + for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/documents/models.py b/apps/documents/models.py index 
96d988bfdb..b3eadb08e7 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -12,15 +12,13 @@ from python_magic import magic from taggit.managers import TaggableManager from dynamic_search.api import register from converter.api import get_page_count -from converter import TRANFORMATION_CHOICES +from converter.api import get_available_transformations_choices from documents.conf.settings import CHECKSUM_FUNCTION from documents.conf.settings import UUID_FUNCTION from documents.conf.settings import STORAGE_BACKEND -from documents.conf.settings import AVAILABLE_TRANSFORMATIONS -from documents.managers import RecentDocumentManager - -available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()]) +from documents.managers import RecentDocumentManager, \ + DocumentPageTransformationManager def get_filename_from_uuid(instance, filename): @@ -92,7 +90,7 @@ class Document(models.Model): mimetype, page count and transformation when originally created """ new_document = not self.pk - + transformations = kwargs.pop('transformations', None) super(Document, self).save(*args, **kwargs) if new_document: @@ -101,7 +99,8 @@ class Document(models.Model): self.update_mimetype(save=False) self.save() self.update_page_count(save=False) - self.apply_default_transformations() + if transformations: + self.apply_default_transformations(transformations) @models.permalink def get_absolute_url(self): @@ -202,21 +201,21 @@ class Document(models.Model): exists in storage """ return self.file.storage.exists(self.file.path) + - def apply_default_transformations(self): + def apply_default_transformations(self, transformations): #Only apply default transformations on new documents - if DEFAULT_TRANSFORMATIONS and reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0: - for transformation in DEFAULT_TRANSFORMATIONS: - if 'name' in transformation: - for document_page in 
self.documentpage_set.all(): - page_transformation = DocumentPageTransformation( - document_page=document_page, - order=0, - transformation=transformation['name']) - if 'arguments' in transformation: - page_transformation.arguments = transformation['arguments'] + if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0: + for transformation in transformations: + for document_page in self.documentpage_set.all(): + page_transformation = DocumentPageTransformation( + document_page=document_page, + order=0, + transformation=transformation.get('transformation'), + arguments=transformation.get('arguments') + ) - page_transformation.save() + page_transformation.save() class DocumentTypeFilename(models.Model): @@ -258,26 +257,13 @@ class DocumentPage(models.Model): verbose_name = _(u'document page') verbose_name_plural = _(u'document pages') + def get_transformation_list(self): + return DocumentPageTransformation.objects.get_for_document_page_as_list(self) + @models.permalink def get_absolute_url(self): return ('document_page_view', [self.pk]) - def get_transformation_string(self): - transformation_list = [] - warnings = [] - for page_transformation in self.documentpagetransformation_set.all(): - try: - if page_transformation.transformation in TRANFORMATION_CHOICES: - transformation_list.append( - TRANFORMATION_CHOICES[page_transformation.transformation] % eval( - page_transformation.arguments - ) - ) - except Exception, e: - warnings.append(e) - - return u' '.join(transformation_list), warnings - class DocumentPageTransformation(models.Model): """ @@ -286,9 +272,11 @@ class DocumentPageTransformation(models.Model): """ document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page')) order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True) - transformation = models.CharField(choices=available_transformations, max_length=128, 
verbose_name=_(u'transformation')) + transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation')) arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}')) + objects = DocumentPageTransformationManager() + def __unicode__(self): return u'"%s" for %s' % (self.get_transformation_display(), unicode(self.document_page)) diff --git a/apps/documents/urls.py b/apps/documents/urls.py index 4dc99f37de..4a8dcd2d46 100644 --- a/apps/documents/urls.py +++ b/apps/documents/urls.py @@ -1,14 +1,12 @@ from django.conf.urls.defaults import patterns, url -from converter.api import QUALITY_HIGH, QUALITY_PRINT +from converter.literals import QUALITY_HIGH, QUALITY_PRINT from documents.conf.settings import PREVIEW_SIZE from documents.conf.settings import PRINT_SIZE from documents.conf.settings import THUMBNAIL_SIZE from documents.conf.settings import DISPLAY_SIZE from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE -#from documents.literals import UPLOAD_SOURCE_LOCAL, \ -# UPLOAD_SOURCE_STAGING, UPLOAD_SOURCE_USER_STAGING urlpatterns = patterns('documents.views', url(r'^list/$', 'document_list', (), 'document_list'), diff --git a/apps/documents/views.py b/apps/documents/views.py index 4a3247c576..ee9be82b3e 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -20,10 +20,11 @@ from common.widgets import two_state_template from common.literals import PAGE_SIZE_DIMENSIONS, \ PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE from common.conf.settings import DEFAULT_PAPER_SIZE -from converter.api import convert_document, QUALITY_DEFAULT +from converter.api import convert_document from converter.exceptions import UnkownConvertError, UnknownFormat -from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \ - DEFAULT_FILE_FORMAT, QUALITY_PRINT +from converter.literals import 
DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \ + DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \ + DEFAULT_PAGE_NUMBER from filetransfers.api import serve_file from grouping.utils import get_document_group_subtemplate from metadata.api import save_metadata_list, \ @@ -286,38 +287,14 @@ def document_edit(request, document_id): }, context_instance=RequestContext(request)) -def calculate_converter_arguments(document, *args, **kwargs): - size = kwargs.pop('size', PREVIEW_SIZE) - quality = kwargs.pop('quality', QUALITY_DEFAULT) - page = kwargs.pop('page', 1) - file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT) - zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL) - rotation = kwargs.pop('rotation', DEFAULT_ROTATION) - - document_page = DocumentPage.objects.get(document=document, page_number=page) - transformation_string, warnings = document_page.get_transformation_string() - - arguments = { - 'size': size, - 'file_format': file_format, - 'quality': quality, - 'extra_options': transformation_string, - 'page': page - 1, - 'zoom': zoom, - 'rotation': rotation - } - - return arguments, warnings - - def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT): check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW]) document = get_object_or_404(Document, pk=document_id) - page = int(request.GET.get('page', 1)) + page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER)) - zoom = int(request.GET.get('zoom', 100)) + zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL)) if zoom < ZOOM_MIN_LEVEL: zoom = ZOOM_MIN_LEVEL @@ -325,16 +302,17 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ if zoom > ZOOM_MAX_LEVEL: zoom = ZOOM_MAX_LEVEL - rotation = int(request.GET.get('rotation', 0)) % 360 + rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360 - arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, 
rotation=rotation) + document_page = get_object_or_404(document.documentpage_set, page_number=page) + transformations, warnings = document_page.get_transformation_list() if warnings and (request.user.is_staff or request.user.is_superuser): for warning in warnings: messages.warning(request, _(u'Page transformation error: %s') % warning) - + try: - output_file = convert_document(document, **arguments) + output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations) except UnkownConvertError, e: if request.user.is_staff or request.user.is_superuser: messages.error(request, e) @@ -592,13 +570,13 @@ def document_page_view(request, document_page_id): document_page = get_object_or_404(DocumentPage, pk=document_page_id) - zoom = int(request.GET.get('zoom', 100)) - rotation = int(request.GET.get('rotation', 0)) + zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL)) + rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation) base_title = _(u'details for: %s') % document_page - if zoom != 100: + if zoom != DEFAULT_ZOOM_LEVEL: zoom_text = u'(%d%%)' % zoom else: zoom_text = u'' diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 55aa3d5b60..00efe7b276 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -9,7 +9,7 @@ from documents.models import Document from main.api import register_tool from ocr.conf.settings import AUTOMATIC_OCR -from ocr.models import DocumentQueue +from ocr.models import DocumentQueue, QueueTransformation #Permissions PERMISSION_OCR_DOCUMENT = {'namespace': 'ocr', 'name': 'ocr_document', 'label': _(u'Submit document for OCR')} @@ -30,20 +30,27 @@ re_queue_multiple_document = {'text': _('re-queue'), 'view': 're_queue_multiple_ queue_document_delete = {'text': _(u'delete'), 'view': 'queue_document_delete', 'args': 'object.id', 
'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]} queue_document_multiple_delete = {'text': _(u'delete'), 'view': 'queue_document_multiple_delete', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]} -document_queue_disable = {'text': _(u'stop queue'), 'view': 'document_queue_disable', 'args': 'object.id', 'famfam': 'control_stop_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]} -document_queue_enable = {'text': _(u'activate queue'), 'view': 'document_queue_enable', 'args': 'object.id', 'famfam': 'control_play_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]} +document_queue_disable = {'text': _(u'stop queue'), 'view': 'document_queue_disable', 'args': 'queue.id', 'famfam': 'control_stop_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]} +document_queue_enable = {'text': _(u'activate queue'), 'view': 'document_queue_enable', 'args': 'queue.id', 'famfam': 'control_play_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]} all_document_ocr_cleanup = {'text': _(u'clean up pages content'), 'view': 'all_document_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _(u'Runs a language filter to remove common OCR mistakes from document pages content.')} queue_document_list = {'text': _(u'queue document list'), 'view': 'queue_document_list', 'famfam': 'hourglass', 'permissions': [PERMISSION_OCR_DOCUMENT]} node_active_list = {'text': _(u'active tasks'), 'view': 'node_active_list', 'famfam': 'server_chart', 'permissions': [PERMISSION_OCR_DOCUMENT]} +setup_queue_transformation_list = {'text': _(u'transformations'), 'view': 'setup_queue_transformation_list', 'args': 'queue.pk', 'famfam': 'shape_move_front'} +setup_queue_transformation_create = {'text': _(u'add transformation'), 'view': 'setup_queue_transformation_create', 'args': 'queue.pk', 'famfam': 'shape_square_add'} +setup_queue_transformation_edit = {'text': _(u'edit'), 
'view': 'setup_queue_transformation_edit', 'args': 'transformation.pk', 'famfam': 'shape_square_edit'} +setup_queue_transformation_delete = {'text': _(u'delete'), 'view': 'setup_queue_transformation_delete', 'args': 'transformation.pk', 'famfam': 'shape_square_delete'} + register_links(Document, [submit_document]) -register_links(DocumentQueue, [document_queue_disable, document_queue_enable]) +register_links(DocumentQueue, [document_queue_disable, document_queue_enable, setup_queue_transformation_list]) +register_links(QueueTransformation, [setup_queue_transformation_edit, setup_queue_transformation_delete]) register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete]) -register_links(['queue_document_list', 'node_active_list'], [queue_document_list, node_active_list], menu_name='secondary_menu') +register_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'node_active_list', 'setup_queue_transformation_list'], [queue_document_list, node_active_list], menu_name='secondary_menu') +register_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar') register_tool(all_document_ocr_cleanup, namespace='ocr', title=_(u'OCR')) diff --git a/apps/ocr/api.py b/apps/ocr/api.py index bc9f775f76..ec89a669c9 100644 --- a/apps/ocr/api.py +++ b/apps/ocr/api.py @@ -9,13 +9,15 @@ import sys from django.utils.translation import ugettext as _ from django.utils.importlib import import_module -from converter.api import convert_document_for_ocr +from converter.api import convert from documents.models import DocumentPage from ocr.conf.settings import TESSERACT_PATH from ocr.conf.settings import TESSERACT_LANGUAGE -from ocr.conf.settings import 
PDFTOTEXT_PATH -from ocr.exceptions import TesseractError, PdftotextError +from ocr.exceptions import TesseractError +from ocr.conf.settings import UNPAPER_PATH +from ocr.parsers import parse_document_page +from ocr.parsers.exceptions import ParserError, ParserUnknownFile def get_language_backend(): @@ -30,7 +32,7 @@ def get_language_backend(): return None return module -backend = get_language_backend() +language_backend = get_language_backend() def cleanup(filename): @@ -58,63 +60,38 @@ def run_tesseract(input_filename, output_filename_base, lang=None): raise TesseractError(error_text) -def run_pdftotext(input_filename, output_filename, page_number=None): - """ - Execute the command line binary of pdftotext - """ - command = [unicode(PDFTOTEXT_PATH)] - if page_number: - command.extend([u'-nopgbrk', u'-f', unicode(page_number), u'-l', unicode(page_number)]) - command.extend([unicode(input_filename), unicode(output_filename)]) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - error_text = proc.stderr.read() - raise PdftotextError(error_text) - - def do_document_ocr(document): """ - Do OCR on all the pages of the given document object, first - trying to extract text from PDF using pdftotext then by calling - tesseract + first try to extract text from document pages using the registered + parser if the parser fails or if there is no parser registered for + the document mimetype do a visual OCR by calling tesseract """ - for page_index, document_page in enumerate(document.documentpage_set.all()): - desc, filepath = tempfile.mkstemp() - imagefile = None - source = u'' + for document_page in document.documentpage_set.all(): try: - if document.file_mimetype == u'application/pdf': - pdf_filename = os.extsep.join([filepath, u'pdf']) - document.save_to_file(pdf_filename) - run_pdftotext(pdf_filename, filepath, document_page.page_number) - cleanup(pdf_filename) - if 
os.stat(filepath).st_size == 0: - #PDF page had no text, run tesseract on the page - imagefile = convert_document_for_ocr(document, page=page_index) - run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE) - ocr_output = os.extsep.join([filepath, u'txt']) - source = _(u'Text from OCR') - else: - ocr_output = filepath - source = _(u'Text extracted from PDF') - else: - imagefile = convert_document_for_ocr(document, page=page_index) - run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE) - ocr_output = os.extsep.join([filepath, u'txt']) - source = _(u'Text from OCR') - f = codecs.open(ocr_output, 'r', 'utf-8') - document_page = document.documentpage_set.get(page_number=page_index + 1) - document_page.content = ocr_cleanup(f.read().strip()) - document_page.page_label = source - document_page.save() - f.close() - cleanup(ocr_output) - finally: - os.close(desc) - cleanup(filepath) - if imagefile: - cleanup(imagefile) + # Try to extract text by means of a parser + parse_document_page(document_page) + except (ParserError, ParserUnknownFile): + # Fall back to doing visual OCR + pass + #desc, filepath = tempfile.mkstemp() + #imagefile = None + #source = u'' + #imagefile = convert_document_for_ocr(document, page=document_page.page_number) + #run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE) + #ocr_output = os.extsep.join([filepath, u'txt']) + #source = _(u'Text from OCR') + #f = codecs.open(ocr_output, 'r', 'utf-8') + #document_page.content = ocr_cleanup(f.read().strip()) + #document_page.page_label = source + #document_page.save() + #f.close() + #cleanup(ocr_output) + #finally: + # pass + #os.close(desc) + #cleanup(filepath) + #if imagefile: + # cleanup(imagefile) def ocr_cleanup(text): @@ -127,8 +104,8 @@ def ocr_cleanup(text): for line in text.splitlines(): line = line.strip() for word in line.split(): - if backend: - result = backend.check_word(word) + if language_backend: + result = language_backend.check_word(word) else: result = word if result: @@ -147,3 +124,53 
@@ def clean_pages(): if page.content: page.content = ocr_cleanup(page.content) page.save() + + +def execute_unpaper(input_filepath, output_filepath): + """ + Executes the program unpaper using subprocess's Popen + """ + command = [] + command.append(UNPAPER_PATH) + command.append(u'--overwrite') + command.append(input_filepath) + command.append(output_filepath) + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE) + return_code = proc.wait() + if return_code != 0: + raise UnpaperError(proc.stderr.readline()) + +''' +def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT): + #Extract document file + input_filepath = document_save_to_temp_dir(document, document.uuid) + + #Convert for OCR + temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) + temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) + transformation_output_file = u'%s_trans%s%s%s' % (temp_path, page, os.extsep, file_format) + unpaper_input_file = u'%s_unpaper_in%s%spnm' % (temp_path, page, os.extsep) + unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep) + convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format) + + try: + document_page = document.documentpage_set.get(page_number=page) + transformations, warnings = document_page.get_transformation_list() + + #Apply default transformations + backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, transformations=transformations, output_filepath=transformation_output_file) + #Do OCR operations + backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file) + # Process by unpaper + execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file) + # Convert to tif + backend.convert_file(input_filepath=unpaper_output_file, output_filepath=convert_output_file) + finally: + 
cleanup(transformation_output_file) + cleanup(unpaper_input_file) + cleanup(unpaper_output_file) + + return convert_output_file +''' + + diff --git a/apps/ocr/conf/settings.py b/apps/ocr/conf/settings.py index e9024b7152..52785f46ac 100644 --- a/apps/ocr/conf/settings.py +++ b/apps/ocr/conf/settings.py @@ -13,8 +13,9 @@ register_settings( {'name': u'REPLICATION_DELAY', 'global_name': u'OCR_REPLICATION_DELAY', 'default': 10, 'description': _(u'Amount of seconds to delay OCR of documents to allow for the node\'s storage replication overhead.')}, {'name': u'NODE_CONCURRENT_EXECUTION', 'global_name': u'OCR_NODE_CONCURRENT_EXECUTION', 'default': 1, 'description': _(u'Maximum amount of concurrent document OCRs a node can perform.')}, {'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': False, 'description': _(u'Automatically queue newly created documents for OCR.')}, - {'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'exists': True}, {'name': u'QUEUE_PROCESSING_INTERVAL', 'global_name': u'OCR_QUEUE_PROCESSING_INTERVAL', 'default': 10}, - {'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')} + {'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. 
Multiple hosts can be specified separated by a semicolon.')}, + {'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, + {'name': u'PARSERS_PDFTOTEXT_PATH', 'global_name': u'OCR_PARSERS_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'exists': True}, ] ) diff --git a/apps/ocr/exceptions.py b/apps/ocr/exceptions.py index 4bfa8f725a..b1ec8c3fe3 100644 --- a/apps/ocr/exceptions.py +++ b/apps/ocr/exceptions.py @@ -4,7 +4,3 @@ class AlreadyQueued(Exception): class TesseractError(Exception): pass - - -class PdftotextError(Exception): - pass diff --git a/apps/ocr/forms.py b/apps/ocr/forms.py new file mode 100644 index 0000000000..c88e7a8351 --- /dev/null +++ b/apps/ocr/forms.py @@ -0,0 +1,21 @@ +from django import forms +from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import ugettext + +from ocr.models import QueueTransformation + + +class QueueTransformationForm(forms.ModelForm): + class Meta: + model = QueueTransformation + + def __init__(self, *args, **kwargs): + super(QueueTransformationForm, self).__init__(*args, **kwargs) + self.fields['content_type'].widget = forms.HiddenInput() + self.fields['object_id'].widget = forms.HiddenInput() + + +class QueueTransformationForm_create(forms.ModelForm): + class Meta: + model = QueueTransformation + exclude = ('content_type', 'object_id') diff --git a/apps/ocr/manager.py b/apps/ocr/manager.py deleted file mode 100644 index a1fdb80b8c..0000000000 --- a/apps/ocr/manager.py +++ /dev/null @@ -1,18 +0,0 @@ -from django.db import models - -from ocr.exceptions import AlreadyQueued - - -class DocumentQueueManager(models.Manager): - """ - Module manager class to handle adding documents to an OCR document - queue - """ - def queue_document(self, document, queue_name='default'): - document_queue = self.model.objects.get(name=queue_name) - if 
document_queue.queuedocument_set.filter(document=document): - raise AlreadyQueued - - document_queue.queuedocument_set.create(document=document, delay=True) - - return document_queue diff --git a/apps/ocr/managers.py b/apps/ocr/managers.py new file mode 100644 index 0000000000..a3ed9621ad --- /dev/null +++ b/apps/ocr/managers.py @@ -0,0 +1,41 @@ +from django.db import models +from django.contrib.contenttypes.models import ContentType + +from ocr.exceptions import AlreadyQueued + + +class DocumentQueueManager(models.Manager): + """ + Module manager class to handle adding documents to an OCR document + queue + """ + def queue_document(self, document, queue_name='default'): + document_queue = self.model.objects.get(name=queue_name) + if document_queue.queuedocument_set.filter(document=document): + raise AlreadyQueued + + document_queue.queuedocument_set.create(document=document, delay=True) + + return document_queue + + +class QueueTransformationManager(models.Manager): + def get_for_object(self, obj): + ct = ContentType.objects.get_for_model(obj) + return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk) + + def get_for_object_as_list(self, obj): + warnings = [] + transformations = [] + for transformation in self.get_for_object(obj).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/ocr/models.py b/apps/ocr/models.py index f9567e2b0a..bfcbf74aec 100644 --- a/apps/ocr/models.py +++ b/apps/ocr/models.py @@ -2,13 +2,16 @@ from django.db import models from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext from django.core.exceptions import ObjectDoesNotExist +from django.contrib.contenttypes.models import ContentType +from django.contrib.contenttypes import generic from 
documents.models import Document +from converter.api import get_available_transformations_choices from ocr.literals import DOCUMENTQUEUE_STATE_STOPPED, \ DOCUMENTQUEUE_STATE_CHOICES, QUEUEDOCUMENT_STATE_PENDING, \ QUEUEDOCUMENT_STATE_CHOICES -from ocr.manager import DocumentQueueManager +from ocr.managers import DocumentQueueManager, QueueTransformationManager class DocumentQueue(models.Model): @@ -51,3 +54,26 @@ class QueueDocument(models.Model): return unicode(self.document) except ObjectDoesNotExist: return ugettext(u'Missing document.') + + +class QueueTransformation(models.Model): + """ + Model that stores the transformation and transformation arguments + for a given document queue + """ + content_type = models.ForeignKey(ContentType) + object_id = models.PositiveIntegerField() + content_object = generic.GenericForeignKey('content_type', 'object_id') + order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True) + transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation')) + arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}') + + objects = QueueTransformationManager() + + def __unicode__(self): + return self.get_transformation_display() + + class Meta: + ordering = ('order',) + verbose_name = _(u'document queue transformation') + verbose_name_plural = _(u'document queue transformations') diff --git a/apps/ocr/parsers/__init__.py b/apps/ocr/parsers/__init__.py new file mode 100644 index 0000000000..815e868747 --- /dev/null +++ b/apps/ocr/parsers/__init__.py @@ -0,0 +1,40 @@ +import codecs +import os +import subprocess +import tempfile +import sys + +import slate + +from django.utils.translation import ugettext as _ + +from ocr.parsers.exceptions import ParserError, ParserUnknownFile + +mimetype_registry = {} + + +def 
register_parser(mimetype, function): + mimetype_registry[mimetype] = {'function': function} + + +def pdf_parser(document_page): + fd = document_page.document.open() + pdf_pages = slate.PDF(fd) + fd.close() + + if pdf_pages[document_page.page_number - 1] == '\x0c': + raise ParserError + + document_page.content = pdf_pages[document_page.page_number - 1] + document_page.page_label = _(u'Text extracted from PDF') + document_page.save() + + +def parse_document_page(document_page): + try: + mimetype_registry[document_page.document.file_mimetype]['function'](document_page) + except KeyError: + raise ParserUnknownFile + + +register_parser('application/pdf', pdf_parser) diff --git a/apps/ocr/parsers/exceptions.py b/apps/ocr/parsers/exceptions.py new file mode 100644 index 0000000000..e06875f222 --- /dev/null +++ b/apps/ocr/parsers/exceptions.py @@ -0,0 +1,10 @@ +class ParserError(Exception): + """ + Raised when a text parser fails to understand a file it been passed + or the resulting parsed text is invalid + """ + pass + + +class ParserUnknownFile(Exception): + pass diff --git a/apps/ocr/urls.py b/apps/ocr/urls.py index 6bddd3d7fe..cb090cf065 100644 --- a/apps/ocr/urls.py +++ b/apps/ocr/urls.py @@ -1,16 +1,22 @@ from django.conf.urls.defaults import patterns, url urlpatterns = patterns('ocr.views', - url(r'^(?P\d+)/submit/$', 'submit_document', (), 'submit_document'), - url(r'^ocr/queue/document/list/$', 'queue_document_list', (), 'queue_document_list'), - url(r'^ocr/queue/document/(?P\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), - url(r'^ocr/queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), - url(r'^ocr/queue/document/(?P\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), - url(r'^ocr/queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'), + url(r'^document/(?P\d+)/submit/$', 'submit_document', (), 'submit_document'), + 
url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'), + url(r'^queue/document/(?P\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), + url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), + url(r'^queue/document/(?P\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), + url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'), - url(r'^ocr/queue/(?P\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'), - url(r'^ocr/queue/(?P\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'), + url(r'^queue/(?P\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'), + url(r'^queue/(?P\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'), - url(r'^ocr/document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'), - url(r'^ocr/node/active/list/$', 'node_active_list', (), 'node_active_list'), + url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'), + url(r'^node/active/list/$', 'node_active_list', (), 'node_active_list'), + + url(r'^queue/(?P\d+)/transformation/list/$', 'setup_queue_transformation_list', (), 'setup_queue_transformation_list'), + url(r'^queue/(?P\w+)/transformation/create/$', 'setup_queue_transformation_create', (), 'setup_queue_transformation_create'), + url(r'^queue/transformation/(?P\w+)/edit/$', 'setup_queue_transformation_edit', (), 'setup_queue_transformation_edit'), + url(r'^queue/transformation/(?P\w+)/delete/$', 'setup_queue_transformation_delete', (), 'setup_queue_transformation_delete'), + ) diff --git a/apps/ocr/views.py b/apps/ocr/views.py index eb4f3a945d..6ee6a96393 100644 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -6,9 +6,8 @@ from django.shortcuts import render_to_response, get_object_or_404 from django.template import RequestContext from 
django.contrib import messages from django.views.generic.list_detail import object_list -from django.core.urlresolvers import reverse from django.utils.translation import ugettext_lazy as _ -from django.conf import settings +from django.core.urlresolvers import reverse from celery.task.control import inspect from permissions.api import check_permissions @@ -18,12 +17,13 @@ from documents.widgets import document_link, document_thumbnail from ocr import PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, \ PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_CLEAN_ALL_PAGES -from ocr.models import DocumentQueue, QueueDocument +from ocr.models import DocumentQueue, QueueDocument, QueueTransformation from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \ QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_STOPPED, \ DOCUMENTQUEUE_STATE_ACTIVE from ocr.exceptions import AlreadyQueued from ocr.api import clean_pages +from ocr.forms import QueueTransformationForm, QueueTransformationForm_create def queue_document_list(request, queue_name='default'): @@ -38,8 +38,10 @@ def queue_document_list(request, queue_name='default'): extra_context={ 'title': _(u'documents in queue: %s') % document_queue, 'hide_object': True, - 'object': document_queue, + 'queue': document_queue, 'object_name': _(u'document queue'), + 'navigation_object_name': 'queue', + 'list_object_variable_name': 'queue_document', 'extra_columns': [ {'name': 'document', 'attribute': lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')}, {'name': _(u'thumbnail'), 'attribute': lambda x: document_thumbnail(x.document)}, @@ -212,7 +214,8 @@ def document_queue_disable(request, document_queue_id): return HttpResponseRedirect(next) return render_to_response('generic_confirm.html', { - 'object': document_queue, + 'queue': document_queue, + 'navigation_object_name': 'queue', 'title': _(u'Are you sure you wish to disable document queue: %s') % document_queue, 'next': next, 
'previous': previous, @@ -238,7 +241,8 @@ def document_queue_enable(request, document_queue_id): return HttpResponseRedirect(next) return render_to_response('generic_confirm.html', { - 'object': document_queue, + 'queue': document_queue, + 'navigation_object_name': 'queue', 'title': _(u'Are you sure you wish to activate document queue: %s') % document_queue, 'next': next, 'previous': previous, @@ -317,3 +321,141 @@ def node_active_list(request): {'name': _(u'related object'), 'attribute': lambda x: display_link(x['related_object']) if x['related_object'] else u''} ], }, context_instance=RequestContext(request)) + + +def setup_queue_transformation_list(request, document_queue_id): + #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT]) + + document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id) + + context = { + 'object_list': QueueTransformation.objects.get_for_object(document_queue), + 'title': _(u'transformations for: %s') % document_queue, + #'object_name': _(u'document queue'), + #'object': document_queue, + 'queue': document_queue, + 'object_name': _(u'document queue'), + 'navigation_object_name': 'queue', + 'list_object_variable_name': 'transformation', + 'extra_columns': [ + {'name': _(u'order'), 'attribute': 'order'}, + {'name': _(u'transformation'), 'attribute': lambda x: x.get_transformation_display()}, + {'name': _(u'arguments'), 'attribute': 'arguments'} + ], + 'hide_link': True, + 'hide_object': True, + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) + + +def setup_queue_transformation_edit(request, transformation_id): + #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT]) + + transformation = get_object_or_404(QueueTransformation, pk=transformation_id) + redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk]) + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', 
redirect_view))) + + if request.method == 'POST': + form = QueueTransformationForm(instance=transformation, data=request.POST) + if form.is_valid(): + try: + # Test the validity of the argument field + eval(form.cleaned_data['arguments'], {}) + except: + messages.error(request, _(u'Queue transformation argument error.')) + else: + try: + form.save() + messages.success(request, _(u'Queue transformation edited successfully')) + return HttpResponseRedirect(next) + except Exception, e: + messages.error(request, _(u'Error editing queue transformation; %s') % e) + else: + form = QueueTransformationForm(instance=transformation) + + return render_to_response('generic_form.html', { + 'title': _(u'Edit transformation: %s') % transformation, + 'form': form, + 'queue': transformation.content_object, + 'transformation': transformation, + 'navigation_object_list': [ + {'object': 'queue', 'name': _(u'document queue')}, + {'object': 'transformation', 'name': _(u'transformation')} + ], + 'next': next, + }, + context_instance=RequestContext(request)) + + +def setup_queue_transformation_delete(request, transformation_id): + #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT]) + + transformation = get_object_or_404(QueueTransformation, pk=transformation_id) + redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk]) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', redirect_view))) + + if request.method == 'POST': + try: + transformation.delete() + messages.success(request, _(u'Queue transformation deleted successfully.')) + except Exception, e: + messages.error(request, _(u'Error deleting queue transformation; %(error)s') % { + 'error': e} + ) + return HttpResponseRedirect(redirect_view) + + return render_to_response('generic_confirm.html', { + 'delete_view': True, + 'transformation': transformation, + 'queue': transformation.content_object, + 'navigation_object_list': [ + 
{'object': 'queue', 'name': _(u'document queue')}, + {'object': 'transformation', 'name': _(u'transformation')} + ], + 'title': _(u'Are you sure you wish to delete queue transformation "%(transformation)s"') % { + 'transformation': transformation.get_transformation_display(), + }, + 'previous': previous, + 'form_icon': u'shape_square_delete.png', + }, + context_instance=RequestContext(request)) + + +def setup_queue_transformation_create(request, document_queue_id): + #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT]) + + document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id) + + redirect_view = reverse('setup_queue_transformation_list', args=[document_queue.pk]) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', redirect_view))) + + if request.method == 'POST': + form = QueueTransformationForm_create(request.POST) + if form.is_valid(): + try: + # Test the validity of the argument field + eval(form.cleaned_data['arguments'], {}) + except: + messages.error(request, _(u'Queue transformation argument error.')) + else: + try: + queue_tranformation = form.save(commit=False) + queue_tranformation.content_object = document_queue + queue_tranformation.save() + messages.success(request, _(u'Queue transformation created successfully')) + return HttpResponseRedirect(redirect_view) + except Exception, e: + messages.error(request, _(u'Error creating queue transformation; %s') % e) + else: + form = QueueTransformationForm_create() + + return render_to_response('generic_form.html', { + 'form': form, + 'queue': document_queue, + 'object_name': _(u'document queue'), + 'navigation_object_name': 'queue', + 'title': _(u'Create new transformation for queue: %s') % document_queue, + }, context_instance=RequestContext(request)) + diff --git a/apps/sources/managers.py b/apps/sources/managers.py index aee45cf4c1..e27a6468a9 100644 --- a/apps/sources/managers.py +++ b/apps/sources/managers.py @@ -6,3 +6,19 @@ 
class SourceTransformationManager(models.Manager): def get_for_object(self, obj): ct = ContentType.objects.get_for_model(obj) return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk) + + def get_for_object_as_list(self, obj): + warnings = [] + transformations = [] + for transformation in self.get_for_object(obj).values('transformation', 'arguments'): + try: + transformations.append( + { + 'transformation': transformation['transformation'], + 'arguments': eval(transformation['arguments'], {}) + } + ) + except Exception, e: + warnings.append(e) + + return transformations, warnings diff --git a/apps/sources/models.py b/apps/sources/models.py index 70eaf2d4e4..795f269132 100644 --- a/apps/sources/models.py +++ b/apps/sources/models.py @@ -4,14 +4,13 @@ from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes import generic from documents.models import DocumentType -from documents.conf.settings import AVAILABLE_TRANSFORMATIONS from documents.managers import RecentDocumentManager from metadata.models import MetadataType +from converter.api import get_available_transformations_choices +from converter.literals import DIMENSION_SEPARATOR from sources.managers import SourceTransformationManager -available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()]) - SOURCE_UNCOMPRESS_CHOICE_Y = 'y' SOURCE_UNCOMPRESS_CHOICE_N = 'n' SOURCE_UNCOMPRESS_CHOICE_ASK = 'a' @@ -120,7 +119,7 @@ class StagingFolder(InteractiveBaseModel): if self.preview_height: dimensions.append(unicode(self.preview_height)) - return u'x'.join(dimensions) + return DIMENSION_SEPARATOR.join(dimensions) class Meta(InteractiveBaseModel.Meta): verbose_name = _(u'staging folder') @@ -164,8 +163,8 @@ class SourceTransformation(models.Model): object_id = models.PositiveIntegerField() content_object = generic.GenericForeignKey('content_type', 'object_id') order = models.PositiveIntegerField(default=0, blank=True, null=True, 
verbose_name=_(u'order'), db_index=True) - transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation')) - arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}')) + transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation')) + arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}') objects = SourceTransformationManager() diff --git a/apps/sources/staging.py b/apps/sources/staging.py index c6668455c9..4a80435a3a 100644 --- a/apps/sources/staging.py +++ b/apps/sources/staging.py @@ -8,11 +8,9 @@ from django.utils.translation import ugettext from django.contrib import messages from django.utils.translation import ugettext_lazy as _ -from converter import TRANFORMATION_CHOICES from converter.api import convert, cache_cleanup DEFAULT_STAGING_DIRECTORY = u'/tmp' -#from documents.conf.settings import DEFAULT_TRANSFORMATIONS HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() #TODO: Do benchmarks @@ -107,16 +105,15 @@ class StagingFile(object): def upload(self): """ Return a StagingFile encapsulated in a File class instance to - allow for easier upload a staging files + allow for easier upload of staging files """ try: return File(file(self.filepath, 'rb'), name=self.filename) except Exception, exc: raise Exception(ugettext(u'Unable to upload staging file: %s') % exc) - def delete(self, preview_size): - # tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS) - cache_cleanup(self.filepath, size=preview_size)# , extra_options=tranformation_string) + def delete(self, preview_size, transformations): + cache_cleanup(self.filepath, size=preview_size, transformations=transformations) try: 
os.unlink(self.filepath) except OSError, exc: @@ -125,24 +122,7 @@ class StagingFile(object): else: raise OSError(ugettext(u'Unable to delete staging file: %s') % exc) - def preview(self, preview_size): + def preview(self, preview_size, transformations): errors = [] - # tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS) - # output_file = convert(self.filepath, size=STAGING_FILES_PREVIEW_SIZE, extra_options=tranformation_string, cleanup_files=False) - output_file = convert(self.filepath, size=preview_size, cleanup_files=False) + output_file = convert(self.filepath, size=preview_size, cleanup_files=False, transformations=transformations) return output_file, errors - - -def get_transformation_string(transformations): - transformation_list = [] - errors = [] - for transformation in transformations: - try: - if transformation['name'] in TRANFORMATION_CHOICES: - output = TRANFORMATION_CHOICES[transformation['name']] % eval(transformation['arguments']) - transformation_list.append(output) - except Exception, e: - errors.append(e) - - tranformation_string = ' '.join(transformation_list) - return tranformation_string, errors diff --git a/apps/sources/urls.py b/apps/sources/urls.py index 354ec22b0a..5d6a015f92 100644 --- a/apps/sources/urls.py +++ b/apps/sources/urls.py @@ -16,12 +16,12 @@ urlpatterns = patterns('sources.views', url(r'^setup/interactive/staging_folder/list/$', 'setup_source_list', {'source_type': SOURCE_CHOICE_STAGING}, 'setup_staging_folder_list'), url(r'^setup/interactive/(?P\w+)/list/$', 'setup_source_list', (), 'setup_source_list'), - url(r'^setup/interactive/(?P\w+)/(?P\w+)/edit/$', 'setup_source_edit', (), 'setup_source_edit'), - url(r'^setup/interactive/(?P\w+)/(?P\w+)/delete/$', 'setup_source_delete', (), 'setup_source_delete'), + url(r'^setup/interactive/(?P\w+)/(?P\d+)/edit/$', 'setup_source_edit', (), 'setup_source_edit'), + url(r'^setup/interactive/(?P\w+)/(?P\d+)/delete/$', 'setup_source_delete', (), 
'setup_source_delete'), url(r'^setup/interactive/(?P\w+)/create/$', 'setup_source_create', (), 'setup_source_create'), - url(r'^setup/interactive/(?P\w+)/(?P\w+)/transformation/list/$', 'setup_source_transformation_list', (), 'setup_source_transformation_list'), - url(r'^setup/interactive/(?P\w+)/(?P\w+)/transformation/create/$', 'setup_source_transformation_create', (), 'setup_source_transformation_create'), - url(r'^setup/interactive/source/transformation/(?P\w+)/edit/$', 'setup_source_transformation_edit', (), 'setup_source_transformation_edit'), - url(r'^setup/interactive/source/transformation/(?P\w+)/delete/$', 'setup_source_transformation_delete', (), 'setup_source_transformation_delete'), + url(r'^setup/interactive/(?P\w+)/(?P\d+)/transformation/list/$', 'setup_source_transformation_list', (), 'setup_source_transformation_list'), + url(r'^setup/interactive/(?P\w+)/(?P\d+)/transformation/create/$', 'setup_source_transformation_create', (), 'setup_source_transformation_create'), + url(r'^setup/interactive/source/transformation/(?P\d+)/edit/$', 'setup_source_transformation_edit', (), 'setup_source_transformation_edit'), + url(r'^setup/interactive/source/transformation/(?P\d+)/delete/$', 'setup_source_transformation_delete', (), 'setup_source_transformation_delete'), ) diff --git a/apps/sources/views.py b/apps/sources/views.py index 6a48dc1e34..8732d66913 100644 --- a/apps/sources/views.py +++ b/apps/sources/views.py @@ -129,9 +129,13 @@ def upload_interactive(request, source_type=None, source_id=None): expand = True else: expand = False - if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type)): + + transformations, errors = SourceTransformation.objects.get_for_object_as_list(web_form) + + if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type=document_type, transformations=transformations)): instance = form.save() instance.save() + 
instance.apply_default_transformations(transformations) if document_type: instance.document_type = document_type _handle_save_document(request, instance, form) @@ -174,16 +178,18 @@ def upload_interactive(request, source_type=None, source_id=None): expand = True else: expand = False - if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type)): + transformations, errors = SourceTransformation.objects.get_for_object_as_list(staging_folder) + if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type=document_type, transformations=transformations)): document = Document(file=staging_file.upload()) if document_type: document.document_type = document_type document.save() + document.apply_default_transformations(transformations) _handle_save_document(request, document, form) messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename) if staging_folder.delete_after_upload: - staging_file.delete(staging_folder.get_preview_size()) + staging_file.delete(preview_size=staging_folder.get_preview_size(), transformations=transformations) messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename) except Exception, e: messages.error(request, e) @@ -260,7 +266,7 @@ def _handle_save_document(request, document, form=None): create_history(HISTORY_DOCUMENT_CREATED, document, {'user': request.user}) -def _handle_zip_file(request, uploaded_file, document_type=None): +def _handle_zip_file(request, uploaded_file, document_type=None, transformations=None): filename = getattr(uploaded_file, 'filename', getattr(uploaded_file, 'name', '')) if filename.lower().endswith('zip'): zfobj = zipfile.ZipFile(uploaded_file) @@ -285,7 +291,12 @@ def staging_file_preview(request, source_type, source_id, staging_file_id): staging_folder = get_object_or_404(StagingFolder, pk=source_id) StagingFile = create_staging_file_class(request, 
staging_folder.folder_path) try: - output_file, errors = StagingFile.get(staging_file_id).preview(staging_folder.get_preview_size()) + transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder) + + output_file, errors = StagingFile.get(staging_file_id).preview( + preview_size=staging_folder.get_preview_size(), + transformations=transformations + ) if errors and (request.user.is_staff or request.user.is_superuser): for error in errors: messages.warning(request, _(u'Staging file transformation error: %(error)s') % { @@ -313,15 +324,19 @@ def staging_file_delete(request, source_type, source_id, staging_file_id): StagingFile = create_staging_file_class(request, staging_folder.folder_path) staging_file = StagingFile.get(staging_file_id) - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', '/'))) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', '/'))) if request.method == 'POST': try: - staging_file.delete(staging_folder.get_preview_size()) + transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder) + staging_file.delete( + preview_size=staging_folder.get_preview_size(), + transformations=transformations + ) messages.success(request, _(u'Staging file delete successfully.')) except Exception, e: - messages.error(request, e) + messages.error(request, _(u'Staging file delete error; %s.') % e) return HttpResponseRedirect(next) results = get_active_tab_links() @@ -509,11 +524,17 @@ def setup_source_transformation_edit(request, transformation_id): form = SourceTransformationForm(instance=source_transformation, data=request.POST) if form.is_valid(): try: - form.save() - messages.success(request, _(u'Source 
transformation edited successfully')) - return HttpResponseRedirect(next) - except Exception, e: - messages.error(request, _(u'Error editing source transformation; %s') % e) + # Test the validity of the argument field + eval(form.cleaned_data['arguments'], {}) + except: + messages.error(request, _(u'Source transformation argument error.')) + else: + try: + form.save() + messages.success(request, _(u'Source transformation edited successfully')) + return HttpResponseRedirect(next) + except Exception, e: + messages.error(request, _(u'Error editing source transformation; %s') % e) else: form = SourceTransformationForm(instance=source_transformation) @@ -541,9 +562,9 @@ def setup_source_transformation_delete(request, transformation_id): if request.method == 'POST': try: source_transformation.delete() - messages.success(request, _(u'Transformation deleted successfully.')) + messages.success(request, _(u'Source transformation deleted successfully.')) except Exception, e: - messages.error(request, _(u'Error deleting transformation; %(error)s') % { + messages.error(request, _(u'Error deleting source transformation; %(error)s') % { 'error': e} ) return HttpResponseRedirect(redirect_view) @@ -556,7 +577,7 @@ def setup_source_transformation_delete(request, transformation_id): {'object': 'source', 'name': _(u'source')}, {'object': 'transformation', 'name': _(u'transformation')} ], - 'title': _(u'Are you sure you wish to delete transformation "%(transformation)s"') % { + 'title': _(u'Are you sure you wish to delete source transformation "%(transformation)s"') % { 'transformation': source_transformation.get_transformation_display(), }, 'previous': previous, @@ -598,13 +619,19 @@ def setup_source_transformation_create(request, source_type, source_id): form = SourceTransformationForm_create(request.POST) if form.is_valid(): try: - source_tranformation = form.save(commit=False) - source_tranformation.content_object = source - source_tranformation.save() - messages.success(request, 
_(u'Source transformation created successfully')) - return HttpResponseRedirect(redirect_view) - except Exception, e: - messages.error(request, _(u'Error creating source transformation; %s') % e) + # Test the validity of the argument field + eval(form.cleaned_data['arguments'], {}) + except: + messages.error(request, _(u'Source transformation argument error.')) + else: + try: + source_tranformation = form.save(commit=False) + source_tranformation.content_object = source + source_tranformation.save() + messages.success(request, _(u'Source transformation created successfully')) + return HttpResponseRedirect(redirect_view) + except Exception, e: + messages.error(request, _(u'Error creating source transformation; %s') % e) else: form = SourceTransformationForm_create() diff --git a/requirements/development.txt b/requirements/development.txt index 00def8c63a..3acf630b4e 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -9,3 +9,5 @@ django-celery==2.2.2 django-sentry==1.6.0 django-taggit==0.9.3 -e git://github.com/django-mptt/django-mptt.git@0af02a95877041b2fd6d458bd95413dc1666c321#egg=django-mptt +slate==0.3 +PIL==1.1.7 diff --git a/requirements/production.txt b/requirements/production.txt index 1f1d3a0881..02219abaee 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -6,3 +6,5 @@ django-celery==2.2.2 django-sentry==1.6.0 django-taggit==0.9.3 -e git://github.com/django-mptt/django-mptt.git@0af02a95877041b2fd6d458bd95413dc1666c321#egg=django-mptt +slate==0.3 +PIL==1.1.7