diff --git a/README.md b/README.md index e91aa08fa8..7cdb366d54 100755 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ Features * Document OCR and searching * Group documents by metadata automatically * Permissions and roles support +* Multi page document support +* Page transformations Requirements --- diff --git a/apps/common/templates/generic_confirm.html b/apps/common/templates/generic_confirm.html index c67ecb3fd5..14dd37eead 100755 --- a/apps/common/templates/generic_confirm.html +++ b/apps/common/templates/generic_confirm.html @@ -39,9 +39,11 @@ + {% if previous %} {% trans 'No' %} {% trans "No" %} + {% endif %} diff --git a/apps/converter/__init__.py b/apps/converter/__init__.py index 9562a07710..8111257f27 100755 --- a/apps/converter/__init__.py +++ b/apps/converter/__init__.py @@ -3,3 +3,14 @@ import tempfile from common.conf import settings as common_settings TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp() + +#ugettext = lambda s: s + +#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees')) +TRANFORMATION_CHOICES = { + 'rotate':'-rotate %(degrees)d' +} + +#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [ +# TRANFORMATION_ROTATE, +# ]) diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py index 07a771e822..f7cd3901dd 100755 --- a/apps/converter/conf/settings.py +++ b/apps/converter/conf/settings.py @@ -1,17 +1,8 @@ from django.conf import settings - -ugettext = lambda s: s - - CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert') IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify') OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200') DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'') LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'') HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400') - -TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees')) -TRANFORMATION_CHOICES = getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [ - TRANFORMATION_ROTATE, - ]) diff --git a/apps/documents/conf/settings.py b/apps/documents/conf/settings.py index 72671d396c..a07fce99b8 100755 --- a/apps/documents/conf/settings.py +++ b/apps/documents/conf/settings.py @@ -5,6 +5,7 @@ import tempfile from django.conf import settings from django.contrib.auth.models import User +from django.utils.translation import ugettext_lazy as _ from converter.api import get_page_count @@ -18,6 +19,11 @@ default_available_models = { 'User':User } +available_transformations = { + 'rotate': {'label':_(u'Rotate [degrees]'), 'arguments':[{'name':'degrees'}]} +} + + # Definition AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions) AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models) @@ -44,6 +50,10 @@ MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', ' THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50') DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200') +# Transformations +AVAILABLE_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', available_transformations) +DEFAULT_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_DEFAULT_TRANSFORMATIONS', []) + #Groups GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20) GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True) diff --git a/apps/documents/models.py b/apps/documents/models.py index d4647e2d9a..3e32963735 100755 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -14,8 +14,6 @@ from django.db.models import Q from dynamic_search.api import register -from converter.conf.settings import TRANFORMATION_CHOICES - from documents.conf.settings import AVAILABLE_FUNCTIONS from documents.conf.settings import AVAILABLE_MODELS from documents.conf.settings import CHECKSUM_FUNCTION @@ -27,7 +25,7 @@ from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE from documents.conf.settings import FILESYSTEM_FILESERVING_PATH from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT - +from documents.conf.settings import AVAILABLE_TRANSFORMATIONS if FILESYSTEM_SLUGIFY_PATHS == False: #Do not slugify path or filenames and extensions @@ -447,21 +445,18 @@ class MetadataGroupItem(models.Model): verbose_name = _(u'metadata group item') verbose_name_plural = _(u'metadata group items') + +available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()]) if AVAILABLE_MODELS else [] + class DocumentPageTransformation(models.Model): document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page')) order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order')) - transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation')) + transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation')) arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}')) def __unicode__(self): - return self.get_transformation_display() - - def get_transformation(self): - try: - return self.transformation % eval(self.arguments) - except Exception, e: - raise Exception(e) + return '%s - %s' % (self.document_page, self.get_transformation_display()) class Meta: ordering = ('order',) diff --git a/apps/documents/views.py b/apps/documents/views.py index eeb2386b0f..1ccc38ff9d 100755 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -10,16 +10,19 @@ from django.core.files.base import File from django.conf import settings from django.utils.http import urlencode from django.template.defaultfilters import slugify +from django.core.exceptions import ObjectDoesNotExist +from common.utils import pretty_size from permissions.api import check_permissions, Unauthorized from filetransfers.api import serve_file from converter.api import convert, in_image_cache, QUALITY_DEFAULT -from common.utils import pretty_size +from converter import TRANFORMATION_CHOICES from utils import from_descriptor_to_tempfile from models import Document, DocumentMetadata, DocumentType, MetadataType, \ - DocumentPage + DocumentPage, DocumentPageTransformation + from forms import DocumentTypeSelectForm, DocumentCreateWizard, \ MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \ StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \ @@ -35,6 +38,8 @@ from documents.conf.settings import PREVIEW_SIZE from documents.conf.settings import THUMBNAIL_SIZE from documents.conf.settings import GROUP_MAX_RESULTS from documents.conf.settings import GROUP_SHOW_EMPTY +from documents.conf.settings import DEFAULT_TRANSFORMATIONS + from documents import PERMISSION_DOCUMENT_CREATE, \ PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \ @@ -124,6 +129,20 @@ def upload_document_with_type(request, document_type_id, multiple=True): instance.update_checksum() instance.update_mimetype() instance.update_page_count() + if DEFAULT_TRANSFORMATIONS: + for transformation in DEFAULT_TRANSFORMATIONS: + if 'name' in transformation: + for document_page in instance.documentpage_set.all(): + page_transformation = DocumentPageTransformation( + document_page=document_page, + order=0, + transformation=transformation['name']) + if 'arguments' in transformation: + page_transformation.arguments = transformation['arguments'] + + page_transformation.save() + + if 'document_type_available_filenames' in local_form.cleaned_data: if local_form.cleaned_data['document_type_available_filenames']: @@ -445,17 +464,20 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ page = int(request.GET.get('page', 1)) transformation_list = [] try: + #Catch invalid or non existing pages document_page = DocumentPage.objects.get(document=document, page_number=page) - for tranformation in document_page.documentpagetransformation_set.all(): + for page_transformation in document_page.documentpagetransformation_set.all(): try: - transformation_list.append(tranformation.get_transformation()) + if page_transformation.transformation in TRANFORMATION_CHOICES: + output = TRANFORMATION_CHOICES[page_transformation.transformation] % eval(page_transformation.arguments) + transformation_list.append(output) except Exception, e: if request.user.is_staff: - messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e))) + messages.warning(request, _(u'Error for transformation %s:, %s' % (page_transformation.get_transformation_display(), e))) else: pass - except: + except ObjectDoesNotExist: pass tranformation_string = ' '.join(transformation_list) diff --git a/apps/ocr/api.py b/apps/ocr/api.py index b638f999d0..6ba305c8b0 100755 --- a/apps/ocr/api.py +++ b/apps/ocr/api.py @@ -37,10 +37,8 @@ def run_tesseract(input_filename, output_filename_base, lang=None): def ocr_document(document): - total_pages = 1 - page = 0 - while page < total_pages: - imagefile = convert_document_for_ocr(document, page=page) + for page_index, document_page in enumerate(document.documentpage_set.all()): + imagefile = convert_document_for_ocr(document, page=page_index) desc, filepath = tempfile.mkstemp() try: status, error_string = run_tesseract(imagefile, filepath) @@ -52,7 +50,7 @@ def ocr_document(document): f = file(ocr_output) try: document_page, created = DocumentPage.objects.get_or_create(document=document, - page_number=page) + page_number=page_index+1) document_page.content = f.read().strip() document_page.page_label = _(u'Text from OCR') document_page.save() @@ -61,6 +59,3 @@ def ocr_document(document): cleanup(filepath) cleanup(ocr_output) cleanup(imagefile) - - page += 1 - diff --git a/settings.py b/settings.py index d0e6dc54bf..0c6d83b713 100755 --- a/settings.py +++ b/settings.py @@ -182,6 +182,7 @@ LOGIN_EXEMPT_URLS = ( # Saving #DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()) #DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4()) +#DOCUMENTS_DEFAULT_TRANSFORMATIONS = [] # Storage #DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents' @@ -192,6 +193,8 @@ LOGIN_EXEMPT_URLS = ( #DOCUMENTS_THUMBNAIL_SIZE = '50x50' #DOCUMENTS_DISPLAY_SIZE = '1200' #DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120' +#DOCUMENTS_AVAILABLE_TRANSFORMATIONS = {} +#example: DOCUMENTS_DEFAULT_TRANSFORMATIONS = [{'name':'rotate', 'arguments':"{'degrees':270}"}] # Groups #DOCUMENTS_GROUP_MAX_RESULTS = 20