Decoupled page transformation interface, added default transformation support
This commit is contained in:
@@ -21,6 +21,8 @@ Features
|
||||
* Document OCR and searching
|
||||
* Group documents by metadata automatically
|
||||
* Permissions and roles support
|
||||
* Multi page document support
|
||||
* Page transformations
|
||||
|
||||
Requirements
|
||||
---
|
||||
|
||||
@@ -39,9 +39,11 @@
|
||||
<button class="button" type="submit">
|
||||
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/tick.png" alt="{% trans 'Yes' %}" /> {% trans "Yes" %}
|
||||
</button>
|
||||
{% if previous %}
|
||||
<a href="#header" onclick='{% if previous %}window.location.replace("{{ previous }}");{% else %}history.go(-1);{% endif %}' class="button">
|
||||
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/cross.png" alt="{% trans 'No' %}"/> {% trans "No" %}
|
||||
</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
@@ -3,3 +3,14 @@ import tempfile
|
||||
from common.conf import settings as common_settings
|
||||
|
||||
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
||||
|
||||
#ugettext = lambda s: s
|
||||
|
||||
#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
|
||||
TRANFORMATION_CHOICES = {
|
||||
'rotate':'-rotate %(degrees)d'
|
||||
}
|
||||
|
||||
#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
|
||||
# TRANFORMATION_ROTATE,
|
||||
# ])
|
||||
|
||||
@@ -1,17 +1,8 @@
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
ugettext = lambda s: s
|
||||
|
||||
|
||||
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
|
||||
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
|
||||
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
|
||||
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
|
||||
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
|
||||
HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400')
|
||||
|
||||
TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
|
||||
TRANFORMATION_CHOICES = getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
|
||||
TRANFORMATION_ROTATE,
|
||||
])
|
||||
|
||||
@@ -5,6 +5,7 @@ import tempfile
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from converter.api import get_page_count
|
||||
|
||||
@@ -18,6 +19,11 @@ default_available_models = {
|
||||
'User':User
|
||||
}
|
||||
|
||||
available_transformations = {
|
||||
'rotate': {'label':_(u'Rotate [degrees]'), 'arguments':[{'name':'degrees'}]}
|
||||
}
|
||||
|
||||
|
||||
# Definition
|
||||
AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions)
|
||||
AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models)
|
||||
@@ -44,6 +50,10 @@ MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '
|
||||
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
|
||||
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
|
||||
|
||||
# Transformations
|
||||
AVAILABLE_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', available_transformations)
|
||||
DEFAULT_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_DEFAULT_TRANSFORMATIONS', [])
|
||||
|
||||
#Groups
|
||||
GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20)
|
||||
GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True)
|
||||
|
||||
@@ -14,8 +14,6 @@ from django.db.models import Q
|
||||
|
||||
from dynamic_search.api import register
|
||||
|
||||
from converter.conf.settings import TRANFORMATION_CHOICES
|
||||
|
||||
from documents.conf.settings import AVAILABLE_FUNCTIONS
|
||||
from documents.conf.settings import AVAILABLE_MODELS
|
||||
from documents.conf.settings import CHECKSUM_FUNCTION
|
||||
@@ -27,7 +25,7 @@ from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
|
||||
from documents.conf.settings import FILESYSTEM_FILESERVING_PATH
|
||||
from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS
|
||||
from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT
|
||||
|
||||
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
|
||||
|
||||
if FILESYSTEM_SLUGIFY_PATHS == False:
|
||||
#Do not slugify path or filenames and extensions
|
||||
@@ -447,21 +445,18 @@ class MetadataGroupItem(models.Model):
|
||||
verbose_name = _(u'metadata group item')
|
||||
verbose_name_plural = _(u'metadata group items')
|
||||
|
||||
|
||||
available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()]) if AVAILABLE_MODELS else []
|
||||
|
||||
|
||||
class DocumentPageTransformation(models.Model):
|
||||
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
|
||||
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
|
||||
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
|
||||
transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
|
||||
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
|
||||
|
||||
def __unicode__(self):
|
||||
return self.get_transformation_display()
|
||||
|
||||
def get_transformation(self):
|
||||
try:
|
||||
return self.transformation % eval(self.arguments)
|
||||
except Exception, e:
|
||||
raise Exception(e)
|
||||
return '%s - %s' % (self.document_page, self.get_transformation_display())
|
||||
|
||||
class Meta:
|
||||
ordering = ('order',)
|
||||
|
||||
@@ -10,16 +10,19 @@ from django.core.files.base import File
|
||||
from django.conf import settings
|
||||
from django.utils.http import urlencode
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
|
||||
from common.utils import pretty_size
|
||||
from permissions.api import check_permissions, Unauthorized
|
||||
from filetransfers.api import serve_file
|
||||
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
|
||||
from common.utils import pretty_size
|
||||
from converter import TRANFORMATION_CHOICES
|
||||
|
||||
from utils import from_descriptor_to_tempfile
|
||||
|
||||
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
|
||||
DocumentPage
|
||||
DocumentPage, DocumentPageTransformation
|
||||
|
||||
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
|
||||
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
|
||||
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
|
||||
@@ -35,6 +38,8 @@ from documents.conf.settings import PREVIEW_SIZE
|
||||
from documents.conf.settings import THUMBNAIL_SIZE
|
||||
from documents.conf.settings import GROUP_MAX_RESULTS
|
||||
from documents.conf.settings import GROUP_SHOW_EMPTY
|
||||
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
|
||||
|
||||
|
||||
from documents import PERMISSION_DOCUMENT_CREATE, \
|
||||
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
|
||||
@@ -124,6 +129,20 @@ def upload_document_with_type(request, document_type_id, multiple=True):
|
||||
instance.update_checksum()
|
||||
instance.update_mimetype()
|
||||
instance.update_page_count()
|
||||
if DEFAULT_TRANSFORMATIONS:
|
||||
for transformation in DEFAULT_TRANSFORMATIONS:
|
||||
if 'name' in transformation:
|
||||
for document_page in instance.documentpage_set.all():
|
||||
page_transformation = DocumentPageTransformation(
|
||||
document_page=document_page,
|
||||
order=0,
|
||||
transformation=transformation['name'])
|
||||
if 'arguments' in transformation:
|
||||
page_transformation.arguments = transformation['arguments']
|
||||
|
||||
page_transformation.save()
|
||||
|
||||
|
||||
|
||||
if 'document_type_available_filenames' in local_form.cleaned_data:
|
||||
if local_form.cleaned_data['document_type_available_filenames']:
|
||||
@@ -445,17 +464,20 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
|
||||
page = int(request.GET.get('page', 1))
|
||||
transformation_list = []
|
||||
try:
|
||||
# Catch invalid or nonexistent pages
|
||||
document_page = DocumentPage.objects.get(document=document, page_number=page)
|
||||
|
||||
for tranformation in document_page.documentpagetransformation_set.all():
|
||||
for page_transformation in document_page.documentpagetransformation_set.all():
|
||||
try:
|
||||
transformation_list.append(tranformation.get_transformation())
|
||||
if page_transformation.transformation in TRANFORMATION_CHOICES:
|
||||
output = TRANFORMATION_CHOICES[page_transformation.transformation] % eval(page_transformation.arguments)
|
||||
transformation_list.append(output)
|
||||
except Exception, e:
|
||||
if request.user.is_staff:
|
||||
messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
|
||||
messages.warning(request, _(u'Error for transformation %s:, %s' % (page_transformation.get_transformation_display(), e)))
|
||||
else:
|
||||
pass
|
||||
except:
|
||||
except ObjectDoesNotExist:
|
||||
pass
|
||||
|
||||
tranformation_string = ' '.join(transformation_list)
|
||||
|
||||
@@ -37,10 +37,8 @@ def run_tesseract(input_filename, output_filename_base, lang=None):
|
||||
|
||||
|
||||
def ocr_document(document):
|
||||
total_pages = 1
|
||||
page = 0
|
||||
while page < total_pages:
|
||||
imagefile = convert_document_for_ocr(document, page=page)
|
||||
for page_index, document_page in enumerate(document.documentpage_set.all()):
|
||||
imagefile = convert_document_for_ocr(document, page=page_index)
|
||||
desc, filepath = tempfile.mkstemp()
|
||||
try:
|
||||
status, error_string = run_tesseract(imagefile, filepath)
|
||||
@@ -52,7 +50,7 @@ def ocr_document(document):
|
||||
f = file(ocr_output)
|
||||
try:
|
||||
document_page, created = DocumentPage.objects.get_or_create(document=document,
|
||||
page_number=page)
|
||||
page_number=page_index+1)
|
||||
document_page.content = f.read().strip()
|
||||
document_page.page_label = _(u'Text from OCR')
|
||||
document_page.save()
|
||||
@@ -61,6 +59,3 @@ def ocr_document(document):
|
||||
cleanup(filepath)
|
||||
cleanup(ocr_output)
|
||||
cleanup(imagefile)
|
||||
|
||||
page += 1
|
||||
|
||||
|
||||
@@ -182,6 +182,7 @@ LOGIN_EXEMPT_URLS = (
|
||||
# Saving
|
||||
#DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
|
||||
#DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4())
|
||||
#DOCUMENTS_DEFAULT_TRANSFORMATIONS = []
|
||||
|
||||
# Storage
|
||||
#DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents'
|
||||
@@ -192,6 +193,8 @@ LOGIN_EXEMPT_URLS = (
|
||||
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
|
||||
#DOCUMENTS_DISPLAY_SIZE = '1200'
|
||||
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
|
||||
#DOCUMENTS_AVAILABLE_TRANSFORMATIONS = {}
|
||||
#example: DOCUMENTS_DEFAULT_TRANSFORMATIONS = [{'name':'rotate', 'arguments':"{'degrees':270}"}]
|
||||
|
||||
# Groups
|
||||
#DOCUMENTS_GROUP_MAX_RESULTS = 20
|
||||
|
||||
Reference in New Issue
Block a user