Decoupled page transformation interface, added default transformation support
This commit is contained in:
@@ -21,6 +21,8 @@ Features
|
|||||||
* Document OCR and searching
|
* Document OCR and searching
|
||||||
* Group documents by metadata automatically
|
* Group documents by metadata automatically
|
||||||
* Permissions and roles support
|
* Permissions and roles support
|
||||||
|
* Multi page document support
|
||||||
|
* Page transformations
|
||||||
|
|
||||||
Requirements
|
Requirements
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -39,9 +39,11 @@
|
|||||||
<button class="button" type="submit">
|
<button class="button" type="submit">
|
||||||
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/tick.png" alt="{% trans 'Yes' %}" /> {% trans "Yes" %}
|
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/tick.png" alt="{% trans 'Yes' %}" /> {% trans "Yes" %}
|
||||||
</button>
|
</button>
|
||||||
|
{% if previous %}
|
||||||
<a href="#header" onclick='{% if previous %}window.location.replace("{{ previous }}");{% else %}history.go(-1);{% endif %}' class="button">
|
<a href="#header" onclick='{% if previous %}window.location.replace("{{ previous }}");{% else %}history.go(-1);{% endif %}' class="button">
|
||||||
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/cross.png" alt="{% trans 'No' %}"/> {% trans "No" %}
|
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/cross.png" alt="{% trans 'No' %}"/> {% trans "No" %}
|
||||||
</a>
|
</a>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -3,3 +3,14 @@ import tempfile
|
|||||||
from common.conf import settings as common_settings
|
from common.conf import settings as common_settings
|
||||||
|
|
||||||
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
||||||
|
|
||||||
|
#ugettext = lambda s: s
|
||||||
|
|
||||||
|
#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
|
||||||
|
TRANFORMATION_CHOICES = {
|
||||||
|
'rotate':'-rotate %(degrees)d'
|
||||||
|
}
|
||||||
|
|
||||||
|
#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
|
||||||
|
# TRANFORMATION_ROTATE,
|
||||||
|
# ])
|
||||||
|
|||||||
@@ -1,17 +1,8 @@
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
ugettext = lambda s: s
|
|
||||||
|
|
||||||
|
|
||||||
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
|
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
|
||||||
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
|
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
|
||||||
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
|
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
|
||||||
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
|
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
|
||||||
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
|
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
|
||||||
HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400')
|
HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400')
|
||||||
|
|
||||||
TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
|
|
||||||
TRANFORMATION_CHOICES = getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
|
|
||||||
TRANFORMATION_ROTATE,
|
|
||||||
])
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import tempfile
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from converter.api import get_page_count
|
from converter.api import get_page_count
|
||||||
|
|
||||||
@@ -18,6 +19,11 @@ default_available_models = {
|
|||||||
'User':User
|
'User':User
|
||||||
}
|
}
|
||||||
|
|
||||||
|
available_transformations = {
|
||||||
|
'rotate': {'label':_(u'Rotate [degrees]'), 'arguments':[{'name':'degrees'}]}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Definition
|
# Definition
|
||||||
AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions)
|
AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions)
|
||||||
AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models)
|
AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models)
|
||||||
@@ -44,6 +50,10 @@ MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '
|
|||||||
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
|
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
|
||||||
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
|
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
|
||||||
|
|
||||||
|
# Transformations
|
||||||
|
AVAILABLE_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', available_transformations)
|
||||||
|
DEFAULT_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_DEFAULT_TRANSFORMATIONS', [])
|
||||||
|
|
||||||
#Groups
|
#Groups
|
||||||
GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20)
|
GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20)
|
||||||
GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True)
|
GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True)
|
||||||
|
|||||||
@@ -14,8 +14,6 @@ from django.db.models import Q
|
|||||||
|
|
||||||
from dynamic_search.api import register
|
from dynamic_search.api import register
|
||||||
|
|
||||||
from converter.conf.settings import TRANFORMATION_CHOICES
|
|
||||||
|
|
||||||
from documents.conf.settings import AVAILABLE_FUNCTIONS
|
from documents.conf.settings import AVAILABLE_FUNCTIONS
|
||||||
from documents.conf.settings import AVAILABLE_MODELS
|
from documents.conf.settings import AVAILABLE_MODELS
|
||||||
from documents.conf.settings import CHECKSUM_FUNCTION
|
from documents.conf.settings import CHECKSUM_FUNCTION
|
||||||
@@ -27,7 +25,7 @@ from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
|
|||||||
from documents.conf.settings import FILESYSTEM_FILESERVING_PATH
|
from documents.conf.settings import FILESYSTEM_FILESERVING_PATH
|
||||||
from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS
|
from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS
|
||||||
from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT
|
from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT
|
||||||
|
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
|
||||||
|
|
||||||
if FILESYSTEM_SLUGIFY_PATHS == False:
|
if FILESYSTEM_SLUGIFY_PATHS == False:
|
||||||
#Do not slugify path or filenames and extensions
|
#Do not slugify path or filenames and extensions
|
||||||
@@ -448,20 +446,17 @@ class MetadataGroupItem(models.Model):
|
|||||||
verbose_name_plural = _(u'metadata group items')
|
verbose_name_plural = _(u'metadata group items')
|
||||||
|
|
||||||
|
|
||||||
|
available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()]) if AVAILABLE_MODELS else []
|
||||||
|
|
||||||
|
|
||||||
class DocumentPageTransformation(models.Model):
|
class DocumentPageTransformation(models.Model):
|
||||||
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
|
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
|
||||||
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
|
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
|
||||||
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
|
transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
|
||||||
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
|
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.get_transformation_display()
|
return '%s - %s' % (self.document_page, self.get_transformation_display())
|
||||||
|
|
||||||
def get_transformation(self):
|
|
||||||
try:
|
|
||||||
return self.transformation % eval(self.arguments)
|
|
||||||
except Exception, e:
|
|
||||||
raise Exception(e)
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
ordering = ('order',)
|
ordering = ('order',)
|
||||||
|
|||||||
@@ -10,16 +10,19 @@ from django.core.files.base import File
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils.http import urlencode
|
from django.utils.http import urlencode
|
||||||
from django.template.defaultfilters import slugify
|
from django.template.defaultfilters import slugify
|
||||||
|
from django.core.exceptions import ObjectDoesNotExist
|
||||||
|
|
||||||
|
from common.utils import pretty_size
|
||||||
from permissions.api import check_permissions, Unauthorized
|
from permissions.api import check_permissions, Unauthorized
|
||||||
from filetransfers.api import serve_file
|
from filetransfers.api import serve_file
|
||||||
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
|
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
|
||||||
from common.utils import pretty_size
|
from converter import TRANFORMATION_CHOICES
|
||||||
|
|
||||||
from utils import from_descriptor_to_tempfile
|
from utils import from_descriptor_to_tempfile
|
||||||
|
|
||||||
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
|
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
|
||||||
DocumentPage
|
DocumentPage, DocumentPageTransformation
|
||||||
|
|
||||||
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
|
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
|
||||||
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
|
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
|
||||||
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
|
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
|
||||||
@@ -35,6 +38,8 @@ from documents.conf.settings import PREVIEW_SIZE
|
|||||||
from documents.conf.settings import THUMBNAIL_SIZE
|
from documents.conf.settings import THUMBNAIL_SIZE
|
||||||
from documents.conf.settings import GROUP_MAX_RESULTS
|
from documents.conf.settings import GROUP_MAX_RESULTS
|
||||||
from documents.conf.settings import GROUP_SHOW_EMPTY
|
from documents.conf.settings import GROUP_SHOW_EMPTY
|
||||||
|
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
|
||||||
|
|
||||||
|
|
||||||
from documents import PERMISSION_DOCUMENT_CREATE, \
|
from documents import PERMISSION_DOCUMENT_CREATE, \
|
||||||
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
|
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
|
||||||
@@ -124,6 +129,20 @@ def upload_document_with_type(request, document_type_id, multiple=True):
|
|||||||
instance.update_checksum()
|
instance.update_checksum()
|
||||||
instance.update_mimetype()
|
instance.update_mimetype()
|
||||||
instance.update_page_count()
|
instance.update_page_count()
|
||||||
|
if DEFAULT_TRANSFORMATIONS:
|
||||||
|
for transformation in DEFAULT_TRANSFORMATIONS:
|
||||||
|
if 'name' in transformation:
|
||||||
|
for document_page in instance.documentpage_set.all():
|
||||||
|
page_transformation = DocumentPageTransformation(
|
||||||
|
document_page=document_page,
|
||||||
|
order=0,
|
||||||
|
transformation=transformation['name'])
|
||||||
|
if 'arguments' in transformation:
|
||||||
|
page_transformation.arguments = transformation['arguments']
|
||||||
|
|
||||||
|
page_transformation.save()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if 'document_type_available_filenames' in local_form.cleaned_data:
|
if 'document_type_available_filenames' in local_form.cleaned_data:
|
||||||
if local_form.cleaned_data['document_type_available_filenames']:
|
if local_form.cleaned_data['document_type_available_filenames']:
|
||||||
@@ -445,17 +464,20 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
|
|||||||
page = int(request.GET.get('page', 1))
|
page = int(request.GET.get('page', 1))
|
||||||
transformation_list = []
|
transformation_list = []
|
||||||
try:
|
try:
|
||||||
|
#Catch invalid or nonexistent pages
|
||||||
document_page = DocumentPage.objects.get(document=document, page_number=page)
|
document_page = DocumentPage.objects.get(document=document, page_number=page)
|
||||||
|
|
||||||
for tranformation in document_page.documentpagetransformation_set.all():
|
for page_transformation in document_page.documentpagetransformation_set.all():
|
||||||
try:
|
try:
|
||||||
transformation_list.append(tranformation.get_transformation())
|
if page_transformation.transformation in TRANFORMATION_CHOICES:
|
||||||
|
output = TRANFORMATION_CHOICES[page_transformation.transformation] % eval(page_transformation.arguments)
|
||||||
|
transformation_list.append(output)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
if request.user.is_staff:
|
if request.user.is_staff:
|
||||||
messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
|
messages.warning(request, _(u'Error for transformation %s:, %s' % (page_transformation.get_transformation_display(), e)))
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
except:
|
except ObjectDoesNotExist:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
tranformation_string = ' '.join(transformation_list)
|
tranformation_string = ' '.join(transformation_list)
|
||||||
|
|||||||
@@ -37,10 +37,8 @@ def run_tesseract(input_filename, output_filename_base, lang=None):
|
|||||||
|
|
||||||
|
|
||||||
def ocr_document(document):
|
def ocr_document(document):
|
||||||
total_pages = 1
|
for page_index, document_page in enumerate(document.documentpage_set.all()):
|
||||||
page = 0
|
imagefile = convert_document_for_ocr(document, page=page_index)
|
||||||
while page < total_pages:
|
|
||||||
imagefile = convert_document_for_ocr(document, page=page)
|
|
||||||
desc, filepath = tempfile.mkstemp()
|
desc, filepath = tempfile.mkstemp()
|
||||||
try:
|
try:
|
||||||
status, error_string = run_tesseract(imagefile, filepath)
|
status, error_string = run_tesseract(imagefile, filepath)
|
||||||
@@ -52,7 +50,7 @@ def ocr_document(document):
|
|||||||
f = file(ocr_output)
|
f = file(ocr_output)
|
||||||
try:
|
try:
|
||||||
document_page, created = DocumentPage.objects.get_or_create(document=document,
|
document_page, created = DocumentPage.objects.get_or_create(document=document,
|
||||||
page_number=page)
|
page_number=page_index+1)
|
||||||
document_page.content = f.read().strip()
|
document_page.content = f.read().strip()
|
||||||
document_page.page_label = _(u'Text from OCR')
|
document_page.page_label = _(u'Text from OCR')
|
||||||
document_page.save()
|
document_page.save()
|
||||||
@@ -61,6 +59,3 @@ def ocr_document(document):
|
|||||||
cleanup(filepath)
|
cleanup(filepath)
|
||||||
cleanup(ocr_output)
|
cleanup(ocr_output)
|
||||||
cleanup(imagefile)
|
cleanup(imagefile)
|
||||||
|
|
||||||
page += 1
|
|
||||||
|
|
||||||
|
|||||||
@@ -182,6 +182,7 @@ LOGIN_EXEMPT_URLS = (
|
|||||||
# Saving
|
# Saving
|
||||||
#DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest())
|
#DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest())
|
||||||
#DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4())
|
#DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4())
|
||||||
|
#DOCUMENTS_DEFAULT_TRANSFORMATIONS = []
|
||||||
|
|
||||||
# Storage
|
# Storage
|
||||||
#DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents'
|
#DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents'
|
||||||
@@ -192,6 +193,8 @@ LOGIN_EXEMPT_URLS = (
|
|||||||
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
|
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
|
||||||
#DOCUMENTS_DISPLAY_SIZE = '1200'
|
#DOCUMENTS_DISPLAY_SIZE = '1200'
|
||||||
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
|
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
|
||||||
|
#DOCUMENTS_AVAILABLE_TRANSFORMATIONS = {}
|
||||||
|
#example: DOCUMENTS_DEFAULT_TRANSFORMATIONS = [{'name':'rotate', 'arguments':"{'degrees':270}"}]
|
||||||
|
|
||||||
# Groups
|
# Groups
|
||||||
#DOCUMENTS_GROUP_MAX_RESULTS = 20
|
#DOCUMENTS_GROUP_MAX_RESULTS = 20
|
||||||
|
|||||||
Reference in New Issue
Block a user