Decoupled page transformation interface, added default transformation support

This commit is contained in:
Roberto Rosario
2011-02-14 02:11:39 -04:00
parent 06d7e5a46a
commit fbc8bc960a
9 changed files with 65 additions and 34 deletions

View File

@@ -21,6 +21,8 @@ Features
* Document OCR and searching
* Group documents by metadata automatically
* Permissions and roles support
* Multi page document support
* Page transformations
Requirements
---

View File

@@ -39,9 +39,11 @@
<button class="button" type="submit">
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/tick.png" alt="{% trans 'Yes' %}" /> {% trans "Yes" %}
</button>
{% if previous %}
{# The "No" button is only offered when the page to return to is known. #}
<a href="#header" onclick='window.location.replace("{{ previous }}");' class="button">
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/cross.png" alt="{% trans 'No' %}"/> {% trans "No" %}
</a>
{% endif %}
</div>
</form>
</div>

View File

@@ -3,3 +3,14 @@ import tempfile
from common.conf import settings as common_settings
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
#ugettext = lambda s: s
#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
# Command-line argument templates for the supported page transformations,
# keyed by transformation name.  Each template is completed with %-style
# named interpolation, e.g. TRANFORMATION_CHOICES['rotate'] % {'degrees': 90}.
# The (misspelled) name is kept as-is because other modules import it.
TRANFORMATION_CHOICES = {
    'rotate': '-rotate %(degrees)d',
}
#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
# TRANFORMATION_ROTATE,
# ])

View File

@@ -1,17 +1,8 @@
from django.conf import settings
ugettext = lambda s: s
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400')
TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
TRANFORMATION_CHOICES = getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
TRANFORMATION_ROTATE,
])

View File

@@ -5,6 +5,7 @@ import tempfile
from django.conf import settings
from django.contrib.auth.models import User
from django.utils.translation import ugettext_lazy as _
from converter.api import get_page_count
@@ -18,6 +19,11 @@ default_available_models = {
'User':User
}
# Transformations offered when the project settings do not override them,
# keyed by transformation name.  Each entry carries a translatable label and
# the list of arguments the transformation takes.
available_transformations = {
    'rotate': {
        'label': _(u'Rotate [degrees]'),
        'arguments': [
            {'name': 'degrees'},
        ],
    },
}
# Definition
AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions)
AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models)
@@ -44,6 +50,10 @@ MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
# Transformations
# Read from the project settings, falling back to the module-level
# available_transformations mapping when the setting is absent.
AVAILABLE_TRANSFORMATIONS = getattr(
    settings,
    'DOCUMENTS_AVAILABLE_TRANSFORMATIONS',
    available_transformations,
)
# Falls back to an empty list when the project does not configure any.
DEFAULT_TRANSFORMATIONS = getattr(
    settings,
    'DOCUMENTS_DEFAULT_TRANSFORMATIONS',
    [],
)
#Groups
GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20)
GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True)

View File

@@ -14,8 +14,6 @@ from django.db.models import Q
from dynamic_search.api import register
from converter.conf.settings import TRANFORMATION_CHOICES
from documents.conf.settings import AVAILABLE_FUNCTIONS
from documents.conf.settings import AVAILABLE_MODELS
from documents.conf.settings import CHECKSUM_FUNCTION
@@ -27,7 +25,7 @@ from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
from documents.conf.settings import FILESYSTEM_FILESERVING_PATH
from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS
from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
if FILESYSTEM_SLUGIFY_PATHS == False:
#Do not slugify path or filenames and extensions
@@ -447,21 +445,18 @@ class MetadataGroupItem(models.Model):
verbose_name = _(u'metadata group item')
verbose_name_plural = _(u'metadata group items')
# (name, label) pairs used as the `choices` of the page transformation field,
# built from the configured transformations.  The fallback must depend on
# AVAILABLE_TRANSFORMATIONS itself: testing AVAILABLE_MODELS here would drop
# every transformation choice whenever the model registry happens to be empty,
# and would still crash on .items() if the transformations setting were None.
available_transformations = [
    (name, data['label'])
    for name, data in (AVAILABLE_TRANSFORMATIONS or {}).items()
]
class DocumentPageTransformation(models.Model):
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
def __unicode__(self):
return self.get_transformation_display()
def get_transformation(self):
try:
return self.transformation % eval(self.arguments)
except Exception, e:
raise Exception(e)
return '%s - %s' % (self.document_page, self.get_transformation_display())
class Meta:
ordering = ('order',)

View File

@@ -10,16 +10,19 @@ from django.core.files.base import File
from django.conf import settings
from django.utils.http import urlencode
from django.template.defaultfilters import slugify
from django.core.exceptions import ObjectDoesNotExist
from common.utils import pretty_size
from permissions.api import check_permissions, Unauthorized
from filetransfers.api import serve_file
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
from common.utils import pretty_size
from converter import TRANFORMATION_CHOICES
from utils import from_descriptor_to_tempfile
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
DocumentPage
DocumentPage, DocumentPageTransformation
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
@@ -35,6 +38,8 @@ from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import GROUP_MAX_RESULTS
from documents.conf.settings import GROUP_SHOW_EMPTY
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
from documents import PERMISSION_DOCUMENT_CREATE, \
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
@@ -124,6 +129,20 @@ def upload_document_with_type(request, document_type_id, multiple=True):
instance.update_checksum()
instance.update_mimetype()
instance.update_page_count()
# Attach every configured default transformation to each page of the
# document that was just stored.
if DEFAULT_TRANSFORMATIONS:
    # Fetch the pages once instead of re-running the query for each
    # transformation.
    document_pages = list(instance.documentpage_set.all())
    for default_order, transformation in enumerate(DEFAULT_TRANSFORMATIONS):
        if 'name' not in transformation:
            # An entry without a transformation name cannot be applied.
            continue
        for document_page in document_pages:
            page_transformation = DocumentPageTransformation(
                document_page=document_page,
                # Use the position in the configured list so that several
                # defaults keep their configured sequence; a constant 0
                # for all of them leaves their relative order undefined.
                order=default_order,
                transformation=transformation['name'])
            if 'arguments' in transformation:
                page_transformation.arguments = transformation['arguments']
            page_transformation.save()
if 'document_type_available_filenames' in local_form.cleaned_data:
if local_form.cleaned_data['document_type_available_filenames']:
@@ -445,17 +464,20 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
page = int(request.GET.get('page', 1))
transformation_list = []
try:
#Catch invalid or non existing pages
document_page = DocumentPage.objects.get(document=document, page_number=page)
for tranformation in document_page.documentpagetransformation_set.all():
for page_transformation in document_page.documentpagetransformation_set.all():
try:
transformation_list.append(tranformation.get_transformation())
if page_transformation.transformation in TRANFORMATION_CHOICES:
output = TRANFORMATION_CHOICES[page_transformation.transformation] % eval(page_transformation.arguments)
transformation_list.append(output)
except Exception, e:
if request.user.is_staff:
messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
messages.warning(request, _(u'Error for transformation %s:, %s' % (page_transformation.get_transformation_display(), e)))
else:
pass
except:
except ObjectDoesNotExist:
pass
tranformation_string = ' '.join(transformation_list)

View File

@@ -37,10 +37,8 @@ def run_tesseract(input_filename, output_filename_base, lang=None):
def ocr_document(document):
total_pages = 1
page = 0
while page < total_pages:
imagefile = convert_document_for_ocr(document, page=page)
for page_index, document_page in enumerate(document.documentpage_set.all()):
imagefile = convert_document_for_ocr(document, page=page_index)
desc, filepath = tempfile.mkstemp()
try:
status, error_string = run_tesseract(imagefile, filepath)
@@ -52,7 +50,7 @@ def ocr_document(document):
f = file(ocr_output)
try:
document_page, created = DocumentPage.objects.get_or_create(document=document,
page_number=page)
page_number=page_index+1)
document_page.content = f.read().strip()
document_page.page_label = _(u'Text from OCR')
document_page.save()
@@ -61,6 +59,3 @@ def ocr_document(document):
cleanup(filepath)
cleanup(ocr_output)
cleanup(imagefile)
page += 1

View File

@@ -182,6 +182,7 @@ LOGIN_EXEMPT_URLS = (
# Saving
#DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest())
#DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4())
#DOCUMENTS_DEFAULT_TRANSFORMATIONS = []
# Storage
#DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents'
@@ -192,6 +193,8 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
#DOCUMENTS_DISPLAY_SIZE = '1200'
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
#DOCUMENTS_AVAILABLE_TRANSFORMATIONS = {}
#example: DOCUMENTS_DEFAULT_TRANSFORMATIONS = [{'name':'rotate', 'arguments':"{'degrees':270}"}]
# Groups
#DOCUMENTS_GROUP_MAX_RESULTS = 20