Decoupled page transformation interface, added default transformation support

This commit is contained in:
Roberto Rosario
2011-02-14 02:11:39 -04:00
parent 06d7e5a46a
commit fbc8bc960a
9 changed files with 65 additions and 34 deletions

View File

@@ -21,6 +21,8 @@ Features
* Document OCR and searching * Document OCR and searching
* Group documents by metadata automatically * Group documents by metadata automatically
* Permissions and roles support * Permissions and roles support
* Multi-page document support
* Page transformations
Requirements Requirements
--- ---

View File

@@ -39,9 +39,11 @@
<button class="button" type="submit"> <button class="button" type="submit">
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/tick.png" alt="{% trans 'Yes' %}" /> {% trans "Yes" %} <img src="{{ MEDIA_URL }}web_theme_media/images/icons/tick.png" alt="{% trans 'Yes' %}" /> {% trans "Yes" %}
</button> </button>
{% if previous %}
<a href="#header" onclick='{% if previous %}window.location.replace("{{ previous }}");{% else %}history.go(-1);{% endif %}' class="button"> <a href="#header" onclick='{% if previous %}window.location.replace("{{ previous }}");{% else %}history.go(-1);{% endif %}' class="button">
<img src="{{ MEDIA_URL }}web_theme_media/images/icons/cross.png" alt="{% trans 'No' %}"/> {% trans "No" %} <img src="{{ MEDIA_URL }}web_theme_media/images/icons/cross.png" alt="{% trans 'No' %}"/> {% trans "No" %}
</a> </a>
{% endif %}
</div> </div>
</form> </form>
</div> </div>

View File

@@ -3,3 +3,14 @@ import tempfile
from common.conf import settings as common_settings from common.conf import settings as common_settings
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp() TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
#ugettext = lambda s: s
#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
TRANFORMATION_CHOICES = {
'rotate':'-rotate %(degrees)d'
}
#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
# TRANFORMATION_ROTATE,
# ])

View File

@@ -1,17 +1,8 @@
from django.conf import settings from django.conf import settings
ugettext = lambda s: s
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert') CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify') IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200') OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'') DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'') LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400') HIGH_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_HIGH_QUALITY_OPTIONS', u'-density 400')
TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
TRANFORMATION_CHOICES = getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
TRANFORMATION_ROTATE,
])

View File

@@ -5,6 +5,7 @@ import tempfile
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.utils.translation import ugettext_lazy as _
from converter.api import get_page_count from converter.api import get_page_count
@@ -18,6 +19,11 @@ default_available_models = {
'User':User 'User':User
} }
available_transformations = {
'rotate': {'label':_(u'Rotate [degrees]'), 'arguments':[{'name':'degrees'}]}
}
# Definition # Definition
AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions) AVAILABLE_FUNCTIONS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_FUNCTIONS', default_available_functions)
AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models) AVAILABLE_MODELS = getattr(settings, 'DOCUMENTS_METADATA_AVAILABLE_MODELS', default_available_models)
@@ -44,6 +50,10 @@ MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50') THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200') DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
# Transformations
AVAILABLE_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', available_transformations)
DEFAULT_TRANSFORMATIONS = getattr(settings, 'DOCUMENTS_DEFAULT_TRANSFORMATIONS', [])
#Groups #Groups
GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20) GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20)
GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True) GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True)

View File

@@ -14,8 +14,6 @@ from django.db.models import Q
from dynamic_search.api import register from dynamic_search.api import register
from converter.conf.settings import TRANFORMATION_CHOICES
from documents.conf.settings import AVAILABLE_FUNCTIONS from documents.conf.settings import AVAILABLE_FUNCTIONS
from documents.conf.settings import AVAILABLE_MODELS from documents.conf.settings import AVAILABLE_MODELS
from documents.conf.settings import CHECKSUM_FUNCTION from documents.conf.settings import CHECKSUM_FUNCTION
@@ -27,7 +25,7 @@ from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
from documents.conf.settings import FILESYSTEM_FILESERVING_PATH from documents.conf.settings import FILESYSTEM_FILESERVING_PATH
from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS from documents.conf.settings import FILESYSTEM_SLUGIFY_PATHS
from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT from documents.conf.settings import FILESYSTEM_MAX_RENAME_COUNT
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
if FILESYSTEM_SLUGIFY_PATHS == False: if FILESYSTEM_SLUGIFY_PATHS == False:
#Do not slugify path or filenames and extensions #Do not slugify path or filenames and extensions
@@ -448,20 +446,17 @@ class MetadataGroupItem(models.Model):
verbose_name_plural = _(u'metadata group items') verbose_name_plural = _(u'metadata group items')
available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()]) if AVAILABLE_MODELS else []
class DocumentPageTransformation(models.Model): class DocumentPageTransformation(models.Model):
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page')) document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order')) order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation')) transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}')) arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
def __unicode__(self): def __unicode__(self):
return self.get_transformation_display() return '%s - %s' % (self.document_page, self.get_transformation_display())
def get_transformation(self):
try:
return self.transformation % eval(self.arguments)
except Exception, e:
raise Exception(e)
class Meta: class Meta:
ordering = ('order',) ordering = ('order',)

View File

@@ -10,16 +10,19 @@ from django.core.files.base import File
from django.conf import settings from django.conf import settings
from django.utils.http import urlencode from django.utils.http import urlencode
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from django.core.exceptions import ObjectDoesNotExist
from common.utils import pretty_size
from permissions.api import check_permissions, Unauthorized from permissions.api import check_permissions, Unauthorized
from filetransfers.api import serve_file from filetransfers.api import serve_file
from converter.api import convert, in_image_cache, QUALITY_DEFAULT from converter.api import convert, in_image_cache, QUALITY_DEFAULT
from common.utils import pretty_size from converter import TRANFORMATION_CHOICES
from utils import from_descriptor_to_tempfile from utils import from_descriptor_to_tempfile
from models import Document, DocumentMetadata, DocumentType, MetadataType, \ from models import Document, DocumentMetadata, DocumentType, MetadataType, \
DocumentPage DocumentPage, DocumentPageTransformation
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \ from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \ MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \ StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
@@ -35,6 +38,8 @@ from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import GROUP_MAX_RESULTS from documents.conf.settings import GROUP_MAX_RESULTS
from documents.conf.settings import GROUP_SHOW_EMPTY from documents.conf.settings import GROUP_SHOW_EMPTY
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
from documents import PERMISSION_DOCUMENT_CREATE, \ from documents import PERMISSION_DOCUMENT_CREATE, \
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \ PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
@@ -124,6 +129,20 @@ def upload_document_with_type(request, document_type_id, multiple=True):
instance.update_checksum() instance.update_checksum()
instance.update_mimetype() instance.update_mimetype()
instance.update_page_count() instance.update_page_count()
if DEFAULT_TRANSFORMATIONS:
for transformation in DEFAULT_TRANSFORMATIONS:
if 'name' in transformation:
for document_page in instance.documentpage_set.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
transformation=transformation['name'])
if 'arguments' in transformation:
page_transformation.arguments = transformation['arguments']
page_transformation.save()
if 'document_type_available_filenames' in local_form.cleaned_data: if 'document_type_available_filenames' in local_form.cleaned_data:
if local_form.cleaned_data['document_type_available_filenames']: if local_form.cleaned_data['document_type_available_filenames']:
@@ -445,17 +464,20 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
page = int(request.GET.get('page', 1)) page = int(request.GET.get('page', 1))
transformation_list = [] transformation_list = []
try: try:
# Catch invalid or non-existent pages
document_page = DocumentPage.objects.get(document=document, page_number=page) document_page = DocumentPage.objects.get(document=document, page_number=page)
for tranformation in document_page.documentpagetransformation_set.all(): for page_transformation in document_page.documentpagetransformation_set.all():
try: try:
transformation_list.append(tranformation.get_transformation()) if page_transformation.transformation in TRANFORMATION_CHOICES:
output = TRANFORMATION_CHOICES[page_transformation.transformation] % eval(page_transformation.arguments)
transformation_list.append(output)
except Exception, e: except Exception, e:
if request.user.is_staff: if request.user.is_staff:
messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e))) messages.warning(request, _(u'Error for transformation %s:, %s' % (page_transformation.get_transformation_display(), e)))
else: else:
pass pass
except: except ObjectDoesNotExist:
pass pass
tranformation_string = ' '.join(transformation_list) tranformation_string = ' '.join(transformation_list)

View File

@@ -37,10 +37,8 @@ def run_tesseract(input_filename, output_filename_base, lang=None):
def ocr_document(document): def ocr_document(document):
total_pages = 1 for page_index, document_page in enumerate(document.documentpage_set.all()):
page = 0 imagefile = convert_document_for_ocr(document, page=page_index)
while page < total_pages:
imagefile = convert_document_for_ocr(document, page=page)
desc, filepath = tempfile.mkstemp() desc, filepath = tempfile.mkstemp()
try: try:
status, error_string = run_tesseract(imagefile, filepath) status, error_string = run_tesseract(imagefile, filepath)
@@ -52,7 +50,7 @@ def ocr_document(document):
f = file(ocr_output) f = file(ocr_output)
try: try:
document_page, created = DocumentPage.objects.get_or_create(document=document, document_page, created = DocumentPage.objects.get_or_create(document=document,
page_number=page) page_number=page_index+1)
document_page.content = f.read().strip() document_page.content = f.read().strip()
document_page.page_label = _(u'Text from OCR') document_page.page_label = _(u'Text from OCR')
document_page.save() document_page.save()
@@ -61,6 +59,3 @@ def ocr_document(document):
cleanup(filepath) cleanup(filepath)
cleanup(ocr_output) cleanup(ocr_output)
cleanup(imagefile) cleanup(imagefile)
page += 1

View File

@@ -182,6 +182,7 @@ LOGIN_EXEMPT_URLS = (
# Saving # Saving
#DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()) #DOCUMENTS_CHECKSUM_FUNCTION = lambda x: hashlib.sha256(x).hexdigest())
#DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4()) #DOCUMENTS_UUID_FUNCTION = lambda:unicode(uuid.uuid4())
#DOCUMENTS_DEFAULT_TRANSFORMATIONS = []
# Storage # Storage
#DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents' #DOCUMENTS_STORAGE_DIRECTORY_NAME = 'documents'
@@ -192,6 +193,8 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_THUMBNAIL_SIZE = '50x50' #DOCUMENTS_THUMBNAIL_SIZE = '50x50'
#DOCUMENTS_DISPLAY_SIZE = '1200' #DOCUMENTS_DISPLAY_SIZE = '1200'
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120' #DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
#DOCUMENTS_AVAILABLE_TRANSFORMATIONS = {}
#example: DOCUMENTS_DEFAULT_TRANSFORMATIONS = [{'name':'rotate', 'arguments':"{'degrees':270}"}]
# Groups # Groups
#DOCUMENTS_GROUP_MAX_RESULTS = 20 #DOCUMENTS_GROUP_MAX_RESULTS = 20