Added multipage document support and document page transformation

This commit is contained in:
Roberto Rosario
2011-02-14 00:18:16 -04:00
parent 65d1e5b176
commit 06d7e5a46a
21 changed files with 219 additions and 73 deletions

View File

View File

@@ -0,0 +1,3 @@
from django.conf import settings
TEMPORARY_DIRECTORY = getattr(settings, 'COMMON_TEMPORARY_DIRECTORY', u'/tmp')

View File

@@ -1,5 +1,5 @@
import tempfile
from documents.conf import settings as documents_settings
from common.conf import settings as common_settings
TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()

View File

@@ -6,10 +6,8 @@ import shutil
from django.template.defaultfilters import slugify
from documents.utils import from_descriptor_to_tempfile
from converter.conf.settings import CONVERT_PATH
from converter.conf.settings import IDENTIFY_PATH
from converter.conf.settings import OCR_OPTIONS
from converter.conf.settings import DEFAULT_OPTIONS
from converter.conf.settings import LOW_QUALITY_OPTIONS
@@ -18,6 +16,7 @@ from converter.conf.settings import HIGH_QUALITY_OPTIONS
#from converter.conf.settings import UNOCONV_PATH
from converter import TEMPORARY_DIRECTORY
from utils import from_descriptor_to_tempfile
QUALITY_DEFAULT = 'quality_default'
@@ -73,6 +72,16 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
return (proc.wait(), proc.stderr.read())
def execute_identify(input_filepath, arguments):
    """
    Run the ImageMagick ``identify`` binary (IDENTIFY_PATH) on a file.

    ``arguments`` is a command-line style string; it is tokenised with
    shlex and placed between the executable and ``input_filepath``.

    Returns a ``(exit_status, stderr_output, stdout_output)`` tuple.
    """
    command = [IDENTIFY_PATH]
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child. Calling
    # wait() first can block forever once the child fills a pipe buffer
    # that nobody is reading.
    output, error_string = proc.communicate()
    return (proc.returncode, error_string, output)
def cache_cleanup(input_filepath, size, page=0, format='jpg'):
filepath = create_image_cache_filename(input_filepath, size, page, format)
try:
@@ -126,7 +135,6 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
try:
input_arg = '%s[%s]' % (input_filepath, page)
extra_options += ' -resize %s' % size
print 'extra_options', extra_options
status, error_string = execute_convert(input_arg, extra_options, '%s:%s' % (format, output_filepath), quality=quality)
if status:
errors = get_errors(error_string)
@@ -138,6 +146,15 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
return output_filepath
def get_page_count(input_filepath):
    """
    Return the number of pages of the file at ``input_filepath`` as an int.

    The count is whatever ``identify -format %n`` prints on stdout.

    Raises ConvertError when identify exits with a non-zero status, and
    ValueError when its output is not a plain integer.
    """
    status, error_string, output = execute_identify(input_filepath, '-format %n')
    if status:
        # Let the failure reach the caller. A ``return`` inside a
        # ``finally`` clause would silently discard this exception.
        errors = get_errors(error_string)
        raise ConvertError(status, errors)

    # NOTE(review): some ImageMagick releases may evaluate the format string
    # once per frame, which would repeat the count for multi-page input --
    # confirm against the installed identify before relying on this value.
    return int(output)
#TODO: slugify OCR_OPTIONS and add to file name to cache
def convert_document_for_ocr(document, page=0, format='tif'):
#Extract document file

View File

@@ -5,6 +5,7 @@ ugettext = lambda s: s
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')

59
apps/converter/utils.py Normal file
View File

@@ -0,0 +1,59 @@
import os
import tempfile
from converter import TEMPORARY_DIRECTORY
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024*1024):
    """
    Copy a file from source to dest. source and dest
    can either be strings or any object with a read or
    write method, like StringIO for example.

    Strings are treated as paths and opened in binary mode. Data is moved
    in chunks of at most ``buffer_size`` bytes.

    Both ends are always closed before returning, including file-like
    objects supplied by the caller and including the case where the copy
    fails part-way (the exception is then re-raised).
    """
    if not hasattr(source, 'read'):
        source = open(source, 'rb')

    try:
        if not hasattr(dest, 'write'):
            dest = open(dest, 'wb')

        try:
            while 1:
                copy_buffer = source.read(buffer_size)
                if not copy_buffer:
                    break
                dest.write(copy_buffer)
        finally:
            dest.close()
    finally:
        # Reached even when opening or writing the destination failed, so
        # the source handle is never leaked.
        source.close()
def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
    """
    Copy the contents of an open file-like object into a file named
    ``filename`` inside TEMPORARY_DIRECTORY and return the resulting path.

    The data is read in chunks of at most ``buffer_size`` bytes.
    ``input_descriptor`` is closed before returning, also when the copy
    fails (the exception is then re-raised).
    """
    path = os.path.join(TEMPORARY_DIRECTORY, filename)

    try:
        output_descriptor = open(path, 'wb')
        try:
            while 1:
                copy_buffer = input_descriptor.read(buffer_size)
                if not copy_buffer:
                    break
                output_descriptor.write(copy_buffer)
        finally:
            output_descriptor.close()
    finally:
        # Also reached when the output file could not be opened.
        input_descriptor.close()

    return path
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
    """
    Copy the contents of an open file-like object into a freshly created
    temporary file (tempfile.mkstemp) and return that file's path.

    The data is read in chunks of at most ``buffer_size`` bytes.
    ``input_descriptor`` is closed before returning, also when the copy
    fails (the exception is then re-raised).
    """
    try:
        output_descriptor, tmp_filename = tempfile.mkstemp()
        # Wrap the raw OS descriptor in a file object: a bare os.write()
        # may write fewer bytes than requested and its return value was
        # never checked, while file.write() always writes the whole chunk.
        output_file = os.fdopen(output_descriptor, 'wb')
        try:
            while 1:
                copy_buffer = input_descriptor.read(buffer_size)
                if not copy_buffer:
                    break
                output_file.write(copy_buffer)
        finally:
            # Closing the file object also closes the mkstemp descriptor.
            output_file.close()
    finally:
        input_descriptor.close()

    return tmp_filename

View File

@@ -9,10 +9,10 @@ from common.utils import pretty_size
from permissions.api import register_permissions
from models import Document, DocumentTransformation
from models import Document, DocumentPage, DocumentPageTransformation
from staging import StagingFile
from documents.conf import settings as documents_settings
from common.conf import settings as common_settings
PERMISSION_DOCUMENT_CREATE = 'document_create'
PERMISSION_DOCUMENT_PROPERTIES_EDIT = 'document_properties_edit'
@@ -43,18 +43,18 @@ document_edit_metadata = {'text':_('edit metadata'), 'view':'document_edit_metad
document_preview = {'text':_('preview'), 'class':'fancybox', 'view':'document_preview', 'args':'object.id', 'famfam':'magnifier', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_VIEW]}}
document_download = {'text':_('download'), 'view':'document_download', 'args':'object.id', 'famfam':'page_save', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_DOWNLOAD]}}
document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
#document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
#document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
staging_file_preview = {'text':_('preview'), 'class':'fancybox', 'view':'staging_file_preview', 'args':'object.id', 'famfam':'drive_magnify'}
staging_file_delete = {'text':_('delete'), 'view':'staging_file_delete', 'args':'object.id', 'famfam':'drive_delete'}
register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download, document_transformation_list], menu_name='sidebar')
register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download], menu_name='sidebar')
register_links(Document, [document_list, document_create, document_create_multiple, document_create_sibling], menu_name='sidebar')
register_links(['document_list', 'document_create', 'document_create_multiple', 'upload_document_with_type', 'upload_multiple_documents_with_type'], [document_list, document_create, document_create_multiple], menu_name='sidebar')
register_links(DocumentTransformation, [document_transformation_delete])
#register_links(DocumentTransformation, [document_transformation_delete])
@@ -76,4 +76,4 @@ register_menu([
document_list
],'famfam':'page','position':4}])
TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()

View File

@@ -3,7 +3,7 @@ from django.contrib import admin
from models import MetadataType, DocumentType, Document, \
DocumentTypeMetadataType, DocumentMetadata, DocumentTypeFilename, \
MetadataIndex, DocumentMetadataIndex, DocumentPage, MetadataGroup, \
MetadataGroupItem, DocumentTransformation
MetadataGroupItem, DocumentPageTransformation
class MetadataTypeAdmin(admin.ModelAdmin):
@@ -48,7 +48,11 @@ class DocumentMetadataIndexInline(admin.StackedInline):
extra = 1
classes = ('collapse-open',)
allow_add = True
readonly_fields = ('metadata_index', 'filename')
readonly_fields = ('suffix', 'metadata_index', 'filename')
# Admin options for DocumentPageTransformation; nothing is customised here
# beyond naming the model.
class DocumentPageTransformationAdmin(admin.ModelAdmin):
model = DocumentPageTransformation
class DocumentPageInline(admin.StackedInline):
@@ -58,16 +62,9 @@ class DocumentPageInline(admin.StackedInline):
allow_add = True
class DocumentTransformationline(admin.StackedInline):
model = DocumentTransformation
extra = 1
classes = ('collapse-open',)
allow_add = True
class DocumentAdmin(admin.ModelAdmin):
inlines = [DocumentMetadataInline, DocumentMetadataIndexInline,
DocumentTransformationline, DocumentPageInline]
DocumentPageInline]
list_display = ('uuid', 'file_filename', 'file_extension')
@@ -87,4 +84,5 @@ admin.site.register(MetadataType, MetadataTypeAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(MetadataGroup, MetadataGroupAdmin)
admin.site.register(DocumentPageTransformation, DocumentPageTransformationAdmin)

View File

@@ -1,10 +1,13 @@
import datetime
import hashlib
import uuid
import tempfile
from django.conf import settings
from django.contrib.auth.models import User
from converter.api import get_page_count
from documents.storage import DocumentStorage
default_available_functions = {
@@ -29,6 +32,7 @@ DELETE_LOCAL_ORIGINAL = getattr(settings, 'DOCUMENTS_DELETE_LOCAL_ORIGINAL', Fal
# Saving
CHECKSUM_FUNCTION = getattr(settings, 'DOCUMENTS_CHECKSUM_FUNCTION', lambda x: hashlib.sha256(x).hexdigest())
UUID_FUNCTION = getattr(settings, 'DOCUMENTS_UUID_FUNCTION', lambda:unicode(uuid.uuid4()))
def _default_page_count(document):
    """
    Default DOCUMENTS_PAGE_COUNT_FUNCTION: dump the document to a scratch
    file, count its pages with converter.api.get_page_count and return
    that count.
    """
    import os

    descriptor, filepath = tempfile.mkstemp()
    # mkstemp() hands back an *open* OS-level descriptor; save_to_file()
    # reopens the path itself, so release ours right away instead of
    # leaking one descriptor per call.
    os.close(descriptor)
    try:
        return get_page_count(document.save_to_file(filepath))
    finally:
        # The scratch copy is only needed while the pages are counted.
        os.remove(filepath)


PAGE_COUNT_FUNCTION = getattr(settings, 'DOCUMENTS_PAGE_COUNT_FUNCTION', _default_page_count)
# Storage
STORAGE_BACKEND = getattr(settings, 'DOCUMENTS_STORAGE_BACKEND', DocumentStorage)
@@ -36,6 +40,7 @@ STORAGE_DIRECTORY_NAME = getattr(settings, 'DOCUMENTS_STORAGE_DIRECTORY_NAME', '
# Usage
PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_PREVIEW_SIZE', '640x480')
MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '160x120')
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
@@ -48,8 +53,3 @@ FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERV
FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents')
FILESYSTEM_SLUGIFY_PATHS = getattr(settings, 'DOCUMENTS_SLUGIFY_PATHS', False)
FILESYSTEM_MAX_RENAME_COUNT = getattr(settings, 'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 200)
#misc
TEMPORARY_DIRECTORY = getattr(settings, 'DOCUMENTS_TEMPORARY_DIRECTORY', u'/tmp')

View File

@@ -24,8 +24,22 @@ from documents.conf.settings import AVAILABLE_MODELS
# Form widget that renders HTML preview image links for a document instead
# of an input element. ``value`` is used as a document object: the code
# reads ``value.id`` and ``value.documentpage_set``.
class ImageWidget(forms.widgets.Widget):
def render(self, name, value, attrs=None):
# HTML fragments, joined into a single string at the end.
output = []
# NOTE(review): the unconditional single-preview append below duplicates
# the markup emitted by the ``else`` branch further down; it looks like
# the pre-change line kept by the diff rendering -- confirm which of the
# two is current before relying on this listing.
output.append('<a class="fancybox-noscaling" href="%s"><img width="300" src="%s" /></a>' % (reverse('document_display', args=[value.id]),
reverse('document_preview', args=[value.id])))
# Number of DocumentPage rows attached to the document.
page_count = value.documentpage_set.count()
if page_count > 1:
# Multi-page document: one small preview per page, all in one gallery.
output.append('<br /><span class="famfam active famfam-page_white_copy"></span>%s<br />' % ugettext(u'Pages'))
# The ``page`` query-string value is 1-based (page_index + 1).
for page_index in range(value.documentpage_set.count()):
output.append('<span>%(page)s)<a rel="gallery_1" class="fancybox-noscaling" href="%(url)s?page=%(page)s"><img src="%(img)s?page=%(page)s" /></a></span>' % {
'url':reverse('document_display', args=[value.id]),
'img':reverse('document_preview_multipage', args=[value.id]),
'page':page_index+1,
})
else:
# Single page: one 300px wide preview linking to the full-size view.
output.append('<a class="fancybox-noscaling" href="%(url)s"><img width="300" src="%(img)s" /></a>' % {
'url':reverse('document_display', args=[value.id]),
'img':reverse('document_preview', args=[value.id]),
})
output.append('<br /><span class="famfam active famfam-magnifier"></span>%s' % ugettext(u'Click on the image for full size view'))
#output.append(super(ImageWidget, self).render(name, value, attrs))
# The markup is built from reversed URLs, integers and translated
# literals only, so it is returned without further escaping.
return mark_safe(u''.join(output))
@@ -58,7 +72,7 @@ class DocumentPreviewForm(forms.Form):
super(DocumentPreviewForm, self).__init__(*args, **kwargs)
self.fields['preview'].initial = self.document
preview = forms.CharField(widget=ImageWidget)
preview = forms.CharField(widget=ImageWidget())
class DocumentForm_view(DetailForm):

View File

@@ -20,6 +20,7 @@ from documents.conf.settings import AVAILABLE_FUNCTIONS
from documents.conf.settings import AVAILABLE_MODELS
from documents.conf.settings import CHECKSUM_FUNCTION
from documents.conf.settings import UUID_FUNCTION
from documents.conf.settings import PAGE_COUNT_FUNCTION
from documents.conf.settings import STORAGE_BACKEND
from documents.conf.settings import STORAGE_DIRECTORY_NAME
from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
@@ -74,12 +75,15 @@ class Document(models.Model):
verbose_name_plural = _(u'documents')
ordering = ['-date_added']
def __unicode__(self):
return '%s.%s' % (self.file_filename, self.file_extension)
def get_fullname(self):
return os.extsep.join([self.file_filename, self.file_extension])
def update_mimetype(self):
try:
mime = magic.Magic(mime=True)
@@ -95,25 +99,52 @@ class Document(models.Model):
def read(self, count=1024):
return self.file.storage.open(self.file.url).read(count)
@models.permalink
def get_absolute_url(self):
return ('document_view', [self.id])
def update_checksum(self, save=True):
if self.exists():
self.checksum = unicode(CHECKSUM_FUNCTION(self.file.read()))
if save:
self.save()
def update_page_count(self):
    """
    Make sure a DocumentPage row exists for every page of this document.

    The number of pages comes from PAGE_COUNT_FUNCTION; page numbers are
    1-based. Existing rows are left alone, missing ones are created.
    """
    page_total = PAGE_COUNT_FUNCTION(self)
    for number in range(1, page_total + 1):
        DocumentPage.objects.get_or_create(document=self, page_number=number)
def save_to_file(self, filepath, buffer_size=1024*1024):
    """
    Write the stored document file to ``filepath`` and return ``filepath``.

    The file is opened through the document's storage backend and copied
    in chunks of at most ``buffer_size`` bytes, so large documents are
    not loaded into memory in one piece. Both the storage handle and the
    output file are closed before returning, also when the copy fails.
    """
    storage = self.file.storage.open(self.file.url)
    try:
        output_descriptor = open(filepath, 'wb')
        try:
            while 1:
                # Honour buffer_size; a bare read() would pull the whole
                # file into memory at once.
                copy_buffer = storage.read(buffer_size)
                if not copy_buffer:
                    break
                output_descriptor.write(copy_buffer)
        finally:
            output_descriptor.close()
    finally:
        storage.close()

    return filepath
def exists(self):
return self.file.storage.exists(self.file.url)
def delete(self, *args, **kwargs):
#TODO: Might not execute when done in bulk from a queryset
#topics/db/queries.html#topics-db-queries-delete
self.delete_fs_links()
super(Document, self).delete(*args, **kwargs)
def get_metadata_groups(self):
errors = []
metadata_groups = {}
@@ -144,6 +175,7 @@ class Document(models.Model):
metadata_groups[group] = Document.objects.filter(Q(id__in=document_id_list) & ~Q(id=self.id)) or []
return metadata_groups, errors
def create_fs_links(self):
if FILESYSTEM_FILESERVING_ENABLE:
if not self.exists():
@@ -171,6 +203,7 @@ class Document(models.Model):
#This should be a warning not an error
pass
def delete_fs_links(self):
if FILESYSTEM_FILESERVING_ENABLE:
for document_metadata_index in self.documentmetadataindex_set.all():
@@ -209,12 +242,14 @@ class Document(models.Model):
except OSError, exc:
pass
#Remove the directory if it is empty
try:
os.removedirs(path)
except:
pass
def next_available_filename(document, metadata_index, path, filename, extension, suffix=0):
target = filename
if suffix:
@@ -344,10 +379,10 @@ class DocumentPage(models.Model):
document = models.ForeignKey(Document, verbose_name=_(u'document'))
content = models.TextField(blank=True, null=True, verbose_name=_(u'content'))
page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
page_number = models.PositiveIntegerField(default=0, verbose_name=_(u'page number'))
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'))
def __unicode__(self):
return '%s - %s' % (self.page_number, self.page_label)
return '%s - %s - %s' % (self.document, self.page_number, self.page_label)
class Meta:
verbose_name = _(u'document page')
@@ -377,7 +412,7 @@ INCLUSION_CHOICES = (
(INCLUSION_OR, _(u'or')),
)
OPERATOR_CHOCIES = (
OPERATOR_CHOICES = (
('exact', _(u'is equal')),
('iexact', _(u'is equal (case insensitive)')),
('contains', _(u'contains')),
@@ -399,7 +434,7 @@ class MetadataGroupItem(models.Model):
metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group'))
inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES, help_text=_(u'The inclusion is ignored for the first item.'))
metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.'))
operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES)
operator = models.CharField(max_length=16, choices=OPERATOR_CHOICES)
expression = models.CharField(max_length=128,
verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document. The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.'))
negated = models.BooleanField(default=False, verbose_name=_(u'negated'), help_text=_(u'Inverts the logic of the operator.'))
@@ -413,8 +448,8 @@ class MetadataGroupItem(models.Model):
verbose_name_plural = _(u'metadata group items')
class DocumentTransformation(models.Model):
document = models.ForeignKey(Document, verbose_name=_(u'document'))
class DocumentPageTransformation(models.Model):
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
@@ -430,10 +465,8 @@ class DocumentTransformation(models.Model):
class Meta:
ordering = ('order',)
verbose_name = _(u'document transformation')
verbose_name_plural = _(u'document transformations')
verbose_name = _(u'document page transformation')
verbose_name_plural = _(u'document page transformations')
register(Document, _(u'document'), ['document_type__name', 'file_mimetype', 'file_filename', 'file_extension', 'documentmetadata__value', 'documentpage__content'])

View File

@@ -5,6 +5,7 @@ from django.views.generic.create_update import create_object, update_object
from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import DISPLAY_SIZE
from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE
from converter.api import QUALITY_HIGH
@@ -19,8 +20,9 @@ urlpatterns = patterns('documents.views',
url(r'^document/(?P<document_id>\d+)/delete/$', 'document_delete', (), 'document_delete'),
url(r'^document/(?P<document_id>\d+)/edit/$', 'document_edit', (), 'document_edit'),
url(r'^document/(?P<document_id>\d+)/edit/metadata/$', 'document_edit_metadata', (), 'document_edit_metadata'),
url(r'^document/(?P<document_id>\d+)/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
url(r'^document/(?P<document_id>\d+)/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
url(r'^document/(?P<document_id>\d+)/display/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
url(r'^document/(?P<document_id>\d+)/display/preview/multipage/$', 'get_document_image', {'size':MULTIPAGE_PREVIEW_SIZE}, 'document_preview_multipage'),
url(r'^document/(?P<document_id>\d+)/display/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
url(r'^document/(?P<document_id>\d+)/display/$', 'get_document_image', {'size':DISPLAY_SIZE,'quality':QUALITY_HIGH}, 'document_display'),
url(r'^document/(?P<document_id>\d+)/download/$', 'document_download', (), 'document_download'),
url(r'^document/(?P<document_id>\d+)/create/siblings/$', 'document_create_sibling', {'multiple':False}, 'document_create_sibling'),

View File

@@ -49,7 +49,6 @@ def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*102
return path
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
output_descriptor, tmp_filename = tempfile.mkstemp()

View File

@@ -18,7 +18,8 @@ from common.utils import pretty_size
from utils import from_descriptor_to_tempfile
from models import Document, DocumentMetadata, DocumentType, MetadataType
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
DocumentPage
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
@@ -122,6 +123,8 @@ def upload_document_with_type(request, document_type_id, multiple=True):
instance = local_form.save()
instance.update_checksum()
instance.update_mimetype()
instance.update_page_count()
if 'document_type_available_filenames' in local_form.cleaned_data:
if local_form.cleaned_data['document_type_available_filenames']:
instance.file_filename = local_form.cleaned_data['document_type_available_filenames'].filename
@@ -154,6 +157,7 @@ def upload_document_with_type(request, document_type_id, multiple=True):
document.save()
document.update_checksum()
document.update_mimetype()
document.update_page_count()
except Exception, e:
messages.error(request, e)
else:
@@ -243,6 +247,7 @@ def document_view(request, document_id):
{'label':_(u'Time added'), 'field':lambda x: unicode(x.date_added.time()).split('.')[0]},
{'label':_(u'Checksum'), 'field':'checksum'},
{'label':_(u'UUID'), 'field':'uuid'},
{'label':_(u'Pages'), 'field':lambda x: x.documentpage_set.count()},
])
@@ -436,8 +441,13 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
raise Http404(e)
document = get_object_or_404(Document, pk=document_id)
page = int(request.GET.get('page', 1))
transformation_list = []
for tranformation in document.documenttransformation_set.all():
try:
document_page = DocumentPage.objects.get(document=document, page_number=page)
for tranformation in document_page.documentpagetransformation_set.all():
try:
transformation_list.append(tranformation.get_transformation())
except Exception, e:
@@ -445,18 +455,19 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
else:
pass
except:
pass
tranformation_string = ' '.join(transformation_list)
try:
filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string)
filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string, page=page-1)
if filepath:
return serve_file(request, File(file=open(filepath, 'r')))
#Save to a temporary location
document.file.open()
desc = document.file.storage.open(document.file.path)
filepath = from_descriptor_to_tempfile(desc, document.checksum)
output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string)
output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1)
return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg')
except Exception, e:
if size == THUMBNAIL_SIZE:
@@ -523,6 +534,7 @@ def document_transformation_list(request, document_id):
document = get_object_or_404(Document, pk=document_id)
return object_list(
request,
queryset=document.documenttransformation_set.all(),
@@ -539,9 +551,9 @@ def document_transformation_delete(request, document_transformation_id):
except Unauthorized, e:
raise Http404(e)
document_transformation = get_object_or_404(DocumentTransformation, pk=document_transformation_id)
document_transformation = get_object_or_404(DocumentPageTransformation, pk=document_transformation_id)
return delete_object(request, model=DocumentTransformation, object_id=document_transformation_id,
return delete_object(request, model=DocumentPageTransformation, object_id=document_transformation_id,
template_name='generic_confirm.html',
post_delete_redirect=reverse('document_transformation_list'),
extra_context={

View File

@@ -5,6 +5,7 @@ from django.shortcuts import render_to_response
from django.template import RequestContext
from django.utils.translation import ugettext_lazy as _
from common.conf import settings as common_settings
from documents.conf import settings as documents_settings
from converter.conf import settings as converter_settings
from ocr.conf import settings as ocr_settings
@@ -34,7 +35,9 @@ def check_settings(request):
{'name':'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', 'value':documents_settings.FILESYSTEM_FILESERVING_PATH, 'exists':True},
{'name':'DOCUMENTS_SLUGIFY_PATHS', 'value':documents_settings.FILESYSTEM_SLUGIFY_PATHS},
{'name':'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 'value':documents_settings.FILESYSTEM_MAX_RENAME_COUNT},
{'name':'DOCUMENTS_TEMPORARY_DIRECTORY', 'value':documents_settings.TEMPORARY_DIRECTORY, 'exists':True},
#Common
{'name':'COMMON_TEMPORARY_DIRECTORY', 'value':common_settings.TEMPORARY_DIRECTORY, 'exists':True},
#Converter
{'name':'CONVERTER_CONVERT_PATH', 'value':converter_settings.CONVERT_PATH, 'exists':True},

View File

@@ -5,13 +5,13 @@ from permissions.api import register_permissions
from documents.models import Document
OCR_DOCUMENT_OCR = 'document_ocr'
PERMISSION_OCR_DOCUMENT = 'ocr_document'
register_permissions('ocr', [
{'name':OCR_DOCUMENT_OCR, 'label':_(u'Submit document for OCR')},
{'name':PERMISSION_OCR_DOCUMENT, 'label':_(u'Submit document for OCR')},
])
submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[OCR_DOCUMENT_OCR]}}
submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[PERMISSION_OCR_DOCUMENT]}}
register_links(Document, [submit_document], menu_name='sidebar')

View File

@@ -8,7 +8,7 @@ import tempfile
from django.utils.translation import ugettext as _
from documents.models import DocumentPage
from documents.conf.settings import TEMPORARY_DIRECTORY
from common.conf.settings import TEMPORARY_DIRECTORY
from converter.api import convert_document_for_ocr
from ocr.conf.settings import TESSERACT_PATH

View File

@@ -11,11 +11,11 @@ from django.utils.translation import ugettext as _
from permissions.api import check_permissions, Unauthorized
from documents.models import Document
from ocr import OCR_DOCUMENT_OCR
from ocr import PERMISSION_OCR_DOCUMENT
from api import ocr_document
def submit_document(request, document_id):
permissions = [OCR_DOCUMENT_OCR]
permissions = [PERMISSION_OCR_DOCUMENT]
try:
check_permissions(request.user, 'ocr', permissions)
except Unauthorized, e:

View File

@@ -6,3 +6,6 @@
* Added the ability to group documents by their metadata
* New abstracted options to adjust document conversion quality (default, low, high)
* Added permissions and roles support
* Added multipage documents support (only tested on PDFs)
To update a previous database do: [d.update_page_count() for d in Document.objects.all()]
* Added support for document page transformation (no GUI yet)

View File

@@ -29,6 +29,8 @@
* Permissions - DONE
* Roles - DONE
* Assign default role to new users - DONE
* DB stored transformations - DONE
* Recognize multi-page documents - DONE
* Document list filtering by metadata
* Filterform date filtering widget
* Validate GET data before saving file
@@ -49,7 +51,6 @@
* Scheduled maintenance (cleanup, deferred OCR's)
* Add tags to documents
* Field for document language or autodetect
* Recognize multi-page documents
* Count pages in a PDF file http://pybrary.net/pyPdf/
* Download a document in different formats: (jpg, png, pdf)
* Cache.cleanup function to delete cached images when document hash changes
@@ -67,6 +68,5 @@
* Download metadata group documents as a single zip file
* Download original document or transformed document
* Include annotations in transformed documents downloads
* DB stored transformations
* Document view temp transformations
* Implement permissions decorators

View File

@@ -191,6 +191,7 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_PREVIEW_SIZE = '640x480'
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
#DOCUMENTS_DISPLAY_SIZE = '1200'
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
# Groups
#DOCUMENTS_GROUP_MAX_RESULTS = 20
@@ -203,7 +204,7 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT = 200
# Misc
#DOCUMENTS_TEMPORARY_DIRECTORY = u'/tmp'
#COMMON_TEMPORARY_DIRECTORY = u'/tmp'
# Converter
#CONVERTER_DEFAULT_OPTIONS = u''
@@ -211,6 +212,7 @@ LOGIN_EXEMPT_URLS = (
#CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400'
#CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
#CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify'
# OCR
#OCR_TESSERACT_PATH = u'/usr/bin/tesseract'