Added multipage document support and document page transformation
This commit is contained in:
0
apps/common/conf/__init__.py
Normal file
0
apps/common/conf/__init__.py
Normal file
3
apps/common/conf/settings.py
Normal file
3
apps/common/conf/settings.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from django.conf import settings
|
||||
|
||||
TEMPORARY_DIRECTORY = getattr(settings, 'COMMON_TEMPORARY_DIRECTORY', u'/tmp')
|
||||
@@ -1,5 +1,5 @@
|
||||
import tempfile
|
||||
|
||||
from documents.conf import settings as documents_settings
|
||||
from common.conf import settings as common_settings
|
||||
|
||||
TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
||||
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
||||
|
||||
@@ -6,10 +6,8 @@ import shutil
|
||||
|
||||
from django.template.defaultfilters import slugify
|
||||
|
||||
|
||||
from documents.utils import from_descriptor_to_tempfile
|
||||
|
||||
from converter.conf.settings import CONVERT_PATH
|
||||
from converter.conf.settings import IDENTIFY_PATH
|
||||
from converter.conf.settings import OCR_OPTIONS
|
||||
from converter.conf.settings import DEFAULT_OPTIONS
|
||||
from converter.conf.settings import LOW_QUALITY_OPTIONS
|
||||
@@ -18,6 +16,7 @@ from converter.conf.settings import HIGH_QUALITY_OPTIONS
|
||||
#from converter.conf.settings import UNOCONV_PATH
|
||||
|
||||
from converter import TEMPORARY_DIRECTORY
|
||||
from utils import from_descriptor_to_tempfile
|
||||
|
||||
|
||||
QUALITY_DEFAULT = 'quality_default'
|
||||
@@ -73,6 +72,16 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
|
||||
return (proc.wait(), proc.stderr.read())
|
||||
|
||||
|
||||
def execute_identify(input_filepath, arguments):
    """
    Run the binary configured as IDENTIFY_PATH against a file.

    input_filepath -- path of the file to inspect; passed as the last
    command line argument.
    arguments -- option string; converted with str() and tokenized
    with shlex.split() before being placed ahead of the file path.

    Returns a tuple of (exit status, stderr contents, stdout contents).
    """
    command = [IDENTIFY_PATH]
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child to exit.
    # Calling wait() before reading can block forever once the child has
    # filled one of the pipe buffers.
    stdout_data, stderr_data = proc.communicate()
    return (proc.returncode, stderr_data, stdout_data)
|
||||
|
||||
|
||||
def cache_cleanup(input_filepath, size, page=0, format='jpg'):
|
||||
filepath = create_image_cache_filename(input_filepath, size, page, format)
|
||||
try:
|
||||
@@ -126,7 +135,6 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
|
||||
try:
|
||||
input_arg = '%s[%s]' % (input_filepath, page)
|
||||
extra_options += ' -resize %s' % size
|
||||
print 'extra_options', extra_options
|
||||
status, error_string = execute_convert(input_arg, extra_options, '%s:%s' % (format, output_filepath), quality=quality)
|
||||
if status:
|
||||
errors = get_errors(error_string)
|
||||
@@ -138,6 +146,15 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
|
||||
return output_filepath
|
||||
|
||||
|
||||
def get_page_count(input_filepath):
    """
    Return the page count of a file as reported by execute_identify().

    Calls execute_identify() with the '-format %n' arguments and converts
    its standard output to an int.

    Raises ConvertError(status, errors) when the command exits with a
    non zero status, and ValueError when the output is not an integer.
    """
    # No try/finally here: a `return` inside `finally` would discard the
    # ConvertError raised below and hide failures from the caller.
    status, error_string, output = execute_identify(input_filepath, '-format %n')
    if status:
        errors = get_errors(error_string)
        raise ConvertError(status, errors)

    # NOTE(review): confirm the configured identify binary prints the count
    # only once for multi page inputs; repeated output would inflate int().
    return int(output)
|
||||
|
||||
#TODO: slugify OCR_OPTIONS and add to file name to cache
|
||||
def convert_document_for_ocr(document, page=0, format='tif'):
|
||||
#Extract document file
|
||||
|
||||
@@ -5,6 +5,7 @@ ugettext = lambda s: s
|
||||
|
||||
|
||||
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
|
||||
IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
|
||||
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
|
||||
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
|
||||
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
|
||||
|
||||
59
apps/converter/utils.py
Normal file
59
apps/converter/utils.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from converter import TEMPORARY_DIRECTORY
|
||||
|
||||
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
|
||||
def copyfile(source, dest, buffer_size=1024*1024):
    """
    Copy a file from source to dest. source and dest
    can either be strings or any object with a read or
    write method, like StringIO for example.

    Strings are treated as paths and opened in binary mode. Data is
    moved in chunks of at most buffer_size bytes. Both ends are closed
    before returning, including objects supplied by the caller, and
    they are also closed when reading or writing raises.
    """
    if not hasattr(source, 'read'):
        source = open(source, 'rb')

    try:
        if not hasattr(dest, 'write'):
            dest = open(dest, 'wb')

        try:
            while 1:
                copy_buffer = source.read(buffer_size)
                if not copy_buffer:
                    # An empty read signals the end of the source.
                    break
                dest.write(copy_buffer)
        finally:
            dest.close()
    finally:
        source.close()
|
||||
|
||||
|
||||
def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
    """
    Copy the contents of an open file like object into a file named
    filename inside TEMPORARY_DIRECTORY and return the resulting path.

    input_descriptor -- object with read() and close() methods; it is
    closed before returning, also when the copy fails.
    filename -- name of the destination file, joined to
    TEMPORARY_DIRECTORY; an existing file is overwritten.
    buffer_size -- maximum number of bytes requested per read.
    """
    path = os.path.join(TEMPORARY_DIRECTORY, filename)

    try:
        output_descriptor = open(path, 'wb')
        try:
            while 1:
                copy_buffer = input_descriptor.read(buffer_size)
                if not copy_buffer:
                    # An empty read signals the end of the input.
                    break
                output_descriptor.write(copy_buffer)
        finally:
            output_descriptor.close()
    finally:
        input_descriptor.close()

    return path
|
||||
|
||||
|
||||
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
    """
    Copy the contents of an open file like object into a newly created
    temporary file and return the path of that file.

    input_descriptor -- object with read() and close() methods; it is
    closed before returning, also when the copy fails.
    buffer_size -- maximum number of bytes requested per read.

    The temporary file is created with tempfile.mkstemp() and is not
    removed by this function.
    """
    output_fd, tmp_filename = tempfile.mkstemp()

    try:
        # Wrap the raw handle in a file object: its write() stores the
        # whole buffer, whereas os.write() may write only part of it.
        output_file = os.fdopen(output_fd, 'wb')
        try:
            while 1:
                copy_buffer = input_descriptor.read(buffer_size)
                if not copy_buffer:
                    # An empty read signals the end of the input.
                    break
                output_file.write(copy_buffer)
        finally:
            # Closing the file object also closes the underlying handle.
            output_file.close()
    finally:
        input_descriptor.close()

    return tmp_filename
|
||||
@@ -9,10 +9,10 @@ from common.utils import pretty_size
|
||||
|
||||
from permissions.api import register_permissions
|
||||
|
||||
from models import Document, DocumentTransformation
|
||||
from models import Document, DocumentPage, DocumentPageTransformation
|
||||
from staging import StagingFile
|
||||
|
||||
from documents.conf import settings as documents_settings
|
||||
from common.conf import settings as common_settings
|
||||
|
||||
PERMISSION_DOCUMENT_CREATE = 'document_create'
|
||||
PERMISSION_DOCUMENT_PROPERTIES_EDIT = 'document_properties_edit'
|
||||
@@ -43,18 +43,18 @@ document_edit_metadata = {'text':_('edit metadata'), 'view':'document_edit_metad
|
||||
document_preview = {'text':_('preview'), 'class':'fancybox', 'view':'document_preview', 'args':'object.id', 'famfam':'magnifier', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_VIEW]}}
|
||||
document_download = {'text':_('download'), 'view':'document_download', 'args':'object.id', 'famfam':'page_save', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_DOWNLOAD]}}
|
||||
|
||||
document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
|
||||
document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
|
||||
#document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
|
||||
#document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
|
||||
|
||||
|
||||
staging_file_preview = {'text':_('preview'), 'class':'fancybox', 'view':'staging_file_preview', 'args':'object.id', 'famfam':'drive_magnify'}
|
||||
staging_file_delete = {'text':_('delete'), 'view':'staging_file_delete', 'args':'object.id', 'famfam':'drive_delete'}
|
||||
|
||||
register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download, document_transformation_list], menu_name='sidebar')
|
||||
register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download], menu_name='sidebar')
|
||||
register_links(Document, [document_list, document_create, document_create_multiple, document_create_sibling], menu_name='sidebar')
|
||||
register_links(['document_list', 'document_create', 'document_create_multiple', 'upload_document_with_type', 'upload_multiple_documents_with_type'], [document_list, document_create, document_create_multiple], menu_name='sidebar')
|
||||
|
||||
register_links(DocumentTransformation, [document_transformation_delete])
|
||||
#register_links(DocumentTransformation, [document_transformation_delete])
|
||||
|
||||
|
||||
|
||||
@@ -76,4 +76,4 @@ register_menu([
|
||||
document_list
|
||||
],'famfam':'page','position':4}])
|
||||
|
||||
TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
||||
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
|
||||
|
||||
@@ -3,7 +3,7 @@ from django.contrib import admin
|
||||
from models import MetadataType, DocumentType, Document, \
|
||||
DocumentTypeMetadataType, DocumentMetadata, DocumentTypeFilename, \
|
||||
MetadataIndex, DocumentMetadataIndex, DocumentPage, MetadataGroup, \
|
||||
MetadataGroupItem, DocumentTransformation
|
||||
MetadataGroupItem, DocumentPageTransformation
|
||||
|
||||
|
||||
class MetadataTypeAdmin(admin.ModelAdmin):
|
||||
@@ -48,7 +48,11 @@ class DocumentMetadataIndexInline(admin.StackedInline):
|
||||
extra = 1
|
||||
classes = ('collapse-open',)
|
||||
allow_add = True
|
||||
readonly_fields = ('metadata_index', 'filename')
|
||||
readonly_fields = ('suffix', 'metadata_index', 'filename')
|
||||
|
||||
|
||||
class DocumentPageTransformationAdmin(admin.ModelAdmin):
|
||||
model = DocumentPageTransformation
|
||||
|
||||
|
||||
class DocumentPageInline(admin.StackedInline):
|
||||
@@ -58,16 +62,9 @@ class DocumentPageInline(admin.StackedInline):
|
||||
allow_add = True
|
||||
|
||||
|
||||
class DocumentTransformationline(admin.StackedInline):
|
||||
model = DocumentTransformation
|
||||
extra = 1
|
||||
classes = ('collapse-open',)
|
||||
allow_add = True
|
||||
|
||||
|
||||
class DocumentAdmin(admin.ModelAdmin):
|
||||
inlines = [DocumentMetadataInline, DocumentMetadataIndexInline,
|
||||
DocumentTransformationline, DocumentPageInline]
|
||||
DocumentPageInline]
|
||||
list_display = ('uuid', 'file_filename', 'file_extension')
|
||||
|
||||
|
||||
@@ -87,4 +84,5 @@ admin.site.register(MetadataType, MetadataTypeAdmin)
|
||||
admin.site.register(DocumentType, DocumentTypeAdmin)
|
||||
admin.site.register(Document, DocumentAdmin)
|
||||
admin.site.register(MetadataGroup, MetadataGroupAdmin)
|
||||
admin.site.register(DocumentPageTransformation, DocumentPageTransformationAdmin)
|
||||
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import uuid
|
||||
import tempfile
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
from converter.api import get_page_count
|
||||
|
||||
from documents.storage import DocumentStorage
|
||||
|
||||
default_available_functions = {
|
||||
@@ -29,6 +32,7 @@ DELETE_LOCAL_ORIGINAL = getattr(settings, 'DOCUMENTS_DELETE_LOCAL_ORIGINAL', Fal
|
||||
# Saving
|
||||
CHECKSUM_FUNCTION = getattr(settings, 'DOCUMENTS_CHECKSUM_FUNCTION', lambda x: hashlib.sha256(x).hexdigest())
|
||||
UUID_FUNCTION = getattr(settings, 'DOCUMENTS_UUID_FUNCTION', lambda:unicode(uuid.uuid4()))
|
||||
def _default_page_count(document):
    """
    Default page counter: write the document to a temporary file and
    return the result of get_page_count() for that file.

    document -- object providing save_to_file(path), which must return
    the path it wrote to.
    """
    # Imported locally so this function does not rely on a module level
    # import of os being present.
    import os

    descriptor, path = tempfile.mkstemp()
    # mkstemp() hands back an open OS level handle; close it right away
    # because save_to_file() opens the path on its own.
    os.close(descriptor)
    try:
        return get_page_count(document.save_to_file(path))
    finally:
        # The temporary copy is only needed while counting pages.
        os.remove(path)


PAGE_COUNT_FUNCTION = getattr(settings, 'DOCUMENTS_PAGE_COUNT_FUNCTION', _default_page_count)
|
||||
|
||||
# Storage
|
||||
STORAGE_BACKEND = getattr(settings, 'DOCUMENTS_STORAGE_BACKEND', DocumentStorage)
|
||||
@@ -36,6 +40,7 @@ STORAGE_DIRECTORY_NAME = getattr(settings, 'DOCUMENTS_STORAGE_DIRECTORY_NAME', '
|
||||
|
||||
# Usage
|
||||
PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_PREVIEW_SIZE', '640x480')
|
||||
MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '160x120')
|
||||
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
|
||||
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
|
||||
|
||||
@@ -48,8 +53,3 @@ FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERV
|
||||
FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents')
|
||||
FILESYSTEM_SLUGIFY_PATHS = getattr(settings, 'DOCUMENTS_SLUGIFY_PATHS', False)
|
||||
FILESYSTEM_MAX_RENAME_COUNT = getattr(settings, 'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 200)
|
||||
|
||||
#misc
|
||||
TEMPORARY_DIRECTORY = getattr(settings, 'DOCUMENTS_TEMPORARY_DIRECTORY', u'/tmp')
|
||||
|
||||
|
||||
|
||||
@@ -24,8 +24,22 @@ from documents.conf.settings import AVAILABLE_MODELS
|
||||
class ImageWidget(forms.widgets.Widget):
|
||||
def render(self, name, value, attrs=None):
    """
    Build the HTML preview for a document.

    value -- the document to render; its id is used to build the URLs
    and its documentpage_set is counted to choose the layout.

    Documents with more than one page get a "Pages" heading followed by
    one linked multipage preview per page (page numbers start at 1).
    Other documents get a single linked preview image. Returns the
    markup wrapped with mark_safe().
    """
    output = []
    display_url = reverse('document_display', args=[value.id])

    # Count once and reuse the result instead of querying again for the loop.
    page_count = value.documentpage_set.count()
    if page_count > 1:
        output.append('<br /><span class="famfam active famfam-page_white_copy"></span>%s<br />' % ugettext(u'Pages'))
        # The URLs are the same for every page; only the query string changes.
        multipage_url = reverse('document_preview_multipage', args=[value.id])
        for page_number in range(1, page_count + 1):
            output.append('<span>%(page)s)<a rel="gallery_1" class="fancybox-noscaling" href="%(url)s?page=%(page)s"><img src="%(img)s?page=%(page)s" /></a></span>' % {
                'url': display_url,
                'img': multipage_url,
                'page': page_number,
            })
    else:
        output.append('<a class="fancybox-noscaling" href="%(url)s"><img width="300" src="%(img)s" /></a>' % {
            'url': display_url,
            'img': reverse('document_preview', args=[value.id]),
        })

    output.append('<br /><span class="famfam active famfam-magnifier"></span>%s' % ugettext(u'Click on the image for full size view'))
    return mark_safe(u''.join(output))
|
||||
@@ -58,7 +72,7 @@ class DocumentPreviewForm(forms.Form):
|
||||
super(DocumentPreviewForm, self).__init__(*args, **kwargs)
|
||||
self.fields['preview'].initial = self.document
|
||||
|
||||
preview = forms.CharField(widget=ImageWidget)
|
||||
preview = forms.CharField(widget=ImageWidget())
|
||||
|
||||
|
||||
class DocumentForm_view(DetailForm):
|
||||
|
||||
@@ -20,6 +20,7 @@ from documents.conf.settings import AVAILABLE_FUNCTIONS
|
||||
from documents.conf.settings import AVAILABLE_MODELS
|
||||
from documents.conf.settings import CHECKSUM_FUNCTION
|
||||
from documents.conf.settings import UUID_FUNCTION
|
||||
from documents.conf.settings import PAGE_COUNT_FUNCTION
|
||||
from documents.conf.settings import STORAGE_BACKEND
|
||||
from documents.conf.settings import STORAGE_DIRECTORY_NAME
|
||||
from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
|
||||
@@ -74,12 +75,15 @@ class Document(models.Model):
|
||||
verbose_name_plural = _(u'documents')
|
||||
ordering = ['-date_added']
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
return '%s.%s' % (self.file_filename, self.file_extension)
|
||||
|
||||
|
||||
def get_fullname(self):
|
||||
return os.extsep.join([self.file_filename, self.file_extension])
|
||||
|
||||
|
||||
def update_mimetype(self):
|
||||
try:
|
||||
mime = magic.Magic(mime=True)
|
||||
@@ -95,25 +99,52 @@ class Document(models.Model):
|
||||
def read(self, count=1024):
|
||||
return self.file.storage.open(self.file.url).read(count)
|
||||
|
||||
|
||||
@models.permalink
|
||||
def get_absolute_url(self):
|
||||
return ('document_view', [self.id])
|
||||
|
||||
|
||||
def update_checksum(self, save=True):
|
||||
if self.exists():
|
||||
self.checksum = unicode(CHECKSUM_FUNCTION(self.file.read()))
|
||||
if save:
|
||||
self.save()
|
||||
|
||||
|
||||
def update_page_count(self):
    """
    Make sure a DocumentPage row exists for every page of this document.

    The number of pages comes from PAGE_COUNT_FUNCTION(self). Page
    numbers start at 1. Existing rows are left untouched.
    """
    total_pages = PAGE_COUNT_FUNCTION(self)
    # NOTE(review): rows with a page number above total_pages are not
    # removed here; confirm whether a shrinking page count can occur.
    for page_number in range(1, total_pages + 1):
        DocumentPage.objects.get_or_create(
            document=self, page_number=page_number)
|
||||
|
||||
|
||||
def save_to_file(self, filepath, buffer_size=1024*1024):
    """
    Write the stored contents of this document to filepath.

    filepath -- destination path; opened in binary write mode, so an
    existing file is overwritten.
    buffer_size -- maximum number of bytes requested per read from the
    storage backend.

    Returns filepath. The object returned by the storage backend and
    the destination file are both closed, also when the copy fails.
    """
    storage = self.file.storage.open(self.file.url)
    try:
        output_descriptor = open(filepath, 'wb')
        try:
            while 1:
                # Read in chunks so the whole document is never held in
                # memory at once.
                copy_buffer = storage.read(buffer_size)
                if not copy_buffer:
                    break
                output_descriptor.write(copy_buffer)
        finally:
            output_descriptor.close()
    finally:
        storage.close()

    return filepath
|
||||
|
||||
|
||||
def exists(self):
|
||||
return self.file.storage.exists(self.file.url)
|
||||
|
||||
|
||||
def delete(self, *args, **kwargs):
|
||||
#TODO: Might not execute when done in bulk from a queryset
|
||||
#topics/db/queries.html#topics-db-queries-delete
|
||||
self.delete_fs_links()
|
||||
super(Document, self).delete(*args, **kwargs)
|
||||
|
||||
|
||||
def get_metadata_groups(self):
|
||||
errors = []
|
||||
metadata_groups = {}
|
||||
@@ -144,6 +175,7 @@ class Document(models.Model):
|
||||
metadata_groups[group] = Document.objects.filter(Q(id__in=document_id_list) & ~Q(id=self.id)) or []
|
||||
return metadata_groups, errors
|
||||
|
||||
|
||||
def create_fs_links(self):
|
||||
if FILESYSTEM_FILESERVING_ENABLE:
|
||||
if not self.exists():
|
||||
@@ -171,6 +203,7 @@ class Document(models.Model):
|
||||
#This should be a warning not an error
|
||||
pass
|
||||
|
||||
|
||||
def delete_fs_links(self):
|
||||
if FILESYSTEM_FILESERVING_ENABLE:
|
||||
for document_metadata_index in self.documentmetadataindex_set.all():
|
||||
@@ -209,12 +242,14 @@ class Document(models.Model):
|
||||
except OSError, exc:
|
||||
pass
|
||||
|
||||
|
||||
#Remove the directory if it is empty
|
||||
try:
|
||||
os.removedirs(path)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def next_available_filename(document, metadata_index, path, filename, extension, suffix=0):
|
||||
target = filename
|
||||
if suffix:
|
||||
@@ -344,10 +379,10 @@ class DocumentPage(models.Model):
|
||||
document = models.ForeignKey(Document, verbose_name=_(u'document'))
|
||||
content = models.TextField(blank=True, null=True, verbose_name=_(u'content'))
|
||||
page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
|
||||
page_number = models.PositiveIntegerField(default=0, verbose_name=_(u'page number'))
|
||||
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'))
|
||||
|
||||
def __unicode__(self):
|
||||
return '%s - %s' % (self.page_number, self.page_label)
|
||||
return '%s - %s - %s' % (self.document, self.page_number, self.page_label)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _(u'document page')
|
||||
@@ -377,7 +412,7 @@ INCLUSION_CHOICES = (
|
||||
(INCLUSION_OR, _(u'or')),
|
||||
)
|
||||
|
||||
OPERATOR_CHOCIES = (
|
||||
OPERATOR_CHOICES = (
|
||||
('exact', _(u'is equal')),
|
||||
('iexact', _(u'is equal (case insensitive)')),
|
||||
('contains', _(u'contains')),
|
||||
@@ -399,7 +434,7 @@ class MetadataGroupItem(models.Model):
|
||||
metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group'))
|
||||
inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES, help_text=_(u'The inclusion is ignored for the first item.'))
|
||||
metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.'))
|
||||
operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES)
|
||||
operator = models.CharField(max_length=16, choices=OPERATOR_CHOICES)
|
||||
expression = models.CharField(max_length=128,
|
||||
verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document. The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.'))
|
||||
negated = models.BooleanField(default=False, verbose_name=_(u'negated'), help_text=_(u'Inverts the logic of the operator.'))
|
||||
@@ -413,8 +448,8 @@ class MetadataGroupItem(models.Model):
|
||||
verbose_name_plural = _(u'metadata group items')
|
||||
|
||||
|
||||
class DocumentTransformation(models.Model):
|
||||
document = models.ForeignKey(Document, verbose_name=_(u'document'))
|
||||
class DocumentPageTransformation(models.Model):
|
||||
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
|
||||
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
|
||||
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
|
||||
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
|
||||
@@ -430,10 +465,8 @@ class DocumentTransformation(models.Model):
|
||||
|
||||
class Meta:
|
||||
ordering = ('order',)
|
||||
verbose_name = _(u'document transformation')
|
||||
verbose_name_plural = _(u'document transformations')
|
||||
|
||||
|
||||
verbose_name = _(u'document page transformation')
|
||||
verbose_name_plural = _(u'document page transformations')
|
||||
|
||||
|
||||
register(Document, _(u'document'), ['document_type__name', 'file_mimetype', 'file_filename', 'file_extension', 'documentmetadata__value', 'documentpage__content'])
|
||||
|
||||
@@ -5,6 +5,7 @@ from django.views.generic.create_update import create_object, update_object
|
||||
from documents.conf.settings import PREVIEW_SIZE
|
||||
from documents.conf.settings import THUMBNAIL_SIZE
|
||||
from documents.conf.settings import DISPLAY_SIZE
|
||||
from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE
|
||||
|
||||
from converter.api import QUALITY_HIGH
|
||||
|
||||
@@ -19,8 +20,9 @@ urlpatterns = patterns('documents.views',
|
||||
url(r'^document/(?P<document_id>\d+)/delete/$', 'document_delete', (), 'document_delete'),
|
||||
url(r'^document/(?P<document_id>\d+)/edit/$', 'document_edit', (), 'document_edit'),
|
||||
url(r'^document/(?P<document_id>\d+)/edit/metadata/$', 'document_edit_metadata', (), 'document_edit_metadata'),
|
||||
url(r'^document/(?P<document_id>\d+)/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
|
||||
url(r'^document/(?P<document_id>\d+)/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
|
||||
url(r'^document/(?P<document_id>\d+)/display/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
|
||||
url(r'^document/(?P<document_id>\d+)/display/preview/multipage/$', 'get_document_image', {'size':MULTIPAGE_PREVIEW_SIZE}, 'document_preview_multipage'),
|
||||
url(r'^document/(?P<document_id>\d+)/display/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
|
||||
url(r'^document/(?P<document_id>\d+)/display/$', 'get_document_image', {'size':DISPLAY_SIZE,'quality':QUALITY_HIGH}, 'document_display'),
|
||||
url(r'^document/(?P<document_id>\d+)/download/$', 'document_download', (), 'document_download'),
|
||||
url(r'^document/(?P<document_id>\d+)/create/siblings/$', 'document_create_sibling', {'multiple':False}, 'document_create_sibling'),
|
||||
|
||||
@@ -49,7 +49,6 @@ def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*102
|
||||
return path
|
||||
|
||||
|
||||
|
||||
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
|
||||
output_descriptor, tmp_filename = tempfile.mkstemp()
|
||||
|
||||
|
||||
@@ -18,7 +18,8 @@ from common.utils import pretty_size
|
||||
|
||||
from utils import from_descriptor_to_tempfile
|
||||
|
||||
from models import Document, DocumentMetadata, DocumentType, MetadataType
|
||||
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
|
||||
DocumentPage
|
||||
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
|
||||
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
|
||||
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
|
||||
@@ -122,6 +123,8 @@ def upload_document_with_type(request, document_type_id, multiple=True):
|
||||
instance = local_form.save()
|
||||
instance.update_checksum()
|
||||
instance.update_mimetype()
|
||||
instance.update_page_count()
|
||||
|
||||
if 'document_type_available_filenames' in local_form.cleaned_data:
|
||||
if local_form.cleaned_data['document_type_available_filenames']:
|
||||
instance.file_filename = local_form.cleaned_data['document_type_available_filenames'].filename
|
||||
@@ -154,6 +157,7 @@ def upload_document_with_type(request, document_type_id, multiple=True):
|
||||
document.save()
|
||||
document.update_checksum()
|
||||
document.update_mimetype()
|
||||
document.update_page_count()
|
||||
except Exception, e:
|
||||
messages.error(request, e)
|
||||
else:
|
||||
@@ -243,6 +247,7 @@ def document_view(request, document_id):
|
||||
{'label':_(u'Time added'), 'field':lambda x: unicode(x.date_added.time()).split('.')[0]},
|
||||
{'label':_(u'Checksum'), 'field':'checksum'},
|
||||
{'label':_(u'UUID'), 'field':'uuid'},
|
||||
{'label':_(u'Pages'), 'field':lambda x: x.documentpage_set.count()},
|
||||
])
|
||||
|
||||
|
||||
@@ -436,8 +441,13 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
|
||||
raise Http404(e)
|
||||
|
||||
document = get_object_or_404(Document, pk=document_id)
|
||||
|
||||
page = int(request.GET.get('page', 1))
|
||||
transformation_list = []
|
||||
for tranformation in document.documenttransformation_set.all():
|
||||
try:
|
||||
document_page = DocumentPage.objects.get(document=document, page_number=page)
|
||||
|
||||
for tranformation in document_page.documentpagetransformation_set.all():
|
||||
try:
|
||||
transformation_list.append(tranformation.get_transformation())
|
||||
except Exception, e:
|
||||
@@ -445,18 +455,19 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
|
||||
messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
|
||||
else:
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
|
||||
tranformation_string = ' '.join(transformation_list)
|
||||
try:
|
||||
filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string)
|
||||
|
||||
filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string, page=page-1)
|
||||
if filepath:
|
||||
return serve_file(request, File(file=open(filepath, 'r')))
|
||||
#Save to a temporary location
|
||||
document.file.open()
|
||||
desc = document.file.storage.open(document.file.path)
|
||||
filepath = from_descriptor_to_tempfile(desc, document.checksum)
|
||||
output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string)
|
||||
output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1)
|
||||
return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg')
|
||||
except Exception, e:
|
||||
if size == THUMBNAIL_SIZE:
|
||||
@@ -523,6 +534,7 @@ def document_transformation_list(request, document_id):
|
||||
|
||||
document = get_object_or_404(Document, pk=document_id)
|
||||
|
||||
|
||||
return object_list(
|
||||
request,
|
||||
queryset=document.documenttransformation_set.all(),
|
||||
@@ -539,9 +551,9 @@ def document_transformation_delete(request, document_transformation_id):
|
||||
except Unauthorized, e:
|
||||
raise Http404(e)
|
||||
|
||||
document_transformation = get_object_or_404(DocumentTransformation, pk=document_transformation_id)
|
||||
document_transformation = get_object_or_404(DocumentPageTransformation, pk=document_transformation_id)
|
||||
|
||||
return delete_object(request, model=DocumentTransformation, object_id=document_transformation_id,
|
||||
return delete_object(request, model=DocumentPageTransformation, object_id=document_transformation_id,
|
||||
template_name='generic_confirm.html',
|
||||
post_delete_redirect=reverse('document_transformation_list'),
|
||||
extra_context={
|
||||
|
||||
@@ -5,6 +5,7 @@ from django.shortcuts import render_to_response
|
||||
from django.template import RequestContext
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from common.conf import settings as common_settings
|
||||
from documents.conf import settings as documents_settings
|
||||
from converter.conf import settings as converter_settings
|
||||
from ocr.conf import settings as ocr_settings
|
||||
@@ -34,7 +35,9 @@ def check_settings(request):
|
||||
{'name':'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', 'value':documents_settings.FILESYSTEM_FILESERVING_PATH, 'exists':True},
|
||||
{'name':'DOCUMENTS_SLUGIFY_PATHS', 'value':documents_settings.FILESYSTEM_SLUGIFY_PATHS},
|
||||
{'name':'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 'value':documents_settings.FILESYSTEM_MAX_RENAME_COUNT},
|
||||
{'name':'DOCUMENTS_TEMPORARY_DIRECTORY', 'value':documents_settings.TEMPORARY_DIRECTORY, 'exists':True},
|
||||
|
||||
#Common
|
||||
{'name':'COMMON_TEMPORARY_DIRECTORY', 'value':common_settings.TEMPORARY_DIRECTORY, 'exists':True},
|
||||
|
||||
#Converter
|
||||
{'name':'CONVERTER_CONVERT_PATH', 'value':converter_settings.CONVERT_PATH, 'exists':True},
|
||||
|
||||
@@ -5,13 +5,13 @@ from permissions.api import register_permissions
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
OCR_DOCUMENT_OCR = 'document_ocr'
|
||||
PERMISSION_OCR_DOCUMENT = 'ocr_document'
|
||||
|
||||
register_permissions('ocr', [
|
||||
{'name':OCR_DOCUMENT_OCR, 'label':_(u'Submit document for OCR')},
|
||||
{'name':PERMISSION_OCR_DOCUMENT, 'label':_(u'Submit document for OCR')},
|
||||
])
|
||||
|
||||
submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[OCR_DOCUMENT_OCR]}}
|
||||
submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[PERMISSION_OCR_DOCUMENT]}}
|
||||
|
||||
register_links(Document, [submit_document], menu_name='sidebar')
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import tempfile
|
||||
from django.utils.translation import ugettext as _
|
||||
|
||||
from documents.models import DocumentPage
|
||||
from documents.conf.settings import TEMPORARY_DIRECTORY
|
||||
from common.conf.settings import TEMPORARY_DIRECTORY
|
||||
from converter.api import convert_document_for_ocr
|
||||
|
||||
from ocr.conf.settings import TESSERACT_PATH
|
||||
|
||||
@@ -11,11 +11,11 @@ from django.utils.translation import ugettext as _
|
||||
from permissions.api import check_permissions, Unauthorized
|
||||
from documents.models import Document
|
||||
|
||||
from ocr import OCR_DOCUMENT_OCR
|
||||
from ocr import PERMISSION_OCR_DOCUMENT
|
||||
from api import ocr_document
|
||||
|
||||
def submit_document(request, document_id):
|
||||
permissions = [OCR_DOCUMENT_OCR]
|
||||
permissions = [PERMISSION_OCR_DOCUMENT]
|
||||
try:
|
||||
check_permissions(request.user, 'ocr', permissions)
|
||||
except Unauthorized, e:
|
||||
|
||||
@@ -6,3 +6,6 @@
|
||||
* Added the ability to group documents by their metadata
|
||||
* New abstracted options to adjust document conversion quality (default, low, high)
|
||||
* Added permissions and roles support
|
||||
* Added multipage documents support (only tested on pdfs)
|
||||
To update a previous database do: [d.update_page_count() for d in Document.objects.all()]
|
||||
* Added support for document page transformation (no GUI yet)
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
* Permissions - DONE
|
||||
* Roles - DONE
|
||||
* Assign default role to new users - DONE
|
||||
* DB stored transformations - DONE
|
||||
* Recognize multi-page documents - DONE
|
||||
* Document list filtering by metadata
|
||||
* Filterform date filtering widget
|
||||
* Validate GET data before saving file
|
||||
@@ -49,7 +51,6 @@
|
||||
* Scheduled maintenance (cleanup, deferred OCR's)
|
||||
* Add tags to documents
|
||||
* Field for document language or autodetect
|
||||
* Recognize multi-page documents
|
||||
* Count pages in a PDF file http://pybrary.net/pyPdf/
|
||||
* Download a document in different formats: (jpg, png, pdf)
|
||||
* Cache.cleanup function to delete cached images when document hash changes
|
||||
@@ -67,6 +68,5 @@
|
||||
* Download metadata group documents as a single zip file
|
||||
* Download original document or transformed document
|
||||
* Include annotations in transformed documents downloads
|
||||
* DB stored transformations
|
||||
* Document view temp transformations
|
||||
* Implement permissions decorators
|
||||
|
||||
@@ -191,6 +191,7 @@ LOGIN_EXEMPT_URLS = (
|
||||
#DOCUMENTS_PREVIEW_SIZE = '640x480'
|
||||
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
|
||||
#DOCUMENTS_DISPLAY_SIZE = '1200'
|
||||
#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
|
||||
|
||||
# Groups
|
||||
#DOCUMENTS_GROUP_MAX_RESULTS = 20
|
||||
@@ -203,7 +204,7 @@ LOGIN_EXEMPT_URLS = (
|
||||
#DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT = 200
|
||||
|
||||
# Misc
|
||||
#DOCUMENTS_TEMPORARY_DIRECTORY = u'/tmp'
|
||||
#COMMON_TEMPORARY_DIRECTORY = u'/tmp'
|
||||
|
||||
# Converter
|
||||
#CONVERTER_DEFAULT_OPTIONS = u''
|
||||
@@ -211,6 +212,7 @@ LOGIN_EXEMPT_URLS = (
|
||||
#CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400'
|
||||
#CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
|
||||
#CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
|
||||
#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify'
|
||||
|
||||
# OCR
|
||||
#OCR_TESSERACT_PATH = u'/usr/bin/tesseract'
|
||||
|
||||
Reference in New Issue
Block a user