diff --git a/apps/common/conf/__init__.py b/apps/common/conf/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/apps/common/conf/settings.py b/apps/common/conf/settings.py
new file mode 100644
index 0000000000..afbba62697
--- /dev/null
+++ b/apps/common/conf/settings.py
@@ -0,0 +1,3 @@
+from django.conf import settings
+
+TEMPORARY_DIRECTORY = getattr(settings, 'COMMON_TEMPORARY_DIRECTORY', u'/tmp')
diff --git a/apps/converter/__init__.py b/apps/converter/__init__.py
index e5c4e1ea01..9562a07710 100755
--- a/apps/converter/__init__.py
+++ b/apps/converter/__init__.py
@@ -1,5 +1,5 @@
import tempfile
-from documents.conf import settings as documents_settings
+from common.conf import settings as common_settings
-TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
+TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
diff --git a/apps/converter/api.py b/apps/converter/api.py
index 856c0c4c14..c5dc63bd17 100755
--- a/apps/converter/api.py
+++ b/apps/converter/api.py
@@ -6,10 +6,8 @@ import shutil
from django.template.defaultfilters import slugify
-
-from documents.utils import from_descriptor_to_tempfile
-
from converter.conf.settings import CONVERT_PATH
+from converter.conf.settings import IDENTIFY_PATH
from converter.conf.settings import OCR_OPTIONS
from converter.conf.settings import DEFAULT_OPTIONS
from converter.conf.settings import LOW_QUALITY_OPTIONS
@@ -18,6 +16,7 @@ from converter.conf.settings import HIGH_QUALITY_OPTIONS
#from converter.conf.settings import UNOCONV_PATH
from converter import TEMPORARY_DIRECTORY
+from utils import from_descriptor_to_tempfile
QUALITY_DEFAULT = 'quality_default'
@@ -73,6 +72,16 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
return (proc.wait(), proc.stderr.read())
+def execute_identify(input_filepath, arguments):
+    """
+    Run the identify binary configured in IDENTIFY_PATH on a file.
+
+    arguments is a string of command line options; it is split with
+    shlex and placed before input_filepath on the command line.
+
+    Returns a tuple of (exit status, stderr contents, stdout contents).
+    """
+    command = [IDENTIFY_PATH]
+    command.extend(shlex.split(str(arguments)))
+    command.append(input_filepath)
+
+    proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+    # communicate() drains both pipes while waiting for the process to
+    # exit; calling wait() first can deadlock once a pipe buffer fills.
+    output, error_string = proc.communicate()
+    return (proc.returncode, error_string, output)
+
+
def cache_cleanup(input_filepath, size, page=0, format='jpg'):
filepath = create_image_cache_filename(input_filepath, size, page, format)
try:
@@ -126,7 +135,6 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
try:
input_arg = '%s[%s]' % (input_filepath, page)
extra_options += ' -resize %s' % size
- print 'extra_options', extra_options
status, error_string = execute_convert(input_arg, extra_options, '%s:%s' % (format, output_filepath), quality=quality)
if status:
errors = get_errors(error_string)
@@ -136,7 +144,16 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
if unoconv_output:
cleanup(unoconv_output)
return output_filepath
-
+
+
+def get_page_count(input_filepath):
+    """
+    Return the page count that identify reports for input_filepath.
+
+    Raises ConvertError when identify exits with a non zero status.
+    """
+    status, error_string, output = execute_identify(input_filepath, '-format %n')
+    if status:
+        # A return inside a finally clause would discard this exception,
+        # so the output is only parsed once the status check has passed.
+        errors = get_errors(error_string)
+        raise ConvertError(status, errors)
+    # NOTE(review): identify may print the %n value once per page for
+    # multi page files; confirm the output is a single number.
+    return int(output)
#TODO: slugify OCR_OPTIONS and add to file name to cache
def convert_document_for_ocr(document, page=0, format='tif'):
diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py
index d843670ba3..07a771e822 100755
--- a/apps/converter/conf/settings.py
+++ b/apps/converter/conf/settings.py
@@ -5,6 +5,7 @@ ugettext = lambda s: s
CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
+IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
diff --git a/apps/converter/utils.py b/apps/converter/utils.py
new file mode 100644
index 0000000000..ee50a701d2
--- /dev/null
+++ b/apps/converter/utils.py
@@ -0,0 +1,59 @@
+import os
+import tempfile
+
+from converter import TEMPORARY_DIRECTORY
+
+#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
+def copyfile(source, dest, buffer_size=1024*1024):
+    """
+    Copy a file from source to dest. source and dest
+    can either be strings or any object with a read or
+    write method, like StringIO for example.
+
+    Data is transferred in chunks of buffer_size bytes. Both ends are
+    closed once the copy finishes or fails, including objects that
+    were passed in by the caller.
+    """
+    if not hasattr(source, 'read'):
+        source = open(source, 'rb')
+    try:
+        if not hasattr(dest, 'write'):
+            dest = open(dest, 'wb')
+        try:
+            while 1:
+                copy_buffer = source.read(buffer_size)
+                if not copy_buffer:
+                    break
+                dest.write(copy_buffer)
+        finally:
+            # Close even when a read or write fails so no handle leaks.
+            dest.close()
+    finally:
+        source.close()
+
+
+def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
+    """
+    Copy input_descriptor into a file named filename inside
+    TEMPORARY_DIRECTORY and return the path of that file.
+
+    input_descriptor is read in chunks of buffer_size bytes and is
+    closed before returning, also when the copy fails.
+    """
+    path = os.path.join(TEMPORARY_DIRECTORY, filename)
+
+    try:
+        output_descriptor = open(path, 'wb')
+        try:
+            while 1:
+                copy_buffer = input_descriptor.read(buffer_size)
+                if not copy_buffer:
+                    break
+                output_descriptor.write(copy_buffer)
+        finally:
+            output_descriptor.close()
+    finally:
+        # Release the source even if opening or writing the target failed.
+        input_descriptor.close()
+    return path
+
+
+def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
+    """
+    Copy input_descriptor into a newly created temporary file and
+    return the path of that file.
+
+    input_descriptor is read in chunks of buffer_size bytes and is
+    closed before returning, also when the copy fails. The temporary
+    file is not removed; that is left to the caller.
+    """
+    descriptor, tmp_filename = tempfile.mkstemp()
+    # Wrap the raw descriptor in a file object: its write() stores the
+    # whole buffer, while a bare os.write() call may write only part.
+    output_file = os.fdopen(descriptor, 'wb')
+
+    try:
+        try:
+            while 1:
+                copy_buffer = input_descriptor.read(buffer_size)
+                if not copy_buffer:
+                    break
+                output_file.write(copy_buffer)
+        finally:
+            # Closing the file object also closes the descriptor.
+            output_file.close()
+    finally:
+        input_descriptor.close()
+    return tmp_filename
diff --git a/apps/documents/__init__.py b/apps/documents/__init__.py
index c59d518355..f4a14bf572 100755
--- a/apps/documents/__init__.py
+++ b/apps/documents/__init__.py
@@ -9,10 +9,10 @@ from common.utils import pretty_size
from permissions.api import register_permissions
-from models import Document, DocumentTransformation
+from models import Document, DocumentPage, DocumentPageTransformation
from staging import StagingFile
-from documents.conf import settings as documents_settings
+from common.conf import settings as common_settings
PERMISSION_DOCUMENT_CREATE = 'document_create'
PERMISSION_DOCUMENT_PROPERTIES_EDIT = 'document_properties_edit'
@@ -43,18 +43,18 @@ document_edit_metadata = {'text':_('edit metadata'), 'view':'document_edit_metad
document_preview = {'text':_('preview'), 'class':'fancybox', 'view':'document_preview', 'args':'object.id', 'famfam':'magnifier', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_VIEW]}}
document_download = {'text':_('download'), 'view':'document_download', 'args':'object.id', 'famfam':'page_save', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_DOWNLOAD]}}
-document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
-document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
+#document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
+#document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
staging_file_preview = {'text':_('preview'), 'class':'fancybox', 'view':'staging_file_preview', 'args':'object.id', 'famfam':'drive_magnify'}
staging_file_delete = {'text':_('delete'), 'view':'staging_file_delete', 'args':'object.id', 'famfam':'drive_delete'}
-register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download, document_transformation_list], menu_name='sidebar')
+register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download], menu_name='sidebar')
register_links(Document, [document_list, document_create, document_create_multiple, document_create_sibling], menu_name='sidebar')
register_links(['document_list', 'document_create', 'document_create_multiple', 'upload_document_with_type', 'upload_multiple_documents_with_type'], [document_list, document_create, document_create_multiple], menu_name='sidebar')
-register_links(DocumentTransformation, [document_transformation_delete])
+#register_links(DocumentTransformation, [document_transformation_delete])
@@ -76,4 +76,4 @@ register_menu([
document_list
],'famfam':'page','position':4}])
-TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
+TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
diff --git a/apps/documents/admin.py b/apps/documents/admin.py
index 863d77ac4d..d384c6e3b2 100755
--- a/apps/documents/admin.py
+++ b/apps/documents/admin.py
@@ -3,7 +3,7 @@ from django.contrib import admin
from models import MetadataType, DocumentType, Document, \
DocumentTypeMetadataType, DocumentMetadata, DocumentTypeFilename, \
MetadataIndex, DocumentMetadataIndex, DocumentPage, MetadataGroup, \
- MetadataGroupItem, DocumentTransformation
+ MetadataGroupItem, DocumentPageTransformation
class MetadataTypeAdmin(admin.ModelAdmin):
@@ -48,9 +48,13 @@ class DocumentMetadataIndexInline(admin.StackedInline):
extra = 1
classes = ('collapse-open',)
allow_add = True
- readonly_fields = ('metadata_index', 'filename')
+ readonly_fields = ('suffix', 'metadata_index', 'filename')
+class DocumentPageTransformationAdmin(admin.ModelAdmin):
+ """Admin options used when registering DocumentPageTransformation."""
+ model = DocumentPageTransformation
+
+
class DocumentPageInline(admin.StackedInline):
model = DocumentPage
extra = 1
@@ -58,16 +62,9 @@ class DocumentPageInline(admin.StackedInline):
allow_add = True
-class DocumentTransformationline(admin.StackedInline):
- model = DocumentTransformation
- extra = 1
- classes = ('collapse-open',)
- allow_add = True
-
-
class DocumentAdmin(admin.ModelAdmin):
- inlines = [DocumentMetadataInline, DocumentMetadataIndexInline,
- DocumentTransformationline, DocumentPageInline]
+ inlines = [DocumentMetadataInline, DocumentMetadataIndexInline,
+ DocumentPageInline]
list_display = ('uuid', 'file_filename', 'file_extension')
@@ -87,4 +84,5 @@ admin.site.register(MetadataType, MetadataTypeAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(MetadataGroup, MetadataGroupAdmin)
+admin.site.register(DocumentPageTransformation, DocumentPageTransformationAdmin)
diff --git a/apps/documents/conf/settings.py b/apps/documents/conf/settings.py
index 42314d2c78..72671d396c 100755
--- a/apps/documents/conf/settings.py
+++ b/apps/documents/conf/settings.py
@@ -1,10 +1,13 @@
import datetime
import hashlib
import uuid
+import tempfile
from django.conf import settings
from django.contrib.auth.models import User
+from converter.api import get_page_count
+
from documents.storage import DocumentStorage
default_available_functions = {
@@ -29,6 +32,7 @@ DELETE_LOCAL_ORIGINAL = getattr(settings, 'DOCUMENTS_DELETE_LOCAL_ORIGINAL', Fal
# Saving
CHECKSUM_FUNCTION = getattr(settings, 'DOCUMENTS_CHECKSUM_FUNCTION', lambda x: hashlib.sha256(x).hexdigest())
UUID_FUNCTION = getattr(settings, 'DOCUMENTS_UUID_FUNCTION', lambda:unicode(uuid.uuid4()))
+def _default_page_count(document):
+    """
+    Default page counter: save the document to a temporary file and
+    return the value get_page_count reports for that file.
+    """
+    import os
+
+    descriptor, path = tempfile.mkstemp()
+    # mkstemp() returns an open descriptor together with the path;
+    # close it here so it does not leak on every call.
+    os.close(descriptor)
+    try:
+        return get_page_count(document.save_to_file(path))
+    finally:
+        # The temporary copy is only needed while counting pages.
+        os.remove(path)
+
+
+PAGE_COUNT_FUNCTION = getattr(settings, 'DOCUMENTS_PAGE_COUNT_FUNCTION', _default_page_count)
# Storage
STORAGE_BACKEND = getattr(settings, 'DOCUMENTS_STORAGE_BACKEND', DocumentStorage)
@@ -36,6 +40,7 @@ STORAGE_DIRECTORY_NAME = getattr(settings, 'DOCUMENTS_STORAGE_DIRECTORY_NAME', '
# Usage
PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_PREVIEW_SIZE', '640x480')
+MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '160x120')
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')
@@ -48,8 +53,3 @@ FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERV
FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents')
FILESYSTEM_SLUGIFY_PATHS = getattr(settings, 'DOCUMENTS_SLUGIFY_PATHS', False)
FILESYSTEM_MAX_RENAME_COUNT = getattr(settings, 'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 200)
-
-#misc
-TEMPORARY_DIRECTORY = getattr(settings, 'DOCUMENTS_TEMPORARY_DIRECTORY', u'/tmp')
-
-
diff --git a/apps/documents/forms.py b/apps/documents/forms.py
index c12270ed72..f0e6369efe 100755
--- a/apps/documents/forms.py
+++ b/apps/documents/forms.py
@@ -24,8 +24,22 @@ from documents.conf.settings import AVAILABLE_MODELS
class ImageWidget(forms.widgets.Widget):
def render(self, name, value, attrs=None):
output = []
- output.append('
' % (reverse('document_display', args=[value.id]),
- reverse('document_preview', args=[value.id])))
+
+ page_count = value.documentpage_set.count()
+ if page_count > 1:
+ output.append('
%s
' % ugettext(u'Pages'))
+ for page_index in range(value.documentpage_set.count()):
+ output.append('%(page)s)
' % {
+ 'url':reverse('document_display', args=[value.id]),
+ 'img':reverse('document_preview_multipage', args=[value.id]),
+ 'page':page_index+1,
+ })
+ else:
+ output.append('
' % {
+ 'url':reverse('document_display', args=[value.id]),
+ 'img':reverse('document_preview', args=[value.id]),
+ })
+
output.append('
%s' % ugettext(u'Click on the image for full size view'))
#output.append(super(ImageWidget, self).render(name, value, attrs))
return mark_safe(u''.join(output))
@@ -57,8 +71,8 @@ class DocumentPreviewForm(forms.Form):
self.document = kwargs.pop('document', None)
super(DocumentPreviewForm, self).__init__(*args, **kwargs)
self.fields['preview'].initial = self.document
-
- preview = forms.CharField(widget=ImageWidget)
+
+ preview = forms.CharField(widget=ImageWidget())
class DocumentForm_view(DetailForm):
diff --git a/apps/documents/models.py b/apps/documents/models.py
index 281618e2ba..d4647e2d9a 100755
--- a/apps/documents/models.py
+++ b/apps/documents/models.py
@@ -20,6 +20,7 @@ from documents.conf.settings import AVAILABLE_FUNCTIONS
from documents.conf.settings import AVAILABLE_MODELS
from documents.conf.settings import CHECKSUM_FUNCTION
from documents.conf.settings import UUID_FUNCTION
+from documents.conf.settings import PAGE_COUNT_FUNCTION
from documents.conf.settings import STORAGE_BACKEND
from documents.conf.settings import STORAGE_DIRECTORY_NAME
from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
@@ -73,12 +74,15 @@ class Document(models.Model):
verbose_name = _(u'document')
verbose_name_plural = _(u'documents')
ordering = ['-date_added']
+
def __unicode__(self):
return '%s.%s' % (self.file_filename, self.file_extension)
+
def get_fullname(self):
return os.extsep.join([self.file_filename, self.file_extension])
+
def update_mimetype(self):
try:
@@ -94,19 +98,45 @@ class Document(models.Model):
def read(self, count=1024):
return self.file.storage.open(self.file.url).read(count)
+
@models.permalink
def get_absolute_url(self):
return ('document_view', [self.id])
+
def update_checksum(self, save=True):
if self.exists():
self.checksum = unicode(CHECKSUM_FUNCTION(self.file.read()))
if save:
self.save()
+
+
+    def update_page_count(self):
+        """
+        Make sure a DocumentPage exists for every page of this document.
+
+        The number of pages comes from PAGE_COUNT_FUNCTION and page
+        numbers start at 1; pages that already exist are reused.
+        """
+        page_total = PAGE_COUNT_FUNCTION(self)
+        for number in range(1, page_total + 1):
+            DocumentPage.objects.get_or_create(document=self, page_number=number)
+
+
+    def save_to_file(self, filepath, buffer_size=1024*1024):
+        """
+        Copy the stored file of this document to filepath.
+
+        The content is read from storage in chunks of buffer_size
+        bytes. Returns filepath.
+        """
+        storage = self.file.storage.open(self.file.url)
+        try:
+            output_descriptor = open(filepath, 'wb')
+            try:
+                while 1:
+                    # Honor buffer_size: a bare read() would load the
+                    # whole file into memory in a single call.
+                    copy_buffer = storage.read(buffer_size)
+                    if not copy_buffer:
+                        break
+                    output_descriptor.write(copy_buffer)
+            finally:
+                output_descriptor.close()
+        finally:
+            # Release the storage handle even when the copy fails.
+            storage.close()
+        return filepath
+
def exists(self):
return self.file.storage.exists(self.file.url)
+
def delete(self, *args, **kwargs):
#TODO: Might not execute when done in bulk from a queryset
@@ -114,6 +144,7 @@ class Document(models.Model):
self.delete_fs_links()
super(Document, self).delete(*args, **kwargs)
+
def get_metadata_groups(self):
errors = []
metadata_groups = {}
@@ -143,6 +174,7 @@ class Document(models.Model):
document_id_list = DocumentMetadata.objects.filter(query).values_list('document', flat=True)
metadata_groups[group] = Document.objects.filter(Q(id__in=document_id_list) & ~Q(id=self.id)) or []
return metadata_groups, errors
+
def create_fs_links(self):
if FILESYSTEM_FILESERVING_ENABLE:
@@ -171,6 +203,7 @@ class Document(models.Model):
#This should be a warning not an error
pass
+
def delete_fs_links(self):
if FILESYSTEM_FILESERVING_ENABLE:
for document_metadata_index in self.documentmetadataindex_set.all():
@@ -209,11 +242,13 @@ class Document(models.Model):
except OSError, exc:
pass
+
#Remove the directory if it is empty
try:
os.removedirs(path)
except:
pass
+
def next_available_filename(document, metadata_index, path, filename, extension, suffix=0):
target = filename
@@ -344,10 +379,10 @@ class DocumentPage(models.Model):
document = models.ForeignKey(Document, verbose_name=_(u'document'))
content = models.TextField(blank=True, null=True, verbose_name=_(u'content'))
page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
- page_number = models.PositiveIntegerField(default=0, verbose_name=_(u'page number'))
+ page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'))
def __unicode__(self):
- return '%s - %s' % (self.page_number, self.page_label)
+ return '%s - %s - %s' % (self.document, self.page_number, self.page_label)
class Meta:
verbose_name = _(u'document page')
@@ -377,7 +412,7 @@ INCLUSION_CHOICES = (
(INCLUSION_OR, _(u'or')),
)
-OPERATOR_CHOCIES = (
+OPERATOR_CHOICES = (
('exact', _(u'is equal')),
('iexact', _(u'is equal (case insensitive)')),
('contains', _(u'contains')),
@@ -399,7 +434,7 @@ class MetadataGroupItem(models.Model):
metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group'))
inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES, help_text=_(u'The inclusion is ignored for the first item.'))
metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.'))
- operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES)
+ operator = models.CharField(max_length=16, choices=OPERATOR_CHOICES)
expression = models.CharField(max_length=128,
verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document. The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.'))
negated = models.BooleanField(default=False, verbose_name=_(u'negated'), help_text=_(u'Inverts the logic of the operator.'))
@@ -413,8 +448,8 @@ class MetadataGroupItem(models.Model):
verbose_name_plural = _(u'metadata group items')
-class DocumentTransformation(models.Model):
- document = models.ForeignKey(Document, verbose_name=_(u'document'))
+class DocumentPageTransformation(models.Model):
+ document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
@@ -430,10 +465,8 @@ class DocumentTransformation(models.Model):
class Meta:
ordering = ('order',)
- verbose_name = _(u'document transformation')
- verbose_name_plural = _(u'document transformations')
-
-
-
+ verbose_name = _(u'document page transformation')
+ verbose_name_plural = _(u'document page transformations')
+
register(Document, _(u'document'), ['document_type__name', 'file_mimetype', 'file_filename', 'file_extension', 'documentmetadata__value', 'documentpage__content'])
diff --git a/apps/documents/urls.py b/apps/documents/urls.py
index 62714155d5..d60622b73a 100755
--- a/apps/documents/urls.py
+++ b/apps/documents/urls.py
@@ -5,6 +5,7 @@ from django.views.generic.create_update import create_object, update_object
from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import DISPLAY_SIZE
+from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE
from converter.api import QUALITY_HIGH
@@ -19,8 +20,9 @@ urlpatterns = patterns('documents.views',
url(r'^document/(?P\d+)/delete/$', 'document_delete', (), 'document_delete'),
url(r'^document/(?P\d+)/edit/$', 'document_edit', (), 'document_edit'),
url(r'^document/(?P\d+)/edit/metadata/$', 'document_edit_metadata', (), 'document_edit_metadata'),
- url(r'^document/(?P\d+)/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
- url(r'^document/(?P\d+)/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
+ url(r'^document/(?P\d+)/display/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
+ url(r'^document/(?P\d+)/display/preview/multipage/$', 'get_document_image', {'size':MULTIPAGE_PREVIEW_SIZE}, 'document_preview_multipage'),
+ url(r'^document/(?P\d+)/display/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
url(r'^document/(?P\d+)/display/$', 'get_document_image', {'size':DISPLAY_SIZE,'quality':QUALITY_HIGH}, 'document_display'),
url(r'^document/(?P\d+)/download/$', 'document_download', (), 'document_download'),
url(r'^document/(?P\d+)/create/siblings/$', 'document_create_sibling', {'multiple':False}, 'document_create_sibling'),
diff --git a/apps/documents/utils.py b/apps/documents/utils.py
index c3cdc142dc..300c4bf0a7 100755
--- a/apps/documents/utils.py
+++ b/apps/documents/utils.py
@@ -49,7 +49,6 @@ def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*102
return path
-
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
output_descriptor, tmp_filename = tempfile.mkstemp()
diff --git a/apps/documents/views.py b/apps/documents/views.py
index 7fc7abee4d..eeb2386b0f 100755
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -18,7 +18,8 @@ from common.utils import pretty_size
from utils import from_descriptor_to_tempfile
-from models import Document, DocumentMetadata, DocumentType, MetadataType
+from models import Document, DocumentMetadata, DocumentType, MetadataType, \
+ DocumentPage
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
@@ -122,6 +123,8 @@ def upload_document_with_type(request, document_type_id, multiple=True):
instance = local_form.save()
instance.update_checksum()
instance.update_mimetype()
+ instance.update_page_count()
+
if 'document_type_available_filenames' in local_form.cleaned_data:
if local_form.cleaned_data['document_type_available_filenames']:
instance.file_filename = local_form.cleaned_data['document_type_available_filenames'].filename
@@ -154,6 +157,7 @@ def upload_document_with_type(request, document_type_id, multiple=True):
document.save()
document.update_checksum()
document.update_mimetype()
+ document.update_page_count()
except Exception, e:
messages.error(request, e)
else:
@@ -243,6 +247,7 @@ def document_view(request, document_id):
{'label':_(u'Time added'), 'field':lambda x: unicode(x.date_added.time()).split('.')[0]},
{'label':_(u'Checksum'), 'field':'checksum'},
{'label':_(u'UUID'), 'field':'uuid'},
+ {'label':_(u'Pages'), 'field':lambda x: x.documentpage_set.count()},
])
@@ -436,27 +441,33 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
raise Http404(e)
document = get_object_or_404(Document, pk=document_id)
+
+ page = int(request.GET.get('page', 1))
transformation_list = []
- for tranformation in document.documenttransformation_set.all():
- try:
- transformation_list.append(tranformation.get_transformation())
- except Exception, e:
- if request.user.is_staff:
- messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
- else:
- pass
-
+ try:
+ document_page = DocumentPage.objects.get(document=document, page_number=page)
+
+ for tranformation in document_page.documentpagetransformation_set.all():
+ try:
+ transformation_list.append(tranformation.get_transformation())
+ except Exception, e:
+ if request.user.is_staff:
+ messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
+ else:
+ pass
+ except:
+ pass
+
tranformation_string = ' '.join(transformation_list)
try:
- filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string)
-
+ filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string, page=page-1)
if filepath:
return serve_file(request, File(file=open(filepath, 'r')))
#Save to a temporary location
document.file.open()
desc = document.file.storage.open(document.file.path)
filepath = from_descriptor_to_tempfile(desc, document.checksum)
- output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string)
+ output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1)
return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg')
except Exception, e:
if size == THUMBNAIL_SIZE:
@@ -523,6 +534,7 @@ def document_transformation_list(request, document_id):
document = get_object_or_404(Document, pk=document_id)
+
return object_list(
request,
queryset=document.documenttransformation_set.all(),
@@ -539,9 +551,9 @@ def document_transformation_delete(request, document_transformation_id):
except Unauthorized, e:
raise Http404(e)
- document_transformation = get_object_or_404(DocumentTransformation, pk=document_transformation_id)
+ document_transformation = get_object_or_404(DocumentPageTransformation, pk=document_transformation_id)
- return delete_object(request, model=DocumentTransformation, object_id=document_transformation_id,
+ return delete_object(request, model=DocumentPageTransformation, object_id=document_transformation_id,
template_name='generic_confirm.html',
post_delete_redirect=reverse('document_transformation_list'),
extra_context={
diff --git a/apps/main/views.py b/apps/main/views.py
index e6dfa863e1..4e8cc6112e 100755
--- a/apps/main/views.py
+++ b/apps/main/views.py
@@ -5,6 +5,7 @@ from django.shortcuts import render_to_response
from django.template import RequestContext
from django.utils.translation import ugettext_lazy as _
+from common.conf import settings as common_settings
from documents.conf import settings as documents_settings
from converter.conf import settings as converter_settings
from ocr.conf import settings as ocr_settings
@@ -34,7 +35,9 @@ def check_settings(request):
{'name':'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', 'value':documents_settings.FILESYSTEM_FILESERVING_PATH, 'exists':True},
{'name':'DOCUMENTS_SLUGIFY_PATHS', 'value':documents_settings.FILESYSTEM_SLUGIFY_PATHS},
{'name':'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 'value':documents_settings.FILESYSTEM_MAX_RENAME_COUNT},
- {'name':'DOCUMENTS_TEMPORARY_DIRECTORY', 'value':documents_settings.TEMPORARY_DIRECTORY, 'exists':True},
+
+ #Common
+ {'name':'COMMON_TEMPORARY_DIRECTORY', 'value':common_settings.TEMPORARY_DIRECTORY, 'exists':True},
#Converter
{'name':'CONVERTER_CONVERT_PATH', 'value':converter_settings.CONVERT_PATH, 'exists':True},
diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py
index 44466ab757..81ef308819 100755
--- a/apps/ocr/__init__.py
+++ b/apps/ocr/__init__.py
@@ -5,13 +5,13 @@ from permissions.api import register_permissions
from documents.models import Document
-OCR_DOCUMENT_OCR = 'document_ocr'
+PERMISSION_OCR_DOCUMENT = 'ocr_document'
register_permissions('ocr', [
- {'name':OCR_DOCUMENT_OCR, 'label':_(u'Submit document for OCR')},
+ {'name':PERMISSION_OCR_DOCUMENT, 'label':_(u'Submit document for OCR')},
])
-submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[OCR_DOCUMENT_OCR]}}
+submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[PERMISSION_OCR_DOCUMENT]}}
register_links(Document, [submit_document], menu_name='sidebar')
diff --git a/apps/ocr/api.py b/apps/ocr/api.py
index dcf6dda2c1..b638f999d0 100755
--- a/apps/ocr/api.py
+++ b/apps/ocr/api.py
@@ -8,7 +8,7 @@ import tempfile
from django.utils.translation import ugettext as _
from documents.models import DocumentPage
-from documents.conf.settings import TEMPORARY_DIRECTORY
+from common.conf.settings import TEMPORARY_DIRECTORY
from converter.api import convert_document_for_ocr
from ocr.conf.settings import TESSERACT_PATH
diff --git a/apps/ocr/views.py b/apps/ocr/views.py
index f3a7fe3def..e37e1b9284 100755
--- a/apps/ocr/views.py
+++ b/apps/ocr/views.py
@@ -11,11 +11,11 @@ from django.utils.translation import ugettext as _
from permissions.api import check_permissions, Unauthorized
from documents.models import Document
-from ocr import OCR_DOCUMENT_OCR
+from ocr import PERMISSION_OCR_DOCUMENT
from api import ocr_document
def submit_document(request, document_id):
- permissions = [OCR_DOCUMENT_OCR]
+ permissions = [PERMISSION_OCR_DOCUMENT]
try:
check_permissions(request.user, 'ocr', permissions)
except Unauthorized, e:
diff --git a/docs/Changelog.txt b/docs/Changelog.txt
index a4dfe3cb74..d94cbe21fe 100644
--- a/docs/Changelog.txt
+++ b/docs/Changelog.txt
@@ -6,3 +6,6 @@
* Added the ability to group documents by their metadata
* New abstracted options to adjust document conversion quality (default, low, high)
* Added permissions and roles support
+* Added multipage document support (only tested on PDFs)
+ To update an existing database, run: [d.update_page_count() for d in Document.objects.all()]
+* Added support for document page transformation (no GUI yet)
diff --git a/docs/TODO b/docs/TODO
index 6f4901e708..c113d47250 100755
--- a/docs/TODO
+++ b/docs/TODO
@@ -29,6 +29,8 @@
* Permissions - DONE
* Roles - DONE
* Assign default role to new users - DONE
+* DB stored transformations - DONE
+* Recognize multi-page documents - DONE
* Document list filtering by metadata
* Filterform date filtering widget
* Validate GET data before saving file
@@ -49,7 +51,6 @@
* Scheduled maintenance (cleanup, deferred OCR's)
* Add tags to documents
* Field for document language or autodetect
-* Recognize multi-page documents
* Count pages in a PDF file http://pybrary.net/pyPdf/
* Download a document in diffent formats: (jpg, png, pdf)
* Cache.cleanup function to delete cached images when document hash changes
@@ -67,6 +68,5 @@
* Download metadata group documents as a single zip file
* Download original document or transformed document
* Include annotations in transformed documents downloads
-* DB stored transformations
* Document view temp transformations
* Implement permissions decorators
diff --git a/settings.py b/settings.py
index fc425d2f54..d0e6dc54bf 100755
--- a/settings.py
+++ b/settings.py
@@ -191,6 +191,7 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_PREVIEW_SIZE = '640x480'
#DOCUMENTS_THUMBNAIL_SIZE = '50x50'
#DOCUMENTS_DISPLAY_SIZE = '1200'
+#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'
# Groups
#DOCUMENTS_GROUP_MAX_RESULTS = 20
@@ -203,7 +204,7 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT = 200
# Misc
-#DOCUMENTS_TEMPORARY_DIRECTORY = u'/tmp'
+#COMMON_TEMPORARY_DIRECTORY = u'/tmp'
# Converter
#CONVERTER_DEFAULT_OPTIONS = u''
@@ -211,6 +212,7 @@ LOGIN_EXEMPT_URLS = (
#CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400'
#CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
#CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
+#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify'
# OCR
#OCR_TESSERACT_PATH = u'/usr/bin/tesseract'