def execute_identify(input_filepath, arguments):
    """
    Run the identify binary (IDENTIFY_PATH) against input_filepath.

    arguments is a string holding the extra command line options; it
    is split with shlex, so quoted values stay together as a single
    argument.

    Returns a tuple: (exit status, stderr output, stdout output).
    """
    command = [IDENTIFY_PATH]
    command.extend(shlex.split(str(arguments)))
    command.append(input_filepath)

    proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains both pipes while it waits for the process.
    # Calling wait() before reading can block forever once the child
    # fills one of the pipe buffers.
    output, error_string = proc.communicate()
    return (proc.returncode, error_string, output)


def get_page_count(input_filepath):
    """
    Return the number of pages identify reports for input_filepath.

    Raises ConvertError when identify exits with a non zero status.
    Raises ValueError when the output of identify is not a number.
    """
    status, error_string, output = execute_identify(input_filepath, '-format %n')
    if status:
        # Raised from outside of any try/finally on purpose: returning
        # from a finally clause would silently discard this exception.
        errors = get_errors(error_string)
        raise ConvertError(status, errors)

    # NOTE(review): identify may print the %n value once per frame with
    # no separator (e.g. '333' for a three page file) - confirm against
    # the installed ImageMagick version with a multi page document.
    return int(output)
def _copy_stream(read, write, buffer_size):
    """
    Feed the write callable with chunks of at most buffer_size bytes
    taken from the read callable, until read returns an empty value.
    """
    while True:
        copy_buffer = read(buffer_size)
        if not copy_buffer:
            break
        write(copy_buffer)


#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024*1024):
    """
    Copy a file from source to dest. source and dest
    can either be strings or any object with a read or
    write method, like StringIO for example.

    Both ends are closed when the copy ends, whether it succeeded or
    not, and also when they were passed in as already open objects.
    """
    if not hasattr(source, 'read'):
        source = open(source, 'rb')
    try:
        if not hasattr(dest, 'write'):
            dest = open(dest, 'wb')
        try:
            _copy_stream(source.read, dest.write, buffer_size)
        finally:
            dest.close()
    finally:
        source.close()


def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
    """
    Save the content of input_descriptor as filename inside
    TEMPORARY_DIRECTORY and return the resulting path.

    input_descriptor is read in chunks of buffer_size bytes and is
    closed before returning, also when the copy fails.
    """
    path = os.path.join(TEMPORARY_DIRECTORY, filename)

    try:
        output_descriptor = open(path, 'wb')
        try:
            _copy_stream(input_descriptor.read, output_descriptor.write, buffer_size)
        finally:
            output_descriptor.close()
    finally:
        input_descriptor.close()
    return path


def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
    """
    Save the content of input_descriptor into a new file created with
    tempfile.mkstemp and return the path of that file.

    input_descriptor is read in chunks of buffer_size bytes and is
    closed before returning, also when the copy fails.
    """
    output_descriptor, tmp_filename = tempfile.mkstemp()

    try:
        # Wrap the raw descriptor in a file object: os.write returns
        # the number of bytes actually written, which may be less than
        # the chunk, while a file object write stores all of it.
        output_file = os.fdopen(output_descriptor, 'wb')
        try:
            _copy_stream(input_descriptor.read, output_file.write, buffer_size)
        finally:
            # Closing the file object also closes the descriptor.
            output_file.close()
    finally:
        input_descriptor.close()
    return tmp_filename
import StagingFile -from documents.conf import settings as documents_settings +from common.conf import settings as common_settings PERMISSION_DOCUMENT_CREATE = 'document_create' PERMISSION_DOCUMENT_PROPERTIES_EDIT = 'document_properties_edit' @@ -43,18 +43,18 @@ document_edit_metadata = {'text':_('edit metadata'), 'view':'document_edit_metad document_preview = {'text':_('preview'), 'class':'fancybox', 'view':'document_preview', 'args':'object.id', 'famfam':'magnifier', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_VIEW]}} document_download = {'text':_('download'), 'view':'document_download', 'args':'object.id', 'famfam':'page_save', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_DOWNLOAD]}} -document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}} -document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}} +#document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}} +#document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}} staging_file_preview = {'text':_('preview'), 'class':'fancybox', 'view':'staging_file_preview', 'args':'object.id', 'famfam':'drive_magnify'} staging_file_delete = {'text':_('delete'), 'view':'staging_file_delete', 'args':'object.id', 'famfam':'drive_delete'} -register_links(Document, [document_view, 
document_edit, document_edit_metadata, document_delete, document_download, document_transformation_list], menu_name='sidebar') +register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download], menu_name='sidebar') register_links(Document, [document_list, document_create, document_create_multiple, document_create_sibling], menu_name='sidebar') register_links(['document_list', 'document_create', 'document_create_multiple', 'upload_document_with_type', 'upload_multiple_documents_with_type'], [document_list, document_create, document_create_multiple], menu_name='sidebar') -register_links(DocumentTransformation, [document_transformation_delete]) +#register_links(DocumentTransformation, [document_transformation_delete]) @@ -76,4 +76,4 @@ register_menu([ document_list ],'famfam':'page','position':4}]) -TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp() +TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp() diff --git a/apps/documents/admin.py b/apps/documents/admin.py index 863d77ac4d..d384c6e3b2 100755 --- a/apps/documents/admin.py +++ b/apps/documents/admin.py @@ -3,7 +3,7 @@ from django.contrib import admin from models import MetadataType, DocumentType, Document, \ DocumentTypeMetadataType, DocumentMetadata, DocumentTypeFilename, \ MetadataIndex, DocumentMetadataIndex, DocumentPage, MetadataGroup, \ - MetadataGroupItem, DocumentTransformation + MetadataGroupItem, DocumentPageTransformation class MetadataTypeAdmin(admin.ModelAdmin): @@ -48,9 +48,13 @@ class DocumentMetadataIndexInline(admin.StackedInline): extra = 1 classes = ('collapse-open',) allow_add = True - readonly_fields = ('metadata_index', 'filename') + readonly_fields = ('suffix', 'metadata_index', 'filename') +class DocumentPageTransformationAdmin(admin.ModelAdmin): + model = DocumentPageTransformation + + class 
DocumentPageInline(admin.StackedInline): model = DocumentPage extra = 1 @@ -58,16 +62,9 @@ class DocumentPageInline(admin.StackedInline): allow_add = True -class DocumentTransformationline(admin.StackedInline): - model = DocumentTransformation - extra = 1 - classes = ('collapse-open',) - allow_add = True - - class DocumentAdmin(admin.ModelAdmin): - inlines = [DocumentMetadataInline, DocumentMetadataIndexInline, - DocumentTransformationline, DocumentPageInline] + inlines = [DocumentMetadataInline, DocumentMetadataIndexInline, + DocumentPageInline] list_display = ('uuid', 'file_filename', 'file_extension') @@ -87,4 +84,5 @@ admin.site.register(MetadataType, MetadataTypeAdmin) admin.site.register(DocumentType, DocumentTypeAdmin) admin.site.register(Document, DocumentAdmin) admin.site.register(MetadataGroup, MetadataGroupAdmin) +admin.site.register(DocumentPageTransformation, DocumentPageTransformationAdmin) diff --git a/apps/documents/conf/settings.py b/apps/documents/conf/settings.py index 42314d2c78..72671d396c 100755 --- a/apps/documents/conf/settings.py +++ b/apps/documents/conf/settings.py @@ -1,10 +1,13 @@ import datetime import hashlib import uuid +import tempfile from django.conf import settings from django.contrib.auth.models import User +from converter.api import get_page_count + from documents.storage import DocumentStorage default_available_functions = { @@ -29,6 +32,7 @@ DELETE_LOCAL_ORIGINAL = getattr(settings, 'DOCUMENTS_DELETE_LOCAL_ORIGINAL', Fal # Saving CHECKSUM_FUNCTION = getattr(settings, 'DOCUMENTS_CHECKSUM_FUNCTION', lambda x: hashlib.sha256(x).hexdigest()) UUID_FUNCTION = getattr(settings, 'DOCUMENTS_UUID_FUNCTION', lambda:unicode(uuid.uuid4())) +PAGE_COUNT_FUNCTION = getattr(settings, 'DOCUMENTS_PAGE_COUNT_FUNCTION', lambda x: get_page_count(x.save_to_file(tempfile.mkstemp()[1]))) # Storage STORAGE_BACKEND = getattr(settings, 'DOCUMENTS_STORAGE_BACKEND', DocumentStorage) @@ -36,6 +40,7 @@ STORAGE_DIRECTORY_NAME = getattr(settings, 
'DOCUMENTS_STORAGE_DIRECTORY_NAME', ' # Usage PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_PREVIEW_SIZE', '640x480') +MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '160x120') THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50') DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200') @@ -48,8 +53,3 @@ FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERV FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents') FILESYSTEM_SLUGIFY_PATHS = getattr(settings, 'DOCUMENTS_SLUGIFY_PATHS', False) FILESYSTEM_MAX_RENAME_COUNT = getattr(settings, 'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 200) - -#misc -TEMPORARY_DIRECTORY = getattr(settings, 'DOCUMENTS_TEMPORARY_DIRECTORY', u'/tmp') - - diff --git a/apps/documents/forms.py b/apps/documents/forms.py index c12270ed72..f0e6369efe 100755 --- a/apps/documents/forms.py +++ b/apps/documents/forms.py @@ -24,8 +24,22 @@ from documents.conf.settings import AVAILABLE_MODELS class ImageWidget(forms.widgets.Widget): def render(self, name, value, attrs=None): output = [] - output.append('' % (reverse('document_display', args=[value.id]), - reverse('document_preview', args=[value.id]))) + + page_count = value.documentpage_set.count() + if page_count > 1: + output.append('
%s
' % ugettext(u'Pages')) + for page_index in range(value.documentpage_set.count()): + output.append('%(page)s)' % { + 'url':reverse('document_display', args=[value.id]), + 'img':reverse('document_preview_multipage', args=[value.id]), + 'page':page_index+1, + }) + else: + output.append('' % { + 'url':reverse('document_display', args=[value.id]), + 'img':reverse('document_preview', args=[value.id]), + }) + output.append('
def update_page_count(self):
    """
    Make sure a DocumentPage row exists for every page of this document.

    The total comes from PAGE_COUNT_FUNCTION, called with this
    document. Pages are numbered starting at 1; rows that already
    exist are left as they are.
    """
    total_pages = PAGE_COUNT_FUNCTION(self)
    for page_number in range(1, total_pages + 1):
        DocumentPage.objects.get_or_create(
            document=self, page_number=page_number)


def save_to_file(self, filepath, buffer_size=1024*1024):
    """
    Copy the stored file of this document to filepath and return
    filepath.

    The content is read in chunks of at most buffer_size bytes, so the
    whole file is never held in memory. Both the storage handle and
    the output file are closed before returning, also when the copy
    fails.
    """
    input_descriptor = self.file.storage.open(self.file.url)
    try:
        output_descriptor = open(filepath, 'wb')
        try:
            while True:
                copy_buffer = input_descriptor.read(buffer_size)
                if not copy_buffer:
                    break
                output_descriptor.write(copy_buffer)
        finally:
            output_descriptor.close()
    finally:
        input_descriptor.close()
    return filepath
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number')) def __unicode__(self): - return '%s - %s' % (self.page_number, self.page_label) + return '%s - %s - %s' % (self.document, self.page_number, self.page_label) class Meta: verbose_name = _(u'document page') @@ -377,7 +412,7 @@ INCLUSION_CHOICES = ( (INCLUSION_OR, _(u'or')), ) -OPERATOR_CHOCIES = ( +OPERATOR_CHOICES = ( ('exact', _(u'is equal')), ('iexact', _(u'is equal (case insensitive)')), ('contains', _(u'contains')), @@ -399,7 +434,7 @@ class MetadataGroupItem(models.Model): metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group')) inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES, help_text=_(u'The inclusion is ignored for the first item.')) metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.')) - operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES) + operator = models.CharField(max_length=16, choices=OPERATOR_CHOICES) expression = models.CharField(max_length=128, verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document. 
The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.')) negated = models.BooleanField(default=False, verbose_name=_(u'negated'), help_text=_(u'Inverts the logic of the operator.')) @@ -413,8 +448,8 @@ class MetadataGroupItem(models.Model): verbose_name_plural = _(u'metadata group items') -class DocumentTransformation(models.Model): - document = models.ForeignKey(Document, verbose_name=_(u'document')) +class DocumentPageTransformation(models.Model): + document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page')) order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order')) transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation')) arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}')) @@ -430,10 +465,8 @@ class DocumentTransformation(models.Model): class Meta: ordering = ('order',) - verbose_name = _(u'document transformation') - verbose_name_plural = _(u'document transformations') - - - + verbose_name = _(u'document page transformation') + verbose_name_plural = _(u'document page transformations') + register(Document, _(u'document'), ['document_type__name', 'file_mimetype', 'file_filename', 'file_extension', 'documentmetadata__value', 'documentpage__content']) diff --git a/apps/documents/urls.py b/apps/documents/urls.py index 62714155d5..d60622b73a 100755 --- a/apps/documents/urls.py +++ b/apps/documents/urls.py @@ -5,6 +5,7 @@ from django.views.generic.create_update import create_object, update_object from documents.conf.settings import PREVIEW_SIZE from documents.conf.settings import THUMBNAIL_SIZE from documents.conf.settings import DISPLAY_SIZE +from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE from converter.api import QUALITY_HIGH @@ -19,8 +20,9 @@ urlpatterns = 
patterns('documents.views', url(r'^document/(?P\d+)/delete/$', 'document_delete', (), 'document_delete'), url(r'^document/(?P\d+)/edit/$', 'document_edit', (), 'document_edit'), url(r'^document/(?P\d+)/edit/metadata/$', 'document_edit_metadata', (), 'document_edit_metadata'), - url(r'^document/(?P\d+)/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'), - url(r'^document/(?P\d+)/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'), + url(r'^document/(?P\d+)/display/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'), + url(r'^document/(?P\d+)/display/preview/multipage/$', 'get_document_image', {'size':MULTIPAGE_PREVIEW_SIZE}, 'document_preview_multipage'), + url(r'^document/(?P\d+)/display/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'), url(r'^document/(?P\d+)/display/$', 'get_document_image', {'size':DISPLAY_SIZE,'quality':QUALITY_HIGH}, 'document_display'), url(r'^document/(?P\d+)/download/$', 'document_download', (), 'document_download'), url(r'^document/(?P\d+)/create/siblings/$', 'document_create_sibling', {'multiple':False}, 'document_create_sibling'), diff --git a/apps/documents/utils.py b/apps/documents/utils.py index c3cdc142dc..300c4bf0a7 100755 --- a/apps/documents/utils.py +++ b/apps/documents/utils.py @@ -49,7 +49,6 @@ def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*102 return path - def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024): output_descriptor, tmp_filename = tempfile.mkstemp() diff --git a/apps/documents/views.py b/apps/documents/views.py index 7fc7abee4d..eeb2386b0f 100755 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -18,7 +18,8 @@ from common.utils import pretty_size from utils import from_descriptor_to_tempfile -from models import Document, DocumentMetadata, DocumentType, MetadataType +from models import Document, DocumentMetadata, DocumentType, 
MetadataType, \ + DocumentPage from forms import DocumentTypeSelectForm, DocumentCreateWizard, \ MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \ StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \ @@ -122,6 +123,8 @@ def upload_document_with_type(request, document_type_id, multiple=True): instance = local_form.save() instance.update_checksum() instance.update_mimetype() + instance.update_page_count() + if 'document_type_available_filenames' in local_form.cleaned_data: if local_form.cleaned_data['document_type_available_filenames']: instance.file_filename = local_form.cleaned_data['document_type_available_filenames'].filename @@ -154,6 +157,7 @@ def upload_document_with_type(request, document_type_id, multiple=True): document.save() document.update_checksum() document.update_mimetype() + document.update_page_count() except Exception, e: messages.error(request, e) else: @@ -243,6 +247,7 @@ def document_view(request, document_id): {'label':_(u'Time added'), 'field':lambda x: unicode(x.date_added.time()).split('.')[0]}, {'label':_(u'Checksum'), 'field':'checksum'}, {'label':_(u'UUID'), 'field':'uuid'}, + {'label':_(u'Pages'), 'field':lambda x: x.documentpage_set.count()}, ]) @@ -436,27 +441,33 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ raise Http404(e) document = get_object_or_404(Document, pk=document_id) + + page = int(request.GET.get('page', 1)) transformation_list = [] - for tranformation in document.documenttransformation_set.all(): - try: - transformation_list.append(tranformation.get_transformation()) - except Exception, e: - if request.user.is_staff: - messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e))) - else: - pass - + try: + document_page = DocumentPage.objects.get(document=document, page_number=page) + + for tranformation in document_page.documentpagetransformation_set.all(): + try: + transformation_list.append(tranformation.get_transformation()) + 
except Exception, e: + if request.user.is_staff: + messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e))) + else: + pass + except: + pass + tranformation_string = ' '.join(transformation_list) try: - filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string) - + filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string, page=page-1) if filepath: return serve_file(request, File(file=open(filepath, 'r'))) #Save to a temporary location document.file.open() desc = document.file.storage.open(document.file.path) filepath = from_descriptor_to_tempfile(desc, document.checksum) - output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string) + output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1) return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg') except Exception, e: if size == THUMBNAIL_SIZE: @@ -523,6 +534,7 @@ def document_transformation_list(request, document_id): document = get_object_or_404(Document, pk=document_id) + return object_list( request, queryset=document.documenttransformation_set.all(), @@ -539,9 +551,9 @@ def document_transformation_delete(request, document_transformation_id): except Unauthorized, e: raise Http404(e) - document_transformation = get_object_or_404(DocumentTransformation, pk=document_transformation_id) + document_transformation = get_object_or_404(DocumentPageTransformation, pk=document_transformation_id) - return delete_object(request, model=DocumentTransformation, object_id=document_transformation_id, + return delete_object(request, model=DocumentPageTransformation, object_id=document_transformation_id, template_name='generic_confirm.html', post_delete_redirect=reverse('document_transformation_list'), extra_context={ diff --git a/apps/main/views.py 
b/apps/main/views.py index e6dfa863e1..4e8cc6112e 100755 --- a/apps/main/views.py +++ b/apps/main/views.py @@ -5,6 +5,7 @@ from django.shortcuts import render_to_response from django.template import RequestContext from django.utils.translation import ugettext_lazy as _ +from common.conf import settings as common_settings from documents.conf import settings as documents_settings from converter.conf import settings as converter_settings from ocr.conf import settings as ocr_settings @@ -34,7 +35,9 @@ def check_settings(request): {'name':'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', 'value':documents_settings.FILESYSTEM_FILESERVING_PATH, 'exists':True}, {'name':'DOCUMENTS_SLUGIFY_PATHS', 'value':documents_settings.FILESYSTEM_SLUGIFY_PATHS}, {'name':'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 'value':documents_settings.FILESYSTEM_MAX_RENAME_COUNT}, - {'name':'DOCUMENTS_TEMPORARY_DIRECTORY', 'value':documents_settings.TEMPORARY_DIRECTORY, 'exists':True}, + + #Common + {'name':'COMMON_TEMPORARY_DIRECTORY', 'value':common_settings.TEMPORARY_DIRECTORY, 'exists':True}, #Converter {'name':'CONVERTER_CONVERT_PATH', 'value':converter_settings.CONVERT_PATH, 'exists':True}, diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 44466ab757..81ef308819 100755 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -5,13 +5,13 @@ from permissions.api import register_permissions from documents.models import Document -OCR_DOCUMENT_OCR = 'document_ocr' +PERMISSION_OCR_DOCUMENT = 'ocr_document' register_permissions('ocr', [ - {'name':OCR_DOCUMENT_OCR, 'label':_(u'Submit document for OCR')}, + {'name':PERMISSION_OCR_DOCUMENT, 'label':_(u'Submit document for OCR')}, ]) -submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[OCR_DOCUMENT_OCR]}} +submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 
'permissions':{'namespace':'ocr', 'permissions':[PERMISSION_OCR_DOCUMENT]}} register_links(Document, [submit_document], menu_name='sidebar') diff --git a/apps/ocr/api.py b/apps/ocr/api.py index dcf6dda2c1..b638f999d0 100755 --- a/apps/ocr/api.py +++ b/apps/ocr/api.py @@ -8,7 +8,7 @@ import tempfile from django.utils.translation import ugettext as _ from documents.models import DocumentPage -from documents.conf.settings import TEMPORARY_DIRECTORY +from common.conf.settings import TEMPORARY_DIRECTORY from converter.api import convert_document_for_ocr from ocr.conf.settings import TESSERACT_PATH diff --git a/apps/ocr/views.py b/apps/ocr/views.py index f3a7fe3def..e37e1b9284 100755 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -11,11 +11,11 @@ from django.utils.translation import ugettext as _ from permissions.api import check_permissions, Unauthorized from documents.models import Document -from ocr import OCR_DOCUMENT_OCR +from ocr import PERMISSION_OCR_DOCUMENT from api import ocr_document def submit_document(request, document_id): - permissions = [OCR_DOCUMENT_OCR] + permissions = [PERMISSION_OCR_DOCUMENT] try: check_permissions(request.user, 'ocr', permissions) except Unauthorized, e: diff --git a/docs/Changelog.txt b/docs/Changelog.txt index a4dfe3cb74..d94cbe21fe 100644 --- a/docs/Changelog.txt +++ b/docs/Changelog.txt @@ -6,3 +6,6 @@ * Added the ability to group documents by their metadata * New abstracted options to adjust document conversion quality (default, low, high) * Added permissions and roles support +* Added multipage documents support (only tested on pdfs) + To update a previous database do: [d.update_page_count() for d in Document.objects.all()] +* Added support for document page transformation (no GUI yet) diff --git a/docs/TODO b/docs/TODO index 6f4901e708..c113d47250 100755 --- a/docs/TODO +++ b/docs/TODO @@ -29,6 +29,8 @@ * Permissions - DONE * Roles - DONE * Assign default role to new users - DONE +* DB stored transformations - DONE +* 
Recognize multi-page documents - DONE * Document list filtering by metadata * Filterform date filtering widget * Validate GET data before saving file @@ -49,7 +51,6 @@ * Scheduled maintenance (cleanup, deferred OCR's) * Add tags to documents * Field for document language or autodetect -* Recognize multi-page documents * Count pages in a PDF file http://pybrary.net/pyPdf/ * Download a document in diffent formats: (jpg, png, pdf) * Cache.cleanup function to delete cached images when document hash changes @@ -67,6 +68,5 @@ * Download metadata group documents as a single zip file * Download original document or transformed document * Include annotations in transformed documents downloads -* DB stored transformations * Document view temp transformations * Implement permissions decorators diff --git a/settings.py b/settings.py index fc425d2f54..d0e6dc54bf 100755 --- a/settings.py +++ b/settings.py @@ -191,6 +191,7 @@ LOGIN_EXEMPT_URLS = ( #DOCUMENTS_PREVIEW_SIZE = '640x480' #DOCUMENTS_THUMBNAIL_SIZE = '50x50' #DOCUMENTS_DISPLAY_SIZE = '1200' +#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120' # Groups #DOCUMENTS_GROUP_MAX_RESULTS = 20 @@ -203,7 +204,7 @@ LOGIN_EXEMPT_URLS = ( #DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT = 200 # Misc -#DOCUMENTS_TEMPORARY_DIRECTORY = u'/tmp' +#COMMON_TEMPORARY_DIRECTORY = u'/tmp' # Converter #CONVERTER_DEFAULT_OPTIONS = u'' @@ -211,6 +212,7 @@ LOGIN_EXEMPT_URLS = ( #CONVERTER_HIGH_QUALITY_OPTIONS = u'-density 400' #CONVERTER_CONVERT_PATH = u'/usr/bin/convert' #CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200' +#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify' # OCR #OCR_TESSERACT_PATH = u'/usr/bin/tesseract'