Added multipage document support and document page transformation

2011-02-14 00:18:16 -04:00
parent 65d1e5b176
commit 06d7e5a46a
21 changed files with 219 additions and 73 deletions
--- a/apps/common/conf/init.py
+++ b/apps/common/conf/init.py
--- a/apps/common/conf/settings.py
+++ b/apps/common/conf/settings.py
@@ -0,0 +1,3 @@
+from django.conf import settings
+
+TEMPORARY_DIRECTORY = getattr(settings, 'COMMON_TEMPORARY_DIRECTORY', u'/tmp')
--- a/apps/converter/init.py
+++ b/apps/converter/init.py
@@ -1,5 +1,5 @@
 import tempfile

-from documents.conf import settings as documents_settings
+from common.conf import settings as common_settings

-TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
+TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
--- a/apps/converter/api.py
+++ b/apps/converter/api.py
@@ -6,10 +6,8 @@ import shutil

 from django.template.defaultfilters import slugify

-
-from documents.utils import from_descriptor_to_tempfile
-
 from converter.conf.settings import CONVERT_PATH
+from converter.conf.settings import IDENTIFY_PATH
 from converter.conf.settings import OCR_OPTIONS
 from converter.conf.settings import DEFAULT_OPTIONS
 from converter.conf.settings import LOW_QUALITY_OPTIONS
@@ -18,6 +16,7 @@ from converter.conf.settings import HIGH_QUALITY_OPTIONS
 #from converter.conf.settings import UNOCONV_PATH

 from converter import TEMPORARY_DIRECTORY
+from utils import from_descriptor_to_tempfile


 QUALITY_DEFAULT = 'quality_default'
@@ -73,6 +72,16 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
    return (proc.wait(), proc.stderr.read())


+def execute_identify(input_filepath, arguments):
+    command = []
+    command.append(IDENTIFY_PATH)
+    command.extend(shlex.split(str(arguments)))
+    command.append(input_filepath)
+
+    proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+    return (proc.wait(), proc.stderr.read(), proc.stdout.read())
+
+
 def cache_cleanup(input_filepath, size, page=0, format='jpg'):
    filepath = create_image_cache_filename(input_filepath, size, page, format)
    try:
@@ -126,7 +135,6 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
    try:
        input_arg = '%s[%s]' % (input_filepath, page)
        extra_options += ' -resize %s' % size
-        print 'extra_options', extra_options
        status, error_string = execute_convert(input_arg, extra_options, '%s:%s' % (format, output_filepath), quality=quality)
        if status:
            errors = get_errors(error_string)
@@ -138,6 +146,15 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
        return output_filepath


+def get_page_count(input_filepath):
+    try:
+        status, error_string, output = execute_identify(input_filepath, '-format %n')
+        if status:
+            errors = get_errors(error_string)
+            raise ConvertError(status, errors)
+    finally:
+        return int(output)
+
 #TODO: slugify OCR_OPTIONS and add to file name to cache
 def convert_document_for_ocr(document, page=0, format='tif'):
    #Extract document file
--- a/apps/converter/conf/settings.py
+++ b/apps/converter/conf/settings.py
@@ -5,6 +5,7 @@ ugettext = lambda s: s


 CONVERT_PATH = getattr(settings, 'CONVERTER_CONVERT_PATH', u'/usr/bin/convert')
+IDENTIFY_PATH = getattr(settings, 'CONVERTER_IDENTIFY_PATH', u'/usr/bin/identify')
 OCR_OPTIONS = getattr(settings, 'CONVERTER_OCR_OPTIONS', u'-colorspace Gray -depth 8 -resample 200x200')
 DEFAULT_OPTIONS = getattr(settings, 'CONVERTER_DEFAULT_OPTIONS', u'')
 LOW_QUALITY_OPTIONS = getattr(settings, 'CONVERTER_LOW_QUALITY_OPTIONS', u'')
--- a/apps/converter/utils.py
+++ b/apps/converter/utils.py
@@ -0,0 +1,59 @@
+import os
+import tempfile
+
+from converter import TEMPORARY_DIRECTORY
+
+#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
+def copyfile(source, dest, buffer_size=1024*1024):
+    """
+    Copy a file from source to dest. source and dest
+    can either be strings or any object with a read or
+    write method, like StringIO for example.
+    """
+    if not hasattr(source, 'read'):
+        source = open(source, 'rb')
+    if not hasattr(dest, 'write'):
+        dest = open(dest, 'wb')
+
+    while 1:
+        copy_buffer = source.read(buffer_size)
+        if copy_buffer:
+            dest.write(copy_buffer)
+        else:
+            break
+
+    source.close()
+    dest.close()
+
+
+def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
+    path = os.path.join(TEMPORARY_DIRECTORY, filename)
+    
+    output_descriptor = open(path, 'wb')
+    
+    while 1:
+        copy_buffer = input_descriptor.read(buffer_size)
+        if copy_buffer:
+            output_descriptor.write(copy_buffer)
+        else:
+            break
+
+    input_descriptor.close()
+    output_descriptor.close()
+    return path
+
+
+def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
+    output_descriptor, tmp_filename = tempfile.mkstemp()
+    
+    while 1:
+        copy_buffer = input_descriptor.read(buffer_size)
+        if copy_buffer:
+            #output_descriptor.write(copy_buffer)
+            os.write(output_descriptor, copy_buffer)
+        else:
+            break
+
+    input_descriptor.close()
+    os.close(output_descriptor)
+    return tmp_filename
--- a/apps/documents/init.py
+++ b/apps/documents/init.py
@@ -9,10 +9,10 @@ from common.utils import pretty_size

 from permissions.api import register_permissions

-from models import Document, DocumentTransformation
+from models import Document, DocumentPage, DocumentPageTransformation
 from staging import StagingFile

-from documents.conf import settings as documents_settings
+from common.conf import settings as common_settings

 PERMISSION_DOCUMENT_CREATE = 'document_create'
 PERMISSION_DOCUMENT_PROPERTIES_EDIT = 'document_properties_edit'
@@ -43,18 +43,18 @@ document_edit_metadata = {'text':_('edit metadata'), 'view':'document_edit_metad
 document_preview = {'text':_('preview'), 'class':'fancybox', 'view':'document_preview', 'args':'object.id', 'famfam':'magnifier', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_VIEW]}}
 document_download = {'text':_('download'), 'view':'document_download', 'args':'object.id', 'famfam':'page_save', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_DOWNLOAD]}}

-document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
-document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
+#document_transformation_list = {'text':_(u'transformations'), 'view':'document_transformation_list', 'args':'object.id', 'famfam':'page_paintbrush', 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}
+#document_transformation_delete = {'text':_('delete'), 'view':'document_transformation_delete', 'args':'object.id', 'famfam':'delete'}#, 'permissions':{'namespace':'documents', 'permissions':[PERMISSION_DOCUMENT_TRANSFORM]}}


 staging_file_preview = {'text':_('preview'), 'class':'fancybox', 'view':'staging_file_preview', 'args':'object.id', 'famfam':'drive_magnify'}
 staging_file_delete = {'text':_('delete'), 'view':'staging_file_delete', 'args':'object.id', 'famfam':'drive_delete'}

-register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download, document_transformation_list], menu_name='sidebar')
+register_links(Document, [document_view, document_edit, document_edit_metadata, document_delete, document_download], menu_name='sidebar')
 register_links(Document, [document_list, document_create, document_create_multiple, document_create_sibling], menu_name='sidebar')
 register_links(['document_list', 'document_create', 'document_create_multiple', 'upload_document_with_type', 'upload_multiple_documents_with_type'], [document_list, document_create, document_create_multiple], menu_name='sidebar')

-register_links(DocumentTransformation, [document_transformation_delete])
+#register_links(DocumentTransformation, [document_transformation_delete])



@@ -76,4 +76,4 @@ register_menu([
        document_list
    ],'famfam':'page','position':4}])

-TEMPORARY_DIRECTORY = documents_settings.TEMPORARY_DIRECTORY if documents_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
+TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
--- a/apps/documents/admin.py
+++ b/apps/documents/admin.py
@@ -3,7 +3,7 @@ from django.contrib import admin
 from models import MetadataType, DocumentType, Document, \
    DocumentTypeMetadataType, DocumentMetadata, DocumentTypeFilename, \
    MetadataIndex, DocumentMetadataIndex, DocumentPage, MetadataGroup, \
-    MetadataGroupItem, DocumentTransformation
+    MetadataGroupItem, DocumentPageTransformation


 class MetadataTypeAdmin(admin.ModelAdmin):
@@ -48,7 +48,11 @@ class DocumentMetadataIndexInline(admin.StackedInline):
    extra = 1
    classes = ('collapse-open',)
    allow_add = True
-    readonly_fields = ('metadata_index', 'filename')
+    readonly_fields = ('suffix', 'metadata_index', 'filename')
+
+
+class DocumentPageTransformationAdmin(admin.ModelAdmin):
+    model = DocumentPageTransformation
    

 class DocumentPageInline(admin.StackedInline):
@@ -58,16 +62,9 @@ class DocumentPageInline(admin.StackedInline):
    allow_add = True


-class DocumentTransformationline(admin.StackedInline):
-    model = DocumentTransformation
-    extra = 1
-    classes = ('collapse-open',)
-    allow_add = True
-    
-
 class DocumentAdmin(admin.ModelAdmin):
    inlines = [DocumentMetadataInline, DocumentMetadataIndexInline,
-        DocumentTransformationline, DocumentPageInline]
+        DocumentPageInline]
    list_display = ('uuid', 'file_filename', 'file_extension')


@@ -87,4 +84,5 @@ admin.site.register(MetadataType, MetadataTypeAdmin)
 admin.site.register(DocumentType, DocumentTypeAdmin)
 admin.site.register(Document, DocumentAdmin)
 admin.site.register(MetadataGroup, MetadataGroupAdmin)
+admin.site.register(DocumentPageTransformation, DocumentPageTransformationAdmin)
                
--- a/apps/documents/conf/settings.py
+++ b/apps/documents/conf/settings.py
@@ -1,10 +1,13 @@
 import datetime
 import hashlib
 import uuid
+import tempfile

 from django.conf import settings
 from django.contrib.auth.models import User

+from converter.api import get_page_count
+
 from documents.storage import DocumentStorage

 default_available_functions = {
@@ -29,6 +32,7 @@ DELETE_LOCAL_ORIGINAL = getattr(settings, 'DOCUMENTS_DELETE_LOCAL_ORIGINAL', Fal
 # Saving
 CHECKSUM_FUNCTION = getattr(settings, 'DOCUMENTS_CHECKSUM_FUNCTION', lambda x: hashlib.sha256(x).hexdigest())
 UUID_FUNCTION = getattr(settings, 'DOCUMENTS_UUID_FUNCTION', lambda:unicode(uuid.uuid4()))
+PAGE_COUNT_FUNCTION = getattr(settings, 'DOCUMENTS_PAGE_COUNT_FUNCTION', lambda x: get_page_count(x.save_to_file(tempfile.mkstemp()[1])))

 # Storage
 STORAGE_BACKEND = getattr(settings, 'DOCUMENTS_STORAGE_BACKEND', DocumentStorage)
@@ -36,6 +40,7 @@ STORAGE_DIRECTORY_NAME = getattr(settings, 'DOCUMENTS_STORAGE_DIRECTORY_NAME', '

 # Usage
 PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_PREVIEW_SIZE', '640x480')
+MULTIPAGE_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_MULTIPAGE_PREVIEW_SIZE', '160x120')
 THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
 DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1200')

@@ -48,8 +53,3 @@ FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERV
 FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents')
 FILESYSTEM_SLUGIFY_PATHS = getattr(settings, 'DOCUMENTS_SLUGIFY_PATHS', False)
 FILESYSTEM_MAX_RENAME_COUNT = getattr(settings, 'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 200)
-
-#misc
-TEMPORARY_DIRECTORY = getattr(settings, 'DOCUMENTS_TEMPORARY_DIRECTORY', u'/tmp')
-
-
--- a/apps/documents/forms.py
+++ b/apps/documents/forms.py
@@ -24,8 +24,22 @@ from documents.conf.settings import AVAILABLE_MODELS
 class ImageWidget(forms.widgets.Widget):
    def render(self, name, value, attrs=None):
        output = []
-        output.append('<a class="fancybox-noscaling" href="%s"><img width="300" src="%s" /></a>' % (reverse('document_display', args=[value.id]),
-            reverse('document_preview', args=[value.id])))
+
+        page_count = value.documentpage_set.count()
+        if page_count > 1:
+            output.append('<br /><span class="famfam active famfam-page_white_copy"></span>%s<br />' % ugettext(u'Pages'))
+            for page_index in range(value.documentpage_set.count()):
+                output.append('<span>%(page)s)<a rel="gallery_1" class="fancybox-noscaling" href="%(url)s?page=%(page)s"><img src="%(img)s?page=%(page)s" /></a></span>' % {
+                    'url':reverse('document_display', args=[value.id]),
+                    'img':reverse('document_preview_multipage', args=[value.id]),
+                    'page':page_index+1,
+                    })
+        else:
+            output.append('<a class="fancybox-noscaling" href="%(url)s"><img width="300" src="%(img)s" /></a>' % {
+                'url':reverse('document_display', args=[value.id]),
+                'img':reverse('document_preview', args=[value.id]),
+                })
+
        output.append('<br /><span class="famfam active famfam-magnifier"></span>%s' % ugettext(u'Click on the image for full size view'))
        #output.append(super(ImageWidget, self).render(name, value, attrs))
        return mark_safe(u''.join(output))  
@@ -58,7 +72,7 @@ class DocumentPreviewForm(forms.Form):
        super(DocumentPreviewForm, self).__init__(*args, **kwargs)
        self.fields['preview'].initial = self.document
                    
-    preview = forms.CharField(widget=ImageWidget)    
+    preview = forms.CharField(widget=ImageWidget())


 class DocumentForm_view(DetailForm):
--- a/apps/documents/models.py
+++ b/apps/documents/models.py
@@ -20,6 +20,7 @@ from documents.conf.settings import AVAILABLE_FUNCTIONS
 from documents.conf.settings import AVAILABLE_MODELS
 from documents.conf.settings import CHECKSUM_FUNCTION
 from documents.conf.settings import UUID_FUNCTION
+from documents.conf.settings import PAGE_COUNT_FUNCTION
 from documents.conf.settings import STORAGE_BACKEND
 from documents.conf.settings import STORAGE_DIRECTORY_NAME
 from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
@@ -74,12 +75,15 @@ class Document(models.Model):
        verbose_name_plural = _(u'documents')
        ordering = ['-date_added']

+        
    def __unicode__(self):
        return '%s.%s' % (self.file_filename, self.file_extension)

+      
    def get_fullname(self):
        return os.extsep.join([self.file_filename, self.file_extension])

+        
    def update_mimetype(self):
        try:
            mime = magic.Magic(mime=True)
@@ -95,25 +99,52 @@ class Document(models.Model):
    def read(self, count=1024):
        return self.file.storage.open(self.file.url).read(count)

+        
    @models.permalink
    def get_absolute_url(self):
        return ('document_view', [self.id])

+
    def update_checksum(self, save=True):
        if self.exists():
            self.checksum = unicode(CHECKSUM_FUNCTION(self.file.read()))
            if save:
                self.save()

+    
+    def update_page_count(self):
+        total_pages = PAGE_COUNT_FUNCTION(self)
+        for page_number in range(total_pages):
+            document_page, created = DocumentPage.objects.get_or_create(
+                document=self, page_number=page_number+1)
+
+        
+    def save_to_file(self, filepath, buffer_size=1024*1024):
+        storage = self.file.storage.open(self.file.url)
+        output_descriptor = open(filepath, 'wb')
+        while 1:
+            copy_buffer = storage.read()
+            if copy_buffer:
+                output_descriptor.write(copy_buffer)
+            else:
+                break
+    
+        #input_descriptor.close()
+        output_descriptor.close()
+        return filepath       
+       
+    
    def exists(self):
        return self.file.storage.exists(self.file.url)

+        
    def delete(self, *args, **kwargs):
        #TODO: Might not execute when done in bulk from a queryset
        #topics/db/queries.html#topics-db-queries-delete
        self.delete_fs_links()
        super(Document, self).delete(*args, **kwargs)

+
    def get_metadata_groups(self):
        errors = []
        metadata_groups = {}
@@ -144,6 +175,7 @@ class Document(models.Model):
                metadata_groups[group] = Document.objects.filter(Q(id__in=document_id_list) & ~Q(id=self.id)) or []
        return metadata_groups, errors

+        
    def create_fs_links(self):
        if FILESYSTEM_FILESERVING_ENABLE:
            if not self.exists():
@@ -171,6 +203,7 @@ class Document(models.Model):
                        #This should be a warning not an error
                        pass

+
    def delete_fs_links(self):
        if FILESYSTEM_FILESERVING_ENABLE:
            for document_metadata_index in self.documentmetadataindex_set.all():
@@ -209,12 +242,14 @@ class Document(models.Model):
                except OSError, exc:
                    pass

+
                #Remove the directory if it is empty
                try:
                    os.removedirs(path)
                except:
                    pass

+           
 def next_available_filename(document, metadata_index, path, filename, extension, suffix=0): 
    target = filename
    if suffix:
@@ -344,10 +379,10 @@ class DocumentPage(models.Model):
    document = models.ForeignKey(Document, verbose_name=_(u'document'))
    content = models.TextField(blank=True, null=True, verbose_name=_(u'content'))
    page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
-    page_number = models.PositiveIntegerField(default=0, verbose_name=_(u'page number'))
+    page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'))
        
    def __unicode__(self):
-        return '%s - %s' % (self.page_number, self.page_label)
+        return '%s - %s - %s' % (self.document, self.page_number, self.page_label)

    class Meta:
        verbose_name = _(u'document page')
@@ -377,7 +412,7 @@ INCLUSION_CHOICES = (
    (INCLUSION_OR, _(u'or')),
 )

-OPERATOR_CHOCIES = (
+OPERATOR_CHOICES = (
    ('exact', _(u'is equal')),
    ('iexact', _(u'is equal (case insensitive)')),
    ('contains', _(u'contains')),
@@ -399,7 +434,7 @@ class MetadataGroupItem(models.Model):
    metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group'))
    inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES, help_text=_(u'The inclusion is ignored for the first item.'))
    metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.'))
-    operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES)
+    operator = models.CharField(max_length=16, choices=OPERATOR_CHOICES)
    expression = models.CharField(max_length=128,
        verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document.  The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.'))
    negated = models.BooleanField(default=False, verbose_name=_(u'negated'), help_text=_(u'Inverts the logic of the operator.'))
@@ -413,8 +448,8 @@ class MetadataGroupItem(models.Model):
        verbose_name_plural = _(u'metadata group items')

    
-class DocumentTransformation(models.Model):
-    document = models.ForeignKey(Document, verbose_name=_(u'document'))
+class DocumentPageTransformation(models.Model):
+    document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
    order = models.PositiveIntegerField(blank=True, null=True, verbose_name=_(u'order'))
    transformation = models.CharField(choices=TRANFORMATION_CHOICES, max_length=128, verbose_name=_(u'transformation'))
    arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use directories to indentify arguments, example: {\'degrees\':90}'))
@@ -430,10 +465,8 @@ class DocumentTransformation(models.Model):

    class Meta:
        ordering = ('order',)
-        verbose_name = _(u'document transformation')
-        verbose_name_plural = _(u'document transformations')
-    
-    
+        verbose_name = _(u'document page transformation')
+        verbose_name_plural = _(u'document page transformations')
    
  
 register(Document, _(u'document'), ['document_type__name', 'file_mimetype', 'file_filename', 'file_extension', 'documentmetadata__value', 'documentpage__content'])
--- a/apps/documents/urls.py
+++ b/apps/documents/urls.py
@@ -5,6 +5,7 @@ from django.views.generic.create_update import create_object, update_object
 from documents.conf.settings import PREVIEW_SIZE
 from documents.conf.settings import THUMBNAIL_SIZE
 from documents.conf.settings import DISPLAY_SIZE
+from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE

 from converter.api import QUALITY_HIGH

@@ -19,8 +20,9 @@ urlpatterns = patterns('documents.views',
    url(r'^document/(?P<document_id>\d+)/delete/$', 'document_delete', (), 'document_delete'),
    url(r'^document/(?P<document_id>\d+)/edit/$', 'document_edit', (), 'document_edit'),
    url(r'^document/(?P<document_id>\d+)/edit/metadata/$', 'document_edit_metadata', (), 'document_edit_metadata'),
-    url(r'^document/(?P<document_id>\d+)/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
-    url(r'^document/(?P<document_id>\d+)/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
+    url(r'^document/(?P<document_id>\d+)/display/preview/$', 'get_document_image', {'size':PREVIEW_SIZE}, 'document_preview'),
+    url(r'^document/(?P<document_id>\d+)/display/preview/multipage/$', 'get_document_image', {'size':MULTIPAGE_PREVIEW_SIZE}, 'document_preview_multipage'),
+    url(r'^document/(?P<document_id>\d+)/display/thumbnail/$', 'get_document_image', {'size':THUMBNAIL_SIZE}, 'document_thumbnail'),
    url(r'^document/(?P<document_id>\d+)/display/$', 'get_document_image', {'size':DISPLAY_SIZE,'quality':QUALITY_HIGH}, 'document_display'),
    url(r'^document/(?P<document_id>\d+)/download/$', 'document_download', (), 'document_download'),
    url(r'^document/(?P<document_id>\d+)/create/siblings/$', 'document_create_sibling', {'multiple':False}, 'document_create_sibling'),
--- a/apps/documents/utils.py
+++ b/apps/documents/utils.py
@@ -49,7 +49,6 @@ def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*102
    return path


-
 def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
    output_descriptor, tmp_filename = tempfile.mkstemp()
    
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -18,7 +18,8 @@ from common.utils import pretty_size

 from utils import from_descriptor_to_tempfile

-from models import Document, DocumentMetadata, DocumentType, MetadataType
+from models import Document, DocumentMetadata, DocumentType, MetadataType, \
+    DocumentPage
 from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
        MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
        StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
@@ -122,6 +123,8 @@ def upload_document_with_type(request, document_type_id, multiple=True):
                instance = local_form.save()
                instance.update_checksum()
                instance.update_mimetype()
+                instance.update_page_count()
+
                if 'document_type_available_filenames' in local_form.cleaned_data:
                    if local_form.cleaned_data['document_type_available_filenames']:
                        instance.file_filename = local_form.cleaned_data['document_type_available_filenames'].filename
@@ -154,6 +157,7 @@ def upload_document_with_type(request, document_type_id, multiple=True):
                        document.save()
                        document.update_checksum()
                        document.update_mimetype()
+                        document.update_page_count()
                    except Exception, e:
                        messages.error(request, e)   
                    else:
@@ -243,6 +247,7 @@ def document_view(request, document_id):
        {'label':_(u'Time added'), 'field':lambda x: unicode(x.date_added.time()).split('.')[0]},
        {'label':_(u'Checksum'), 'field':'checksum'},
        {'label':_(u'UUID'), 'field':'uuid'},
+        {'label':_(u'Pages'), 'field':lambda x: x.documentpage_set.count()},
    ])

        
@@ -436,8 +441,13 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
        raise Http404(e)
        
    document = get_object_or_404(Document, pk=document_id)
+
+    page = int(request.GET.get('page', 1))
    transformation_list = []
-    for tranformation in document.documenttransformation_set.all():
+    try:
+        document_page = DocumentPage.objects.get(document=document, page_number=page)
+    
+        for tranformation in document_page.documentpagetransformation_set.all():
            try:
                transformation_list.append(tranformation.get_transformation())
            except Exception, e:
@@ -445,18 +455,19 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
                    messages.warning(request, _(u'Transformation %s error: %s' % (tranformation, e)))
                else:
                    pass
+    except:
+        pass

    tranformation_string = ' '.join(transformation_list)
    try:
-        filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string)
-
+        filepath = in_image_cache(document.checksum, size=size, quality=quality, extra_options=tranformation_string, page=page-1)
        if filepath:
            return serve_file(request, File(file=open(filepath, 'r')))
        #Save to a temporary location
        document.file.open()
        desc = document.file.storage.open(document.file.path)
        filepath = from_descriptor_to_tempfile(desc, document.checksum)
-        output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string)
+        output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1)
        return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg')
    except Exception, e:
        if size == THUMBNAIL_SIZE:
@@ -523,6 +534,7 @@ def document_transformation_list(request, document_id):
    
    document = get_object_or_404(Document, pk=document_id)
    
+    
    return object_list(
        request,
        queryset=document.documenttransformation_set.all(),
@@ -539,9 +551,9 @@ def document_transformation_delete(request, document_transformation_id):
    except Unauthorized, e:
        raise Http404(e)
            
-    document_transformation = get_object_or_404(DocumentTransformation, pk=document_transformation_id)
+    document_transformation = get_object_or_404(DocumentPageTransformation, pk=document_transformation_id)
        
-    return delete_object(request, model=DocumentTransformation, object_id=document_transformation_id, 
+    return delete_object(request, model=DocumentPageTransformation, object_id=document_transformation_id, 
        template_name='generic_confirm.html', 
        post_delete_redirect=reverse('document_transformation_list'),
        extra_context={
--- a/apps/main/views.py
+++ b/apps/main/views.py
@@ -5,6 +5,7 @@ from django.shortcuts import render_to_response
 from django.template import RequestContext
 from django.utils.translation import ugettext_lazy as _

+from common.conf import settings as common_settings
 from documents.conf import settings as documents_settings
 from converter.conf import settings as converter_settings
 from ocr.conf import settings as ocr_settings
@@ -34,7 +35,9 @@ def check_settings(request):
        {'name':'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', 'value':documents_settings.FILESYSTEM_FILESERVING_PATH, 'exists':True},
        {'name':'DOCUMENTS_SLUGIFY_PATHS', 'value':documents_settings.FILESYSTEM_SLUGIFY_PATHS},
        {'name':'DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT', 'value':documents_settings.FILESYSTEM_MAX_RENAME_COUNT},
-        {'name':'DOCUMENTS_TEMPORARY_DIRECTORY', 'value':documents_settings.TEMPORARY_DIRECTORY, 'exists':True},
+        
+        #Common
+        {'name':'COMMON_TEMPORARY_DIRECTORY', 'value':common_settings.TEMPORARY_DIRECTORY, 'exists':True},

        #Converter
        {'name':'CONVERTER_CONVERT_PATH', 'value':converter_settings.CONVERT_PATH, 'exists':True},
--- a/apps/ocr/init.py
+++ b/apps/ocr/init.py
@@ -5,13 +5,13 @@ from permissions.api import register_permissions

 from documents.models import Document

-OCR_DOCUMENT_OCR = 'document_ocr'
+PERMISSION_OCR_DOCUMENT = 'ocr_document'

 register_permissions('ocr', [
-    {'name':OCR_DOCUMENT_OCR, 'label':_(u'Submit document for OCR')},
+    {'name':PERMISSION_OCR_DOCUMENT, 'label':_(u'Submit document for OCR')},
 ])

-submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[OCR_DOCUMENT_OCR]}}
+submit_document = {'text':_('submit to OCR queue'), 'view':'submit_document', 'args':'object.id', 'famfam':'page_lightning', 'permissions':{'namespace':'ocr', 'permissions':[PERMISSION_OCR_DOCUMENT]}}

 register_links(Document, [submit_document], menu_name='sidebar')

--- a/apps/ocr/api.py
+++ b/apps/ocr/api.py
@@ -8,7 +8,7 @@ import tempfile
 from django.utils.translation import ugettext as _

 from documents.models import DocumentPage
-from documents.conf.settings import TEMPORARY_DIRECTORY
+from common.conf.settings import TEMPORARY_DIRECTORY
 from converter.api import convert_document_for_ocr

 from ocr.conf.settings import TESSERACT_PATH
--- a/apps/ocr/views.py
+++ b/apps/ocr/views.py
@@ -11,11 +11,11 @@ from django.utils.translation import ugettext as _
 from permissions.api import check_permissions, Unauthorized
 from documents.models import Document

-from ocr import OCR_DOCUMENT_OCR
+from ocr import PERMISSION_OCR_DOCUMENT
 from api import ocr_document

 def submit_document(request, document_id):
-    permissions = [OCR_DOCUMENT_OCR]
+    permissions = [PERMISSION_OCR_DOCUMENT]
    try:
        check_permissions(request.user, 'ocr', permissions)
    except Unauthorized, e:
--- a/docs/Changelog.txt
+++ b/docs/Changelog.txt
@@ -6,3 +6,6 @@
 * Added the ability to group documents by their metadata
 * New abstracted options to adjust document conversion quality (default, low, high)
 * Added permissions and roles support
+* Added multipage document support (only tested on PDFs)
+    To update an existing database, run: [d.update_page_count() for d in Document.objects.all()]
+* Added support for document page transformation (no GUI yet)
--- a/docs/TODO
+++ b/docs/TODO
@@ -29,6 +29,8 @@
 * Permissions                                                          - DONE
 * Roles                                                                - DONE
 * Assign default role to new users                                     - DONE
+* DB stored transformations                                            - DONE
+* Recognize multi-page documents                                       - DONE
 * Document list filtering by metadata
 * Filterform date filtering widget
 * Validate GET data before saving file
@@ -49,7 +51,6 @@
 * Scheduled maintenance (cleanup, deferred OCR's)
 * Add tags to documents
 * Field for document language or autodetect
-* Recognize multi-page documents
 * Count pages in a PDF file http://pybrary.net/pyPdf/
 * Download a document in diffent formats: (jpg, png, pdf)
 * Cache.cleanup function to delete cached images when document hash changes
@@ -67,6 +68,5 @@
 * Download metadata group documents as a single zip file
 * Download original document or transformed document
 * Include annotations in transformed documents downloads
-* DB stored transformations
 * Document view temp transformations
 * Implement permissions decorators
--- a/settings.py
+++ b/settings.py
@@ -191,6 +191,7 @@ LOGIN_EXEMPT_URLS = (
 #DOCUMENTS_PREVIEW_SIZE = '640x480'
 #DOCUMENTS_THUMBNAIL_SIZE = '50x50'
 #DOCUMENTS_DISPLAY_SIZE = '1200'
+#DOCUMENTS_MULTIPAGE_PREVIEW_SIZE = '160x120'

 # Groups
 #DOCUMENTS_GROUP_MAX_RESULTS = 20
@@ -203,7 +204,7 @@ LOGIN_EXEMPT_URLS = (
 #DOCUMENTS_FILESYSTEM_MAX_RENAME_COUNT = 200

 # Misc
-#DOCUMENTS_TEMPORARY_DIRECTORY = u'/tmp'
+#COMMON_TEMPORARY_DIRECTORY = u'/tmp'

 # Converter
 #CONVERTER_DEFAULT_OPTIONS = u''
@@ -211,6 +212,7 @@ LOGIN_EXEMPT_URLS = (
 #CONVERTER_HIGH_QUALITY_OPTIONS =  u'-density 400'
 #CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
 #CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
+#CONVERTER_IDENTIFY_PATH = u'/usr/bin/identify'

 # OCR
 #OCR_TESSERACT_PATH = u'/usr/bin/tesseract'