diff --git a/apps/documents/__init__.py b/apps/documents/__init__.py index 14ff0b07a3..f47ab4bab9 100644 --- a/apps/documents/__init__.py +++ b/apps/documents/__init__.py @@ -35,7 +35,7 @@ def is_first_page(context): def is_last_page(context): - return context['page'].page_number >= context['page'].document.documentpage_set.count() + return context['page'].page_number >= context['page'].document_version.pages.count() def is_min_zoom(context): diff --git a/apps/documents/admin.py b/apps/documents/admin.py index 71d0fab939..8ef02b1cea 100644 --- a/apps/documents/admin.py +++ b/apps/documents/admin.py @@ -2,11 +2,29 @@ from django.contrib import admin from metadata.admin import DocumentMetadataInline -from documents.models import DocumentType, Document, \ - DocumentTypeFilename, DocumentPage, \ - DocumentPageTransformation, RecentDocument +from documents.models import (DocumentType, Document, + DocumentTypeFilename, DocumentPage, + DocumentPageTransformation, RecentDocument, + DocumentVersion) +class DocumentPageInline(admin.StackedInline): + model = DocumentPage + extra = 1 + classes = ('collapse-open',) + allow_add = True + + +class DocumentVersionInline(admin.StackedInline): + model = DocumentVersion + extra = 1 + classes = ('collapse-open',) + allow_add = True + inlines = [ + DocumentPageInline, + ] + + class DocumentTypeFilenameInline(admin.StackedInline): model = DocumentTypeFilename extra = 1 @@ -24,16 +42,9 @@ class DocumentPageTransformationAdmin(admin.ModelAdmin): model = DocumentPageTransformation -class DocumentPageInline(admin.StackedInline): - model = DocumentPage - extra = 1 - classes = ('collapse-open',) - allow_add = True - - class DocumentAdmin(admin.ModelAdmin): inlines = [ - DocumentMetadataInline, DocumentPageInline + DocumentMetadataInline, DocumentVersionInline ] list_display = ('uuid', 'file_filename',) diff --git a/apps/documents/forms.py b/apps/documents/forms.py index 320b60bc71..47ac8215da 100644 --- a/apps/documents/forms.py +++ b/apps/documents/forms.py @@ -100,7 +100,7 @@ class DocumentPagesCarouselWidget(forms.widgets.Widget): output = [] output.append(u'
') - for page in value.documentpage_set.all(): + for page in value.pages.all(): output.append(u'
') output.append( document_html_widget( @@ -128,7 +128,7 @@ class DocumentPreviewForm(forms.Form): document = kwargs.pop('document', None) super(DocumentPreviewForm, self).__init__(*args, **kwargs) self.fields['preview'].initial = document - self.fields['preview'].label = _(u'Document pages (%s)') % document.documentpage_set.count() + self.fields['preview'].label = _(u'Document pages (%s)') % document.pages.count() preview = forms.CharField(widget=DocumentPagesCarouselWidget()) @@ -198,7 +198,7 @@ class DocumentContentForm(forms.Form): super(DocumentContentForm, self).__init__(*args, **kwargs) content = [] self.fields['contents'].initial = u'' - for page in self.document.documentpage_set.all(): + for page in self.document.pages.all(): if page.content: content.append(page.content) content.append(u'\n\n\n - Page %s - \n\n\n' % page.page_number) diff --git a/apps/documents/models.py b/apps/documents/models.py index ed578c36b9..6d68e03ae6 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -72,17 +72,20 @@ class Document(models.Model): """ Defines a single document with it's fields and properties """ + # Base fields document_type = models.ForeignKey(DocumentType, verbose_name=_(u'document type'), null=True, blank=True) - file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file')) uuid = models.CharField(max_length=48, default=UUID_FUNCTION(), blank=True, editable=False) - file_mimetype = models.CharField(max_length=64, default='', editable=False) - file_mime_encoding = models.CharField(max_length=64, default='', editable=False) - #FAT filename can be up to 255 using LFN - file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True) - date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True) - date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True) - checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False) description = models.TextField(blank=True, null=True, verbose_name=_(u'description'), db_index=True) + date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True) + + ## Fields to migrate + #file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file')) + #file_mimetype = models.CharField(max_length=64, default='', editable=False) + #file_mime_encoding = models.CharField(max_length=64, default='', editable=False) + ##FAT filename can be up to 255 using LFN + #file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True) + #date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True) + #checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False) tags = TaggableManager() @@ -155,7 +158,8 @@ class Document(models.Model): Return a file descriptor to a document's file irrespective of the storage backend """ - return self.file.storage.open(self.file.path) + #return self.file.storage.open(self.file.path) + return self.get_latest_version().file.storage.open(self.get_latest_version().file.path) def update_checksum(self, save=True): """ @@ -163,11 +167,11 @@ class Document(models.Model): user provided checksum function """ if self.exists(): - source = self.open() - self.checksum = unicode(CHECKSUM_FUNCTION(source.read())) + source = self.get_latest_version().open() + self.get_latest_version().checksum = unicode(CHECKSUM_FUNCTION(source.read())) source.close() if save: - self.save() + self.get_latest_version().save() def update_page_count(self, save=True): handle, filepath = tempfile.mkstemp() @@ -204,7 +208,8 @@ class Document(models.Model): @property def page_count(self): - return self.documentpage_set.count() + #return self.documentpage_set.count() + return self.get_latest_version().documentpage_set.count() def save_to_file(self, filepath, buffer_size=1024 * 1024): """ @@ -229,13 +234,13 @@ class Document(models.Model): Returns a boolean value that indicates if the document's file exists in storage """ - return self.file.storage.exists(self.file.path) + return self.get_latest_version().file.storage.exists(self.get_latest_version().file.path) def apply_default_transformations(self, transformations): #Only apply default transformations on new documents - if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0: + if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.pages.all()]) == 0: for transformation in transformations: - for document_page in self.documentpage_set.all(): + for document_page in self.pages.all(): page_transformation = DocumentPageTransformation( document_page=document_page, order=0, @@ -246,7 +251,7 @@ class Document(models.Model): page_transformation.save() def get_cached_image_name(self, page): - document_page = self.documentpage_set.get(page_number=page) + document_page = self.pages.get(page_number=page) transformations, warnings = document_page.get_transformation_list() hash_value = HASH_FUNCTION(u''.join([self.checksum, unicode(page), unicode(transformations)])) return os.path.join(CACHE_PATH, hash_value), transformations @@ -300,15 +305,129 @@ class Document(models.Model): def delete(self, *args, **kwargs): super(Document, self).delete(*args, **kwargs) - return self.file.storage.delete(self.file.path) + for version in self.documentversion_set.all(): + version.file.storage.delete(version.file.path) + #return self.get_latest_version().file.storage.delete(self.get_latest_version().file.path) @property def size(self): if self.exists(): - return self.file.storage.size(self.file.path) + return self.get_latest_version().file.storage.size(self.get_latest_version().file.path) else: return None - + + # Compatibiliy methods + @property + def file(self): + return self.get_latest_version().file + + @property + def file_mimetype(self): + return self.get_latest_version().mimetype + + @property + def file_mime_encoding(self): + return self.get_latest_version().encoding + + @property + def file_filename(self): + return self.get_latest_version().filename + + @property + def date_updated(self): + return self.get_latest_version().timestamp + + #@property + #def date_added(self): + # return self.get_latest_version().timestamp + + @property + def checksum(self): + return self.get_latest_version().checksum + + @property + def pages(self): + return self.get_latest_version().pages + + + #file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file')) + #file_mimetype = models.CharField(max_length=64, default='', editable=False) + #file_mime_encoding = models.CharField(max_length=64, default='', editable=False) + ##FAT filename can be up to 255 using LFN + #file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True) + #date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True) + #checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False) + + def get_latest_version(self): + return self.documentversion_set.order_by('-timestamp')[0] + + +RELEASE_LEVEL_FINAL = 1 +RELEASE_LEVEL_ALPHA = 2 +RELEASE_LEVEL_BETA = 3 +RELEASE_LEVEL_RC = 4 +RELEASE_LEVEL_HF = 5 + +RELEASE_LEVEL_CHOICES = ( + (RELEASE_LEVEL_FINAL, _(u'final')), + (RELEASE_LEVEL_ALPHA, _(u'alpha')), + (RELEASE_LEVEL_BETA, _(u'beta')), + (RELEASE_LEVEL_RC, _(u'release candidate')), + (RELEASE_LEVEL_HF, _(u'hotfix')), +) + +class DocumentVersion(models.Model): + ''' + Model that describes a document version and it properties + ''' + document = models.ForeignKey(Document, verbose_name=_(u'document')) + mayor = models.PositiveIntegerField(verbose_name=_(u'mayor'), default=1) + minor = models.PositiveIntegerField(verbose_name=_(u'minor'), default=0) + micro = models.PositiveIntegerField(verbose_name=_(u'micro'), default=0) + release_level = models.PositiveIntegerField(choices=RELEASE_LEVEL_CHOICES, default=RELEASE_LEVEL_FINAL, verbose_name=_(u'release level')) + serial = models.PositiveIntegerField(verbose_name=_(u'serial'), default=0) + timestamp = models.DateTimeField(verbose_name=_(u'timestamp')) + + # File related fields + file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file')) + mimetype = models.CharField(max_length=64, default='', editable=False) + encoding = models.CharField(max_length=64, default='', editable=False) + filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True) + checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False) + + class Meta: + unique_together = ('document', 'mayor', 'minor', 'micro', 'release_level', 'serial') + verbose_name = _(u'document version') + verbose_name_plural = _(u'document version') + + def __unicode__(self): + return self.get_version() + + # TODO: Update timestamp + + def get_version(): + ''' + Return the formatted version information + ''' + vers = [u'%(major)i.%(minor)i' % self, ] + + if self.micro: + vers.append(u'.%(micro)i' % self) + if self.releaselevel != RELEASE_LEVEL_FINAL: + vers.append(u'%(releaselevel)s%(serial)i' % self) + return u''.join(vers) + + @property + def pages(self): + return self.documentpage_set + + def open(self): + ''' + Return a file descriptor to a document version's file irrespective of + the storage backend + ''' + return self.file.storage.open(self.file.path) + class DocumentTypeFilename(models.Model): """ @@ -332,7 +451,13 @@ class DocumentPage(models.Model): """ Model that describes a document page including it's content """ - document = models.ForeignKey(Document, verbose_name=_(u'document')) + ## This field is to be removed + #document = models.ForeignKey(Document, verbose_name=_(u'document')) + + # New parent field + document_version = models.ForeignKey(DocumentVersion, verbose_name=_(u'document version'))#, null=True, blank=True) # TODO: Remove these after datamigration + + # Unchanged fields content = models.TextField(blank=True, null=True, verbose_name=_(u'content'), db_index=True) page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label')) page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'), db_index=True) @@ -341,7 +466,7 @@ class DocumentPage(models.Model): return _(u'Page %(page_num)d out of %(total_pages)d of %(document)s') % { 'document': unicode(self.document), 'page_num': self.page_number, - 'total_pages': self.document.documentpage_set.count() + 'total_pages': self.document_version.documentpage_set.count() } class Meta: @@ -355,6 +480,11 @@ class DocumentPage(models.Model): @models.permalink def get_absolute_url(self): return ('document_page_view', [self.pk]) + + # Compatibility methods + @property + def document(self): + return self.document_version.document class ArgumentsValidator(object): @@ -421,10 +551,10 @@ class RecentDocument(models.Model): # Register the fields that will be searchable register('document', Document, _(u'document'), [ {'name': u'document_type__name', 'title': _(u'Document type')}, - {'name': u'file_mimetype', 'title': _(u'MIME type')}, - {'name': u'file_filename', 'title': _(u'Filename')}, + {'name': u'documentversion__mimetype', 'title': _(u'MIME type')}, + {'name': u'documentversion__filename', 'title': _(u'Filename')}, {'name': u'documentmetadata__value', 'title': _(u'Metadata value')}, - {'name': u'documentpage__content', 'title': _(u'Content')}, + {'name': u'documentversion__documentpage__content', 'title': _(u'Content')}, {'name': u'description', 'title': _(u'Description')}, {'name': u'tags__name', 'title': _(u'Tags')}, {'name': u'comments__comment', 'title': _(u'Comments')}, diff --git a/apps/ocr/api.py b/apps/ocr/api.py index 4d70443f92..e568ecf6c8 100644 --- a/apps/ocr/api.py +++ b/apps/ocr/api.py @@ -88,7 +88,7 @@ def do_document_ocr(queue_document): parser, if the parser fails or if there is no parser registered for the document mimetype do a visual OCR by calling tesseract """ - for document_page in queue_document.document.documentpage_set.all(): + for document_page in queue_document.document.pages.all(): try: # Try to extract text by means of a parser parse_document_page(document_page)