Initial set of model, form and API changes to support document versions

This commit is contained in:
Roberto Rosario
2011-12-02 02:51:59 -04:00
parent b38d84f663
commit d83e8b5428
5 changed files with 182 additions and 41 deletions

View File

@@ -35,7 +35,7 @@ def is_first_page(context):
def is_last_page(context):
return context['page'].page_number >= context['page'].document.documentpage_set.count()
return context['page'].page_number >= context['page'].document_version.pages.count()
def is_min_zoom(context):

View File

@@ -2,11 +2,29 @@ from django.contrib import admin
from metadata.admin import DocumentMetadataInline
from documents.models import DocumentType, Document, \
DocumentTypeFilename, DocumentPage, \
DocumentPageTransformation, RecentDocument
from documents.models import (DocumentType, Document,
DocumentTypeFilename, DocumentPage,
DocumentPageTransformation, RecentDocument,
DocumentVersion)
class DocumentPageInline(admin.StackedInline):
    """
    Stacked inline admin form for editing DocumentPage rows from the
    admin page of their parent object
    """
    model = DocumentPage
    # Show one blank extra form in addition to the existing rows
    extra = 1
    # NOTE(review): 'collapse-open' and `allow_add` are presumably options
    # consumed by the admin skin in use (e.g. Grappelli) rather than by
    # stock Django - confirm
    classes = ('collapse-open',)
    allow_add = True
class DocumentVersionInline(admin.StackedInline):
    """
    Stacked inline admin form for editing DocumentVersion rows from the
    admin page of their parent object
    """
    model = DocumentVersion
    # Show one blank extra form in addition to the existing rows
    extra = 1
    # NOTE(review): 'collapse-open' and `allow_add` are presumably options
    # consumed by the admin skin in use (e.g. Grappelli) rather than by
    # stock Django - confirm
    classes = ('collapse-open',)
    allow_add = True
    # NOTE(review): stock Django admin only processes `inlines` on a
    # ModelAdmin, not on an inline class (no nested inlines), so this list
    # likely has no effect unless a third party admin extension handles
    # it - confirm
    inlines = [
        DocumentPageInline,
    ]
class DocumentTypeFilenameInline(admin.StackedInline):
model = DocumentTypeFilename
extra = 1
@@ -24,16 +42,9 @@ class DocumentPageTransformationAdmin(admin.ModelAdmin):
model = DocumentPageTransformation
class DocumentPageInline(admin.StackedInline):
model = DocumentPage
extra = 1
classes = ('collapse-open',)
allow_add = True
class DocumentAdmin(admin.ModelAdmin):
inlines = [
DocumentMetadataInline, DocumentPageInline
DocumentMetadataInline, DocumentVersionInline
]
list_display = ('uuid', 'file_filename',)

View File

@@ -100,7 +100,7 @@ class DocumentPagesCarouselWidget(forms.widgets.Widget):
output = []
output.append(u'<div style="white-space:nowrap; overflow: auto;">')
for page in value.documentpage_set.all():
for page in value.pages.all():
output.append(u'<div style="display: inline-block; margin: 5px 10px 10px 10px;">')
output.append(
document_html_widget(
@@ -128,7 +128,7 @@ class DocumentPreviewForm(forms.Form):
document = kwargs.pop('document', None)
super(DocumentPreviewForm, self).__init__(*args, **kwargs)
self.fields['preview'].initial = document
self.fields['preview'].label = _(u'Document pages (%s)') % document.documentpage_set.count()
self.fields['preview'].label = _(u'Document pages (%s)') % document.pages.count()
preview = forms.CharField(widget=DocumentPagesCarouselWidget())
@@ -198,7 +198,7 @@ class DocumentContentForm(forms.Form):
super(DocumentContentForm, self).__init__(*args, **kwargs)
content = []
self.fields['contents'].initial = u''
for page in self.document.documentpage_set.all():
for page in self.document.pages.all():
if page.content:
content.append(page.content)
content.append(u'\n\n\n - Page %s - \n\n\n' % page.page_number)

View File

@@ -72,17 +72,20 @@ class Document(models.Model):
"""
Defines a single document with its fields and properties
"""
# Base fields
document_type = models.ForeignKey(DocumentType, verbose_name=_(u'document type'), null=True, blank=True)
file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
uuid = models.CharField(max_length=48, default=UUID_FUNCTION(), blank=True, editable=False)
file_mimetype = models.CharField(max_length=64, default='', editable=False)
file_mime_encoding = models.CharField(max_length=64, default='', editable=False)
#FAT filename can be up to 255 using LFN
file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True)
date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True)
checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
description = models.TextField(blank=True, null=True, verbose_name=_(u'description'), db_index=True)
date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True)
## Fields to migrate
#file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
#file_mimetype = models.CharField(max_length=64, default='', editable=False)
#file_mime_encoding = models.CharField(max_length=64, default='', editable=False)
##FAT filename can be up to 255 using LFN
#file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
#date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True)
#checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
tags = TaggableManager()
@@ -155,7 +158,8 @@ class Document(models.Model):
Return a file descriptor to a document's file irrespective of
the storage backend
"""
return self.file.storage.open(self.file.path)
#return self.file.storage.open(self.file.path)
return self.get_latest_version().file.storage.open(self.get_latest_version().file.path)
def update_checksum(self, save=True):
    """
    Open the file of the document's latest version and update that
    version's checksum field using the user provided checksum function

    Does nothing when the file does not exist in storage.  When `save`
    is true the updated version is saved to the database.
    """
    if self.exists():
        # Fetch the latest version only once: every call to
        # get_latest_version() returns a new model instance, so setting
        # the checksum on one instance and saving another one would
        # silently discard the new value
        version = self.get_latest_version()
        source = version.open()
        try:
            version.checksum = unicode(CHECKSUM_FUNCTION(source.read()))
        finally:
            # Release the file descriptor even if the checksum function
            # or the read fails
            source.close()
        if save:
            version.save()
def update_page_count(self, save=True):
handle, filepath = tempfile.mkstemp()
@@ -204,7 +208,8 @@ class Document(models.Model):
@property
def page_count(self):
return self.documentpage_set.count()
#return self.documentpage_set.count()
return self.get_latest_version().documentpage_set.count()
def save_to_file(self, filepath, buffer_size=1024 * 1024):
"""
@@ -229,13 +234,13 @@ class Document(models.Model):
Returns a boolean value that indicates if the document's file
exists in storage
"""
return self.file.storage.exists(self.file.path)
return self.get_latest_version().file.storage.exists(self.get_latest_version().file.path)
def apply_default_transformations(self, transformations):
#Only apply default transformations on new documents
if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.pages.all()]) == 0:
for transformation in transformations:
for document_page in self.documentpage_set.all():
for document_page in self.pages.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
@@ -246,7 +251,7 @@ class Document(models.Model):
page_transformation.save()
def get_cached_image_name(self, page):
document_page = self.documentpage_set.get(page_number=page)
document_page = self.pages.get(page_number=page)
transformations, warnings = document_page.get_transformation_list()
hash_value = HASH_FUNCTION(u''.join([self.checksum, unicode(page), unicode(transformations)]))
return os.path.join(CACHE_PATH, hash_value), transformations
@@ -300,15 +305,129 @@ class Document(models.Model):
def delete(self, *args, **kwargs):
    """
    Delete the document and remove the stored file of each one of its
    versions from the storage backend
    """
    # Collect the storage/path pairs before deleting: the database delete
    # cascades to the document's versions, so querying
    # documentversion_set afterwards finds nothing and the files would
    # be left orphaned in storage
    stored_files = [
        (version.file.storage, version.file.path)
        for version in self.documentversion_set.all()
    ]
    super(Document, self).delete(*args, **kwargs)
    for storage, path in stored_files:
        storage.delete(path)
@property
def size(self):
if self.exists():
return self.file.storage.size(self.file.path)
return self.get_latest_version().file.storage.size(self.get_latest_version().file.path)
else:
return None
# Compatibility methods
@property
def file(self):
    """
    Compatibility accessor: the file of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.file
@property
def file_mimetype(self):
    """
    Compatibility accessor: the mimetype of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.mimetype
@property
def file_mime_encoding(self):
    """
    Compatibility accessor: the encoding of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.encoding
@property
def file_filename(self):
    """
    Compatibility accessor: the filename of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.filename
@property
def date_updated(self):
    """
    Compatibility accessor: the timestamp of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.timestamp
#@property
#def date_added(self):
# return self.get_latest_version().timestamp
@property
def checksum(self):
    """
    Compatibility accessor: the checksum of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.checksum
@property
def pages(self):
    """
    Compatibility accessor: the pages of the document's latest version
    """
    latest_version = self.get_latest_version()
    return latest_version.pages
#file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
#file_mimetype = models.CharField(max_length=64, default='', editable=False)
#file_mime_encoding = models.CharField(max_length=64, default='', editable=False)
##FAT filename can be up to 255 using LFN
#file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
#date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True)
#checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
def get_latest_version(self):
    """
    Return the document version with the most recent timestamp

    Indexing the first element raises IndexError when the document has
    no versions.
    """
    newest_first = self.documentversion_set.order_by('-timestamp')
    return newest_first[0]
# Release level identifiers stored in DocumentVersion.release_level;
# numbered consecutively starting at 1
(
    RELEASE_LEVEL_FINAL,
    RELEASE_LEVEL_ALPHA,
    RELEASE_LEVEL_BETA,
    RELEASE_LEVEL_RC,
    RELEASE_LEVEL_HF,
) = range(1, 6)

# (stored value, human readable label) pairs for the release level field
RELEASE_LEVEL_CHOICES = tuple(zip(
    (
        RELEASE_LEVEL_FINAL,
        RELEASE_LEVEL_ALPHA,
        RELEASE_LEVEL_BETA,
        RELEASE_LEVEL_RC,
        RELEASE_LEVEL_HF,
    ),
    (
        _(u'final'),
        _(u'alpha'),
        _(u'beta'),
        _(u'release candidate'),
        _(u'hotfix'),
    ),
))
class DocumentVersion(models.Model):
    '''
    Model that describes a document version and its properties

    Each version belongs to one document, carries its own version number
    (mayor.minor.micro plus release level and serial) and holds the file
    and file related data of that version.
    '''
    document = models.ForeignKey(Document, verbose_name=_(u'document'))
    # NOTE(review): "mayor" looks like a misspelling of "major"; the field
    # name and label are left untouched because renaming them changes the
    # database schema and the translation catalog
    mayor = models.PositiveIntegerField(verbose_name=_(u'mayor'), default=1)
    minor = models.PositiveIntegerField(verbose_name=_(u'minor'), default=0)
    micro = models.PositiveIntegerField(verbose_name=_(u'micro'), default=0)
    release_level = models.PositiveIntegerField(choices=RELEASE_LEVEL_CHOICES, default=RELEASE_LEVEL_FINAL, verbose_name=_(u'release level'))
    serial = models.PositiveIntegerField(verbose_name=_(u'serial'), default=0)
    timestamp = models.DateTimeField(verbose_name=_(u'timestamp'))

    # File related fields
    file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
    mimetype = models.CharField(max_length=64, default='', editable=False)
    encoding = models.CharField(max_length=64, default='', editable=False)
    filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
    checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)

    class Meta:
        # A document cannot have two versions with the same version number
        unique_together = ('document', 'mayor', 'minor', 'micro', 'release_level', 'serial')
        verbose_name = _(u'document version')
        verbose_name_plural = _(u'document versions')

    def __unicode__(self):
        return self.get_version()

    # TODO: Update timestamp

    def get_version(self):
        '''
        Return the formatted version information

        The result is "<mayor>.<minor>", followed by ".<micro>" when micro
        is not zero, followed by the release level label and the serial
        when the release level is not final.
        '''
        # A model instance is not a mapping, so the field values are
        # passed to the format strings explicitly
        vers = [u'%i.%i' % (self.mayor, self.minor)]
        if self.micro:
            vers.append(u'.%i' % self.micro)
        if self.release_level != RELEASE_LEVEL_FINAL:
            # get_release_level_display() is generated by Django for
            # fields with choices and returns the human readable label
            vers.append(u'%s%i' % (self.get_release_level_display(), self.serial))
        return u''.join(vers)

    @property
    def pages(self):
        '''
        Return the related manager of the pages of this version
        '''
        return self.documentpage_set

    def open(self):
        '''
        Return a file descriptor to a document version's file irrespective of
        the storage backend
        '''
        return self.file.storage.open(self.file.path)
class DocumentTypeFilename(models.Model):
"""
@@ -332,7 +451,13 @@ class DocumentPage(models.Model):
"""
Model that describes a document page including its content
"""
document = models.ForeignKey(Document, verbose_name=_(u'document'))
## This field is to be removed
#document = models.ForeignKey(Document, verbose_name=_(u'document'))
# New parent field
document_version = models.ForeignKey(DocumentVersion, verbose_name=_(u'document version'))#, null=True, blank=True) # TODO: Remove these after datamigration
# Unchanged fields
content = models.TextField(blank=True, null=True, verbose_name=_(u'content'), db_index=True)
page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'), db_index=True)
@@ -341,7 +466,7 @@ class DocumentPage(models.Model):
return _(u'Page %(page_num)d out of %(total_pages)d of %(document)s') % {
'document': unicode(self.document),
'page_num': self.page_number,
'total_pages': self.document.documentpage_set.count()
'total_pages': self.document_version.documentpage_set.count()
}
class Meta:
@@ -355,6 +480,11 @@ class DocumentPage(models.Model):
@models.permalink
def get_absolute_url(self):
return ('document_page_view', [self.pk])
# Compatibility methods
@property
def document(self):
    """
    Compatibility accessor: the document that owns this page's
    document version
    """
    parent_version = self.document_version
    return parent_version.document
class ArgumentsValidator(object):
@@ -421,10 +551,10 @@ class RecentDocument(models.Model):
# Register the fields that will be searchable
register('document', Document, _(u'document'), [
{'name': u'document_type__name', 'title': _(u'Document type')},
{'name': u'file_mimetype', 'title': _(u'MIME type')},
{'name': u'file_filename', 'title': _(u'Filename')},
{'name': u'documentversion__mimetype', 'title': _(u'MIME type')},
{'name': u'documentversion__filename', 'title': _(u'Filename')},
{'name': u'documentmetadata__value', 'title': _(u'Metadata value')},
{'name': u'documentpage__content', 'title': _(u'Content')},
{'name': u'documentversion__documentpage__content', 'title': _(u'Content')},
{'name': u'description', 'title': _(u'Description')},
{'name': u'tags__name', 'title': _(u'Tags')},
{'name': u'comments__comment', 'title': _(u'Comments')},

View File

@@ -88,7 +88,7 @@ def do_document_ocr(queue_document):
parser, if the parser fails or if there is no parser registered for
the document mimetype do a visual OCR by calling tesseract
"""
for document_page in queue_document.document.documentpage_set.all():
for document_page in queue_document.document.pages.all():
try:
# Try to extract text by means of a parser
parse_document_page(document_page)