diff --git a/apps/documents/__init__.py b/apps/documents/__init__.py
index 14ff0b07a3..f47ab4bab9 100644
--- a/apps/documents/__init__.py
+++ b/apps/documents/__init__.py
@@ -35,7 +35,7 @@ def is_first_page(context):
def is_last_page(context):
- return context['page'].page_number >= context['page'].document.documentpage_set.count()
+    # The page is the last one when its number reaches the page count of
+    # the document version it belongs to.
+    current_page = context['page']
+    return current_page.page_number >= current_page.document_version.pages.count()
def is_min_zoom(context):
diff --git a/apps/documents/admin.py b/apps/documents/admin.py
index 71d0fab939..8ef02b1cea 100644
--- a/apps/documents/admin.py
+++ b/apps/documents/admin.py
@@ -2,11 +2,29 @@ from django.contrib import admin
from metadata.admin import DocumentMetadataInline
-from documents.models import DocumentType, Document, \
- DocumentTypeFilename, DocumentPage, \
- DocumentPageTransformation, RecentDocument
+from documents.models import (DocumentType, Document,
+ DocumentTypeFilename, DocumentPage,
+ DocumentPageTransformation, RecentDocument,
+ DocumentVersion)
+class DocumentPageInline(admin.StackedInline):
+ # Stacked admin inline for DocumentPage rows; it is listed in
+ # DocumentVersionInline.inlines.
+ model = DocumentPage
+ extra = 1
+ classes = ('collapse-open',)
+ allow_add = True
+
+
+class DocumentVersionInline(admin.StackedInline):
+ # Stacked admin inline for DocumentVersion rows; used by DocumentAdmin.
+ model = DocumentVersion
+ extra = 1
+ classes = ('collapse-open',)
+ allow_add = True
+ # NOTE(review): stock django.contrib.admin does not appear to honor an
+ # `inlines` attribute on an inline class (no nested inlines), so
+ # DocumentPageInline may never be rendered from here -- confirm.
+ inlines = [
+ DocumentPageInline,
+ ]
+
+
class DocumentTypeFilenameInline(admin.StackedInline):
model = DocumentTypeFilename
extra = 1
@@ -24,16 +42,9 @@ class DocumentPageTransformationAdmin(admin.ModelAdmin):
model = DocumentPageTransformation
-class DocumentPageInline(admin.StackedInline):
- model = DocumentPage
- extra = 1
- classes = ('collapse-open',)
- allow_add = True
-
-
class DocumentAdmin(admin.ModelAdmin):
inlines = [
- DocumentMetadataInline, DocumentPageInline
+ DocumentMetadataInline, DocumentVersionInline
]
list_display = ('uuid', 'file_filename',)
diff --git a/apps/documents/forms.py b/apps/documents/forms.py
index 320b60bc71..47ac8215da 100644
--- a/apps/documents/forms.py
+++ b/apps/documents/forms.py
@@ -100,7 +100,7 @@ class DocumentPagesCarouselWidget(forms.widgets.Widget):
output = []
output.append(u'
')
- for page in value.documentpage_set.all():
+ for page in value.pages.all():
output.append(u'
')
output.append(
document_html_widget(
@@ -128,7 +128,7 @@ class DocumentPreviewForm(forms.Form):
document = kwargs.pop('document', None)
super(DocumentPreviewForm, self).__init__(*args, **kwargs)
self.fields['preview'].initial = document
- self.fields['preview'].label = _(u'Document pages (%s)') % document.documentpage_set.count()
+ self.fields['preview'].label = _(u'Document pages (%s)') % document.pages.count()
preview = forms.CharField(widget=DocumentPagesCarouselWidget())
@@ -198,7 +198,7 @@ class DocumentContentForm(forms.Form):
super(DocumentContentForm, self).__init__(*args, **kwargs)
content = []
self.fields['contents'].initial = u''
- for page in self.document.documentpage_set.all():
+ for page in self.document.pages.all():
if page.content:
content.append(page.content)
content.append(u'\n\n\n - Page %s - \n\n\n' % page.page_number)
diff --git a/apps/documents/models.py b/apps/documents/models.py
index ed578c36b9..6d68e03ae6 100644
--- a/apps/documents/models.py
+++ b/apps/documents/models.py
@@ -72,17 +72,20 @@ class Document(models.Model):
"""
Defines a single document with it's fields and properties
"""
+ # Base fields
document_type = models.ForeignKey(DocumentType, verbose_name=_(u'document type'), null=True, blank=True)
- file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
uuid = models.CharField(max_length=48, default=UUID_FUNCTION(), blank=True, editable=False)
- file_mimetype = models.CharField(max_length=64, default='', editable=False)
- file_mime_encoding = models.CharField(max_length=64, default='', editable=False)
- #FAT filename can be up to 255 using LFN
- file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
- date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True)
- date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True)
- checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
description = models.TextField(blank=True, null=True, verbose_name=_(u'description'), db_index=True)
+ date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True)
+
+ ## Fields to migrate
+ #file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
+ #file_mimetype = models.CharField(max_length=64, default='', editable=False)
+ #file_mime_encoding = models.CharField(max_length=64, default='', editable=False)
+ ##FAT filename can be up to 255 using LFN
+ #file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
+ #date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True)
+ #checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
tags = TaggableManager()
@@ -155,7 +158,8 @@ class Document(models.Model):
Return a file descriptor to a document's file irrespective of
the storage backend
"""
- return self.file.storage.open(self.file.path)
+ #return self.file.storage.open(self.file.path)
+ return self.get_latest_version().file.storage.open(self.get_latest_version().file.path)
def update_checksum(self, save=True):
"""
@@ -163,11 +167,11 @@ class Document(models.Model):
user provided checksum function
"""
if self.exists():
- source = self.open()
- self.checksum = unicode(CHECKSUM_FUNCTION(source.read()))
+            # Fetch the latest version once: each get_latest_version() call
+            # runs a new query and returns a new instance, so the checksum
+            # must be assigned to and saved from the same object or the new
+            # value is never stored.
+            version = self.get_latest_version()
+            source = version.open()
+            version.checksum = unicode(CHECKSUM_FUNCTION(source.read()))
source.close()
if save:
- self.save()
+                version.save()
def update_page_count(self, save=True):
handle, filepath = tempfile.mkstemp()
@@ -204,7 +208,8 @@ class Document(models.Model):
@property
def page_count(self):
- return self.documentpage_set.count()
+        # Number of pages attached to the most recent version of the document.
+        latest_version = self.get_latest_version()
+        return latest_version.documentpage_set.count()
def save_to_file(self, filepath, buffer_size=1024 * 1024):
"""
@@ -229,13 +234,13 @@ class Document(models.Model):
Returns a boolean value that indicates if the document's file
exists in storage
"""
- return self.file.storage.exists(self.file.path)
+ return self.get_latest_version().file.storage.exists(self.get_latest_version().file.path)
def apply_default_transformations(self, transformations):
#Only apply default transformations on new documents
- if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
+ if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.pages.all()]) == 0:
for transformation in transformations:
- for document_page in self.documentpage_set.all():
+ for document_page in self.pages.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
@@ -246,7 +251,7 @@ class Document(models.Model):
page_transformation.save()
def get_cached_image_name(self, page):
- document_page = self.documentpage_set.get(page_number=page)
+ document_page = self.pages.get(page_number=page)
transformations, warnings = document_page.get_transformation_list()
hash_value = HASH_FUNCTION(u''.join([self.checksum, unicode(page), unicode(transformations)]))
return os.path.join(CACHE_PATH, hash_value), transformations
@@ -300,15 +305,129 @@ class Document(models.Model):
def delete(self, *args, **kwargs):
+        # Collect the stored files before deleting: removing the document
+        # cascades to its DocumentVersion rows (ForeignKey default), so the
+        # versions can no longer be queried once the parent delete has run.
+        stored_files = [version.file for version in self.documentversion_set.all()]
super(Document, self).delete(*args, **kwargs)
- return self.file.storage.delete(self.file.path)
+        # Remove the file of every version from the storage backend.
+        for stored_file in stored_files:
+            stored_file.storage.delete(stored_file.path)
@property
def size(self):
if self.exists():
- return self.file.storage.size(self.file.path)
+            # Resolve the latest version once so the storage backend and the
+            # path come from the same file object (and only one query runs).
+            latest_file = self.get_latest_version().file
+            return latest_file.storage.size(latest_file.path)
else:
return None
-
+
+ # Compatibility properties: the file related fields moved from Document
+ # to DocumentVersion, so each property below reads the corresponding
+ # value from the latest version of the document.
+ @property
+ def file(self):
+ return self.get_latest_version().file
+
+ @property
+ def file_mimetype(self):
+ return self.get_latest_version().mimetype
+
+ @property
+ def file_mime_encoding(self):
+ return self.get_latest_version().encoding
+
+ @property
+ def file_filename(self):
+ return self.get_latest_version().filename
+
+ @property
+ def date_updated(self):
+ # The latest version's timestamp stands in for the former
+ # auto-updated date field.
+ return self.get_latest_version().timestamp
+
+ #@property
+ #def date_added(self):
+ # return self.get_latest_version().timestamp
+
+ @property
+ def checksum(self):
+ return self.get_latest_version().checksum
+
+ @property
+ def pages(self):
+ # Related manager of the latest version's pages.
+ return self.get_latest_version().pages
+
+
+ #file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
+ #file_mimetype = models.CharField(max_length=64, default='', editable=False)
+ #file_mime_encoding = models.CharField(max_length=64, default='', editable=False)
+ ##FAT filename can be up to 255 using LFN
+ #file_filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
+ #date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True)
+ #checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
+
+ def get_latest_version(self):
+ # Return the DocumentVersion of this document with the newest
+ # timestamp. Indexing with [0] raises IndexError when the document
+ # has no versions yet.
+ return self.documentversion_set.order_by('-timestamp')[0]
+
+
+# Release level codes stored in DocumentVersion.release_level, followed by
+# the (code, label) choices offered for that field.
+RELEASE_LEVEL_FINAL = 1
+RELEASE_LEVEL_ALPHA = 2
+RELEASE_LEVEL_BETA = 3
+RELEASE_LEVEL_RC = 4
+RELEASE_LEVEL_HF = 5
+
+RELEASE_LEVEL_CHOICES = (
+ (RELEASE_LEVEL_FINAL, _(u'final')),
+ (RELEASE_LEVEL_ALPHA, _(u'alpha')),
+ (RELEASE_LEVEL_BETA, _(u'beta')),
+ (RELEASE_LEVEL_RC, _(u'release candidate')),
+ (RELEASE_LEVEL_HF, _(u'hotfix')),
+)
+
+class DocumentVersion(models.Model):
+    '''
+    Model that describes a document version and its properties
+    '''
+    document = models.ForeignKey(Document, verbose_name=_(u'document'))
+    # NOTE: `mayor` is a misspelling of "major"; the field name is kept so
+    # the schema and the unique_together constraint below stay unchanged.
+    mayor = models.PositiveIntegerField(verbose_name=_(u'mayor'), default=1)
+    minor = models.PositiveIntegerField(verbose_name=_(u'minor'), default=0)
+    micro = models.PositiveIntegerField(verbose_name=_(u'micro'), default=0)
+    release_level = models.PositiveIntegerField(choices=RELEASE_LEVEL_CHOICES, default=RELEASE_LEVEL_FINAL, verbose_name=_(u'release level'))
+    serial = models.PositiveIntegerField(verbose_name=_(u'serial'), default=0)
+    # No default or automatic value: the creator of a version must set it.
+    # Document.get_latest_version() orders versions by this field.
+    timestamp = models.DateTimeField(verbose_name=_(u'timestamp'))
+
+    # File related fields
+    file = models.FileField(upload_to=get_filename_from_uuid, storage=STORAGE_BACKEND(), verbose_name=_(u'file'))
+    mimetype = models.CharField(max_length=64, default='', editable=False)
+    encoding = models.CharField(max_length=64, default='', editable=False)
+    filename = models.CharField(max_length=255, default=u'', editable=False, db_index=True)
+    checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
+
+    class Meta:
+        unique_together = ('document', 'mayor', 'minor', 'micro', 'release_level', 'serial')
+        verbose_name = _(u'document version')
+        verbose_name_plural = _(u'document versions')
+
+    def __unicode__(self):
+        return self.get_version()
+
+    # TODO: Update timestamp
+
+    def get_version(self):
+        '''
+        Return the formatted version information: "<mayor>.<minor>", then
+        ".<micro>" when micro is non zero, then the release level label and
+        serial when the release level is not final
+        '''
+        # Read the fields directly; a model instance is not a mapping, so
+        # it cannot be used as the right-hand side of a %(name)s format.
+        vers = [u'%i.%i' % (self.mayor, self.minor)]
+
+        if self.micro:
+            vers.append(u'.%i' % self.micro)
+        if self.release_level != RELEASE_LEVEL_FINAL:
+            # get_release_level_display() is generated by Django for fields
+            # with choices and returns the human readable label.
+            vers.append(u'%s%i' % (self.get_release_level_display(), self.serial))
+        return u''.join(vers)
+
+    @property
+    def pages(self):
+        # Related manager of the DocumentPage rows that point at this version.
+        return self.documentpage_set
+
+    def open(self):
+        '''
+        Return a file descriptor to a document version's file irrespective of
+        the storage backend
+        '''
+        return self.file.storage.open(self.file.path)
+
class DocumentTypeFilename(models.Model):
"""
@@ -332,7 +451,13 @@ class DocumentPage(models.Model):
"""
Model that describes a document page including it's content
"""
- document = models.ForeignKey(Document, verbose_name=_(u'document'))
+ ## This field is to be removed
+ #document = models.ForeignKey(Document, verbose_name=_(u'document'))
+
+ # New parent field
+ document_version = models.ForeignKey(DocumentVersion, verbose_name=_(u'document version'))#, null=True, blank=True) # TODO: Remove these after datamigration
+
+ # Unchanged fields
content = models.TextField(blank=True, null=True, verbose_name=_(u'content'), db_index=True)
page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'), db_index=True)
@@ -341,7 +466,7 @@ class DocumentPage(models.Model):
return _(u'Page %(page_num)d out of %(total_pages)d of %(document)s') % {
'document': unicode(self.document),
'page_num': self.page_number,
- 'total_pages': self.document.documentpage_set.count()
+ 'total_pages': self.document_version.documentpage_set.count()
}
class Meta:
@@ -355,6 +480,11 @@ class DocumentPage(models.Model):
@models.permalink
def get_absolute_url(self):
return ('document_page_view', [self.pk])
+
+ # Compatibility property: the direct `document` foreign key was replaced
+ # by `document_version`, so the owning document is reached through the
+ # version.
+ @property
+ def document(self):
+ return self.document_version.document
class ArgumentsValidator(object):
@@ -421,10 +551,10 @@ class RecentDocument(models.Model):
# Register the fields that will be searchable
register('document', Document, _(u'document'), [
{'name': u'document_type__name', 'title': _(u'Document type')},
- {'name': u'file_mimetype', 'title': _(u'MIME type')},
- {'name': u'file_filename', 'title': _(u'Filename')},
+ {'name': u'documentversion__mimetype', 'title': _(u'MIME type')},
+ {'name': u'documentversion__filename', 'title': _(u'Filename')},
{'name': u'documentmetadata__value', 'title': _(u'Metadata value')},
- {'name': u'documentpage__content', 'title': _(u'Content')},
+ {'name': u'documentversion__documentpage__content', 'title': _(u'Content')},
{'name': u'description', 'title': _(u'Description')},
{'name': u'tags__name', 'title': _(u'Tags')},
{'name': u'comments__comment', 'title': _(u'Comments')},
diff --git a/apps/ocr/api.py b/apps/ocr/api.py
index 4d70443f92..e568ecf6c8 100644
--- a/apps/ocr/api.py
+++ b/apps/ocr/api.py
@@ -88,7 +88,7 @@ def do_document_ocr(queue_document):
parser, if the parser fails or if there is no parser registered for
the document mimetype do a visual OCR by calling tesseract
"""
- for document_page in queue_document.document.documentpage_set.all():
+ for document_page in queue_document.document.pages.all():
try:
# Try to extract text by means of a parser
parse_document_page(document_page)