From d80584d087fc290f153dd7a6bb7996f4b684cc5b Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Fri, 4 Feb 2011 13:09:43 -0400 Subject: [PATCH] Added user configurable document checksumming --- apps/documents/conf/settings.py | 2 ++ apps/documents/models.py | 27 +++++++++++++++++++-------- apps/documents/views.py | 2 ++ docs/TODO | 4 ++-- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/apps/documents/conf/settings.py b/apps/documents/conf/settings.py index bb6663bf2a..4a48e7187e 100644 --- a/apps/documents/conf/settings.py +++ b/apps/documents/conf/settings.py @@ -1,4 +1,5 @@ import datetime +import hashlib from django.conf import settings @@ -11,3 +12,4 @@ STAGING_DIRECTORY = getattr(settings, 'DOCUMENTS_STAGIN_DIRECTORY', '/tmp') FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESERVING_PATH', '/tmp') DELETE_ORIGINAL = getattr(settings, 'DOCUMENTS_DELETE_ORIGINAL', False) SLUGIFY_PATH = getattr(settings, 'DOCUMENTS_SLUGIFY_PATH', False) +CHECKSUM_FUNCTION = getattr(settings, 'DOCUMENTS_CHECKSUM_FUNCTION', lambda x: hashlib.sha256(x).hexdigest()) diff --git a/apps/documents/models.py b/apps/documents/models.py index f0233c131b..75341f1582 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -8,12 +8,13 @@ from django.conf import settings from django.db import models from django.template.defaultfilters import slugify from django.utils.translation import ugettext_lazy as _ - + from dynamic_search.api import register from documents.conf.settings import AVAILABLE_FUNCTIONS from documents.conf.settings import FILESERVING_PATH from documents.conf.settings import SLUGIFY_PATH +from documents.conf.settings import CHECKSUM_FUNCTION if SLUGIFY_PATH == False: #Do not slugify path or filenames and extensions @@ -27,7 +28,7 @@ def get_filename_from_uuid(instance, filename, directory='documents'): def populate_file_extension_and_mimetype(instance, filename): # First populate the file extension and mimetype - instance.file_mimetype, encoding = mimetypes.guess_type(filename) or "" + instance.file_mimetype, encoding = mimetypes.guess_type(filename) or '' filename, extension = os.path.splitext(filename) instance.file_filename = filename #remove prefix '.' @@ -46,13 +47,14 @@ class Document(models.Model): Inherit this model to customise document metadata, see BasicDocument for an example. """ document_type = models.ForeignKey(DocumentType, verbose_name=_(u'document type')) - file = models.FileField(upload_to=get_filename_from_uuid)#lambda i,f: 'documents/%s' % i.uuid) + file = models.FileField(upload_to=get_filename_from_uuid) uuid = models.CharField(max_length=36, default=lambda:unicode(uuid.uuid4()), blank=True, editable=False) - file_mimetype = models.CharField(max_length=50, default="", editable=False) - file_filename = models.CharField(max_length=64, default="", editable=False) - file_extension = models.CharField(max_length=10, default="", editable=False) - date_added = models.DateTimeField("added", auto_now_add=True) - date_updated = models.DateTimeField("updated", auto_now=True) + file_mimetype = models.CharField(max_length=50, default='', editable=False) + file_filename = models.CharField(max_length=64, default='', editable=False) + file_extension = models.CharField(max_length=10, default='', editable=False) + date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True) + date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True) + checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False) class Meta: verbose_name = _(u'document') @@ -65,6 +67,15 @@ class Document(models.Model): @models.permalink def get_absolute_url(self): return ('document_view', [self.id]) + + def update_checksum(self, save=True): + self.checksum = unicode(CHECKSUM_FUNCTION(self.file.read())) + if save: + self.save() + + def save(self, *args, **kwargs): + self.update_checksum(save=False) + super(Document, self).save(*args, **kwargs) def create_fs_links(self): for metadata in self.documentmetadata_set.all(): diff --git a/apps/documents/views.py b/apps/documents/views.py index 2d319d2f1f..eef4577c0f 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -81,6 +81,8 @@ def document_view(request, document_id): {'label':_(u'File extension'), 'field':'file_extension'}, {'label':_(u'File mimetype'), 'field':'file_mimetype'}, {'label':_(u'Date added'), 'field':'date_added'}, + {'label':_(u'Checksum'), 'field':'checksum'}, + {'label':_(u'UUID'), 'field':'uuid'} ]) return render_to_response('generic_detail.html', { diff --git a/docs/TODO b/docs/TODO index aad4558d91..4a1544e9ef 100644 --- a/docs/TODO +++ b/docs/TODO @@ -1,7 +1,8 @@ -* Fix repeated search results - DONE +* Fix repeated search results - DONE * File renaming dropdown - DONE * Create indexing filesystem folders from document type metadata type - DONE * Document detail to view document metadata - DONE +* Add file checksums (hashlib) - DONE * Filter by metadata * Jquery upload document upload form with ajax widget * Filterform date filtering widget @@ -10,6 +11,5 @@ * Integrate with http://code.google.com/p/pytesser/ * Delete symlinks when document is deleted of metadata changed * Check duplicated files using checksum -* Add file checksums (hashlib) * If theres only one document type on db skip step 1 of wizard