From 791742811f1a7364ed9eb36e4eb9ca3a403c821a Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 7 Feb 2011 16:04:17 -0400 Subject: [PATCH] Staging file are now indexed by a file content hash, fixed staging file rename on upload --- apps/documents/models.py | 18 +++++++++++++++++- apps/documents/staging.py | 28 ++++++++++++++++------------ apps/documents/views.py | 38 +++++++++++++++++++++++--------------- 3 files changed, 56 insertions(+), 28 deletions(-) diff --git a/apps/documents/models.py b/apps/documents/models.py index 572457b64f..14d4c29f65 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -69,7 +69,8 @@ class Document(models.Model): ordering = ['-date_updated', '-date_added'] def __unicode__(self): - return self.uuid + #return self.uuid + return '%s.%s' % (self.file_filename, self.file_extension) @models.permalink def get_absolute_url(self): @@ -134,6 +135,7 @@ available_models_string = (_(u' Available models: %s') % ','.join([name for name class MetadataType(models.Model): name = models.CharField(max_length=48, verbose_name=_(u'name')) +# title = models.CharField(max_length=48, verbose_name=_(u'title'), blank=True, null=True) default = models.CharField(max_length=128, blank=True, null=True, verbose_name=_(u'default'), help_text=_(u'Enter a string to be evaluated.%s') % available_functions_string) lookup = models.CharField(max_length=128, blank=True, null=True, @@ -141,6 +143,7 @@ class MetadataType(models.Model): #datatype = models. def __unicode__(self): +# return self.title if self.title else self.name return self.name class Meta: @@ -148,6 +151,19 @@ class MetadataType(models.Model): verbose_name_plural = _(u'metadata types') +#class MetadataIndexing(models.Model): +# metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type')) +# indexing_string = models.CharField( +# +# +# def __unicode__(self): +# return unicode(self.metadata_type) +# +# class Meta: +# verbose_name = _(u'metadata type') +# verbose_name_plural = _(u'metadata types') + + class DocumentTypeMetadataType(models.Model): document_type = models.ForeignKey(DocumentType, verbose_name=_(u'document type')) metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type')) diff --git a/apps/documents/staging.py b/apps/documents/staging.py index 8973c686e7..0ceeb31d29 100644 --- a/apps/documents/staging.py +++ b/apps/documents/staging.py @@ -1,5 +1,6 @@ import errno import os +import hashlib from django.conf import settings from django.core.exceptions import ObjectDoesNotExist @@ -9,6 +10,11 @@ from django.utils.translation import ugettext from documents.conf.settings import STAGING_DIRECTORY +HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() + +#func = lambda:[StagingFile.get_all() is None for i in range(100)] +#t1=time.time();func();t2=time.time();print '%s took %0.3f ms' % (func.func_name, (t2-t1)*1000.0) + def get_all_files(): try: @@ -25,26 +31,24 @@ class StagingFile(object): @classmethod def get_all(cls): staging_files = [] - for id, filename in enumerate(get_all_files()): + for filename in get_all_files(): staging_files.append(StagingFile( - filepath=os.path.join(STAGING_DIRECTORY, filename), - id=id)) - + filepath=os.path.join(STAGING_DIRECTORY, filename))) + return staging_files @classmethod def get(cls, id): - files = get_all_files() - if id <= len(files): - return StagingFile( - filepath=os.path.join(STAGING_DIRECTORY, files[id]), - id=id) - raise ObjectDoesNotExist + files_dict = dict([(file.id, file) for file in cls.get_all()]) + if id in files_dict: + return files_dict[id] + else: + raise ObjectDoesNotExist - def __init__(self, filepath, id): + def __init__(self, filepath): self.filepath = filepath self.filename = os.path.basename(filepath) - self._id = id + self._id = HASH_FUNCTION(open(filepath).read()) def __unicode__(self): return self.filename diff --git a/apps/documents/views.py b/apps/documents/views.py index 399c0cf970..a2bb9430f7 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -20,7 +20,6 @@ from staging import StagingFile from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD from documents.conf.settings import USE_STAGING_DIRECTORY -DELETE_STAGING_FILE_AFTER_UPLOAD = True def document_list(request): return object_list( @@ -30,8 +29,8 @@ def document_list(request): extra_context={ 'title':_(u'documents'), 'extra_columns':[ - {'name':_(u'filename'), 'attribute':'file_filename'}, - {'name':_(u'extension'), 'attribute':'file_extension'}, + #{'name':_(u'filename'), 'attribute':'file_filename'}, + #{'name':_(u'extension'), 'attribute':'file_extension'}, {'name':_(u'mimetype'), 'attribute':'file_mimetype'}, {'name':_(u'added'), 'attribute':lambda x: x.date_added.date()}, ], @@ -102,26 +101,36 @@ def upload_document_with_type(request, document_type_id, multiple=True): if staging_form.is_valid(): staging_file_id = staging_form.cleaned_data['staging_file_id'] - staging_file = StagingFile.get(int(staging_file_id)) try: - document = Document(file=staging_file.upload(), document_type=document_type) - document.save() + staging_file = StagingFile.get(staging_file_id) except Exception, e: messages.error(request, e) else: - _save_metadata(request.GET, document) - messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename) try: - document.create_fs_links() + document = Document(file=staging_file.upload(), document_type=document_type) + document.save() except Exception, e: - messages.error(request, e) - - if DELETE_STAGING_FILE_AFTER_UPLOAD: + messages.error(request, e) + else: + + if 'document_type_available_filenames' in staging_form.cleaned_data: + if staging_form.cleaned_data['document_type_available_filenames']: + document.file_filename = staging_form.cleaned_data['document_type_available_filenames'].filename + document.save() + + _save_metadata(request.GET, document) + messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename) try: - staging_file.delete() - messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename) + document.create_fs_links() except Exception, e: messages.error(request, e) + + if DELETE_STAGING_FILE_AFTER_UPLOAD: + try: + staging_file.delete() + messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename) + except Exception, e: + messages.error(request, e) if multiple: return HttpResponseRedirect(request.META['HTTP_REFERER']) @@ -221,7 +230,6 @@ def document_edit(request, document_id): document.file_filename = form.cleaned_data['new_filename'] - print form.cleaned_data if 'document_type_available_filenames' in form.cleaned_data: if form.cleaned_data['document_type_available_filenames']: document.file_filename = form.cleaned_data['document_type_available_filenames'].filename