From e08c93e88b0be2ec41f47f0e6a2a83d44ff66fb1 Mon Sep 17 00:00:00 2001
From: Roberto Rosario
Date: Thu, 26 Jan 2012 10:31:07 -0400
Subject: [PATCH 1/2] Add out of process bulk document upload

---
 apps/sources/management/__init__.py           |  0
 apps/sources/management/commands/__init__.py  |  0
 .../management/commands/bulk_upload.py        | 58 +++++++++++++++++++
 apps/sources/models.py                        | 29 +++++++++-
 4 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 apps/sources/management/__init__.py
 create mode 100644 apps/sources/management/commands/__init__.py
 create mode 100644 apps/sources/management/commands/bulk_upload.py

diff --git a/apps/sources/management/__init__.py b/apps/sources/management/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/apps/sources/management/commands/__init__.py b/apps/sources/management/commands/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/apps/sources/management/commands/bulk_upload.py b/apps/sources/management/commands/bulk_upload.py
new file mode 100644
index 0000000000..42563116d5
--- /dev/null
+++ b/apps/sources/management/commands/bulk_upload.py
@@ -0,0 +1,58 @@
+from __future__ import absolute_import
+
+import os
+from optparse import make_option
+
+from django.core.management.base import BaseCommand, CommandError, LabelCommand
+
+from ...models import OutOfProcess
+from ...compressed_file import CompressedFile, NotACompressedFile
+
+
+class Command(LabelCommand):
+    args = ''
+    help = 'Upload documents from a compressed file in to the database.'
+    option_list = LabelCommand.option_list + (
+        make_option('--noinput', action='store_false', dest='interactive',
+            default=True, help='Do not ask the user for confirmation before '
+                'starting.'),
+        #make_option('--metadata', action='store', dest='metadata',
+        #    help='A metadata dictionary to apply to the documents.'),
+    )
+
+    def handle_label(self, label, **options):
+        """
+        Upload the documents contained in the file named by `label`.
+
+        Raises CommandError if the file is not readable.
+        """
+        if not os.access(label, os.R_OK):
+            raise CommandError("File '%s' is not readable." % label)
+
+        if _confirm(options['interactive']) == 'yes':
+            print 'Beginning upload...'
+            source = OutOfProcess()
+            # Archives are binary data, so do not open them in text mode.
+            fd = open(label, 'rb')
+            try:
+                source.upload_file(fd, filename=None, use_file_name=False, document_type=None, expand=True, metadata_dict_list=None, user=None, document=None, new_version_data=None, verbose=True)
+            except NotACompressedFile:
+                print '%s is not a compressed file.' % label
+            else:
+                print 'Finished.'
+            finally:
+                # Release the file handle even when the upload fails.
+                fd.close()
+        else:
+            print 'Cancelled.'
+
+
+def _confirm(interactive):
+    """
+    Return 'yes' when not interactive, otherwise the answer the user typed.
+    """
+    if not interactive:
+        return 'yes'
+    return raw_input('You have requested to bulk upload a number of documents from a compressed file.\n'
+            'Are you sure you want to do this?\n'
+            'Type \'yes\' to continue, or any other value to cancel: ')
diff --git a/apps/sources/models.py b/apps/sources/models.py
index d7fb8f7161..7243583f90 100644
--- a/apps/sources/models.py
+++ b/apps/sources/models.py
@@ -55,13 +55,17 @@ class BaseModel(models.Model):
     def get_transformation_list(self):
         return SourceTransformation.transformations.get_for_object_as_list(self)
 
-    def upload_file(self, file_object, filename=None, use_file_name=False, document_type=None, expand=False, metadata_dict_list=None, user=None, document=None, new_version_data=None):
+    def upload_file(self, file_object, filename=None, use_file_name=False, document_type=None, expand=False, metadata_dict_list=None, user=None, document=None, new_version_data=None, verbose=False):
         if expand:
             try:
                 cf = CompressedFile(file_object)
+                count = 1
                 for fp in cf.children():
+                    if verbose:
+                        print 'Uploading file #%d: %s' % (count, fp)
                     self.upload_single_file(fp, None, document_type, metadata_dict_list, user)
                     fp.close()
+                    count += 1
 
             except NotACompressedFile:
                 self.upload_single_file(file_object, filename, document_type, metadata_dict_list, user)
@@ -256,3 +260,26 @@ class SourceTransformation(models.Model):
         ordering = ('order',)
         verbose_name = _(u'document source transformation')
         verbose_name_plural = _(u'document source transformations')
+
+
+class OutOfProcess(BaseModel):
+    """
+    Document source used by the bulk_upload management command.
+    """
+    #icon = models.CharField(blank=True, null=True, max_length=24, choices=SOURCE_ICON_CHOICES, verbose_name=_(u'icon'), help_text=_(u'An icon to visually distinguish this source.'))
+
+    #def save(self, *args, **kwargs):
+    #    if not self.icon:
+    #        self.icon = self.default_icon
+    #    super(BaseModel, self).save(*args, **kwargs)
+
+    is_interactive = False
+    #source_type = SOURCE_CHOICE_WEB_FORM
+    #default_icon = SOURCE_ICON_DISK
+
+    #uncompress = models.CharField(max_length=1, choices=SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, verbose_name=_(u'uncompress'), help_text=_(u'Whether to expand or not compressed archives.'))
+    #Default path
+
+    class Meta(BaseModel.Meta):
+        verbose_name = _(u'out of process')
+        verbose_name_plural = _(u'out of process')

From 850c6dd69a9038c90761a2a9f7e8323115104bf0 Mon Sep 17 00:00:00 2001
From: Roberto Rosario
Date: Thu, 26 Jan 2012 11:14:22 -0400
Subject: [PATCH 2/2] Add the options --metadata and --document_type to the bulk uploader

---
 apps/metadata/api.py                          | 18 ++++++++
 .../management/commands/bulk_upload.py        | 40 ++++++++++++++---
 apps/sources/models.py                        | 14 +++----
 3 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/apps/metadata/api.py b/apps/metadata/api.py
index 60e611e5f6..77a86056bb 100644
--- a/apps/metadata/api.py
+++ b/apps/metadata/api.py
@@ -107,3 +107,21 @@ def get_metadata_string(document):
     Return a formated representation of a document's metadata values
     """
     return u', '.join([u'%s - %s' % (metadata.metadata_type, metadata.value) for metadata in DocumentMetadata.objects.filter(document=document).select_related('metadata_type')])
+
+
+def convert_dict_to_dict_list(dictionary):
+    """
+    Convert a {metadata type name: value} dictionary into a list of
+    {'id': <metadata type pk>, 'value': <value>} dictionaries.
+
+    Raises ValueError if a name does not match any MetadataType.
+    """
+    result = []
+    for key, value in dictionary.items():
+        try:
+            metadata_type = MetadataType.objects.get(name=key)
+        except MetadataType.DoesNotExist:
+            raise ValueError('Unknown metadata type name: %s' % key)
+        result.append({'id': metadata_type.pk, 'value': value})
+
+    return result
diff --git a/apps/sources/management/commands/bulk_upload.py b/apps/sources/management/commands/bulk_upload.py
index 42563116d5..f31c62d48c 100644
--- a/apps/sources/management/commands/bulk_upload.py
+++ b/apps/sources/management/commands/bulk_upload.py
@@ -4,6 +4,10 @@ import os
 from optparse import make_option
 
 from django.core.management.base import BaseCommand, CommandError, LabelCommand
+from django.utils.simplejson import loads
+
+from metadata.api import convert_dict_to_dict_list
+from documents.models import DocumentType
 
 from ...models import OutOfProcess
 from ...compressed_file import CompressedFile, NotACompressedFile
@@ -16,26 +20,54 @@ class Command(LabelCommand):
         make_option('--noinput', action='store_false', dest='interactive',
             default=True, help='Do not ask the user for confirmation before '
                 'starting.'),
-        #make_option('--metadata', action='store', dest='metadata',
-        #    help='A metadata dictionary to apply to the documents.'),
+        make_option('--metadata', action='store', dest='metadata',
+            help='A metadata dictionary list to apply to the documents.'),
+        make_option('--document_type', action='store', dest='document_type_name',
+            help='The document type to apply to the uploaded documents.'),
     )
 
     def handle_label(self, label, **options):
         """
         Upload the documents contained in the file named by `label`.
 
-        Raises CommandError if the file is not readable.
+        Raises CommandError if the file is not readable or if the
+        --metadata or --document_type values are not valid.
         """
         if not os.access(label, os.R_OK):
             raise CommandError("File '%s' is not readable." % label)
 
+        if options['metadata']:
+            try:
+                metadata_dict = loads(options['metadata'])
+                metadata_dict_list = convert_dict_to_dict_list(metadata_dict)
+            except Exception, e:
+                raise CommandError('Metadata error: %s' % e)
+        else:
+            metadata_dict_list = None
+
+        if options['document_type_name']:
+            try:
+                document_type = DocumentType.objects.get(name=options['document_type_name'])
+            except DocumentType.DoesNotExist:
+                raise CommandError("Unknown document type '%s'" % options['document_type_name'])
+        else:
+            document_type = None
+
         if _confirm(options['interactive']) == 'yes':
             print 'Beginning upload...'
+            if metadata_dict_list:
+                print 'Using the metadata values:'
+                for key, value in metadata_dict.items():
+                    print '%s: %s' % (key, value)
+
+            if document_type:
+                print 'Uploaded document will be of type: %s' % options['document_type_name']
+
             source = OutOfProcess()
             # Archives are binary data, so do not open them in text mode.
             fd = open(label, 'rb')
             try:
-                source.upload_file(fd, filename=None, use_file_name=False, document_type=None, expand=True, metadata_dict_list=None, user=None, document=None, new_version_data=None, verbose=True)
+                source.upload_file(fd, filename=None, use_file_name=False, document_type=document_type, expand=True, metadata_dict_list=metadata_dict_list, user=None, document=None, new_version_data=None, verbose=True)
             except NotACompressedFile:
                 print '%s is not a compressed file.' % label
             else:
diff --git a/apps/sources/models.py b/apps/sources/models.py
index 195022cfcf..7d181a17cd 100644
--- a/apps/sources/models.py
+++ b/apps/sources/models.py
@@ -94,7 +94,7 @@ class BaseModel(models.Model):
 
             document.save()
             apply_default_acls(document, user)
-            
+
             if metadata_dict_list:
                 save_metadata_list(metadata_dict_list, document, create=True)
             warnings = update_indexes(document)
@@ -163,7 +163,7 @@ class StagingFolder(InteractiveBaseModel):
         verbose_name = _(u'staging folder')
         verbose_name_plural = _(u'staging folders')
 
-'''
+"""
 class SourceMetadata(models.Model):
     content_type = models.ForeignKey(ContentType)
     object_id = models.PositiveIntegerField()
@@ -177,7 +177,7 @@ class SourceMetadata(models.Model):
     class Meta:
         verbose_name = _(u'source metadata')
         verbose_name_plural = _(u'sources metadata')
-'''
+"""
 
 
 class WebForm(InteractiveBaseModel):
@@ -239,9 +239,9 @@ class ArgumentsValidator(object):
         self.code = code
 
     def __call__(self, value):
-        '''
+        """
         Validates that the input evaluates correctly.
-        '''
+        """
         value = value.strip()
         try:
             literal_eval(value)
@@ -250,10 +250,10 @@ class ArgumentsValidator(object):
 
 
 class SourceTransformation(models.Model):
-    '''
+    """
     Model that stores the transformation and transformation arguments
     for a given document source
-    '''
+    """
     content_type = models.ForeignKey(ContentType)
     object_id = models.PositiveIntegerField()
     content_object = generic.GenericForeignKey('content_type', 'object_id')