Add column mapping support

Add support for specifying the document type, document path, and metadata columns.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2019-06-19 17:47:32 -04:00
parent b3d59eee39
commit bc072f7b7e
4 changed files with 181 additions and 14 deletions
--- a/mayan/apps/importer/management/commands/import.py
+++ b/mayan/apps/importer/management/commands/import.py
@@ -7,18 +7,34 @@ from django.apps import apps
 from django.core import management
 from django.core.files import File

-from mayan.apps.documents.tasks import task_upload_new_document
+from ...tasks import task_upload_new_document


 class Command(management.BaseCommand):
    help = 'Import documents from a CSV file.'

    def add_arguments(self, parser):
-        #parser.add_argument(
-        #    '-l', '--link',
-        #    action='store_true', dest='link', default=False,
-        #    help='Create a symbolic link to each file instead of copying.',
-        #)
+        parser.add_argument(
+            '--document_type_column',
+            action='store', dest='document_type_column', default=0,
+            help='Column that contains the document type labels. Column '
+            'numbers start at 0.',
+            type=int
+        )
+        parser.add_argument(
+            '--document_path_column',
+            action='store', dest='document_path_column', default=1,
+            help='Column that contains the path to the document files. Column '
+            'numbers start at 0.',
+            type=int
+        )
+        parser.add_argument(
+            '--metadata_pairs_column',
+            action='store', dest='metadata_pairs_column',
+            help='Column that contains metadata name and values for the '
+            'documents. Use the form: <label column>:<value column>. Example: '
+            '2:5. Separate multiple pairs with commas. Example: 2:5,7:10',
+        )
        parser.add_argument('filelist', nargs='?', help='File list')

    def handle(self, *args, **options):
@@ -41,24 +57,44 @@ class Command(management.BaseCommand):
            with open(options['filelist']) as csv_datafile:
                csv_reader = csv.reader(csv_datafile)
                for row in csv_reader:
-                    with open(row[1]) as file_object:
-                        if row[0] not in document_types:
-                            self.stdout.write('New document type: {}. Creating and caching.'.format(row[0]))
-                            document_type, created = DocumentType.objects.get_or_create(
-                                label=row[0]
+                    with open(row[options['document_path_column']]) as file_object:
+                        document_type_label = row[options['document_type_column']]
+
+                        if document_type_label not in document_types:
+                            self.stdout.write(
+                                'New document type: {}. Creating and caching.'.format(
+                                    document_type_label
+                                )
                            )
-                            document_types[row[0]] = document_type
+                            document_type, created = DocumentType.objects.get_or_create(
+                                label=document_type_label
+                            )
+                            document_types[document_type_label] = document_type
                        else:
-                            document_type = document_types[row[0]]
+                            document_type = document_types[document_type_label]

                        shared_uploaded_file = SharedUploadedFile.objects.create(
                            file=File(file_object)
                        )

+                        extra_data = {}
+                        if options['metadata_pairs_column']:
+                            extra_data['metadata_pairs'] = []
+
+                            for pair in options['metadata_pairs_column'].split(','):
+                                name, value = pair.split(':')
+                                extra_data['metadata_pairs'].append(
+                                    {
+                                        'name': row[int(name)],
+                                        'value': row[int(value)]
+                                    }
+                                )
+
                        task_upload_new_document.apply_async(
                            kwargs=dict(
                                document_type_id=document_type.pk,
                                shared_uploaded_file_id=shared_uploaded_file.pk,
+                                extra_data=extra_data
                            )
                        )

--- a/mayan/apps/importer/queues.py
+++ b/mayan/apps/importer/queues.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import, unicode_literals
+
+from django.utils.translation import ugettext_lazy as _
+
+from mayan.apps.documents.queues import queue_uploads
+
+queue_uploads.add_task_type(
+    dotted_path='mayan.apps.importer.tasks.task_upload_new_document',
+    label=_('Import new document')
+)
--- a/mayan/apps/importer/tasks.py
+++ b/mayan/apps/importer/tasks.py
@@ -0,0 +1,85 @@
+from __future__ import unicode_literals
+
+import logging
+
+from django.apps import apps
+from django.db import OperationalError
+from django.utils.text import slugify
+
+from mayan.celery import app
+
+from mayan.apps.documents.literals import UPLOAD_NEW_DOCUMENT_RETRY_DELAY
+
+logger = logging.getLogger(__name__)
+
+
+@app.task(bind=True, default_retry_delay=UPLOAD_NEW_DOCUMENT_RETRY_DELAY, ignore_result=True)
+def task_upload_new_document(self, document_type_id, shared_uploaded_file_id, extra_data=None):
+    DocumentType = apps.get_model(
+        app_label='documents', model_name='DocumentType'
+    )
+
+    MetadataType = apps.get_model(
+        app_label='metadata', model_name='MetadataType'
+    )
+
+    SharedUploadedFile = apps.get_model(
+        app_label='common', model_name='SharedUploadedFile'
+    )
+
+    try:
+        document_type = DocumentType.objects.get(pk=document_type_id)
+        shared_file = SharedUploadedFile.objects.get(
+            pk=shared_uploaded_file_id
+        )
+    except OperationalError as exception:
+        logger.warning(
+            'Operational error during attempt to retrieve shared data for '
+            'new document of type: %s; %s. Retrying.', document_type, exception
+        )
+        raise self.retry(exc=exception)
+
+    try:
+        with shared_file.open() as file_object:
+            new_document = document_type.new_document(file_object=file_object)
+    except OperationalError as exception:
+        logger.warning(
+            'Operational error during attempt to create new document '
+            'of type: %s; %s. Retrying.', document_type, exception
+        )
+        raise self.retry(exc=exception)
+    except Exception as exception:
+        # This except and else block emulate a finally:
+        logger.error(
+            'Unexpected error during attempt to create new document '
+            'of type: %s; %s', document_type, exception
+        )
+        try:
+            shared_file.delete()
+        except OperationalError as exception:
+            logger.warning(
+                'Operational error during attempt to delete shared '
+                'file: %s; %s.', shared_file, exception
+            )
+    else:
+        if extra_data:
+            for pair in extra_data.get('metadata_pairs', []):
+                name = slugify(pair['name']).replace('-', '_')
+                metadata_type, created = MetadataType.objects.get_or_create(
+                    label=pair['name'], defaults={'name': name}
+                )
+                if created:
+                    new_document.document_type.metadata.create(
+                        metadata_type=metadata_type, required=False
+                    )
+                new_document.metadata.create(
+                    metadata_type=metadata_type, value=pair['value']
+                )
+
+        try:
+            shared_file.delete()
+        except OperationalError as exception:
+            logger.warning(
+                'Operational error during attempt to delete shared '
+                'file: %s; %s.', shared_file, exception
+            )
--- a/mayan/apps/importer/tests/test_management_commands.py
+++ b/mayan/apps/importer/tests/test_management_commands.py
@@ -41,7 +41,12 @@ class ImportManagementCommandTestCase(GenericDocumentTestCase):
            )
            for times in range(self.test_import_count):
                filewriter.writerow(
-                    [self.test_document_type.label, TEST_SMALL_DOCUMENT_PATH]
+                    [
+                        self.test_document_type.label, TEST_SMALL_DOCUMENT_PATH,
+                        'column 2', 'column 3', 'column 4', 'column 5',
+                        'column 6', 'column 7', 'column 8', 'column 9',
+                        'column 10', 'column 11',
+                    ]
                )

    def _destroy_test_csv_file(self):
@@ -56,3 +61,34 @@ class ImportManagementCommandTestCase(GenericDocumentTestCase):

        self.assertTrue(DocumentType.objects.count() > 0)
        self.assertTrue(Document.objects.count() > 0)
+
+    def test_import_document_type_column_mapping(self):
+        self.test_document_type.delete()
+        management.call_command(
+            'import', self.test_csv_path, '--document_type_column', '2'
+        )
+
+        self.assertTrue(DocumentType.objects.first().label == 'column 2')
+        self.assertTrue(Document.objects.count() > 0)
+
+    def test_import_document_path_column_mapping(self):
+        self.test_document_type.delete()
+        with self.assertRaises(IOError):
+            management.call_command(
+                'import', self.test_csv_path, '--document_path_column', '2'
+            )
+
+    def test_import_metadata_column_mapping(self):
+        self.test_document_type.delete()
+        management.call_command(
+            'import', self.test_csv_path, '--metadata_pairs_column', '2:3,4:5',
+        )
+
+        self.assertTrue(DocumentType.objects.count() > 0)
+        self.assertTrue(Document.objects.count() > 0)
+        self.assertTrue(Document.objects.first().metadata.count() > 0)
+        self.assertEqual(
+            Document.objects.first().metadata.get(
+                metadata_type__name='column_2'
+            ).value, 'column 3'
+        )