Compare commits
14 Commits
releases/d
...
nightly
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
091f0d1cfd | ||
|
|
d2affdcf21 | ||
|
|
885d430b98 | ||
|
|
39eabe1c54 | ||
|
|
f6ad579829 | ||
|
|
6fc9e46882 | ||
|
|
2d326a679d | ||
|
|
aa8c2db446 | ||
|
|
925b55d76d | ||
|
|
5808d3653d | ||
|
|
bc072f7b7e | ||
|
|
b3d59eee39 | ||
|
|
7d379a52af | ||
|
|
499ab1f3e7 |
@@ -1,12 +1,19 @@
|
||||
Importer branch
|
||||
===============
|
||||
* Add a reusable task to upload documents.
|
||||
* Add MVP of the importer app.
|
||||
|
||||
3.2.4 (2019-06-XX)
|
||||
==================
|
||||
* Support configurable GUnicorn timeouts. Defaults to
|
||||
* Support configurable GUnicorn timeouts. Defaults to
|
||||
current value of 120 seconds.
|
||||
|
||||
3.2.3 (2019-06-21)
|
||||
==================
|
||||
* Add support for disabling the random primary key
|
||||
test mixin.
|
||||
* Add a reusable task to upload documents.
|
||||
* Add MVP of the importer app.
|
||||
* Fix mailing profile log columns mappings.
|
||||
GitLab issue #626. Thanks to Jesaja Everling (@jeverling)
|
||||
for the report.
|
||||
|
||||
@@ -19,7 +19,6 @@ Changes
|
||||
GitLab issue #625. Thanks to Jesaja Everling (@jeverling)
|
||||
for the report and the research.
|
||||
|
||||
|
||||
Removals
|
||||
--------
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ DEFAULT_DOCUMENT_TYPE_LABEL = _('Default')
|
||||
DOCUMENT_IMAGE_TASK_TIMEOUT = 120
|
||||
STUB_EXPIRATION_INTERVAL = 60 * 60 * 24 # 24 hours
|
||||
UPDATE_PAGE_COUNT_RETRY_DELAY = 10
|
||||
UPLOAD_NEW_DOCUMENT_RETRY_DELAY = 10
|
||||
UPLOAD_NEW_VERSION_RETRY_DELAY = 10
|
||||
|
||||
PAGE_RANGE_ALL = 'all'
|
||||
|
||||
@@ -82,3 +82,7 @@ queue_uploads.add_task_type(
|
||||
dotted_path='mayan.apps.documents.tasks.task_scan_duplicates_for',
|
||||
label=_('Scan document duplicates')
|
||||
)
|
||||
# Register the new document upload task with the uploads queue.
queue_uploads.add_task_type(
    label=_('Upload new document'),
    dotted_path='mayan.apps.documents.tasks.task_upload_new_document'
)
|
||||
|
||||
@@ -9,7 +9,8 @@ from django.db import OperationalError
|
||||
from mayan.celery import app
|
||||
|
||||
from .literals import (
|
||||
UPDATE_PAGE_COUNT_RETRY_DELAY, UPLOAD_NEW_VERSION_RETRY_DELAY
|
||||
UPDATE_PAGE_COUNT_RETRY_DELAY, UPLOAD_NEW_DOCUMENT_RETRY_DELAY,
|
||||
UPLOAD_NEW_VERSION_RETRY_DELAY
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -127,6 +128,60 @@ def task_update_page_count(self, version_id):
|
||||
raise self.retry(exc=exception)
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=UPLOAD_NEW_DOCUMENT_RETRY_DELAY, ignore_result=True)
def task_upload_new_document(self, document_type_id, shared_uploaded_file_id):
    """
    Create a new document of the given type from a shared uploaded file.

    document_type_id: primary key of the DocumentType used to create the
    document.
    shared_uploaded_file_id: primary key of the SharedUploadedFile that
    holds the content of the new document.

    A database OperationalError triggers a retry of the task and leaves the
    shared file untouched so that the retry can read it again. In every
    other case (success, or an unexpected error which is logged and not
    propagated) the shared file is deleted before returning.
    """
    DocumentType = apps.get_model(
        app_label='documents', model_name='DocumentType'
    )

    SharedUploadedFile = apps.get_model(
        app_label='common', model_name='SharedUploadedFile'
    )

    try:
        document_type = DocumentType.objects.get(pk=document_type_id)
        shared_file = SharedUploadedFile.objects.get(
            pk=shared_uploaded_file_id
        )
    except OperationalError as exception:
        # Log the ID and not the instance: `document_type` is still unbound
        # when the document type query is the statement that failed, and
        # referencing it here would replace the retry with a NameError.
        logger.warning(
            'Operational error during attempt to retrieve shared data for '
            'new document of type: %s; %s. Retrying.', document_type_id,
            exception
        )
        raise self.retry(exc=exception)

    try:
        with shared_file.open() as file_object:
            document_type.new_document(file_object=file_object)
    except OperationalError as exception:
        logger.warning(
            'Operational error during attempt to create new document '
            'of type: %s; %s. Retrying.', document_type, exception
        )
        raise self.retry(exc=exception)
    except Exception as exception:
        # Not retryable. Log it and fall through to the cleanup below.
        logger.error(
            'Unexpected error during attempt to create new document '
            'of type: %s; %s', document_type, exception
        )

    # Only reached on success or after a logged unexpected error; the retry
    # paths above always raise, which keeps the shared file for the retry.
    try:
        shared_file.delete()
    except OperationalError as exception:
        logger.warning(
            'Operational error during attempt to delete shared '
            'file: %s; %s.', shared_file, exception
        )
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=UPLOAD_NEW_VERSION_RETRY_DELAY, ignore_result=True)
|
||||
def task_upload_new_version(self, document_id, shared_uploaded_file_id, user_id, comment=None):
|
||||
SharedUploadedFile = apps.get_model(
|
||||
|
||||
3
mayan/apps/importer/__init__.py
Normal file
3
mayan/apps/importer/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Dotted path to the application configuration class of the importer app.
default_app_config = 'mayan.apps.importer.apps.ImporterApp'
|
||||
17
mayan/apps/importer/apps.py
Normal file
17
mayan/apps/importer/apps.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from mayan.apps.common.apps import MayanAppConfig
|
||||
|
||||
|
||||
class ImporterApp(MayanAppConfig):
    """Application configuration of the importer app."""
    name = 'mayan.apps.importer'
    verbose_name = _('Importer')

    app_namespace = 'importer'
    app_url = 'importer'

    has_rest_api = False
    has_tests = True

    def ready(self):
        """Defer entirely to the base class initialization."""
        super(ImporterApp, self).ready()
|
||||
0
mayan/apps/importer/management/__init__.py
Normal file
0
mayan/apps/importer/management/__init__.py
Normal file
0
mayan/apps/importer/management/commands/__init__.py
Normal file
0
mayan/apps/importer/management/commands/__init__.py
Normal file
150
mayan/apps/importer/management/commands/import.py
Normal file
150
mayan/apps/importer/management/commands/import.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import csv
|
||||
import time
|
||||
|
||||
from django.apps import apps
|
||||
from django.core import management
|
||||
from django.core.files import File
|
||||
|
||||
from ...tasks import task_upload_new_document
|
||||
|
||||
|
||||
class Command(management.BaseCommand):
    """
    Queue the upload of the documents listed in a CSV file.

    Each row of the CSV file provides a document type label and the path
    of a document file. The file is copied into a SharedUploadedFile and a
    `task_upload_new_document` task is queued for it. Document types that
    do not exist are created. Optional pairs of columns provide metadata
    names and values that are passed along to the task.
    """
    help = 'Import documents from a CSV file.'

    def add_arguments(self, parser):
        """Declare the command line arguments of the command."""
        parser.add_argument(
            '--document_type_column',
            action='store', dest='document_type_column', default=0,
            help='Column that contains the document type labels. Column '
            'numbers start at 0.',
            type=int
        )
        parser.add_argument(
            '--document_path_column',
            action='store', dest='document_path_column', default=1,
            help='Column that contains the path to the document files. Column '
            'numbers start at 0.',
            type=int
        )
        parser.add_argument(
            '--ignore_errors',
            action='store_true', dest='ignore_errors', default=False,
            help='Don\'t stop the import process on common errors like '
            'incorrect file paths.',
        )
        parser.add_argument(
            '--ignore_rows',
            action='store', dest='ignore_rows', default='',
            help='Ignore a set of rows. Row numbers must be separated by commas.'
        )
        parser.add_argument(
            '--metadata_pairs_column',
            action='store', dest='metadata_pairs_column',
            help='Column that contains metadata name and values for the '
            'documents. Use the form: <label column>:<value column>. Example: '
            '2:5. Separate multiple pairs with commas. Example: 2:5,7:10',
        )
        parser.add_argument('filelist', nargs='?', help='File list')

    @staticmethod
    def _parse_ignore_rows(value):
        """Return the set of row numbers found in a comma separated string."""
        return set(int(entry) for entry in (value or '').split(',') if entry)

    @staticmethod
    def _parse_metadata_pairs(value):
        """
        Return a list of (label column, value column) integer tuples from
        a string of the form `2:5,7:10`.
        """
        pairs = []
        for pair in (value or '').split(','):
            if pair:
                label_column, value_column = pair.split(':')
                pairs.append((int(label_column), int(value_column)))
        return pairs

    def handle(self, *args, **options):
        """
        Read the CSV file and queue one upload task per row.

        Exits with status 1 when no CSV file path is given. IOError and
        OSError exceptions raised while processing a row are re-raised
        unless the `ignore_errors` option is set, in which case they are
        reported to stderr and the import continues with the next row.
        """
        time_start = time.time()
        time_last_display = time_start
        document_types = {}
        uploaded_count = 0

        # Parse the options once, before any file is copied. A malformed
        # value then fails early instead of leaving an orphan shared file
        # behind for the first row.
        rows_to_ignore = self._parse_ignore_rows(options['ignore_rows'])
        metadata_columns = self._parse_metadata_pairs(
            options['metadata_pairs_column']
        )
        document_path_column = options['document_path_column']
        document_type_column = options['document_type_column']

        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        SharedUploadedFile = apps.get_model(
            app_label='common', model_name='SharedUploadedFile'
        )

        if not options['filelist']:
            self.stderr.write('Must specify a CSV file path.')
            # Same effect as the `exit` builtin without depending on the
            # `site` module having injected it.
            raise SystemExit(1)

        with open(options['filelist']) as csv_datafile:
            # Row numbers are zero based, like the column numbers.
            for row_number, row in enumerate(csv.reader(csv_datafile)):
                if row_number in rows_to_ignore:
                    continue

                try:
                    # Documents are arbitrary binary files, read them as
                    # bytes and not as text.
                    with open(row[document_path_column], mode='rb') as file_object:
                        document_type_label = row[document_type_column]

                        if document_type_label not in document_types:
                            self.stdout.write(
                                'New document type: {}. Creating and caching.'.format(
                                    document_type_label
                                )
                            )
                            document_type, created = DocumentType.objects.get_or_create(
                                label=document_type_label
                            )
                            document_types[document_type_label] = document_type
                        else:
                            document_type = document_types[document_type_label]

                        shared_uploaded_file = SharedUploadedFile.objects.create(
                            file=File(file_object)
                        )

                        extra_data = {}
                        if metadata_columns:
                            extra_data['metadata_pairs'] = [
                                {
                                    'name': row[label_column],
                                    'value': row[value_column]
                                } for label_column, value_column in metadata_columns
                            ]

                        task_upload_new_document.apply_async(
                            kwargs=dict(
                                document_type_id=document_type.pk,
                                shared_uploaded_file_id=shared_uploaded_file.pk,
                                extra_data=extra_data
                            )
                        )

                        uploaded_count = uploaded_count + 1

                        # Report progress at most once per second.
                        if (time.time() - time_last_display) > 1:
                            time_last_display = time.time()
                            self.stdout.write(
                                'Time: {}s, Files copied and queued: {}, files processed per second: {}'.format(
                                    int(time.time() - time_start),
                                    uploaded_count,
                                    uploaded_count / (time.time() - time_start)
                                )
                            )
                except (IOError, OSError) as exception:
                    if not options['ignore_errors']:
                        raise

                    self.stderr.write(
                        'Error processing row: {}; {}.'.format(
                            row_number, exception
                        )
                    )

        self.stdout.write(
            'Total files copied and queued: {}'.format(uploaded_count)
        )
        self.stdout.write(
            'Total time: {}'.format(time.time() - time_start)
        )
|
||||
10
mayan/apps/importer/queues.py
Normal file
10
mayan/apps/importer/queues.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from mayan.apps.documents.queues import queue_uploads
|
||||
|
||||
# Register the importer's upload task with the uploads queue that is
# imported from the documents app.
queue_uploads.add_task_type(
    label=_('Import new document'),
    dotted_path='mayan.apps.importer.tasks.task_upload_new_document'
)
|
||||
92
mayan/apps/importer/tasks.py
Normal file
92
mayan/apps/importer/tasks.py
Normal file
@@ -0,0 +1,92 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from django.apps import apps
|
||||
from django.db import OperationalError
|
||||
from django.utils.text import slugify
|
||||
|
||||
from mayan.celery import app
|
||||
|
||||
from mayan.apps.documents.literals import UPLOAD_NEW_DOCUMENT_RETRY_DELAY
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=UPLOAD_NEW_DOCUMENT_RETRY_DELAY, ignore_result=True)
def task_upload_new_document(self, document_type_id, shared_uploaded_file_id, extra_data=None):
    """
    Create a new document from a shared uploaded file and optionally
    attach metadata to it.

    document_type_id: primary key of the DocumentType used to create the
    document.
    shared_uploaded_file_id: primary key of the SharedUploadedFile that
    holds the content of the new document.
    extra_data: optional dictionary. Its `metadata_pairs` key, when
    present, is a list of dictionaries with a `name` and a `value` key.
    A metadata type is obtained or created for each name, attached to the
    document type if it is not already, and the value is stored for the
    new document.

    A database OperationalError while loading the data or creating the
    document triggers a retry of the task and leaves the shared file
    untouched so that the retry can read it again. In every other case the
    shared file is deleted, even if applying the metadata fails.
    """
    DocumentType = apps.get_model(
        app_label='documents', model_name='DocumentType'
    )

    MetadataType = apps.get_model(
        app_label='metadata', model_name='MetadataType'
    )

    SharedUploadedFile = apps.get_model(
        app_label='common', model_name='SharedUploadedFile'
    )

    try:
        document_type = DocumentType.objects.get(pk=document_type_id)
        shared_file = SharedUploadedFile.objects.get(
            pk=shared_uploaded_file_id
        )
    except OperationalError as exception:
        # Log the ID and not the instance: `document_type` is still unbound
        # when the document type query is the statement that failed, and
        # referencing it here would replace the retry with a NameError.
        logger.warning(
            'Operational error during attempt to retrieve shared data for '
            'new document of type: %s; %s. Retrying.', document_type_id,
            exception
        )
        raise self.retry(exc=exception)

    # Stays None when the document could not be created.
    new_document = None

    try:
        with shared_file.open() as file_object:
            new_document = document_type.new_document(file_object=file_object)
    except OperationalError as exception:
        logger.warning(
            'Operational error during attempt to create new document '
            'of type: %s; %s. Retrying.', document_type, exception
        )
        raise self.retry(exc=exception)
    except Exception as exception:
        # Not retryable. Log it and fall through to the cleanup below.
        logger.error(
            'Unexpected error during attempt to create new document '
            'of type: %s; %s', document_type, exception
        )

    # Only reached on success or after a logged unexpected error; the retry
    # paths above always raise, which keeps the shared file for the retry.
    try:
        if new_document is not None and extra_data:
            for pair in extra_data.get('metadata_pairs', []):
                # Derive the internal name from the label found in the CSV.
                name = slugify(pair['name']).replace('-', '_')
                logger.debug(
                    'Metadata pair (label, name, value): %s, %s, %s',
                    pair['name'], name, pair['value']
                )

                # NOTE(review): the lookup uses both label and name. If
                # `name` is unique and a type with the same name but a
                # different label exists this will attempt a duplicate
                # insert; confirm against the MetadataType model.
                metadata_type, created = MetadataType.objects.get_or_create(
                    label=pair['name'], name=name
                )

                # Attach the metadata type to the document type on first use.
                if not new_document.document_type.metadata.filter(metadata_type=metadata_type).exists():
                    logger.debug('Metadata type created')
                    new_document.document_type.metadata.create(
                        metadata_type=metadata_type, required=False
                    )

                new_document.metadata.create(
                    metadata_type=metadata_type, value=pair['value']
                )
    finally:
        # Remove the temporary copy even if applying the metadata raised.
        try:
            shared_file.delete()
        except OperationalError as exception:
            logger.warning(
                'Operational error during attempt to delete shared '
                'file: %s; %s.', shared_file, exception
            )
|
||||
0
mayan/apps/importer/tests/__init__.py
Normal file
0
mayan/apps/importer/tests/__init__.py
Normal file
94
mayan/apps/importer/tests/test_management_commands.py
Normal file
94
mayan/apps/importer/tests/test_management_commands.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import csv
|
||||
|
||||
from django.core import management
|
||||
from django.utils.encoding import force_bytes
|
||||
|
||||
from mayan.apps.documents.models import DocumentType, Document
|
||||
from mayan.apps.documents.tests import GenericDocumentTestCase
|
||||
from mayan.apps.documents.tests.literals import TEST_SMALL_DOCUMENT_PATH
|
||||
from mayan.apps.storage.utils import fs_cleanup, mkstemp
|
||||
|
||||
|
||||
class ImportManagementCommandTestCase(GenericDocumentTestCase):
    """
    Tests for the `import` management command.

    A temporary CSV file with `test_import_count` rows is generated before
    each test and removed afterwards. Every row holds the label of the test
    document type, the path of a small test document and ten filler
    columns named `column 2` to `column 11`.
    """
    auto_upload_document = False
    random_primary_key_enable = False
    test_import_count = 1

    def setUp(self):
        super(ImportManagementCommandTestCase, self).setUp()
        self._create_test_csv_file()

    def tearDown(self):
        self._destroy_test_csv_file()
        super(ImportManagementCommandTestCase, self).tearDown()

    def _create_test_csv_file(self):
        """Write the test CSV file and remember its path and descriptor."""
        self.test_csv_file_descriptor, self.test_csv_path = mkstemp()

        print('Test CSV file: {}'.format(self.test_csv_path))

        with open(self.test_csv_path, mode='wb') as csvfile:
            filewriter = csv.writer(
                csvfile, delimiter=force_bytes(','), quotechar=force_bytes('"'),
                quoting=csv.QUOTE_MINIMAL
            )
            print(
                'Generating test CSV for {} documents'.format(
                    self.test_import_count
                )
            )
            for _ in range(self.test_import_count):
                filewriter.writerow(
                    [
                        self.test_document_type.label, TEST_SMALL_DOCUMENT_PATH,
                        'column 2', 'column 3', 'column 4', 'column 5',
                        'column 6', 'column 7', 'column 8', 'column 9',
                        'column 10', 'column 11',
                    ]
                )

    def _destroy_test_csv_file(self):
        """Remove the test CSV file."""
        fs_cleanup(
            filename=self.test_csv_path,
            file_descriptor=self.test_csv_file_descriptor
        )

    def test_import_csv_read(self):
        # The document type is deleted so that the command must create it.
        self.test_document_type.delete()
        management.call_command('import', self.test_csv_path)

        self.assertGreater(DocumentType.objects.count(), 0)
        self.assertGreater(Document.objects.count(), 0)

    def test_import_document_type_column_mapping(self):
        self.test_document_type.delete()
        management.call_command(
            'import', self.test_csv_path, '--document_type_column', '2'
        )

        self.assertEqual(DocumentType.objects.first().label, 'column 2')
        self.assertGreater(Document.objects.count(), 0)

    def test_import_document_path_column_mapping(self):
        self.test_document_type.delete()
        # Column 2 holds the text `column 2`, which is not a valid path.
        with self.assertRaises(IOError):
            management.call_command(
                'import', self.test_csv_path, '--document_path_column', '2'
            )

    def test_import_metadata_column_mapping(self):
        self.test_document_type.delete()
        management.call_command(
            'import', self.test_csv_path, '--metadata_pairs_column', '2:3,4:5',
        )

        self.assertGreater(DocumentType.objects.count(), 0)
        self.assertGreater(Document.objects.count(), 0)
        self.assertGreater(Document.objects.first().metadata.count(), 0)
        self.assertEqual(
            Document.objects.first().metadata.get(
                metadata_type__name='column_2'
            ).value, 'column 3'
        )
|
||||
@@ -120,6 +120,7 @@ INSTALLED_APPS = (
|
||||
'mayan.apps.document_states',
|
||||
'mayan.apps.documents',
|
||||
'mayan.apps.file_metadata',
|
||||
'mayan.apps.importer',
|
||||
'mayan.apps.linking',
|
||||
'mayan.apps.mailer',
|
||||
'mayan.apps.mayan_statistics',
|
||||
|
||||
Reference in New Issue
Block a user