From 382173351ab9749009b0ba6408f3f5bf4600f43e Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 18 Dec 2018 17:25:28 -0400 Subject: [PATCH] Source: Change source test behavior Update sourcs to accept a test argument to their check methods. This is to allow for explicit test behavior like running the check method code even when the source is disabled and to not deleted downloaded content during a test. Signed-off-by: Roberto Rosario --- HISTORY.rst | 3 + mayan/apps/sources/models/email_sources.py | 68 +++++------ .../sources/models/watch_folder_sources.py | 5 +- mayan/apps/sources/tasks.py | 112 +++++++++--------- mayan/apps/sources/views.py | 14 ++- 5 files changed, 108 insertions(+), 94 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index e9dc7a1ca2..aea9d48e8f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,9 @@ * Split sources models into separate modules * Add support for subfolder scanning to watchfolders. Closes GitLab issue #498 and #563. +* Updated the source check behavior to allow checking a source + even when the source is disabled and to not deleted processed files + during a check. 3.1.10 (2019-04-04) =================== diff --git a/mayan/apps/sources/models/email_sources.py b/mayan/apps/sources/models/email_sources.py index 95f0ef21fd..cea2bf1a15 100644 --- a/mayan/apps/sources/models/email_sources.py +++ b/mayan/apps/sources/models/email_sources.py @@ -80,35 +80,6 @@ class EmailBaseModel(IntervalBaseModel): verbose_name = _('Email source') verbose_name_plural = _('Email sources') - def clean(self): - if self.subject_metadata_type: - if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): - raise ValidationError( - { - 'subject_metadata_type': _( - 'Subject metadata type "%(metadata_type)s" is not ' - 'valid for the document type: %(document_type)s' - ) % { - 'metadata_type': self.subject_metadata_type, - 'document_type': self.document_type - } - } - ) - - if self.from_metadata_type: - if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): - raise ValidationError( - { - 'from_metadata_type': _( - '"From" metadata type "%(metadata_type)s" is not ' - 'valid for the document type: %(document_type)s' - ) % { - 'metadata_type': self.from_metadata_type, - 'document_type': self.document_type - } - } - ) - @staticmethod def process_message(source, message_text, message_properties=None): from flanker import mime @@ -199,6 +170,35 @@ class EmailBaseModel(IntervalBaseModel): metadata_dictionary=metadata_dictionary ) + def clean(self): + if self.subject_metadata_type: + if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): + raise ValidationError( + { + 'subject_metadata_type': _( + 'Subject metadata type "%(metadata_type)s" is not ' + 'valid for the document type: %(document_type)s' + ) % { + 'metadata_type': self.subject_metadata_type, + 'document_type': self.document_type + } + } + ) + + if self.from_metadata_type: + if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): + raise ValidationError( + { + 'from_metadata_type': _( + '"From" metadata type "%(metadata_type)s" is not ' + 'valid for the document type: %(document_type)s' + ) % { + 'metadata_type': self.from_metadata_type, + 'document_type': self.document_type + } + } + ) + class IMAPEmail(EmailBaseModel): source_type = SOURCE_CHOICE_EMAIL_IMAP @@ -216,7 +216,7 @@ class IMAPEmail(EmailBaseModel): verbose_name_plural = _('IMAP email') # http://www.doughellmann.com/PyMOTW/imaplib/ - def check_source(self): + def check_source(self, test=False): logger.debug('Starting IMAP email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) @@ -240,7 +240,8 @@ class IMAPEmail(EmailBaseModel): EmailBaseModel.process_message( source=self, message_text=data[0][1] ) - mailbox.store(message_number, '+FLAGS', '\\Deleted') + if not test: + mailbox.store(message_number, '+FLAGS', '\\Deleted') mailbox.expunge() mailbox.close() @@ -260,7 +261,7 @@ class POP3Email(EmailBaseModel): verbose_name = _('POP email') verbose_name_plural = _('POP email') - def check_source(self): + def check_source(self, test=False): logger.debug('Starting POP3 email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) @@ -289,6 +290,7 @@ class POP3Email(EmailBaseModel): EmailBaseModel.process_message( source=self, message_text=complete_message ) - mailbox.dele(message_number) + if not test: + mailbox.dele(message_number) mailbox.quit() diff --git a/mayan/apps/sources/models/watch_folder_sources.py b/mayan/apps/sources/models/watch_folder_sources.py index 07d0b8e5ac..4addda6075 100644 --- a/mayan/apps/sources/models/watch_folder_sources.py +++ b/mayan/apps/sources/models/watch_folder_sources.py @@ -46,7 +46,7 @@ class WatchFolderSource(IntervalBaseModel): verbose_name = _('Watch folder') verbose_name_plural = _('Watch folders') - def check_source(self): + def check_source(self, test=False): path = Path(self.folder_path) if self.include_subdirectories: @@ -62,4 +62,5 @@ class WatchFolderSource(IntervalBaseModel): expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), label=entry.name ) - entry.unlink() + if not test: + entry.unlink() diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index e922e8a00c..3dfa1234c1 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) @app.task(ignore_result=True) -def task_check_interval_source(source_id): +def task_check_interval_source(source_id, test=False): Source = apps.get_model( app_label='sources', model_name='Source' ) @@ -38,8 +38,8 @@ def task_check_interval_source(source_id): try: source = Source.objects.get_subclass(pk=source_id) - if source.enabled: - source.check_source() + if source.enabled or test: + source.check_source(test=test) except Exception as exception: logger.error('Error processing source: %s; %s', source, exception) source.logs.create( @@ -51,54 +51,15 @@ def task_check_interval_source(source_id): lock.release() -@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) -def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None): - SharedUploadedFile = apps.get_model( - app_label='common', model_name='SharedUploadedFile' +@app.task() +def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): + StagingFolderSource = apps.get_model( + app_label='sources', model_name='StagingFolderSource' ) + staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) + staging_file = staging_folder.get_file(encoded_filename=encoded_filename) - DocumentType = apps.get_model( - app_label='documents', model_name='DocumentType' - ) - - Source = apps.get_model( - app_label='sources', model_name='Source' - ) - - try: - document_type = DocumentType.objects.get(pk=document_type_id) - source = Source.objects.get_subclass(pk=source_id) - shared_upload = SharedUploadedFile.objects.get( - pk=shared_uploaded_file_id - ) - - if user_id: - user = get_user_model().objects.get(pk=user_id) - else: - user = None - - with shared_upload.open() as file_object: - source.upload_document( - file_object=file_object, document_type=document_type, - description=description, label=label, language=language, - querystring=querystring, user=user, - ) - - except OperationalError as exception: - logger.warning( - 'Operational exception while trying to create new document "%s" ' - 'from source id %d; %s. Retying.', - label or shared_upload.filename, source_id, exception - ) - raise self.retry(exc=exception) - else: - try: - shared_upload.delete() - except OperationalError as exception: - logger.warning( - 'Operational error during attempt to delete shared upload ' - 'file: %s; %s. Retrying.', shared_upload, exception - ) + return staging_file.generate_image(*args, **kwargs) @app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) @@ -199,12 +160,51 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s ) -@app.task() -def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): - StagingFolderSource = apps.get_model( - app_label='sources', model_name='StagingFolderSource' +@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) +def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None): + SharedUploadedFile = apps.get_model( + app_label='common', model_name='SharedUploadedFile' ) - staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) - staging_file = staging_folder.get_file(encoded_filename=encoded_filename) - return staging_file.generate_image(*args, **kwargs) + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) + + Source = apps.get_model( + app_label='sources', model_name='Source' + ) + + try: + document_type = DocumentType.objects.get(pk=document_type_id) + source = Source.objects.get_subclass(pk=source_id) + shared_upload = SharedUploadedFile.objects.get( + pk=shared_uploaded_file_id + ) + + if user_id: + user = get_user_model().objects.get(pk=user_id) + else: + user = None + + with shared_upload.open() as file_object: + source.upload_document( + file_object=file_object, document_type=document_type, + description=description, label=label, language=language, + querystring=querystring, user=user, + ) + + except OperationalError as exception: + logger.warning( + 'Operational exception while trying to create new document "%s" ' + 'from source id %d; %s. Retying.', + label or shared_upload.filename, source_id, exception + ) + raise self.retry(exc=exception) + else: + try: + shared_upload.delete() + except OperationalError as exception: + logger.warning( + 'Operational error during attempt to delete shared upload ' + 'file: %s; %s. Retrying.', shared_upload, exception + ) diff --git a/mayan/apps/sources/views.py b/mayan/apps/sources/views.py index dd63452cb7..d38b99fa92 100644 --- a/mayan/apps/sources/views.py +++ b/mayan/apps/sources/views.py @@ -502,12 +502,20 @@ class SetupSourceCheckView(ConfirmView): Trigger the task_check_interval_source task for a given source to test/debug their configuration irrespective of the schedule task setup. """ - view_permission = permission_sources_setup_view + view_permission = permission_sources_setup_create def get_extra_context(self): return { 'object': self.get_object(), - 'title': _('Trigger check for source "%s"?') % self.get_object(), + 'subtitle': _( + 'This will execute the source check code even if the source ' + 'is not enabled. Sources that delete content after ' + 'downloading will not do so while being tested. Check the ' + 'source\'s error log for information during testing. A ' + 'successful test will clear the error log.' + ), 'title': _( + 'Trigger check for source "%s"?' + ) % self.get_object(), } def get_object(self): @@ -516,7 +524,7 @@ class SetupSourceCheckView(ConfirmView): def view_action(self): task_check_interval_source.apply_async( kwargs={ - 'source_id': self.get_object().pk + 'source_id': self.get_object().pk, 'test': True } )