From f77f64cc71a760d606083b49bfba4c380106470d Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 18 Dec 2018 17:25:28 -0400 Subject: [PATCH] Source: Change source test behavior Update sourcs to accept a test argument to their check methods. This is to allow for explicit test behavior like running the check method code even when the source is disabled and to not deleted downloaded content during a test. Signed-off-by: Roberto Rosario --- HISTORY.rst | 3 + mayan/apps/sources/models/email_sources.py | 68 +++++------ .../sources/models/watch_folder_sources.py | 5 +- mayan/apps/sources/tasks.py | 112 +++++++++--------- mayan/apps/sources/views.py | 14 ++- 5 files changed, 108 insertions(+), 94 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 69d68f7cd0..b7c3e4be0c 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -199,6 +199,9 @@ The result of the template tag is applied as the title property of the H3 HTML tag allowing users to view the full title on mouse hover if it was truncated. +- Updated the source test behavior to allow testing a source + even when the source is disabled and to not deleted downloaded + content during a test. 3.1.9 (2018-11-01) ================== diff --git a/mayan/apps/sources/models/email_sources.py b/mayan/apps/sources/models/email_sources.py index 18260a29ca..7c83a9d22a 100644 --- a/mayan/apps/sources/models/email_sources.py +++ b/mayan/apps/sources/models/email_sources.py @@ -80,35 +80,6 @@ class EmailBaseModel(IntervalBaseModel): verbose_name = _('Email source') verbose_name_plural = _('Email sources') - def clean(self): - if self.subject_metadata_type: - if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): - raise ValidationError( - { - 'subject_metadata_type': _( - 'Subject metadata type "%(metadata_type)s" is not ' - 'valid for the document type: %(document_type)s' - ) % { - 'metadata_type': self.subject_metadata_type, - 'document_type': self.document_type - } - } - ) - - if self.from_metadata_type: - if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): - raise ValidationError( - { - 'from_metadata_type': _( - '"From" metadata type "%(metadata_type)s" is not ' - 'valid for the document type: %(document_type)s' - ) % { - 'metadata_type': self.from_metadata_type, - 'document_type': self.document_type - } - } - ) - @staticmethod def process_message(source, message_text, message_properties=None): from flanker import mime @@ -194,6 +165,35 @@ class EmailBaseModel(IntervalBaseModel): metadata_dictionary=metadata_dictionary ) + def clean(self): + if self.subject_metadata_type: + if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): + raise ValidationError( + { + 'subject_metadata_type': _( + 'Subject metadata type "%(metadata_type)s" is not ' + 'valid for the document type: %(document_type)s' + ) % { + 'metadata_type': self.subject_metadata_type, + 'document_type': self.document_type + } + } + ) + + if self.from_metadata_type: + if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): + raise ValidationError( + { + 'from_metadata_type': _( + '"From" metadata type "%(metadata_type)s" is not ' + 'valid for the document type: %(document_type)s' + ) % { + 'metadata_type': self.from_metadata_type, + 'document_type': self.document_type + } + } + ) + class IMAPEmail(EmailBaseModel): source_type = SOURCE_CHOICE_EMAIL_IMAP @@ -211,7 +211,7 @@ class IMAPEmail(EmailBaseModel): verbose_name_plural = _('IMAP email') # http://www.doughellmann.com/PyMOTW/imaplib/ - def check_source(self): + def check_source(self, test=False): logger.debug('Starting IMAP email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) @@ -235,7 +235,8 @@ class IMAPEmail(EmailBaseModel): EmailBaseModel.process_message( source=self, message_text=data[0][1] ) - mailbox.store(message_number, '+FLAGS', '\\Deleted') + if not test: + mailbox.store(message_number, '+FLAGS', '\\Deleted') mailbox.expunge() mailbox.close() @@ -255,7 +256,7 @@ class POP3Email(EmailBaseModel): verbose_name = _('POP email') verbose_name_plural = _('POP email') - def check_source(self): + def check_source(self, test=False): logger.debug('Starting POP3 email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) @@ -284,6 +285,7 @@ class POP3Email(EmailBaseModel): EmailBaseModel.process_message( source=self, message_text=complete_message ) - mailbox.dele(message_number) + if not test: + mailbox.dele(message_number) mailbox.quit() diff --git a/mayan/apps/sources/models/watch_folder_sources.py b/mayan/apps/sources/models/watch_folder_sources.py index 07d0b8e5ac..4addda6075 100644 --- a/mayan/apps/sources/models/watch_folder_sources.py +++ b/mayan/apps/sources/models/watch_folder_sources.py @@ -46,7 +46,7 @@ class WatchFolderSource(IntervalBaseModel): verbose_name = _('Watch folder') verbose_name_plural = _('Watch folders') - def check_source(self): + def check_source(self, test=False): path = Path(self.folder_path) if self.include_subdirectories: @@ -62,4 +62,5 @@ class WatchFolderSource(IntervalBaseModel): expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), label=entry.name ) - entry.unlink() + if not test: + entry.unlink() diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index b8a869f927..df995cebf2 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) @app.task(ignore_result=True) -def task_check_interval_source(source_id): +def task_check_interval_source(source_id, test=False): Source = apps.get_model( app_label='sources', model_name='Source' ) @@ -37,8 +37,8 @@ def task_check_interval_source(source_id): try: source = Source.objects.get_subclass(pk=source_id) - if source.enabled: - source.check_source() + if source.enabled or test: + source.check_source(test=test) except Exception as exception: logger.error('Error processing source: %s; %s', source, exception) source.logs.create( @@ -50,54 +50,15 @@ def task_check_interval_source(source_id): lock.release() -@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) -def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None): - SharedUploadedFile = apps.get_model( - app_label='common', model_name='SharedUploadedFile' +@app.task() +def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): + StagingFolderSource = apps.get_model( + app_label='sources', model_name='StagingFolderSource' ) + staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) + staging_file = staging_folder.get_file(encoded_filename=encoded_filename) - DocumentType = apps.get_model( - app_label='documents', model_name='DocumentType' - ) - - Source = apps.get_model( - app_label='sources', model_name='Source' - ) - - try: - document_type = DocumentType.objects.get(pk=document_type_id) - source = Source.objects.get_subclass(pk=source_id) - shared_upload = SharedUploadedFile.objects.get( - pk=shared_uploaded_file_id - ) - - if user_id: - user = get_user_model().objects.get(pk=user_id) - else: - user = None - - with shared_upload.open() as file_object: - source.upload_document( - file_object=file_object, document_type=document_type, - description=description, label=label, language=language, - querystring=querystring, user=user, - ) - - except OperationalError as exception: - logger.warning( - 'Operational exception while trying to create new document "%s" ' - 'from source id %d; %s. Retying.', - label or shared_upload.filename, source_id, exception - ) - raise self.retry(exc=exception) - else: - try: - shared_upload.delete() - except OperationalError as exception: - logger.warning( - 'Operational error during attempt to delete shared upload ' - 'file: %s; %s. Retrying.', shared_upload, exception - ) + return staging_file.generate_image(*args, **kwargs) @app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) @@ -198,12 +159,51 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s ) -@app.task() -def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): - StagingFolderSource = apps.get_model( - app_label='sources', model_name='StagingFolderSource' +@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) +def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None): + SharedUploadedFile = apps.get_model( + app_label='common', model_name='SharedUploadedFile' ) - staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) - staging_file = staging_folder.get_file(encoded_filename=encoded_filename) - return staging_file.generate_image(*args, **kwargs) + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) + + Source = apps.get_model( + app_label='sources', model_name='Source' + ) + + try: + document_type = DocumentType.objects.get(pk=document_type_id) + source = Source.objects.get_subclass(pk=source_id) + shared_upload = SharedUploadedFile.objects.get( + pk=shared_uploaded_file_id + ) + + if user_id: + user = get_user_model().objects.get(pk=user_id) + else: + user = None + + with shared_upload.open() as file_object: + source.upload_document( + file_object=file_object, document_type=document_type, + description=description, label=label, language=language, + querystring=querystring, user=user, + ) + + except OperationalError as exception: + logger.warning( + 'Operational exception while trying to create new document "%s" ' + 'from source id %d; %s. Retying.', + label or shared_upload.filename, source_id, exception + ) + raise self.retry(exc=exception) + else: + try: + shared_upload.delete() + except OperationalError as exception: + logger.warning( + 'Operational error during attempt to delete shared upload ' + 'file: %s; %s. Retrying.', shared_upload, exception + ) diff --git a/mayan/apps/sources/views.py b/mayan/apps/sources/views.py index 2bd9794378..ae6de2ce5f 100644 --- a/mayan/apps/sources/views.py +++ b/mayan/apps/sources/views.py @@ -500,12 +500,20 @@ class SetupSourceCheckView(ConfirmView): Trigger the task_check_interval_source task for a given source to test/debug their configuration irrespective of the schedule task setup. """ - view_permission = permission_sources_setup_view + view_permission = permission_sources_setup_create def get_extra_context(self): return { 'object': self.get_object(), - 'title': _('Trigger check for source "%s"?') % self.get_object(), + 'subtitle': _( + 'This will execute the source check code even if the source ' + 'is not enabled. Sources that delete content after ' + 'downloading will not do so while being tested. Check the ' + 'source\'s error log for information during testing. A ' + 'successful test will clear the error log.' + ), 'title': _( + 'Trigger check for source "%s"?' + ) % self.get_object(), } def get_object(self): @@ -514,7 +522,7 @@ class SetupSourceCheckView(ConfirmView): def view_action(self): task_check_interval_source.apply_async( kwargs={ - 'source_id': self.get_object().pk + 'source_id': self.get_object().pk, 'test': True } )