diff --git a/HISTORY.rst b/HISTORY.rst index e9dc7a1ca2..aea9d48e8f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,9 @@ * Split sources models into separate modules * Add support for subfolder scanning to watchfolders. Closes GitLab issue #498 and #563. +* Updated the source check behavior to allow checking a source + even when the source is disabled and to not deleted processed files + during a check. 3.1.10 (2019-04-04) =================== diff --git a/mayan/apps/sources/models/email_sources.py b/mayan/apps/sources/models/email_sources.py index 95f0ef21fd..cea2bf1a15 100644 --- a/mayan/apps/sources/models/email_sources.py +++ b/mayan/apps/sources/models/email_sources.py @@ -80,35 +80,6 @@ class EmailBaseModel(IntervalBaseModel): verbose_name = _('Email source') verbose_name_plural = _('Email sources') - def clean(self): - if self.subject_metadata_type: - if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): - raise ValidationError( - { - 'subject_metadata_type': _( - 'Subject metadata type "%(metadata_type)s" is not ' - 'valid for the document type: %(document_type)s' - ) % { - 'metadata_type': self.subject_metadata_type, - 'document_type': self.document_type - } - } - ) - - if self.from_metadata_type: - if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): - raise ValidationError( - { - 'from_metadata_type': _( - '"From" metadata type "%(metadata_type)s" is not ' - 'valid for the document type: %(document_type)s' - ) % { - 'metadata_type': self.from_metadata_type, - 'document_type': self.document_type - } - } - ) - @staticmethod def process_message(source, message_text, message_properties=None): from flanker import mime @@ -199,6 +170,35 @@ class EmailBaseModel(IntervalBaseModel): metadata_dictionary=metadata_dictionary ) + def clean(self): + if self.subject_metadata_type: + if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): + raise ValidationError( + { + 'subject_metadata_type': _( + 'Subject metadata type "%(metadata_type)s" is not ' + 'valid for the document type: %(document_type)s' + ) % { + 'metadata_type': self.subject_metadata_type, + 'document_type': self.document_type + } + } + ) + + if self.from_metadata_type: + if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True): + raise ValidationError( + { + 'from_metadata_type': _( + '"From" metadata type "%(metadata_type)s" is not ' + 'valid for the document type: %(document_type)s' + ) % { + 'metadata_type': self.from_metadata_type, + 'document_type': self.document_type + } + } + ) + class IMAPEmail(EmailBaseModel): source_type = SOURCE_CHOICE_EMAIL_IMAP @@ -216,7 +216,7 @@ class IMAPEmail(EmailBaseModel): verbose_name_plural = _('IMAP email') # http://www.doughellmann.com/PyMOTW/imaplib/ - def check_source(self): + def check_source(self, test=False): logger.debug('Starting IMAP email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) @@ -240,7 +240,8 @@ class IMAPEmail(EmailBaseModel): EmailBaseModel.process_message( source=self, message_text=data[0][1] ) - mailbox.store(message_number, '+FLAGS', '\\Deleted') + if not test: + mailbox.store(message_number, '+FLAGS', '\\Deleted') mailbox.expunge() mailbox.close() @@ -260,7 +261,7 @@ class POP3Email(EmailBaseModel): verbose_name = _('POP email') verbose_name_plural = _('POP email') - def check_source(self): + def check_source(self, test=False): logger.debug('Starting POP3 email fetch') logger.debug('host: %s', self.host) logger.debug('ssl: %s', self.ssl) @@ -289,6 +290,7 @@ class POP3Email(EmailBaseModel): EmailBaseModel.process_message( source=self, message_text=complete_message ) - mailbox.dele(message_number) + if not test: + mailbox.dele(message_number) mailbox.quit() diff --git a/mayan/apps/sources/models/watch_folder_sources.py b/mayan/apps/sources/models/watch_folder_sources.py index 07d0b8e5ac..4addda6075 100644 --- a/mayan/apps/sources/models/watch_folder_sources.py +++ b/mayan/apps/sources/models/watch_folder_sources.py @@ -46,7 +46,7 @@ class WatchFolderSource(IntervalBaseModel): verbose_name = _('Watch folder') verbose_name_plural = _('Watch folders') - def check_source(self): + def check_source(self, test=False): path = Path(self.folder_path) if self.include_subdirectories: @@ -62,4 +62,5 @@ class WatchFolderSource(IntervalBaseModel): expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), label=entry.name ) - entry.unlink() + if not test: + entry.unlink() diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index e922e8a00c..3dfa1234c1 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) @app.task(ignore_result=True) -def task_check_interval_source(source_id): +def task_check_interval_source(source_id, test=False): Source = apps.get_model( app_label='sources', model_name='Source' ) @@ -38,8 +38,8 @@ def task_check_interval_source(source_id): try: source = Source.objects.get_subclass(pk=source_id) - if source.enabled: - source.check_source() + if source.enabled or test: + source.check_source(test=test) except Exception as exception: logger.error('Error processing source: %s; %s', source, exception) source.logs.create( @@ -51,54 +51,15 @@ def task_check_interval_source(source_id): lock.release() -@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) -def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None): - SharedUploadedFile = apps.get_model( - app_label='common', model_name='SharedUploadedFile' +@app.task() +def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): + StagingFolderSource = apps.get_model( + app_label='sources', model_name='StagingFolderSource' ) + staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) + staging_file = staging_folder.get_file(encoded_filename=encoded_filename) - DocumentType = apps.get_model( - app_label='documents', model_name='DocumentType' - ) - - Source = apps.get_model( - app_label='sources', model_name='Source' - ) - - try: - document_type = DocumentType.objects.get(pk=document_type_id) - source = Source.objects.get_subclass(pk=source_id) - shared_upload = SharedUploadedFile.objects.get( - pk=shared_uploaded_file_id - ) - - if user_id: - user = get_user_model().objects.get(pk=user_id) - else: - user = None - - with shared_upload.open() as file_object: - source.upload_document( - file_object=file_object, document_type=document_type, - description=description, label=label, language=language, - querystring=querystring, user=user, - ) - - except OperationalError as exception: - logger.warning( - 'Operational exception while trying to create new document "%s" ' - 'from source id %d; %s. Retying.', - label or shared_upload.filename, source_id, exception - ) - raise self.retry(exc=exception) - else: - try: - shared_upload.delete() - except OperationalError as exception: - logger.warning( - 'Operational error during attempt to delete shared upload ' - 'file: %s; %s. Retrying.', shared_upload, exception - ) + return staging_file.generate_image(*args, **kwargs) @app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) @@ -199,12 +160,51 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s ) -@app.task() -def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs): - StagingFolderSource = apps.get_model( - app_label='sources', model_name='StagingFolderSource' +@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True) +def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None): + SharedUploadedFile = apps.get_model( + app_label='common', model_name='SharedUploadedFile' ) - staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk) - staging_file = staging_folder.get_file(encoded_filename=encoded_filename) - return staging_file.generate_image(*args, **kwargs) + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) + + Source = apps.get_model( + app_label='sources', model_name='Source' + ) + + try: + document_type = DocumentType.objects.get(pk=document_type_id) + source = Source.objects.get_subclass(pk=source_id) + shared_upload = SharedUploadedFile.objects.get( + pk=shared_uploaded_file_id + ) + + if user_id: + user = get_user_model().objects.get(pk=user_id) + else: + user = None + + with shared_upload.open() as file_object: + source.upload_document( + file_object=file_object, document_type=document_type, + description=description, label=label, language=language, + querystring=querystring, user=user, + ) + + except OperationalError as exception: + logger.warning( + 'Operational exception while trying to create new document "%s" ' + 'from source id %d; %s. Retying.', + label or shared_upload.filename, source_id, exception + ) + raise self.retry(exc=exception) + else: + try: + shared_upload.delete() + except OperationalError as exception: + logger.warning( + 'Operational error during attempt to delete shared upload ' + 'file: %s; %s. Retrying.', shared_upload, exception + ) diff --git a/mayan/apps/sources/views.py b/mayan/apps/sources/views.py index dd63452cb7..d38b99fa92 100644 --- a/mayan/apps/sources/views.py +++ b/mayan/apps/sources/views.py @@ -502,12 +502,20 @@ class SetupSourceCheckView(ConfirmView): Trigger the task_check_interval_source task for a given source to test/debug their configuration irrespective of the schedule task setup. """ - view_permission = permission_sources_setup_view + view_permission = permission_sources_setup_create def get_extra_context(self): return { 'object': self.get_object(), - 'title': _('Trigger check for source "%s"?') % self.get_object(), + 'subtitle': _( + 'This will execute the source check code even if the source ' + 'is not enabled. Sources that delete content after ' + 'downloading will not do so while being tested. Check the ' + 'source\'s error log for information during testing. A ' + 'successful test will clear the error log.' + ), 'title': _( + 'Trigger check for source "%s"?' + ) % self.get_object(), } def get_object(self): @@ -516,7 +524,7 @@ class SetupSourceCheckView(ConfirmView): def view_action(self): task_check_interval_source.apply_async( kwargs={ - 'source_id': self.get_object().pk + 'source_id': self.get_object().pk, 'test': True } )