Source: Change source test behavior

Update sourcs to accept a test argument to their check methods.
This is to allow for explicit test behavior like running the
check method code even when the source is disabled and to
not deleted downloaded content during a test.

Signed-off-by: Roberto Rosario <Roberto.Rosario@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2018-12-18 17:25:28 -04:00
parent 7672aca7a9
commit f77f64cc71
5 changed files with 108 additions and 94 deletions

View File

@@ -199,6 +199,9 @@
The result of the template tag is applied as the title property
of the H3 HTML tag allowing users to view the full title on
mouse hover if it was truncated.
- Updated the source test behavior to allow testing a source
even when the source is disabled and to not deleted downloaded
content during a test.
3.1.9 (2018-11-01)
==================

View File

@@ -80,35 +80,6 @@ class EmailBaseModel(IntervalBaseModel):
verbose_name = _('Email source')
verbose_name_plural = _('Email sources')
def clean(self):
if self.subject_metadata_type:
if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
{
'subject_metadata_type': _(
'Subject metadata type "%(metadata_type)s" is not '
'valid for the document type: %(document_type)s'
) % {
'metadata_type': self.subject_metadata_type,
'document_type': self.document_type
}
}
)
if self.from_metadata_type:
if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
{
'from_metadata_type': _(
'"From" metadata type "%(metadata_type)s" is not '
'valid for the document type: %(document_type)s'
) % {
'metadata_type': self.from_metadata_type,
'document_type': self.document_type
}
}
)
@staticmethod
def process_message(source, message_text, message_properties=None):
from flanker import mime
@@ -194,6 +165,35 @@ class EmailBaseModel(IntervalBaseModel):
metadata_dictionary=metadata_dictionary
)
def clean(self):
if self.subject_metadata_type:
if self.subject_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
{
'subject_metadata_type': _(
'Subject metadata type "%(metadata_type)s" is not '
'valid for the document type: %(document_type)s'
) % {
'metadata_type': self.subject_metadata_type,
'document_type': self.document_type
}
}
)
if self.from_metadata_type:
if self.from_metadata_type.pk not in self.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
{
'from_metadata_type': _(
'"From" metadata type "%(metadata_type)s" is not '
'valid for the document type: %(document_type)s'
) % {
'metadata_type': self.from_metadata_type,
'document_type': self.document_type
}
}
)
class IMAPEmail(EmailBaseModel):
source_type = SOURCE_CHOICE_EMAIL_IMAP
@@ -211,7 +211,7 @@ class IMAPEmail(EmailBaseModel):
verbose_name_plural = _('IMAP email')
# http://www.doughellmann.com/PyMOTW/imaplib/
def check_source(self):
def check_source(self, test=False):
logger.debug('Starting IMAP email fetch')
logger.debug('host: %s', self.host)
logger.debug('ssl: %s', self.ssl)
@@ -235,7 +235,8 @@ class IMAPEmail(EmailBaseModel):
EmailBaseModel.process_message(
source=self, message_text=data[0][1]
)
mailbox.store(message_number, '+FLAGS', '\\Deleted')
if not test:
mailbox.store(message_number, '+FLAGS', '\\Deleted')
mailbox.expunge()
mailbox.close()
@@ -255,7 +256,7 @@ class POP3Email(EmailBaseModel):
verbose_name = _('POP email')
verbose_name_plural = _('POP email')
def check_source(self):
def check_source(self, test=False):
logger.debug('Starting POP3 email fetch')
logger.debug('host: %s', self.host)
logger.debug('ssl: %s', self.ssl)
@@ -284,6 +285,7 @@ class POP3Email(EmailBaseModel):
EmailBaseModel.process_message(
source=self, message_text=complete_message
)
mailbox.dele(message_number)
if not test:
mailbox.dele(message_number)
mailbox.quit()

View File

@@ -46,7 +46,7 @@ class WatchFolderSource(IntervalBaseModel):
verbose_name = _('Watch folder')
verbose_name_plural = _('Watch folders')
def check_source(self):
def check_source(self, test=False):
path = Path(self.folder_path)
if self.include_subdirectories:
@@ -62,4 +62,5 @@ class WatchFolderSource(IntervalBaseModel):
expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y),
label=entry.name
)
entry.unlink()
if not test:
entry.unlink()

View File

@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
@app.task(ignore_result=True)
def task_check_interval_source(source_id):
def task_check_interval_source(source_id, test=False):
Source = apps.get_model(
app_label='sources', model_name='Source'
)
@@ -37,8 +37,8 @@ def task_check_interval_source(source_id):
try:
source = Source.objects.get_subclass(pk=source_id)
if source.enabled:
source.check_source()
if source.enabled or test:
source.check_source(test=test)
except Exception as exception:
logger.error('Error processing source: %s; %s', source, exception)
source.logs.create(
@@ -50,54 +50,15 @@ def task_check_interval_source(source_id):
lock.release()
@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True)
def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None):
SharedUploadedFile = apps.get_model(
app_label='common', model_name='SharedUploadedFile'
@app.task()
def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs):
StagingFolderSource = apps.get_model(
app_label='sources', model_name='StagingFolderSource'
)
staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk)
staging_file = staging_folder.get_file(encoded_filename=encoded_filename)
DocumentType = apps.get_model(
app_label='documents', model_name='DocumentType'
)
Source = apps.get_model(
app_label='sources', model_name='Source'
)
try:
document_type = DocumentType.objects.get(pk=document_type_id)
source = Source.objects.get_subclass(pk=source_id)
shared_upload = SharedUploadedFile.objects.get(
pk=shared_uploaded_file_id
)
if user_id:
user = get_user_model().objects.get(pk=user_id)
else:
user = None
with shared_upload.open() as file_object:
source.upload_document(
file_object=file_object, document_type=document_type,
description=description, label=label, language=language,
querystring=querystring, user=user,
)
except OperationalError as exception:
logger.warning(
'Operational exception while trying to create new document "%s" '
'from source id %d; %s. Retying.',
label or shared_upload.filename, source_id, exception
)
raise self.retry(exc=exception)
else:
try:
shared_upload.delete()
except OperationalError as exception:
logger.warning(
'Operational error during attempt to delete shared upload '
'file: %s; %s. Retrying.', shared_upload, exception
)
return staging_file.generate_image(*args, **kwargs)
@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True)
@@ -198,12 +159,51 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s
)
@app.task()
def task_generate_staging_file_image(staging_folder_pk, encoded_filename, *args, **kwargs):
StagingFolderSource = apps.get_model(
app_label='sources', model_name='StagingFolderSource'
@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True)
def task_upload_document(self, source_id, document_type_id, shared_uploaded_file_id, description=None, label=None, language=None, querystring=None, user_id=None):
SharedUploadedFile = apps.get_model(
app_label='common', model_name='SharedUploadedFile'
)
staging_folder = StagingFolderSource.objects.get(pk=staging_folder_pk)
staging_file = staging_folder.get_file(encoded_filename=encoded_filename)
return staging_file.generate_image(*args, **kwargs)
DocumentType = apps.get_model(
app_label='documents', model_name='DocumentType'
)
Source = apps.get_model(
app_label='sources', model_name='Source'
)
try:
document_type = DocumentType.objects.get(pk=document_type_id)
source = Source.objects.get_subclass(pk=source_id)
shared_upload = SharedUploadedFile.objects.get(
pk=shared_uploaded_file_id
)
if user_id:
user = get_user_model().objects.get(pk=user_id)
else:
user = None
with shared_upload.open() as file_object:
source.upload_document(
file_object=file_object, document_type=document_type,
description=description, label=label, language=language,
querystring=querystring, user=user,
)
except OperationalError as exception:
logger.warning(
'Operational exception while trying to create new document "%s" '
'from source id %d; %s. Retying.',
label or shared_upload.filename, source_id, exception
)
raise self.retry(exc=exception)
else:
try:
shared_upload.delete()
except OperationalError as exception:
logger.warning(
'Operational error during attempt to delete shared upload '
'file: %s; %s. Retrying.', shared_upload, exception
)

View File

@@ -500,12 +500,20 @@ class SetupSourceCheckView(ConfirmView):
Trigger the task_check_interval_source task for a given source to
test/debug their configuration irrespective of the schedule task setup.
"""
view_permission = permission_sources_setup_view
view_permission = permission_sources_setup_create
def get_extra_context(self):
return {
'object': self.get_object(),
'title': _('Trigger check for source "%s"?') % self.get_object(),
'subtitle': _(
'This will execute the source check code even if the source '
'is not enabled. Sources that delete content after '
'downloading will not do so while being tested. Check the '
'source\'s error log for information during testing. A '
'successful test will clear the error log.'
), 'title': _(
'Trigger check for source "%s"?'
) % self.get_object(),
}
def get_object(self):
@@ -514,7 +522,7 @@ class SetupSourceCheckView(ConfirmView):
def view_action(self):
task_check_interval_source.apply_async(
kwargs={
'source_id': self.get_object().pk
'source_id': self.get_object().pk, 'test': True
}
)