Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-04-10 02:25:40 -04:00
parent 3618778737
commit 64ad07d95d
6 changed files with 86 additions and 73 deletions

View File

@@ -147,6 +147,7 @@
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
- Add warning when using SQLite as the database backend.
- Use Mailgun's flanker library to process the email sources.
- Add locking for interval sources. This reduces the chance of duplicate documents being created by long-running email downloads.
2.7.3 (2017-09-11)
==================

View File

@@ -466,6 +466,7 @@ Other changes worth mentioning
- Make error messages persistent and increase the timeout of warning to 10 seconds.
- Improve rendering of the details form.
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
- Add locking for interval sources. This reduces the chance of duplicate documents being created by long-running email downloads.
Removals

View File

@@ -59,6 +59,7 @@ SOURCE_CHOICES = (
(SOURCE_CHOICE_EMAIL_IMAP, _('IMAP email')),
)
# Expiry value handed to the locking backend when the interval source check
# task acquires its per-source lock.
# NOTE(review): presumably expressed in seconds, and currently equal to
# DEFAULT_INTERVAL -- confirm the unit against the lock_manager backend.
DEFAULT_SOURCE_LOCK_EXPIRE = 600
DEFAULT_INTERVAL = 600
DEFAULT_METADATA_ATTACHMENT_NAME = 'metadata.yaml'
DEFAULT_POP3_TIMEOUT = 60

View File

@@ -551,10 +551,6 @@ class EmailBaseModel(IntervalBaseModel):
}
)
# TODO: Add lock to avoid running more than once concurrent same document
# download
# TODO: Use message ID for lock
@staticmethod
def process_message(source, message_text):
counter = 1

View File

@@ -10,8 +10,12 @@ from django.utils.translation import ugettext_lazy as _
from mayan.celery import app
from common.compressed_files import CompressedFile, NotACompressedFile
from lock_manager import LockError
from lock_manager.runtime import locking_backend
from .literals import DEFAULT_SOURCE_TASK_RETRY_DELAY
from .literals import (
DEFAULT_SOURCE_LOCK_EXPIRE, DEFAULT_SOURCE_TASK_RETRY_DELAY
)
logger = logging.getLogger(__name__)
@@ -22,10 +26,19 @@ def task_check_interval_source(source_id):
app_label='sources', model_name='Source'
)
source = Source.objects.get_subclass(pk=source_id)
if source.enabled:
lock_id = 'task_check_interval_source-%d' % source_id
try:
logger.debug('trying to acquire lock: %s', lock_id)
lock = locking_backend.acquire_lock(lock_id, DEFAULT_SOURCE_LOCK_EXPIRE)
except LockError:
logger.debug('unable to obtain lock: %s' % lock_id)
else:
logger.debug('acquired lock: %s', lock_id)
try:
source.check_source()
source = Source.objects.get_subclass(pk=source_id)
if source.enabled:
source.check_source()
except Exception as exception:
logger.error('Error processing source: %s; %s', source, exception)
source.logs.create(
@@ -33,6 +46,8 @@ def task_check_interval_source(source_id):
)
else:
source.logs.all().delete()
finally:
lock.release()
@app.task(bind=True, default_retry_delay=DEFAULT_SOURCE_TASK_RETRY_DELAY, ignore_result=True)

View File

@@ -23,71 +23,6 @@ from .literals import (
)
@override_settings(OCR_AUTO_OCR=False)
class UploadDocumentTestCase(BaseTestCase):
    """
    Test creating documents
    """

    def setUp(self):
        """Create the document type used by the watch folder source."""
        super(UploadDocumentTestCase, self).setUp()
        self.document_type = DocumentType.objects.create(
            label=TEST_DOCUMENT_TYPE_LABEL
        )

    def tearDown(self):
        """Delete the document type created in setUp."""
        self.document_type.delete()
        super(UploadDocumentTestCase, self).tearDown()

    def test_issue_gh_163(self):
        """
        Non-ASCII chars in document name failing in upload via watch folder
        gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163
        """
        temporary_directory = mkdtemp()
        # Register the removal right away so the directory is deleted even
        # when one of the assertions below fails; a trailing rmtree() call
        # would be skipped in that case and leak the directory.
        self.addCleanup(shutil.rmtree, temporary_directory)

        shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, temporary_directory)

        watch_folder = WatchFolderSource.objects.create(
            document_type=self.document_type, folder_path=temporary_directory,
            uncompress=SOURCE_UNCOMPRESS_CHOICE_Y
        )
        watch_folder.check_source()

        self.assertEqual(Document.objects.count(), 1)

        document = Document.objects.first()
        self.assertEqual(document.exists(), True)
        self.assertEqual(document.size, 17436)
        self.assertEqual(document.file_mimetype, 'image/png')
        self.assertEqual(document.file_mime_encoding, 'binary')
        self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
        self.assertEqual(document.page_count, 1)

        # Test Non-ASCII named documents inside Non-ASCII named compressed file
        shutil.copy(
            TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, temporary_directory
        )
        watch_folder.check_source()

        # Check the count before indexing so a missing document is reported
        # as a failed assertion instead of an IndexError.
        self.assertEqual(Document.objects.count(), 2)

        # NOTE(review): relies on the queryset's default ordering returning
        # the newly created document second -- confirm against the model.
        document = Document.objects.all()[1]
        self.assertEqual(document.exists(), True)
        self.assertEqual(document.size, 17436)
        self.assertEqual(document.file_mimetype, 'image/png')
        self.assertEqual(document.file_mime_encoding, 'binary')
        self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
        self.assertEqual(document.page_count, 1)
@override_settings(OCR_AUTO_OCR=False)
class CompressedUploadsTestCase(BaseTestCase):
def setUp(self):
@@ -189,3 +124,67 @@ class EmailFilenameDecodingTestCase(BaseTestCase):
'<Document: test-02.png>'
),
)
@override_settings(OCR_AUTO_OCR=False)
class UploadDocumentTestCase(BaseTestCase):
    """
    Test creating documents
    """

    def setUp(self):
        """Create the document type used by the watch folder source."""
        super(UploadDocumentTestCase, self).setUp()
        self.document_type = DocumentType.objects.create(
            label=TEST_DOCUMENT_TYPE_LABEL
        )

    def tearDown(self):
        """Delete the document type created in setUp."""
        self.document_type.delete()
        super(UploadDocumentTestCase, self).tearDown()

    def test_issue_gh_163(self):
        """
        Non-ASCII chars in document name failing in upload via watch folder
        gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163
        """
        temporary_directory = mkdtemp()
        # Register the removal right away so the directory is deleted even
        # when one of the assertions below fails; a trailing rmtree() call
        # would be skipped in that case and leak the directory.
        self.addCleanup(shutil.rmtree, temporary_directory)

        shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, temporary_directory)

        watch_folder = WatchFolderSource.objects.create(
            document_type=self.document_type, folder_path=temporary_directory,
            uncompress=SOURCE_UNCOMPRESS_CHOICE_Y
        )
        watch_folder.check_source()

        self.assertEqual(Document.objects.count(), 1)

        document = Document.objects.first()
        self.assertEqual(document.exists(), True)
        self.assertEqual(document.size, 17436)
        self.assertEqual(document.file_mimetype, 'image/png')
        self.assertEqual(document.file_mime_encoding, 'binary')
        self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
        self.assertEqual(document.page_count, 1)

        # Test Non-ASCII named documents inside Non-ASCII named compressed file
        shutil.copy(
            TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, temporary_directory
        )
        watch_folder.check_source()

        # Check the count before indexing so a missing document is reported
        # as a failed assertion instead of an IndexError.
        self.assertEqual(Document.objects.count(), 2)

        # NOTE(review): relies on the queryset's default ordering returning
        # the newly created document second -- confirm against the model.
        document = Document.objects.all()[1]
        self.assertEqual(document.exists(), True)
        self.assertEqual(document.size, 17436)
        self.assertEqual(document.file_mimetype, 'image/png')
        self.assertEqual(document.file_mime_encoding, 'binary')
        self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
        self.assertEqual(document.page_count, 1)