From 8a70e325c185fd6136e801799fbb452821f8b91a Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:03:06 -0400 Subject: [PATCH 01/10] Simplyfied and updated the lock manager app --- apps/lock_manager/admin.py | 10 +++++++ apps/lock_manager/conf/settings.py | 2 +- apps/lock_manager/managers.py | 46 +++++++++++------------------- apps/lock_manager/models.py | 21 ++++++++++---- apps/lock_manager/tests.py | 7 ----- 5 files changed, 44 insertions(+), 42 deletions(-) create mode 100644 apps/lock_manager/admin.py diff --git a/apps/lock_manager/admin.py b/apps/lock_manager/admin.py new file mode 100644 index 0000000000..12dbee8fe1 --- /dev/null +++ b/apps/lock_manager/admin.py @@ -0,0 +1,10 @@ +from django.contrib import admin + +from lock_manager.models import Lock + + +class LockAdmin(admin.ModelAdmin): + model = Lock + + +admin.site.register(Lock, LockAdmin) diff --git a/apps/lock_manager/conf/settings.py b/apps/lock_manager/conf/settings.py index 05f4ae374c..869e121f4d 100644 --- a/apps/lock_manager/conf/settings.py +++ b/apps/lock_manager/conf/settings.py @@ -1,5 +1,5 @@ from django.conf import settings -DEFAULT_LOCK_TIMEOUT_VALUE = 10 +DEFAULT_LOCK_TIMEOUT_VALUE = 30 DEFAULT_LOCK_TIMEOUT = getattr(settings, 'LOCK_MANAGER_DEFAULT_LOCK_TIMEOUT', DEFAULT_LOCK_TIMEOUT_VALUE) diff --git a/apps/lock_manager/managers.py b/apps/lock_manager/managers.py index 96c57ca02b..206df912b6 100644 --- a/apps/lock_manager/managers.py +++ b/apps/lock_manager/managers.py @@ -1,9 +1,4 @@ -try: - from psycopg2 import OperationalError -except ImportError: - class OperationalError(Exception): - pass - +import logging import datetime from django.db.utils import DatabaseError @@ -13,40 +8,33 @@ from django.db import models from lock_manager.exceptions import LockError +logger = logging.getLogger(__name__) + class LockManager(models.Manager): - @transaction.commit_manually + @transaction.commit_on_success def acquire_lock(self, name, timeout=None): + logger.debug('DEBUG: 
trying to acquire lock: %s' % name) lock = self.model(name=name, timeout=timeout) try: lock.save(force_insert=True) + logger.debug('DEBUG: acquired lock: %s' % name) + return lock except IntegrityError: - transaction.rollback() # There is already an existing lock - # Check it's expiration date and if expired, delete it and - # create it again - lock = self.model.objects.get(name=name) - transaction.rollback() + # Check it's expiration date and if expired, reset it + try: + lock = self.model.objects.get(name=name) + except self.model.DoesNotExist: + # Table based locking + logger.debug('DEBUG: lock: %s does not exist' % name) + raise LockError('Unable to acquire lock') if datetime.datetime.now() > lock.creation_datetime + datetime.timedelta(seconds=lock.timeout): - self.release_lock(name) + logger.debug('DEBUG: reseting deleting stale lock: %s' % name) lock.timeout=timeout + logger.debug('DEBUG: try to reacquire stale lock: %s' % name) lock.save() - transaction.commit() + return lock else: raise LockError('Unable to acquire lock') - except DatabaseError: - transaction.rollback() - # Special case for ./manage.py syncdb - except (OperationalError, ImproperlyConfigured): - transaction.rollback() - # Special for DjangoZoom, which executes collectstatic media - # doing syncdb and creating the database tables - else: - transaction.commit() - - @transaction.commit_manually - def release_lock(self, name): - lock = self.model.objects.get(name=name) - lock.delete() - transaction.commit() diff --git a/apps/lock_manager/models.py b/apps/lock_manager/models.py index 53c6b6e49d..535d3366a6 100644 --- a/apps/lock_manager/models.py +++ b/apps/lock_manager/models.py @@ -10,17 +10,28 @@ from lock_manager.conf.settings import DEFAULT_LOCK_TIMEOUT class Lock(models.Model): creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime')) timeout = models.IntegerField(default=DEFAULT_LOCK_TIMEOUT, verbose_name=_(u'timeout')) - name = models.CharField(max_length=32, 
verbose_name=_(u'name'), unique=True) - + name = models.CharField(max_length=48, verbose_name=_(u'name'), unique=True) + objects = LockManager() - + def __unicode__(self): return self.name - + def save(self, *args, **kwargs): self.creation_datetime = datetime.datetime.now() + if not self.timeout and not kwarget.get('timeout'): + self.timeout = DEFAULT_LOCK_TIMEOUT + super(Lock, self).save(*args, **kwargs) - + + def release(self): + try: + lock = Lock.objects.get(name=self.name, creation_datetime=self.creation_datetime) + lock.delete() + except Lock.DoesNotExist: + # Out lock expired and was reassigned + pass + class Meta: verbose_name = _(u'lock') verbose_name_plural = _(u'locks') diff --git a/apps/lock_manager/tests.py b/apps/lock_manager/tests.py index 501deb776c..140393d027 100644 --- a/apps/lock_manager/tests.py +++ b/apps/lock_manager/tests.py @@ -1,10 +1,3 @@ -""" -This file demonstrates writing tests using the unittest module. These will pass -when you run "manage.py test". - -Replace this with more appropriate tests for your application. 
-""" - from django.test import TestCase From c65b6a0309acbbd431a91ae5ff93de89f2934c62 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:06:21 -0400 Subject: [PATCH 02/10] Added aditional loggin to the locking app --- apps/lock_manager/managers.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/apps/lock_manager/managers.py b/apps/lock_manager/managers.py index 206df912b6..6c69fada19 100644 --- a/apps/lock_manager/managers.py +++ b/apps/lock_manager/managers.py @@ -14,11 +14,11 @@ logger = logging.getLogger(__name__) class LockManager(models.Manager): @transaction.commit_on_success def acquire_lock(self, name, timeout=None): - logger.debug('DEBUG: trying to acquire lock: %s' % name) + logger.debug('trying to acquire lock: %s' % name) lock = self.model(name=name, timeout=timeout) try: lock.save(force_insert=True) - logger.debug('DEBUG: acquired lock: %s' % name) + logger.debug('acquired lock: %s' % name) return lock except IntegrityError: # There is already an existing lock @@ -27,14 +27,15 @@ class LockManager(models.Manager): lock = self.model.objects.get(name=name) except self.model.DoesNotExist: # Table based locking - logger.debug('DEBUG: lock: %s does not exist' % name) + logger.debug('lock: %s does not exist' % name) raise LockError('Unable to acquire lock') if datetime.datetime.now() > lock.creation_datetime + datetime.timedelta(seconds=lock.timeout): - logger.debug('DEBUG: reseting deleting stale lock: %s' % name) + logger.debug('reseting deleting stale lock: %s' % name) lock.timeout=timeout - logger.debug('DEBUG: try to reacquire stale lock: %s' % name) + logger.debug('try to reacquire stale lock: %s' % name) lock.save() return lock else: + logger.debug('unable to acquire lock: %s' % name) raise LockError('Unable to acquire lock') From 614ece827f15aae2de01dae12de3998f57a9a898 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:06:33 -0400 Subject: [PATCH 03/10] Updated FAQ with 
unoconv information --- docs/faq.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index e017c806b1..241e76b4a2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -119,3 +119,13 @@ How to enable x-sendile support for ``Apache`` XSendFile on XSendFileAllowAbove on + + +The included version of ``unoconv`` in my distribution is too old +----------------------------------------------------------------- + + * Only the 'unoconv' file from https://github.com/dagwieers/unoconv is needed. + Put it in a user-designated directory for binaries such as /usr/local/bin and + set up Mayan's configuration option in your settings_local.py file like this:: + + CONVERTER_UNOCONV_PATH = '/usr/local/bin/unoconv' From a6151fd9e5c5205b217f6cfae6dc661e936623ec Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:07:09 -0400 Subject: [PATCH 04/10] Added non working memcache backend to the lock manager app --- apps/lock_manager/backend_memcached.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 apps/lock_manager/backend_memcached.py diff --git a/apps/lock_manager/backend_memcached.py b/apps/lock_manager/backend_memcached.py new file mode 100644 index 0000000000..8458fbb395 --- /dev/null +++ b/apps/lock_manager/backend_memcached.py @@ -0,0 +1,16 @@ +from django.core.cache import get_cache + +if CACHE_URI: + try: + cache_backend = get_cache(CACHE_URI) + except ImportError: + # TODO: display or log error + cache_backend = None +else: + cache_backend = None +if cache_backend: + acquire_lock = lambda lock_id: cache_backend.add(lock_id, u'true', LOCK_EXPIRE) + release_lock = lambda lock_id: cache_backend.delete(lock_id) +else: + acquire_lock = lambda lock_id: True + release_lock = lambda lock_id: True From c9e8f2fac0ec8d49ef06f4a007e3c38bbf937239 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:07:29 -0400 Subject: [PATCH 05/10] Updated the ocr app to use the lock manager ---
apps/ocr/tasks.py | 44 +++++++++++--------------------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index dee3d64da9..125d8fa2d0 100644 --- a/apps/ocr/tasks.py +++ b/apps/ocr/tasks.py @@ -4,9 +4,10 @@ from time import sleep from random import random from django.db.models import Q -from django.core.cache import get_cache from job_processor.api import process_job +from lock_manager.models import Lock +from lock_manager.exceptions import LockError from ocr.api import do_document_ocr from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \ @@ -21,36 +22,13 @@ from ocr.conf.settings import QUEUE_PROCESSING_INTERVAL LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes # TODO: Tie LOCK_EXPIRATION with hard task timeout -if CACHE_URI: - try: - cache_backend = get_cache(CACHE_URI) - except ImportError: - # TODO: display or log error - cache_backend = None -else: - cache_backend = None - - -def random_delay(): - sleep(random() * (QUEUE_PROCESSING_INTERVAL - 1)) - return True - - -if cache_backend: - acquire_lock = lambda lock_id: cache_backend.add(lock_id, u'true', LOCK_EXPIRE) - release_lock = lambda lock_id: cache_backend.delete(lock_id) -else: - acquire_lock = lambda lock_id: True - release_lock = lambda lock_id: True - - def task_process_queue_document(queue_document_id): lock_id = u'%s-lock-%d' % (u'task_process_queue_document', queue_document_id) - if acquire_lock(lock_id): + try: + lock = Lock.objects.acquire_lock(lock_id, LOCK_EXPIRE) queue_document = QueueDocument.objects.get(pk=queue_document_id) queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING queue_document.node_name = platform.node() - #queue_document.result = task_process_queue_document.request.id queue_document.save() try: do_document_ocr(queue_document) @@ -59,7 +37,10 @@ def task_process_queue_document(queue_document_id): queue_document.state = QUEUEDOCUMENT_STATE_ERROR queue_document.result = e queue_document.save() - 
release_lock(lock_id) + + lock.release() + except LockError: + pass def reset_orphans(): @@ -86,11 +67,9 @@ def reset_orphans(): orphan.node_name = None orphan.save() ''' - + def task_process_document_queues(): - if not cache_backend: - random_delay() # reset_orphans() # Causes problems with big clusters increased latency # Disabled until better solution @@ -108,8 +87,7 @@ def task_process_document_queues(): if oldest_queued_document_qs: oldest_queued_document = oldest_queued_document_qs.order_by('datetime_submitted')[0] - #task_process_queue_document.delay(oldest_queued_document.pk) - #task_process_queue_document(oldest_queued_document.pk) process_job(task_process_queue_document, oldest_queued_document.pk) except Exception, e: - print 'DocumentQueueWatcher exception: %s' % e + pass + #print 'DocumentQueueWatcher exception: %s' % e From 78685b9fc57fb930bdecfce4927503e89a48603a Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:22:20 -0400 Subject: [PATCH 06/10] Reduce the ocr lock name size --- apps/ocr/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index 125d8fa2d0..74fb240867 100644 --- a/apps/ocr/tasks.py +++ b/apps/ocr/tasks.py @@ -23,7 +23,7 @@ LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes # TODO: Tie LOCK_EXPIRATION with hard task timeout def task_process_queue_document(queue_document_id): - lock_id = u'%s-lock-%d' % (u'task_process_queue_document', queue_document_id) + lock_id = u'task_proc_queue_doc-%d' % queue_document_id try: lock = Lock.objects.acquire_lock(lock_id, LOCK_EXPIRE) queue_document = QueueDocument.objects.get(pk=queue_document_id) From 1367fb9c66458318fb33fcbda07c149e766f2ace Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:41:32 -0400 Subject: [PATCH 07/10] Added lock manager abstraction --- apps/lock_manager/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/lock_manager/__init__.py 
b/apps/lock_manager/__init__.py index e69de29bb2..34913f856f 100644 --- a/apps/lock_manager/__init__.py +++ b/apps/lock_manager/__init__.py @@ -0,0 +1,4 @@ +from lock_manager.exceptions import LockError +from lock_manager.models import Lock as LockModel + +Lock = LockModel.objects From dc63c3225e44379bf038bd317ac10c1288dbc7a4 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:42:04 -0400 Subject: [PATCH 08/10] Updated ocr task to use the new lock manager abstracted class --- apps/ocr/tasks.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index 74fb240867..06f55bd69d 100644 --- a/apps/ocr/tasks.py +++ b/apps/ocr/tasks.py @@ -6,8 +6,7 @@ from random import random from django.db.models import Q from job_processor.api import process_job -from lock_manager.models import Lock -from lock_manager.exceptions import LockError +from lock_manager import Lock, LockError from ocr.api import do_document_ocr from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \ @@ -25,7 +24,7 @@ LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes def task_process_queue_document(queue_document_id): lock_id = u'task_proc_queue_doc-%d' % queue_document_id try: - lock = Lock.objects.acquire_lock(lock_id, LOCK_EXPIRE) + lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) queue_document = QueueDocument.objects.get(pk=queue_document_id) queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING queue_document.node_name = platform.node() From 290fcc925bedaccbd506ba536f3942015a3e92c7 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:42:41 -0400 Subject: [PATCH 09/10] Added signal processing to the ocr queue to speed up ocr queue processing --- apps/ocr/__init__.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index ccca4916ee..c6114148d3 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -4,12 +4,15 @@ except 
ImportError: class OperationalError(Exception): pass +import logging + from django.core.exceptions import ImproperlyConfigured from django.db import transaction from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext from django.db.utils import DatabaseError from django.db.models.signals import post_save +from django.dispatch import receiver from navigation.api import register_links, register_top_menu, register_multi_item_links from permissions.api import register_permission, set_namespace_title @@ -21,9 +24,11 @@ from scheduler.api import register_interval_job from ocr.conf.settings import AUTOMATIC_OCR from ocr.conf.settings import QUEUE_PROCESSING_INTERVAL -from ocr.models import DocumentQueue, QueueTransformation +from ocr.models import DocumentQueue, QueueTransformation, QueueDocument from ocr.tasks import task_process_document_queues +logger = logging.getLogger(__name__) + #Permissions PERMISSION_OCR_DOCUMENT = {'namespace': 'ocr', 'name': 'ocr_document', 'label': _(u'Submit document for OCR')} PERMISSION_OCR_DOCUMENT_DELETE = {'namespace': 'ocr', 'name': 'ocr_document_delete', 'label': _(u'Delete document for OCR queue')} @@ -99,6 +104,13 @@ def document_post_save(sender, instance, **kwargs): post_save.connect(document_post_save, sender=Document) + +@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument) +def call_queue(sender, **kwargs): + logger.debug('got call_queue signal') + task_process_document_queues() + + create_default_queue() register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL) From 95c300137ccc86a06bc67aabe314aa32d02cb0e0 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 22 Nov 2011 15:48:12 -0400 Subject: [PATCH 10/10] Updated changelog --- docs/changelog.rst | 75 ++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff 
--git a/docs/changelog.rst b/docs/changelog.rst index d51e6768eb..761c7ea160 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,12 +1,12 @@ Version 0.10 ------------ -* Added a proper setup views for the document grouping functionality +* Added a proper setup views for the document grouping functionality. * Document grouping is now called smart linking as it relates better to how it actually works. The data base schema was changed and users must do the required:: - - $ ./manager syncdb - + + $ ./manager syncdb + for the new tables to be created. * Grappelli is no longer required as can be uninstalled. * New smarter document preview widget that doesn't allow zooming or viewing @@ -15,32 +15,38 @@ Version 0.10 * LibreOffice (https://www.libreoffice.org/) * unoconv [version 0.5] (https://github.com/dagwieers/unoconv) - -* The new office documents converter won't convert files with the extension - .docx becasue these files are recognized as zip files instead. This + +* The new office documents converter won't convert files with the extension + .docx because these files are recognized as zip files instead. This is an issue of the libmagic library. -* New configuration option added CONVERTER_UNOCONV_USE_PIPE that controls - how unoconv handles the communication with LibreOffice. The default of - `True` causes unoconv to use pipes, this approach is slower than using - TCP/IP ports but it is more stable. +* New configuration option added ``CONVERTER_UNOCONV_USE_PIPE`` that controls + how unoconv handles the communication with LibreOffice. The default of + ``True`` causes unoconv to use **pipes**, this approach is slower than using + **TCP/IP** ports but it is more stable. 
-* Initial REST API that exposes documents properties and one method, this - new API is used by the new smart document widget and requires the - packaged `djangorestframework`, users must issue a:: +* Initial `REST` `API` that exposes documents properties and one method, this + new `API` is used by the new smart document widget and requires the + package ``djangorestframework``, users must issue a:: $ pip install -r requirements/production.txt - to install the new requirement. + to install this new requirement. * MIME type detection and caching performance updates. -* Updated the included version of jQuery to 1.7 -* Updated the included version of JqueryAsynchImageLoader to 0.9.7 -* Document image serving response now specifies a MIME type for increased +* Updated the included version of ``jQuery`` to 1.7 +* Updated the included version of ``JqueryAsynchImageLoader`` to 0.9.7 +* Document image serving response now specifies a MIME type for increased browser compatibility. * Small change in the scheduler that increases stability. * Russian translation updates (Сергей Глита [Sergey Glita]) - +* Improved and generalized the OCR queue locking mechanism, this should + eliminate any possibility of race conditions between Mayan EDMS OCR nodes. +* Added support for signals to the OCR queue, this results in instant OCR + processing upon submittal of a document to the OCR queue, this works in + addition to the current polling processing which eliminates the + possibility of stale documents in the OCR queue.
+ Version 0.9.1 ------------- * Added handling percent encoded unicode query strings in search URL, @@ -51,44 +57,44 @@ Version 0.9.1 Version 0.9.0 ------------- -* Simplified getting mimetypes from files by merging 2 implementations +* Simplified getting mimetypes from files by merging 2 implementations (document based and file based) -* Updated python converter backend, document model and staging module +* Updated python converter backend, document model and staging module to use the new get_mimetype API -* Only allow clickable thumbnails for document and staging files with a +* Only allow clickable thumbnails for document and staging files with a valid image -* Removed tag count from the group document list widget to conserve +* Removed tag count from the group document list widget to conserve vertical space * Updated required Django version to 1.3.1 -* Removed the included 3rd party module django-sendfile, now added to +* Removed the included 3rd party module django-sendfile, now added to the requirement files. - * User should do a pip install -r requirements/production.txt to update + * User should do a pip install -r requirements/production.txt to update -* Changed to Semantic Versioning (http://semver.org/), with +* Changed to Semantic Versioning (http://semver.org/), with recommendations 7, 8 and 9 causing the most effect in the versioning number. 
* Added Russian locale post OCR cleanup backend (Сергей Глита [Sergei Glita]) -* Reduced severity of the messages displayed when no OCR cleanup backend +* Reduced severity of the messages displayed when no OCR cleanup backend is found for a language * Complete Portuguese translation (Emerson Soares and Renata Oliveira) * Complete Russian translation (Сергей Глита [Sergei Glita]) -* Added animate.css to use CSS to animate flash messages with better +* Added animate.css to use CSS to animate flash messages with better fallback on non JS browsers * The admin and sentry links are no longer hard-coded (Meurig Freeman) -* Improved appearance of the document tag widget +* Improved appearance of the document tag widget (https://p.twimg.com/Ac0Q0b-CAAE1lfA.png:large) -* Added django_compress and cssmin to the requirements files and enabled +* Added django_compress and cssmin to the requirements files and enabled django_compress for CSS and JS files * Added granting and revoking permission methods to the permission model * Correctly calculate the mimetype icons paths when on development mode -* Added a new more comprehensive method of passing multiple variables +* Added a new more comprehensive method of passing multiple variables per item in multi item selection views -* Used new multi parameter passing method to improve the usability of +* Used new multi parameter passing method to improve the usability of the grant/revoke permission view, thanks to Cezar Jenkins (https://twitter.com/#!/emperorcezar) for the suggestion -* Added step to the documentation explaining how to install Mayan EDMS +* Added step to the documentation explaining how to install Mayan EDMS on Webfaction -* Added an entry in the documentation to the screencast explaining how +* Added an entry in the documentation to the screencast explaining how to install Mayan EDMS on DjangoZoom * Added required changes to add Mayan EDMS to Transifex.com * Fixed the apache contrib file static file directory name @@ 
-96,7 +102,6 @@ Version 0.9.0 Version 0.8.3 ------------- - * Added a Contributors file under the docs directory * Moved the document grouping subtemplate windows into a document information tab