diff --git a/apps/lock_manager/__init__.py b/apps/lock_manager/__init__.py index e69de29bb2..34913f856f 100644 --- a/apps/lock_manager/__init__.py +++ b/apps/lock_manager/__init__.py @@ -0,0 +1,4 @@ +from lock_manager.exceptions import LockError +from lock_manager.models import Lock as LockModel + +Lock = LockModel.objects diff --git a/apps/lock_manager/admin.py b/apps/lock_manager/admin.py new file mode 100644 index 0000000000..12dbee8fe1 --- /dev/null +++ b/apps/lock_manager/admin.py @@ -0,0 +1,10 @@ +from django.contrib import admin + +from lock_manager.models import Lock + + +class LockAdmin(admin.ModelAdmin): + model = Lock + + +admin.site.register(Lock, LockAdmin) diff --git a/apps/lock_manager/backend_memcached.py b/apps/lock_manager/backend_memcached.py new file mode 100644 index 0000000000..8458fbb395 --- /dev/null +++ b/apps/lock_manager/backend_memcached.py @@ -0,0 +1,16 @@ +from django.core.cache import get_cache + +if CACHE_URI: + try: + cache_backend = get_cache(CACHE_URI) + except ImportError: + # TODO: display or log error + cache_backend = None +else: + cache_backend = None +if cache_backend: + acquire_lock = lambda lock_id: cache_backend.add(lock_id, u'true', LOCK_EXPIRE) + release_lock = lambda lock_id: cache_backend.delete(lock_id) +else: + acquire_lock = lambda lock_id: True + release_lock = lambda lock_id: True diff --git a/apps/lock_manager/conf/settings.py b/apps/lock_manager/conf/settings.py index 05f4ae374c..869e121f4d 100644 --- a/apps/lock_manager/conf/settings.py +++ b/apps/lock_manager/conf/settings.py @@ -1,5 +1,5 @@ from django.conf import settings -DEFAULT_LOCK_TIMEOUT_VALUE = 10 +DEFAULT_LOCK_TIMEOUT_VALUE = 30 DEFAULT_LOCK_TIMEOUT = getattr(settings, 'LOCK_MANAGER_DEFAULT_LOCK_TIMEOUT', DEFAULT_LOCK_TIMEOUT_VALUE) diff --git a/apps/lock_manager/managers.py b/apps/lock_manager/managers.py index 96c57ca02b..6c69fada19 100644 --- a/apps/lock_manager/managers.py +++ b/apps/lock_manager/managers.py @@ -1,9 +1,4 @@ -try: - from psycopg2 
import OperationalError -except ImportError: - class OperationalError(Exception): - pass - +import logging import datetime from django.db.utils import DatabaseError @@ -13,40 +8,34 @@ from django.db import models from lock_manager.exceptions import LockError +logger = logging.getLogger(__name__) + class LockManager(models.Manager): - @transaction.commit_manually + @transaction.commit_on_success def acquire_lock(self, name, timeout=None): + logger.debug('trying to acquire lock: %s' % name) lock = self.model(name=name, timeout=timeout) try: lock.save(force_insert=True) + logger.debug('acquired lock: %s' % name) + return lock except IntegrityError: - transaction.rollback() # There is already an existing lock - # Check it's expiration date and if expired, delete it and - # create it again - lock = self.model.objects.get(name=name) - transaction.rollback() + # Check its expiration date and if expired, reset it + try: + lock = self.model.objects.get(name=name) + except self.model.DoesNotExist: + # Table based locking + logger.debug('lock: %s does not exist' % name) + raise LockError('Unable to acquire lock') if datetime.datetime.now() > lock.creation_datetime + datetime.timedelta(seconds=lock.timeout): - self.release_lock(name) + logger.debug('reseting deleting stale lock: %s' % name) lock.timeout=timeout + logger.debug('try to reacquire stale lock: %s' % name) lock.save() - transaction.commit() + return lock else: + logger.debug('unable to acquire lock: %s' % name) raise LockError('Unable to acquire lock') - except DatabaseError: - transaction.rollback() - # Special case for ./manage.py syncdb - except (OperationalError, ImproperlyConfigured): - transaction.rollback() - # Special for DjangoZoom, which executes collectstatic media - # doing syncdb and creating the database tables - else: - transaction.commit() - - @transaction.commit_manually - def release_lock(self, name): - lock = self.model.objects.get(name=name) - lock.delete() - transaction.commit() diff --git 
a/apps/lock_manager/models.py b/apps/lock_manager/models.py index 53c6b6e49d..535d3366a6 100644 --- a/apps/lock_manager/models.py +++ b/apps/lock_manager/models.py @@ -10,17 +10,28 @@ from lock_manager.conf.settings import DEFAULT_LOCK_TIMEOUT class Lock(models.Model): creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime')) timeout = models.IntegerField(default=DEFAULT_LOCK_TIMEOUT, verbose_name=_(u'timeout')) - name = models.CharField(max_length=32, verbose_name=_(u'name'), unique=True) - + name = models.CharField(max_length=48, verbose_name=_(u'name'), unique=True) + objects = LockManager() - + def __unicode__(self): return self.name - + def save(self, *args, **kwargs): self.creation_datetime = datetime.datetime.now() + if not self.timeout and not kwargs.get('timeout'): + self.timeout = DEFAULT_LOCK_TIMEOUT + super(Lock, self).save(*args, **kwargs) - + + def release(self): + try: + lock = Lock.objects.get(name=self.name, creation_datetime=self.creation_datetime) + lock.delete() + except Lock.DoesNotExist: + # Our lock expired and was reassigned + pass + class Meta: verbose_name = _(u'lock') verbose_name_plural = _(u'locks') diff --git a/apps/lock_manager/tests.py b/apps/lock_manager/tests.py index 501deb776c..140393d027 100644 --- a/apps/lock_manager/tests.py +++ b/apps/lock_manager/tests.py @@ -1,10 +1,3 @@ -""" -This file demonstrates writing tests using the unittest module. These will pass -when you run "manage.py test". - -Replace this with more appropriate tests for your application. 
-""" - from django.test import TestCase diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index ccca4916ee..c6114148d3 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -4,12 +4,15 @@ except ImportError: class OperationalError(Exception): pass +import logging + from django.core.exceptions import ImproperlyConfigured from django.db import transaction from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext from django.db.utils import DatabaseError from django.db.models.signals import post_save +from django.dispatch import receiver from navigation.api import register_links, register_top_menu, register_multi_item_links from permissions.api import register_permission, set_namespace_title @@ -21,9 +24,11 @@ from scheduler.api import register_interval_job from ocr.conf.settings import AUTOMATIC_OCR from ocr.conf.settings import QUEUE_PROCESSING_INTERVAL -from ocr.models import DocumentQueue, QueueTransformation +from ocr.models import DocumentQueue, QueueTransformation, QueueDocument from ocr.tasks import task_process_document_queues +logger = logging.getLogger(__name__) + #Permissions PERMISSION_OCR_DOCUMENT = {'namespace': 'ocr', 'name': 'ocr_document', 'label': _(u'Submit document for OCR')} PERMISSION_OCR_DOCUMENT_DELETE = {'namespace': 'ocr', 'name': 'ocr_document_delete', 'label': _(u'Delete document for OCR queue')} @@ -99,6 +104,13 @@ def document_post_save(sender, instance, **kwargs): post_save.connect(document_post_save, sender=Document) + +@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument) +def call_queue(sender, **kwargs): + logger.debug('got call_queue signal') + task_process_document_queues() + + create_default_queue() register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL) diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index dee3d64da9..06f55bd69d 100644 
--- a/apps/ocr/tasks.py +++ b/apps/ocr/tasks.py @@ -4,9 +4,9 @@ from time import sleep from random import random from django.db.models import Q -from django.core.cache import get_cache from job_processor.api import process_job +from lock_manager import Lock, LockError from ocr.api import do_document_ocr from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \ @@ -21,36 +21,13 @@ from ocr.conf.settings import QUEUE_PROCESSING_INTERVAL LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes # TODO: Tie LOCK_EXPIRATION with hard task timeout -if CACHE_URI: - try: - cache_backend = get_cache(CACHE_URI) - except ImportError: - # TODO: display or log error - cache_backend = None -else: - cache_backend = None - - -def random_delay(): - sleep(random() * (QUEUE_PROCESSING_INTERVAL - 1)) - return True - - -if cache_backend: - acquire_lock = lambda lock_id: cache_backend.add(lock_id, u'true', LOCK_EXPIRE) - release_lock = lambda lock_id: cache_backend.delete(lock_id) -else: - acquire_lock = lambda lock_id: True - release_lock = lambda lock_id: True - - def task_process_queue_document(queue_document_id): - lock_id = u'%s-lock-%d' % (u'task_process_queue_document', queue_document_id) - if acquire_lock(lock_id): + lock_id = u'task_proc_queue_doc-%d' % queue_document_id + try: + lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) queue_document = QueueDocument.objects.get(pk=queue_document_id) queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING queue_document.node_name = platform.node() - #queue_document.result = task_process_queue_document.request.id queue_document.save() try: do_document_ocr(queue_document) @@ -59,7 +36,10 @@ def task_process_queue_document(queue_document_id): queue_document.state = QUEUEDOCUMENT_STATE_ERROR queue_document.result = e queue_document.save() - release_lock(lock_id) + + lock.release() + except LockError: + pass def reset_orphans(): @@ -86,11 +66,9 @@ def reset_orphans(): orphan.node_name = None orphan.save() ''' - + def task_process_document_queues(): - if 
not cache_backend: - random_delay() # reset_orphans() # Causes problems with big clusters increased latency # Disabled until better solution @@ -108,8 +86,7 @@ def task_process_document_queues(): if oldest_queued_document_qs: oldest_queued_document = oldest_queued_document_qs.order_by('datetime_submitted')[0] - #task_process_queue_document.delay(oldest_queued_document.pk) - #task_process_queue_document(oldest_queued_document.pk) process_job(task_process_queue_document, oldest_queued_document.pk) except Exception, e: - print 'DocumentQueueWatcher exception: %s' % e + pass + #print 'DocumentQueueWatcher exception: %s' % e diff --git a/docs/changelog.rst b/docs/changelog.rst index d51e6768eb..761c7ea160 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,12 +1,12 @@ Version 0.10 ------------ -* Added a proper setup views for the document grouping functionality +* Added a proper setup views for the document grouping functionality. * Document grouping is now called smart linking as it relates better to how it actually works. The data base schema was changed and users must do the required:: - - $ ./manager syncdb - + + $ ./manager syncdb + for the new tables to be created. * Grappelli is no longer required as can be uninstalled. * New smarter document preview widget that doesn't allow zooming or viewing @@ -15,32 +15,38 @@ Version 0.10 * LibreOffice (https://www.libreoffice.org/) * unoconv [version 0.5] (https://github.com/dagwieers/unoconv) - -* The new office documents converter won't convert files with the extension - .docx becasue these files are recognized as zip files instead. This + +* The new office documents converter won't convert files with the extension + .docx because these files are recognized as zip files instead. This is an issue of the libmagic library. -* New configuration option added CONVERTER_UNOCONV_USE_PIPE that controls - how unoconv handles the communication with LibreOffice. 
The default of - `True` causes unoconv to use pipes, this approach is slower than using - TCP/IP ports but it is more stable. +* New configuration option added ``CONVERTER_UNOCONV_USE_PIPE`` that controls + how unoconv handles the communication with LibreOffice. The default of + ``True`` causes unoconv to use **pipes**, this approach is slower than using + **TCP/IP** ports but it is more stable. -* Initial REST API that exposes documents properties and one method, this - new API is used by the new smart document widget and requires the - packaged `djangorestframework`, users must issue a:: +* Initial `REST` `API` that exposes documents properties and one method, this + new `API` is used by the new smart document widget and requires the + package ``djangorestframework``, users must issue a:: $ pip install -r requirements/production.txt - to install the new requirement. + to install this new requirement. * MIME type detection and caching performance updates. -* Updated the included version of jQuery to 1.7 -* Updated the included version of JqueryAsynchImageLoader to 0.9.7 -* Document image serving response now specifies a MIME type for increased +* Updated the included version of ``jQuery`` to 1.7 +* Updated the included version of ``JqueryAsynchImageLoader`` to 0.9.7 +* Document image serving response now specifies a MIME type for increased browser compatibility. * Small change in the scheduler that increases stability. * Russian translation updates (Сергей Глита [Sergey Glita]) - +* Improved and generalized the OCR queue locking mechanism, this should + eliminate any possibility of race conditions between Mayan EDMS OCR nodes. +* Added support for signals to the OCR queue, this results in instant OCR + processing upon submittal of a document to the OCR queue, this works in + addition to the current polling processing which eliminates the + possibility of stale documents in the OCR queue. 
+ Version 0.9.1 ------------- * Added handling percent encoded unicode query strings in search URL, @@ -51,44 +57,44 @@ Version 0.9.1 Version 0.9.0 ------------- -* Simplified getting mimetypes from files by merging 2 implementations +* Simplified getting mimetypes from files by merging 2 implementations (document based and file based) -* Updated python converter backend, document model and staging module +* Updated python converter backend, document model and staging module to use the new get_mimetype API -* Only allow clickable thumbnails for document and staging files with a +* Only allow clickable thumbnails for document and staging files with a valid image -* Removed tag count from the group document list widget to conserve +* Removed tag count from the group document list widget to conserve vertical space * Updated required Django version to 1.3.1 -* Removed the included 3rd party module django-sendfile, now added to +* Removed the included 3rd party module django-sendfile, now added to the requirement files. - * User should do a pip install -r requirements/production.txt to update + * User should do a pip install -r requirements/production.txt to update -* Changed to Semantic Versioning (http://semver.org/), with +* Changed to Semantic Versioning (http://semver.org/), with recommendations 7, 8 and 9 causing the most effect in the versioning number. 
* Added Russian locale post OCR cleanup backend (Сергей Глита [Sergei Glita]) -* Reduced severity of the messages displayed when no OCR cleanup backend +* Reduced severity of the messages displayed when no OCR cleanup backend is found for a language * Complete Portuguese translation (Emerson Soares and Renata Oliveira) * Complete Russian translation (Сергей Глита [Sergei Glita]) -* Added animate.css to use CSS to animate flash messages with better +* Added animate.css to use CSS to animate flash messages with better fallback on non JS browsers * The admin and sentry links are no longer hard-coded (Meurig Freeman) -* Improved appearance of the document tag widget +* Improved appearance of the document tag widget (https://p.twimg.com/Ac0Q0b-CAAE1lfA.png:large) -* Added django_compress and cssmin to the requirements files and enabled +* Added django_compress and cssmin to the requirements files and enabled django_compress for CSS and JS files * Added granting and revoking permission methods to the permission model * Correctly calculate the mimetype icons paths when on development mode -* Added a new more comprehensive method of passing multiple variables +* Added a new more comprehensive method of passing multiple variables per item in multi item selection views -* Used new multi parameter passing method to improve the usability of +* Used new multi parameter passing method to improve the usability of the grant/revoke permission view, thanks to Cezar Jenkins (https://twitter.com/#!/emperorcezar) for the suggestion -* Added step to the documentation explaining how to install Mayan EDMS +* Added step to the documentation explaining how to install Mayan EDMS on Webfaction -* Added an entry in the documentation to the screencast explaining how +* Added an entry in the documentation to the screencast explaining how to install Mayan EDMS on DjangoZoom * Added required changes to add Mayan EDMS to Transifex.com * Fixed the apache contrib file static file directory name @@ 
-96,7 +102,6 @@ Version 0.9.0 Version 0.8.3 ------------- - * Added a Contributors file under the docs directory * Moved the document grouping subtemplate windows into a document information tab diff --git a/docs/faq.rst b/docs/faq.rst index e017c806b1..241e76b4a2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -119,3 +119,13 @@ How to enable x-sendile support for ``Apache`` XSendFile on XSendFileAllowAbove on + + +The included version of ``unoconv`` in my distribution is too old +------------------------------------------------------------- + + * Only the 'unoconv' file from https://github.com/dagwieers/unoconv is needed. + Put it in a user designated directory for binaries such as /usr/local/bin and + set up Mayan's configuration option in your settings_local.py file like this:: + + CONVERTER_UNOCONV_PATH = '/usr/local/bin/unoconv'