Merge branch 'feature/lock_manager_update' into development

2011-11-22 15:48:22 -04:00
parent 7e4110450b 95c300137c
commit eb09ac10c2
11 changed files with 139 additions and 112 deletions
--- a/apps/lock_manager/init.py
+++ b/apps/lock_manager/init.py
@@ -0,0 +1,4 @@
+from lock_manager.exceptions import LockError
+from lock_manager.models import Lock as LockModel
+
+Lock = LockModel.objects
--- a/apps/lock_manager/admin.py
+++ b/apps/lock_manager/admin.py
@@ -0,0 +1,10 @@
+from django.contrib import admin
+
+from lock_manager.models import Lock
+
+
+class LockAdmin(admin.ModelAdmin):
+    model = Lock
+
+
+admin.site.register(Lock, LockAdmin)
--- a/apps/lock_manager/backend_memcached.py
+++ b/apps/lock_manager/backend_memcached.py
@@ -0,0 +1,16 @@
+from django.core.cache import get_cache
+
+if CACHE_URI:
+    try:
+        cache_backend = get_cache(CACHE_URI)
+    except ImportError:
+        # TODO: display or log error
+        cache_backend = None
+else:
+    cache_backend = None
+if cache_backend:
+    acquire_lock = lambda lock_id: cache_backend.add(lock_id, u'true', LOCK_EXPIRE)
+    release_lock = lambda lock_id: cache_backend.delete(lock_id)
+else:
+    acquire_lock = lambda lock_id: True
+    release_lock = lambda lock_id: True
--- a/apps/lock_manager/conf/settings.py
+++ b/apps/lock_manager/conf/settings.py
@@ -1,5 +1,5 @@
 from django.conf import settings

-DEFAULT_LOCK_TIMEOUT_VALUE = 10
+DEFAULT_LOCK_TIMEOUT_VALUE = 30

 DEFAULT_LOCK_TIMEOUT = getattr(settings, 'LOCK_MANAGER_DEFAULT_LOCK_TIMEOUT', DEFAULT_LOCK_TIMEOUT_VALUE)
--- a/apps/lock_manager/managers.py
+++ b/apps/lock_manager/managers.py
@@ -1,9 +1,4 @@
-try:
-    from psycopg2 import OperationalError
-except ImportError:
-    class OperationalError(Exception):
-        pass
-
+import logging
 import datetime

 from django.db.utils import DatabaseError
@@ -13,40 +8,34 @@ from django.db import models

 from lock_manager.exceptions import LockError

+logger = logging.getLogger(__name__)
+

 class LockManager(models.Manager):
-    @transaction.commit_manually
+    @transaction.commit_on_success
    def acquire_lock(self, name, timeout=None):
+        logger.debug('trying to acquire lock: %s' % name)
        lock = self.model(name=name, timeout=timeout)
        try:
            lock.save(force_insert=True)
+            logger.debug('acquired lock: %s' % name)
+            return lock
        except IntegrityError:
-            transaction.rollback()
            # There is already an existing lock
-            # Check it's expiration date and if expired, delete it and 
-            # create it again
-            lock = self.model.objects.get(name=name)
-            transaction.rollback()
+            # Check its expiration date and if expired, reset it
+            try:
+                lock = self.model.objects.get(name=name)
+            except self.model.DoesNotExist:
+                # Table based locking
+                logger.debug('lock: %s does not exist' % name)
+                raise LockError('Unable to acquire lock')

            if datetime.datetime.now() > lock.creation_datetime + datetime.timedelta(seconds=lock.timeout):
-                self.release_lock(name)
+                logger.debug('reseting deleting stale lock: %s' % name)
                lock.timeout=timeout
+                logger.debug('try to reacquire stale lock: %s' % name)
                lock.save()
-                transaction.commit()
+                return lock
            else:
+                logger.debug('unable to acquire lock: %s' % name)
                raise LockError('Unable to acquire lock')
-        except DatabaseError:
-            transaction.rollback()
-            # Special case for ./manage.py syncdb
-        except (OperationalError, ImproperlyConfigured):
-            transaction.rollback()
-            # Special for DjangoZoom, which executes collectstatic media
-            # doing syncdb and creating the database tables
-        else:
-            transaction.commit()
-        
-    @transaction.commit_manually
-    def release_lock(self, name):
-        lock = self.model.objects.get(name=name)
-        lock.delete()
-        transaction.commit()
--- a/apps/lock_manager/models.py
+++ b/apps/lock_manager/models.py
@@ -10,17 +10,28 @@ from lock_manager.conf.settings import DEFAULT_LOCK_TIMEOUT
 class Lock(models.Model):
    creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'))
    timeout = models.IntegerField(default=DEFAULT_LOCK_TIMEOUT, verbose_name=_(u'timeout'))
-    name = models.CharField(max_length=32, verbose_name=_(u'name'), unique=True)
-    
+    name = models.CharField(max_length=48, verbose_name=_(u'name'), unique=True)
+
    objects = LockManager()
-    
+
    def __unicode__(self):
        return self.name
-        
+
    def save(self, *args, **kwargs):
        self.creation_datetime = datetime.datetime.now()
+        if not self.timeout and not kwargs.get('timeout'):  # no usable timeout: use the configured default
+            self.timeout = DEFAULT_LOCK_TIMEOUT
+
        super(Lock, self).save(*args, **kwargs)
-        
+
+    def release(self):
+        try:
+            lock = Lock.objects.get(name=self.name, creation_datetime=self.creation_datetime)
+            lock.delete()
+        except Lock.DoesNotExist:
+            # Our lock expired and was reassigned
+            pass
+
    class Meta:
        verbose_name = _(u'lock')
        verbose_name_plural = _(u'locks')
--- a/apps/lock_manager/tests.py
+++ b/apps/lock_manager/tests.py
@@ -1,10 +1,3 @@
-"""
-This file demonstrates writing tests using the unittest module. These will pass
-when you run "manage.py test".
-
-Replace this with more appropriate tests for your application.
-"""
-
 from django.test import TestCase


--- a/apps/ocr/init.py
+++ b/apps/ocr/init.py
@@ -4,12 +4,15 @@ except ImportError:
    class OperationalError(Exception):
        pass
        
+import logging
+        
 from django.core.exceptions import ImproperlyConfigured
 from django.db import transaction
 from django.utils.translation import ugettext_lazy as _
 from django.utils.translation import ugettext
 from django.db.utils import DatabaseError
 from django.db.models.signals import post_save
+from django.dispatch import receiver

 from navigation.api import register_links, register_top_menu, register_multi_item_links
 from permissions.api import register_permission, set_namespace_title
@@ -21,9 +24,11 @@ from scheduler.api import register_interval_job

 from ocr.conf.settings import AUTOMATIC_OCR
 from ocr.conf.settings import QUEUE_PROCESSING_INTERVAL
-from ocr.models import DocumentQueue, QueueTransformation
+from ocr.models import DocumentQueue, QueueTransformation, QueueDocument
 from ocr.tasks import task_process_document_queues

+logger = logging.getLogger(__name__)
+
 #Permissions
 PERMISSION_OCR_DOCUMENT = {'namespace': 'ocr', 'name': 'ocr_document', 'label': _(u'Submit document for OCR')}
 PERMISSION_OCR_DOCUMENT_DELETE = {'namespace': 'ocr', 'name': 'ocr_document_delete', 'label': _(u'Delete document for OCR queue')}
@@ -99,6 +104,13 @@ def document_post_save(sender, instance, **kwargs):

 post_save.connect(document_post_save, sender=Document)

+
+@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument)
+def call_queue(sender, **kwargs):
+    logger.debug('got call_queue signal')
+    task_process_document_queues()
+
+
 create_default_queue()

 register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL)
--- a/apps/ocr/tasks.py
+++ b/apps/ocr/tasks.py
@@ -4,9 +4,9 @@ from time import sleep
 from random import random

 from django.db.models import Q
-from django.core.cache import get_cache

 from job_processor.api import process_job
+from lock_manager import Lock, LockError

 from ocr.api import do_document_ocr
 from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \
@@ -21,36 +21,13 @@ from ocr.conf.settings import QUEUE_PROCESSING_INTERVAL
 LOCK_EXPIRE = 60 * 10  # Lock expires in 10 minutes
 # TODO: Tie LOCK_EXPIRATION with hard task timeout

-if CACHE_URI:
-    try:
-        cache_backend = get_cache(CACHE_URI)
-    except ImportError:
-        # TODO: display or log error
-        cache_backend = None
-else:
-    cache_backend = None
-
-
-def random_delay():
-    sleep(random() * (QUEUE_PROCESSING_INTERVAL - 1))
-    return True
-
-
-if cache_backend:
-    acquire_lock = lambda lock_id: cache_backend.add(lock_id, u'true', LOCK_EXPIRE)
-    release_lock = lambda lock_id: cache_backend.delete(lock_id)
-else:
-    acquire_lock = lambda lock_id: True
-    release_lock = lambda lock_id: True
-
-
 def task_process_queue_document(queue_document_id):
-    lock_id = u'%s-lock-%d' % (u'task_process_queue_document', queue_document_id)
-    if acquire_lock(lock_id):
+    lock_id = u'task_proc_queue_doc-%d' % queue_document_id
+    try:
+        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        queue_document = QueueDocument.objects.get(pk=queue_document_id)
        queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING
        queue_document.node_name = platform.node()
-        #queue_document.result = task_process_queue_document.request.id
        queue_document.save()
        try:
            do_document_ocr(queue_document)
@@ -59,7 +36,10 @@ def task_process_queue_document(queue_document_id):
            queue_document.state = QUEUEDOCUMENT_STATE_ERROR
            queue_document.result = e
            queue_document.save()
-        release_lock(lock_id)
+        
+        lock.release()
+    except LockError:
+        pass


 def reset_orphans():
@@ -86,11 +66,9 @@ def reset_orphans():
        orphan.node_name = None
        orphan.save()
    '''
-    
+

 def task_process_document_queues():
-    if not cache_backend:
-        random_delay()
    # reset_orphans()
    # Causes problems with big clusters increased latency
    # Disabled until better solution
@@ -108,8 +86,7 @@ def task_process_document_queues():

                if oldest_queued_document_qs:
                    oldest_queued_document = oldest_queued_document_qs.order_by('datetime_submitted')[0]
-                    #task_process_queue_document.delay(oldest_queued_document.pk)
-                    #task_process_queue_document(oldest_queued_document.pk)
                    process_job(task_process_queue_document, oldest_queued_document.pk)
            except Exception, e:
-                print 'DocumentQueueWatcher exception: %s' % e
+                pass
+                #print 'DocumentQueueWatcher exception: %s' % e
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,12 +1,12 @@
 Version 0.10
 ------------
-* Added a proper setup views for the document grouping functionality
+* Added a proper setup views for the document grouping functionality.
 * Document grouping is now called smart linking as it relates better to
  how it actually works.  The data base schema was changed and users must
  do the required::
-  
-  $ ./manager syncdb
-  
+
+    $ ./manager syncdb
+
  for the new tables to be created.
 * Grappelli is no longer required and can be uninstalled.
 * New smarter document preview widget that doesn't allow zooming or viewing
@@ -15,32 +15,38 @@ Version 0.10

    * LibreOffice (https://www.libreoffice.org/)
    * unoconv [version 0.5] (https://github.com/dagwieers/unoconv)
-    
-* The new office documents converter won't convert files with the extension
-  .docx becasue these files are recognized as zip files instead.  This
+
+* The new office documents converter won't convert files with the extension 
+  .docx because these files are recognized as zip files instead.  This 
  is an issue of the libmagic library.

-* New configuration option added CONVERTER_UNOCONV_USE_PIPE that controls
-  how unoconv handles the communication with LibreOffice.  The default of
-  `True` causes unoconv to use pipes, this approach is slower than using 
-  TCP/IP ports but it is more stable.
+* New configuration option added ``CONVERTER_UNOCONV_USE_PIPE`` that controls 
+  how unoconv handles the communication with LibreOffice.  The default of 
+  ``True`` causes unoconv to use **pipes**, this approach is slower than using 
+  **TCP/IP** ports but it is more stable.
  
-* Initial REST API that exposes documents properties and one method, this
-  new API is used by the new smart document widget and requires the 
-  packaged `djangorestframework`, users must issue a::
+* Initial `REST` `API` that exposes documents properties and one method, this 
+  new `API` is used by the new smart document widget and requires the 
+  package ``djangorestframework``, users must issue a::
  
  $ pip install -r requirements/production.txt
  
-  to install the new requirement.
+  to install this new requirement.
  
 * MIME type detection and caching performance updates.
-* Updated the included version of jQuery to 1.7
-* Updated the included version of JqueryAsynchImageLoader to 0.9.7
-* Document image serving response now specifies a MIME type for increased
+* Updated the included version of ``jQuery`` to 1.7
+* Updated the included version of ``JqueryAsynchImageLoader`` to 0.9.7
+* Document image serving response now specifies a MIME type for increased 
  browser compatibility.
 * Small change in the scheduler that increases stability.
 * Russian translation updates (Сергей Глита [Sergey Glita])
-  
+* Improved and generalized the OCR queue locking mechanism, this should 
+  eliminate any possibility of race conditions between Mayan EDMS OCR nodes.
+* Added support for signals to the OCR queue, this results in instant OCR
+  processing upon submittal of a document to the OCR queue, this works in
+  addition to the current polling processing which eliminates the
+  possibility of stale documents in the OCR queue.
+
 Version 0.9.1
 -------------
 * Added handling percent encoded unicode query strings in search URL,
@@ -51,44 +57,44 @@ Version 0.9.1

 Version 0.9.0
 -------------
-* Simplified getting mimetypes from files by merging 2 implementations
+* Simplified getting mimetypes from files by merging 2 implementations 
  (document based and file based)
-* Updated python converter backend, document model and staging module
+* Updated python converter backend, document model and staging module 
  to use the new get_mimetype API
-* Only allow clickable thumbnails for document and staging files with a
+* Only allow clickable thumbnails for document and staging files with a 
  valid image
-* Removed tag count from the group document list widget to conserve
+* Removed tag count from the group document list widget to conserve 
  vertical space
 * Updated required Django version to 1.3.1
-* Removed the included 3rd party module django-sendfile, now added to
+* Removed the included 3rd party module django-sendfile, now added to 
  the requirement files.

-    * User should do a pip install -r requirements/production.txt to update
+  * User should do a pip install -r requirements/production.txt to update

-* Changed to Semantic Versioning (http://semver.org/), with
+* Changed to Semantic Versioning (http://semver.org/), with 
  recommendations 7, 8 and 9 causing the most effect in the versioning number.
 * Added Russian locale post OCR cleanup backend (Сергей Глита [Sergei Glita])
-* Reduced severity of the messages displayed when no OCR cleanup backend
+* Reduced severity of the messages displayed when no OCR cleanup backend 
  is found for a language
 * Complete Portuguese translation (Emerson Soares and Renata Oliveira)
 * Complete Russian translation (Сергей Глита [Sergei Glita])
-* Added animate.css to use CSS to animate flash messages with better
+* Added animate.css to use CSS to animate flash messages with better 
  fallback on non JS browsers
 * The admin and sentry links are no longer hard-coded (Meurig Freeman)
-* Improved appearance of the document tag widget
+* Improved appearance of the document tag widget 
  (https://p.twimg.com/Ac0Q0b-CAAE1lfA.png:large)
-* Added django_compress and cssmin to the requirements files and enabled
+* Added django_compress and cssmin to the requirements files and enabled 
  django_compress for CSS and JS files
 * Added granting and revoking permission methods to the permission model
 * Correctly calculate the mimetype icons paths when on development mode
-* Added a new more comprehensive method of passing multiple variables
+* Added a new more comprehensive method of passing multiple variables 
  per item in multi item selection views
-* Used new multi parameter passing method to improve the usability of
+* Used new multi parameter passing method to improve the usability of 
  the grant/revoke permission view, thanks to Cezar Jenkins
  (https://twitter.com/#!/emperorcezar) for the suggestion
-* Added step to the documentation explaining how to install Mayan EDMS
+* Added step to the documentation explaining how to install Mayan EDMS 
  on Webfaction
-* Added an entry in the documentation to the screencast explaining how
+* Added an entry in the documentation to the screencast explaining how 
  to install Mayan EDMS on DjangoZoom
 * Added required changes to add Mayan EDMS to Transifex.com
 * Fixed the apache contrib file static file directory name
@@ -96,7 +102,6 @@ Version 0.9.0

 Version 0.8.3
 -------------
-
 * Added a Contributors file under the docs directory
 * Moved the document grouping subtemplate windows into a document
  information tab
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -119,3 +119,13 @@ How to enable x-sendile support for ``Apache``
  
      XSendFile on
      XSendFileAllowAbove on
+      
+
+The included version of ``unoconv`` in my distribution is too old
+-----------------------------------------------------------------
+      
+  * Only the 'unoconv' file from https://github.com/dagwieers/unoconv is needed.
+    Put it in a user designated directory for binaries such as /usr/local/bin and 
+    setup Mayan's configuration option in your settings_local.py file like this::
+    
+      CONVERTER_UNOCONV_PATH = '/usr/local/bin/unoconv'