diff --git a/mayan/apps/ocr/__init__.py b/mayan/apps/ocr/__init__.py index d13fb74bea..c5fd6de623 100644 --- a/mayan/apps/ocr/__init__.py +++ b/mayan/apps/ocr/__init__.py @@ -35,13 +35,17 @@ register_links(['ocr:queue_document_list'], [queue_document_list], menu_name='se register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR')) +def document_ocr_submit(self): + task_do_ocr.apply_async(args=[self.pk], queue='ocr') + + @receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion) def document_post_save(sender, instance, **kwargs): logger.debug('received post save signal') logger.debug('instance: %s' % instance) if kwargs.get('created', False): if AUTOMATIC_OCR: - task_do_ocr.apply_async(args=[instance.document.pk], queue='ocr') + instance.document.submit_for_ocr() @receiver(post_migrate, dispatch_uid='create_default_queue') @@ -50,9 +54,11 @@ def create_default_queue_signal_handler(sender, **kwargs): DocumentQueue.objects.get_or_create(name='default') -register_tool(ocr_tool_link) +Document.add_to_class('submit_for_ocr', document_ocr_submit) class_permissions(Document, [PERMISSION_OCR_DOCUMENT]) namespace = StatisticNamespace(name='ocr', label=_(u'OCR')) namespace.add_statistic(OCRStatistics(name='ocr_stats', label=_(u'OCR queue statistics'))) + +register_tool(ocr_tool_link) diff --git a/mayan/apps/ocr/exceptions.py b/mayan/apps/ocr/exceptions.py index 20461b2bdb..5497c92ea5 100644 --- a/mayan/apps/ocr/exceptions.py +++ b/mayan/apps/ocr/exceptions.py @@ -10,7 +10,3 @@ class UnpaperError(Exception): Raised by unpaper """ pass - - -class ReQueueError(Exception): - pass diff --git a/mayan/apps/ocr/models.py b/mayan/apps/ocr/models.py index f402cb2fb5..62bbd262e5 100644 --- a/mayan/apps/ocr/models.py +++ b/mayan/apps/ocr/models.py @@ -8,8 +8,6 @@ from django.utils.translation import ugettext_lazy as _ from documents.models import Document -from .exceptions import ReQueueError - class DocumentQueue(models.Model): name = models.CharField(max_length=64, unique=True, verbose_name=_(u'Name')) @@ -35,18 +33,6 @@ class QueueDocument(models.Model): verbose_name = _(u'Queue document') verbose_name_plural = _(u'Queue documents') - def requeue(self): - # TODO: Fix properly using Celery tasks - if self.state == QUEUEDOCUMENT_STATE_PROCESSING: - raise ReQueueError - else: - self.datetime_submitted = now() - self.state = QUEUEDOCUMENT_STATE_PENDING - self.delay = False - self.result = None - self.node_name = None - self.save() - def __unicode__(self): try: return unicode(self.document) diff --git a/mayan/apps/ocr/tasks.py b/mayan/apps/ocr/tasks.py index 978a4cffaa..1d64894aac 100644 --- a/mayan/apps/ocr/tasks.py +++ b/mayan/apps/ocr/tasks.py @@ -24,6 +24,8 @@ def task_do_ocr(document_pk): lock_id = u'task_do_ocr_doc-%d' % document_pk try: logger.debug('trying to acquire lock: %s' % lock_id) + # Acquire lock to avoid doing OCR on the same document more than once + # concurrently lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) logger.debug('acquired lock: %s' % lock_id) try: diff --git a/mayan/apps/ocr/views.py b/mayan/apps/ocr/views.py index 250bc7fe1a..37ccf86c4c 100644 --- a/mayan/apps/ocr/views.py +++ b/mayan/apps/ocr/views.py @@ -14,7 +14,6 @@ from documents.widgets import document_link, document_thumbnail from permissions.models import Permission from .api import clean_pages -from .exceptions import ReQueueError from .models import DocumentQueue, QueueDocument from .permissions import (PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_DOCUMENT, @@ -120,7 +119,7 @@ def submit_document_to_queue(request, document, post_submit_redirect=None): This view is meant to be reusable """ - task_do_ocr.apply_async(args=[document.pk], queue='ocr') + document.submit_for_ocr() messages.success(request, _(u'Document: %(document)s was added to the OCR queue.') % { 'document': document} ) @@ -146,21 +145,15 @@ def re_queue_document(request, queue_document_id=None, queue_document_id_list=No if request.method == 'POST': for queue_document in queue_documents: try: - queue_document.requeue() + queue_document.document.submit_for_ocr() messages.success( request, - _(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % { - 'document': queue_document.document, - 'queue': queue_document.document_queue.label + _(u'Document: %(document)s was re-queued for OCR.') % { + 'document': queue_document.document } ) except Document.DoesNotExist: messages.error(request, _(u'Document id#: %d, no longer exists.') % queue_document.document_id) - except ReQueueError: - messages.warning( - request, - _(u'Document: %s is already being processed and can\'t be re-queded.') % queue_document - ) return HttpResponseRedirect(next) context = {