Unify the method used to submit documents for OCR and fix the re-queue view for documents with OCR errors

This commit is contained in:
Roberto Rosario
2014-10-09 14:08:48 -04:00
parent 8bac1525be
commit c2e35694d8
5 changed files with 14 additions and 31 deletions

View File

@@ -35,13 +35,17 @@ register_links(['ocr:queue_document_list'], [queue_document_list], menu_name='se
register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR'))
def document_ocr_submit(self):
    """
    Dispatch the OCR task for this document to the 'ocr' queue.

    The task receives the primary key of the instance, not the instance
    itself.
    """
    task_arguments = [self.pk]
    task_do_ocr.apply_async(args=task_arguments, queue='ocr')
@receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion)
def document_post_save(sender, instance, **kwargs):
    """
    Submit a document for OCR when a new version of it is created.

    Connected to the post_save signal of DocumentVersion. Nothing is
    submitted when an existing version is saved again or when
    AUTOMATIC_OCR is false.

    sender -- the model class that sent the signal
    instance -- the DocumentVersion that was saved
    kwargs -- remaining signal arguments; only 'created' is read
    """
    logger.debug('received post save signal')
    # Let the logging module do the interpolation so the instance is only
    # rendered when debug output is actually enabled.
    logger.debug('instance: %s', instance)

    # Submit only through Document.submit_for_ocr(); dispatching task_do_ocr
    # here as well would queue the same task twice for every new version.
    if kwargs.get('created', False) and AUTOMATIC_OCR:
        instance.document.submit_for_ocr()
@receiver(post_migrate, dispatch_uid='create_default_queue')
@@ -50,9 +54,11 @@ def create_default_queue_signal_handler(sender, **kwargs):
DocumentQueue.objects.get_or_create(name='default')
# Module-level registration performed when the OCR app is imported.
register_tool(ocr_tool_link)
# Attach document_ocr_submit to the Document model under the name
# 'submit_for_ocr', so callers can use document.submit_for_ocr().
Document.add_to_class('submit_for_ocr', document_ocr_submit)
class_permissions(Document, [PERMISSION_OCR_DOCUMENT])
# Create the 'ocr' statistics namespace and add the OCR queue statistics to it.
namespace = StatisticNamespace(name='ocr', label=_(u'OCR'))
namespace.add_statistic(OCRStatistics(name='ocr_stats', label=_(u'OCR queue statistics')))
# NOTE(review): ocr_tool_link is also registered a few lines above; this
# looks like a statement that was moved -- confirm only one call remains.
register_tool(ocr_tool_link)

View File

@@ -10,7 +10,3 @@ class UnpaperError(Exception):
Raised by unpaper
"""
pass
class ReQueueError(Exception):
    """
    Raised when a queue document cannot be re-queued
    """

View File

@@ -8,8 +8,6 @@ from django.utils.translation import ugettext_lazy as _
from documents.models import Document
from .exceptions import ReQueueError
class DocumentQueue(models.Model):
name = models.CharField(max_length=64, unique=True, verbose_name=_(u'Name'))
@@ -35,18 +33,6 @@ class QueueDocument(models.Model):
verbose_name = _(u'Queue document')
verbose_name_plural = _(u'Queue documents')
def requeue(self):
    """
    Reset this queue document so that it is pending again.

    Raises ReQueueError, leaving the instance untouched, when its state is
    QUEUEDOCUMENT_STATE_PROCESSING. Otherwise the submission time is set to
    now(), the state goes back to pending, delay, result and node name are
    cleared, and the instance is saved.
    """
    # TODO: Fix properly using Celery tasks
    if self.state == QUEUEDOCUMENT_STATE_PROCESSING:
        raise ReQueueError

    self.datetime_submitted = now()
    self.state = QUEUEDOCUMENT_STATE_PENDING
    self.delay = False
    self.result = None
    self.node_name = None
    self.save()
def __unicode__(self):
try:
return unicode(self.document)

View File

@@ -24,6 +24,8 @@ def task_do_ocr(document_pk):
lock_id = u'task_do_ocr_doc-%d' % document_pk
try:
logger.debug('trying to acquire lock: %s' % lock_id)
# Acquire lock to avoid doing OCR on the same document more than once
# concurrently
lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
logger.debug('acquired lock: %s' % lock_id)
try:

View File

@@ -14,7 +14,6 @@ from documents.widgets import document_link, document_thumbnail
from permissions.models import Permission
from .api import clean_pages
from .exceptions import ReQueueError
from .models import DocumentQueue, QueueDocument
from .permissions import (PERMISSION_OCR_CLEAN_ALL_PAGES,
PERMISSION_OCR_DOCUMENT,
@@ -120,7 +119,7 @@ def submit_document_to_queue(request, document, post_submit_redirect=None):
This view is meant to be reusable
"""
task_do_ocr.apply_async(args=[document.pk], queue='ocr')
document.submit_for_ocr()
messages.success(request, _(u'Document: %(document)s was added to the OCR queue.') % {
'document': document}
)
@@ -146,21 +145,15 @@ def re_queue_document(request, queue_document_id=None, queue_document_id_list=No
if request.method == 'POST':
for queue_document in queue_documents:
try:
queue_document.requeue()
queue_document.document.submit_for_ocr()
messages.success(
request,
_(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % {
'document': queue_document.document,
'queue': queue_document.document_queue.label
_(u'Document: %(document)s was re-queued for OCR.') % {
'document': queue_document.document
}
)
except Document.DoesNotExist:
messages.error(request, _(u'Document id#: %d, no longer exists.') % queue_document.document_id)
except ReQueueError:
messages.warning(
request,
_(u'Document: %s is already being processed and can\'t be re-queded.') % queue_document
)
return HttpResponseRedirect(next)
context = {