Move old OCR queue handling code to job_processor app

This commit is contained in:
Roberto Rosario
2012-08-06 02:12:58 -04:00
parent 37160efe9f
commit 377032b92a
7 changed files with 68 additions and 187 deletions

View File

@@ -44,3 +44,11 @@ job_requeue = Link(text=_(u'requeue job'), view='job_requeue', args='object.pk',
job_delete = Link(text=_(u'delete job'), view='job_delete', args='object.pk', sprite='cog_delete', permissions=[PERMISSION_JOB_DELETE], condition=is_in_pending_state)
worker_terminate = Link(text=_(u'terminate worker'), view='worker_terminate', args='object.pk', sprite='lorry_delete', permissions=[PERMISSION_WORKER_TERMINATE])
'''
re_queue_document = Link(text=_('re-queue'), view='re_queue_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
re_queue_multiple_document = Link(text=_('re-queue'), view='re_queue_multiple_document', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
queue_document_delete = Link(text=_(u'delete'), view='queue_document_delete', args='object.id', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE])
queue_document_multiple_delete = Link(text=_(u'delete'), view='queue_document_multiple_delete', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE])
'''

View File

@@ -348,3 +348,58 @@ def worker_terminate(request, worker_pk):
'previous': previous,
'form_icon': u'lorry_delete.png',
}, context_instance=RequestContext(request))
'''
def re_queue_document(request, queue_document_id=None, queue_document_id_list=None):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
if queue_document_id:
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id)]
elif queue_document_id_list:
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id) for queue_document_id in queue_document_id_list.split(',')]
else:
messages.error(request, _(u'Must provide at least one queue document.'))
return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
if request.method == 'POST':
for queue_document in queue_documents:
try:
queue_document.requeue()
messages.success(
request,
_(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % {
'document': queue_document.document_version.document,
'queue': queue_document.document_queue.label
}
)
except Document.DoesNotExist:
messages.error(request, _(u'Document no longer in queue.'))
except ReQueueError:
messages.warning(
request,
_(u'Document: %s is already being processed and can\'t be re-queded.') % queue_document
)
return HttpResponseRedirect(next)
context = {
'next': next,
'previous': previous,
'form_icon': u'hourglass_add.png',
}
if len(queue_documents) == 1:
context['object'] = queue_documents[0]
context['title'] = _(u'Are you sure you wish to re-queue document: %s?') % ', '.join([unicode(d) for d in queue_documents])
elif len(queue_documents) > 1:
context['title'] = _(u'Are you sure you wish to re-queue documents: %s?') % ', '.join([unicode(d) for d in queue_documents])
return render_to_response('generic_confirm.html', context,
context_instance=RequestContext(request))
def re_queue_multiple_document(request):
return re_queue_document(request, queue_document_id_list=request.GET.get('id_list', []))
'''

View File

@@ -15,7 +15,6 @@ from documents.models import Document, DocumentVersion
from maintenance.api import MaintenanceNamespace
from project_tools.api import register_tool
from acls.api import class_permissions
from statistics.api import register_statistics
from job_processor.models import JobQueue, JobType
from job_processor.exceptions import JobQueuePushError
@@ -31,14 +30,12 @@ from .literals import OCR_QUEUE_NAME
logger = logging.getLogger(__name__)
ocr_job_queue = None
from .links import (submit_document, re_queue_multiple_document,
queue_document_multiple_delete, ocr_disable,
from .links import (submit_document, ocr_disable,
ocr_enable, all_document_ocr_cleanup, ocr_log,
ocr_tool_link, submit_document_multiple)
bind_links([Document], [submit_document])
bind_links([OCRProcessingSingleton], [ocr_disable, ocr_enable])
#register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete])
namespace = MaintenanceNamespace(label=_(u'OCR'))
namespace.create_tool(all_document_ocr_cleanup)
@@ -65,14 +62,6 @@ def document_post_save(sender, instance, **kwargs):
except JobQueuePushError:
pass
# Disabled because it appears Django executes signals in the same
# process as the signal emitter, effectively blocking the view until
# the OCR process completes, which could take several minutes :/
#@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument)
#def call_queue(sender, **kwargs):
# if kwargs.get('created', False):
# logger.debug('got call_queue signal: %s' % kwargs)
# task_process_document_queues()
register_tool(ocr_tool_link)
@@ -80,7 +69,6 @@ class_permissions(Document, [
PERMISSION_OCR_DOCUMENT,
])
#register_statistics(get_statistics)
create_ocr_job_queue()
ocr_job_type = JobType('ocr', _(u'OCR'), do_document_ocr)

View File

@@ -21,11 +21,6 @@ ocr_disable = Link(text=_(u'disable OCR processing'), view='ocr_disable', sprite
ocr_enable = Link(text=_(u'enable OCR processing'), view='ocr_enable', sprite='control_play_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_enabled)
submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', sprite='text_dropcaps', permissions=[PERMISSION_OCR_DOCUMENT])
submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', sprite='text_dropcaps', permissions=[PERMISSION_OCR_DOCUMENT])
re_queue_document = Link(text=_('re-queue'), view='re_queue_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
re_queue_multiple_document = Link(text=_('re-queue'), view='re_queue_multiple_document', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
queue_document_delete = Link(text=_(u'delete'), view='queue_document_delete', args='object.id', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE])
queue_document_multiple_delete = Link(text=_(u'delete'), view='queue_document_multiple_delete', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE])
all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', sprite='text_strikethrough', permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.'))

View File

@@ -1,17 +0,0 @@
from __future__ import absolute_import
from django.utils.translation import ugettext as _
#from .models import DocumentQueue, QueueDocument
def get_statistics():
    """
    Build the OCR statistics summary.

    Returns a dictionary with a translated 'title' and a 'paragraphs' list
    holding one line for the number of document queues and one line for
    the number of queued documents.
    """
    # NOTE(review): DocumentQueue and QueueDocument are not imported in the
    # visible part of this module (the import above is commented out) --
    # confirm they are in scope before calling this function.
    queue_line = _(u'Document queues: %d') % DocumentQueue.objects.count()
    # Only the primary key is requested since the rows are merely counted.
    queued_line = _(u'Queued documents: %d') % QueueDocument.objects.only('pk').count()

    return {
        'title': _(u'OCR statistics'),
        'paragraphs': [queue_line, queued_line],
    }

View File

@@ -8,10 +8,6 @@ urlpatterns = patterns('ocr.views',
url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'),
#url(r'^queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
#url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
#url(r'^queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
#url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),
url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),
)

View File

@@ -26,49 +26,10 @@ from .api import clean_pages
from . import ocr_job_queue, ocr_job_type
def ocr_log(request):
    """
    Render the OCR log list view.

    Verifies that the requesting user holds PERMISSION_OCR_DOCUMENT and then
    renders 'generic_list.html' with an empty object list, the column
    definitions used for each log entry and a sidebar panel that reports the
    current state of the OCR processing singleton.
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])

    processing = OCRProcessingSingleton.get()

    # Column definitions; callables are wrapped with encapsulate() as the
    # list template expects.
    columns = [
        {
            'name': _(u'document'),
            'attribute': encapsulate(lambda entry: document_link(entry.document_version.document) if hasattr(entry, 'document_version') else _(u'Missing document.')),
        },
        {'name': _(u'version'), 'attribute': 'document_version'},
        {
            'name': _(u'thumbnail'),
            'attribute': encapsulate(lambda entry: document_thumbnail(entry.document_version.document)),
        },
        {
            'name': _('submitted'),
            # Drop the fractional seconds from the submission timestamp.
            'attribute': encapsulate(lambda entry: unicode(entry.datetime_submitted).split('.')[0]),
            'keep_together': True,
        },
        {'name': _('result'), 'attribute': 'result'},
    ]

    sidebar = [
        {
            'name': 'generic_subtemplate.html',
            'context': {
                'side_bar': True,
                'title': _(u'OCR processing properties'),
                'content': _(u'Current state: %s') % OCRProcessingSingleton.get().get_state_display(),
            },
        },
    ]

    context = {
        'queue': processing,
        'object_name': _(u'OCR processing'),  # TODO fix, not working
        'navigation_object_name': 'queue',
        'object_list': [],
        'title': _(u'OCR log items'),
        'extra_columns': columns,
        'multi_select_as_buttons': True,
        'sidebar_subtemplates_list': sidebar,
    }

    return render_to_response(
        'generic_list.html', context,
        context_instance=RequestContext(request)
    )
# 'queue': document_queue,
# 'object_name': _(u'document queue'),
# 'navigation_object_name': 'queue',
# 'list_object_variable_name': 'queue_document',
# },
#)
# {'name': _(u'document'), 'attribute': encapsulate(lambda x: document_link(x.document_version.document) if hasattr(x, 'document_version') else _(u'Missing document.'))},
# {'name': _(u'version'), 'attribute': 'document_version'},
# {'name': _(u'thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document_version.document))},
# {'name': _('submitted'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True},
def ocr_disable(request):
@@ -123,57 +84,6 @@ def ocr_enable(request):
}, context_instance=RequestContext(request))
def queue_document_delete(request, queue_document_id=None, queue_document_id_list=None):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT_DELETE])
if queue_document_id:
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id)]
elif queue_document_id_list:
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id) for queue_document_id in queue_document_id_list.split(',')]
else:
messages.error(request, _(u'Must provide at least one queue document.'))
return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
if request.method == 'POST':
for queue_document in queue_documents:
try:
if queue_document.state == QUEUEDOCUMENT_STATE_PROCESSING:
messages.error(request, _(u'Document: %s is being processed and can\'t be deleted.') % queue_document)
else:
queue_document.delete()
messages.success(request, _(u'Queue document: %(document)s deleted successfully.') % {
'document': queue_document.document})
except Exception, e:
messages.error(request, _(u'Error deleting document: %(document)s; %(error)s') % {
'document': queue_document, 'error': e})
return HttpResponseRedirect(next)
context = {
'next': next,
'previous': previous,
'delete_view': True,
'form_icon': u'hourglass_delete.png',
}
if len(queue_documents) == 1:
context['object'] = queue_documents[0]
context['title'] = _(u'Are you sure you wish to delete queue document: %s?') % ', '.join([unicode(d) for d in queue_documents])
elif len(queue_documents) > 1:
context['title'] = _(u'Are you sure you wish to delete queue documents: %s?') % ', '.join([unicode(d) for d in queue_documents])
return render_to_response('generic_confirm.html', context,
context_instance=RequestContext(request))
def queue_document_multiple_delete(request):
    """
    Multi-item entry point for queue document deletion.

    Reads the 'id_list' query string parameter (empty string when absent)
    and delegates to queue_document_delete().
    """
    id_list = request.GET.get('id_list', '')
    return queue_document_delete(request, queue_document_id_list=id_list)
def submit_document_multiple(request):
for item_id in request.GET.get('id_list', '').split(','):
submit_document(request, item_id)
@@ -212,60 +122,6 @@ def submit_document_to_queue(request, document, post_submit_redirect=None):
return HttpResponseRedirect(post_submit_redirect)
def re_queue_document(request, queue_document_id=None, queue_document_id_list=None):
    """
    Confirm and perform the re-queueing of one or more queue documents.

    Exactly one of the two selectors is used: ``queue_document_id`` (a single
    primary key) takes precedence over ``queue_document_id_list`` (a comma
    separated string of primary keys).  When neither is given an error
    message is queued and the user is sent back to the referring page.

    On GET the 'generic_confirm.html' confirmation page is rendered.  On POST
    requeue() is called on every selected queue document, a message is
    queued for each outcome, and the user is redirected to the ``next`` URL.
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])

    if queue_document_id:
        selected_pks = [queue_document_id]
    elif queue_document_id_list:
        selected_pks = queue_document_id_list.split(',')
    else:
        messages.error(request, _(u'Must provide at least one queue document.'))
        return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))

    queue_documents = [get_object_or_404(QueueDocument, pk=pk) for pk in selected_pks]

    # Both navigation targets fall back to the referring page.
    referer = request.META.get('HTTP_REFERER', None)
    next_url = request.POST.get('next', request.GET.get('next', referer))
    previous_url = request.POST.get('previous', request.GET.get('previous', referer))

    if request.method == 'POST':
        for queue_document in queue_documents:
            try:
                queue_document.requeue()
                messages.success(
                    request,
                    _(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % {
                        'document': queue_document.document_version.document,
                        'queue': queue_document.document_queue.label
                    }
                )
            except Document.DoesNotExist:
                messages.error(request, _(u'Document no longer in queue.'))
            except ReQueueError:
                messages.warning(
                    request,
                    _(u'Document: %s is already being processed and can\'t be re-queded.') % queue_document
                )
        return HttpResponseRedirect(next_url)

    context = {
        'next': next_url,
        'previous': previous_url,
        'form_icon': u'hourglass_add.png',
    }

    labels = ', '.join([unicode(d) for d in queue_documents])
    if len(queue_documents) == 1:
        context['object'] = queue_documents[0]
        context['title'] = _(u'Are you sure you wish to re-queue document: %s?') % labels
    elif len(queue_documents) > 1:
        context['title'] = _(u'Are you sure you wish to re-queue documents: %s?') % labels

    return render_to_response(
        'generic_confirm.html', context,
        context_instance=RequestContext(request)
    )
def re_queue_multiple_document(request):
    """
    Multi-item entry point for re-queueing queue documents.

    Reads the 'id_list' query string parameter and delegates to
    re_queue_document().  The fallback is an empty string (not a list)
    because re_queue_document() calls ``.split(',')`` on the value; this
    also matches the default used by queue_document_multiple_delete().
    An empty value is falsy either way, so the callee still reports that
    at least one queue document must be provided.
    """
    id_list = request.GET.get('id_list', '')
    return re_queue_document(request, queue_document_id_list=id_list)
def all_document_ocr_cleanup(request):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CLEAN_ALL_PAGES])