Initial changes for the new queue based OCR processing

This commit is contained in:
Roberto Rosario
2012-07-29 05:33:04 -04:00
parent d97b3f344d
commit d2e6df4dde
14 changed files with 280 additions and 180 deletions

View File

@@ -17,47 +17,43 @@ from project_tools.api import register_tool
from acls.api import class_permissions from acls.api import class_permissions
from scheduler.api import register_interval_job from scheduler.api import register_interval_job
from statistics.api import register_statistics from statistics.api import register_statistics
from queue_manager.models import Queue
from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL) from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL)
from .models import DocumentQueue, QueueTransformation from .models import OCRProcessingSingleton
from .tasks import task_process_document_queues from .tasks import task_process_document_queues
from .permissions import PERMISSION_OCR_DOCUMENT from .permissions import PERMISSION_OCR_DOCUMENT
from .exceptions import AlreadyQueued from .exceptions import AlreadyQueued
from . import models as ocr_models from . import models as ocr_models
from .statistics import get_statistics from .statistics import get_statistics
from .literals import OCR_QUEUE_NAME
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from .links import (submit_document, re_queue_multiple_document, from .links import (submit_document, re_queue_multiple_document,
queue_document_multiple_delete, document_queue_disable, queue_document_multiple_delete, ocr_disable,
document_queue_enable, all_document_ocr_cleanup, queue_document_list, ocr_enable, all_document_ocr_cleanup, ocr_log,
ocr_tool_link, setup_queue_transformation_list, ocr_tool_link, submit_document_multiple)
setup_queue_transformation_create, setup_queue_transformation_edit,
setup_queue_transformation_delete, submit_document_multiple)
bind_links([Document], [submit_document]) bind_links([Document], [submit_document])
bind_links([DocumentQueue], [document_queue_disable, document_queue_enable, setup_queue_transformation_list]) bind_links([OCRProcessingSingleton], [ocr_disable, ocr_enable])
bind_links([QueueTransformation], [setup_queue_transformation_edit, setup_queue_transformation_delete]) #bind_links([QueueTransformation], [setup_queue_transformation_edit, setup_queue_transformation_delete])
register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete]) #register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete])
bind_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'setup_queue_transformation_list'], [queue_document_list], menu_name='secondary_menu') #bind_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'setup_queue_transformation_list'], [queue_document_list], menu_name='secondary_menu')
bind_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar') #bind_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar')
register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR')) register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR'))
register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple]) #register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple])
@transaction.commit_on_success @transaction.commit_on_success
def create_default_queue(): def create_ocr_queue():
try: try:
default_queue, created = DocumentQueue.objects.get_or_create(name='default') queue, created = Queue.objects.get_or_create(name=OCR_QUEUE_NAME, defaults={'label': _('OCR'), 'unique_names': True})
except DatabaseError: except DatabaseError:
transaction.rollback() transaction.rollback()
else:
if created:
default_queue.label = ugettext(u'Default')
default_queue.save()
@receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion) @receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion)
@@ -81,11 +77,10 @@ def document_post_save(sender, instance, **kwargs):
# task_process_document_queues() # task_process_document_queues()
@receiver(post_syncdb, dispatch_uid='create_default_queue', sender=ocr_models) #@receiver(post_syncdb, dispatch_uid='create_ocr_queue_on_syncdb', sender=ocr_models)
def create_default_queue_signal_handler(sender, **kwargs): #def create_ocr_queue_on_syncdb(sender, **kwargs):
create_default_queue()
register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL) #register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL)
register_tool(ocr_tool_link) register_tool(ocr_tool_link)
@@ -93,4 +88,5 @@ class_permissions(Document, [
PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT,
]) ])
register_statistics(get_statistics) #register_statistics(get_statistics)
create_ocr_queue()

View File

@@ -1,3 +1,4 @@
"""
from __future__ import absolute_import from __future__ import absolute_import
from django.contrib import admin from django.contrib import admin
@@ -18,3 +19,4 @@ class DocumentQueueAdmin(admin.ModelAdmin):
admin.site.register(DocumentQueue, DocumentQueueAdmin) admin.site.register(DocumentQueue, DocumentQueueAdmin)
"""

View File

@@ -87,7 +87,7 @@ def do_document_ocr(queue_document):
parser, if the parser fails or if there is no parser registered for parser, if the parser fails or if there is no parser registered for
the document mimetype do a visual OCR by calling tesseract the document mimetype do a visual OCR by calling tesseract
""" """
for document_page in queue_document.document.pages.all(): for document_page in queue_document.document_version.pages.all():
try: try:
# Try to extract text by means of a parser # Try to extract text by means of a parser
parse_document_page(document_page) parse_document_page(document_page)

View File

@@ -21,3 +21,11 @@ class UnpaperError(Exception):
class ReQueueError(Exception): class ReQueueError(Exception):
pass pass
class OCRProcessingAlreadyDisabled(Exception):
    """
    Raised when a request is made to disable OCR processing while it is
    already in the disabled state.
    """
class OCRProcessingAlreadyEnabled(Exception):
    """
    Raised when a request is made to enable OCR processing while it is
    already in the enabled state.
    """

View File

@@ -1,3 +1,4 @@
"""
from __future__ import absolute_import from __future__ import absolute_import
from django import forms from django import forms
@@ -19,3 +20,4 @@ class QueueTransformationForm_create(forms.ModelForm):
class Meta: class Meta:
model = QueueTransformation model = QueueTransformation
exclude = ('content_type', 'object_id') exclude = ('content_type', 'object_id')
"""

View File

@@ -7,7 +7,18 @@ from navigation.api import Link
from .permissions import (PERMISSION_OCR_DOCUMENT, from .permissions import (PERMISSION_OCR_DOCUMENT,
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
PERMISSION_OCR_CLEAN_ALL_PAGES) PERMISSION_OCR_CLEAN_ALL_PAGES)
from .models import OCRProcessingSingleton
def is_enabled(context):
    """
    Link condition callback: return True when the OCR processing singleton
    reports that it is enabled. The ``context`` argument is not used.
    """
    ocr_processing = OCRProcessingSingleton.get()
    return ocr_processing.is_enabled()
def is_disabled(context):
    """
    Link condition callback: return True when the OCR processing singleton
    reports that it is not enabled. The ``context`` argument is not used.
    """
    ocr_processing = OCRProcessingSingleton.get()
    enabled = ocr_processing.is_enabled()
    return not enabled
# Link to the OCR log view
ocr_log = Link(
    text=_(u'queue document list'),
    view='ocr_log',
    sprite='text',
    permissions=[PERMISSION_OCR_DOCUMENT]
)

# Links that toggle OCR processing; each one passes the callback matching
# the state it would switch to as its ``conditional_disable`` argument
ocr_disable = Link(
    text=_(u'disable OCR processing'),
    view='ocr_disable',
    sprite='control_stop_blue',
    permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE],
    conditional_disable=is_disabled
)
ocr_enable = Link(
    text=_(u'enable OCR processing'),
    view='ocr_enable',
    sprite='control_play_blue',
    permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE],
    conditional_disable=is_enabled
)
submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
re_queue_document = Link(text=_('re-queue'), view='re_queue_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) re_queue_document = Link(text=_('re-queue'), view='re_queue_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT])
@@ -15,15 +26,12 @@ re_queue_multiple_document = Link(text=_('re-queue'), view='re_queue_multiple_do
queue_document_delete = Link(text=_(u'delete'), view='queue_document_delete', args='object.id', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE]) queue_document_delete = Link(text=_(u'delete'), view='queue_document_delete', args='object.id', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE])
queue_document_multiple_delete = Link(text=_(u'delete'), view='queue_document_multiple_delete', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE]) queue_document_multiple_delete = Link(text=_(u'delete'), view='queue_document_multiple_delete', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE])
document_queue_disable = Link(text=_(u'stop queue'), view='document_queue_disable', args='queue.id', sprite='control_stop_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE])
document_queue_enable = Link(text=_(u'activate queue'), view='document_queue_enable', args='queue.id', sprite='control_play_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE])
all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', sprite='text_strikethrough', permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.')) all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', sprite='text_strikethrough', permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.'))
queue_document_list = Link(text=_(u'queue document list'), view='queue_document_list', sprite='hourglass', permissions=[PERMISSION_OCR_DOCUMENT]) ocr_tool_link = Link(text=_(u'OCR'), view='ocr_log', sprite='hourglass', icon='text.png', permissions=[PERMISSION_OCR_DOCUMENT]) # children_view_regex=[r'queue_', r'document_queue'])
ocr_tool_link = Link(text=_(u'OCR'), view='queue_document_list', sprite='hourglass', icon='text.png', permissions=[PERMISSION_OCR_DOCUMENT], children_view_regex=[r'queue_', r'document_queue'])
setup_queue_transformation_list = Link(text=_(u'transformations'), view='setup_queue_transformation_list', args='queue.pk', sprite='shape_move_front') #setup_queue_transformation_list = Link(text=_(u'transformations'), view='setup_queue_transformation_list', args='queue.pk', sprite='shape_move_front')
setup_queue_transformation_create = Link(text=_(u'add transformation'), view='setup_queue_transformation_create', args='queue.pk', sprite='shape_square_add') #setup_queue_transformation_create = Link(text=_(u'add transformation'), view='setup_queue_transformation_create', args='queue.pk', sprite='shape_square_add')
setup_queue_transformation_edit = Link(text=_(u'edit'), view='setup_queue_transformation_edit', args='transformation.pk', sprite='shape_square_edit') #setup_queue_transformation_edit = Link(text=_(u'edit'), view='setup_queue_transformation_edit', args='transformation.pk', sprite='shape_square_edit')
setup_queue_transformation_delete = Link(text=_(u'delete'), view='setup_queue_transformation_delete', args='transformation.pk', sprite='shape_square_delete') #setup_queue_transformation_delete = Link(text=_(u'delete'), view='setup_queue_transformation_delete', args='transformation.pk', sprite='shape_square_delete')

View File

@@ -1,25 +1,27 @@
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
DOCUMENTQUEUE_STATE_STOPPED = 's' OCR_STATE_DISABLED = 'd'
DOCUMENTQUEUE_STATE_ACTIVE = 'a' OCR_STATE_ENABLED = 'e'
DOCUMENTQUEUE_STATE_CHOICES = ( OCR_STATE_CHOICES = (
(DOCUMENTQUEUE_STATE_STOPPED, _(u'stopped')), (OCR_STATE_DISABLED, _(u'disabled')),
(DOCUMENTQUEUE_STATE_ACTIVE, _(u'active')), (OCR_STATE_ENABLED, _(u'enabled')),
) )
QUEUEDOCUMENT_STATE_PENDING = 'p' #QUEUEDOCUMENT_STATE_PENDING = 'p'
QUEUEDOCUMENT_STATE_PROCESSING = 'i' #QUEUEDOCUMENT_STATE_PROCESSING = 'i'
QUEUEDOCUMENT_STATE_ERROR = 'e' #QUEUEDOCUMENT_STATE_ERROR = 'e'
QUEUEDOCUMENT_STATE_CHOICES = ( #QUEUEDOCUMENT_STATE_CHOICES = (
(QUEUEDOCUMENT_STATE_PENDING, _(u'pending')), # (QUEUEDOCUMENT_STATE_PENDING, _(u'pending')),
(QUEUEDOCUMENT_STATE_PROCESSING, _(u'processing')), # (QUEUEDOCUMENT_STATE_PROCESSING, _(u'processing')),
(QUEUEDOCUMENT_STATE_ERROR, _(u'error')), # (QUEUEDOCUMENT_STATE_ERROR, _(u'error')),
) #)
DEFAULT_OCR_FILE_FORMAT = u'tiff' DEFAULT_OCR_FILE_FORMAT = u'tiff'
DEFAULT_OCR_FILE_EXTENSION = u'tif' DEFAULT_OCR_FILE_EXTENSION = u'tif'
UNPAPER_FILE_FORMAT = u'ppm' UNPAPER_FILE_FORMAT = u'ppm'
OCR_QUEUE_NAME = 'ocr'

View File

@@ -2,19 +2,19 @@ from __future__ import absolute_import
from django.db import models from django.db import models
from .exceptions import AlreadyQueued #from .exceptions import AlreadyQueued
class DocumentQueueManager(models.Manager): class OCRProcessingManager(models.Manager):
''' """
Module manager class to handle adding documents to an OCR document Module manager class to handle adding documents to an OCR queue
queue """
''' def queue_document(self, document):
def queue_document(self, document, queue_name='default'): pass
document_queue = self.model.objects.get(name=queue_name) #document_queue = self.model.objects.get(name=queue_name)
if document_queue.queuedocument_set.filter(document=document): #if document_queue.queuedocument_set.filter(document_version=document.latest_version):
raise AlreadyQueued # raise AlreadyQueued
document_queue.queuedocument_set.create(document=document, delay=True) #document_queue.queuedocument_set.create(document_version=document.latest_version, delay=True)
return document_queue #return document_queue

View File

@@ -1,7 +1,7 @@
from __future__ import absolute_import from __future__ import absolute_import
from ast import literal_eval from ast import literal_eval
from datetime import datetime import datetime
from django.db import models from django.db import models
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
@@ -11,35 +11,109 @@ from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic from django.contrib.contenttypes import generic
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from documents.models import Document from common.models import Singleton
from documents.models import Document, DocumentVersion
from converter.api import get_available_transformations_choices from converter.api import get_available_transformations_choices
from sources.managers import SourceTransformationManager from sources.managers import SourceTransformationManager
from .literals import (DOCUMENTQUEUE_STATE_CHOICES, from .literals import (OCR_STATE_CHOICES, OCR_STATE_ENABLED,
QUEUEDOCUMENT_STATE_PENDING, QUEUEDOCUMENT_STATE_CHOICES, OCR_STATE_DISABLED)
QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE) from .managers import OCRProcessingManager
from .managers import DocumentQueueManager from .exceptions import (ReQueueError, OCRProcessingAlreadyDisabled,
from .exceptions import ReQueueError OCRProcessingAlreadyEnabled)
class DocumentQueue(models.Model): class OCRProcessingSingleton(Singleton):
name = models.CharField(max_length=64, unique=True, verbose_name=_(u'name'))
label = models.CharField(max_length=64, verbose_name=_(u'label'))
state = models.CharField(max_length=4, state = models.CharField(max_length=4,
choices=DOCUMENTQUEUE_STATE_CHOICES, choices=OCR_STATE_CHOICES,
default=DOCUMENTQUEUE_STATE_ACTIVE, default=OCR_STATE_ENABLED,
verbose_name=_(u'state')) verbose_name=_(u'state'))
objects = DocumentQueueManager() #objects = AnonymousUserSingletonManager()
class Meta:
verbose_name = _(u'document queue')
verbose_name_plural = _(u'document queues')
def __unicode__(self): def __unicode__(self):
return self.label return ugettext('OCR processing')
def disable(self):
    """
    Switch the stored state to disabled and save the instance.

    Raises OCRProcessingAlreadyDisabled when the state is already
    disabled; nothing is saved in that case.
    """
    already_disabled = self.state == OCR_STATE_DISABLED
    if already_disabled:
        raise OCRProcessingAlreadyDisabled

    self.state = OCR_STATE_DISABLED
    self.save()
def enable(self):
    """
    Switch the stored state to enabled and save the instance.

    Raises OCRProcessingAlreadyEnabled when the state is already
    enabled; nothing is saved in that case.
    """
    already_enabled = self.state == OCR_STATE_ENABLED
    if already_enabled:
        raise OCRProcessingAlreadyEnabled

    self.state = OCR_STATE_ENABLED
    self.save()
def is_enabled(self):
    """
    Return True when the stored state equals OCR_STATE_ENABLED.
    """
    current_state = self.state
    return current_state == OCR_STATE_ENABLED
class Meta:
verbose_name = verbose_name_plural = _(u'OCR processing properties')
"""
class OCRLog(models.Model):
#queue = models.ForeignKey(Queue, verbose_name=_(u'queue'))
document_version = models.ForeignKey(DocumentVersion, verbose_name=_(u'document version'))
datetime = models.DateTimeField(verbose_name=_(u'date time'), default=lambda: datetime.datetime.now(), db_index=True)
delay = models.BooleanField(verbose_name=_(u'delay OCR'), default=False)
#state = models.CharField(max_length=4,
# choices=QUEUEDOCUMENT_STATE_CHOICES,
# default=QUEUEDOCUMENT_STATE_PENDING,
# verbose_name=_(u'state'))
result = models.TextField(blank=True, null=True, verbose_name=_(u'result'))
#node_name = models.CharField(max_length=32, verbose_name=_(u'node name'), blank=True, null=True)
class Meta:
ordering = ('datetime',)
verbose_name = _(u'OCR log entry')
verbose_name_plural = _(u'OCR log entries')
#def get_transformation_list(self):
# return QueueTransformation.transformations.get_for_object_as_list(self)
def requeue(self):
pass
#if self.state == QUEUEDOCUMENT_STATE_PROCESSING:
# raise ReQueueError
#else:
# self.datetime_submitted = datetime.now()
# self.state = QUEUEDOCUMENT_STATE_PENDING
# self.delay = False
# self.result = None
# self.node_name = None
# self.save()
def __unicode__(self):
try:
return unicode(self.document)
except ObjectDoesNotExist:
return ugettext(u'Missing document.')
"""
#class DocumentQueue(models.Model):
# name = models.CharField(max_length=64, unique=True, verbose_name=_(u'name'))
# label = models.CharField(max_length=64, verbose_name=_(u'label'))
# state = models.CharField(max_length=4,
# choices=DOCUMENTQUEUE_STATE_CHOICES,
# default=DOCUMENTQUEUE_STATE_ACTIVE,
# verbose_name=_(u'state'))
#
# objects = DocumentQueueManager()#
#
# class Meta:
# verbose_name = _(u'document queue')
# verbose_name_plural = _(u'document queues')#
#
# def __unicode__(self):
# return self.label
"""
class QueueDocument(models.Model): class QueueDocument(models.Model):
document_queue = models.ForeignKey(DocumentQueue, verbose_name=_(u'document queue')) document_queue = models.ForeignKey(DocumentQueue, verbose_name=_(u'document queue'))
document = models.ForeignKey(Document, verbose_name=_(u'document')) document = models.ForeignKey(Document, verbose_name=_(u'document'))
@@ -121,3 +195,4 @@ class QueueTransformation(models.Model):
ordering = ('order',) ordering = ('order',)
verbose_name = _(u'document queue transformation') verbose_name = _(u'document queue transformation')
verbose_name_plural = _(u'document queue transformations') verbose_name_plural = _(u'document queue transformations')
"""

View File

@@ -7,6 +7,6 @@ from permissions.models import Permission, PermissionNamespace
ocr_namespace = PermissionNamespace('ocr', _(u'OCR')) ocr_namespace = PermissionNamespace('ocr', _(u'OCR'))
PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _(u'Submit documents for OCR')) PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _(u'Submit documents for OCR'))
PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _(u'Delete documents from OCR queue')) PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _(u'Delete documents from OCR queue'))
PERMISSION_OCR_QUEUE_ENABLE_DISABLE = Permission.objects.register(ocr_namespace, 'ocr_queue_enable_disable', _(u'Can enable/disable the OCR queue')) PERMISSION_OCR_QUEUE_ENABLE_DISABLE = Permission.objects.register(ocr_namespace, 'ocr_queue_enable_disable', _(u'Can enable/disable the OCR processing'))
PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _(u'Can execute the OCR clean up on all document pages')) PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _(u'Can execute the OCR clean up on all document pages'))
PERMISSION_OCR_QUEUE_EDIT = Permission.objects.register(ocr_namespace, 'ocr_queue_edit', _(u'Can edit an OCR queue properties')) PERMISSION_OCR_QUEUE_EDIT = Permission.objects.register(ocr_namespace, 'ocr_queue_edit', _(u'Can edit an OCR queue properties'))

View File

@@ -2,7 +2,7 @@ from __future__ import absolute_import
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from .models import DocumentQueue, QueueDocument #from .models import DocumentQueue, QueueDocument
def get_statistics(): def get_statistics():

View File

@@ -10,10 +10,10 @@ from job_processor.api import process_job
from lock_manager import Lock, LockError from lock_manager import Lock, LockError
from .api import do_document_ocr from .api import do_document_ocr
from .literals import (QUEUEDOCUMENT_STATE_PENDING, #from .literals import (QUEUEDOCUMENT_STATE_PENDING,
QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE, # QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE,
QUEUEDOCUMENT_STATE_ERROR) # QUEUEDOCUMENT_STATE_ERROR)
from .models import QueueDocument, DocumentQueue #from .models import QueueDocument, DocumentQueue
from .conf.settings import NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY from .conf.settings import NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY
LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes

View File

@@ -1,16 +1,18 @@
from django.conf.urls.defaults import patterns, url from django.conf.urls.defaults import patterns, url
urlpatterns = patterns('ocr.views', urlpatterns = patterns('ocr.views',
url(r'^log/$', 'ocr_log', (), 'ocr_log'),
url(r'^processing/enable/$', 'ocr_enable', (), 'ocr_enable'),
url(r'^processing/disable/$', 'ocr_disable', (), 'ocr_disable'),
url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'), url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'), url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'),
url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'),
url(r'^queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), url(r'^queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
url(r'^queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), url(r'^queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'), url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),
url(r'^queue/(?P<document_queue_id>\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'),
url(r'^queue/(?P<document_queue_id>\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'),
url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'), url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),

View File

@@ -18,52 +18,111 @@ from acls.models import AccessEntry
from .permissions import (PERMISSION_OCR_DOCUMENT, from .permissions import (PERMISSION_OCR_DOCUMENT,
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_QUEUE_EDIT) PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_QUEUE_EDIT)
from .models import DocumentQueue, QueueDocument, QueueTransformation from .models import OCRProcessingSingleton
from .literals import (QUEUEDOCUMENT_STATE_PROCESSING, #from .literals import (QUEUEDOCUMENT_STATE_PROCESSING,
DOCUMENTQUEUE_STATE_ACTIVE, DOCUMENTQUEUE_STATE_STOPPED) # DOCUMENTQUEUE_STATE_ACTIVE, DOCUMENTQUEUE_STATE_STOPPED)
from .exceptions import AlreadyQueued, ReQueueError from .exceptions import (AlreadyQueued, ReQueueError, OCRProcessingAlreadyDisabled,
OCRProcessingAlreadyEnabled)
from .api import clean_pages from .api import clean_pages
from .forms import QueueTransformationForm, QueueTransformationForm_create #from .forms import QueueTransformationForm, QueueTransformationForm_create
def queue_document_list(request, queue_name='default'): def ocr_log(request):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
document_queue = get_object_or_404(DocumentQueue, name=queue_name) context = {
'queue': OCRProcessingSingleton.get(),
return object_list( 'object_name': _(u'OCR processing'), # TODO fix, not working
request, 'navigation_object_name': 'queue',
queryset=document_queue.queuedocument_set.all(), 'object_list': [],
template_name='generic_list.html', 'title': _(u'OCR log items'),
extra_context={ #'hide_object': True,
'title': _(u'documents in queue: %s') % document_queue, #'hide_link': True,
'hide_object': True, 'extra_columns': [
'queue': document_queue, {'name': _(u'document'), 'attribute': encapsulate(lambda x: document_link(x.document_version.document) if hasattr(x, 'document_version') else _(u'Missing document.'))},
'object_name': _(u'document queue'), {'name': _(u'version'), 'attribute': 'document_version'},
'navigation_object_name': 'queue', {'name': _(u'thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document_version.document))},
'list_object_variable_name': 'queue_document', {'name': _('submitted'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True},
'extra_columns': [ #{'name': _('delay'), 'attribute': 'delay'},
{'name': 'document', 'attribute': encapsulate(lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.'))}, #{'name': _('state'), 'attribute': encapsulate(lambda x: x.get_state_display())},
{'name': _(u'thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document))}, #{'name': _('node'), 'attribute': 'node_name'},
{'name': 'submitted', 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True}, {'name': _('result'), 'attribute': 'result'},
{'name': 'delay', 'attribute': 'delay'}, ],
{'name': 'state', 'attribute': encapsulate(lambda x: x.get_state_display())}, 'multi_select_as_buttons': True,
{'name': 'node', 'attribute': 'node_name'}, 'sidebar_subtemplates_list': [
{'name': 'result', 'attribute': 'result'}, {
], 'name': 'generic_subtemplate.html',
'multi_select_as_buttons': True, 'context': {
'sidebar_subtemplates_list': [ 'side_bar': True,
{ 'title': _(u'OCR processing properties'),
'name': 'generic_subtemplate.html', 'content': _(u'Current state: %s') % OCRProcessingSingleton.get().get_state_display(),
'context': {
'side_bar': True,
'title': _(u'document queue properties'),
'content': _(u'Current state: %s') % document_queue.get_state_display(),
}
} }
] }
}, ]
) }
return render_to_response('generic_list.html', context,
context_instance=RequestContext(request))
# 'queue': document_queue,
# 'object_name': _(u'document queue'),
# 'navigation_object_name': 'queue',
# 'list_object_variable_name': 'queue_document',
# },
#)
def ocr_disable(request):
    """
    Confirm and, on POST, disable OCR processing.

    Checks PERMISSION_OCR_QUEUE_ENABLE_DISABLE for the requesting user.
    Any non-POST request renders the generic confirmation template. A
    POST request disables processing and redirects to ``next`` with a
    success message, or to ``previous`` with a warning when processing
    was already disabled.
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE])

    # Both redirect targets fall back to the referring page when neither
    # POST nor GET supplies them
    referer = request.META.get('HTTP_REFERER', None)
    next_url = request.POST.get('next', request.GET.get('next', referer))
    previous_url = request.POST.get('previous', request.GET.get('previous', referer))

    if request.method != 'POST':
        confirm_context = {
            'queue': OCRProcessingSingleton.get(),
            'navigation_object_name': 'queue',
            'title': _(u'Are you sure you wish to disable OCR processing?'),
            'next': next_url,
            'previous': previous_url,
            'form_icon': u'control_stop_blue.png',
        }
        return render_to_response('generic_confirm.html', confirm_context,
            context_instance=RequestContext(request))

    try:
        OCRProcessingSingleton.get().disable()
    except OCRProcessingAlreadyDisabled:
        messages.warning(request, _(u'OCR processing already disabled.'))
        return HttpResponseRedirect(previous_url)

    messages.success(request, _(u'OCR processing disabled successfully.'))
    return HttpResponseRedirect(next_url)
def ocr_enable(request):
    """
    Confirm and, on POST, enable OCR processing.

    Checks PERMISSION_OCR_QUEUE_ENABLE_DISABLE for the requesting user.
    Any non-POST request renders the generic confirmation template. A
    POST request enables processing and redirects to ``next`` with a
    success message, or to ``previous`` with a warning when processing
    was already enabled.
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE])

    # Both redirect targets fall back to the referring page when neither
    # POST nor GET supplies them
    next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
    previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))

    if request.method == 'POST':
        try:
            OCRProcessingSingleton.get().enable()
        except OCRProcessingAlreadyEnabled:
            # enable() raises OCRProcessingAlreadyEnabled, not the
            # ...AlreadyDisabled exception; catching the wrong class let
            # the error escape the view instead of showing this warning
            messages.warning(request, _(u'OCR processing already enabled.'))
            return HttpResponseRedirect(previous)
        else:
            messages.success(request, _(u'OCR processing enabled successfully.'))
            return HttpResponseRedirect(next)

    return render_to_response('generic_confirm.html', {
        'queue': OCRProcessingSingleton.get(),
        'navigation_object_name': 'queue',
        'title': _(u'Are you sure you wish to enable OCR processing?'),
        'next': next,
        'previous': previous,
        'form_icon': u'control_play_blue.png',
    }, context_instance=RequestContext(request))
def queue_document_delete(request, queue_document_id=None, queue_document_id_list=None): def queue_document_delete(request, queue_document_id=None, queue_document_id_list=None):
@@ -175,12 +234,12 @@ def re_queue_document(request, queue_document_id=None, queue_document_id_list=No
messages.success( messages.success(
request, request,
_(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % { _(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % {
'document': queue_document.document, 'document': queue_document.document_version.document,
'queue': queue_document.document_queue.label 'queue': queue_document.document_queue.label
} }
) )
except Document.DoesNotExist: except Document.DoesNotExist:
messages.error(request, _(u'Document id#: %d, no longer exists.') % queue_document.document_id) messages.error(request, _(u'Document no longer in queue.'))
except ReQueueError: except ReQueueError:
messages.warning( messages.warning(
request, request,
@@ -208,60 +267,6 @@ def re_queue_multiple_document(request):
return re_queue_document(request, queue_document_id_list=request.GET.get('id_list', [])) return re_queue_document(request, queue_document_id_list=request.GET.get('id_list', []))
def document_queue_disable(request, document_queue_id):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE])
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id)
if document_queue.state == DOCUMENTQUEUE_STATE_STOPPED:
messages.warning(request, _(u'Document queue: %s, already stopped.') % document_queue)
return HttpResponseRedirect(previous)
if request.method == 'POST':
document_queue.state = DOCUMENTQUEUE_STATE_STOPPED
document_queue.save()
messages.success(request, _(u'Document queue: %s, stopped successfully.') % document_queue)
return HttpResponseRedirect(next)
return render_to_response('generic_confirm.html', {
'queue': document_queue,
'navigation_object_name': 'queue',
'title': _(u'Are you sure you wish to disable document queue: %s') % document_queue,
'next': next,
'previous': previous,
'form_icon': u'control_stop_blue.png',
}, context_instance=RequestContext(request))
def document_queue_enable(request, document_queue_id):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE])
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id)
if document_queue.state == DOCUMENTQUEUE_STATE_ACTIVE:
messages.warning(request, _(u'Document queue: %s, already active.') % document_queue)
return HttpResponseRedirect(previous)
if request.method == 'POST':
document_queue.state = DOCUMENTQUEUE_STATE_ACTIVE
document_queue.save()
messages.success(request, _(u'Document queue: %s, activated successfully.') % document_queue)
return HttpResponseRedirect(next)
return render_to_response('generic_confirm.html', {
'queue': document_queue,
'navigation_object_name': 'queue',
'title': _(u'Are you sure you wish to activate document queue: %s') % document_queue,
'next': next,
'previous': previous,
'form_icon': u'control_play_blue.png',
}, context_instance=RequestContext(request))
def all_document_ocr_cleanup(request): def all_document_ocr_cleanup(request):
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CLEAN_ALL_PAGES]) Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CLEAN_ALL_PAGES])