diff --git a/apps/main/views.py b/apps/main/views.py index 60df537da8..d79d4d4193 100644 --- a/apps/main/views.py +++ b/apps/main/views.py @@ -78,9 +78,9 @@ def check_settings(request): # OCR {'name':'OCR_TESSERACT_PATH', 'value':ocr_settings.TESSERACT_PATH, 'exists':True}, {'name':'OCR_TESSERACT_LANGUAGE', 'value':ocr_settings.TESSERACT_LANGUAGE}, - {'name':'OCR_MAX_CONCURRENT_EXECUTION', 'value':ocr_settings.MAX_CONCURRENT_EXECUTION}, - - + {'name':'OCR_NODE_CONCURRENT_EXECUTION', 'value':ocr_settings.NODE_CONCURRENT_EXECUTION}, + {'name':'OCR_REPLICATION_DELAY', 'value':ocr_settings.REPLICATION_DELAY}, + # Search {'name':'SEARCH_LIMIT', 'value':search_settings.LIMIT}, ] diff --git a/apps/ocr/conf/settings.py b/apps/ocr/conf/settings.py index f9bbf52337..4f6c00ccba 100644 --- a/apps/ocr/conf/settings.py +++ b/apps/ocr/conf/settings.py @@ -2,5 +2,5 @@ from django.conf import settings TESSERACT_PATH = getattr(settings, 'OCR_TESSERACT_PATH', u'/usr/bin/tesseract') TESSERACT_LANGUAGE = getattr(settings, 'OCR_TESSERACT_LANGUAGE', None) -MAX_CONCURRENT_EXECUTION = getattr(settings, 'OCR_MAX_CONCURRENT_EXECUTION', 2) REPLICATION_DELAY = getattr(settings, 'OCR_REPLICATION_DELAY', 10) #In seconds +NODE_CONCURRENT_EXECUTION = getattr(settings, 'OCR_NODE_CONCURRENT_EXECUTION', 1) diff --git a/apps/ocr/models.py b/apps/ocr/models.py index 1b8276bdb4..20b891134a 100644 --- a/apps/ocr/models.py +++ b/apps/ocr/models.py @@ -51,6 +51,7 @@ class QueueDocument(models.Model): default=QUEUEDOCUMENT_STATE_PENDING, verbose_name=_(u'state')) result = models.TextField(blank=True, null=True, verbose_name=_(u'result')) + node_name = models.CharField(max_length=32, verbose_name=_(u'node name'), blank=True, null=True) class Meta: ordering = ('datetime_submitted',) diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index 1da083c154..f739d77dcb 100644 --- a/apps/ocr/tasks.py +++ b/apps/ocr/tasks.py @@ -1,4 +1,5 @@ from datetime import date, timedelta, datetime +import platform from django.db.models import Q @@ -12,7 +13,7 @@ from literals import QUEUEDOCUMENT_STATE_PENDING, \ QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE, \ QUEUEDOCUMENT_STATE_ERROR from models import QueueDocument, DocumentQueue -from ocr.conf.settings import MAX_CONCURRENT_EXECUTION +from ocr.conf.settings import NODE_CONCURRENT_EXECUTION from ocr.conf.settings import REPLICATION_DELAY @@ -20,6 +21,7 @@ from ocr.conf.settings import REPLICATION_DELAY def task_process_queue_document(queue_document_id): queue_document = QueueDocument.objects.get(id=queue_document_id) queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING + queue_document.node_name = platform.node() queue_document.save() try: do_document_ocr(queue_document.document) @@ -42,8 +44,9 @@ class DocumentQueueWatcher(PeriodicTask): q_delay_interval = Q(datetime_submitted__lt=datetime.now()-timedelta(seconds=REPLICATION_DELAY)) for document_queue in DocumentQueue.objects.filter(state=DOCUMENTQUEUE_STATE_ACTIVE): logger.debug('Analysing queue: %s' % document_queue) - current_running_queues = QueueDocument.objects.filter(state=QUEUEDOCUMENT_STATE_PROCESSING).count() - if current_running_queues < MAX_CONCURRENT_EXECUTION: + if QueueDocument.objects.filter( + state=QUEUEDOCUMENT_STATE_PROCESSING).filter( + node_name=platform.node()).count() < NODE_CONCURRENT_EXECUTION: try: oldest_queued_document_qs = document_queue.queuedocument_set.filter( (q_pending & ~q_delayed) | (q_pending & q_delayed & q_delay_interval)) diff --git a/apps/ocr/views.py b/apps/ocr/views.py index da8690a53c..e572597ac7 100644 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -51,6 +51,7 @@ def queue_document_list(request, queue_name='default'): {'name':'submitted', 'attribute': lambda x: unicode(x.datetime_submitted).split('.')[0], 'keep_together':True}, {'name':'delay', 'attribute':'delay'}, {'name':'state', 'attribute': lambda x: x.get_state_display()}, + {'name':'node', 'attribute':'node_name'}, {'name':'result', 'attribute':'result'}, ], 'multi_select_as_buttons':True, diff --git a/settings.py b/settings.py index a41a0b2ca9..b634666df8 100644 --- a/settings.py +++ b/settings.py @@ -243,7 +243,7 @@ LOGIN_EXEMPT_URLS = ( # OCR #OCR_TESSERACT_PATH = u'/usr/bin/tesseract' -#OCR_MAX_CONCURRENT_EXECUTION = 2 +#OCR_NODE_CONCURRENT_EXECUTION = 1 #OCR_TESSERACT_LANGUAGE = None #OCR_REPLICATION_DELAY = 10