diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 78f3f297c2..c2f2358039 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -1,15 +1,14 @@ from __future__ import absolute_import import logging - + from django.db import transaction from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext from django.db.models.signals import post_save from django.dispatch import receiver -from navigation.api import register_links, register_top_menu, register_multi_item_links -from permissions.models import Permission, PermissionNamespace +from navigation.api import register_links, register_multi_item_links from documents.models import Document, DocumentVersion from main.api import register_maintenance_links from project_tools.api import register_tool @@ -18,7 +17,7 @@ from acls.api import class_permissions from scheduler.api import register_interval_job from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL) -from .models import DocumentQueue, QueueTransformation, QueueDocument +from .models import DocumentQueue, QueueTransformation from .tasks import task_process_document_queues from .permissions import (PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, @@ -85,7 +84,7 @@ def document_post_save(sender, instance, **kwargs): # the OCR process completes which could take several minutes :/ #@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument) #def call_queue(sender, **kwargs): -# if kwargs.get('created', False): +# if kwargs.get('created', False): # logger.debug('got call_queue signal: %s' % kwargs) # task_process_document_queues() diff --git a/apps/ocr/conf/settings.py b/apps/ocr/conf/settings.py index ff2f7ca04d..31e2c908b8 100644 --- a/apps/ocr/conf/settings.py +++ b/apps/ocr/conf/settings.py @@ -14,7 +14,6 @@ register_settings( {'name': u'NODE_CONCURRENT_EXECUTION', 'global_name': u'OCR_NODE_CONCURRENT_EXECUTION', 'default': 1, 'description': _(u'Maximum amount of concurrent document OCRs a node can perform.')}, {'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': False, 'description': _(u'Automatically queue newly created documents for OCR.')}, {'name': u'QUEUE_PROCESSING_INTERVAL', 'global_name': u'OCR_QUEUE_PROCESSING_INTERVAL', 'default': 10}, - {'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')}, {'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, ] ) diff --git a/apps/ocr/exceptions.py b/apps/ocr/exceptions.py index 0b6f4a129a..32ec4c4c07 100644 --- a/apps/ocr/exceptions.py +++ b/apps/ocr/exceptions.py @@ -1,21 +1,21 @@ class AlreadyQueued(Exception): - ''' + """ Raised when a trying to queue document already in the queue - ''' + """ pass class TesseractError(Exception): - ''' + """ Raised by tesseract - ''' + """ pass class UnpaperError(Exception): - ''' + """ Raised by unpaper - ''' + """ pass diff --git a/apps/ocr/parsers/__init__.py b/apps/ocr/parsers/__init__.py index 3d5a39635e..6a91d392d4 100644 --- a/apps/ocr/parsers/__init__.py +++ b/apps/ocr/parsers/__init__.py @@ -3,10 +3,9 @@ import logging from django.utils.translation import ugettext as _ -from converter import office_converter from converter import office_converter from converter.office_converter import OfficeConverter -from converter.exceptions import OfficeBackendError, OfficeConversionError +from converter.exceptions import OfficeConversionError from documents.utils import document_save_to_temp_dir from ocr.parsers.exceptions import ParserError, ParserUnknownFile @@ -27,7 +26,7 @@ def register_parser(function, mimetype=None, mimetypes=None): def pdf_parser(document_page, descriptor=None): if not descriptor: descriptor = document_page.document_version.open() - + pdf_pages = slate.PDF(descriptor) descriptor.close() @@ -45,7 +44,7 @@ def office_parser(document_page): office_converter = OfficeConverter() document_file = document_save_to_temp_dir(document_page.document, document_page.document.checksum) logger.debug('document_file: %s', document_file) - + office_converter.convert(document_file, mimetype=document_page.document.file_mimetype) if office_converter.exists: input_filepath = office_converter.output_filepath @@ -58,7 +57,7 @@ def office_parser(document_page): except OfficeConversionError, msg: print msg raise ParserError - + def parse_document_page(document_page): logger.debug('executing') diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index 14c0ce5152..f042c51460 100644 --- a/apps/ocr/tasks.py +++ b/apps/ocr/tasks.py @@ -2,8 +2,6 @@ from __future__ import absolute_import from datetime import timedelta, datetime import platform -from time import sleep -from random import random import logging from django.db.models import Q @@ -17,7 +15,7 @@ from .literals import (QUEUEDOCUMENT_STATE_PENDING, QUEUEDOCUMENT_STATE_ERROR) from .models import QueueDocument, DocumentQueue from .conf.settings import (NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY, - CACHE_URI, QUEUE_PROCESSING_INTERVAL) + QUEUE_PROCESSING_INTERVAL) LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes # TODO: Tie LOCK_EXPIRATION with hard task timeout @@ -100,7 +98,7 @@ def task_process_document_queues(): #print 'DocumentQueueWatcher exception: %s' % e finally: # Don't process anymore from this queryset, might be stale - break; + break else: logger.debug('already processing maximun') else: diff --git a/docs/changelog.rst b/docs/changelog.rst index 572225bb14..4a37902165 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,6 @@ Version 0.12 ------------ +* Removal of the OCR_CACHE_URI configuration option * Upgrade commands: * ./manage.py syncdb diff --git a/docs/settings.rst b/docs/settings.rst index 8d9b0237be..d27a0bb0ac 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -270,14 +270,6 @@ OCR Default: ``10`` -.. data:: OCR_CACHE_URI - - Default: ``None`` - - URI in the form: ``"memcached://127.0.0.1:11211/"`` to specify a cache - backend to use for locking. Multiple hosts can be specified separated - by a semicolon. - .. data:: OCR_UNPAPER_PATH