PEP8 cleanups, remove OCR_CACHE_URI

This commit is contained in:
Roberto Rosario
2012-01-18 13:53:02 -04:00
parent 8a5d0425b6
commit f9a3c4611b
7 changed files with 17 additions and 29 deletions

View File

@@ -1,15 +1,14 @@
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
from django.db import transaction from django.db import transaction
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext from django.utils.translation import ugettext
from django.db.models.signals import post_save from django.db.models.signals import post_save
from django.dispatch import receiver from django.dispatch import receiver
from navigation.api import register_links, register_top_menu, register_multi_item_links from navigation.api import register_links, register_multi_item_links
from permissions.models import Permission, PermissionNamespace
from documents.models import Document, DocumentVersion from documents.models import Document, DocumentVersion
from main.api import register_maintenance_links from main.api import register_maintenance_links
from project_tools.api import register_tool from project_tools.api import register_tool
@@ -18,7 +17,7 @@ from acls.api import class_permissions
from scheduler.api import register_interval_job from scheduler.api import register_interval_job
from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL) from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL)
from .models import DocumentQueue, QueueTransformation, QueueDocument from .models import DocumentQueue, QueueTransformation
from .tasks import task_process_document_queues from .tasks import task_process_document_queues
from .permissions import (PERMISSION_OCR_DOCUMENT, from .permissions import (PERMISSION_OCR_DOCUMENT,
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
@@ -85,7 +84,7 @@ def document_post_save(sender, instance, **kwargs):
# the OCR process completes which could take several minutes :/ # the OCR process completes which could take several minutes :/
#@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument) #@receiver(post_save, dispatch_uid='call_queue', sender=QueueDocument)
#def call_queue(sender, **kwargs): #def call_queue(sender, **kwargs):
# if kwargs.get('created', False): # if kwargs.get('created', False):
# logger.debug('got call_queue signal: %s' % kwargs) # logger.debug('got call_queue signal: %s' % kwargs)
# task_process_document_queues() # task_process_document_queues()

View File

@@ -14,7 +14,6 @@ register_settings(
{'name': u'NODE_CONCURRENT_EXECUTION', 'global_name': u'OCR_NODE_CONCURRENT_EXECUTION', 'default': 1, 'description': _(u'Maximum amount of concurrent document OCRs a node can perform.')}, {'name': u'NODE_CONCURRENT_EXECUTION', 'global_name': u'OCR_NODE_CONCURRENT_EXECUTION', 'default': 1, 'description': _(u'Maximum amount of concurrent document OCRs a node can perform.')},
{'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': False, 'description': _(u'Automatically queue newly created documents for OCR.')}, {'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': False, 'description': _(u'Automatically queue newly created documents for OCR.')},
{'name': u'QUEUE_PROCESSING_INTERVAL', 'global_name': u'OCR_QUEUE_PROCESSING_INTERVAL', 'default': 10}, {'name': u'QUEUE_PROCESSING_INTERVAL', 'global_name': u'OCR_QUEUE_PROCESSING_INTERVAL', 'default': 10},
{'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')},
{'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, {'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
] ]
) )

View File

@@ -1,21 +1,21 @@
class AlreadyQueued(Exception): class AlreadyQueued(Exception):
''' """
Raised when trying to queue a document already in the queue Raised when trying to queue a document already in the queue
''' """
pass pass
class TesseractError(Exception): class TesseractError(Exception):
''' """
Raised by tesseract Raised by tesseract
''' """
pass pass
class UnpaperError(Exception): class UnpaperError(Exception):
''' """
Raised by unpaper Raised by unpaper
''' """
pass pass

View File

@@ -3,10 +3,9 @@ import logging
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from converter import office_converter
from converter import office_converter from converter import office_converter
from converter.office_converter import OfficeConverter from converter.office_converter import OfficeConverter
from converter.exceptions import OfficeBackendError, OfficeConversionError from converter.exceptions import OfficeConversionError
from documents.utils import document_save_to_temp_dir from documents.utils import document_save_to_temp_dir
from ocr.parsers.exceptions import ParserError, ParserUnknownFile from ocr.parsers.exceptions import ParserError, ParserUnknownFile
@@ -27,7 +26,7 @@ def register_parser(function, mimetype=None, mimetypes=None):
def pdf_parser(document_page, descriptor=None): def pdf_parser(document_page, descriptor=None):
if not descriptor: if not descriptor:
descriptor = document_page.document_version.open() descriptor = document_page.document_version.open()
pdf_pages = slate.PDF(descriptor) pdf_pages = slate.PDF(descriptor)
descriptor.close() descriptor.close()
@@ -45,7 +44,7 @@ def office_parser(document_page):
office_converter = OfficeConverter() office_converter = OfficeConverter()
document_file = document_save_to_temp_dir(document_page.document, document_page.document.checksum) document_file = document_save_to_temp_dir(document_page.document, document_page.document.checksum)
logger.debug('document_file: %s', document_file) logger.debug('document_file: %s', document_file)
office_converter.convert(document_file, mimetype=document_page.document.file_mimetype) office_converter.convert(document_file, mimetype=document_page.document.file_mimetype)
if office_converter.exists: if office_converter.exists:
input_filepath = office_converter.output_filepath input_filepath = office_converter.output_filepath
@@ -58,7 +57,7 @@ def office_parser(document_page):
except OfficeConversionError, msg: except OfficeConversionError, msg:
print msg print msg
raise ParserError raise ParserError
def parse_document_page(document_page): def parse_document_page(document_page):
logger.debug('executing') logger.debug('executing')

View File

@@ -2,8 +2,6 @@ from __future__ import absolute_import
from datetime import timedelta, datetime from datetime import timedelta, datetime
import platform import platform
from time import sleep
from random import random
import logging import logging
from django.db.models import Q from django.db.models import Q
@@ -17,7 +15,7 @@ from .literals import (QUEUEDOCUMENT_STATE_PENDING,
QUEUEDOCUMENT_STATE_ERROR) QUEUEDOCUMENT_STATE_ERROR)
from .models import QueueDocument, DocumentQueue from .models import QueueDocument, DocumentQueue
from .conf.settings import (NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY, from .conf.settings import (NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY,
CACHE_URI, QUEUE_PROCESSING_INTERVAL) QUEUE_PROCESSING_INTERVAL)
LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes
# TODO: Tie LOCK_EXPIRE to the hard task timeout # TODO: Tie LOCK_EXPIRE to the hard task timeout
@@ -100,7 +98,7 @@ def task_process_document_queues():
#print 'DocumentQueueWatcher exception: %s' % e #print 'DocumentQueueWatcher exception: %s' % e
finally: finally:
# Don't process anymore from this queryset, might be stale # Don't process anymore from this queryset, might be stale
break; break
else: else:
logger.debug('already processing maximun') logger.debug('already processing maximun')
else: else:

View File

@@ -1,5 +1,6 @@
Version 0.12 Version 0.12
------------ ------------
* Removal of the OCR_CACHE_URI configuration option
* Upgrade commands: * Upgrade commands:
* ./manage.py syncdb * ./manage.py syncdb

View File

@@ -270,14 +270,6 @@ OCR
Default: ``10`` Default: ``10``
.. data:: OCR_CACHE_URI
Default: ``None``
URI in the form: ``"memcached://127.0.0.1:11211/"`` to specify a cache
backend to use for locking. Multiple hosts can be specified separated
by a semicolon.
.. data:: OCR_UNPAPER_PATH .. data:: OCR_UNPAPER_PATH