55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
from __future__ import unicode_literals
|
|
|
|
import logging
|
|
|
|
from django.apps import apps
|
|
from django.db import OperationalError
|
|
|
|
from mayan.apps.lock_manager.exceptions import LockError
|
|
from mayan.apps.lock_manager.runtime import locking_backend
|
|
from mayan.celery import app
|
|
|
|
from .literals import DO_OCR_RETRY_DELAY, LOCK_EXPIRE
|
|
|
|
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@app.task(bind=True, default_retry_delay=DO_OCR_RETRY_DELAY, ignore_result=True)
def task_do_ocr(self, document_version_pk):
    """
    Run OCR for a single document version while holding a named lock.

    The lock name is built from ``document_version_pk`` so that the same
    document version is not processed more than once concurrently. The
    models are looked up through the app registry when the task runs
    instead of being imported at module level.

    Arguments:
    document_version_pk -- primary key of the ``DocumentVersion`` to
    process. It is interpolated with ``%d``, so it must be an integer.

    Behavior:
    * When a ``LockError`` is raised the task logs a debug message and
      ends without raising.
    * When the processing raises a database ``OperationalError`` a
      warning is logged and the task is rescheduled via ``self.retry``
      (the delay comes from ``DO_OCR_RETRY_DELAY`` in the decorator).
    * Once acquired, the lock is always released, whether the processing
      succeeded, failed or is being retried.
    """
    DocumentVersion = apps.get_model(
        app_label='documents', model_name='DocumentVersion'
    )
    DocumentVersionPageOCRContent = apps.get_model(
        app_label='ocr', model_name='DocumentVersionPageOCRContent'
    )

    lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk
    try:
        logger.debug('trying to acquire lock: %s', lock_id)
        # Acquire lock to avoid doing OCR on the same document version more
        # than once concurrently
        lock = locking_backend.acquire_lock(name=lock_id, timeout=LOCK_EXPIRE)
        logger.debug('acquired lock: %s', lock_id)
        try:
            document_version = DocumentVersion.objects.get(
                pk=document_version_pk
            )
            logger.info(
                'Starting document OCR for document version: %s',
                document_version
            )
            DocumentVersionPageOCRContent.objects.process_document_version(
                document_version=document_version
            )
        except OperationalError as exception:
            logger.warning(
                'OCR error for document version: %d; %s. Retrying.',
                document_version_pk, exception
            )
            # self.retry() raises to signal Celery that the task must be
            # rescheduled; the ``finally`` below still releases the lock.
            raise self.retry(exc=exception)
        finally:
            lock.release()
    except LockError:
        # NOTE(review): this handler also covers a LockError raised from
        # inside the processing block above, not only from acquire_lock().
        # Pass the argument lazily, like the other logging calls here, so
        # the message is only built when DEBUG logging is enabled.
        logger.debug('unable to obtain lock: %s', lock_id)
|