Files
mayan-edms/mayan/apps/ocr/tasks.py
Roberto Rosario 8cf807899a Initial commit to support page mapping
Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
2019-10-08 18:45:53 -04:00

55 lines
1.9 KiB
Python

from __future__ import unicode_literals
import logging
from django.apps import apps
from django.db import OperationalError
from mayan.apps.lock_manager.exceptions import LockError
from mayan.apps.lock_manager.runtime import locking_backend
from mayan.celery import app
from .literals import DO_OCR_RETRY_DELAY, LOCK_EXPIRE
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@app.task(
    bind=True, default_retry_delay=DO_OCR_RETRY_DELAY, ignore_result=True
)
def task_do_ocr(self, document_version_pk):
    """
    Run OCR processing for one document version while holding a lock.

    A lock named after the document version primary key is acquired first
    so that the same document version is not processed more than once
    concurrently. When the lock cannot be obtained (``LockError``) the
    task only logs a debug message and returns without doing any work.

    Args:
        document_version_pk: Primary key used to look up the
            ``DocumentVersion`` instance. It is interpolated with ``%d``
            when building the lock name, so it must be an integer.

    Raises:
        The exception returned by ``self.retry()`` when an
        ``OperationalError`` occurs during processing. The retry delay
        is the ``default_retry_delay`` given to the task decorator.
    """
    # Models are resolved at call time through the app registry instead
    # of being imported at module level.
    DocumentVersion = apps.get_model(
        app_label='documents', model_name='DocumentVersion'
    )
    DocumentVersionPageOCRContent = apps.get_model(
        app_label='ocr', model_name='DocumentVersionPageOCRContent'
    )

    lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk

    # NOTE(review): the LockError handler at the bottom covers this whole
    # block, so a LockError raised during processing or while releasing
    # the lock is also reported as "unable to obtain lock".
    try:
        logger.debug('trying to acquire lock: %s', lock_id)
        # Acquire lock to avoid doing OCR on the same document version more
        # than once concurrently
        lock = locking_backend.acquire_lock(name=lock_id, timeout=LOCK_EXPIRE)
        logger.debug('acquired lock: %s', lock_id)

        try:
            document_version = DocumentVersion.objects.get(
                pk=document_version_pk
            )
            logger.info(
                'Starting document OCR for document version: %s',
                document_version
            )
            DocumentVersionPageOCRContent.objects.process_document_version(
                document_version=document_version
            )
        except OperationalError as exception:
            logger.warning(
                'OCR error for document version: %d; %s. Retrying.',
                document_version_pk, exception
            )
            raise self.retry(exc=exception)
        finally:
            # Always give the lock back, including when a retry is raised.
            lock.release()
    except LockError:
        # Pass the lock name as a logging argument so the message is only
        # formatted when debug logging is enabled, like the calls above.
        logger.debug('unable to obtain lock: %s', lock_id)