from __future__ import unicode_literals import logging from django.apps import apps from django.db import OperationalError from mayan.apps.lock_manager.exceptions import LockError from mayan.apps.lock_manager.runtime import locking_backend from mayan.celery import app from .literals import DO_OCR_RETRY_DELAY, LOCK_EXPIRE logger = logging.getLogger(__name__) @app.task(bind=True, default_retry_delay=DO_OCR_RETRY_DELAY, ignore_result=True) def task_do_ocr(self, document_version_pk): DocumentVersion = apps.get_model( app_label='documents', model_name='DocumentVersion' ) DocumentVersionPageOCRContent = apps.get_model( app_label='ocr', model_name='DocumentVersionPageOCRContent' ) lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk try: logger.debug('trying to acquire lock: %s', lock_id) # Acquire lock to avoid doing OCR on the same document version more # than once concurrently lock = locking_backend.acquire_lock(name=lock_id, timeout=LOCK_EXPIRE) logger.debug('acquired lock: %s', lock_id) document_version = None try: document_version = DocumentVersion.objects.get( pk=document_version_pk ) logger.info( 'Starting document OCR for document version: %s', document_version ) DocumentVersionPageOCRContent.objects.process_document_version( document_version=document_version ) except OperationalError as exception: logger.warning( 'OCR error for document version: %d; %s. Retrying.', document_version_pk, exception ) raise self.retry(exc=exception) finally: lock.release() except LockError: logger.debug('unable to obtain lock: %s' % lock_id)