Documents now have their own dedicated DocumentPage submodel. The old DocumentPage is now called DocumentVersionPage. This allows mappings between document pages and document version pages, allowing renumbering and appending of pages. DocumentPages have a content_object to map them to any other object. For now they only map to DocumentVersionPages. New option added to the version upload form to append the pages of the new version. A new view was added to just append new pages, which wraps the new document version upload form and hides the append pages checkbox set to True. Add a new action, reset_pages, to reset the pages of the document to those of the latest version. Missing: appending tests, checks for proper content_object in OCR and document parsing. Author: Roberto Rosario <roberto.rosario@mayan-edms.com> Date: Thu Oct 11 12:00:25 2019 -0400
55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
from __future__ import unicode_literals
|
|
|
|
import logging
|
|
|
|
from django.apps import apps
|
|
from django.db import OperationalError
|
|
|
|
from mayan.apps.lock_manager.exceptions import LockError
|
|
from mayan.apps.lock_manager.runtime import locking_backend
|
|
from mayan.celery import app
|
|
|
|
from .literals import DO_OCR_RETRY_DELAY, LOCK_EXPIRE
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@app.task(
    bind=True, default_retry_delay=DO_OCR_RETRY_DELAY, ignore_result=True
)
def task_do_ocr(self, document_version_pk):
    """
    Run OCR processing for a single document version.

    A lock named after the document version primary key is held while the
    work runs, to avoid doing OCR on the same document version more than
    once concurrently. When the lock cannot be obtained, the situation is
    logged at debug level and the task ends without doing any work.

    Args:
        document_version_pk: Primary key of the ``DocumentVersion`` to
            process. It is interpolated with ``%d``, so it must be an
            integer.

    Raises:
        Whatever ``self.retry()`` produces when a database
        ``OperationalError`` interrupts the processing; the retry is
        requested with the original exception attached.
    """
    # Models are looked up through the app registry at call time rather
    # than imported at module level.
    DocumentVersion = apps.get_model(
        app_label='documents', model_name='DocumentVersion'
    )
    DocumentVersionPageOCRContent = apps.get_model(
        app_label='ocr', model_name='DocumentVersionPageOCRContent'
    )

    lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk
    try:
        logger.debug('trying to acquire lock: %s', lock_id)
        # Acquire lock to avoid doing OCR on the same document version more
        # than once concurrently
        lock = locking_backend.acquire_lock(name=lock_id, timeout=LOCK_EXPIRE)
        logger.debug('acquired lock: %s', lock_id)
        try:
            document_version = DocumentVersion.objects.get(
                pk=document_version_pk
            )
            logger.info(
                'Starting document OCR for document version: %s',
                document_version
            )
            DocumentVersionPageOCRContent.objects.process_document_version(
                document_version=document_version
            )
        except OperationalError as exception:
            logger.warning(
                'OCR error for document version: %d; %s. Retrying.',
                document_version_pk, exception
            )
            raise self.retry(exc=exception)
        finally:
            # Always give the lock back, including when a retry is requested
            # or an unexpected exception propagates.
            lock.release()
    except LockError:
        # NOTE(review): this handler also covers a LockError raised by the
        # work performed while the lock is held, not only by acquire_lock().
        logger.debug('unable to obtain lock: %s', lock_id)
|