Text parsers and OCR backends are now used in tandem for each document.

This commit is contained in:
Roberto Rosario
2015-08-08 04:49:08 -04:00
parent cf00ba2c40
commit bec85f38f4
9 changed files with 115 additions and 18 deletions

View File

@@ -11,7 +11,7 @@ from documents.models import DocumentVersion
from lock_manager import Lock, LockError
from mayan.celery import app
from .runtime import ocr_backend_class
from .classes import TextExtractor
from .literals import DO_OCR_RETRY_DELAY, LOCK_EXPIRE
from .models import DocumentVersionOCRError
from .signals import post_document_version_ocr
@@ -35,8 +35,7 @@ def task_do_ocr(self, document_version_pk):
'Starting document OCR for document version: %s',
document_version
)
backend = ocr_backend_class()
backend.process_document_version(document_version)
TextExtractor.process_document_version(document_version)
except OperationalError as exception:
logger.warning(
'OCR error for document version: %s; %s. Retrying.',