Files
mayan-edms/mayan/apps/ocr/managers.py
Roberto Rosario 36a51eeb73 Switch to full app paths
Instead of inserting the path of the apps into the Python path,
the apps are now referenced by their full import path.

This solves name clashes with external or native Python libraries.
Example: Mayan statistics app vs. Python new statistics library.

Every app reference is now prepended with 'mayan.apps'.

Existing config.yml files need to be updated manually.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2019-04-05 02:02:57 -04:00

106 lines
3.6 KiB
Python

from __future__ import unicode_literals
import logging
import sys
import traceback
from django.apps import apps
from django.conf import settings
from django.db import models
from mayan.apps.documents.storages import storage_documentimagecache
from mayan.apps.documents.literals import DOCUMENT_IMAGE_TASK_TIMEOUT
from mayan.apps.documents.tasks import task_generate_document_page_image
from .events import event_ocr_document_version_finish
from .runtime import ocr_backend
from .signals import post_document_version_ocr
logger = logging.getLogger(__name__)
class DocumentPageOCRContentManager(models.Manager):
    """
    Manager that runs the OCR backend over document pages and stores the
    text it returns.
    """

    def process_document_page(self, document_page):
        """
        Run OCR on a single document page and save the result.

        The page image is requested through the
        `task_generate_document_page_image` task and its result (the cache
        file name) is waited for with a timeout of
        DOCUMENT_IMAGE_TASK_TIMEOUT. The cached image is then handed to the
        OCR backend along with the language of the page's document, and the
        value returned is saved as the `content` of the page's
        DocumentPageOCRContent instance, which is created if missing.

        Exceptions are not handled here; they propagate to the caller.
        """
        logger.info(
            'Processing page: %d of document version: %s',
            document_page.page_number, document_page.document_version
        )

        # Resolved through the app registry instead of a module level
        # import.
        DocumentPageOCRContent = apps.get_model(
            app_label='ocr', model_name='DocumentPageOCRContent'
        )

        task = task_generate_document_page_image.apply_async(
            kwargs=dict(
                document_page_id=document_page.pk
            )
        )

        # The task result is the name of the file to open from the document
        # image cache storage.
        cache_filename = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)

        with storage_documentimagecache.open(cache_filename) as file_object:
            # Only the instance is needed; the "created" flag is ignored.
            document_page_content, _created = DocumentPageOCRContent.objects.get_or_create(
                document_page=document_page
            )
            document_page_content.content = ocr_backend.execute(
                file_object=file_object,
                language=document_page.document.language
            )
            document_page_content.save()

        logger.info(
            'Finished processing page: %d of document version: %s',
            document_page.page_number, document_page.document_version
        )

    def process_document_version(self, document_version):
        """
        Run OCR on every page of a document version.

        On success the existing `ocr_errors` entries of the document version
        are deleted, the `event_ocr_document_version_finish` event is
        committed and the `post_document_version_ocr` signal is sent.

        Any exception raised while processing a page stops the loop, is
        logged and is recorded as a new `ocr_errors` entry; it is not
        re-raised. When `settings.DEBUG` is enabled the entry holds the
        exception type, message and formatted traceback, otherwise the
        exception itself is passed as the result.
        """
        logger.info('Starting OCR for document version: %s', document_version)
        logger.debug('document version: %d', document_version.pk)

        try:
            for document_page in document_version.pages.all():
                self.process_document_page(document_page=document_page)
        except Exception as exception:
            logger.error(
                'OCR error for document version: %d; %s', document_version.pk,
                exception
            )

            if settings.DEBUG:
                # Names chosen to avoid shadowing the `type` builtin.
                exc_type, exc_value, exc_traceback = sys.exc_info()
                result = ['%s: %s' % (exc_type.__name__, exc_value)]
                result.extend(traceback.format_tb(exc_traceback))
                document_version.ocr_errors.create(
                    result='\n'.join(result)
                )
            else:
                # The exception object is passed as is; conversion to text
                # is left to the model field.
                document_version.ocr_errors.create(result=exception)
        else:
            logger.info(
                'OCR complete for document version: %s', document_version
            )
            # A clean run supersedes the errors recorded by previous runs.
            document_version.ocr_errors.all().delete()

            event_ocr_document_version_finish.commit(
                action_object=document_version.document,
                target=document_version
            )

            post_document_version_ocr.send(
                sender=document_version.__class__, instance=document_version
            )
class DocumentTypeSettingsManager(models.Manager):
    """
    Manager that supports natural key lookups made through the related
    document type.
    """

    def get_by_natural_key(self, document_type_natural_key):
        """
        Return the instance whose document type matches the given document
        type natural key.

        Raises the `DoesNotExist` exception of this manager's model when no
        document type matches the natural key.
        """
        document_type_model = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )

        try:
            matching_type = document_type_model.objects.get_by_natural_key(
                document_type_natural_key
            )
        except document_type_model.DoesNotExist:
            # Report the failure in terms of this manager's own model.
            raise self.model.DoesNotExist
        else:
            return self.get(document_type__pk=matching_type.pk)