Add version upload form checkbox. Add the append_pages keyword argument. Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
126 lines
4.3 KiB
Python
126 lines
4.3 KiB
Python
from __future__ import unicode_literals
|
|
|
|
import logging
|
|
import sys
|
|
import traceback
|
|
|
|
from django.apps import apps
|
|
from django.conf import settings
|
|
from django.db import models, transaction
|
|
|
|
from mayan.apps.documents.literals import DOCUMENT_IMAGE_TASK_TIMEOUT
|
|
from mayan.apps.documents.tasks import (
|
|
task_generate_document_version_page_image
|
|
)
|
|
|
|
from .events import (
|
|
event_ocr_document_content_deleted, event_ocr_document_version_finish
|
|
)
|
|
from .runtime import ocr_backend
|
|
from .signals import post_document_version_ocr
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DocumentVesionPageOCRContentManager(models.Manager):
|
|
def delete_content_for(self, document, user=None):
|
|
with transaction.atomic():
|
|
for document_page in document.pages.all():
|
|
self.filter(
|
|
document_version_page=document_page.content_object
|
|
).delete()
|
|
|
|
event_ocr_document_content_deleted.commit(
|
|
actor=user, target=document
|
|
)
|
|
|
|
def process_document_version_page(self, document_version_page):
|
|
logger.info(
|
|
'Processing page: %d of document version: %s',
|
|
document_version_page.page_number,
|
|
document_version_page.document_version
|
|
)
|
|
|
|
DocumentVersionPageOCRContent = apps.get_model(
|
|
app_label='ocr', model_name='DocumentVersionPageOCRContent'
|
|
)
|
|
|
|
task = task_generate_document_version_page_image.apply_async(
|
|
kwargs=dict(
|
|
document_version_page_id=document_version_page.pk
|
|
)
|
|
)
|
|
|
|
cache_filename = task.get(
|
|
timeout=DOCUMENT_IMAGE_TASK_TIMEOUT, disable_sync_subtasks=False
|
|
)
|
|
|
|
with document_version_page.cache_partition.get_file(filename=cache_filename).open() as file_object:
|
|
document_version_page_content, created = DocumentVersionPageOCRContent.objects.get_or_create(
|
|
document_version_page=document_version_page
|
|
)
|
|
document_version_page_content.content = ocr_backend.execute(
|
|
file_object=file_object,
|
|
language=document_version_page.document.language
|
|
)
|
|
document_version_page_content.save()
|
|
|
|
logger.info(
|
|
'Finished processing page: %d of document version: %s',
|
|
document_version_page.page_number,
|
|
document_version_page.document_version
|
|
)
|
|
|
|
def process_document_version(self, document_version):
|
|
logger.info('Starting OCR for document version: %s', document_version)
|
|
logger.debug('document version: %d', document_version.pk)
|
|
|
|
try:
|
|
for document_version_page in document_version.pages.all():
|
|
self.process_document_version_page(
|
|
document_version_page=document_version_page
|
|
)
|
|
except Exception as exception:
|
|
logger.error(
|
|
'OCR error for document version: %d; %s', document_version.pk,
|
|
exception
|
|
)
|
|
|
|
if settings.DEBUG:
|
|
result = []
|
|
type, value, tb = sys.exc_info()
|
|
result.append('%s: %s' % (type.__name__, value))
|
|
result.extend(traceback.format_tb(tb))
|
|
document_version.ocr_errors.create(
|
|
result='\n'.join(result)
|
|
)
|
|
else:
|
|
document_version.ocr_errors.create(result=exception)
|
|
else:
|
|
logger.info(
|
|
'OCR complete for document version: %s', document_version
|
|
)
|
|
document_version.ocr_errors.all().delete()
|
|
|
|
event_ocr_document_version_finish.commit(
|
|
action_object=document_version.document,
|
|
target=document_version
|
|
)
|
|
|
|
post_document_version_ocr.send(
|
|
sender=document_version.__class__, instance=document_version
|
|
)
|
|
|
|
|
|
class DocumentTypeSettingsManager(models.Manager):
|
|
def get_by_natural_key(self, document_type_natural_key):
|
|
DocumentType = apps.get_model(
|
|
app_label='documents', model_name='DocumentType'
|
|
)
|
|
try:
|
|
document_type = DocumentType.objects.get_by_natural_key(document_type_natural_key)
|
|
except DocumentType.DoesNotExist:
|
|
raise self.model.DoesNotExist
|
|
|
|
return self.get(document_type__pk=document_type.pk)
|