Refactor the OCR app API

This refactor adds two new endpoints to view the OCR
content of versions and documents.

Signed-off-by: Roberto Rosario <Roberto.Rosario.Gonzalez@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2019-02-19 03:21:36 -04:00
parent e03f017e7f
commit c2dd01d51e
7 changed files with 327 additions and 175 deletions

View File

@@ -2,6 +2,7 @@ from __future__ import unicode_literals
from datetime import timedelta
from django.apps import apps
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
@@ -9,43 +10,56 @@ from mayan.apps.common.settings import settings_db_sync_task_delay
from .events import event_ocr_document_version_submit
from .tasks import task_do_ocr
from .utils import get_document_version_content_iterator
def method_document_ocr_submit(self):
    """
    Submit the latest version of this document for OCR.

    Reads ``self.latest_version`` and calls its ``submit_for_ocr()``
    method. Nothing is submitted when the latest version is falsy.
    """
    latest_version = self.latest_version
    # Don't error out if document has no version
    if latest_version:
        latest_version.submit_for_ocr()
def method_document_version_ocr_submit(self):
    """
    Record the OCR submit event for this version and queue its OCR task.

    The event is committed with the parent document as the action object
    and this version as the target.
    """
    event_ocr_document_version_submit.commit(
        action_object=self.document, target=self
    )

    # The task is not run immediately: its ETA is pushed forward by the
    # number of seconds held in the settings_db_sync_task_delay setting.
    task_do_ocr.apply_async(
        eta=now() + timedelta(seconds=settings_db_sync_task_delay.value),
        kwargs={'document_version_pk': self.pk},
    )
# NOTE(review): the two `def` lines below differ only in the function name.
# This looks like the removed and added sides of a rename whose diff markers
# were lost; presumably only `method_document_get_ocr_content` remains after
# the change - TODO confirm against the real module.
def method_get_document_ocr_content(self):
def method_document_get_ocr_content(self):
latest_version = self.latest_version
# Don't error out if document has no version
if latest_version:
# Delegates to the latest version; nothing is returned explicitly when the
# latest version is falsy.
return latest_version.get_ocr_content()
# NOTE(review): the same old-name/new-name pairing repeats for the
# `short_description` assignment - confirm which line is current.
method_get_document_ocr_content.short_description = _(
method_document_get_ocr_content.short_description = _(
'get_ocr_content()'
)
# NOTE(review): old-name/new-name pairing again, for `help_text`.
method_get_document_ocr_content.help_text = _(
method_document_get_ocr_content.help_text = _(
'Return the OCR content of the document.'
)
def method_get_document_version_ocr_content(self):
return ' '.join(
get_document_version_content_iterator(document_version=self)
def method_document_ocr_submit(self, _user=None):
    """
    Submit the latest version of this document for OCR.

    ``_user`` is passed through unchanged as the ``_user`` keyword argument
    of the latest version's ``submit_for_ocr()`` call. Nothing is submitted
    when the latest version is falsy.
    """
    latest_version = self.latest_version
    # Don't error out if document has no version
    if latest_version:
        latest_version.submit_for_ocr(_user=_user)
def method_document_page_get_ocr_content(self):
    """
    Return the OCR content stored for this page.

    Returns an empty string when reading ``self.ocr_content`` raises
    ``DocumentPageOCRContent.DoesNotExist``.
    """
    # The model is looked up through the app registry at call time instead
    # of being imported at module level.
    DocumentPageOCRContent = apps.get_model(
        app_label='ocr', model_name='DocumentPageOCRContent'
    )

    try:
        return self.ocr_content.content
    except DocumentPageOCRContent.DoesNotExist:
        return ''
def method_document_version_get_ocr_content(self):
    """
    Return the OCR content of every page of this version as one string.

    Each page returned by ``self.pages.all()`` contributes the result of
    its ``get_ocr_content()`` call; the pieces are concatenated in that
    order with no separator.
    """
    return ''.join(page.get_ocr_content() for page in self.pages.all())
def method_document_version_ocr_submit(self, _user=None):
    """
    Record the OCR submit event for this version and queue its OCR task.

    The event is committed with the parent document as the action object,
    ``_user`` as the actor and this version as the target.
    """
    event_ocr_document_version_submit.commit(
        action_object=self.document, actor=_user, target=self
    )

    # The task is not run immediately: its ETA is pushed forward by the
    # number of seconds held in the settings_db_sync_task_delay setting.
    task_do_ocr.apply_async(
        eta=now() + timedelta(seconds=settings_db_sync_task_delay.value),
        kwargs={'document_version_pk': self.pk},
    )