From fb83a838fb37feac44444647ede7a8abf2169d44 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 2 Oct 2018 03:54:29 -0400 Subject: [PATCH] Add support for indexing on OCR content changes. Signed-off-by: Roberto Rosario --- HISTORY.rst | 1 + mayan/apps/ocr/apps.py | 9 ++++- mayan/apps/ocr/handlers.py | 8 +++++ .../ocr/migrations/0003_auto_20150617_0401.py | 2 +- mayan/apps/ocr/tests/literals.py | 3 ++ mayan/apps/ocr/tests/test_indexing.py | 35 +++++++++++++++++++ 6 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 mayan/apps/ocr/tests/literals.py create mode 100644 mayan/apps/ocr/tests/test_indexing.py diff --git a/HISTORY.rst b/HISTORY.rst index e33dc40298..fcc985183f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -11,6 +11,7 @@ in the trash. * Load the DropZone CSS from package and remove the hard code CSS from appearance/base.css. +* Add support for indexing on OCR content changes. 3.1.3 (2018-09-27) ================== diff --git a/mayan/apps/ocr/apps.py b/mayan/apps/ocr/apps.py index 96d8dd5d08..e1d21110b1 100644 --- a/mayan/apps/ocr/apps.py +++ b/mayan/apps/ocr/apps.py @@ -25,7 +25,8 @@ from navigation import SourceColumn from .events import event_ocr_document_version_submit from .handlers import ( - handler_initialize_new_ocr_settings, handler_ocr_document_version, + handler_index_document, handler_initialize_new_ocr_settings, + handler_ocr_document_version, ) from .links import ( link_document_page_ocr_content, link_document_ocr_content, @@ -39,6 +40,7 @@ from .permissions import ( permission_ocr_content_view ) from .queues import * # NOQA +from .signals import post_document_version_ocr from .utils import get_document_ocr_content logger = logging.getLogger(__name__) @@ -191,6 +193,11 @@ class OCRApp(MayanAppConfig): ) ) + post_document_version_ocr.connect( + dispatch_uid='ocr_handler_index_document', + receiver=handler_index_document, + sender=DocumentVersion + ) post_save.connect( dispatch_uid='ocr_handler_initialize_new_ocr_settings', receiver=handler_initialize_new_ocr_settings, diff --git a/mayan/apps/ocr/handlers.py b/mayan/apps/ocr/handlers.py index 9d03dfe3d6..706e3f6f81 100644 --- a/mayan/apps/ocr/handlers.py +++ b/mayan/apps/ocr/handlers.py @@ -4,11 +4,19 @@ import logging from django.apps import apps +from document_indexing.tasks import task_index_document + from .settings import setting_auto_ocr logger = logging.getLogger(__name__) +def handler_index_document(sender, **kwargs): + task_index_document.apply_async( + kwargs=dict(document_id=kwargs['instance'].document.pk) + ) + + def handler_initialize_new_ocr_settings(sender, instance, **kwargs): DocumentTypeSettings = apps.get_model( app_label='ocr', model_name='DocumentTypeSettings' diff --git a/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py b/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py index d870c6a4e9..4fc8a85c34 100644 --- a/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py +++ b/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py @@ -9,7 +9,7 @@ def move_content_from_documents_to_ocr_app(apps, schema_editor): DocumentPageContent = apps.get_model('ocr', 'DocumentPageContent') for document_page in DocumentPage.objects.using(schema_editor.connection.alias).all(): - document_page_content = DocumentPageContent.objects.using(schema_editor.connection.alias).create( + DocumentPageContent.objects.using(schema_editor.connection.alias).create( document_page=document_page, content=document_page.content_old or '' ) diff --git a/mayan/apps/ocr/tests/literals.py b/mayan/apps/ocr/tests/literals.py new file mode 100644 index 0000000000..49de0a9aae --- /dev/null +++ b/mayan/apps/ocr/tests/literals.py @@ -0,0 +1,3 @@ +from __future__ import unicode_literals + +TEST_OCR_INDEX_NODE_TEMPLATE = '{% if "mayan" in document.latest_version.ocr_content|join:" "|lower %}mayan{% endif %}' diff --git a/mayan/apps/ocr/tests/test_indexing.py b/mayan/apps/ocr/tests/test_indexing.py new file mode 100644 index 0000000000..56c330c1f1 --- /dev/null +++ b/mayan/apps/ocr/tests/test_indexing.py @@ -0,0 +1,35 @@ +from __future__ import unicode_literals + +from django.test import override_settings + +from common.tests import BaseTestCase +from documents.tests import DocumentTestMixin +from document_indexing.models import Index, IndexInstanceNode +from document_indexing.tests.literals import TEST_INDEX_LABEL + +from .literals import TEST_OCR_INDEX_NODE_TEMPLATE + + +@override_settings(OCR_AUTO_OCR=False) +class OCRIndexingTestCase(DocumentTestMixin, BaseTestCase): + auto_upload_document = False + + def test_ocr_indexing(self): + index = Index.objects.create(label=TEST_INDEX_LABEL) + + index.document_types.add(self.document_type) + + root = index.template_root + index.node_templates.create( + parent=root, expression=TEST_OCR_INDEX_NODE_TEMPLATE, + link_documents=True + ) + + self.document = self.upload_document() + self.document.submit_for_ocr() + + self.assertTrue( + self.document in IndexInstanceNode.objects.get( + value='mayan' + ).documents.all() + )