Add support for reindexing documents when their parsed content changes.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -1,3 +1,8 @@
|
|||||||
|
3.2 (2018-XX-XX)
|
||||||
|
================
|
||||||
|
* Add support for reindexing document on content parsing
|
||||||
|
changes.
|
||||||
|
|
||||||
3.1.4 (2018-10-XX)
|
3.1.4 (2018-10-XX)
|
||||||
==================
|
==================
|
||||||
* Fix the link to the documentation. Closes GitLab issue #516.
|
* Fix the link to the documentation. Closes GitLab issue #516.
|
||||||
|
|||||||
@@ -25,7 +25,8 @@ from navigation import SourceColumn
|
|||||||
|
|
||||||
from .events import event_parsing_document_version_submit
|
from .events import event_parsing_document_version_submit
|
||||||
from .handlers import (
|
from .handlers import (
|
||||||
handler_initialize_new_parsing_settings, handler_parse_document_version
|
handler_index_document, handler_initialize_new_parsing_settings,
|
||||||
|
handler_parse_document_version
|
||||||
)
|
)
|
||||||
from .links import (
|
from .links import (
|
||||||
link_document_content, link_document_content_download,
|
link_document_content, link_document_content_download,
|
||||||
@@ -37,6 +38,7 @@ from .permissions import (
|
|||||||
permission_content_view, permission_document_type_parsing_setup,
|
permission_content_view, permission_document_type_parsing_setup,
|
||||||
permission_parse_document
|
permission_parse_document
|
||||||
)
|
)
|
||||||
|
from .signals import post_document_version_parsing
|
||||||
from .utils import get_document_content
|
from .utils import get_document_content
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -178,8 +180,14 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
link_document_type_submit, link_error_list,
|
link_document_type_submit, link_error_list,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
post_document_version_parsing.connect(
|
||||||
|
dispatch_uid='document_parsing_handler_index_document',
|
||||||
|
receiver=handler_index_document,
|
||||||
|
sender=DocumentVersion
|
||||||
|
)
|
||||||
post_save.connect(
|
post_save.connect(
|
||||||
dispatch_uid='handler_initialize_new_parsing_settings',
|
dispatch_uid='document_parsing_handler_initialize_new_parsing_settings',
|
||||||
receiver=handler_initialize_new_parsing_settings,
|
receiver=handler_initialize_new_parsing_settings,
|
||||||
sender=DocumentType
|
sender=DocumentType
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,11 +4,19 @@ import logging
|
|||||||
|
|
||||||
from django.apps import apps
|
from django.apps import apps
|
||||||
|
|
||||||
|
from document_indexing.tasks import task_index_document
|
||||||
|
|
||||||
from .settings import setting_auto_parsing
|
from .settings import setting_auto_parsing
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_index_document(sender, **kwargs):
    """
    Signal receiver that queues an indexing task for a document.

    The receiver expects an ``instance`` keyword argument exposing a
    ``document`` attribute; that document's primary key is passed to
    ``task_index_document`` as ``document_id``.
    """
    document = kwargs['instance'].document
    task_kwargs = dict(document_id=document.pk)
    task_index_document.apply_async(kwargs=task_kwargs)
|
||||||
|
|
||||||
|
|
||||||
def handler_initialize_new_parsing_settings(sender, instance, **kwargs):
|
def handler_initialize_new_parsing_settings(sender, instance, **kwargs):
|
||||||
DocumentTypeSettings = apps.get_model(
|
DocumentTypeSettings = apps.get_model(
|
||||||
app_label='document_parsing', model_name='DocumentTypeSettings'
|
app_label='document_parsing', model_name='DocumentTypeSettings'
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from django.db import models
|
|||||||
|
|
||||||
from .events import event_parsing_document_version_finish
|
from .events import event_parsing_document_version_finish
|
||||||
from .parsers import Parser
|
from .parsers import Parser
|
||||||
|
from .signals import post_document_version_parsing
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -50,6 +51,10 @@ class DocumentPageContentManager(models.Manager):
|
|||||||
target=document_version
|
target=document_version
|
||||||
)
|
)
|
||||||
|
|
||||||
|
post_document_version_parsing.send(
|
||||||
|
sender=document_version.__class__, instance=document_version
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DocumentTypeSettingsManager(models.Manager):
|
class DocumentTypeSettingsManager(models.Manager):
|
||||||
def get_by_natural_key(self, document_type_natural_key):
|
def get_by_natural_key(self, document_type_natural_key):
|
||||||
|
|||||||
7
mayan/apps/document_parsing/signals.py
Normal file
7
mayan/apps/document_parsing/signals.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.dispatch import Signal
|
||||||
|
|
||||||
|
# Signal carrying a single ``instance`` argument for its receivers.
post_document_version_parsing = Signal(
    use_caching=True, providing_args=('instance',)
)
|
||||||
3
mayan/apps/document_parsing/tests/literals.py
Normal file
3
mayan/apps/document_parsing/tests/literals.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Index node template expression: evaluates to "sample" when the word appears
# (case-insensitively) in the latest version's content, else to nothing.
TEST_PARSING_INDEX_NODE_TEMPLATE = (
    '{% if "sample" in document.latest_version.content|join:" "|lower %}'
    'sample{% endif %}'
)
|
||||||
39
mayan/apps/document_parsing/tests/test_indexing.py
Normal file
39
mayan/apps/document_parsing/tests/test_indexing.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.test import override_settings
|
||||||
|
|
||||||
|
from common.tests import BaseTestCase
|
||||||
|
from documents.tests import DocumentTestMixin, TEST_HYBRID_DOCUMENT
|
||||||
|
|
||||||
|
from document_indexing.models import Index, IndexInstanceNode
|
||||||
|
from document_indexing.tests.literals import TEST_INDEX_LABEL
|
||||||
|
|
||||||
|
from .literals import TEST_PARSING_INDEX_NODE_TEMPLATE
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=False)
@override_settings(OCR_AUTO_OCR=False)
class ParsingIndexingTestCase(DocumentTestMixin, BaseTestCase):
    """
    Verify that a document submitted for parsing ends up linked to the
    index node produced by a content-based node template.
    """
    # The document is uploaded explicitly inside the test, after the index
    # and its node template have been created.
    auto_upload_document = False
    test_document_filename = TEST_HYBRID_DOCUMENT

    def test_parsing_indexing(self):
        """
        Create an index whose node template depends on the parsed content,
        upload and parse the test document, then check that the document is
        linked to the resulting 'sample' index instance node.
        """
        index = Index.objects.create(label=TEST_INDEX_LABEL)
        index.document_types.add(self.document_type)

        index.node_templates.create(
            parent=index.template_root,
            expression=TEST_PARSING_INDEX_NODE_TEMPLATE,
            link_documents=True
        )

        self.document = self.upload_document()
        self.document.submit_for_parsing()

        # assertIn reports both operands on failure, unlike
        # assertTrue(x in y) which only reports "False is not true".
        self.assertIn(
            self.document,
            IndexInstanceNode.objects.get(value='sample').documents.all()
        )
|
||||||
Reference in New Issue
Block a user