Indexing: Add document base property reindex

Add support for reindexing documents when their base properties, such as
the label and description, are edited.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-10-31 16:54:19 -04:00
parent 9fc7c4fc09
commit e109068b29
9 changed files with 179 additions and 106 deletions

View File

@@ -21,6 +21,8 @@
* Add new management command to display the current configuration
settings.
* Default the YAML flow format to False which never uses inline.
* Add support for reindexing documents when their base properties, such as
the label and description, are edited.
3.1.7 (2018-10-14)
==================

View File

@@ -3,7 +3,7 @@ from __future__ import absolute_import, unicode_literals
from kombu import Exchange, Queue
from django.apps import apps
from django.db.models.signals import post_delete, pre_delete
from django.db.models.signals import post_delete, post_save, pre_delete
from django.utils.translation import ugettext_lazy as _
from acls import ModelPermission
@@ -21,7 +21,8 @@ from navigation import SourceColumn
from .handlers import (
create_default_document_index, handler_delete_empty,
handler_index_document, handler_remove_document
handler_index_document, handler_remove_document,
handler_post_save_index_document
)
from .links import (
link_document_index_list, link_index_main_menu, link_index_setup,
@@ -210,11 +211,6 @@ class DocumentIndexingApp(MayanAppConfig):
receiver=handler_delete_empty,
sender=Document
)
pre_delete.connect(
dispatch_uid='document_indexing_handler_remove_document',
receiver=handler_remove_document,
sender=Document
)
post_document_created.connect(
dispatch_uid='document_indexing_handler_index_document',
receiver=handler_index_document,
@@ -225,3 +221,13 @@ class DocumentIndexingApp(MayanAppConfig):
receiver=create_default_document_index,
sender=DocumentType
)
post_save.connect(
dispatch_uid='document_indexing_handler_post_save_index_document',
receiver=handler_post_save_index_document,
sender=Document
)
pre_delete.connect(
dispatch_uid='document_indexing_handler_remove_document',
receiver=handler_remove_document,
sender=Document
)

View File

@@ -43,6 +43,18 @@ def handler_index_document(sender, **kwargs):
)
def handler_post_save_index_document(sender, **kwargs):
    """
    Queue a reindex of a document after an already existing instance is
    saved (edited).

    Saves flagged as ``created`` are ignored here: newly created documents
    are indexed by handler_index_document, which is driven by the custom
    post_document_created signal.
    """
    if kwargs['created']:
        # First save of the document; nothing to do in this handler.
        return

    document = kwargs['instance']
    task_index_document.apply_async(
        kwargs=dict(document_id=document.pk)
    )
def handler_remove_document(sender, **kwargs):
task_remove_document.apply_async(
kwargs=dict(document_id=kwargs['instance'].pk)

View File

@@ -6,4 +6,5 @@ TEST_INDEX_SLUG = 'test_slug'
TEST_METADATA_TYPE_LABEL = 'test metadata label'
TEST_METADATA_TYPE_NAME = 'test_metadata_name'
TEST_INDEX_TEMPLATE_METADATA_EXPRESSION = '{{ document.metadata_value_of.%s }}' % TEST_METADATA_TYPE_NAME
TEST_INDEX_TEMPLATE_LABEL_EXPRESSION = '{{ document.label }}'
TEST_INDEX_TEMPLATE_DOCUMENT_LABEL_EXPRESSION = '{{ document.label }}'
TEST_INDEX_TEMPLATE_DOCUMENT_DESCRIPTION_EXPRESSION = '{{ document.description }}'

View File

@@ -5,11 +5,17 @@ from django.utils.encoding import force_text
from common.tests import BaseTestCase
from documents.tests import DocumentTestMixin, TEST_SMALL_DOCUMENT_PATH
from documents.tests.literals import (
TEST_DOCUMENT_DESCRIPTION, TEST_DOCUMENT_DESCRIPTION_EDITED,
TEST_DOCUMENT_LABEL_EDITED
)
from metadata.models import MetadataType, DocumentTypeMetadataType
from ..models import Index, IndexInstanceNode, IndexTemplateNode
from .literals import (
TEST_INDEX_TEMPLATE_DOCUMENT_DESCRIPTION_EXPRESSION,
TEST_INDEX_TEMPLATE_DOCUMENT_LABEL_EXPRESSION,
TEST_INDEX_TEMPLATE_METADATA_EXPRESSION, TEST_METADATA_TYPE_LABEL,
TEST_METADATA_TYPE_NAME
)
@@ -18,7 +24,121 @@ from .mixins import DocumentIndexingTestMixin
@override_settings(OCR_AUTO_OCR=False)
class IndexTestCase(DocumentIndexingTestMixin, DocumentTestMixin, BaseTestCase):
def test_indexing(self):
def test_document_description_index(self):
    """
    An index node built from the document description must hold the
    description after a rebuild and again after the description is
    edited and the document saved.
    """
    self._create_index()
    self.index.node_templates.create(
        expression=TEST_INDEX_TEMPLATE_DOCUMENT_DESCRIPTION_EXPRESSION,
        link_documents=True,
        parent=self.index.template_root
    )

    # Set the initial description and build the index explicitly.
    self.document.description = TEST_DOCUMENT_DESCRIPTION
    self.document.save()
    self.index.rebuild()

    node = IndexInstanceNode.objects.last()
    self.assertEqual(node.value, self.document.description)

    # Edit the description. No explicit rebuild is issued this time; the
    # save alone is expected to leave the index up to date.
    self.document.description = TEST_DOCUMENT_DESCRIPTION_EDITED
    self.document.save()

    node = IndexInstanceNode.objects.last()
    self.assertEqual(node.value, self.document.description)
def test_document_label_index(self):
    """
    An index node built from the document label must hold the label
    after a rebuild and again after the label is edited and the document
    saved.
    """
    self._create_index()
    self.index.node_templates.create(
        expression=TEST_INDEX_TEMPLATE_DOCUMENT_LABEL_EXPRESSION,
        link_documents=True,
        parent=self.index.template_root
    )
    self.index.rebuild()

    node = IndexInstanceNode.objects.last()
    self.assertEqual(node.value, self.document.label)

    # Edit the label. No explicit rebuild is issued this time; the save
    # alone is expected to leave the index up to date.
    self.document.label = TEST_DOCUMENT_LABEL_EDITED
    self.document.save()

    node = IndexInstanceNode.objects.last()
    self.assertEqual(node.value, self.document.label)
def test_date_based_index(self):
    """
    Build a two level year/month index from the document's date_added
    and check the node values and the document link for a newly
    uploaded document.
    """
    self._create_index()

    # First level: the year. Documents are not linked at this level.
    year_template = self.index.node_templates.create(
        expression='{{ document.date_added|date:"Y" }}',
        link_documents=False,
        parent=self.index.template_root
    )
    # Second level: the month, as a child of the year. Documents are
    # linked at this level.
    self.index.node_templates.create(
        expression='{{ document.date_added|date:"m" }}',
        link_documents=True,
        parent=year_template
    )

    # Index the document created by default, then delete it.
    Index.objects.rebuild()
    self.document.delete()

    # Uploading a new document should not trigger an error.
    document = self.upload_document()

    node_values = [
        node.value
        for node in IndexInstanceNode.objects.all().order_by('pk')
    ]
    expected_values = [
        '', force_text(document.date_added.year),
        force_text(document.date_added.month).zfill(2)
    ]
    self.assertEqual(node_values, expected_values)

    last_node = IndexInstanceNode.objects.order_by('pk').last()
    self.assertTrue(document in list(last_node.documents.all()))
def test_dual_level_dual_document_index(self):
    """
    GitLab issue #391: an index instance with two different first level
    values, each one holding its own second level child, where both
    second level children have the same value.
    """
    with open(TEST_SMALL_DOCUMENT_PATH, mode='rb') as file_object:
        self.document_2 = self.document_type.new_document(
            file_object=file_object
        )

    self._create_index()

    # Simple two level template: document UUID on the first level,
    # document label (with linked documents) on the second.
    uuid_level = self.index.node_templates.create(
        expression='{{ document.uuid }}', link_documents=False,
        parent=self.index.template_root
    )
    self.index.node_templates.create(
        expression='{{ document.label }}', link_documents=True,
        parent=uuid_level
    )

    Index.objects.rebuild()

    node_values = [
        node.value
        for node in IndexInstanceNode.objects.all().order_by('pk')
    ]
    expected_values = [
        '', force_text(self.document_2.uuid), self.document_2.label,
        force_text(self.document.uuid), self.document.label
    ]
    self.assertEqual(node_values, expected_values)
def test_metadata_indexing(self):
metadata_type = MetadataType.objects.create(
name=TEST_METADATA_TYPE_NAME, label=TEST_METADATA_TYPE_LABEL
)
@@ -106,6 +226,26 @@ class IndexTestCase(DocumentIndexingTestMixin, DocumentTestMixin, BaseTestCase):
), ['']
)
def test_multi_level_template_with_no_result_parent(self):
    """
    GitLab issue #391: on a two level template, indexing should stop
    when the first level doesn't return a result. The test only
    requires the rebuild to complete without raising.
    """
    self._create_index()

    # First level with an empty expression, so it yields no result.
    empty_level = self.index.node_templates.create(
        expression='', link_documents=True,
        parent=self.index.template_root
    )
    # Second level hanging from the level that has no result.
    self.index.node_templates.create(
        expression='{{ document.label }}', link_documents=True,
        parent=empty_level
    )

    Index.objects.rebuild()
def test_rebuild_all_indexes(self):
# Add metadata type and connect to document type
metadata_type = MetadataType.objects.create(name='test', label='test')
@@ -143,92 +283,3 @@ class IndexTestCase(DocumentIndexingTestMixin, DocumentTestMixin, BaseTestCase):
self.assertQuerysetEqual(
instance_node.documents.all(), [repr(self.document)]
)
def test_dual_level_dual_document_index(self):
    """
    GitLab issue #391: an index instance with two different first level
    values, each one holding its own second level child, where both
    second level children have the same value.
    """
    with open(TEST_SMALL_DOCUMENT_PATH, 'rb') as file_object:
        self.document_2 = self.document_type.new_document(
            file_object=file_object
        )

    self._create_index()

    # Simple two level template: document UUID on the first level,
    # document label (with linked documents) on the second.
    first_level = self.index.node_templates.create(
        expression='{{ document.uuid }}', link_documents=False,
        parent=self.index.template_root
    )
    self.index.node_templates.create(
        expression='{{ document.label }}', link_documents=True,
        parent=first_level
    )

    Index.objects.rebuild()

    found_values = [
        node.value
        for node in IndexInstanceNode.objects.all().order_by('pk')
    ]
    self.assertEqual(
        found_values,
        [
            '', force_text(self.document_2.uuid), self.document_2.label,
            force_text(self.document.uuid), self.document.label
        ]
    )
def test_multi_level_template_with_no_result_parent(self):
    """
    GitLab issue #391: on a two level template, indexing should stop
    when the first level doesn't return a result. The test only
    requires the rebuild to complete without raising.
    """
    self._create_index()

    # Parent level whose expression is empty and so yields no result.
    parent_level = self.index.node_templates.create(
        expression='', link_documents=True,
        parent=self.index.template_root
    )
    # Child level under the parent that has no result.
    self.index.node_templates.create(
        expression='{{ document.label }}', link_documents=True,
        parent=parent_level
    )

    Index.objects.rebuild()
def test_date_based_index(self):
    """
    Build a two level year/month index from the document's date_added
    and check the node values and the document link for a newly
    uploaded document.
    """
    self._create_index()

    # Year level: documents are not linked here.
    level_year = self.index.node_templates.create(
        expression='{{ document.date_added|date:"Y" }}',
        link_documents=False,
        parent=self.index.template_root
    )
    # Month level under the year: documents are linked here.
    self.index.node_templates.create(
        expression='{{ document.date_added|date:"m" }}',
        link_documents=True,
        parent=level_year
    )

    # Index the document created by default, then delete it.
    Index.objects.rebuild()
    self.document.delete()

    # Uploading a new document should not trigger an error.
    document = self.upload_document()

    found_values = [
        node.value
        for node in IndexInstanceNode.objects.all().order_by('pk')
    ]
    self.assertEqual(
        found_values,
        [
            '', force_text(document.date_added.year),
            force_text(document.date_added.month).zfill(2)
        ]
    )

    deepest_node = IndexInstanceNode.objects.order_by('pk').last()
    self.assertTrue(document in list(deepest_node.documents.all()))

View File

@@ -12,7 +12,7 @@ from ..permissions import (
from .literals import (
TEST_INDEX_LABEL, TEST_INDEX_LABEL_EDITED, TEST_INDEX_SLUG,
TEST_INDEX_TEMPLATE_LABEL_EXPRESSION
TEST_INDEX_TEMPLATE_DOCUMENT_LABEL_EXPRESSION
)
@@ -112,7 +112,7 @@ class IndexViewTestCase(GenericDocumentViewTestCase):
# Create simple index template
root = self.index.template_root
self.index.node_templates.create(
parent=root, expression=TEST_INDEX_TEMPLATE_LABEL_EXPRESSION,
parent=root, expression=TEST_INDEX_TEMPLATE_DOCUMENT_LABEL_EXPRESSION,
link_documents=True
)

View File

@@ -27,6 +27,7 @@ TEST_DEU_DOCUMENT_FILENAME = 'deu_website.png'
TEST_DOCUMENT_DESCRIPTION = 'test description'
TEST_DOCUMENT_DESCRIPTION_EDITED = 'test document description edited'
TEST_DOCUMENT_FILENAME = 'mayan_11_1.pdf'
TEST_DOCUMENT_LABEL_EDITED = 'test document label edited'
TEST_DOCUMENT_TYPE_LABEL = 'test_document_type'
TEST_DOCUMENT_TYPE_2_LABEL = 'test document type 2'
TEST_DOCUMENT_TYPE_LABEL_EDITED = 'test document type edited label'

View File

@@ -27,7 +27,7 @@ class DocumentTestMixin(object):
def upload_document(self):
self._calculate_test_document_path()
with open(self.test_document_path, 'rb') as file_object:
with open(self.test_document_path, mode='rb') as file_object:
document = self.document_type.new_document(
file_object=file_object, label=self.test_document_filename
)

View File

@@ -139,7 +139,7 @@ class PDFCompatibilityTestCase(BaseTestCase):
label=TEST_DOCUMENT_TYPE_LABEL
)
with open(TEST_PDF_INDIRECT_ROTATE_PATH, 'rb') as file_object:
with open(TEST_PDF_INDIRECT_ROTATE_PATH, mode='rb') as file_object:
self.document = self.document_type.new_document(
file_object=file_object
)
@@ -158,7 +158,7 @@ class OfficeDocumentTestCase(BaseTestCase):
label=TEST_DOCUMENT_TYPE_LABEL
)
with open(TEST_OFFICE_DOCUMENT_PATH, 'rb') as file_object:
with open(TEST_OFFICE_DOCUMENT_PATH, mode='rb') as file_object:
self.document = self.document_type.new_document(
file_object=file_object
)
@@ -187,7 +187,7 @@ class MultiPageTiffTestCase(BaseTestCase):
label=TEST_DOCUMENT_TYPE_LABEL
)
with open(TEST_MULTI_PAGE_TIFF_PATH, 'rb') as file_object:
with open(TEST_MULTI_PAGE_TIFF_PATH, mode='rb') as file_object:
self.document = self.document_type.new_document(
file_object=file_object
)
@@ -210,7 +210,7 @@ class DocumentVersionTestCase(GenericDocumentTestCase):
def test_add_new_version(self):
self.assertEqual(self.document.versions.count(), 1)
with open(TEST_DOCUMENT_PATH, 'rb') as file_object:
with open(TEST_DOCUMENT_PATH, mode='rb') as file_object:
self.document.new_version(
file_object=file_object
)
@@ -229,7 +229,7 @@ class DocumentVersionTestCase(GenericDocumentTestCase):
# field
time.sleep(1.01)
with open(TEST_DOCUMENT_PATH, 'rb') as file_object:
with open(TEST_DOCUMENT_PATH, mode='rb') as file_object:
self.document.new_version(
file_object=file_object
)