Rewrite the document indexer to allow single index rebuilds,
less locks and improve performance. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -1,14 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from django.db import models, transaction
|
||||
from django.template import Context, Template
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from django.db import models
|
||||
|
||||
|
||||
class DocumentIndexInstanceNodeManager(models.Manager):
|
||||
@@ -17,104 +9,25 @@ class DocumentIndexInstanceNodeManager(models.Manager):
|
||||
|
||||
|
||||
class IndexManager(models.Manager):
|
||||
def index_document(self, document):
|
||||
for index in self.filter(enabled=True, document_types=document.document_type):
|
||||
index.index_document(document=document)
|
||||
|
||||
def get_by_natural_key(self, name):
|
||||
return self.get(name=name)
|
||||
|
||||
def rebuild(self):
|
||||
for index in self.all():
|
||||
index.rebuild()
|
||||
|
||||
|
||||
class IndexInstanceNodeManager(models.Manager):
|
||||
@staticmethod
|
||||
def delete_empty_index_nodes_recursive(instance_node):
|
||||
"""
|
||||
Calls itself recursively deleting empty index instance nodes up to
|
||||
root
|
||||
"""
|
||||
|
||||
if instance_node.get_children().count() == 0:
|
||||
# if there are no children, delete node and check parent for the
|
||||
# same conditions
|
||||
parent = instance_node.parent
|
||||
if parent:
|
||||
instance_node.delete()
|
||||
IndexInstanceNodeManager.delete_empty_index_nodes_recursive(
|
||||
parent
|
||||
)
|
||||
|
||||
def cascade_eval(self, document, template_node, parent_index_instance=None):
|
||||
"""
|
||||
Evaluate an enabled index expression and update or create all the
|
||||
related index instances also recursively calling itself to evaluate
|
||||
all the index's children
|
||||
"""
|
||||
|
||||
if template_node.enabled:
|
||||
try:
|
||||
template = Template(template_node.expression)
|
||||
context = Context({'document': document})
|
||||
result = template.render(context=context)
|
||||
except Exception as exception:
|
||||
error_message = _(
|
||||
'Error indexing document: %(document)s; expression: '
|
||||
'%(expression)s; %(exception)s'
|
||||
) % {
|
||||
'document': document,
|
||||
'expression': template_node.expression,
|
||||
'exception': exception
|
||||
}
|
||||
logger.debug(error_message)
|
||||
else:
|
||||
if result:
|
||||
index_instance, created = self.get_or_create(
|
||||
index_template_node=template_node, value=result,
|
||||
parent=parent_index_instance
|
||||
)
|
||||
|
||||
if template_node.link_documents:
|
||||
index_instance.documents.add(document)
|
||||
|
||||
for child in template_node.get_children():
|
||||
self.cascade_eval(
|
||||
document=document,
|
||||
template_node=child,
|
||||
parent_index_instance=index_instance
|
||||
)
|
||||
|
||||
def delete_empty_index_nodes(self):
|
||||
"""
|
||||
Delete empty index instance nodes
|
||||
"""
|
||||
|
||||
for instance_node in self.filter(documents__isnull=True, parent__isnull=False):
|
||||
IndexInstanceNodeManager.delete_empty_index_nodes_recursive(
|
||||
instance_node
|
||||
)
|
||||
|
||||
def index_document(self, document):
|
||||
"""
|
||||
Update or create all the index instances related to a document
|
||||
"""
|
||||
|
||||
from .models import Index
|
||||
|
||||
with transaction.atomic():
|
||||
self.remove_document(document)
|
||||
|
||||
# Only update indexes where the document type is found
|
||||
for index in Index.objects.filter(enabled=True, document_types=document.document_type):
|
||||
root_instance, created = self.get_or_create(
|
||||
index_template_node=index.template_root, parent=None
|
||||
)
|
||||
for template_node in index.template_root.get_children():
|
||||
self.cascade_eval(document, template_node, root_instance)
|
||||
def delete_empty(self):
|
||||
# Select leaf nodes only because .delete_empty() bubbles up
|
||||
for root_nodes in self.filter(parent=None):
|
||||
for index_instance_node in root_nodes.get_leafnodes():
|
||||
index_instance_node.delete_empty()
|
||||
|
||||
def remove_document(self, document):
|
||||
for index_node in self.filter(documents=document):
|
||||
index_node.documents.remove(document)
|
||||
|
||||
self.delete_empty_index_nodes()
|
||||
|
||||
def rebuild_all_indexes(self):
|
||||
for instance_node in self.all():
|
||||
instance_node.delete()
|
||||
|
||||
for document in Document.objects.all():
|
||||
self.index_document(document)
|
||||
for index_instance_node in self.filter(documents=document):
|
||||
index_instance_node.remove_document(document=document)
|
||||
|
||||
Reference in New Issue
Block a user