Rewrite the document indexer to allow single index rebuilds,
less locks and improve performance. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.db import models
|
||||
from django.db import models, transaction
|
||||
from django.template import Context, Template
|
||||
from django.utils.encoding import python_2_unicode_compatible
|
||||
from django.utils.translation import ugettext, ugettext_lazy as _
|
||||
|
||||
@@ -11,11 +14,14 @@ from mptt.models import MPTTModel
|
||||
from acls.models import AccessControlList
|
||||
from documents.models import Document, DocumentType
|
||||
from documents.permissions import permission_document_view
|
||||
from lock_manager.runtime import locking_backend
|
||||
|
||||
from .managers import (
|
||||
DocumentIndexInstanceNodeManager, IndexManager, IndexInstanceNodeManager
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class Index(models.Model):
|
||||
@@ -45,14 +51,6 @@ class Index(models.Model):
|
||||
|
||||
objects = IndexManager()
|
||||
|
||||
@property
|
||||
def template_root(self):
|
||||
return self.node_templates.get(parent=None)
|
||||
|
||||
@property
|
||||
def instance_root(self):
|
||||
return self.template_root.node_instance.get()
|
||||
|
||||
def __str__(self):
|
||||
return self.label
|
||||
|
||||
@@ -72,6 +70,14 @@ class Index(models.Model):
|
||||
super(Index, self).save(*args, **kwargs)
|
||||
IndexTemplateNode.objects.get_or_create(parent=None, index=self)
|
||||
|
||||
@property
|
||||
def instance_root(self):
|
||||
return self.template_root.index_instance_nodes.get()
|
||||
|
||||
@property
|
||||
def template_root(self):
|
||||
return self.node_templates.get(parent=None)
|
||||
|
||||
def get_document_types_names(self):
|
||||
return ', '.join(
|
||||
[
|
||||
@@ -79,6 +85,34 @@ class Index(models.Model):
|
||||
] or ['None']
|
||||
)
|
||||
|
||||
def index_document(self, document):
|
||||
logger.debug('Index; Indexing document: %s', document)
|
||||
self.template_root.index_document(document=document)
|
||||
|
||||
def rebuild(self):
|
||||
"""
|
||||
Delete and reconstruct the index by deleting of all its instance nodes
|
||||
and recreating them for the documents whose types are associated with
|
||||
this index
|
||||
"""
|
||||
# Delete all index instance nodes by deleting the root index
|
||||
# instance node. All child index instance nodes will be cascade
|
||||
# deleted.
|
||||
try:
|
||||
self.instance_root.delete()
|
||||
except IndexInstanceNode.DoesNotExist:
|
||||
# Empty index, ignore this exception
|
||||
pass
|
||||
|
||||
# Create the new root index instance node
|
||||
self.template_root.index_instance_nodes.create()
|
||||
|
||||
# Re-index each document with a type associated with this index
|
||||
for document in Document.objects.filter(document_type__in=self.document_types.all()):
|
||||
# Evaluate each index template node for each document
|
||||
# associated with this index.
|
||||
self.index_document(document=document)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Index')
|
||||
verbose_name_plural = _('Indexes')
|
||||
@@ -145,6 +179,84 @@ class IndexTemplateNode(MPTTModel):
|
||||
else:
|
||||
return self.expression
|
||||
|
||||
def index_document(self, document, acquire_lock=True):
|
||||
# Avoid another process to index this same document for the same
|
||||
# template node. This prevents this template node's index instance
|
||||
# nodes from being deleted while the template is evaluated and
|
||||
# documents added to it.
|
||||
if acquire_lock:
|
||||
lock = locking_backend.acquire_lock(
|
||||
'indexing:indexing_template_node_{}'.format(self.pk)
|
||||
)
|
||||
|
||||
# Start transaction after the lock in case the locking backend uses
|
||||
# the database.
|
||||
with transaction.atomic():
|
||||
logger.debug('IndexTemplateNode; Indexing document: %s', document)
|
||||
|
||||
logger.debug(
|
||||
'Removing document "%s" from all index instance nodes',
|
||||
document
|
||||
)
|
||||
for index_template_node in self.index_instance_nodes.all():
|
||||
index_template_node.remove_document(
|
||||
document=document, acquire_lock=False
|
||||
)
|
||||
|
||||
if not self.parent:
|
||||
logger.debug(
|
||||
'IndexTemplateNode; parent: creating empty root index '
|
||||
'instance node'
|
||||
)
|
||||
index_instance_node, created = self.index_instance_nodes.get_or_create()
|
||||
|
||||
for child in self.get_children():
|
||||
child.index_document(document=document, acquire_lock=False)
|
||||
|
||||
if acquire_lock:
|
||||
lock.release()
|
||||
|
||||
elif self.enabled:
|
||||
logger.debug('IndexTemplateNode; non parent: evaluating')
|
||||
logger.debug('My parent is: %s', self.parent)
|
||||
logger.debug(
|
||||
'My parent nodes: %s', self.parent.index_instance_nodes.all()
|
||||
)
|
||||
logger.debug(
|
||||
'IndexTemplateNode; Evaluating template: %s', self.expression
|
||||
)
|
||||
|
||||
try:
|
||||
context = Context({'document': document})
|
||||
template = Template(self.expression)
|
||||
result = template.render(context=context)
|
||||
except Exception as exception:
|
||||
logger.debug('Evaluating error: %s', exception)
|
||||
error_message = _(
|
||||
'Error indexing document: %(document)s; expression: '
|
||||
'%(expression)s; %(exception)s'
|
||||
) % {
|
||||
'document': document,
|
||||
'expression': self.expression,
|
||||
'exception': exception
|
||||
}
|
||||
logger.debug(error_message)
|
||||
else:
|
||||
logger.debug('Evaluation result: %s', result)
|
||||
if result:
|
||||
index_instance_node, created = self.index_instance_nodes.get_or_create(
|
||||
parent=self.parent.index_instance_nodes.get(),
|
||||
value=result
|
||||
)
|
||||
if self.link_documents:
|
||||
index_instance_node.documents.add(document)
|
||||
|
||||
for child in self.get_children():
|
||||
child.index_document(document=document, acquire_lock=False)
|
||||
finally:
|
||||
if acquire_lock:
|
||||
lock.release()
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Index node template')
|
||||
verbose_name_plural = _('Indexes node template')
|
||||
@@ -154,14 +266,15 @@ class IndexTemplateNode(MPTTModel):
|
||||
class IndexInstanceNode(MPTTModel):
|
||||
parent = TreeForeignKey('self', null=True, blank=True)
|
||||
index_template_node = models.ForeignKey(
|
||||
IndexTemplateNode, related_name='node_instance',
|
||||
IndexTemplateNode, related_name='index_instance_nodes',
|
||||
verbose_name=_('Index template node')
|
||||
)
|
||||
value = models.CharField(
|
||||
blank=True, db_index=True, max_length=128, verbose_name=_('Value')
|
||||
)
|
||||
documents = models.ManyToManyField(
|
||||
Document, related_name='node_instances', verbose_name=_('Documents')
|
||||
Document, related_name='index_instance_nodes',
|
||||
verbose_name=_('Documents')
|
||||
)
|
||||
|
||||
objects = IndexInstanceNodeManager()
|
||||
@@ -200,6 +313,52 @@ class IndexInstanceNode(MPTTModel):
|
||||
|
||||
return ' / '.join(result)
|
||||
|
||||
def delete_empty(self, acquire_lock=True):
|
||||
"""
|
||||
The argument `acquire_lock` controls whether or not this method
|
||||
acquires or lock. The case for this is to acquire when called directly
|
||||
or not to acquire when called as part of a larger index process
|
||||
that already has a lock
|
||||
"""
|
||||
# Prevent another process to work on this node. We use the node's
|
||||
# parent template node for the lock
|
||||
if acquire_lock:
|
||||
lock = locking_backend.acquire_lock(
|
||||
'indexing:indexing_template_node_{}'.format(
|
||||
self.index_template_node.pk
|
||||
)
|
||||
)
|
||||
# Start transaction after the lock in case the locking backend uses
|
||||
# the database.
|
||||
with transaction.atomic():
|
||||
if self.documents.count() == 0 and self.get_children().count() == 0:
|
||||
if self.parent:
|
||||
self.delete()
|
||||
self.parent.delete_empty(acquire_lock=False)
|
||||
if acquire_lock:
|
||||
lock.release()
|
||||
|
||||
def remove_document(self, document, acquire_lock=True):
|
||||
"""
|
||||
The argument `acquire_lock` controls whether or not this method
|
||||
acquires or lock. The case for this is to acquire when called directly
|
||||
or not to acquire when called as part of a larger index process
|
||||
that already has a lock
|
||||
"""
|
||||
# Prevent another process to work on this node. We use the node's
|
||||
# parent template node for the lock
|
||||
if acquire_lock:
|
||||
lock = locking_backend.acquire_lock(
|
||||
'indexing:indexing_template_node_{}'.format(
|
||||
self.index_template_node.pk
|
||||
)
|
||||
)
|
||||
self.documents.remove(document)
|
||||
self.delete_empty(acquire_lock=False)
|
||||
|
||||
if acquire_lock:
|
||||
lock.release()
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Index node instance')
|
||||
verbose_name_plural = _('Indexes node instances')
|
||||
|
||||
Reference in New Issue
Block a user