Rewrite the document indexer to allow single index rebuilds,

less locks and improve performance.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-05-28 15:22:13 -04:00
parent 3e3e247997
commit 88151df2bc
14 changed files with 306 additions and 208 deletions

View File

@@ -7,7 +7,6 @@ from django.db import OperationalError
from mayan.celery import app
from lock_manager import LockError
from lock_manager.runtime import locking_backend
from .literals import RETRY_DELAY
@@ -15,23 +14,36 @@ logger = logging.getLogger(__name__)
@app.task(bind=True, default_retry_delay=RETRY_DELAY, max_retries=None, ignore_result=True)
def task_delete_empty_index_nodes(self):
def task_delete_empty(self):
IndexInstanceNode = apps.get_model(
app_label='document_indexing', model_name='IndexInstanceNode'
)
try:
rebuild_lock = locking_backend.acquire_lock(
'document_indexing_task_do_rebuild_all_indexes'
)
IndexInstanceNode.objects.delete_empty()
except LockError as exception:
# A rebuild is happening, retry later
raise self.retry(exc=exception)
@app.task(bind=True, default_retry_delay=RETRY_DELAY, max_retries=None, ignore_result=True)
def task_remove_document(self, document_id):
Document = apps.get_model(
app_label='documents', model_name='Document'
)
IndexInstanceNode = apps.get_model(
app_label='document_indexing', model_name='IndexInstanceNode'
)
try:
document = Document.objects.get(pk=document_id)
except Document.DoesNotExist:
# Document was deleted before we could execute, abort
pass
else:
try:
IndexInstanceNode.objects.delete_empty_index_nodes()
finally:
rebuild_lock.release()
IndexInstanceNode.objects.remove_document(document=document)
except LockError as exception:
raise self.retry(exc=exception)
@app.task(bind=True, default_retry_delay=RETRY_DELAY, max_retries=None, ignore_result=True)
@@ -39,66 +51,42 @@ def task_index_document(self, document_id):
Document = apps.get_model(
app_label='documents', model_name='Document'
)
IndexInstanceNode = apps.get_model(
app_label='document_indexing', model_name='IndexInstanceNode'
Index = apps.get_model(
app_label='document_indexing', model_name='Index'
)
try:
rebuild_lock = locking_backend.acquire_lock(
'document_indexing_task_do_rebuild_all_indexes'
)
except LockError as exception:
# A rebuild is happening, retry later
raise self.retry(exc=exception)
document = Document.objects.get(pk=document_id)
except Document.DoesNotExist:
# Document was deleted before we could execute, abort about
# updating
pass
else:
try:
lock = locking_backend.acquire_lock(
'document_indexing_task_update_index_document_%d' % document_id
Index.objects.index_document(document=document)
except OperationalError as exception:
logger.warning(
'Operational error while trying to index document: '
'%s; %s', document, exception
)
raise self.retry(exc=exception)
except LockError as exception:
logger.warning(
'Unable to acquire lock for document %s; %s ',
document, exception
)
except LockError as exception:
# This document is being reindexed by another task, retry later
raise self.retry(exc=exception)
else:
try:
document = Document.objects.get(pk=document_id)
except Document.DoesNotExist:
# Document was deleted before we could execute, abort about
# updating
pass
else:
try:
IndexInstanceNode.objects.index_document(document)
except OperationalError as exception:
logger.warning(
'Operational error while trying to index document: '
'%s; %s', document, exception
)
lock.release()
raise self.retry(exc=exception)
else:
lock.release()
finally:
lock.release()
finally:
rebuild_lock.release()
@app.task(bind=True, default_retry_delay=RETRY_DELAY, ignore_result=True)
def task_do_rebuild_all_indexes(self):
IndexInstanceNode = apps.get_model(
app_label='document_indexing', model_name='IndexInstanceNode'
def task_rebuild_index(self, index_id):
Index = apps.get_model(
app_label='document_indexing', model_name='Index'
)
try:
lock = locking_backend.acquire_lock(
'document_indexing_task_do_rebuild_all_indexes'
)
index = Index.objects.get(pk=index_id)
index.rebuild()
except LockError as exception:
# Another rebuild is happening, retry later
# This index is being rebuilt by another task, retry later
raise self.retry(exc=exception)
else:
try:
IndexInstanceNode.objects.rebuild_all_indexes()
finally:
lock.release()