Rewrite the document indexer to allow single index rebuilds,
less locks and improve performance. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -7,7 +7,6 @@ from django.db import OperationalError
|
||||
|
||||
from mayan.celery import app
|
||||
from lock_manager import LockError
|
||||
from lock_manager.runtime import locking_backend
|
||||
|
||||
from .literals import RETRY_DELAY
|
||||
|
||||
@@ -15,23 +14,36 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=RETRY_DELAY, max_retries=None, ignore_result=True)
|
||||
def task_delete_empty_index_nodes(self):
|
||||
def task_delete_empty(self):
|
||||
IndexInstanceNode = apps.get_model(
|
||||
app_label='document_indexing', model_name='IndexInstanceNode'
|
||||
)
|
||||
|
||||
try:
|
||||
rebuild_lock = locking_backend.acquire_lock(
|
||||
'document_indexing_task_do_rebuild_all_indexes'
|
||||
)
|
||||
IndexInstanceNode.objects.delete_empty()
|
||||
except LockError as exception:
|
||||
# A rebuild is happening, retry later
|
||||
raise self.retry(exc=exception)
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=RETRY_DELAY, max_retries=None, ignore_result=True)
|
||||
def task_remove_document(self, document_id):
|
||||
Document = apps.get_model(
|
||||
app_label='documents', model_name='Document'
|
||||
)
|
||||
IndexInstanceNode = apps.get_model(
|
||||
app_label='document_indexing', model_name='IndexInstanceNode'
|
||||
)
|
||||
|
||||
try:
|
||||
document = Document.objects.get(pk=document_id)
|
||||
except Document.DoesNotExist:
|
||||
# Document was deleted before we could execute, abort
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
IndexInstanceNode.objects.delete_empty_index_nodes()
|
||||
finally:
|
||||
rebuild_lock.release()
|
||||
IndexInstanceNode.objects.remove_document(document=document)
|
||||
except LockError as exception:
|
||||
raise self.retry(exc=exception)
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=RETRY_DELAY, max_retries=None, ignore_result=True)
|
||||
@@ -39,66 +51,42 @@ def task_index_document(self, document_id):
|
||||
Document = apps.get_model(
|
||||
app_label='documents', model_name='Document'
|
||||
)
|
||||
|
||||
IndexInstanceNode = apps.get_model(
|
||||
app_label='document_indexing', model_name='IndexInstanceNode'
|
||||
Index = apps.get_model(
|
||||
app_label='document_indexing', model_name='Index'
|
||||
)
|
||||
|
||||
try:
|
||||
rebuild_lock = locking_backend.acquire_lock(
|
||||
'document_indexing_task_do_rebuild_all_indexes'
|
||||
)
|
||||
except LockError as exception:
|
||||
# A rebuild is happening, retry later
|
||||
raise self.retry(exc=exception)
|
||||
document = Document.objects.get(pk=document_id)
|
||||
except Document.DoesNotExist:
|
||||
# Document was deleted before we could execute, abort about
|
||||
# updating
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
lock = locking_backend.acquire_lock(
|
||||
'document_indexing_task_update_index_document_%d' % document_id
|
||||
Index.objects.index_document(document=document)
|
||||
except OperationalError as exception:
|
||||
logger.warning(
|
||||
'Operational error while trying to index document: '
|
||||
'%s; %s', document, exception
|
||||
)
|
||||
raise self.retry(exc=exception)
|
||||
except LockError as exception:
|
||||
logger.warning(
|
||||
'Unable to acquire lock for document %s; %s ',
|
||||
document, exception
|
||||
)
|
||||
except LockError as exception:
|
||||
# This document is being reindexed by another task, retry later
|
||||
raise self.retry(exc=exception)
|
||||
else:
|
||||
try:
|
||||
document = Document.objects.get(pk=document_id)
|
||||
except Document.DoesNotExist:
|
||||
# Document was deleted before we could execute, abort about
|
||||
# updating
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
IndexInstanceNode.objects.index_document(document)
|
||||
except OperationalError as exception:
|
||||
logger.warning(
|
||||
'Operational error while trying to index document: '
|
||||
'%s; %s', document, exception
|
||||
)
|
||||
lock.release()
|
||||
raise self.retry(exc=exception)
|
||||
else:
|
||||
lock.release()
|
||||
finally:
|
||||
lock.release()
|
||||
finally:
|
||||
rebuild_lock.release()
|
||||
|
||||
|
||||
@app.task(bind=True, default_retry_delay=RETRY_DELAY, ignore_result=True)
|
||||
def task_do_rebuild_all_indexes(self):
|
||||
IndexInstanceNode = apps.get_model(
|
||||
app_label='document_indexing', model_name='IndexInstanceNode'
|
||||
def task_rebuild_index(self, index_id):
|
||||
Index = apps.get_model(
|
||||
app_label='document_indexing', model_name='Index'
|
||||
)
|
||||
|
||||
try:
|
||||
lock = locking_backend.acquire_lock(
|
||||
'document_indexing_task_do_rebuild_all_indexes'
|
||||
)
|
||||
index = Index.objects.get(pk=index_id)
|
||||
index.rebuild()
|
||||
except LockError as exception:
|
||||
# Another rebuild is happening, retry later
|
||||
# This index is being rebuilt by another task, retry later
|
||||
raise self.retry(exc=exception)
|
||||
else:
|
||||
try:
|
||||
IndexInstanceNode.objects.rebuild_all_indexes()
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
Reference in New Issue
Block a user