diff --git a/mayan/apps/document_indexing/__init__.py b/mayan/apps/document_indexing/__init__.py index 916da6087b..4c394b6183 100644 --- a/mayan/apps/document_indexing/__init__.py +++ b/mayan/apps/document_indexing/__init__.py @@ -18,23 +18,23 @@ from .links import (document_index_list, document_index_main_menu_link, rebuild_index_instances, template_node_create, template_node_delete, template_node_edit) from .models import Index, IndexTemplateNode, IndexInstanceNode -from .tasks import task_delete_indexes, task_update_indexes +from .tasks import task_delete_empty_index_nodes, task_update_indexes -@receiver(pre_delete, dispatch_uid='document_index_delete', sender=Document) +@receiver(post_delete, dispatch_uid='document_index_delete', sender=Document) def document_index_delete(sender, **kwargs): - task_delete_indexes.apply_async(kwargs=dict(document_id=kwargs['instance'].pk), queue='indexing') + task_delete_empty_index_nodes.apply_async(queue='indexing') @receiver(post_save, dispatch_uid='document_metadata_index_update', sender=DocumentMetadata) def document_metadata_index_update(sender, **kwargs): - task_delete_indexes.apply_async(kwargs=dict(document_id=kwargs['instance'].document.pk), queue='indexing') task_update_indexes.apply_async(kwargs=dict(document_id=kwargs['instance'].document.pk), queue='indexing') + task_delete_empty_index_nodes.apply_async(queue='indexing') -@receiver(pre_delete, dispatch_uid='document_metadata_index_delete', sender=DocumentMetadata) +@receiver(post_delete, dispatch_uid='document_metadata_index_delete', sender=DocumentMetadata) def document_metadata_index_delete(sender, **kwargs): - task_delete_indexes.apply_async(kwargs=dict(document_id=kwargs['instance'].document.pk), queue='indexing') + task_delete_empty_index_nodes.apply_async(kwargs=dict(document_id=kwargs['instance'].document.pk), queue='indexing') @receiver(post_delete, dispatch_uid='document_metadata_index_post_delete', sender=DocumentMetadata) diff --git a/mayan/apps/document_indexing/admin.py b/mayan/apps/document_indexing/admin.py index dd9f239d45..409bee5e34 100644 --- a/mayan/apps/document_indexing/admin.py +++ b/mayan/apps/document_indexing/admin.py @@ -4,8 +4,7 @@ from django.contrib import admin from mptt.admin import MPTTModelAdmin -from .models import (DocumentRenameCount, Index, IndexInstanceNode, - IndexTemplateNode) +from .models import Index, IndexInstanceNode, IndexTemplateNode class IndexTemplateNodeAdmin(MPTTModelAdmin): @@ -17,7 +16,6 @@ class IndexInstanceNodeAdmin(MPTTModelAdmin): list_display = ('value',) -admin.site.register(DocumentRenameCount) admin.site.register(Index) admin.site.register(IndexTemplateNode, IndexTemplateNodeAdmin) admin.site.register(IndexInstanceNode, IndexInstanceNodeAdmin) diff --git a/mayan/apps/document_indexing/api.py b/mayan/apps/document_indexing/api.py index 3b09aaa709..c1bf3b5f0c 100644 --- a/mayan/apps/document_indexing/api.py +++ b/mayan/apps/document_indexing/api.py @@ -3,24 +3,13 @@ from __future__ import absolute_import import logging from django.db.models import Q -from django.template.defaultfilters import slugify from django.utils.translation import ugettext from django.utils.translation import ugettext_lazy as _ -from .exceptions import MaxSuffixCountReached -from .filesystem import (assemble_suffixed_filename, fs_create_index_directory, - fs_create_document_link, fs_delete_document_link, - fs_delete_index_directory) -from .models import Index, IndexInstanceNode, DocumentRenameCount -from .settings import (AVAILABLE_INDEXING_FUNCTIONS, MAX_SUFFIX_COUNT, - SLUGIFY_PATHS) +from .models import Index, IndexInstanceNode +from .settings import AVAILABLE_INDEXING_FUNCTIONS logger = logging.getLogger(__name__) -if SLUGIFY_PATHS: - SLUGIFY_FUNCTION = slugify -else: - # Do not slugify path or filenames and extensions - SLUGIFY_FUNCTION = lambda x: x # External functions @@ -42,22 +31,6 @@ def update_indexes(document): return warnings -def delete_indexes(document): - """ - Delete all the index instances related to a document - """ - - # TODO: convert this fuction into a manager method - - warnings = [] - - for index_node in document.node_instances.all(): - index_warnings = cascade_document_remove(document, index_node) - warnings.extend(index_warnings) - - return warnings - - # Internal functions def find_lowest_available_suffix(index_instance, document): index_instance_documents = DocumentRenameCount.objects.filter(index_instance_node=index_instance) @@ -84,19 +57,13 @@ def cascade_eval(document, template_node, parent_index_instance=None): try: result = eval(template_node.expression, {'document': document}, AVAILABLE_INDEXING_FUNCTIONS) except Exception as exception: - error_message = _(u'Error in document indexing update expression: %(expression)s; %(exception)s') % { - 'expression': template_node.expression, 'exception': exception} + error_message = _(u'Error indexing document: %(document)s; expression: %(expression)s; %(exception)s') % { + 'document': document, 'expression': template_node.expression, 'exception': exception} warnings.append(error_message) logger.debug(error_message) else: if result: index_instance, created = IndexInstanceNode.objects.get_or_create(index_template_node=template_node, value=result, parent=parent_index_instance) - # if created: - try: - fs_create_index_directory(index_instance) - except Exception as exception: - warnings.append(_(u'Error updating document index, expression: %(expression)s; %(exception)s') % { - 'expression': template_node.expression, 'exception': exception}) if template_node.link_documents: suffix = find_lowest_available_suffix(index_instance, document) @@ -106,15 +73,6 @@ def cascade_eval(document, template_node, parent_index_instance=None): suffix=suffix ) document_count.save() - - try: - fs_create_document_link(index_instance, document, suffix) - except Exception as exception: - error_message = _(u'Error updating document index, expression: %(expression)s; %(exception)s') % { - 'expression': template_node.expression, 'exception': exception} - warnings.append(error_message) - logger.debug(error_message) - index_instance.documents.add(document) for child in template_node.get_children(): @@ -128,32 +86,24 @@ def cascade_eval(document, template_node, parent_index_instance=None): return warnings -def cascade_document_remove(document, index_instance): +def delete_empty_index_nodes(): """ - Delete a documents reference from an index instance and call itself - recusively deleting documents and empty index instances up to the - root of the tree + Delete empty index instance nodes """ - warnings = [] - try: - document_rename_count = DocumentRenameCount.objects.get(index_instance_node=index_instance, document=document) - fs_delete_document_link(index_instance, document, document_rename_count.suffix) - document_rename_count.delete() - index_instance.documents.remove(document) - if index_instance.documents.count() == 0 and index_instance.get_children().count() == 0: - # if there are no more documents and no children, delete - # node and check parent for the same conditions - parent = index_instance.parent - fs_delete_index_directory(index_instance) - index_instance.delete() - parent_warnings = cascade_document_remove( - document, parent - ) - warnings.extend(parent_warnings) - except DocumentRenameCount.DoesNotExist: - return warnings - except Exception as exception: - warnings.append(_(u'Unable to delete document indexing node; %s') % exception) + for instance_node in IndexInstanceNode.objects.filter(documents__isnull=True): + task_delete_empty_index_nodes_recursive(instance_node) - return warnings + + +def task_delete_empty_index_nodes_recursive(instance_node): + """ + Calls itself recursively deleting empty index instance nodes up to root + """ + + if instance_node.get_children().count() == 0: + # if there are no children, delete node and check parent for the + # same conditions + parent = instance_node.parent + instance_node.delete() + delete_empty_indexes(parent) diff --git a/mayan/apps/document_indexing/exceptions.py b/mayan/apps/document_indexing/exceptions.py deleted file mode 100644 index 9e095fae11..0000000000 --- a/mayan/apps/document_indexing/exceptions.py +++ /dev/null @@ -1,6 +0,0 @@ -class MaxSuffixCountReached(Exception): - """ - Raised when there are too many documents with the same filename in the - same node/directory - """ - pass diff --git a/mayan/apps/document_indexing/filesystem.py b/mayan/apps/document_indexing/filesystem.py deleted file mode 100644 index d4a75a000f..0000000000 --- a/mayan/apps/document_indexing/filesystem.py +++ /dev/null @@ -1,106 +0,0 @@ -from __future__ import absolute_import - -import errno -import os - -from django.utils.translation import ugettext_lazy as _ - -from .settings import (FILESYSTEM_SERVING, SUFFIX_SEPARATOR) - - -def assemble_suffixed_filename(filename, suffix=0): - """ - Split document filename, to attach suffix to the name part then - re attacht the extension - """ - - if suffix: - name, extension = os.path.splitext(filename) - return SUFFIX_SEPARATOR.join([name, unicode(suffix), os.extsep, extension]) - else: - return filename - - -def assemble_path_from_list(directory_list): - return os.path.normpath(os.sep.join(directory_list)) - - -def get_instance_path(index_instance): - """ - Return a platform formated filesytem path corresponding to an - index instance - """ - names = [] - for ancestor in index_instance.get_ancestors(): - names.append(ancestor.value) - - names.append(index_instance.value) - - return assemble_path_from_list(names) - - -def fs_create_index_directory(index_instance): - if index_instance.index_template_node.index.name in FILESYSTEM_SERVING: - target_directory = assemble_path_from_list([FILESYSTEM_SERVING[index_instance.index_template_node.index.name], get_instance_path(index_instance)]) - try: - os.mkdir(target_directory) - except OSError as exception: - if exception.errno == errno.EEXIST: - pass - else: - raise Exception(_(u'Unable to create indexing directory; %s') % exception) - - -def fs_create_document_link(index_instance, document, suffix=0): - if index_instance.index_template_node.index.name in FILESYSTEM_SERVING: - filename = assemble_suffixed_filename(document.file_filename, suffix) - filepath = assemble_path_from_list([FILESYSTEM_SERVING[index_instance.index_template_node.index.name], get_instance_path(index_instance), filename]) - - try: - os.symlink(document.file.path, filepath) - except OSError as exception: - if exception.errno == errno.EEXIST: - # This link should not exist, try to delete it - try: - os.unlink(filepath) - # Try again - os.symlink(document.file.path, filepath) - except Exception as exception: - raise Exception(_(u'Unable to create symbolic link, file exists and could not be deleted: %(filepath)s; %(exception)s') % {'filepath': filepath, 'exception': exception}) - else: - raise Exception(_(u'Unable to create symbolic link: %(filepath)s; %(exception)s') % {'filepath': filepath, 'exception': exception}) - - -def fs_delete_document_link(index_instance, document, suffix=0): - if index_instance.index_template_node.index.name in FILESYSTEM_SERVING: - filename = assemble_suffixed_filename(document.file_filename, suffix) - filepath = assemble_path_from_list([FILESYSTEM_SERVING[index_instance.index_template_node.index.name], get_instance_path(index_instance), filename]) - - try: - os.unlink(filepath) - except OSError as exception: - if exception.errno != errno.ENOENT: - # Raise when any error other than doesn't exits - raise Exception(_(u'Unable to delete document symbolic link; %s') % exception) - - -def fs_delete_index_directory(index_instance): - if index_instance.index_template_node.index.name in FILESYSTEM_SERVING: - target_directory = assemble_path_from_list([FILESYSTEM_SERVING[index_instance.index_template_node.index.name], get_instance_path(index_instance)]) - try: - os.removedirs(target_directory) - except OSError as exception: - if exception.errno == errno.EEXIST: - pass - else: - raise Exception(_(u'Unable to delete indexing directory; %s') % exception) - - -def fs_delete_directory_recusive(index): - if index.name in FILESYSTEM_SERVING: - path = FILESYSTEM_SERVING[index.name] - for dirpath, dirnames, filenames in os.walk(path, topdown=False): - for filename in filenames: - os.unlink(os.path.join(dirpath, filename)) - for dirname in dirnames: - os.rmdir(os.path.join(dirpath, dirname)) diff --git a/mayan/apps/document_indexing/models.py b/mayan/apps/document_indexing/models.py index 287dad91e0..2e35f00b37 100644 --- a/mayan/apps/document_indexing/models.py +++ b/mayan/apps/document_indexing/models.py @@ -98,16 +98,3 @@ class IndexInstanceNode(MPTTModel): class Meta: verbose_name = _(u'Index node instance') verbose_name_plural = _(u'Indexes node instances') - - -class DocumentRenameCount(models.Model): - index_instance_node = models.ForeignKey(IndexInstanceNode, verbose_name=_(u'Index instance')) - document = models.ForeignKey(Document, verbose_name=_(u'Document')) - suffix = models.PositiveIntegerField(blank=True, verbose_name=(u'Suffix')) - - def __unicode__(self): - return u'%s - %s - %s' % (self.index_instance_node, self.document, self.suffix or u'0') - - class Meta: - verbose_name = _(u'Document rename count') - verbose_name_plural = _(u'Documents rename count') diff --git a/mayan/apps/document_indexing/settings.py b/mayan/apps/document_indexing/settings.py index 1c8f8bb43b..81feb424eb 100644 --- a/mayan/apps/document_indexing/settings.py +++ b/mayan/apps/document_indexing/settings.py @@ -13,10 +13,5 @@ register_settings( settings=[ # Definition {'name': u'AVAILABLE_INDEXING_FUNCTIONS', 'global_name': u'DOCUMENT_INDEXING_AVAILABLE_INDEXING_FUNCTIONS', 'default': available_indexing_functions}, - {'name': u'SUFFIX_SEPARATOR', 'global_name': u'DOCUMENT_INDEXING_SUFFIX_SEPARATOR', 'default': u'_'}, - # Filesystem serving - {'name': u'SLUGIFY_PATHS', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SLUGIFY_PATHS', 'default': False}, - {'name': u'MAX_SUFFIX_COUNT', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_MAX_SUFFIX_COUNT', 'default': 1000}, - {'name': u'FILESYSTEM_SERVING', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SERVING', 'default': {}, 'description': _(u'A dictionary that maps the index name and where on the filesystem that index will be mirrored.')} ] ) diff --git a/mayan/apps/document_indexing/tasks.py b/mayan/apps/document_indexing/tasks.py index 87d4448202..91c6860e07 100644 --- a/mayan/apps/document_indexing/tasks.py +++ b/mayan/apps/document_indexing/tasks.py @@ -4,7 +4,7 @@ from mayan.celery import app from documents.models import Document from lock_manager import Lock, LockError -from .api import update_indexes, delete_indexes +from .api import update_indexes, delete_empty_index_nodes from .tools import do_rebuild_all_indexes logger = logging.getLogger(__name__) @@ -12,13 +12,13 @@ RETRY_DELAY = 20 # TODO: convert this into a config option @app.task(ignore_result=True) -def task_delete_indexes(document_id): - document = Document.objects.get(pk=document_id) - delete_indexes(document) +def task_delete_empty_index_nodes(): + delete_empty_index_nodes() @app.task(bind=True, ignore_result=True) def task_update_indexes(self, document_id): + # TODO: Add concurrent task control try: lock = Lock.acquire_lock('document_indexing_task_update_index_document_%d' % document_id) except LockError as exception: diff --git a/mayan/apps/document_indexing/tools.py b/mayan/apps/document_indexing/tools.py index 93ba9c6566..641e21dab4 100644 --- a/mayan/apps/document_indexing/tools.py +++ b/mayan/apps/document_indexing/tools.py @@ -3,15 +3,16 @@ from __future__ import absolute_import from documents.models import Document from .api import update_indexes -from .filesystem import fs_delete_directory_recusive -from .models import Index, IndexInstanceNode, DocumentRenameCount +from .models import Index, IndexInstanceNode def do_rebuild_all_indexes(): - for index in Index.objects.all(): - fs_delete_directory_recusive(index) + for instance_node in IndexInstanceNode.objects.all(): + instance_node.delete() + + for index in Index.objects.all(): + index.delete() - IndexInstanceNode.objects.all().delete() - DocumentRenameCount.objects.all().delete() for document in Document.objects.all(): + # TODO: Launch all concurrently as background tasks update_indexes(document)