Added remaining functionality to document indexing (filesystem rebuild, better warning reporting)

This commit is contained in:
Roberto Rosario
2011-05-20 00:34:31 -04:00
parent 7f59c16316
commit a02c5c7c8d
9 changed files with 108 additions and 173 deletions

View File

@@ -1,6 +1,6 @@
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from navigation.api import register_links, register_menu from navigation.api import register_menu
from permissions.api import register_permissions from permissions.api import register_permissions
from main.api import register_tool from main.api import register_tool

View File

@@ -12,9 +12,10 @@ from document_indexing.models import Index, IndexInstance, \
from document_indexing.conf.settings import AVAILABLE_INDEXING_FUNCTIONS from document_indexing.conf.settings import AVAILABLE_INDEXING_FUNCTIONS
from document_indexing.conf.settings import MAX_SUFFIX_COUNT from document_indexing.conf.settings import MAX_SUFFIX_COUNT
from document_indexing.filesystem import fs_create_index_directory, \ from document_indexing.filesystem import fs_create_index_directory, \
fs_create_document_link, fs_delete_document_link fs_create_document_link, fs_delete_document_link, \
fs_delete_index_directory, fs_delete_directory_recusive
from document_indexing.conf.settings import SLUGIFY_PATHS from document_indexing.conf.settings import SLUGIFY_PATHS
from document_indexing.os_agnostic import assemble_document_filename
if SLUGIFY_PATHS == False: if SLUGIFY_PATHS == False:
# Do not slugify path or filenames and extensions # Do not slugify path or filenames and extensions
@@ -32,7 +33,6 @@ def update_indexes(document):
""" """
Update or create all the index instances related to a document Update or create all the index instances related to a document
""" """
print 'update_indexes'
warnings = [] warnings = []
eval_dict = {} eval_dict = {}
@@ -54,7 +54,8 @@ def delete_indexes(document):
warnings = [] warnings = []
for index_instance in document.indexinstance_set.all(): for index_instance in document.indexinstance_set.all():
_remove_document_from_index_instance(document, index_instance) index_warnings = _remove_document_from_index_instance(document, index_instance)
warnings.extend(index_warnings)
return warnings return warnings
@@ -83,7 +84,7 @@ def get_instance_link(index_instance=None, text=None, simple=False):
} }
def get_breadcrumbs(index_instance, simple=False, single_link=False): def get_breadcrumbs(index_instance, simple=False, single_link=False, include_count=False):
""" """
Return a joined string of HTML anchors to every index instance's Return a joined string of HTML anchors to every index instance's
parent from the root of the tree to the index instance parent from the root of the tree to the index instance
@@ -100,32 +101,43 @@ def get_breadcrumbs(index_instance, simple=False, single_link=False):
result.append(get_instance_link(index_instance, simple=simple)) result.append(get_instance_link(index_instance, simple=simple))
output = []
if include_count:
output.append(u'(%d)' % index_instance.documents.count())
if single_link: if single_link:
# Return the entire breadcrumb path as a single HTML anchor # Return the entire breadcrumb path as a single HTML anchor
return mark_safe(get_instance_link(index_instance=index_instance, text=(u' / '.join(result)))) output.insert(0, get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
return mark_safe(u' '.join(output))
else: else:
return mark_safe(u' / '.join(result)) output.insert(0, u' / '.join(result))
return mark_safe(u' '.join(output))
def do_rebuild_all_indexes(): def do_rebuild_all_indexes():
fs_delete_directory_recusive()
IndexInstance.objects.all().delete() IndexInstance.objects.all().delete()
DocumentRenameCount.objects.all().delete() DocumentRenameCount.objects.all().delete()
for document in Document.objects.all(): for document in Document.objects.all():
update_indexes(document) update_indexes(document)
return [] # Warnings - None
# Internal functions # Internal functions
def find_lowest_available_suffix(suffix_list): def find_lowest_available_suffix(index_instance, document):
print 'suffix_list', suffix_list index_instance_documents = DocumentRenameCount.objects.filter(index_instance=index_instance).filter(document__file_extension=document.file_extension)
suffix = 0 files_list = []
for index_instance_document in index_instance_documents:
files_list.append(assemble_document_filename(index_instance_document.document, index_instance_document.suffix))
while suffix in suffix_list: for suffix in xrange(MAX_SUFFIX_COUNT):
suffix += 1 if assemble_document_filename(document, suffix) not in files_list:
if suffix > MAX_SUFFIX_COUNT:
raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)
else:
return suffix return suffix
raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)
def _evaluate_index(eval_dict, document, node, parent_index_instance=None): def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
""" """
@@ -137,14 +149,12 @@ def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
if node.enabled: if node.enabled:
try: try:
result = eval(node.expression, eval_dict, AVAILABLE_INDEXING_FUNCTIONS) result = eval(node.expression, eval_dict, AVAILABLE_INDEXING_FUNCTIONS)
if result:
index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance) index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance)
if created: #if created:
fs_create_index_directory(index_instance) fs_create_index_directory(index_instance)
if node.link_documents and document not in index_instance.documents.all(): if node.link_documents:
#suffix_list = index_instance.documents.filter(file_filename=document.file_filename).filter(file_extension=document.file_extension).values_list('suffix', flat=True) suffix = find_lowest_available_suffix(index_instance, document)
suffix_list = DocumentRenameCount.objects.filter(document__file_filename=document.file_filename).filter(document__file_extension=document.file_extension).filter(index_instance=index_instance).values_list('suffix', flat=True)
suffix = find_lowest_available_suffix(suffix_list)
print 'lower suffix: %s' % suffix
document_count = DocumentRenameCount( document_count = DocumentRenameCount(
index_instance=index_instance, index_instance=index_instance,
document=document, document=document,
@@ -164,7 +174,6 @@ def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
except (NameError, AttributeError), exc: except (NameError, AttributeError), exc:
warnings.append(_(u'Error in document indexing update expression: %(expression)s; %(exception)s') % { warnings.append(_(u'Error in document indexing update expression: %(expression)s; %(exception)s') % {
'expression': node.expression, 'exception': exc}) 'expression': node.expression, 'exception': exc})
except Exception, exc: except Exception, exc:
warnings.append(_(u'Error updating document index, expression: %(expression)s; %(exception)s') % { warnings.append(_(u'Error updating document index, expression: %(expression)s; %(exception)s') % {
'expression': node.expression, 'exception': exc}) 'expression': node.expression, 'exception': exc})
@@ -188,14 +197,15 @@ def _remove_document_from_index_instance(document, index_instance):
# if there are no more documents and no children, delete # if there are no more documents and no children, delete
# node and check parent for the same conditions # node and check parent for the same conditions
parent = index_instance.parent parent = index_instance.parent
fs_delete_index_directory(index_instance)
index_instance.delete() index_instance.delete()
parent_warnings = _remove_document_from_index_instance( parent_warnings = _remove_document_from_index_instance(
document, parent document, parent
) )
warnings.extend(parent_warnings) warnings.extend(parent_warnings)
except DocumentRenameCount.DoesNotExist:
return warnings
except Exception, exc: except Exception, exc:
print '_remove_document_from_index_instance exception: %s' % exc
warnings.append(_(u'Unable to delete document indexing node; %s') % exc) warnings.append(_(u'Unable to delete document indexing node; %s') % exc)
return warnings return warnings

View File

@@ -1,8 +1,5 @@
"""Configuration options for the document_indexing app""" """Configuration options for the document_indexing app"""
import hashlib
import uuid
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from common.utils import proper_name from common.utils import proper_name
@@ -18,6 +15,7 @@ register_settings(
settings=[ settings=[
# Definition # Definition
{'name': u'AVAILABLE_INDEXING_FUNCTIONS', 'global_name': u'DOCUMENT_INDEXING_AVAILABLE_INDEXING_FUNCTIONS', 'default': available_indexing_functions}, {'name': u'AVAILABLE_INDEXING_FUNCTIONS', 'global_name': u'DOCUMENT_INDEXING_AVAILABLE_INDEXING_FUNCTIONS', 'default': available_indexing_functions},
{'name': u'SUFFIX_SEPARATOR', 'global_name': u'DOCUMENT_INDEXING_SUFFIX_SEPARATOR', 'default': u'_'},
# Filesystem serving # Filesystem serving
{'name': u'SLUGIFY_PATHS', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SLUGIFY_PATHS', 'default': False}, {'name': u'SLUGIFY_PATHS', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SLUGIFY_PATHS', 'default': False},
{'name': u'MAX_SUFFIX_COUNT', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_MAX_SUFFIX_COUNT', 'default': 1000}, {'name': u'MAX_SUFFIX_COUNT', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_MAX_SUFFIX_COUNT', 'default': 1000},

View File

@@ -3,20 +3,23 @@ import os
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from metadata.classes import MetadataObject from document_indexing.os_agnostic import assemble_document_filename
from document_indexing.models import IndexInstance
from document_indexing.conf.settings import FILESERVING_ENABLE from document_indexing.conf.settings import FILESERVING_ENABLE
from document_indexing.conf.settings import FILESERVING_PATH from document_indexing.conf.settings import FILESERVING_PATH
# TODO: delete fileserving document function for use with rebuild index function
def get_instance_path(index_instance): def get_instance_path(index_instance):
""" """
Return a platform formated filesytem path corresponding to an Return a platform formated filesytem path corresponding to an
index instance index instance
""" """
return os.sep.join(index_instance.get_ancestors().values_list(u'value', flat=True)) names = []
for ancestor in index_instance.get_ancestors():
names.append(ancestor.value)
names.append(index_instance.value)
return os.sep.join(names)
def fs_create_index_directory(index_instance): def fs_create_index_directory(index_instance):
@@ -33,14 +36,9 @@ def fs_create_index_directory(index_instance):
def fs_create_document_link(index_instance, document, suffix=0): def fs_create_document_link(index_instance, document, suffix=0):
if FILESERVING_ENABLE: if FILESERVING_ENABLE:
name_part = document.file_filename name_part = assemble_document_filename(document, suffix)
if suffix:
name_part = u'_'.join([name_part, unicode(suffix)])
filename = os.extsep.join([name_part, document.file_extension]) filename = os.extsep.join([name_part, document.file_extension])
filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename) filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
print 'filepath', filepath
try: try:
os.symlink(document.file.path, filepath) os.symlink(document.file.path, filepath)
except OSError, exc: except OSError, exc:
@@ -64,7 +62,6 @@ def fs_delete_document_link(index_instance, document, suffix=0):
filename = os.extsep.join([name_part, document.file_extension]) filename = os.extsep.join([name_part, document.file_extension])
filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename) filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
print 'delete filepath', filepath
try: try:
os.unlink(filepath) os.unlink(filepath)
@@ -73,98 +70,23 @@ def fs_delete_document_link(index_instance, document, suffix=0):
# Raise when any error other than doesn't exits # Raise when any error other than doesn't exits
raise OSError(_(u'Unable to delete document symbolic link; %s') % exc) raise OSError(_(u'Unable to delete document symbolic link; %s') % exc)
'''
path, filename = os.path.split(document_metadata_index.filename)
#Cleanup directory of dead stuff def fs_delete_index_directory(index_instance):
#Delete siblings that are dead links if FILESERVING_ENABLE:
target_directory = os.path.join(FILESERVING_PATH, get_instance_path(index_instance))
try: try:
for f in os.listdir(path): os.removedirs(target_directory)
filepath = os.path.join(path, f)
if os.path.islink(filepath):
#Get link's source
source = os.readlink(filepath)
if os.path.isabs(source):
if not os.path.exists(source):
#link's source is absolute and doesn't exit
os.unlink(filepath)
else:
os.unlink(os.path.join(path, filepath))
elif os.path.isdir(filepath):
#is a directory, try to delete it
try:
os.removedirs(path)
except:
pass
except OSError, exc:
pass
#Remove the directory if it is empty
try:
os.removedirs(path)
except:
pass
'''
'''
def next_available_filename(document, metadata_index, path, filename, extension, suffix=0):
target = filename
if suffix:
target = '_'.join([filename, unicode(suffix)])
filepath = os.path.join(path, os.extsep.join([target, extension]))
matches = DocumentMetadataIndex.objects.filter(filename=filepath)
if matches.count() == 0:
document_metadata_index = DocumentMetadataIndex(
document=document, metadata_index=metadata_index,
filename=filepath)
try:
os.symlink(document.file.path, filepath)
document_metadata_index.save()
except OSError, exc: except OSError, exc:
if exc.errno == errno.EEXIST: if exc.errno == errno.EEXIST:
#This link should not exist, try to delete it pass
try:
os.unlink(filepath)
#Try again with same suffix
return next_available_filename(document, metadata_index, path, filename, extension, suffix)
except Exception, exc:
raise Exception(_(u'Unable to create symbolic link, filename clash: %(filepath)s; %(exc)s') % {'filepath': filepath, 'exc': exc})
else: else:
raise OSError(_(u'Unable to create symbolic link: %(filepath)s; %(exc)s') % {'filepath': filepath, 'exc': exc}) raise OSError(_(u'Unable to delete indexing directory; %s') % exc)
return filepath
else:
if suffix > MAX_RENAME_COUNT:
raise Exception(_(u'Maximum rename count reached, not creating symbolic link'))
return next_available_filename(document, metadata_index, path, filename, extension, suffix + 1)
#TODO: diferentiate between evaluation error and filesystem errors def fs_delete_directory_recusive(path=FILESERVING_PATH):
def do_recreate_all_links(raise_exception=True): if FILESERVING_ENABLE:
errors = [] for dirpath, dirnames, filenames in os.walk(path, topdown=False):
warnings = [] for filename in filenames:
os.unlink(os.path.join(dirpath, filename))
for document in Document.objects.all(): for dirname in dirnames:
try: os.rmdir(os.path.join(dirpath, dirname))
document_delete_fs_links(document)
except NameError, e:
warnings.append('%s: %s' % (document, e))
except Exception, e:
if raise_exception:
raise Exception(e)
else:
errors.append('%s: %s' % (document, e))
for document in Document.objects.all():
try:
create_warnings = document_create_fs_links(document)
except Exception, e:
if raise_exception:
raise Exception(e)
else:
errors.append('%s: %s' % (document, e))
for warning in create_warnings:
warnings.append('%s: %s' % (document, warning))
return errors, warnings
'''

View File

@@ -0,0 +1,8 @@
from document_indexing.conf.settings import SUFFIX_SEPARATOR
def assemble_document_filename(document, suffix=0):
    """
    Return the name part (without extension) to use for a document,
    appending the suffix after SUFFIX_SEPARATOR when the suffix is
    non zero
    """
    if not suffix:
        # A zero/empty suffix means the filename is used unchanged
        return document.file_filename

    name_parts = [document.file_filename, unicode(suffix)]
    return SUFFIX_SEPARATOR.join(name_parts)

View File

@@ -11,7 +11,7 @@ def get_document_indexing_subtemplate(document):
object_list = [] object_list = []
for index_instance in document.indexinstance_set.all(): for index_instance in document.indexinstance_set.all():
object_list.append(get_breadcrumbs(index_instance, single_link=True)) object_list.append(get_breadcrumbs(index_instance, single_link=True, include_count=True))
return { return {
'name': 'generic_list_subtemplate.html', 'name': 'generic_list_subtemplate.html',

View File

@@ -1,11 +1,8 @@
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from django.http import HttpResponseRedirect from django.http import HttpResponseRedirect
from django.shortcuts import render_to_response, get_object_or_404 from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext from django.template import RequestContext
from django.contrib import messages from django.contrib import messages
from django.core.urlresolvers import reverse
from django.conf import settings
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
from permissions.api import check_permissions from permissions.api import check_permissions
@@ -55,7 +52,7 @@ def rebuild_index_instances(request):
}, context_instance=RequestContext(request)) }, context_instance=RequestContext(request))
else: else:
try: try:
errors, warnings = do_rebuild_all_indexes() warnings = do_rebuild_all_indexes()
messages.success(request, _(u'Index rebuild completed successfully.')) messages.success(request, _(u'Index rebuild completed successfully.'))
for warning in warnings: for warning in warnings:
messages.warning(request, warning) messages.warning(request, warning)