Added remaining functionality to document indexing (filesystem rebuild, better warning reporting)

This commit is contained in:
Roberto Rosario
2011-05-20 00:34:31 -04:00
parent 7f59c16316
commit a02c5c7c8d
9 changed files with 108 additions and 173 deletions

View File

@@ -1,6 +1,6 @@
from django.utils.translation import ugettext_lazy as _
from navigation.api import register_links, register_menu
from navigation.api import register_menu
from permissions.api import register_permissions
from main.api import register_tool

View File

@@ -12,9 +12,10 @@ from document_indexing.models import Index, IndexInstance, \
from document_indexing.conf.settings import AVAILABLE_INDEXING_FUNCTIONS
from document_indexing.conf.settings import MAX_SUFFIX_COUNT
from document_indexing.filesystem import fs_create_index_directory, \
fs_create_document_link, fs_delete_document_link
fs_create_document_link, fs_delete_document_link, \
fs_delete_index_directory, fs_delete_directory_recusive
from document_indexing.conf.settings import SLUGIFY_PATHS
from document_indexing.os_agnostic import assemble_document_filename
if SLUGIFY_PATHS == False:
# Do not slugify path or filenames and extensions
@@ -32,7 +33,6 @@ def update_indexes(document):
"""
Update or create all the index instances related to a document
"""
print 'update_indexes'
warnings = []
eval_dict = {}
@@ -54,7 +54,8 @@ def delete_indexes(document):
warnings = []
for index_instance in document.indexinstance_set.all():
_remove_document_from_index_instance(document, index_instance)
index_warnings = _remove_document_from_index_instance(document, index_instance)
warnings.extend(index_warnings)
return warnings
@@ -83,7 +84,7 @@ def get_instance_link(index_instance=None, text=None, simple=False):
}
def get_breadcrumbs(index_instance, simple=False, single_link=False):
def get_breadcrumbs(index_instance, simple=False, single_link=False, include_count=False):
"""
Return a joined string of HTML anchors to every index instance's
parent from the root of the tree to the index instance
@@ -100,32 +101,43 @@ def get_breadcrumbs(index_instance, simple=False, single_link=False):
result.append(get_instance_link(index_instance, simple=simple))
output = []
if include_count:
output.append(u'(%d)' % index_instance.documents.count())
if single_link:
# Return the entire breadcrumb path as a single HTML anchor
return mark_safe(get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
output.insert(0, get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
return mark_safe(u' '.join(output))
else:
return mark_safe(u' / '.join(result))
output.insert(0, u' / '.join(result))
return mark_safe(u' '.join(output))
def do_rebuild_all_indexes():
    """
    Rebuild the index instances of every document from scratch

    Empties the file serving directory tree, deletes all stored
    IndexInstance and DocumentRenameCount rows and then runs
    update_indexes() on each document.  Always returns an empty list
    of warnings.
    """
    # Discard the previous state first: filesystem entries, then the
    # database rows that described them
    fs_delete_directory_recusive()
    IndexInstance.objects.all().delete()
    DocumentRenameCount.objects.all().delete()

    # Regenerate the index instances one document at a time
    all_documents = Document.objects.all()
    for each_document in all_documents:
        update_indexes(each_document)

    # The rebuild itself reports no warnings
    no_warnings = []
    return no_warnings
# Internal functions
def find_lowest_available_suffix(suffix_list):
print 'suffix_list', suffix_list
suffix = 0
def find_lowest_available_suffix(index_instance, document):
index_instance_documents = DocumentRenameCount.objects.filter(index_instance=index_instance).filter(document__file_extension=document.file_extension)
files_list = []
for index_instance_document in index_instance_documents:
files_list.append(assemble_document_filename(index_instance_document.document, index_instance_document.suffix))
while suffix in suffix_list:
suffix += 1
if suffix > MAX_SUFFIX_COUNT:
raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)
else:
for suffix in xrange(MAX_SUFFIX_COUNT):
if assemble_document_filename(document, suffix) not in files_list:
return suffix
raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)
def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
"""
@@ -137,14 +149,12 @@ def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
if node.enabled:
try:
result = eval(node.expression, eval_dict, AVAILABLE_INDEXING_FUNCTIONS)
if result:
index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance)
if created:
#if created:
fs_create_index_directory(index_instance)
if node.link_documents and document not in index_instance.documents.all():
#suffix_list = index_instance.documents.filter(file_filename=document.file_filename).filter(file_extension=document.file_extension).values_list('suffix', flat=True)
suffix_list = DocumentRenameCount.objects.filter(document__file_filename=document.file_filename).filter(document__file_extension=document.file_extension).filter(index_instance=index_instance).values_list('suffix', flat=True)
suffix = find_lowest_available_suffix(suffix_list)
print 'lower suffix: %s' % suffix
if node.link_documents:
suffix = find_lowest_available_suffix(index_instance, document)
document_count = DocumentRenameCount(
index_instance=index_instance,
document=document,
@@ -164,7 +174,6 @@ def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
except (NameError, AttributeError), exc:
warnings.append(_(u'Error in document indexing update expression: %(expression)s; %(exception)s') % {
'expression': node.expression, 'exception': exc})
except Exception, exc:
warnings.append(_(u'Error updating document index, expression: %(expression)s; %(exception)s') % {
'expression': node.expression, 'exception': exc})
@@ -188,14 +197,15 @@ def _remove_document_from_index_instance(document, index_instance):
# if there are no more documents and no children, delete
# node and check parent for the same conditions
parent = index_instance.parent
fs_delete_index_directory(index_instance)
index_instance.delete()
parent_warnings = _remove_document_from_index_instance(
document, parent
)
warnings.extend(parent_warnings)
except DocumentRenameCount.DoesNotExist:
return warnings
except Exception, exc:
print '_remove_document_from_index_instance exception: %s' % exc
warnings.append(_(u'Unable to delete document indexing node; %s') % exc)
return warnings

View File

@@ -1,8 +1,5 @@
"""Configuration options for the document_indexing app"""
import hashlib
import uuid
from django.utils.translation import ugettext_lazy as _
from common.utils import proper_name
@@ -18,6 +15,7 @@ register_settings(
settings=[
# Definition
{'name': u'AVAILABLE_INDEXING_FUNCTIONS', 'global_name': u'DOCUMENT_INDEXING_AVAILABLE_INDEXING_FUNCTIONS', 'default': available_indexing_functions},
{'name': u'SUFFIX_SEPARATOR', 'global_name': u'DOCUMENT_INDEXING_SUFFIX_SEPARATOR', 'default': u'_'},
# Filesystem serving
{'name': u'SLUGIFY_PATHS', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SLUGIFY_PATHS', 'default': False},
{'name': u'MAX_SUFFIX_COUNT', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_MAX_SUFFIX_COUNT', 'default': 1000},

View File

@@ -3,20 +3,23 @@ import os
from django.utils.translation import ugettext_lazy as _
from metadata.classes import MetadataObject
from document_indexing.models import IndexInstance
from document_indexing.os_agnostic import assemble_document_filename
from document_indexing.conf.settings import FILESERVING_ENABLE
from document_indexing.conf.settings import FILESERVING_PATH
# TODO: delete fileserving document function for use with rebuild index function
def get_instance_path(index_instance):
"""
Return a platform formatted filesystem path corresponding to an
index instance
"""
return os.sep.join(index_instance.get_ancestors().values_list(u'value', flat=True))
names = []
for ancestor in index_instance.get_ancestors():
names.append(ancestor.value)
names.append(index_instance.value)
return os.sep.join(names)
def fs_create_index_directory(index_instance):
@@ -33,14 +36,9 @@ def fs_create_index_directory(index_instance):
def fs_create_document_link(index_instance, document, suffix=0):
if FILESERVING_ENABLE:
name_part = document.file_filename
if suffix:
name_part = u'_'.join([name_part, unicode(suffix)])
name_part = assemble_document_filename(document, suffix)
filename = os.extsep.join([name_part, document.file_extension])
filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
print 'filepath', filepath
try:
os.symlink(document.file.path, filepath)
except OSError, exc:
@@ -64,7 +62,6 @@ def fs_delete_document_link(index_instance, document, suffix=0):
filename = os.extsep.join([name_part, document.file_extension])
filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
print 'delete filepath', filepath
try:
os.unlink(filepath)
@@ -73,98 +70,23 @@ def fs_delete_document_link(index_instance, document, suffix=0):
# Raise on any error other than "doesn't exist"
raise OSError(_(u'Unable to delete document symbolic link; %s') % exc)
'''
path, filename = os.path.split(document_metadata_index.filename)
#Cleanup directory of dead stuff
#Delete siblings that are dead links
def fs_delete_index_directory(index_instance):
if FILESERVING_ENABLE:
target_directory = os.path.join(FILESERVING_PATH, get_instance_path(index_instance))
try:
for f in os.listdir(path):
filepath = os.path.join(path, f)
if os.path.islink(filepath):
#Get link's source
source = os.readlink(filepath)
if os.path.isabs(source):
if not os.path.exists(source):
#link's source is absolute and doesn't exit
os.unlink(filepath)
else:
os.unlink(os.path.join(path, filepath))
elif os.path.isdir(filepath):
#is a directory, try to delete it
try:
os.removedirs(path)
except:
pass
except OSError, exc:
pass
#Remove the directory if it is empty
try:
os.removedirs(path)
except:
pass
'''
'''
def next_available_filename(document, metadata_index, path, filename, extension, suffix=0):
target = filename
if suffix:
target = '_'.join([filename, unicode(suffix)])
filepath = os.path.join(path, os.extsep.join([target, extension]))
matches = DocumentMetadataIndex.objects.filter(filename=filepath)
if matches.count() == 0:
document_metadata_index = DocumentMetadataIndex(
document=document, metadata_index=metadata_index,
filename=filepath)
try:
os.symlink(document.file.path, filepath)
document_metadata_index.save()
os.removedirs(target_directory)
except OSError, exc:
if exc.errno == errno.EEXIST:
#This link should not exist, try to delete it
try:
os.unlink(filepath)
#Try again with same suffix
return next_available_filename(document, metadata_index, path, filename, extension, suffix)
except Exception, exc:
raise Exception(_(u'Unable to create symbolic link, filename clash: %(filepath)s; %(exc)s') % {'filepath': filepath, 'exc': exc})
pass
else:
raise OSError(_(u'Unable to create symbolic link: %(filepath)s; %(exc)s') % {'filepath': filepath, 'exc': exc})
return filepath
else:
if suffix > MAX_RENAME_COUNT:
raise Exception(_(u'Maximum rename count reached, not creating symbolic link'))
return next_available_filename(document, metadata_index, path, filename, extension, suffix + 1)
raise OSError(_(u'Unable to delete indexing directory; %s') % exc)
#TODO: diferentiate between evaluation error and filesystem errors
def do_recreate_all_links(raise_exception=True):
errors = []
warnings = []
for document in Document.objects.all():
try:
document_delete_fs_links(document)
except NameError, e:
warnings.append('%s: %s' % (document, e))
except Exception, e:
if raise_exception:
raise Exception(e)
else:
errors.append('%s: %s' % (document, e))
for document in Document.objects.all():
try:
create_warnings = document_create_fs_links(document)
except Exception, e:
if raise_exception:
raise Exception(e)
else:
errors.append('%s: %s' % (document, e))
for warning in create_warnings:
warnings.append('%s: %s' % (document, warning))
return errors, warnings
'''
def fs_delete_directory_recusive(path=FILESERVING_PATH):
    """
    Delete everything found below a directory, leaving the directory
    itself in place

    path -- root of the tree to empty; defaults to FILESERVING_PATH

    Does nothing when FILESERVING_ENABLE is false.  An OSError raised by
    the underlying os calls is not caught here and reaches the caller.
    """
    if not FILESERVING_ENABLE:
        return

    # Walk bottom-up so that every directory is already empty by the
    # time it is removed
    for dirpath, dirnames, filenames in os.walk(path, topdown=False):
        for filename in filenames:
            os.unlink(os.path.join(dirpath, filename))

        for dirname in dirnames:
            target = os.path.join(dirpath, dirname)
            if os.path.islink(target):
                # os.walk lists symbolic links to directories among the
                # directory names; os.rmdir fails on a link, so remove
                # the link itself instead
                os.unlink(target)
            else:
                os.rmdir(target)

View File

@@ -0,0 +1,8 @@
from document_indexing.conf.settings import SUFFIX_SEPARATOR
def assemble_document_filename(document, suffix=0):
    """
    Return the file_filename of a document, with the suffix appended
    when one is given

    A false suffix (such as the default 0) yields the bare
    file_filename; otherwise the file_filename and the suffix are joined
    using SUFFIX_SEPARATOR.
    """
    base_name = document.file_filename
    if not suffix:
        return base_name

    name_parts = [base_name, unicode(suffix)]
    return SUFFIX_SEPARATOR.join(name_parts)

View File

@@ -11,7 +11,7 @@ def get_document_indexing_subtemplate(document):
object_list = []
for index_instance in document.indexinstance_set.all():
object_list.append(get_breadcrumbs(index_instance, single_link=True))
object_list.append(get_breadcrumbs(index_instance, single_link=True, include_count=True))
return {
'name': 'generic_list_subtemplate.html',

View File

@@ -1,11 +1,8 @@
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from django.http import HttpResponseRedirect
from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from django.contrib import messages
from django.core.urlresolvers import reverse
from django.conf import settings
from django.utils.safestring import mark_safe
from permissions.api import check_permissions
@@ -55,7 +52,7 @@ def rebuild_index_instances(request):
}, context_instance=RequestContext(request))
else:
try:
errors, warnings = do_rebuild_all_indexes()
warnings = do_rebuild_all_indexes()
messages.success(request, _(u'Index rebuild completed successfully.'))
for warning in warnings:
messages.warning(request, warning)