Added remaining functionality to document indexing (filesystem rebuild, better warning reporting)

This commit is contained in:
Roberto Rosario
2011-05-20 00:34:31 -04:00
parent 7f59c16316
commit a02c5c7c8d
9 changed files with 108 additions and 173 deletions

View File

@@ -1,6 +1,6 @@
from django.utils.translation import ugettext_lazy as _
from navigation.api import register_links, register_menu
from navigation.api import register_menu
from permissions.api import register_permissions
from main.api import register_tool

View File

@@ -15,8 +15,8 @@ class IndexInstanceInline(admin.StackedInline):
class IndexAdmin(MPTTModelAdmin):
list_display = ('expression', 'enabled', 'link_documents')
class IndexInstanceAdmin(MPTTModelAdmin):
model = IndexInstance
list_display = ('value', 'index', 'get_document_list_display')

View File

@@ -12,9 +12,10 @@ from document_indexing.models import Index, IndexInstance, \
from document_indexing.conf.settings import AVAILABLE_INDEXING_FUNCTIONS
from document_indexing.conf.settings import MAX_SUFFIX_COUNT
from document_indexing.filesystem import fs_create_index_directory, \
fs_create_document_link, fs_delete_document_link
fs_create_document_link, fs_delete_document_link, \
fs_delete_index_directory, fs_delete_directory_recusive
from document_indexing.conf.settings import SLUGIFY_PATHS
from document_indexing.os_agnostic import assemble_document_filename
if SLUGIFY_PATHS == False:
# Do not slugify path or filenames and extensions
@@ -32,7 +33,6 @@ def update_indexes(document):
"""
Update or create all the index instances related to a document
"""
print 'update_indexes'
warnings = []
eval_dict = {}
@@ -50,20 +50,21 @@ def update_indexes(document):
def delete_indexes(document):
    """
    Delete all the index instances related to a document

    Returns a list with the warnings reported while removing the
    document from each of its index instances (empty when there were
    none).
    """
    warnings = []

    for index_instance in document.indexinstance_set.all():
        # Remove the document exactly once per index instance and keep
        # the warnings the removal reports instead of discarding them
        index_warnings = _remove_document_from_index_instance(
            document, index_instance
        )
        warnings.extend(index_warnings)

    return warnings
def get_instance_link(index_instance=None, text=None, simple=False):
"""
Return an HTML anchor to an index instance
"""
"""
if simple:
# Just display the instance's value or the overridden text, no
# HTML anchor
@@ -81,18 +82,18 @@ def get_instance_link(index_instance=None, text=None, simple=False):
'url': reverse('index_instance_list'),
'value': ugettext(u'root')
}
def get_breadcrumbs(index_instance, simple=False, single_link=False):
def get_breadcrumbs(index_instance, simple=False, single_link=False, include_count=False):
"""
Return a joined string of HTML anchors to every index instance's
parent from the root of the tree to the index instance
"""
"""
result = []
if single_link:
# Return the entire breadcrumb path as a single HTML anchor
simple = True
result.append(get_instance_link(simple=simple))
for instance in index_instance.get_ancestors():
@@ -100,31 +101,42 @@ def get_breadcrumbs(index_instance, simple=False, single_link=False):
result.append(get_instance_link(index_instance, simple=simple))
output = []
if include_count:
output.append(u'(%d)' % index_instance.documents.count())
if single_link:
# Return the entire breadcrumb path as a single HTML anchor
return mark_safe(get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
output.insert(0, get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
return mark_safe(u' '.join(output))
else:
return mark_safe(u' / '.join(result))
output.insert(0, u' / '.join(result))
return mark_safe(u' '.join(output))
def do_rebuild_all_indexes():
    """
    Rebuild the indexes of every document from scratch

    Clears the file serving directory tree, deletes all the stored
    index instances and document rename counts, and then updates the
    indexes of each document.

    Returns the list of warnings collected while updating the indexes.
    """
    warnings = []

    fs_delete_directory_recusive()
    IndexInstance.objects.all().delete()
    DocumentRenameCount.objects.all().delete()

    for document in Document.objects.all():
        # NOTE(review): update_indexes is expected to return its list of
        # warnings; the `or []` guard keeps the rebuild working if it
        # returns None instead -- confirm against update_indexes
        warnings.extend(update_indexes(document) or [])

    return warnings
# Internal functions
def find_lowest_available_suffix(suffix_list):
    """
    Return the lowest non negative integer not present in suffix_list

    Raises MaxSuffixCountReached when the search goes past
    MAX_SUFFIX_COUNT.
    """
    suffix = 0
    while suffix in suffix_list:
        suffix += 1
        if suffix > MAX_SUFFIX_COUNT:
            raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)

    # The loop only ends normally once suffix is not in suffix_list
    return suffix
def find_lowest_available_suffix(index_instance, document):
    """
    Return the lowest suffix that gives document a filename not already
    in use inside index_instance

    Only the rename counts of documents sharing the document's file
    extension are taken into account.  Raises MaxSuffixCountReached
    when no suffix below MAX_SUFFIX_COUNT is free.
    """
    index_instance_documents = DocumentRenameCount.objects.filter(index_instance=index_instance).filter(document__file_extension=document.file_extension)

    # A set keeps every membership test below constant time instead of
    # scanning all the filenames in use for each candidate suffix
    taken_filenames = set(
        assemble_document_filename(rename_count.document, rename_count.suffix)
        for rename_count in index_instance_documents
    )

    for suffix in xrange(MAX_SUFFIX_COUNT):
        if assemble_document_filename(document, suffix) not in taken_filenames:
            return suffix

    raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)
def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
@@ -132,50 +144,47 @@ def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
Evaluate an enabled index expression and update or create all the
related index instances also recursively calling itself to evaluate
all the index's children
"""
"""
warnings = []
if node.enabled:
try:
result = eval(node.expression, eval_dict, AVAILABLE_INDEXING_FUNCTIONS)
index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance)
if created:
if result:
index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance)
#if created:
fs_create_index_directory(index_instance)
if node.link_documents and document not in index_instance.documents.all():
#suffix_list = index_instance.documents.filter(file_filename=document.file_filename).filter(file_extension=document.file_extension).values_list('suffix', flat=True)
suffix_list = DocumentRenameCount.objects.filter(document__file_filename=document.file_filename).filter(document__file_extension=document.file_extension).filter(index_instance=index_instance).values_list('suffix', flat=True)
suffix = find_lowest_available_suffix(suffix_list)
print 'lower suffix: %s' % suffix
document_count = DocumentRenameCount(
index_instance=index_instance,
document=document,
suffix=suffix
)
document_count.save()
if node.link_documents:
suffix = find_lowest_available_suffix(index_instance, document)
document_count = DocumentRenameCount(
index_instance=index_instance,
document=document,
suffix=suffix
)
document_count.save()
fs_create_document_link(index_instance, document, suffix)
index_instance.documents.add(document)
fs_create_document_link(index_instance, document, suffix)
index_instance.documents.add(document)
for children in node.get_children():
children_warnings = _evaluate_index(
eval_dict, document, children, index_instance
)
warnings.extend(children_warnings)
for children in node.get_children():
children_warnings = _evaluate_index(
eval_dict, document, children, index_instance
)
warnings.extend(children_warnings)
except (NameError, AttributeError), exc:
warnings.append(_(u'Error in document indexing update expression: %(expression)s; %(exception)s') % {
'expression': node.expression, 'exception': exc})
except Exception, exc:
warnings.append(_(u'Error updating document index, expression: %(expression)s; %(exception)s') % {
'expression': node.expression, 'exception': exc})
return warnings
def _remove_document_from_index_instance(document, index_instance):
"""
Delete a document's reference from an index instance and call itself
recursively deleting documents and empty index instances up to the
recursively deleting documents and empty index instances up to the
root of the tree
"""
warnings = []
@@ -188,14 +197,15 @@ def _remove_document_from_index_instance(document, index_instance):
# if there are no more documents and no children, delete
# node and check parent for the same conditions
parent = index_instance.parent
fs_delete_index_directory(index_instance)
index_instance.delete()
parent_warnings = _remove_document_from_index_instance(
document, parent
)
warnings.extend(parent_warnings)
except DocumentRenameCount.DoesNotExist:
return warnings
except Exception, exc:
print '_remove_document_from_index_instance exception: %s' % exc
warnings.append(_(u'Unable to delete document indexing node; %s') % exc)
return warnings

View File

@@ -1,8 +1,5 @@
"""Configuration options for the document_indexing app"""
import hashlib
import uuid
from django.utils.translation import ugettext_lazy as _
from common.utils import proper_name
@@ -18,6 +15,7 @@ register_settings(
settings=[
# Definition
{'name': u'AVAILABLE_INDEXING_FUNCTIONS', 'global_name': u'DOCUMENT_INDEXING_AVAILABLE_INDEXING_FUNCTIONS', 'default': available_indexing_functions},
{'name': u'SUFFIX_SEPARATOR', 'global_name': u'DOCUMENT_INDEXING_SUFFIX_SEPARATOR', 'default': u'_'},
# Filesystem serving
{'name': u'SLUGIFY_PATHS', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SLUGIFY_PATHS', 'default': False},
{'name': u'MAX_SUFFIX_COUNT', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_MAX_SUFFIX_COUNT', 'default': 1000},

View File

@@ -3,20 +3,23 @@ import os
from django.utils.translation import ugettext_lazy as _
from metadata.classes import MetadataObject
from document_indexing.models import IndexInstance
from document_indexing.os_agnostic import assemble_document_filename
from document_indexing.conf.settings import FILESERVING_ENABLE
from document_indexing.conf.settings import FILESERVING_PATH
# TODO: delete fileserving document function for use with rebuild index function
def get_instance_path(index_instance):
    """
    Return a platform formatted filesystem path corresponding to an
    index instance

    The path is made of the value of every ancestor of the index
    instance, in the order returned by get_ancestors(), followed by the
    value of the index instance itself, all joined with os.sep.
    """
    names = [ancestor.value for ancestor in index_instance.get_ancestors()]
    # The instance's own value is the last path component; leaving it
    # out would yield the path of the parent instead
    names.append(index_instance.value)

    return os.sep.join(names)
def fs_create_index_directory(index_instance):
@@ -28,19 +31,14 @@ def fs_create_index_directory(index_instance):
if exc.errno == errno.EEXIST:
pass
else:
raise OSError(_(u'Unable to create indexing directory; %s') % exc)
raise OSError(_(u'Unable to create indexing directory; %s') % exc)
def fs_create_document_link(index_instance, document, suffix=0):
if FILESERVING_ENABLE:
name_part = document.file_filename
if suffix:
name_part = u'_'.join([name_part, unicode(suffix)])
name_part = assemble_document_filename(document, suffix)
filename = os.extsep.join([name_part, document.file_extension])
filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
print 'filepath', filepath
try:
os.symlink(document.file.path, filepath)
except OSError, exc:
@@ -61,11 +59,10 @@ def fs_delete_document_link(index_instance, document, suffix=0):
name_part = document.file_filename
if suffix:
name_part = u'_'.join([name_part, unicode(suffix)])
filename = os.extsep.join([name_part, document.file_extension])
filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
print 'delete filepath', filepath
try:
os.unlink(filepath)
except OSError, exc:
@@ -73,98 +70,23 @@ def fs_delete_document_link(index_instance, document, suffix=0):
# Raise on any error other than the link not existing
raise OSError(_(u'Unable to delete document symbolic link; %s') % exc)
'''
path, filename = os.path.split(document_metadata_index.filename)
#Cleanup directory of dead stuff
#Delete siblings that are dead links
try:
for f in os.listdir(path):
filepath = os.path.join(path, f)
if os.path.islink(filepath):
#Get link's source
source = os.readlink(filepath)
if os.path.isabs(source):
if not os.path.exists(source):
#link's source is absolute and doesn't exit
os.unlink(filepath)
else:
os.unlink(os.path.join(path, filepath))
elif os.path.isdir(filepath):
#is a directory, try to delete it
try:
os.removedirs(path)
except:
pass
except OSError, exc:
pass
#Remove the directory if it is empty
try:
os.removedirs(path)
except:
pass
'''
def fs_delete_index_directory(index_instance):
    """
    Delete the filesystem directory corresponding to an index instance

    Does nothing when file serving is disabled.  A directory that no
    longer exists is not treated as an error; any other failure is
    raised as an OSError.
    """
    if FILESERVING_ENABLE:
        target_directory = os.path.join(FILESERVING_PATH, get_instance_path(index_instance))
        try:
            # NOTE(review): os.removedirs also prunes every parent
            # directory that is left empty, so a later call for a parent
            # instance may find its directory already gone
            os.removedirs(target_directory)
        except OSError as exc:
            # ENOENT (not EEXIST, which is the create-side errno) is
            # what a missing directory reports on removal
            if exc.errno != errno.ENOENT:
                raise OSError(_(u'Unable to delete indexing directory; %s') % exc)
def fs_delete_directory_recusive(path=FILESERVING_PATH):
    """
    Delete every file and directory found below path, leaving path
    itself in place

    Does nothing when file serving is disabled.
    """
    if FILESERVING_ENABLE:
        # Walk bottom-up so every directory is already empty by the time
        # it is removed
        for dirpath, dirnames, filenames in os.walk(path, topdown=False):
            for filename in filenames:
                os.unlink(os.path.join(dirpath, filename))

            for dirname in dirnames:
                directory = os.path.join(dirpath, dirname)
                if os.path.islink(directory):
                    # os.walk lists symbolic links to directories among
                    # the directories, but os.rmdir cannot remove a link
                    os.unlink(directory)
                else:
                    os.rmdir(directory)

View File

@@ -17,7 +17,7 @@ class Index(MPTTModel):
# % available_indexing_functions_string)
enabled = models.BooleanField(default=True, verbose_name=_(u'enabled'))
link_documents = models.BooleanField(default=False, verbose_name=_(u'link documents'))
def __unicode__(self):
return self.expression if not self.link_documents else u'%s/[document]' % self.expression
@@ -31,7 +31,7 @@ class IndexInstance(MPTTModel):
index = models.ForeignKey(Index, verbose_name=_(u'index'))
value = models.CharField(max_length=128, blank=True, verbose_name=_(u'value'))
documents = models.ManyToManyField(Document, verbose_name=_(u'documents'))
def __unicode__(self):
return self.value

View File

@@ -0,0 +1,8 @@
from document_indexing.conf.settings import SUFFIX_SEPARATOR
def assemble_document_filename(document, suffix=0):
    """
    Return the document's filename, followed by SUFFIX_SEPARATOR and the
    suffix when the suffix is not zero (or otherwise empty)
    """
    if not suffix:
        # No suffix requested, the plain filename is used as is
        return document.file_filename

    name_parts = [document.file_filename, unicode(suffix)]
    return SUFFIX_SEPARATOR.join(name_parts)

View File

@@ -9,10 +9,10 @@ def get_document_indexing_subtemplate(document):
list of index instances where a document may be found
"""
object_list = []
for index_instance in document.indexinstance_set.all():
object_list.append(get_breadcrumbs(index_instance, single_link=True))
object_list.append(get_breadcrumbs(index_instance, single_link=True, include_count=True))
return {
'name': 'generic_list_subtemplate.html',
'context': {

View File

@@ -1,11 +1,8 @@
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from django.http import HttpResponseRedirect
from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from django.contrib import messages
from django.core.urlresolvers import reverse
from django.conf import settings
from django.utils.safestring import mark_safe
from permissions.api import check_permissions
@@ -33,13 +30,13 @@ def index_instance_list(request, index_id=None):
breadcrumbs = get_instance_link()
title = mark_safe(_(u'contents for index: %s') % breadcrumbs)
return render_to_response('generic_list.html', {
'object_list': index_instance_list,
'title': title,
'hide_links': True,
}, context_instance=RequestContext(request))
def rebuild_index_instances(request):
check_permissions(request.user, 'document_indexing', [PERMISSION_DOCUMENT_INDEXING_REBUILD_INDEXES])
@@ -55,7 +52,7 @@ def rebuild_index_instances(request):
}, context_instance=RequestContext(request))
else:
try:
errors, warnings = do_rebuild_all_indexes()
warnings = do_rebuild_all_indexes()
messages.success(request, _(u'Index rebuild completed successfully.'))
for warning in warnings:
messages.warning(request, warning)