Added remaining functionality to document indexing (filesystem rebuild, better warning reporting)
@@ -1,6 +1,6 @@
from django.utils.translation import ugettext_lazy as _

from navigation.api import register_links, register_menu
from navigation.api import register_menu
from permissions.api import register_permissions
from main.api import register_tool
@@ -15,8 +15,8 @@ class IndexInstanceInline(admin.StackedInline):
class IndexAdmin(MPTTModelAdmin):
    list_display = ('expression', 'enabled', 'link_documents')


class IndexInstanceAdmin(MPTTModelAdmin):
    model = IndexInstance
    list_display = ('value', 'index', 'get_document_list_display')
@@ -12,9 +12,10 @@ from document_indexing.models import Index, IndexInstance, \
from document_indexing.conf.settings import AVAILABLE_INDEXING_FUNCTIONS
from document_indexing.conf.settings import MAX_SUFFIX_COUNT
from document_indexing.filesystem import fs_create_index_directory, \
    fs_create_document_link, fs_delete_document_link
    fs_create_document_link, fs_delete_document_link, \
    fs_delete_index_directory, fs_delete_directory_recusive
from document_indexing.conf.settings import SLUGIFY_PATHS
from document_indexing.os_agnostic import assemble_document_filename

if SLUGIFY_PATHS == False:
    # Do not slugify path or filenames and extensions
@@ -32,7 +33,6 @@ def update_indexes(document):
    """
    Update or create all the index instances related to a document
    """
    print 'update_indexes'
    warnings = []

    eval_dict = {}
@@ -50,20 +50,21 @@ def update_indexes(document):
def delete_indexes(document):
    """
    Delete all the index instances related to a document
    """
    warnings = []

    for index_instance in document.indexinstance_set.all():
        _remove_document_from_index_instance(document, index_instance)
        index_warnings = _remove_document_from_index_instance(document, index_instance)
        warnings.extend(index_warnings)

    return warnings


def get_instance_link(index_instance=None, text=None, simple=False):
    """
    Return an HTML anchor to an index instance
    """
    if simple:
        # Just display the instance's value or overrided text, no
        # HTML anchor
@@ -81,18 +82,18 @@ def get_instance_link(index_instance=None, text=None, simple=False):
        'url': reverse('index_instance_list'),
        'value': ugettext(u'root')
    }


def get_breadcrumbs(index_instance, simple=False, single_link=False):
def get_breadcrumbs(index_instance, simple=False, single_link=False, include_count=False):
    """
    Return a joined string of HTML anchors to every index instance's
    parent from the root of the tree to the index instance
    """
    result = []
    if single_link:
        # Return the entire breadcrumb path as a single HTML anchor
        simple = True

    result.append(get_instance_link(simple=simple))

    for instance in index_instance.get_ancestors():
@@ -100,31 +101,42 @@ def get_breadcrumbs(index_instance, simple=False, single_link=False):
    result.append(get_instance_link(index_instance, simple=simple))

    output = []

    if include_count:
        output.append(u'(%d)' % index_instance.documents.count())

    if single_link:
        # Return the entire breadcrumb path as a single HTML anchor
        return mark_safe(get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
        output.insert(0, get_instance_link(index_instance=index_instance, text=(u' / '.join(result))))
        return mark_safe(u' '.join(output))
    else:
        return mark_safe(u' / '.join(result))
        output.insert(0, u' / '.join(result))
        return mark_safe(u' '.join(output))


def do_rebuild_all_indexes():
    fs_delete_directory_recusive()
    IndexInstance.objects.all().delete()
    DocumentRenameCount.objects.all().delete()
    for document in Document.objects.all():
        update_indexes(document)

    return []  # Warnings - None


# Internal functions
def find_lowest_available_suffix(suffix_list):
    print 'suffix_list', suffix_list
    suffix = 0
    while suffix in suffix_list:
        suffix += 1
        if suffix > MAX_SUFFIX_COUNT:
            raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)
    else:
        return suffix


def find_lowest_available_suffix(index_instance, document):
    index_instance_documents = DocumentRenameCount.objects.filter(index_instance=index_instance).filter(document__file_extension=document.file_extension)
    files_list = []
    for index_instance_document in index_instance_documents:
        files_list.append(assemble_document_filename(index_instance_document.document, index_instance_document.suffix))

    for suffix in xrange(MAX_SUFFIX_COUNT):
        if assemble_document_filename(document, suffix) not in files_list:
            return suffix

    raise MaxSuffixCountReached(ugettext(u'Maximum suffix (%s) count reached.') % MAX_SUFFIX_COUNT)


def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
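For context, the reworked suffix search above can be read as: try suffixes 0, 1, 2, ... and take the first one whose assembled filename is not already in use under the index instance. A minimal, self-contained sketch of that idea, with illustrative names standing in for the app's models and settings (this is not the commit's own code):

# Illustrative sketch only -- plain values stand in for DocumentRenameCount
# rows and for the SUFFIX_SEPARATOR / MAX_SUFFIX_COUNT settings.
SUFFIX_SEPARATOR = u'_'
MAX_SUFFIX_COUNT = 1000


def assemble_filename(filename, suffix=0):
    # Same rule as assemble_document_filename: only append a non-zero suffix
    if suffix:
        return SUFFIX_SEPARATOR.join([filename, unicode(suffix)])
    return filename


def lowest_available_suffix(filename, taken):
    # 'taken' is the set of filenames already linked under the index instance
    for suffix in xrange(MAX_SUFFIX_COUNT):
        if assemble_filename(filename, suffix) not in taken:
            return suffix
    raise Exception(u'Maximum suffix count reached.')

# lowest_available_suffix(u'report', set([u'report', u'report_1'])) == 2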
@@ -132,50 +144,47 @@ def _evaluate_index(eval_dict, document, node, parent_index_instance=None):
    Evaluate an enabled index expression and update or create all the
    related index instances also recursively calling itself to evaluate
    all the index's children
    """
    warnings = []
    if node.enabled:
        try:
            result = eval(node.expression, eval_dict, AVAILABLE_INDEXING_FUNCTIONS)
            index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance)
            if created:
            if result:
                index_instance, created = IndexInstance.objects.get_or_create(index=node, value=result, parent=parent_index_instance)
                #if created:
                fs_create_index_directory(index_instance)
                if node.link_documents and document not in index_instance.documents.all():
                    #suffix_list = index_instance.documents.filter(file_filename=document.file_filename).filter(file_extension=document.file_extension).values_list('suffix', flat=True)
                    suffix_list = DocumentRenameCount.objects.filter(document__file_filename=document.file_filename).filter(document__file_extension=document.file_extension).filter(index_instance=index_instance).values_list('suffix', flat=True)
                    suffix = find_lowest_available_suffix(suffix_list)
                    print 'lower suffix: %s' % suffix
                    document_count = DocumentRenameCount(
                        index_instance=index_instance,
                        document=document,
                        suffix=suffix
                    )
                    document_count.save()
                if node.link_documents:
                    suffix = find_lowest_available_suffix(index_instance, document)
                    document_count = DocumentRenameCount(
                        index_instance=index_instance,
                        document=document,
                        suffix=suffix
                    )
                    document_count.save()

                    fs_create_document_link(index_instance, document, suffix)
                    index_instance.documents.add(document)

                for children in node.get_children():
                    children_warnings = _evaluate_index(
                        eval_dict, document, children, index_instance
                    )
                    warnings.extend(children_warnings)

        except (NameError, AttributeError), exc:
            warnings.append(_(u'Error in document indexing update expression: %(expression)s; %(exception)s') % {
                'expression': node.expression, 'exception': exc})

        except Exception, exc:
            warnings.append(_(u'Error updating document index, expression: %(expression)s; %(exception)s') % {
                'expression': node.expression, 'exception': exc})

    return warnings


def _remove_document_from_index_instance(document, index_instance):
    """
    Delete a documents reference from an index instance and call itself
    recusively deleting documents and empty index instances up to the
    root of the tree
    """
    warnings = []
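The pattern worth noting in the hunk above is that _evaluate_index never lets an expression error escape: each failure becomes a warning string, and every recursive call's warnings are folded into the caller's list, so update_indexes can return one flat list for the whole index tree. A stripped-down sketch of that aggregation, using hypothetical node dicts rather than the real Index model:

# Hypothetical stand-in for the recursive warning collection; 'node' is a
# plain dict with 'expression' and 'children' keys.
def evaluate_node(node, eval_dict):
    warnings = []
    try:
        eval(node['expression'], eval_dict, {})
    except Exception, exc:
        warnings.append(u'Error in expression %s; %s' % (node['expression'], exc))
    for child in node.get('children', []):
        # Fold the child's warnings into this node's list
        warnings.extend(evaluate_node(child, eval_dict))
    return warnings

# evaluate_node({'expression': u'metadata.undefined', 'children': []}, {})
# returns a one-item warning list instead of raising NameError.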
@@ -188,14 +197,15 @@ def _remove_document_from_index_instance(document, index_instance):
        # if there are no more documents and no children, delete
        # node and check parent for the same conditions
        parent = index_instance.parent
        fs_delete_index_directory(index_instance)
        index_instance.delete()
        parent_warnings = _remove_document_from_index_instance(
            document, parent
        )
        warnings.extend(parent_warnings)

    except DocumentRenameCount.DoesNotExist:
        return warnings
    except Exception, exc:
        print '_remove_document_from_index_instance exception: %s' % exc
        warnings.append(_(u'Unable to delete document indexing node; %s') % exc)

    return warnings
@@ -1,8 +1,5 @@
"""Configuration options for the document_indexing app"""

import hashlib
import uuid

from django.utils.translation import ugettext_lazy as _

from common.utils import proper_name
@@ -18,6 +15,7 @@ register_settings(
    settings=[
        # Definition
        {'name': u'AVAILABLE_INDEXING_FUNCTIONS', 'global_name': u'DOCUMENT_INDEXING_AVAILABLE_INDEXING_FUNCTIONS', 'default': available_indexing_functions},
        {'name': u'SUFFIX_SEPARATOR', 'global_name': u'DOCUMENT_INDEXING_SUFFIX_SEPARATOR', 'default': u'_'},
        # Filesystem serving
        {'name': u'SLUGIFY_PATHS', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_SLUGIFY_PATHS', 'default': False},
        {'name': u'MAX_SUFFIX_COUNT', 'global_name': u'DOCUMENT_INDEXING_FILESYSTEM_MAX_SUFFIX_COUNT', 'default': 1000},
@@ -3,20 +3,23 @@ import os
from django.utils.translation import ugettext_lazy as _

from metadata.classes import MetadataObject

from document_indexing.models import IndexInstance
from document_indexing.os_agnostic import assemble_document_filename
from document_indexing.conf.settings import FILESERVING_ENABLE
from document_indexing.conf.settings import FILESERVING_PATH

# TODO: delete fileserving document function for use with rebuild index function


def get_instance_path(index_instance):
    """
    Return a platform formated filesytem path corresponding to an
    index instance
    """
    return os.sep.join(index_instance.get_ancestors().values_list(u'value', flat=True))
    names = []
    for ancestor in index_instance.get_ancestors():
        names.append(ancestor.value)

    names.append(index_instance.value)

    return os.sep.join(names)


def fs_create_index_directory(index_instance):
@@ -28,19 +31,14 @@ def fs_create_index_directory(index_instance):
        if exc.errno == errno.EEXIST:
            pass
        else:
            raise OSError(_(u'Unable to create indexing directory; %s') % exc)


def fs_create_document_link(index_instance, document, suffix=0):
    if FILESERVING_ENABLE:
        name_part = document.file_filename
        if suffix:
            name_part = u'_'.join([name_part, unicode(suffix)])
        name_part = assemble_document_filename(document, suffix)
        filename = os.extsep.join([name_part, document.file_extension])
        filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
        print 'filepath', filepath

        try:
            os.symlink(document.file.path, filepath)
        except OSError, exc:
@@ -61,11 +59,10 @@ def fs_delete_document_link(index_instance, document, suffix=0):
        name_part = document.file_filename
        if suffix:
            name_part = u'_'.join([name_part, unicode(suffix)])

        filename = os.extsep.join([name_part, document.file_extension])
        filepath = os.path.join(FILESERVING_PATH, get_instance_path(index_instance), filename)
        print 'delete filepath', filepath

        try:
            os.unlink(filepath)
        except OSError, exc:
@@ -73,98 +70,23 @@ def fs_delete_document_link(index_instance, document, suffix=0):
            # Raise when any error other than doesn't exits
            raise OSError(_(u'Unable to delete document symbolic link; %s') % exc)

'''
path, filename = os.path.split(document_metadata_index.filename)

#Cleanup directory of dead stuff
#Delete siblings that are dead links
try:
    for f in os.listdir(path):
        filepath = os.path.join(path, f)
        if os.path.islink(filepath):
            #Get link's source
            source = os.readlink(filepath)
            if os.path.isabs(source):
                if not os.path.exists(source):
                    #link's source is absolute and doesn't exit
                    os.unlink(filepath)
            else:
                os.unlink(os.path.join(path, filepath))
        elif os.path.isdir(filepath):
            #is a directory, try to delete it
            try:
                os.removedirs(path)
            except:
                pass
except OSError, exc:
    pass

#Remove the directory if it is empty
try:
    os.removedirs(path)
except:
    pass
'''

'''
def next_available_filename(document, metadata_index, path, filename, extension, suffix=0):
    target = filename
    if suffix:
        target = '_'.join([filename, unicode(suffix)])
    filepath = os.path.join(path, os.extsep.join([target, extension]))
    matches = DocumentMetadataIndex.objects.filter(filename=filepath)
    if matches.count() == 0:
        document_metadata_index = DocumentMetadataIndex(
            document=document, metadata_index=metadata_index,
            filename=filepath)
        try:
            os.symlink(document.file.path, filepath)
            document_metadata_index.save()
        except OSError, exc:
            if exc.errno == errno.EEXIST:
                #This link should not exist, try to delete it
                try:
                    os.unlink(filepath)
                    #Try again with same suffix
                    return next_available_filename(document, metadata_index, path, filename, extension, suffix)
                except Exception, exc:
                    raise Exception(_(u'Unable to create symbolic link, filename clash: %(filepath)s; %(exc)s') % {'filepath': filepath, 'exc': exc})
            else:
                raise OSError(_(u'Unable to create symbolic link: %(filepath)s; %(exc)s') % {'filepath': filepath, 'exc': exc})

        return filepath
    else:
        if suffix > MAX_RENAME_COUNT:
            raise Exception(_(u'Maximum rename count reached, not creating symbolic link'))
        return next_available_filename(document, metadata_index, path, filename, extension, suffix + 1)


#TODO: diferentiate between evaluation error and filesystem errors
def do_recreate_all_links(raise_exception=True):
    errors = []
    warnings = []

    for document in Document.objects.all():
        try:
            document_delete_fs_links(document)
        except NameError, e:
            warnings.append('%s: %s' % (document, e))
        except Exception, e:
            if raise_exception:
                raise Exception(e)
            else:
                errors.append('%s: %s' % (document, e))

    for document in Document.objects.all():
        try:
            create_warnings = document_create_fs_links(document)
        except Exception, e:
            if raise_exception:
                raise Exception(e)
            else:
                errors.append('%s: %s' % (document, e))

        for warning in create_warnings:
            warnings.append('%s: %s' % (document, warning))
    return errors, warnings
'''


def fs_delete_index_directory(index_instance):
    if FILESERVING_ENABLE:
        target_directory = os.path.join(FILESERVING_PATH, get_instance_path(index_instance))
        try:
            os.removedirs(target_directory)
        except OSError, exc:
            if exc.errno == errno.EEXIST:
                pass
            else:
                raise OSError(_(u'Unable to delete indexing directory; %s') % exc)


def fs_delete_directory_recusive(path=FILESERVING_PATH):
    if FILESERVING_ENABLE:
        for dirpath, dirnames, filenames in os.walk(path, topdown=False):
            for filename in filenames:
                os.unlink(os.path.join(dirpath, filename))
            for dirname in dirnames:
                os.rmdir(os.path.join(dirpath, dirname))
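A side note on the new directory helpers above, as a hedged sketch rather than the commit's own code: fs_delete_index_directory leans on os.removedirs, which removes the leaf directory and then keeps pruning parent directories while they remain empty. The helper name and the errno handling below are illustrative assumptions, not taken from the diff:

import errno
import os


def prune_empty_directory(target_directory):
    # os.removedirs() deletes the leaf directory and then walks upward,
    # removing each parent that is left empty, stopping at the first
    # non-empty one.
    try:
        os.removedirs(target_directory)
    except OSError, exc:
        if exc.errno == errno.ENOENT:
            pass  # already gone, nothing to prune
        else:
            raise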
@@ -17,7 +17,7 @@ class Index(MPTTModel):
    # % available_indexing_functions_string)
    enabled = models.BooleanField(default=True, verbose_name=_(u'enabled'))
    link_documents = models.BooleanField(default=False, verbose_name=_(u'link documents'))

    def __unicode__(self):
        return self.expression if not self.link_documents else u'%s/[document]' % self.expression
@@ -31,7 +31,7 @@ class IndexInstance(MPTTModel):
    index = models.ForeignKey(Index, verbose_name=_(u'index'))
    value = models.CharField(max_length=128, blank=True, verbose_name=_(u'value'))
    documents = models.ManyToManyField(Document, verbose_name=_(u'documents'))

    def __unicode__(self):
        return self.value
apps/document_indexing/os_agnostic.py (new file, 8 lines)
@@ -0,0 +1,8 @@
from document_indexing.conf.settings import SUFFIX_SEPARATOR


def assemble_document_filename(document, suffix=0):
    if suffix:
        return SUFFIX_SEPARATOR.join([document.file_filename, unicode(suffix)])
    else:
        return document.file_filename
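A quick usage sketch for the new helper (the document object here is a hypothetical stand-in; only file_filename is read, and SUFFIX_SEPARATOR defaults to u'_'):

# Hypothetical minimal document object for illustration.
class FakeDocument(object):
    def __init__(self, file_filename):
        self.file_filename = file_filename

# With the default separator u'_':
#   assemble_document_filename(FakeDocument(u'invoice'), 0) -> u'invoice'
#   assemble_document_filename(FakeDocument(u'invoice'), 3) -> u'invoice_3'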
@@ -9,10 +9,10 @@ def get_document_indexing_subtemplate(document):
    list of index instances where a document may be found
    """
    object_list = []

    for index_instance in document.indexinstance_set.all():
        object_list.append(get_breadcrumbs(index_instance, single_link=True))
        object_list.append(get_breadcrumbs(index_instance, single_link=True, include_count=True))

    return {
        'name': 'generic_list_subtemplate.html',
        'context': {
@@ -1,11 +1,8 @@
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from django.http import HttpResponseRedirect
from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from django.contrib import messages
from django.core.urlresolvers import reverse
from django.conf import settings
from django.utils.safestring import mark_safe

from permissions.api import check_permissions
@@ -33,13 +30,13 @@ def index_instance_list(request, index_id=None):
    breadcrumbs = get_instance_link()

    title = mark_safe(_(u'contents for index: %s') % breadcrumbs)

    return render_to_response('generic_list.html', {
        'object_list': index_instance_list,
        'title': title,
        'hide_links': True,
    }, context_instance=RequestContext(request))


def rebuild_index_instances(request):
    check_permissions(request.user, 'document_indexing', [PERMISSION_DOCUMENT_INDEXING_REBUILD_INDEXES])
@@ -55,7 +52,7 @@ def rebuild_index_instances(request):
        }, context_instance=RequestContext(request))
    else:
        try:
            errors, warnings = do_rebuild_all_indexes()
            warnings = do_rebuild_all_indexes()
            messages.success(request, _(u'Index rebuild completed successfully.'))
            for warning in warnings:
                messages.warning(request, warning)