Implement a dirty flag to index newly modified objects

This commit is contained in:
Roberto Rosario
2012-04-11 21:43:30 -04:00
parent 6a7ca922f0
commit 5adffd4e75
4 changed files with 108 additions and 31 deletions

View File

@@ -1,6 +1,18 @@
from __future__ import absolute_import
import logging
from django.utils.translation import ugettext_lazy as _
from django.dispatch import receiver
from navigation.api import register_sidebar_template, register_links
from documents.models import Document
from scheduler.runtime import scheduler
from signaler.signals import post_update_index, pre_update_index
from .models import IndexableObject
logger = logging.getLogger(__name__)
search = {'text': _(u'search'), 'view': 'search', 'famfam': 'zoom'}
search_advanced = {'text': _(u'advanced search'), 'view': 'search_advanced', 'famfam': 'zoom_in'}
@@ -12,3 +24,21 @@ register_links(['search', 'search_advanced', 'results'], [search, search_advance
register_links(['results'], [search_again], menu_name='sidebar')
register_sidebar_template(['search', 'search_advanced', 'results'], 'recent_searches.html')
def mark_dirty(obj):
    """
    Flag `obj` as modified so it is picked up by the next search index update

    The manager method is declared as ``mark_dirty(self, obj)``, so the
    object has to be passed positionally; passing it as ``content_object=``
    raises a TypeError.
    """
    IndexableObject.objects.mark_dirty(obj)
# Attach a mark_dirty attribute to the Document model class; the lambda
# receives the instance as `obj` and forwards it to the IndexableObject
# manager's mark_dirty().
Document.add_to_class('mark_dirty', lambda obj: IndexableObject.objects.mark_dirty(obj))
@receiver(post_update_index, dispatch_uid='clear_dirty_indexables')
def clear_dirty_indexables(sender, **kwargs):
    """
    Handler for the post_update_index signal: empty the queue of
    IndexableObject entries
    """
    logger.debug('Clearing all indexable flags post update index signal')
    # NOTE(review): this removes every entry, including any flagged while
    # the index update was still running - confirm that is acceptable.
    indexable_queue = IndexableObject.objects
    indexable_queue.clear_all()
@receiver(pre_update_index, dispatch_uid='scheduler_shutdown_pre_update_index')
def scheduler_shutdown_pre_update_index(sender, **kwargs):
    """
    Handler for the pre_update_index signal: shut the scheduler down
    before the index update proceeds
    """
    logger.debug('Scheduler shut down on pre update index signal')
    # NOTE(review): nothing visible here restarts the scheduler after the
    # update - presumably the updating process is short lived; confirm.
    stop_scheduler = scheduler.shutdown
    stop_scheduler()

View File

@@ -1,6 +1,6 @@
from django.contrib import admin
from dynamic_search.models import RecentSearch
from dynamic_search.models import RecentSearch, IndexableObject
class RecentSearchAdmin(admin.ModelAdmin):
@@ -9,3 +9,4 @@ class RecentSearchAdmin(admin.ModelAdmin):
readonly_fields = ('user', 'query', 'datetime_created', 'hits')
# Register both models with the admin site: RecentSearch through its
# RecentSearchAdmin options class, IndexableObject with the default options.
admin.site.register(RecentSearch, RecentSearchAdmin)
admin.site.register(IndexableObject)

View File

@@ -1,22 +1,26 @@
from __future__ import absolute_import
import urlparse
import urllib
from datetime import datetime
import datetime
from django.db import models
from django.utils.translation import ugettext as _
from django.contrib.auth.models import User
from django.core.urlresolvers import reverse
from django.utils.encoding import smart_unicode, smart_str
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from django.utils.translation import ugettext_lazy as _
from dynamic_search.managers import RecentSearchManager
from dynamic_search.api import registered_search_dict
from .managers import RecentSearchManager
from .api import registered_search_dict
class RecentSearch(models.Model):
'''
"""
Keeps a list of the n most recent search keywords for a given user
'''
"""
user = models.ForeignKey(User, verbose_name=_(u'user'), editable=False)
query = models.TextField(verbose_name=_(u'query'), editable=False)
datetime_created = models.DateTimeField(verbose_name=_(u'datetime created'), editable=False)
@@ -46,7 +50,7 @@ class RecentSearch(models.Model):
return u'%s (%s)' % (display_string, self.hits)
def save(self, *args, **kwargs):
self.datetime_created = datetime.now()
self.datetime_created = datetime.datetime.now()
super(RecentSearch, self).save(*args, **kwargs)
def url(self):
@@ -60,3 +64,45 @@ class RecentSearch(models.Model):
ordering = ('-datetime_created',)
verbose_name = _(u'recent search')
verbose_name_plural = _(u'recent searches')
class IndexableObjectManager(models.Manager):
    """
    Manager for the queue of objects flagged as modified ("dirty") and
    waiting for the next search index update

    Every query goes through the manager itself rather than through
    ``self.model.objects`` so that a database or scope bound to this
    manager instance is honored.
    """

    def get_dirty(self, datetime=None):
        """
        Return the flagged entries as a queryset

        `datetime`: optional lower bound; when given (truthy) only the
        entries whose `datetime` field is greater than or equal to it are
        returned, otherwise all entries are returned.
        """
        # The parameter name shadows the `datetime` module inside this
        # method; it is kept unchanged for callers passing it by keyword.
        if not datetime:
            return self.all()
        return self.filter(datetime__gte=datetime)

    def get_dirty_pk_list(self, datetime=None):
        """
        Return a flat values list with the `object_id` of each flagged entry

        The ids are not filtered by content type.
        """
        return self.get_dirty(datetime).values_list('object_id', flat=True)

    def mark_dirty(self, obj):
        """
        Flag `obj` as modified, creating its queue entry if needed
        """
        content_type = ContentType.objects.get_for_model(obj)
        entry, created = self.get_or_create(content_type=content_type, object_id=obj.pk)
        if not created:
            # The entry already existed: save it again so that the model's
            # save() restamps `datetime`; otherwise a date filtered
            # get_dirty() call would miss this newer modification.
            entry.save()

    def clear_all(self):
        """
        Delete every entry in the queue
        """
        self.all().delete()
class IndexableObject(models.Model):
    """
    Queue entry pointing, through a generic relation, at an object that
    was modified and should be indexed during the next search index update
    """
    # Overwritten with the current time on every save()
    datetime = models.DateTimeField(verbose_name=_(u'date time'))
    # Generic relation to the flagged object
    content_type = models.ForeignKey(ContentType, blank=True, null=True)
    object_id = models.PositiveIntegerField(blank=True, null=True)
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    objects = IndexableObjectManager()

    class Meta:
        verbose_name = _(u'indexable object')
        verbose_name_plural = _(u'indexable objects')

    def __unicode__(self):
        flagged_object = self.content_object
        return unicode(flagged_object)

    def save(self, *args, **kwargs):
        """
        Stamp the entry with the current local time, then save it
        """
        # Inside a method `datetime` resolves to the module, not the field
        current_time = datetime.datetime.now()
        self.datetime = current_time
        super(IndexableObject, self).save(*args, **kwargs)

View File

@@ -7,39 +7,39 @@ from haystack import indexes
from documents.models import Document
from .models import IndexableObject
'''
date_added = models.DateTimeField(verbose_name=_(u'added'), db_index=True, editable=False)
document = models.ForeignKey(Document, verbose_name=_(u'document'), editable=False)
major = models.PositiveIntegerField(verbose_name=_(u'mayor'), default=1, editable=False)
minor = models.PositiveIntegerField(verbose_name=_(u'minor'), default=0, editable=False)
micro = models.PositiveIntegerField(verbose_name=_(u'micro'), default=0, editable=False)
release_level = models.PositiveIntegerField(choices=RELEASE_LEVEL_CHOICES, default=RELEASE_LEVEL_FINAL, verbose_name=_(u'release level'), editable=False)
serial = models.PositiveIntegerField(verbose_name=_(u'serial'), default=0, editable=False)
timestamp = models.DateTimeField(verbose_name=_(u'timestamp'), editable=False)
comment = models.TextField(blank=True, verbose_name=_(u'comment'))
checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False)
page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label'))
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number'), db_index=True)
# Register the fields that will be searchable
register('document', Document, _(u'document'), [
{'name': u'document_type__name', 'title': _(u'Document type')},
{'name': u'documentversion__mimetype', 'title': _(u'MIME type')},
{'name': u'documentversion__filename', 'title': _(u'Filename')},
{'name': u'documentmetadata__value', 'title': _(u'Metadata value')},
{'name': u'documentversion__documentpage__content', 'title': _(u'Content')},
{'name': u'description', 'title': _(u'Description')},
{'name': u'tags__name', 'title': _(u'Tags')},
{'name': u'comments__comment', 'title': _(u'Comments')},
]
)
#register(Document, _(u'document'), ['document_type__name', 'file_mimetype', 'documentmetadata__value', 'documentpage__content', 'description', {'field_name':'file_filename', 'comparison':'iexact'}])
'''
class DocumentIndex(indexes.SearchIndex, indexes.Indexable):
    """
    Search index definition for the Document model

    Only the documents currently flagged in the IndexableObject queue are
    handed over for indexing, see build_queryset().
    """
    # Main document field, rendered from a template
    text = indexes.CharField(document=True, use_template=True)
    # Taken directly from the `filename` attribute of the model.
    # Disabled experiments (not active): a boost of 1.125 on this field, a
    # prepare_cleaned_filename() hook returning unidecode(obj.filename) and
    # an index_queryset() override used when rebuilding the whole index.
    cleaned_filename = indexes.CharField(model_attr='filename')

    def get_model(self):
        """
        Return the model class handled by this index
        """
        return Document

    def build_queryset(self, start_date=None, end_date=None):
        """
        Return the queryset of Document instances flagged as dirty

        `start_date` and `end_date` are accepted but not used to narrow
        the result.
        """
        # NOTE(review): the queue ids are not filtered by content type;
        # presumably only Document instances are ever flagged - confirm.
        dirty_pks = IndexableObject.objects.get_dirty_pk_list()
        return self.get_model().objects.filter(pk__in=dirty_pks)