diff --git a/apps/dynamic_search/classes.py b/apps/dynamic_search/classes.py
new file mode 100644
index 0000000000..d6905c739c
--- /dev/null
+++ b/apps/dynamic_search/classes.py
@@ -0,0 +1,259 @@
+from __future__ import absolute_import
+
+import re
+import types
+import logging
+import datetime
+
+from django.db.models import Q
+from django.db.models.loading import get_model
+
+from .conf.settings import LIMIT
+
+logger = logging.getLogger(__name__)
+
+
+class SearchModel(object):
+    """
+    Describes how to search a model, using fields of the model itself
+    and fields of related models
+    """
+    registry = {}
+
+    @classmethod
+    def get_all(cls):
+        """
+        Returns a list with all the SearchModel instances created so far
+        """
+        return list(cls.registry.values())
+
+    def __init__(self, app_label, model_name, label=None):
+        self.app_label = app_label
+        self.model_name = model_name
+        self.search_fields = []
+        self.model = get_model(app_label, model_name)
+        self.label = label or self.model._meta.verbose_name
+        self.__class__.registry[id(self)] = self
+
+    def get_fields_simple_list(self):
+        """
+        Returns a list of (full name, label) tuples, one per search field
+        """
+        return [
+            (search_field.get_full_name(), search_field.label)
+            for search_field in self.search_fields
+        ]
+
+    def add_model_field(self, *args, **kwargs):
+        """
+        Add a search field that directly belongs to the parent SearchModel
+        """
+        self.search_fields.append(SearchField(self, *args, **kwargs))
+
+    def add_related_field(self, *args, **kwargs):
+        """
+        Add a search field that will search content in a related field in
+        a separate model
+        """
+        self.search_fields.append(RelatedSearchField(self, *args, **kwargs))
+
+    def normalize_query(self, query_string,
+                        findterms=re.compile(r'"([^"]+)"|(\S+)').findall,
+                        normspace=re.compile(r'\s{2,}').sub):
+        """
+        Splits the query string in individual keywords, getting rid of
+        unnecessary spaces and grouping quoted words together.
+
+        Example:
+            >>> normalize_query('  some random  words "with   quotes  " and   spaces')
+            ['some', 'random', 'words', 'with quotes', 'and', 'spaces']
+        """
+        return [normspace(' ', (t[0] or t[1]).strip()) for t in findterms(query_string)]
+
+    def simple_search(self, query_string):
+        """
+        Search for the terms of query_string in all the search fields
+
+        Returns a (model_list, flat_list, shown_result_count, result_count,
+        elapsed_time) tuple
+        """
+        start_time = datetime.datetime.now()
+        terms = self.normalize_query(query_string)
+        search_dict = {}
+
+        for search_field in self.search_fields:
+            self._add_query_entry(search_dict, search_field, terms)
+
+        return self._execute_search(search_dict, start_time)
+
+    def advanced_search(self, dictionary):
+        """
+        Search every search field for the terms entered for that field
+
+        dictionary maps search field full names, as returned by
+        get_fields_simple_list, to query strings.  Unknown keys and blank
+        values are ignored.  Returns the same tuple as simple_search.
+        """
+        start_time = datetime.datetime.now()
+        search_dict = {}
+
+        for search_field in self.search_fields:
+            value = dictionary.get(search_field.get_full_name()) or u''
+            terms = self.normalize_query(value.strip())
+            if terms:
+                self._add_query_entry(search_dict, search_field, terms)
+
+        return self._execute_search(search_dict, start_time)
+
+    def _add_query_entry(self, search_dict, search_field, terms):
+        """
+        Add to search_dict the terms to look for in search_field, grouped
+        by the model that holds the field
+        """
+        model_entry = search_dict.setdefault(search_field.get_model(), {
+            'query_entries': [],
+            'label': search_field.label,
+            'return_value': search_field.return_value
+        })
+        model_entry['query_entries'].append({
+            'field_name': [search_field.field],
+            'terms': terms
+        })
+
+    def _execute_search(self, search_dict, start_time):
+        """
+        Run the queries described by search_dict and gather the results
+        """
+        model_list = {}
+        flat_list = []
+        result_count = 0
+        shown_result_count = 0
+
+        logger.debug('search_dict: %s', search_dict)
+
+        for model, data in search_dict.items():
+            queries = []
+            for query_entry in data['query_entries']:
+                queries.extend(self.assemble_query(query_entry['terms'], query_entry['field_name']))
+
+            # None means that no query has been executed yet.  An empty set
+            # can not be used for this, otherwise an empty intermediate
+            # result would be replaced by the next query's results instead
+            # of being ANDed with them
+            model_result_ids = None
+
+            for query in queries:
+                logger.debug('query: %s', query)
+
+                # Get results per search field
+                field_result_ids = set(model.objects.filter(query).values_list(data['return_value'], flat=True))
+
+                # Convert the QuerySet to a Python set and perform the
+                # AND operation on the program and not as a query.
+                # This operation ANDs the results of all the queries
+                # belonging to a single model
+                if model_result_ids is None:
+                    model_result_ids = field_result_ids
+                else:
+                    model_result_ids &= field_result_ids
+
+                logger.debug('field_result_ids: %s', field_result_ids)
+                logger.debug('model_result_ids: %s', model_result_ids)
+
+            if model_result_ids is None:
+                model_result_ids = set()
+
+            # Update the search result total count
+            result_count += len(model_result_ids)
+
+            # Search the field results return values (PK) in the SearchModel's model
+            results = self.model.objects.in_bulk(list(model_result_ids)[:LIMIT]).values()
+
+            # Update the search result visible count (limited by LIMIT config option)
+            shown_result_count += len(results)
+
+            if results:
+                model_list[data['label']] = results
+                for result in results:
+                    if result not in flat_list:
+                        flat_list.append(result)
+
+        logger.debug('model_list: %s', model_list)
+        logger.debug('flat_list: %s', flat_list)
+
+        elapsed_time = unicode(datetime.datetime.now() - start_time).split(':')[2]
+
+        return model_list, flat_list, shown_result_count, result_count, elapsed_time
+
+    def assemble_query(self, terms, search_fields):
+        """
+        Returns a list of Q objects, one per term, each of them ORing the
+        term across the given search fields.
+
+        A search field is either a field name, compared using icontains,
+        or a dictionary with a 'field_name' key and an optional
+        'comparison' key.  Search fields of any other type are ignored
+        and so are terms for which no Q object could be built.
+        """
+        queries = []
+        for term in terms:
+            or_query = None
+            for field in search_fields:
+                if isinstance(field, types.StringTypes):
+                    comparison = u'icontains'
+                    field_name = field
+                elif isinstance(field, types.DictType):
+                    comparison = field.get('comparison', u'icontains')
+                    field_name = field.get('field_name', u'')
+                else:
+                    # Unknown search field type, nothing to query
+                    continue
+
+                if field_name:
+                    q = Q(**{'%s__%s' % (field_name, comparison): term})
+                    if or_query is None:
+                        or_query = q
+                    else:
+                        or_query = or_query | q
+
+            if or_query is not None:
+                queries.append(or_query)
+        return queries
+
+
+# SearchField classes
+class SearchField(object):
+    """
+    Search for terms in fields that directly belong to the parent SearchModel
+    """
+    def __init__(self, search_model, field, label):
+        self.search_model = search_model
+        self.field = field
+        self.label = label
+        self.return_value = 'pk'
+
+    def get_full_name(self):
+        return self.field
+
+    def get_model(self):
+        return self.search_model.model
+
+
+class RelatedSearchField(object):
+    """
+    Search for terms in fields that are related to the parent SearchModel
+    """
+    def __init__(self, search_model, app_label, model_name, field, return_value, label):
+        self.search_model = search_model
+        self.app_label = app_label
+        self.model_name = model_name
+        self.field = field
+        self.return_value = return_value
+        self.model = get_model(app_label, model_name)
+        self.label = label
+
+    def get_full_name(self):
+        return '%s.%s.%s' % (self.app_label, self.model_name, self.field)
+
+    def get_model(self):
+        return self.model
diff --git a/apps/dynamic_search/forms.py b/apps/dynamic_search/forms.py index 82fc2f6c9d..a09756e0e1 100644 --- a/apps/dynamic_search/forms.py +++ b/apps/dynamic_search/forms.py @@ -1,8 +1,8 @@ +from __future__ import absolute_import + from django import forms from django.utils.translation import ugettext_lazy as _ -from dynamic_search.api import registered_search_dict - class SearchForm(forms.Form): q = forms.CharField(max_length=128, label=_(u'Search terms')) @@ -15,11 +15,11 @@ class
SearchForm(forms.Form): class AdvancedSearchForm(forms.Form): def __init__(self, *args, **kwargs): + self.search_model = kwargs.pop('search_model') super(AdvancedSearchForm, self).__init__(*args, **kwargs) - for model_name, values in registered_search_dict.items(): - for field in values['fields']: - self.fields['%s__%s' % (model_name, field['name'])] = forms.CharField( - label=field['title'], - required=False - ) + for name, label in self.search_model.get_fields_simple_list(): + self.fields[name] = forms.CharField( + label=label, + required=False + ) diff --git a/apps/dynamic_search/views.py b/apps/dynamic_search/views.py index 0fff396b85..e0a3655b97 100644 --- a/apps/dynamic_search/views.py +++ b/apps/dynamic_search/views.py @@ -1,4 +1,7 @@ +from __future__ import absolute_import + import urlparse +import logging from django.shortcuts import render_to_response from django.template import RequestContext @@ -9,51 +12,64 @@ from django.http import HttpResponseRedirect from django.core.urlresolvers import reverse from django.utils.http import urlencode -from dynamic_search.models import RecentSearch -from dynamic_search.api import perform_search -from dynamic_search.forms import SearchForm, AdvancedSearchForm -from dynamic_search.conf.settings import SHOW_OBJECT_TYPE -from dynamic_search.conf.settings import LIMIT +from .classes import SearchModel +from .conf.settings import SHOW_OBJECT_TYPE +from .conf.settings import LIMIT +from .forms import SearchForm, AdvancedSearchForm +from .models import RecentSearch + +logger = logging.getLogger(__name__) + +# HACK: since we will only be doing search for Documents (for now) +document_search = SearchModel.get_all()[0] def results(request, extra_context=None): - context = {} - - context.update({ + context = { 'query_string': request.GET, #'hide_header': True, 'hide_links': True, 'multi_select_as_buttons': True, 'search_results_limit': LIMIT, - }) + } - try: - response = perform_search(request.GET) - if 
response['shown_result_count'] != response['result_count']: + if request.GET: + # Only do search if there is user input, otherwise just render + # the template with the extra_context + + if 'q' in request.GET: + # Simple query + logger.debug('simple search') + query_string = request.GET.get('q', u'').strip() + model_list, flat_list, shown_result_count, result_count, elapsed_time = document_search.simple_search(query_string) + else: + # Advanced search + logger.debug('advanced search') + model_list, flat_list, shown_result_count, result_count, elapsed_time = document_search.advanced_search(request.GET) + + if shown_result_count != result_count: title = _(u'results, (showing only %(shown_result_count)s out of %(result_count)s)') % { - 'shown_result_count': response['shown_result_count'], - 'result_count': response['result_count']} + 'shown_result_count': shown_result_count, + 'result_count': result_count} + else: title = _(u'results') - - if extra_context: - context.update(extra_context) + + # Update the context with the search results + context.update({ + 'found_entries': model_list, + 'object_list': flat_list, + 'title': title, + 'time_delta': elapsed_time, + }) + query = urlencode(dict(request.GET.items())) if query: - RecentSearch.objects.add_query_for_user(request.user, query, response['result_count']) + RecentSearch.objects.add_query_for_user(request.user, query, result_count) - context.update({ - 'found_entries': response['model_list'], - 'object_list': response['flat_list'], - 'title': title, - 'time_delta': response['elapsed_time'], - }) - except Exception, e: - if settings.DEBUG: - raise - elif request.user.is_staff or request.user.is_superuser: - messages.error(request, _(u'Search error: %s') % e) + if extra_context: + context.update(extra_context) if SHOW_OBJECT_TYPE: context.update({'extra_columns': @@ -65,7 +81,7 @@ def results(request, extra_context=None): def search(request, advanced=False): if advanced: - form = AdvancedSearchForm(data=request.GET) + 
form = AdvancedSearchForm(data=request.GET, search_model=document_search) return render_to_response('generic_form.html', { 'form': form,