Search: Refactor classes

Add additional classes to split existing classes that are too complex.

Remove search timing.

Add query explainer.

Move literals to their own module.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-12-04 00:01:57 -04:00
parent 1d1b4f5f5f
commit 396f9f6fca
3 changed files with 242 additions and 155 deletions

View File

@@ -1,26 +1,78 @@
from __future__ import absolute_import, unicode_literals
import datetime
import logging
from django.apps import apps
from django.db.models import Q
from django.utils.encoding import force_text
from django.utils.encoding import force_text, python_2_unicode_compatible
from django.utils.module_loading import import_string
from django.utils.translation import ugettext as _
from .literals import (
QUERY_OPERATION_AND, QUERY_OPERATION_OR, TERM_OPERATION_OR,
TERM_OPERATIONS, TERM_QUOTES, TERM_NEGATION_CHARACTER,
TERM_SPACE_CHARACTER
)
from .settings import setting_limit
logger = logging.getLogger(__name__)
QUERY_OPERATION_AND = 1
QUERY_OPERATION_OR = 2
TERM_OPERATION_AND = 'AND'
TERM_OPERATION_OR = 'OR'
TERM_OPERATIONS = [TERM_OPERATION_AND, TERM_OPERATION_OR]
@python_2_unicode_compatible
class FieldQuery(object):
    """
    Build the Q object and the readable description for one search field.

    Walks the terms of a SearchTermCollection. Terms that are not meta
    become `<field>__icontains` lookups, inverted when the term is negated,
    and are chained to the lookups before them. Meta terms are never
    searched; an OR meta term switches the chaining from AND to OR for the
    terms that follow it.

    `query` ends up as the combined Q object, or None when no term was
    searched. `parts` holds the text fragments joined by `__str__`.
    """
    def __init__(self, search_field, search_term_collection):
        self.query = None
        self.parts = []
        operation = QUERY_OPERATION_AND

        for term in search_term_collection.terms:
            if term.is_meta:
                # Meta terms only steer how the next terms are chained,
                # they are shown in the description as typed.
                if term.string == TERM_OPERATION_OR:
                    operation = QUERY_OPERATION_OR

                self.parts.append(term.string)
                continue

            lookup = '%s__%s' % (search_field.field, 'icontains')
            condition = Q(**{lookup: term.string})
            if term.negated:
                condition = ~condition

            if self.query is None:
                self.query = condition
            elif operation == QUERY_OPERATION_AND:
                self.query = self.query & condition
            else:
                self.query = self.query | condition

            self.parts.append(force_text(search_field.label))
            self.parts.append(force_text(term))

    def __str__(self):
        return ' '.join(self.parts)
class SearchField(object):
    """
    A searchable field that belongs directly to the model of the parent
    SearchModel.
    """
    def __init__(self, search_model, field, label):
        self.search_model, self.field, self.label = (
            search_model, field, label
        )

    def get_full_name(self):
        """Return the field name, which doubles as the full name."""
        return self.field

    def get_model(self):
        """Return the `model` attribute of the parent SearchModel."""
        parent = self.search_model
        return parent.model
@python_2_unicode_compatible
class SearchModel(object):
_registry = {}
@@ -43,84 +95,6 @@ class SearchModel(object):
return result
@staticmethod
def get_terms(text):
"""
Takes a text string and returns a list of dictionaries.
Each dictionary has two key "negated" and "string"
String 'a "b c" d "e" \'f g\' h -i -"j k" l -\'m n\' o OR p'
Results in:
[
{'negated': False, 'string': 'a'}, {'negated': False, 'string': 'b c'},
{'negated': False, 'string': 'd'}, {'negated': False, 'string': 'e'},
{'negated': False, 'string': 'f g'}, {'negated': False, 'string': 'h'},
{'negated': True, 'string': 'i'}, {'negated': True, 'string': 'j k'},
{'negated': False, 'string': 'l'}, {'negated': True, 'string': 'm n'},
{'negated': False, 'string': 'o'}, {'negated': False, 'string': 'OR'},
{'negated': False, 'string': 'p'}
]
"""
QUOTES = ['"', '\'']
NEGATION_CHARACTER = '-'
SPACE_CHARACTER = ' '
inside_quotes = False
negated = False
term_letters = []
terms = []
for letter in text:
if not inside_quotes and letter == NEGATION_CHARACTER:
negated = True
else:
if letter in QUOTES:
if inside_quotes:
if term_letters:
terms.append(
{
'meta': False,
'negated': negated,
'string': ''.join(term_letters)
}
)
negated = False
term_letters = []
inside_quotes = not inside_quotes
else:
if not inside_quotes and letter == SPACE_CHARACTER:
if term_letters:
term_string = ''.join(term_letters)
if term_string in TERM_OPERATIONS:
meta = True
else:
meta = False
terms.append(
{
'meta': meta,
'negated': negated,
'string': term_string
}
)
negated = False
term_letters = []
else:
term_letters.append(letter)
if term_letters:
terms.append(
{
'meta': False,
'negated': negated,
'string': ''.join(term_letters)
}
)
return terms
def __init__(self, app_label, model_name, serializer_string, label=None, permission=None):
self.app_label = app_label
self.model_name = model_name
@@ -131,6 +105,9 @@ class SearchModel(object):
self.permission = permission
self.__class__._registry[self.get_full_name()] = self
def __str__(self):
    """Return the label of the search model as text."""
    label = self.label
    return force_text(label)
@property
def label(self):
if not self._label:
@@ -154,9 +131,6 @@ class SearchModel(object):
search_field = SearchField(self, *args, **kwargs)
self.search_fields.append(search_field)
def get_all_search_fields(self):
    """Return the list that holds the search fields of this search model."""
    search_fields = self.search_fields
    return search_fields
def get_full_name(self):
    """Return the search model name in the form `app_label.model_name`."""
    name_parts = (self.app_label, self.model_name)
    return '%s.%s' % name_parts
@@ -165,7 +139,7 @@ class SearchModel(object):
Returns a list of the fields for the SearchModel
"""
result = []
for search_field in self.get_all_search_fields():
for search_field in self.search_fields:
result.append((search_field.get_full_name(), search_field.label))
return result
@@ -176,87 +150,171 @@ class SearchModel(object):
except KeyError:
raise KeyError('No search field named: %s' % full_name)
def get_search_query(self, query_string, global_and_search=False):
    """
    Return a SearchQuery built for this search model.

    `query_string` and `global_and_search` are handed to SearchQuery
    unchanged.
    """
    search_query = SearchQuery(
        global_and_search=global_and_search, query_string=query_string,
        search_model=self
    )
    return search_query
def search(self, query_string, user, global_and_search=False):
AccessControlList = apps.get_model(
app_label='acls', model_name='AccessControlList'
)
elapsed_time = 0
start_time = datetime.datetime.now()
result = None
for search_field in self.get_all_search_fields():
search_query = self.get_search_query(
query_string=query_string, global_and_search=global_and_search
)
terms = self.get_terms(
query_string.get(
search_field.field, query_string.get('q', '')
).strip()
queryset = self.model.objects.filter(
pk__in=set(
self.model.objects.filter(search_query.query).values_list(
'pk', flat=True
)[
:setting_limit.value
]
)
field_query = search_field.get_query(terms=terms)
if field_query:
if result is None:
result = field_query
else:
if global_and_search:
result = result & field_query
else:
result = result | field_query
elapsed_time = force_text(
datetime.datetime.now() - start_time
).split(':')[2]
logger.debug('elapsed_time: %s', elapsed_time)
pk_list = set(self.model.objects.filter(result or Q()).values_list('pk', flat=True)[:setting_limit.value])
queryset = self.model.objects.filter(pk__in=pk_list)
)
if self.permission:
queryset = AccessControlList.objects.filter_by_access(
permission=self.permission, user=user, queryset=queryset
)
return queryset, elapsed_time
return queryset
class SearchField(object):
"""
Search for terms in fields that directly belong to the parent SearchModel
"""
def __init__(self, search_model, field, label):
self.search_model = search_model
self.field = field
self.label = label
@python_2_unicode_compatible
class SearchQuery(object):
def __init__(self, query_string, search_model, global_and_search=False):
self.query = None
self.text = []
def get_full_name(self):
return self.field
for search_field in search_model.search_fields:
search_term_collection = SearchTermCollection(
text=query_string.get(
search_field.field, query_string.get('q', '')
).strip()
)
def get_model(self):
return self.search_model.model
field_query = FieldQuery(
search_field=search_field,
search_term_collection=search_term_collection
)
def get_query(self, terms):
query_operation = QUERY_OPERATION_AND
result = None
if field_query.query:
self.text.append('({})'.format(force_text(field_query)))
for term in terms:
if term['meta']:
# It is a meta term, modifies the query operation
# and is not searched
if term['string'] == TERM_OPERATION_OR:
query_operation = QUERY_OPERATION_OR
else:
q_object = Q(
**{'%s__%s' % (self.field, 'icontains'): term['string']}
)
if term['negated']:
q_object = ~q_object
if result is None:
result = q_object
else:
if query_operation == QUERY_OPERATION_AND:
result = result & q_object
if global_and_search:
self.text.append('AND')
else:
result = result | q_object
self.text.append('OR')
return result
if self.query is None:
self.query = field_query.query
else:
if global_and_search:
self.query = self.query & field_query.query
else:
self.query = self.query | field_query.query
self.query = self.query or Q()
def __str__(self):
return ' '.join(self.text[:-1])
@python_2_unicode_compatible
class SearchTerm(object):
    """
    A single parsed search term.

    Stores the text of the term, whether it is negated and whether it is a
    meta term.
    """
    def __init__(self, negated, string, is_meta):
        self.is_meta = is_meta
        self.negated = negated
        self.string = string

    def __str__(self):
        # Meta terms have no description of their own.
        if self.is_meta:
            return ''

        prefix = 'does not ' if self.negated else ''
        return '{}contains "{}"'.format(prefix, self.string)
@python_2_unicode_compatible
class SearchTermCollection(object):
    """
    The list of SearchTerm instances parsed from a search string.
    """
    def __init__(self, text):
        """
        Parse a text string and store the SearchTerm instances found in
        `self.terms`.

        Terms are separated by spaces. Single or double quotes group
        several words into one term. A dash at the start of a term negates
        it, a dash anywhere else is kept as part of the term. A term ended
        by a space whose text is in TERM_OPERATIONS is flagged as meta.

        String 'a "b c" d "e" \'f g\' h -i -"j k" l -\'m n\' o OR p'

        Results in terms with these attributes:
        [
            SearchTerm(is_meta=False, negated=False, string='a'),
            SearchTerm(is_meta=False, negated=False, string='b c'),
            SearchTerm(is_meta=False, negated=False, string='d'),
            SearchTerm(is_meta=False, negated=False, string='e'),
            SearchTerm(is_meta=False, negated=False, string='f g'),
            SearchTerm(is_meta=False, negated=False, string='h'),
            SearchTerm(is_meta=False, negated=True, string='i'),
            SearchTerm(is_meta=False, negated=True, string='j k'),
            SearchTerm(is_meta=False, negated=False, string='l'),
            SearchTerm(is_meta=False, negated=True, string='m n'),
            SearchTerm(is_meta=False, negated=False, string='o'),
            SearchTerm(is_meta=True, negated=False, string='OR'),
            SearchTerm(is_meta=False, negated=False, string='p')
        ]
        """
        inside_quotes = False
        negated = False
        term_letters = []
        self.terms = []

        for letter in text:
            if (
                letter == TERM_NEGATION_CHARACTER and not inside_quotes and
                not term_letters
            ):
                # Only a dash that starts a term is a negation. A dash in
                # the middle of a word, like "e-mail", falls through and is
                # stored as a regular letter.
                negated = True
            elif letter in TERM_QUOTES:
                if inside_quotes and term_letters:
                    # Closing quote: quoted text is always a literal term.
                    self.terms.append(
                        SearchTerm(
                            is_meta=False, negated=negated,
                            string=''.join(term_letters)
                        )
                    )
                    negated = False
                    term_letters = []

                inside_quotes = not inside_quotes
            elif not inside_quotes and letter == TERM_SPACE_CHARACTER:
                if term_letters:
                    term_string = ''.join(term_letters)
                    self.terms.append(
                        SearchTerm(
                            is_meta=term_string in TERM_OPERATIONS,
                            negated=negated, string=term_string
                        )
                    )
                    negated = False
                    term_letters = []
            else:
                term_letters.append(letter)

        # Whatever is left when the text ends is stored as a literal term.
        if term_letters:
            self.terms.append(
                SearchTerm(
                    is_meta=False, negated=negated,
                    string=''.join(term_letters)
                )
            )

    def __str__(self):
        # Meta terms are shown as typed, the rest use their description.
        return ' '.join(
            term.string if term.is_meta else force_text(term)
            for term in self.terms
        )

View File

@@ -0,0 +1,12 @@
from __future__ import unicode_literals
# Flags that select how the Q objects of consecutive terms are combined.
QUERY_OPERATION_AND = 1
QUERY_OPERATION_OR = 2

# Characters with a special meaning while a search string is parsed.
TERM_NEGATION_CHARACTER = '-'
TERM_SPACE_CHARACTER = ' '
TERM_QUOTES = ['"', "'"]

# Words that are flagged as meta terms instead of being searched.
TERM_OPERATION_AND = 'AND'
TERM_OPERATION_OR = 'OR'
TERM_OPERATIONS = [TERM_OPERATION_AND, TERM_OPERATION_OR]

View File

@@ -18,21 +18,38 @@ logger = logging.getLogger(__name__)
class ResultsView(SearchModelMixin, SingleObjectListView):
def get_extra_context(self):
search_query = self.get_search_query()
context = {
'hide_links': True,
'list_as_items': True,
'no_results_icon': icon_search_submit,
'no_results_text': _(
'Try again using different terms. '
'Try again using different terms.'
),
'no_results_title': _('No search results'),
'search_model': self.search_model,
'search_results_limit': setting_limit.value,
'title': _('Search results for: %s') % self.search_model.label,
'subtitle': search_query,
'title': _('Search results for: %s') % self.get_search_model(),
}
return context
def get_search_query(self):
    """
    Return the search query for the GET parameters of the request.

    Returns None when the request has no GET parameters. The query is a
    match-all (AND) query only when the `_match_all` parameter is 'on'.
    """
    parameters = self.request.GET
    if not parameters:
        # No user input, there is nothing to search for.
        return None

    match_all = parameters.get('_match_all', 'off') == 'on'
    return self.search_model.get_search_query(
        query_string=parameters, global_and_search=match_all
    )
def get_object_list(self):
self.search_model = self.get_search_model()
@@ -45,7 +62,7 @@ class ResultsView(SearchModelMixin, SingleObjectListView):
else:
global_and_search = False
queryset, timedelta = self.search_model.search(
queryset = self.search_model.search(
query_string=self.request.GET, user=self.request.user,
global_and_search=global_and_search
)