From 20acc24c7f38a8bdbdec90992dcf9a410531d616 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Fri, 26 Apr 2019 00:35:55 -0400 Subject: [PATCH] Backport search improvements, remove SEARCH_LIMIT Signed-off-by: Roberto Rosario --- HISTORY.rst | 3 + docs/releases/3.2.rst | 3 + mayan/apps/dynamic_search/api_views.py | 8 +- mayan/apps/dynamic_search/classes.py | 490 ++++++++++-------- mayan/apps/dynamic_search/literals.py | 12 + mayan/apps/dynamic_search/settings.py | 12 - .../apps/dynamic_search/tests/test_models.py | 78 +-- mayan/apps/dynamic_search/tests/test_views.py | 2 +- mayan/apps/dynamic_search/views.py | 6 +- 9 files changed, 342 insertions(+), 272 deletions(-) create mode 100644 mayan/apps/dynamic_search/literals.py delete mode 100644 mayan/apps/dynamic_search/settings.py diff --git a/HISTORY.rst b/HISTORY.rst index 174d6fe406..f9e4eab63a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -158,6 +158,9 @@ from an user. * Require dual permissions when add or removing users to and from group. Same with group to users. +* Backport search improvements. +* Remove search elapsed time calculation. +* Remove SEARCH_LIMIT setting. 3.1.11 (2019-04-XX) =================== diff --git a/docs/releases/3.2.rst b/docs/releases/3.2.rst index 30dee2d5c5..9183739253 100644 --- a/docs/releases/3.2.rst +++ b/docs/releases/3.2.rst @@ -190,6 +190,9 @@ Other changes from an user. * Require dual permissions when add or removing users to and from group. Same with group to users. +* Backport search improvements. +* Remove search elapsed time calculation. +* Remove SEARCH_LIMIT setting. Removals -------- diff --git a/mayan/apps/dynamic_search/api_views.py b/mayan/apps/dynamic_search/api_views.py index 98d047390f..b374059f9e 100644 --- a/mayan/apps/dynamic_search/api_views.py +++ b/mayan/apps/dynamic_search/api_views.py @@ -29,7 +29,7 @@ class APISearchView(SearchModelMixin, generics.ListAPIView): self.mayan_object_permissions = {'GET': (search_model.permission,)} try: - queryset, timedelta = search_model.search( + queryset = search_model.search( query_string=self.request.GET, user=self.request.user ) except Exception as exception: @@ -68,9 +68,9 @@ class APIAdvancedSearchView(SearchModelMixin, generics.ListAPIView): global_and_search = False try: - queryset, timedelta = self.search_model.search( - query_string=self.request.GET, user=self.request.user, - global_and_search=global_and_search + queryset = self.search_model.search( + global_and_search=global_and_search, + query_string=self.request.GET, user=self.request.user ) except Exception as exception: raise ParseError(force_text(exception)) diff --git a/mayan/apps/dynamic_search/classes.py b/mayan/apps/dynamic_search/classes.py index 390268e182..43d9d88566 100644 --- a/mayan/apps/dynamic_search/classes.py +++ b/mayan/apps/dynamic_search/classes.py @@ -1,222 +1,57 @@ from __future__ import absolute_import, unicode_literals -import datetime import logging from django.apps import apps from django.db.models import Q -from django.utils.encoding import force_text +from django.utils.encoding import force_text, python_2_unicode_compatible from django.utils.module_loading import import_string from django.utils.translation import ugettext as _ -from .settings import setting_limit +from .literals import ( + QUERY_OPERATION_AND, QUERY_OPERATION_OR, TERM_NEGATION_CHARACTER, + TERM_OPERATION_OR, TERM_OPERATIONS, TERM_QUOTES, TERM_SPACE_CHARACTER +) logger = logging.getLogger(__name__) -QUERY_OPERATION_AND = 1 -QUERY_OPERATION_OR = 2 -TERM_OPERATION_AND = 'AND' -TERM_OPERATION_OR = 'OR' -TERM_OPERATIONS = [TERM_OPERATION_AND, TERM_OPERATION_OR] +@python_2_unicode_compatible +class FieldQuery(object): + def __init__(self, search_field, search_term_collection): + query_operation = QUERY_OPERATION_AND + self.query = None + self.parts = [] - -class SearchModel(object): - _registry = {} - - @classmethod - def all(cls): - return list(cls._registry.values()) - - @classmethod - def as_choices(cls): - return cls._registry - - @classmethod - def get(cls, full_name): - try: - result = cls._registry[full_name] - except KeyError: - raise KeyError(_('No search model matching the query')) - if not hasattr(result, 'serializer'): - result.serializer = import_string(result.serializer_string) - - return result - - @staticmethod - def get_terms(text): - """ - Takes a text string and returns a list of dictionaries. - Each dictionary has two key "negated" and "string" - - String 'a "b c" d "e" \'f g\' h -i -"j k" l -\'m n\' o OR p' - - Results in: - [ - {'negated': False, 'string': 'a'}, {'negated': False, 'string': 'b c'}, - {'negated': False, 'string': 'd'}, {'negated': False, 'string': 'e'}, - {'negated': False, 'string': 'f g'}, {'negated': False, 'string': 'h'}, - {'negated': True, 'string': 'i'}, {'negated': True, 'string': 'j k'}, - {'negated': False, 'string': 'l'}, {'negated': True, 'string': 'm n'}, - {'negated': False, 'string': 'o'}, {'negated': False, 'string': 'OR'}, - {'negated': False, 'string': 'p'} - ] - """ - QUOTES = ['"', '\''] - NEGATION_CHARACTER = '-' - SPACE_CHARACTER = ' ' - - inside_quotes = False - negated = False - term_letters = [] - terms = [] - - for letter in text: - if not inside_quotes and letter == NEGATION_CHARACTER: - negated = True + for term in search_term_collection.terms: + if term.is_meta: + # It is a meta term, modifies the query operation + # and is not searched + if term.string == TERM_OPERATION_OR: + query_operation = QUERY_OPERATION_OR else: - if letter in QUOTES: - if inside_quotes: - if term_letters: - terms.append( - { - 'meta': False, - 'negated': negated, - 'string': ''.join(term_letters) - } - ) - negated = False - term_letters = [] + q_object = Q( + **{'%s__%s' % (search_field.field, 'icontains'): term.string} + ) + if term.negated: + q_object = ~q_object - inside_quotes = not inside_quotes + if self.query is None: + self.query = q_object else: - if not inside_quotes and letter == SPACE_CHARACTER: - if term_letters: - term_string = ''.join(term_letters) - if term_string in TERM_OPERATIONS: - meta = True - else: - meta = False - - terms.append( - { - 'meta': meta, - 'negated': negated, - 'string': term_string - } - ) - negated = False - term_letters = [] + if query_operation == QUERY_OPERATION_AND: + self.query = self.query & q_object else: - term_letters.append(letter) + self.query = self.query | q_object - if term_letters: - terms.append( - { - 'meta': False, - 'negated': negated, - 'string': ''.join(term_letters) - } - ) + if not term.is_meta: + self.parts.append(force_text(search_field.label)) + self.parts.append(force_text(term)) + else: + self.parts.append(term.string) - return terms - - def __init__(self, app_label, model_name, serializer_string, label=None, permission=None): - self.app_label = app_label - self.model_name = model_name - self.search_fields = [] - self._model = None # Lazy - self._label = label - self.serializer_string = serializer_string - self.permission = permission - self.__class__._registry[self.get_full_name()] = self - - @property - def label(self): - if not self._label: - self._label = self.model._meta.verbose_name - return self._label - - @property - def model(self): - if not self._model: - self._model = apps.get_model(self.app_label, self.model_name) - return self._model - - @property - def pk(self): - return self.get_full_name() - - def add_model_field(self, *args, **kwargs): - """ - Add a search field that directly belongs to the parent SearchModel - """ - search_field = SearchField(self, *args, **kwargs) - self.search_fields.append(search_field) - - def get_all_search_fields(self): - return self.search_fields - - def get_full_name(self): - return '%s.%s' % (self.app_label, self.model_name) - - def get_fields_simple_list(self): - """ - Returns a list of the fields for the SearchModel - """ - result = [] - for search_field in self.get_all_search_fields(): - result.append((search_field.get_full_name(), search_field.label)) - - return result - - def get_search_field(self, full_name): - try: - return self.search_fields[full_name] - except KeyError: - raise KeyError('No search field named: %s' % full_name) - - def search(self, query_string, user, global_and_search=False): - AccessControlList = apps.get_model( - app_label='acls', model_name='AccessControlList' - ) - elapsed_time = 0 - start_time = datetime.datetime.now() - - result = None - for search_field in self.get_all_search_fields(): - - terms = self.get_terms( - query_string.get( - search_field.field, query_string.get('q', '') - ).strip() - ) - - field_query = search_field.get_query(terms=terms) - if field_query: - if result is None: - result = field_query - else: - if global_and_search: - result = result & field_query - else: - result = result | field_query - - elapsed_time = force_text( - datetime.datetime.now() - start_time - ).split(':')[2] - - logger.debug('elapsed_time: %s', elapsed_time) - - pk_list = set(self.model.objects.filter(result or Q()).values_list('pk', flat=True)[:setting_limit.value]) - queryset = self.model.objects.filter(pk__in=pk_list) - - if self.permission: - queryset = AccessControlList.objects.filter_by_access( - permission=self.permission, user=user, queryset=queryset - ) - - return queryset, elapsed_time + def __str__(self): + return ' '.join(self.parts) class SearchField(object): @@ -234,29 +69,242 @@ class SearchField(object): def get_model(self): return self.search_model.model - def get_query(self, terms): - query_operation = QUERY_OPERATION_AND - result = None - for term in terms: - if term['meta']: - # It is a meta term, modifies the query operation - # and is not searched - if term['string'] == TERM_OPERATION_OR: - query_operation = QUERY_OPERATION_OR - else: - q_object = Q( - **{'%s__%s' % (self.field, 'icontains'): term['string']} - ) - if term['negated']: - q_object = ~q_object +@python_2_unicode_compatible +class SearchModel(object): + _registry = {} - if result is None: - result = q_object - else: - if query_operation == QUERY_OPERATION_AND: - result = result & q_object - else: - result = result | q_object + @classmethod + def all(cls): + return list(cls._registry.values()) + + @classmethod + def as_choices(cls): + return cls._registry + + @classmethod + def get(cls, name): + try: + result = cls._registry[name] + except KeyError: + raise KeyError(_('No search model matching the query')) + if not hasattr(result, 'serializer'): + result.serializer = import_string(result.serializer_path) return result + + def __init__(self, app_label, model_name, serializer_string, label=None, permission=None): + self.app_label = app_label + self.model_name = model_name + self.search_fields = [] + self._model = None # Lazy + self._label = label + self.serializer_path = serializer_string + self.permission = permission + self.__class__._registry[self.get_full_name()] = self + + def __str__(self): + return force_text(self.label) + + def add_model_field(self, *args, **kwargs): + """ + Add a search field that directly belongs to the parent SearchModel + """ + search_field = SearchField(self, *args, **kwargs) + self.search_fields.append(search_field) + + def get_fields_simple_list(self): + """ + Returns a list of the fields for the SearchModel + """ + result = [] + for search_field in self.search_fields: + result.append((search_field.get_full_name(), search_field.label)) + + return result + + def get_full_name(self): + return '%s.%s' % (self.app_label, self.model_name) + + def get_search_field(self, full_name): + try: + return self.search_fields[full_name] + except KeyError: + raise KeyError('No search field named: %s' % full_name) + + def get_search_query(self, query_string, global_and_search=False): + return SearchQuery( + query_string=query_string, search_model=self, + global_and_search=global_and_search + ) + + @property + def label(self): + if not self._label: + self._label = self.model._meta.verbose_name + return self._label + + @property + def model(self): + if not self._model: + self._model = apps.get_model(self.app_label, self.model_name) + return self._model + + @property + def pk(self): + return self.get_full_name() + + def search(self, query_string, user, global_and_search=False): + AccessControlList = apps.get_model( + app_label='acls', model_name='AccessControlList' + ) + + search_query = self.get_search_query( + query_string=query_string, global_and_search=global_and_search + ) + + queryset = self.model.objects.filter(search_query.query).distinct() + + if self.permission: + queryset = AccessControlList.objects.filter_by_access( + permission=self.permission, queryset=queryset, user=user + ) + + return queryset + + +@python_2_unicode_compatible +class SearchQuery(object): + def __init__(self, query_string, search_model, global_and_search=False): + self.query = None + self.text = [] + + for search_field in search_model.search_fields: + search_term_collection = SearchTermCollection( + text=query_string.get( + search_field.field, query_string.get('q', '') + ).strip() + ) + + field_query = FieldQuery( + search_field=search_field, + search_term_collection=search_term_collection + ) + + if field_query.query: + self.text.append('({})'.format(force_text(field_query))) + + if global_and_search: + self.text.append('AND') + else: + self.text.append('OR') + + if self.query is None: + self.query = field_query.query + else: + if global_and_search: + self.query = self.query & field_query.query + else: + self.query = self.query | field_query.query + + self.query = self.query or Q() + + def __str__(self): + return ' '.join(self.text[:-1]) + + +@python_2_unicode_compatible +class SearchTerm(object): + def __init__(self, negated, string, is_meta): + self.negated = negated + self.string = string + self.is_meta = is_meta + + def __str__(self): + if self.is_meta: + return '' + else: + return '{}contains "{}"'.format( + 'does not ' if self.negated else '', self.string + ) + + +@python_2_unicode_compatible +class SearchTermCollection(object): + def __init__(self, text): + """ + Takes a text string and returns a list of dictionaries. + Each dictionary has two key "negated" and "string" + + String 'a "b c" d "e" \'f g\' h -i -"j k" l -\'m n\' o OR p' + + Results in: + [ + {'negated': False, 'string': 'a'}, {'negated': False, 'string': 'b c'}, + {'negated': False, 'string': 'd'}, {'negated': False, 'string': 'e'}, + {'negated': False, 'string': 'f g'}, {'negated': False, 'string': 'h'}, + {'negated': True, 'string': 'i'}, {'negated': True, 'string': 'j k'}, + {'negated': False, 'string': 'l'}, {'negated': True, 'string': 'm n'}, + {'negated': False, 'string': 'o'}, {'negated': False, 'string': 'OR'}, + {'negated': False, 'string': 'p'} + ] + """ + inside_quotes = False + negated = False + term_letters = [] + self.terms = [] + + for letter in text: + if not inside_quotes and letter == TERM_NEGATION_CHARACTER: + negated = True + else: + if letter in TERM_QUOTES: + if inside_quotes: + if term_letters: + self.terms.append( + SearchTerm( + is_meta=False, negated=negated, + string=''.join(term_letters) + ) + ) + negated = False + term_letters = [] + + inside_quotes = not inside_quotes + else: + if not inside_quotes and letter == TERM_SPACE_CHARACTER: + if term_letters: + term_string = ''.join(term_letters) + if term_string in TERM_OPERATIONS: + is_meta = True + else: + is_meta = False + + self.terms.append( + SearchTerm( + is_meta=is_meta, negated=negated, + string=term_string + ) + ) + negated = False + term_letters = [] + else: + term_letters.append(letter) + + if term_letters: + self.terms.append( + SearchTerm( + is_meta=False, negated=negated, + string=''.join(term_letters) + ) + ) + + def __str__(self): + result = [] + for term in self.terms: + if term.is_meta: + result.append(term.string) + else: + result.append(force_text(term)) + + return ' '.join(result) diff --git a/mayan/apps/dynamic_search/literals.py b/mayan/apps/dynamic_search/literals.py new file mode 100644 index 0000000000..aab43b3afa --- /dev/null +++ b/mayan/apps/dynamic_search/literals.py @@ -0,0 +1,12 @@ +from __future__ import unicode_literals + +QUERY_OPERATION_AND = 1 +QUERY_OPERATION_OR = 2 + +TERM_OPERATION_AND = 'AND' +TERM_OPERATION_OR = 'OR' +TERM_OPERATIONS = [TERM_OPERATION_AND, TERM_OPERATION_OR] + +TERM_QUOTES = ['"', '\''] +TERM_NEGATION_CHARACTER = '-' +TERM_SPACE_CHARACTER = ' ' diff --git a/mayan/apps/dynamic_search/settings.py b/mayan/apps/dynamic_search/settings.py deleted file mode 100644 index e3e3dd928c..0000000000 --- a/mayan/apps/dynamic_search/settings.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import unicode_literals - -from django.utils.translation import ugettext_lazy as _ - -from mayan.apps.smart_settings import Namespace - - -namespace = Namespace(name='dynamic_search', label=_('Search')) -setting_limit = namespace.add_setting( - global_name='SEARCH_LIMIT', default=100, - help_text=_('Maximum amount search hits to fetch and display.') -) diff --git a/mayan/apps/dynamic_search/tests/test_models.py b/mayan/apps/dynamic_search/tests/test_models.py index f6be32a7c0..0628548a24 100644 --- a/mayan/apps/dynamic_search/tests/test_models.py +++ b/mayan/apps/dynamic_search/tests/test_models.py @@ -3,28 +3,25 @@ from __future__ import unicode_literals from mayan.apps.common.tests import BaseTestCase from mayan.apps.documents.permissions import permission_document_view from mayan.apps.documents.search import document_search -from mayan.apps.documents.tests import ( - DocumentTestMixin, TEST_DOCUMENT_FILENAME, TEST_SMALL_DOCUMENT_FILENAME -) +from mayan.apps.documents.tests import DocumentTestMixin class DocumentSearchTestCase(DocumentTestMixin, BaseTestCase): auto_upload_document = False - test_document_filename = TEST_DOCUMENT_FILENAME def test_simple_search_after_related_name_change(self): """ Test that simple search works after related_name changes to document versions and document version pages """ - self.upload_document() + self.upload_document(label='first_doc') self.grant_access( obj=self.test_document, permission=permission_document_view ) - queryset, elapsed_time = document_search.search( - {'q': 'Mayan'}, user=self._test_case_user + queryset = document_search.search( + {'q': 'first'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 1) self.assertTrue(self.test_document in queryset) @@ -36,90 +33,109 @@ class DocumentSearchTestCase(DocumentTestMixin, BaseTestCase): obj=self.test_document, permission=permission_document_view ) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'label': self.test_document.label}, user=self._test_case_user ) self.assertEqual(queryset.count(), 1) self.assertTrue(self.test_document in queryset) # Test versions__mimetype - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'versions__mimetype': self.test_document.file_mimetype}, user=self._test_case_user ) self.assertEqual(queryset.count(), 1) self.assertTrue(self.test_document in queryset) - def test_simple_or_search(self): - self.upload_document(label='first_doc.pdf') - + def test_meta_only(self): + self.upload_document(label='first_doc') self.grant_access( obj=self.test_document, permission=permission_document_view ) + queryset = document_search.search( + {'q': 'OR first'}, user=self._test_case_user + ) + self.assertEqual(queryset.count(), 1) - self.test_document_filename = TEST_SMALL_DOCUMENT_FILENAME - self.test_document_2 = self.upload_document(label='second_doc.pdf') - + def test_simple_or_search(self): + self.upload_document(label='first_doc') + self.grant_access( + obj=self.test_document, permission=permission_document_view + ) + self.test_document_2 = self.upload_document(label='second_doc') self.grant_access( obj=self.test_document_2, permission=permission_document_view ) - - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'q': 'first OR second'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 2) self.assertTrue(self.test_document in queryset) self.assertTrue(self.test_document_2 in queryset) - def test_simple_and_search(self): - self.test_document_filename = TEST_SMALL_DOCUMENT_FILENAME - self.test_document_2 = self.upload_document(label='second_doc.pdf') + def test_advanced_or_search(self): + self.upload_document(label='first_doc') + self.grant_access( + obj=self.test_document, permission=permission_document_view + ) + self.test_document_2 = self.upload_document(label='second_doc') self.grant_access( obj=self.test_document_2, permission=permission_document_view ) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( + {'label': 'first OR second'}, user=self._test_case_user + ) + self.assertEqual(queryset.count(), 2) + self.assertTrue(self.test_document in queryset) + self.assertTrue(self.test_document_2 in queryset) + + def test_simple_and_search(self): + self.upload_document(label='second_doc') + + self.grant_access( + obj=self.test_document, permission=permission_document_view + ) + + queryset = document_search.search( {'q': 'non_valid second'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 0) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'q': 'second non_valid'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 0) def test_simple_negated_search(self): - self.test_document_filename = TEST_SMALL_DOCUMENT_FILENAME - self.test_document_2 = self.upload_document() - self.test_document_2.label = 'second_doc.pdf' - self.test_document_2.save() + self.test_document_2 = self.upload_document(label='second_doc') self.grant_access( obj=self.test_document_2, permission=permission_document_view ) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'q': '-non_valid second'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 1) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'label': '-second'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 0) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'label': '-second -Mayan'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 0) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'label': '-second OR -Mayan'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 1) - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'label': '-non_valid -second'}, user=self._test_case_user ) self.assertEqual(queryset.count(), 0) diff --git a/mayan/apps/dynamic_search/tests/test_views.py b/mayan/apps/dynamic_search/tests/test_views.py index 80935be39c..a7c01d73bd 100644 --- a/mayan/apps/dynamic_search/tests/test_views.py +++ b/mayan/apps/dynamic_search/tests/test_views.py @@ -30,7 +30,7 @@ class Issue46TestCase(DocumentTestMixin, GenericViewTestCase): ) # Make sure all documents are returned by the search - queryset, elapsed_time = document_search.search( + queryset = document_search.search( {'label': test_document_label}, user=self._test_case_user ) self.assertEqual(queryset.count(), self.test_document_count) diff --git a/mayan/apps/dynamic_search/views.py b/mayan/apps/dynamic_search/views.py index 6548ce6def..38c68f65db 100644 --- a/mayan/apps/dynamic_search/views.py +++ b/mayan/apps/dynamic_search/views.py @@ -45,9 +45,9 @@ class ResultsView(SearchModelMixin, SingleObjectListView): else: global_and_search = False - queryset, timedelta = self.search_model.search( - query_string=self.request.GET, user=self.request.user, - global_and_search=global_and_search + queryset = self.search_model.search( + global_and_search=global_and_search, + query_string=self.request.GET, user=self.request.user ) return queryset