diff --git a/HISTORY.rst b/HISTORY.rst index 310ac8f4f8..999a7f0637 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -46,6 +46,7 @@ Next (2018-XX-XX) test_document_views.py, test_duplicated_document_views.py - Sort smart links by label. - Rename the internal name of the document type permissions namespace. Existing permissions will need to be updated. +- Add support for OR type searches. Use the "OR" string between the terms. Example: term1 OR term2. 2.8 (2018-02-27) ================ diff --git a/mayan/apps/dynamic_search/classes.py b/mayan/apps/dynamic_search/classes.py index 982fdf9add..895e479d7f 100644 --- a/mayan/apps/dynamic_search/classes.py +++ b/mayan/apps/dynamic_search/classes.py @@ -10,19 +10,26 @@ from django.utils.encoding import force_text from django.utils.module_loading import import_string from django.utils.translation import ugettext as _ - from .settings import setting_limit logger = logging.getLogger(__name__) class SearchModel(object): - registry = {} + _registry = {} + + @classmethod + def all(cls): + return cls._registry.values() + + @classmethod + def as_choices(cls): + return cls._registry @classmethod def get(cls, full_name): try: - result = cls.registry[full_name] + result = cls._registry[full_name] except KeyError: raise KeyError(_('No search model matching the query')) if not hasattr(result, 'serializer'): @@ -30,13 +37,82 @@ class SearchModel(object): return result - @classmethod - def as_choices(cls): - return cls.registry + @staticmethod + def get_terms(text): + """ + Takes a text string and returns a list of dictionaries. + Each dictionary has two key "negated" and "string" - @classmethod - def all(cls): - return cls.registry.values() + String 'a "b c" d "e" \'f g\' h -i -"j k" l -\'m n\' o OR p' + + Results in: + [ + {'negated': False, 'string': 'a'}, {'negated': False, 'string': 'b c'}, + {'negated': False, 'string': 'd'}, {'negated': False, 'string': 'e'}, + {'negated': False, 'string': 'f g'}, {'negated': False, 'string': 'h'}, + {'negated': True, 'string': 'i'}, {'negated': True, 'string': 'j k'}, + {'negated': False, 'string': 'l'}, {'negated': True, 'string': 'm n'}, + {'negated': False, 'string': 'o'}, {'negated': False, 'string': 'OR'}, + {'negated': False, 'string': 'p'} + ] + """ + QUOTES = ['"', '\''] + NEGATION_CHARACTER = '-' + SPACE_CHARACTER = ' ' + + inside_quotes = False + negated = False + term = [] + terms = [] + + for letter in text: + if not inside_quotes and letter == NEGATION_CHARACTER: + negated = True + else: + if letter in QUOTES: + if inside_quotes: + if term: + terms.append( + { + 'meta': False, + 'negated': negated, + 'string': ''.join(term) + } + ) + negated = False + term = [] + + inside_quotes = not inside_quotes + else: + if not inside_quotes and letter == SPACE_CHARACTER: + if term: + if term == ['O', 'R']: + meta = True + else: + meta = False + + terms.append( + { + 'meta': meta, + 'negated': negated, + 'string': ''.join(term) + } + ) + negated = False + term = [] + else: + term.append(letter) + + if term: + terms.append( + { + 'meta': False, + 'negated': negated, + 'string': ''.join(term) + } + ) + + return terms def __init__(self, app_label, model_name, serializer_string, label=None, permission=None): self.app_label = app_label @@ -46,11 +122,13 @@ class SearchModel(object): self._label = label self.serializer_string = serializer_string self.permission = permission - self.__class__.registry[self.get_full_name()] = self + self.__class__._registry[self.get_full_name()] = self @property - def pk(self): - return self.get_full_name() + def label(self): + if not self._label: + self._label = self.model._meta.verbose_name + return self._label @property def model(self): @@ -59,10 +137,8 @@ class SearchModel(object): return self._model @property - def label(self): - if not self._label: - self._label = self.model._meta.verbose_name - return self._label + def pk(self): + return self.get_full_name() def add_model_field(self, *args, **kwargs): """ @@ -78,16 +154,22 @@ class SearchModel(object): fields. """ queries = [] - for term in terms: - or_query = None - for field in search_fields: - q = Q(**{'%s__%s' % (field, 'icontains'): term}) - if or_query is None: - or_query = q - else: - or_query = or_query | q - queries.append(or_query) + for term in terms: + query = None + if term['string'] != 'OR': + for field in search_fields: + q = Q(**{'%s__%s' % (field, 'icontains'): term['string']}) + + if term['negated']: + q = ~q + + if query is None: + query = q + else: + query = query | q + + queries.append(query) return queries def get_all_search_fields(self): @@ -112,20 +194,6 @@ class SearchModel(object): except KeyError: raise KeyError('No search field named: %s' % full_name) - def normalize_query(self, query_string, - findterms=re.compile(r'"([^"]+)"|(\S+)').findall, - normspace=re.compile(r'\s{2,}').sub): - """ - Splits the query string in invidual keywords, getting rid of - unecessary spaces and grouping quoted words together. - Example: - >>> normalize_query(' some random words "with quotes " and spaces') - ['some', 'random', 'words', 'with quotes', 'and', 'spaces'] - """ - return [ - normspace(' ', (t[0] or t[1]).strip()) for t in findterms(query_string) - ] - def search(self, query_string, user, global_and_search=False): AccessControlList = apps.get_model( app_label='acls', model_name='AccessControlList' @@ -147,7 +215,7 @@ class SearchModel(object): search_dict[search_field.get_model()]['searches'].append( { 'field_name': [search_field.field], - 'terms': self.normalize_query( + 'terms': SearchModel.get_terms( query_string.get('q', '').strip() ) } @@ -163,7 +231,7 @@ class SearchModel(object): search_dict[search_field.get_model()]['searches'].append( { 'field_name': [search_field.field], - 'terms': self.normalize_query( + 'terms': SearchModel.get_terms( query_string[search_field.field] ) } @@ -187,29 +255,40 @@ class SearchModel(object): field_result_set = set() # Get results per search field + intersection = True for query in field_query_list: logger.debug('query: %s', query) - term_query_result_set = set( - model.objects.filter(query).values_list( - data['return_value'], flat=True + + if query: + term_query_result_set = set( + model.objects.filter(query).values_list( + data['return_value'], flat=True + ) ) - ) - # Convert the QuerySet to a Python set and perform the - # AND operation on the program and not as a query. - # This operation ANDs all the field term results - # belonging to a single model, making sure to only include - # results in the final field result variable if all the - # terms are found in a single field. - if not field_result_set: - field_result_set = term_query_result_set + # Convert the QuerySet to a Python set and perform the + # AND operation on the program and not as a query. + # This operation ANDs all the field term results + # belonging to a single model, making sure to only include + # results in the final field result variable if all the + # terms are found in a single field. + if not field_result_set: + field_result_set = term_query_result_set + else: + if intersection: + field_result_set &= term_query_result_set + else: + field_result_set |= term_query_result_set + + + logger.debug( + 'term_query_result_set: %s', len(term_query_result_set) + ) + logger.debug('field_result_set: %s', len(field_result_set)) + + intersection = True else: - field_result_set &= term_query_result_set - - logger.debug( - 'term_query_result_set: %s', term_query_result_set - ) - logger.debug('field_result_set: %s', field_result_set) + intersection = False if global_and_search: if not model_result_set: @@ -233,7 +312,7 @@ class SearchModel(object): if self.permission: queryset = AccessControlList.objects.filter_by_access( - self.permission, user, queryset + permission=self.permission, user=user, queryset=queryset ) return queryset, elapsed_time diff --git a/mayan/apps/dynamic_search/tests/test_models.py b/mayan/apps/dynamic_search/tests/test_models.py index 5059c22bc9..b75b998153 100644 --- a/mayan/apps/dynamic_search/tests/test_models.py +++ b/mayan/apps/dynamic_search/tests/test_models.py @@ -52,3 +52,27 @@ class DocumentSearchTestCase(BaseTestCase): ) self.assertEqual(queryset.count(), 1) self.assertTrue(self.document in queryset) + + def test_simple_or_search(self): + with open(TEST_SMALL_DOCUMENT_PATH) as file_object: + self.document_2 = self.document_type.new_document( + file_object=file_object, label='second_doc.pdf' + ) + + queryset, elapsed_time = document_search.search( + {'q': 'Mayan OR second'}, user=self.admin_user + ) + self.assertEqual(queryset.count(), 2) + self.assertTrue(self.document in queryset) + self.assertTrue(self.document_2 in queryset) + + def test_simple_and_search(self): + with open(TEST_SMALL_DOCUMENT_PATH) as file_object: + self.document_2 = self.document_type.new_document( + file_object=file_object, label='second_doc.pdf' + ) + + queryset, elapsed_time = document_search.search( + {'q': 'Mayan second'}, user=self.admin_user + ) + self.assertEqual(queryset.count(), 0)