# HG changeset patch # User Arthur Lutz <arthur.lutz@logilab.fr> # Date 1463662860 -7200 # Thu May 19 15:01:00 2016 +0200 # Node ID 4674c8a25531d94c845ecd43806cbf61188c27e3 # Parent 5a2f152284a8e02fb5f61cf571ec038eb4e98eab [search_helpers] build composite searches (with tests) diff --git a/search_helpers.py b/search_helpers.py new file mode 100644 --- /dev/null +++ b/search_helpers.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr -- mailto:contact@logilab.fr +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +from compiler.ast import flatten + +from elasticsearch_dsl import Q + + +def compose_search(query): + ''' + Compose an elasticsearch-dsl bool query from a search string supporting: + + * simple term + * simple terms (OR) + * negation (add - in front of a term) + * explicit OR (query is split on each lowercase 'or' substring) + * quoted terms (AND) + ''' + # FIXME TODO - restructure entire code base, have a proper lexer + must = [] + must_not = [] + should = [] + if '"' in query: + elements = flatten([x.split() for x in query.split('"') if x]) + elif "'" in query: + elements = flatten([x.split() for x in query.split("'") if x]) + else: + elements = query.split() + elements_lowercase = [e.lower() for e in elements] + if 'or' in elements_lowercase and len(elements) >= 3: + for element in query.split('or'): + should.append(Q('multi_match', + query=element.strip(), + fields=())) + elements = [] + elif '-' not in query: + elements = [' '.join(elements)] + for element in elements: + if element.startswith('-'): + must_not.append(Q('multi_match', + query=element[1:], + fields=())) + else: + must.append(Q('multi_match', + query=element, + fields=())) + return Q('bool', + must=must, + must_not=must_not, + should=should) diff --git a/test/test_compose_search.py b/test/test_compose_search.py new file mode 100644 --- /dev/null +++ b/test/test_compose_search.py @@ -0,0 +1,75 @@ +# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr -- mailto:contact@logilab.fr +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import unittest +from elasticsearch_dsl import Q + +from cubicweb.devtools import testlib +from cubes.elasticsearch.search_helpers import compose_search + + +class ComposeSearchTestCase(testlib.TestCase):  # checks the bool query compose_search builds for each supported syntax + def test_simple(self): + self.assertEqual(compose_search('test'), + Q('bool', + must=[Q('multi_match', query='test', fields=())])) + + def test_two_terms(self): + self.assertEqual(compose_search('test this'), + Q('bool', + must=[Q('multi_match', query='test this', fields=())])) + + def test_two_with_quotes(self): + self.assertEqual(compose_search('"test this"'), + Q('bool', + must=[Q('multi_match', query='test', fields=()), + Q('multi_match', query='this', fields=())])) + self.assertEqual(compose_search("'test this'"), + Q('bool', + must=[Q('multi_match', query='test', fields=()), + Q('multi_match', query='this', fields=())])) + + # def test_three_with_quotes(self): + # self.assertEqual(compose_search('"test this" this_too'), + # Q('bool', + # must=[Q('multi_match', query='test', fields=()), + # Q('multi_match', query='this', fields=())], + # should=)) + + def test_two_with_negate(self): + self.assertEqual(compose_search('test -this'), + Q('bool', + must=[Q('multi_match', query='test', fields=())], + must_not=[Q('multi_match', query='this', fields=())])) + + def test_two_with_or(self): + self.assertEqual(compose_search('test or this'), + Q('bool', + should=[Q('multi_match', query='test', fields=()), + Q('multi_match', query='this', fields=())])) + # self.assertEqual(compose_search('test ou this'), + # Q('bool', + # should=[Q('multi_match', query='test', fields=()), + # Q('multi_match', query='this', fields=())])) + + def test_or_on_its_own(self): + self.assertEqual(compose_search('or'), + Q('bool', + must=[Q('multi_match', query='or', fields=())])) + + +if __name__ == '__main__': + unittest.main()