# -*- coding: utf-8 -*-
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from elasticsearch_dsl import Q, query as dsl_query

from logilab.mtconverter import xml_escape


def _split_on_or(elements):
    """Return the operands separated by standalone ``or`` tokens.

    ``elements`` is a list of whitespace-separated tokens. Tokens equal to
    ``or`` (compared case-insensitively) act as separators; the tokens found
    between two separators are joined back with a single space. Empty
    operands (leading, trailing or consecutive ``or``) are dropped.
    """
    operands = []
    current = []
    for element in elements:
        if element.lower() == 'or':
            if current:
                operands.append(' '.join(current))
            current = []
        else:
            current.append(element)
    if current:
        operands.append(' '.join(current))
    return operands


def compose_search(search, query=None, fields=()):
    """Compose an elasticsearch-dsl query from a user query string.

    Supported forms:

    * simple term
    * simple terms (OR)
    * negation (add - in front of a term)
    * explicit OR
    * quoted terms (AND)

    :param search: object exposing a ``query()`` method (called as
        ``search.query('bool', ...)``); its return value is returned as is.
    :param query: the user query string; ``None`` is handled like an empty
        string.
    :param fields: fields handed to every ``multi_match`` clause.
    :return: the result of ``search.query('bool', ...)``.
    """
    # FIXME TODO - restructure entire code base, have a proper lexer
    if query is None:
        # the default value would otherwise fail on the ``in`` test below
        query = ''
    for char in ('"', "'", xml_escape('"'), xml_escape("'")):
        if char in query:
            # Quoted search: only the text following the first quote
            # character (up to the next one, if any) is used, as a phrase.
            return search.query('bool',
                                must=Q('multi_match',
                                       query=query.split(char)[1],
                                       type="phrase",
                                       fields=fields))
    must = []
    must_not = []
    should = []
    elements = query.split()
    has_explicit_or = (len(elements) >= 3
                       and any(e.lower() == 'or' for e in elements))
    # Split on whole ``or`` tokens only: a plain ``query.split('or')`` would
    # also cut inside words containing "or" and would miss "OR".
    operands = _split_on_or(elements) if has_explicit_or else []
    if operands:
        for operand in operands:
            should.append(Q('multi_match',
                            query=operand,
                            fields=fields))
        elements = []
    elif '-' not in query:
        if len(elements) > 1:
            # should with match_phrase to increase score for proximity
            # https://www.elastic.co/guide/en/elasticsearch/guide/current/proximity-relevance.html
            # TODO configurable slop ?
            should.append(Q('multi_match',
                            type="phrase",
                            query=query,
                            fields=fields,
                            slop=50))
        # without negation, all the terms are searched as a single string
        elements = [' '.join(elements)]
    for element in elements:
        if element.startswith('-'):
            # negation: exclude documents matching the term after the dash
            must_not.append(Q('multi_match',
                              query=element[1:],
                              fields=fields))
        else:
            should.append(Q('multi_match',
                            query=element,
                            fields=fields))
            should.append(dsl_query.Fuzzy(_all=element))
    return search.query('bool',
                        must=must,
                        must_not=must_not,
                        should=should,
                        minimum_should_match="30%")