Skip to content
Snippets Groups Projects
search_helpers.py 3.05 KiB
Newer Older
# -*- coding: utf-8 -*-
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from elasticsearch_dsl import Q, query as dsl_query
from logilab.mtconverter import xml_escape

def _split_on_or(tokens):
    '''
    Group whitespace-separated tokens into the alternatives of an explicit OR.

    A token equal to "or" (compared case-insensitively) acts as a separator;
    the tokens found between two separators are joined back with single
    spaces. Empty alternatives (leading, trailing or doubled "or") are
    dropped.
    '''
    alternatives = []
    current = []
    for token in tokens:
        if token.lower() == 'or':
            if current:
                alternatives.append(' '.join(current))
                current = []
        else:
            current.append(token)
    if current:
        alternatives.append(' '.join(current))
    return alternatives


def compose_search(search, query=None, fields=()):
    '''
    Compose an elasticsearch-dsl query from queries :

    * simple term
    * simple terms (OR)
    * negation (add - in front of a term)
    * explicit OR
    * quoted terms (AND)

    :param search: object exposing a ``query(...)`` method (presumably an
        elasticsearch-dsl ``Search`` -- confirm against callers)
    :param query: user-entered query string; when ``None`` the ``search``
        object is returned untouched
    :param fields: field names handed to every ``multi_match`` clause
    :return: the value returned by ``search.query('bool', ...)``, or
        ``search`` itself when ``query`` is ``None``
    '''
    # FIXME TODO - restructure entire code base, have a proper lexer
    if query is None:
        # nothing to compose: avoid a TypeError on the ``in`` tests below
        return search
    for char in ('"', "'", xml_escape('"'), xml_escape("'")):
        if char in query:
            # quoted terms: only the text following the first quote character
            # (up to the next one, if any) is searched, as an exact phrase
            return search.query('bool',
                                must=Q('multi_match',
                                       query=query.split(char)[1],
                                       type="phrase",
                                       fields=fields))
    must = []
    must_not = []
    should = []
    elements = query.split()
    elements_lowercase = [e.lower() for e in elements]
    if 'or' in elements_lowercase and len(elements) >= 3:
        # explicit OR: split on whole "or" tokens only, whatever their case,
        # so that words merely containing "or" are left intact
        for alternative in _split_on_or(elements):
            should.append(Q('multi_match',
                            query=alternative,
                            fields=fields))
        elements = []
    elif '-' not in query:
        if len(elements) > 1:
            # should with match_phrase to increase score for proximity
            # https://www.elastic.co/guide/en/elasticsearch/guide/current/proximity-relevance.html
            # TODO configurable slop ?
            should.append(Q('multi_match',
                            type="phrase",
                            query=query,
                            fields=fields,
                            slop=50))
        # no negation in the query: handle all the terms as a single element
        elements = [' '.join(elements)]
    for element in elements:
        if element.startswith('-'):
            # negation: the term (without its leading dash) must not match
            must_not.append(Q('multi_match',
                              query=element[1:],
                              fields=fields))
        else:
            should.append(Q('multi_match',
                            query=element,
                            fields=fields))
            # also add a fuzzy clause on the ``_all`` field for this element
            should.append(dsl_query.Fuzzy(_all=element))
    return search.query('bool',
                        must=must,
                        must_not=must_not,
                        should=should,
                        minimum_should_match="30%",
                        )