Skip to content
Snippets Groups Projects
views.py 10.2 KiB
Newer Older
# -*- coding: utf-8 -*-
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""elasticsearch search views"""
from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl import FacetedSearch, TermsFacet, DateHistogramFacet
from bs4 import BeautifulSoup

from logilab.mtconverter import xml_escape

import cwtags.tag as t
from cubes.elasticsearch.es import indexable_types, get_connection
from cubes.elasticsearch.search_helpers import compose_search
class CWFacetedSearch(FacetedSearch):
    """Faceted search exposing the ``cw_etype`` and ``creation_date`` facets.

    The document types to search can be set per instance through the
    ``doc_types`` constructor argument.
    """
    # fields that should be searched
    # NOTE(review): no ``fields`` attribute is visible in this class, so
    # ``self.fields`` resolves to whatever ``FacetedSearch`` provides --
    # confirm that this is intended.

    facets = {
        # use bucket aggregations to define facets
        'cw_etype': TermsFacet(field='cw_etype'),
        'creation_date': DateHistogramFacet(field='creation_date', interval='month'),
    }

    def __init__(self, query=None, filters=None, doc_types=None):
        """Build the search.

        :param query: text to search for (may be empty)
        :param filters: mapping of facet name to selected value(s);
            ``None`` means no filter
        :param doc_types: when truthy, overrides the ``doc_types`` attribute
            for this instance
        """
        if doc_types:
            self.doc_types = doc_types
        # a fresh dict per call instead of a shared mutable default argument
        if filters is None:
            filters = {}
        super(CWFacetedSearch, self).__init__(query, filters)

    def query(self, search, query):
        """Return ``search`` restricted to ``query``.

        When ``query`` is empty the search is returned unchanged, so the
        caller always gets a search object back (never ``None``).
        """
        if query:
            return compose_search(search, query=query, fields=self.fields)
        return search

    def highlight(self, search):
        """
        Add custom highlighting
        """
        highlighted = search.highlight(*self.fields)
        return highlighted.highlight_options(pre_tags="",
                                             post_tags="",
                                             fragment_size=150)


class ElasticSearchView(StartupView):
    __regid__ = "esearch"

    def render_search_comp(self):
        '''
        Render the 'search-comp' component, if one can be selected
        '''
        components = self._cw.vreg['components']
        selected = components.select_or_none('search-comp', self._cw)
        if not selected:
            return
        selected.render(w=self.w)

    def call(self, **kwargs):
        '''
        Render the search page.

        Writes the search component and headings, reads the query from the
        ``search`` form parameter, collects every ``es_``-prefixed form
        parameter as a facet selection, turns the ``page`` parameter into a
        window of 10 hits, executes the search on the configured
        ``index-name`` and displays the facets when the response has some.
        '''
        # TODO if no ES configuration, redirect or display warning
        page_size = 10
        facet_prefix = 'es_'
        form = self._cw.form
        self.render_search_comp()
        self.w(u'<h1>%s</h1>' % self._cw._('Recherche'))
        # the raw text goes to elasticsearch; escaping is only for the HTML
        # output (an escaped query would search for '&amp;' instead of '&')
        query_string = form.get('search', '')
        self.w(u'<h2>Resultats pour : <em>%s</em></h2>'
               % xml_escape(query_string))
        facet_selections = {}
        start = 0
        for key, value in form.items():
            if key.startswith(facet_prefix):
                # strip the prefix only, not later occurrences of 'es_'
                facet_selections[key[len(facet_prefix):]] = value
            elif key == 'page':
                try:
                    page = int(value)
                except (TypeError, ValueError):
                    # unusable page number: fall back to the first page
                    page = 1
                start = max(page - 1, 0) * page_size
        stop = start + page_size
        search = self.customize_search(query_string, facet_selections,
                                       start, stop)
        # force search to be executed on our index, otherwise FacetSearch
        # uses '_all'
        search.index = self._cw.vreg.config['index-name']
        try:
            response = search.execute()
        except NotFoundError:
            self.w(u'index not found in elasticsearch')
            return
        if hasattr(response, 'facets'):
            self.display_facets(response)
    def customize_search(self, query_string, facet_selections,
                         start=0, stop=10):
        '''
        Build the search object for the requested slice of results.

        Override this in a subclass to customize the search by modifying
        the query string and the facet selection, for example:

        * extract specific keywords such as id:text from the query and
          add them to facet_selections
        * use your own CWFacetedSearch class to modify fields
          and facets
        '''
        doc_types = indexable_types(self._cw.vreg.schema)
        faceted = CWFacetedSearch(query_string,
                                  facet_selections,
                                  doc_types=doc_types)
        return faceted[start:stop]

        self.w(u'<div id="main-center" class="col-xs-10" role="main">')
        self.pagination(response)
        self.w(u'<ul>')
        for result in response:
            self.w(u'<li>')
            infos = result.to_dict()
            infos['_score'] = result.meta.score
            infos['keys'] = result.to_dict().keys()
Arthur Lutz's avatar
Arthur Lutz committed
            infos['url'] = infos['cwuri'].startswith(
                '_auto_generated') and infos['eid'] or infos['cwuri']
Arthur Lutz's avatar
Arthur Lutz committed
                self.w(
                    u'<a href="%(url)s">%(title)s</a> (%(_score).2f)<br/>' % (infos))
                if self._cw.form.get('debug-es'):
                    self.w(u' [%(keys)s] <br/>' % infos)
            except KeyError:
                self.w(u'Missing key in : %s' % infos.keys())
            try:
                for fragment in result.meta.highlight.content:
Arthur Lutz's avatar
Arthur Lutz committed
                    self.w(u'... %s' %
                           BeautifulSoup(fragment, 'lxml').get_text())
                    self.w(u' ... <br/>')
            except AttributeError:
                pass
            self.w(u'</li>')
        self.w(u'</ul>')
        self.pagination(response)
    def customize_infos(self, infos):
        '''
        Hook for adjusting the infos being displayed; does nothing here.

        A subclass can override it, for example to set the title according
        to its own rules and data set.
        '''
        return None

    def pagination(self, response):
        '''
        Generate HTML for the pagination bar (nothing when there are 10
        hits or fewer).

        The bar holds a "previous" arrow, the first pages, a window of
        pages around the current one, the last pages and a "next" arrow;
        skipped ranges are shown as three middle dots.
        '''
        page_size = 10
        total_hits = response.hits.total
        if total_hits <= page_size:
            return
        url_params = self._cw.form.copy()
        with t.ul(self.w, klass="pagination") as ul:
            try:
                current_page = max(int(url_params.get('page', 1)), 1)
            except (TypeError, ValueError):
                # unusable page number: behave as on the first page
                current_page = 1
            url_params['page'] = current_page - 1
            if current_page - 1 >= 1:
                ul(t.li(t.a('&lt;' * 3,
                            href=self._cw.build_url(**url_params))))
            else:
                ul(t.li(t.a('&lt;' * 3)))
            # exclusive upper bound for range(): last page number + 1.
            # Integer ceiling division, so that an exact multiple of the
            # page size does not produce an extra empty page and range()
            # never receives a float.
            total_pages = (total_hits + page_size - 1) // page_size + 1
            page_padding = 3
            if current_page > page_padding:
                for page in range(1,
                                  min(page_padding + 1,
                                      current_page - page_padding)):
                    self.page_number(url_params, page, current_page, ul)
                if current_page > (page_padding * 2) + 1:
                    ul(t.li(t.a("&middot;" * 3)))
            for page in range(max(1, current_page - page_padding),
                              min(current_page + page_padding, total_pages)):
                self.page_number(url_params, page, current_page, ul)
            if current_page < total_pages - page_padding:
                if current_page < total_pages - page_padding * 2:
                    ul(t.li(t.a("&middot;" * 3)))
                for page in range(max(current_page + page_padding,
                                      total_pages - page_padding),
                                  total_pages):
                    self.page_number(url_params, page, current_page, ul)

            # page_number() overwrote url_params['page']; set it for "next"
            url_params['page'] = current_page + 1
            if current_page + 1 >= total_pages:
                ul(t.li(t.a('&gt;' * 3)))
            else:
                ul(t.li(t.a('&gt;' * 3,
                            href=self._cw.build_url(**url_params))))
    def page_number(self, url_params, page, current_page, ul):
        '''
        Generate HTML for page number (bold if page is current_page)

        ``url_params`` is modified in place: its 'page' entry is set to
        ``page`` before the link is built. The link URL is returned.
        '''
        url_params['page'] = page
        url = self._cw.build_url(**url_params)
        label = t.b(page) if page == current_page else page
        ul(t.li(t.a(label, href=url)))
        return url

    @property
    def facets_to_display(self):
        '''
        Names of the facets to display, as a tuple (can be customized)
        '''
        displayed = ('cw_etype',)
        return displayed
Arthur Lutz's avatar
Arthur Lutz committed

Arthur Lutz's avatar
Arthur Lutz committed
        '''
        Generate HTML for facets
        '''
        self.w(u'''<aside id="aside-main-left" class="col-xs-2 cwjs-aside">
                   <div class="panel panel-default contextFreeBox facet_filterbox">
                      <div class="panel-heading">
                         <div class="panel-title">Facettes</div>
            url_params = self._cw.form.copy()
            if 'page' in url_params:
                del url_params['page']
            self.w(u'<div class="facetBody vocabularyFacet">')
            self.w(u'<div class="facetTitle">{}</div>'.format(attribute))
            for (tag, count, selected) in response.facets[attribute]:
                # facetValueSelected / facetValueDisabled in class
                facet_item = u'<div class="facetValue facetCheckBox">' \
                             '    <span>' \
                             '      <a href="{}">{} {}</a>' \
                             '    </span>' \
                             '</div>'
                if url_params.get('es_{}'.format(attribute)) != tag:
                    url_params['es_{}'.format(attribute)] = str(tag)
                else:
                    del url_params['es_{}'.format(attribute)]
                url = self._cw.build_url(**url_params)
Arthur Lutz's avatar
Arthur Lutz committed
                content = selected and '<b>{}</b>'.format(tag) or tag
                self.w(facet_item.format(url,
Arthur Lutz's avatar
Arthur Lutz committed
                                         content,
                                         count))