# -*- coding: utf-8 -*-
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""elasticsearch search views"""

from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import FacetedSearch, TermsFacet, DateHistogramFacet

from bs4 import BeautifulSoup

from logilab.mtconverter import xml_escape
import cwtags.tag as t
from cubicweb.view import StartupView

from cubes.elasticsearch.es import indexable_types
from cubes.elasticsearch.search_helpers import compose_search

# Number of hits displayed on one result page.
PAGE_SIZE = 10
# Maximum number of page links rendered at the beginning of the pager.
MAX_LEADING_PAGES = 10
# Number of page links rendered at the end of the pager when pages are skipped.
TRAILING_PAGES = 3
# Prefix of the request parameters carrying facet selections.
FACET_PARAM_PREFIX = 'es_'


def get_connection(config):
    """Ensure a default elasticsearch_dsl connection is registered.

    If no default connection exists yet, one is created from the
    ``elasticsearch-locations`` (comma separated hosts) and ``index-name``
    entries of ``config``. Nothing is returned.
    """
    try:
        connections.get_connection()
    except KeyError:
        locations = config['elasticsearch-locations']
        index_name = config['index-name']
        # TODO sanitize locations
        # strip blanks so that "host1, host2" is accepted as well
        hosts = [host.strip() for host in locations.split(',')]
        connections.create_connection(hosts=hosts,
                                      index=index_name,
                                      timeout=20)


class CWFacetedSearch(FacetedSearch):
    """Faceted search over the indexed documents, with highlighting."""

    # fields that should be searched
    fields = ["unitid^6", "title^3", "unittitle^3", "name^3",
              "content^2", 'content', '_all']

    facets = {
        # use bucket aggregations to define facets
        'cw_etype': TermsFacet(field='cw_etype'),
        'unitid': TermsFacet(field='unitid'),
        'creation_date': DateHistogramFacet(field='creation_date',
                                            interval='month'),
        'commemoration_year': DateHistogramFacet(field='commemoration_year',
                                                 interval='year'),
        'year': DateHistogramFacet(field='year', interval='year'),
    }

    def __init__(self, query=None, filters=None, doc_types=None):
        """Build the search.

        :param query: the text to search for
        :param filters: mapping of facet name to selected value(s)
        :param doc_types: when given, overrides ``self.doc_types``
        """
        if doc_types:
            self.doc_types = doc_types
        # avoid sharing one mutable default dict between all instances
        if filters is None:
            filters = {}
        super(CWFacetedSearch, self).__init__(query, filters)

    def query(self, search, query):
        """Return ``search`` restricted to ``query`` (unchanged if empty)."""
        if query:
            return compose_search(search, query=query, fields=self.fields)
        return search

    def highlight(self, search):
        """
        Add custom highlighting
        """
        return search.highlight(*self.fields) \
            .highlight_options(pre_tags="",
                               post_tags="",
                               fragment_size=150)


class ElasticSearchView(StartupView):
    """Startup view displaying elasticsearch results, facets and a pager."""
    __regid__ = "esearch"

    def _requested_page(self):
        """Return the 1-based page number found in the request form.

        Falls back to 1 when the ``page`` parameter is missing, is not an
        integer, or is lower than 1.
        """
        try:
            page = int(self._cw.form.get('page', 1))
        except (TypeError, ValueError):
            return 1
        return max(page, 1)

    def call(self, **kwargs):
        """Run the search described by the request form and render it.

        Form parameters read here: ``search`` (query text, a ``cote:``
        prefix turns the remainder into a ``unitid`` filter), ``page``
        (1-based page number) and ``es_<facet>`` (facet selections).
        """
        # TODO if no ES configuration, redirect or display warning
        search_comp = self._cw.vreg['components'].select_or_none('search-comp',
                                                                 self._cw)
        if search_comp:
            search_comp.render(w=self.w)
        self.w(u'<h1>%s</h1>' % self._cw._('Recherche'))
        # keep the raw text for the search itself, escape it for display only
        query_string = self._cw.form.get('search', '')
        self.w(u'<h2>Résultats pour : <em>%s</em></h2>'
               % xml_escape(query_string))
        get_connection(self._cw.vreg.config)
        facet_selections = {}
        for key, value in self._cw.form.items():
            if not key.startswith(FACET_PARAM_PREFIX):
                continue
            # only drop the leading prefix, not every 'es_' occurrence
            facet_name = key[len(FACET_PARAM_PREFIX):]
            # ignore selections on facets the search does not declare
            if facet_name in CWFacetedSearch.facets:
                facet_selections[facet_name] = value
        start = (self._requested_page() - 1) * PAGE_SIZE
        stop = start + PAGE_SIZE
        indexable = indexable_types(self._cw.vreg.schema)
        if query_string.startswith('cote:'):
            # split once only: the identifier itself may contain ':'
            query_string = query_string.split(':', 1)[1]
            facet_selections['unitid'] = query_string
        search = CWFacetedSearch(query_string,
                                 facet_selections,
                                 doc_types=indexable)[start:stop]
        try:
            response = search.execute()
        except NotFoundError:
            self.w(u'index not found in elasticsearch')
            return
        self.w(u'Résultats: %s' % response.hits.total)
        if response.facets:
            self.display_facets(response)
        self.display_results(response)

    def display_results(self, response):
        """Write the list of hits of ``response``, framed by two pagers."""
        self.w(u'<div id="main-center" class="col-xs-10" role="main">')
        self.pagination(response)
        self.w(u'<ul>')
        debug = self._cw.form.get('debug-es')
        for result in response:
            self.w(u'<li>')
            infos = result.to_dict()
            # document keys, captured before the display-only entries below
            infos['keys'] = list(infos)
            infos['_score'] = result.meta.score
            cwuri = infos['cwuri']
            # auto generated URIs are replaced by the eid when there is one
            if cwuri.startswith('_auto_generated') and infos['eid']:
                infos['url'] = infos['eid']
            else:
                infos['url'] = cwuri
            infos.setdefault('title',
                             infos.get('name',
                                       infos.get('reference',
                                                 infos.get('unittitle',
                                                           u'n/a'))))
            # NOTE(review): title is written without HTML escaping -- confirm
            # that indexed titles are trusted or already escaped
            try:
                self.w(u'<a href="%(url)s">%(title)s</a> (%(_score).2f)<br/>'
                       % (infos))
                if debug:
                    self.w(u' [%(keys)s] <br/>' % infos)
            except KeyError:
                self.w(u'Missing key in : %s' % list(infos))
            # only the attribute lookup is guarded, so that errors raised
            # while rendering fragments are not silently swallowed
            try:
                fragments = result.meta.highlight.content
            except AttributeError:
                fragments = ()
            for fragment in fragments:
                self.w(u'... %s' % BeautifulSoup(fragment, 'lxml').get_text())
                self.w(u' ... <br/>')
            self.w(u'</li>')
        self.w(u'</ul>')
        self.pagination(response)
        self.w(u'</div>')

    def pagination(self, response):
        """Write the pager, unless all hits fit on a single page.

        The pager shows a "previous" link, up to ``MAX_LEADING_PAGES``
        first pages, the last ``TRAILING_PAGES`` pages (preceded by an
        ellipsis when pages are skipped) and a "next" link.
        """
        total = response.hits.total
        if total <= PAGE_SIZE:
            return
        # ceiling division; '//' keeps an integer on Python 2 and 3
        page_count = (total + PAGE_SIZE - 1) // PAGE_SIZE
        current_page = self._requested_page()
        url_params = self._cw.form.copy()

        def page_url(page):
            url_params['page'] = page
            return self._cw.build_url(**url_params)

        def page_link(page):
            label = t.b(page) if page == current_page else page
            return t.li(t.a(label, href=page_url(page)))

        with t.ul(self.w, klass="pagination") as ul:
            if current_page > 1:
                ul(t.li(t.a('<<<', href=page_url(current_page - 1))))
            else:
                ul(t.li(t.a('<<<')))
            leading_last = min(page_count, MAX_LEADING_PAGES)
            for page in range(1, leading_last + 1):
                ul(page_link(page))
            # never repeat a page already listed among the leading ones
            trailing_first = max(page_count - TRAILING_PAGES + 1,
                                 leading_last + 1)
            if trailing_first > leading_last + 1:
                ul(t.li(t.a("...")))
            for page in range(trailing_first, page_count + 1):
                ul(page_link(page))
            if current_page < page_count:
                ul(t.li(t.a('>>>', href=page_url(current_page + 1))))
            else:
                ul(t.li(t.a('>>>')))

    def display_facets(self, response):
        """Write the facet box for the ``cw_etype`` and ``creation_date``
        facets; each value links to the same search with that value
        selected, or unselected when it is the one found in the form.
        """
        self.w(u'<aside id="aside-main-left" class="col-xs-2 cwjs-aside"> '
               u'<div class="panel panel-default contextFreeBox '
               u'facet_filterbox"> '
               u'<div class="panel-heading"> '
               u'<div class="panel-title">Facettes</div> '
               u'</div> ')
        # facetValueSelected / facetValueDisabled in class
        facet_item = (u'<div class="facetValue facetCheckBox">'
                      u' <span>'
                      u' <a href="{}">{} {}</a>'
                      u' </span>'
                      u'</div>')
        for attribute in ('cw_etype', 'creation_date'):
            url_params = self._cw.form.copy()
            # changing the facet selection restarts from the first page
            url_params.pop('page', None)
            param = 'es_{}'.format(attribute)
            self.w(u'<div class="facetBody vocabularyFacet">')
            self.w(u'<div class="facetTitle">{}</div>'.format(attribute))
            for (tag, count, selected) in response.facets[attribute]:
                if url_params.get(param) != tag:
                    # unicode formatting: str() fails on non-ASCII values
                    # under Python 2
                    url_params[param] = u'{}'.format(tag)
                else:
                    url_params.pop(param, None)
                # the URL lands in an attribute: '&' must be escaped
                url = xml_escape(self._cw.build_url(**url_params))
                label = u'<b>{}</b>'.format(tag) if selected else tag
                self.w(facet_item.format(url, label, count))
            self.w(u'</div>')
        self.w(u'</div></aside>')