Newer
Older
# -*- coding: utf-8 -*-
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

Arthur Lutz
committed
"""elasticsearch search views"""
from elasticsearch.exceptions import NotFoundError

Arthur Lutz
committed
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import FacetedSearch, TermsFacet, DateHistogramFacet

Arthur Lutz
committed
from bs4 import BeautifulSoup
from logilab.mtconverter import xml_escape

Arthur Lutz
committed
from cubicweb.view import StartupView
from cubes.elasticsearch.es import indexable_types

Arthur Lutz
committed
def get_connection(config):
    """Make sure a default elasticsearch_dsl connection is registered.

    ``connections.get_connection()`` is tried first; when it raises
    ``KeyError`` a new connection is created from the configuration.

    :param config: mapping-like object providing the
        ``elasticsearch-locations`` (comma-separated hosts) and
        ``index-name`` entries
    """
    try:
        connections.get_connection()
    except KeyError:
        locations = config['elasticsearch-locations']
        index_name = config['index-name']
        # tolerate spaces around the separators and stray/trailing commas so
        # that "host1:9200, host2:9200," does not yield blank or padded hosts
        hosts = [host.strip()
                 for host in locations.split(',')
                 if host.strip()]
        connections.create_connection(hosts=hosts,
                                      index=index_name,
                                      timeout=20)

Arthur Lutz
committed
class CWFacetedSearch(FacetedSearch):
    """``FacetedSearch`` subclass declaring the searched fields, the facets
    and the highlighting options used by the ``esearch`` view.
    """
    # fields that should be searched
    # NOTE(review): 'content' appears both as "content^2" and as 'content' --
    # confirm the duplicate entry is intended
    fields = ["title^3", "name^3", "content^2", 'content', '_all']

    facets = {
        # use bucket aggregations to define facets
        'cw_etype': TermsFacet(field='cw_etype'),
        'creation_date': DateHistogramFacet(field='creation_date',
                                            interval='month'),
        'commemoration_year': DateHistogramFacet(field='commemoration_year',
                                                 interval='year'),
        'year': DateHistogramFacet(field='year', interval='year'),
    }

    def __init__(self, query=None, filters=None, doc_types=None):
        """Build the search.

        :param query: query text forwarded to ``FacetedSearch``
        :param filters: mapping of facet selections forwarded to
            ``FacetedSearch``; ``None`` (the default) means no selection
        :param doc_types: when truthy, stored as ``self.doc_types`` before
            the parent initialiser runs
        """
        if doc_types:
            self.doc_types = doc_types
        # a fresh dict per call instead of a mutable default shared by every
        # instance
        if filters is None:
            filters = {}
        super(CWFacetedSearch, self).__init__(query, filters)

    def highlight(self, search):
        """
        Add custom highlighting: every entry of ``fields`` is highlighted,
        with empty pre/post tags and a fragment size of 150.
        """
        highlighted = search.highlight(*self.fields)
        return highlighted.highlight_options(pre_tags="",
                                             post_tags="",
                                             fragment_size=150)

Arthur Lutz
committed
class ElasticSearchView(StartupView):
    """Startup view (regid ``esearch``) running a ``CWFacetedSearch`` built
    from the request form and writing the hit count, the facets and the
    results as HTML through ``self.w``.
    """
    __regid__ = "esearch"
    # number of hits requested per page (also used to compute page links)
    page_size = 10

    def call(self, **kwargs):
        """Render the search page.

        Form parameters read from ``self._cw.form``:

        * ``search``: the query text
        * ``es_<facet>``: selected value for facet ``<facet>``
        * ``page``: 1-based page number (invalid values fall back to page 1)
        """
        # TODO if no ES configuration, redirect or display warning
        search_comp = self._cw.vreg['components'].select_or_none('search-comp',
                                                                 self._cw)
        if search_comp:
            search_comp.render(w=self.w)
        self.w(u'<h1>%s</h1>' % self._cw._('Recherche'))
        # keep the raw text for the query itself; escape only what is written
        # into the page (escaping before searching would alter the query,
        # e.g. "&" becoming "&amp;")
        query_string = self._cw.form.get('search', '')
        self.w(u'<h2>Résultats pour : <em>%s</em></h2>'
               % xml_escape(query_string))
        get_connection(self._cw.vreg.config)
        facet_selections = {}
        start, stop = 0, self.page_size
        for key, value in self._cw.form.items():
            if key.startswith('es_'):
                # strip the prefix only (replace() would also remove any
                # later 'es_' occurrence inside the name)
                facet_name = key[len('es_'):]
                # ignore selections for facets the search does not declare
                if facet_name in CWFacetedSearch.facets:
                    facet_selections[facet_name] = value
            if key == 'page':
                try:
                    page = int(value)
                except (TypeError, ValueError):
                    # non-numeric page parameter: show the first page
                    page = 1
                start = max(page - 1, 0) * self.page_size
                stop = start + self.page_size
        indexable = indexable_types(self._cw.vreg.schema)
        search = CWFacetedSearch(query_string,
                                 facet_selections,
                                 doc_types=indexable)[start:stop]
        try:
            response = search.execute()
        except NotFoundError:
            self.w(u'index not found in elasticsearch')
            return
        self.w(u'Résultats: %s' % response.hits.total)
        if response.facets:
            self.display_facets(response)
        self.display_results(response)

    def display_results(self, response):
        """Write one ``<li>`` per hit of ``response``: a link built from the
        hit's ``cwuri`` and title, its score, and the highlighted ``content``
        fragments when the hit has some.
        """
        self.w(u'<div id="main-center" class="col-xs-10" role="main">')
        self.w(u'<ul>')
        for result in response:
            self.w(u'<li>')
            infos = result.to_dict()
            infos['_score'] = result.meta.score
            infos['keys'] = result.to_dict().keys()
            # title falls back on name, then reference, then u'n/a'
            infos.setdefault('title',
                             infos.get('name',
                                       infos.get('reference', u'n/a')))
            try:
                # values come from the index: escape them before they are
                # interpolated into markup (a missing 'cwuri' still raises
                # KeyError and is reported below)
                safe_infos = dict(infos,
                                  cwuri=xml_escape(u'%s' % infos['cwuri']),
                                  title=xml_escape(u'%s' % infos['title']))
                self.w(u'<a href="%(cwuri)s">%(title)s</a> (%(_score).2f)<br/>'
                       % safe_infos)
                if self._cw.form.get('debug-es'):
                    self.w(u' [%(keys)s] <br/>' % safe_infos)
            except KeyError:
                self.w(u'Missing key in : %s' % infos.keys())
            # hits without highlighted content raise AttributeError here
            try:
                fragments = result.meta.highlight.content
            except AttributeError:
                fragments = ()
            for fragment in fragments:
                # get_text() returns plain text, so it has to be escaped
                # again before being written as HTML
                text = BeautifulSoup(fragment, 'lxml').get_text()
                self.w(u'... %s' % xml_escape(text))
                self.w(u' ... <br/>')
            self.w(u'</li>')
        self.w(u'</ul>')
        self.w(u'</div>')

    def pagination(self, response):
        """Write the page links for ``response``.

        Nothing is written when all hits fit on a single page. Otherwise a
        "previous" link, up to ten leading page links, an ellipsis followed
        by the last pages when there are more than ten, and a "next" link
        are written. The current page number is shown in bold.

        NOTE(review): ``t`` is not imported in the visible part of this file
        -- presumably an HTML tag builder; confirm the import exists before
        calling this method.
        """
        total = response.hits.total
        # number of the last page: ceiling division with ``//`` so the result
        # stays an integer on Python 2 and Python 3
        last_page = (total + self.page_size - 1) // self.page_size
        if last_page <= 1:
            return
        url_params = self._cw.form.copy()
        try:
            current_page = int(url_params.get('page', 1))
        except (TypeError, ValueError):
            current_page = 1
        max_leading = 10
        with t.ul(self.w, klass="pagination") as ul:
            url_params['page'] = current_page - 1
            if current_page > 1:
                ul(t.li(t.a('<<<',
                            href=self._cw.build_url(**url_params))))
            else:
                ul(t.li(t.a('<<<')))
            for page in range(1, min(last_page, max_leading) + 1):
                url_params['page'] = page
                url = self._cw.build_url(**url_params)
                if page == current_page:
                    ul(t.li(t.a(t.b(page),
                                href=url)))
                else:
                    ul(t.li(t.a(page,
                                href=url)))
            if last_page > max_leading:
                ul(t.li(t.a("...")))
                # last three pages, without repeating a leading page
                first_trailing = max(last_page - 2, max_leading + 1)
                for page in range(first_trailing, last_page + 1):
                    url_params['page'] = page
                    url = self._cw.build_url(**url_params)
                    ul(t.li(t.a(page,
                                href=url)))
            url_params['page'] = current_page + 1
            if current_page >= last_page:
                ul(t.li(t.a('>>>')))
            else:
                ul(t.li(t.a('>>>',
                            href=self._cw.build_url(**url_params))))

    def display_facets(self, response):
        """Write the facet box for the ``cw_etype`` and ``creation_date``
        facets of ``response``.

        Each facet value is a link to the current URL with the matching
        ``es_<facet>`` parameter set, or removed when the request already
        carries that value. The ``page`` parameter is dropped from the links.
        """
        self.w(u'''<aside id="aside-main-left" class="col-xs-2 cwjs-aside">
<div class="panel panel-default contextFreeBox facet_filterbox">
<div class="panel-heading">
</div>
''')
        # facetValueSelected / facetValueDisabled in class
        facet_item = u'<div class="facetValue facetCheckBox">' \
                     u' <span>' \
                     u' <a href="{}">{} {}</a>' \
                     u' </span>' \
                     u'</div>'
        base_params = self._cw.form.copy()
        base_params.pop('page', None)
        for attribute in ('cw_etype', 'creation_date'):
            param = 'es_{}'.format(attribute)
            self.w(u'<div class="facetBody vocabularyFacet">')
            self.w(u'<div class="facetTitle">{}</div>'.format(attribute))
            for (tag, count, selected) in response.facets[attribute]:
                # every link starts from the request's own parameters so a
                # value set for one link never leaks into the next one
                link_params = dict(base_params)
                if link_params.get(param) != tag:
                    link_params[param] = str(tag)
                else:
                    del link_params[param]
                url = self._cw.build_url(**link_params)
                label = xml_escape(u'{}'.format(tag))
                if selected:
                    label = u'<b>{}</b>'.format(label)
                self.w(facet_item.format(xml_escape(url), label, count))
            self.w(u'</div>')
        self.w(u'</div></aside>')