# -*- coding: utf-8 -*-
# copyright 2016-2021 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""elasticsearch search views"""
import json

from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl import FacetedSearch, TermsFacet, DateHistogramFacet
from bs4 import BeautifulSoup

from logilab.mtconverter import xml_escape

import cwtags.tag as t
from cubicweb import _
from cubicweb.view import StartupView

from cubicweb_elasticsearch.es import get_connection
from cubicweb_elasticsearch.search_helpers import compose_search


def normalize_value(value):
    """Convert a raw form value to the Python value used as a facet filter.

    Non-string values are returned unchanged. For strings:

    * ``"true"`` / ``"false"`` (case-insensitive) become ``True`` / ``False``;
    * anything accepted by ``int()`` becomes an integer;
    * every other string is returned as is.
    """
    # FIXME TODO have a better typing mechanism (inspect facets?)
    if not isinstance(value, str):
        return value
    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    try:
        return int(value)
    except ValueError:
        # not a number: keep the original string
        return value


class CWFacetedSearch(FacetedSearch):
    """Faceted search used by the elasticsearch views.

    Extends ``elasticsearch_dsl.FacetedSearch`` with:

    * an optional per-instance ``index`` and ``doc_types``;
    * the request ``form``, whose flags drive how the query is composed;
    * a ``track_total_hits`` setting forwarded to Elasticsearch;
    * custom highlighting options.
    """

    # fields that should be searched; the part after "^" (a boost, in
    # Elasticsearch field syntax) is stripped when asking for highlights
    fields = ["title^3", "description^2", "alltext"]

    facets = {
        # use bucket aggregations to define facets
        "cw_etype": TermsFacet(field="cw_etype"),
        "creation_date": DateHistogramFacet(field="creation_date", interval="month"),
    }

    def __init__(
        self,
        query=None,
        filters=None,
        sort=(),
        doc_types=None,
        index=None,
        form=None,
        track_total_hits=True,
        **kwargs
    ):
        """Build the search.

        :param query: the full-text query string
        :param filters: mapping of facet name to selected value(s);
            ``None`` means no filter
        :param sort: a sort key, or a list/tuple of sort keys
        :param doc_types: document types to search (class default if empty)
        :param index: index name to search (class default if empty)
        :param form: request form; its keys are read by :meth:`query`
        :param track_total_hits: value of the ``track_total_hits`` search
            option (count all the hits by default)
        :param kwargs: extra options, stored on ``self.extra_kwargs``
        """
        # NOTE: every attribute below must be set *before* calling the parent
        # constructor, which builds the search and therefore calls search(),
        # query() and highlight().
        if index:
            self.index = index
        if doc_types:
            self.doc_types = doc_types
        self.form = form or {}
        if filters is None:
            filters = {}
        if not isinstance(sort, (list, tuple)):
            sort = (sort,)
        # Count all the hits by default
        self.track_total_hits = track_total_hits
        self.extra_kwargs = kwargs
        super(CWFacetedSearch, self).__init__(query, filters, sort=sort)

    def search(self):
        """Return the base search object with ``track_total_hits`` applied."""
        # override methods to add custom pieces
        base_search = super(CWFacetedSearch, self).search()
        return base_search.extra(track_total_hits=self.track_total_hits)

    def query(self, search, query):
        """Add the full-text query to ``search``.

        An empty ``query`` leaves ``search`` untouched. Otherwise the query is
        built by ``compose_search`` using flags read from the request form:

        * ``debug-es-disable-common`` present: ``common=False`` (default True)
        * ``debug-es-disable-phrase`` present: ``phrase=False`` (default True)
        * ``fuzzy`` present: ``fuzzy=True`` (default False)
        """
        if not query:
            return search
        return compose_search(
            search,
            query=query,
            fields=self.fields,
            fuzzy="fuzzy" in self.form,
            common="debug-es-disable-common" not in self.form,
            phrase="debug-es-disable-phrase" not in self.form,
        )

    def highlight(self, search):
        """
        Add custom highlighting
        """
        # highlight wants bare field names: drop any "^boost" suffix
        field_names = [field.split("^", 1)[0] for field in self.fields]
        search = search.highlight(*field_names)
        return search.highlight_options(fragment_size=150, encoder="html")


class ElasticSearchView(StartupView):
    """Startup view running an elasticsearch query and rendering the results.

    The query string is read from the ``q`` (or ``search``) form parameter,
    facet selections from the ``es_<facet name>`` parameters, and paging from
    ``page`` and ``items_per_page``. The ``debug-es`` parameter dumps the
    request and the response as JSON.

    Subclasses are expected to override :meth:`customize_search`,
    :meth:`customize_infos` and :attr:`facets_to_display`.
    """

    __regid__ = "esearch"
    # HTML snippets used as link text by the pagination
    previous_link = u" &lt; "
    next_link = u" &gt; "
    middle_link = u" &middot; " * 3
    title = _("Search")
    default_items_per_page = 10

    def render_search_comp(self):
        """Render the ``search-comp`` component, if one is selectable."""
        search_comp = self._cw.vreg["components"].select_or_none(
            "search-comp", self._cw
        )
        if search_comp:
            search_comp.render(w=self.w)

    def _items_per_page(self):
        """Return the page size requested in the form.

        Falls back to ``default_items_per_page`` when the parameter is
        missing or is not a positive integer.
        """
        raw = self._cw.form.get("items_per_page", self.default_items_per_page)
        try:
            items_per_page = int(raw)
        except (TypeError, ValueError):
            return self.default_items_per_page
        if items_per_page < 1:
            return self.default_items_per_page
        return items_per_page

    def _current_page(self):
        """Return the 1-based page number requested in the form.

        Falls back to 1 when the parameter is missing or not an integer;
        values lower than 1 are clamped to 1.
        """
        try:
            return max(int(self._cw.form.get("page", 1)), 1)
        except (TypeError, ValueError):
            return 1

    def _debug_dump(self, data):
        """Write ``data`` as indented JSON inside a ``<pre>`` element."""
        self.w(u"<pre>")
        # the dump echoes the user query and indexed content: escape it
        self.w(xml_escape(json.dumps(data, indent=2)))
        self.w(u"</pre>")
        self.w(u"<br/>")

    def do_search(self, query_string):
        """Run the search described by ``query_string`` and the request form.

        :returns: the elasticsearch response, or ``None`` (after writing an
            error message) when the index does not exist.
        """
        # return value unused; presumably registers the connection that
        # elasticsearch_dsl uses -- TODO confirm
        get_connection(self._cw.vreg.config)
        form = self._cw.form
        prefix = "es_"
        facet_selections = {}
        for key, value in form.items():
            if not key.startswith(prefix):
                continue
            # build new values rather than mutating the request form
            if isinstance(value, list):
                value = [normalize_value(item) for item in value]
            else:
                value = normalize_value(value)
            # strip the prefix only, not later occurrences of "es_"
            facet_selections[key[len(prefix):]] = value
        items_per_page = self._items_per_page()
        start = (self._current_page() - 1) * items_per_page
        stop = start + items_per_page
        search = self.customize_search(query_string, facet_selections, start, stop)
        debug = "debug-es" in form
        if debug:
            self._debug_dump(search._s.to_dict())
        try:
            response = search.execute()
        except NotFoundError:
            self.w(u"index not found in elasticsearch")
            return None
        if debug:
            self._debug_dump(response.to_dict())
        return response

    def call(self, **kwargs):
        """Render the search component, the title, the facets and the results."""
        # TODO if no ES configuration, redirect or display warning
        self.render_search_comp()
        query_string = self._cw.form.get("q", self._cw.form.get("search", ""))
        self.w(u"<h1>%s</h1>" % self._cw._(self.title))
        response = self.do_search(query_string)
        if response is None:
            # do_search() already reported the missing index
            return
        if response.hits.total.value:
            self.w(u"<h2>Resultats pour : <em>%s</em></h2>" % xml_escape(query_string))
            self.w(u"Resultats: %s" % response.hits.total.value)
            if hasattr(response, "facets"):
                self.display_facets(response)
        self.display_results(response)

    def customize_search(
        self, query_string, facet_selections, start=0, stop=10, **kwargs
    ):
        """
        This is where one can customize the search by modifying the
        query string and facet selection in an inherited class.

        For example :
        * add specific keywords such as id:text and
          add them to the facet_selections
        * use your own CWFacetedSearch class to modify fields
          and facets

        :returns: the search object, sliced to ``[start:stop]``
        """
        return CWFacetedSearch(
            query_string,
            facet_selections,
            index=self._cw.vreg.config.get("index-name"),
            doc_types=["_doc"],
            form=self._cw.form,
            **kwargs
        )[start:stop]

    def display_results(self, response):
        """
        Display results obtained from elasticsearch

        Each hit is rendered as a link (title, URL and score), followed by
        its highlighted fragments when available. Values coming from the
        index are escaped before being written to the page.
        """
        self.w(u'<div id="main-center" class="col-xs-10" role="main">')
        self.pagination(response)
        self.w(u"<ul>")
        debug = self._cw.form.get("debug-es")
        for result in response:
            self.w(u"<li>")
            infos = result.to_dict()
            infos["_score"] = result.meta.score
            infos["keys"] = result.to_dict().keys()
            cwuri = infos.get("cwuri")
            if cwuri is not None:
                # auto-generated URIs are not usable links: use the eid
                if cwuri.startswith("_auto_generated") and infos.get("eid"):
                    infos["url"] = infos["eid"]
                else:
                    infos["url"] = cwuri
            # without "cwuri" no "url" is set (unless customize_infos adds
            # one) and the hit is reported below as missing a key
            self.customize_infos(infos)
            try:
                self.w(
                    u'<a href="%s">%s</a> (%.2f)<br/>'
                    % (
                        xml_escape(str(infos["url"])),
                        xml_escape(str(infos["title"])),
                        infos["_score"],
                    )
                )
                if debug:
                    self.w(u" [%s] <br/>" % xml_escape(str(infos["keys"])))
            except KeyError:
                self.w(u"Missing key in : %s" % xml_escape(str(infos.keys())))
            # NOTE(review): only the "content" field is displayed, which is
            # not one of CWFacetedSearch.fields; presumably meant for search
            # classes that highlight a "content" field -- confirm
            try:
                fragments = result.meta.highlight.content
            except AttributeError:
                fragments = ()
            for fragment in fragments:
                # get_text() drops the highlight tags and decodes entities,
                # so the text must be escaped again before being written
                text = BeautifulSoup(fragment, "lxml").get_text()
                self.w(u"... %s" % xml_escape(text))
                self.w(u" ... <br/>")
            self.w(u"</li>")
        self.w(u"</ul>")
        self.pagination(response)
        self.w(u"</div>")

    def customize_infos(self, infos):
        """
        This is where one can customize the infos being displayed

        For example : set the title according to your rules and data set

        ``infos`` is modified in place; ``display_results`` reads its
        ``url``, ``title``, ``_score`` and ``keys`` entries and escapes
        ``url`` and ``title`` when rendering.
        """
        pass

    def pagination(self, response):
        """
        Pagination HTML generation

        Nothing is written when all the hits fit on a single page.
        Otherwise the list contains a "previous" link, the first pages, the
        pages around the current one, the last pages and a "next" link, with
        ``middle_link`` standing for skipped ranges.
        """
        total_hits = response.hits.total.value
        items_per_page = self._items_per_page()
        if total_hits <= items_per_page:
            return
        url_params = self._cw.form.copy()
        current_page = self._current_page()
        # ceiling division: number of the last page that has results
        last_page = -(-total_hits // items_per_page)
        # exclusive upper bound of the page numbers; capped at 1000,
        # presumably because of Elasticsearch's result window -- TODO confirm
        total_pages = min(last_page + 1, 1000)
        page_padding = 3
        with t.ul(self.w, klass="pagination") as ul:
            url_params["page"] = current_page - 1
            if current_page > 1:
                ul(
                    t.li(
                        t.a(
                            self.previous_link,
                            href=xml_escape(self._cw.build_url(**url_params)),
                        )
                    )
                )
            else:
                ul(t.li(t.a(self.previous_link)))

            # first pages, when the current window does not start at 1
            if current_page > page_padding:
                for page in range(
                    1, min(page_padding + 1, current_page - page_padding)
                ):
                    self.page_number(url_params, page, current_page, ul)
                if current_page > (page_padding * 2) + 1:
                    ul(t.li(t.a(self.middle_link)))

            # window around the current page
            for page in range(
                max(1, current_page - page_padding),
                min(current_page + page_padding, total_pages),
            ):
                self.page_number(url_params, page, current_page, ul)

            # last pages, when the current window does not reach the end
            if current_page < total_pages - page_padding:
                if current_page < total_pages - page_padding * 2:
                    ul(t.li(t.a(self.middle_link)))
                for page in range(
                    max(current_page + page_padding, total_pages - page_padding),
                    total_pages,
                ):
                    self.page_number(url_params, page, current_page, ul)

            # page_number() overwrote url_params["page"]: set it again
            url_params["page"] = current_page + 1
            if current_page + 1 >= total_pages:
                ul(t.li(t.a(self.next_link)))
            else:
                ul(
                    t.li(
                        t.a(
                            self.next_link,
                            href=xml_escape(self._cw.build_url(**url_params)),
                        )
                    )
                )

    def page_number(self, url_params, page, current_page, ul):
        """
        Generate HTML for page number (bold if page is current_page)

        ``url_params["page"]`` is set to ``page`` as a side effect.

        :returns: the (unescaped) URL of the page
        """
        url_params["page"] = page
        url = self._cw.build_url(**url_params)
        if page == current_page:
            ul(
                t.li(
                    t.a(t.b(page), href=xml_escape(url)),
                    Class="active",
                )
            )
        else:
            ul(t.li(t.a(page, href=xml_escape(url))))
        return url

    @property
    def facets_to_display(self):
        """
        Method to list facets to display (can be customized)
        """
        return ("cw_etype",)

    def display_facets(self, response):
        """
        Generate HTML for facets

        Each facet value is a link: it selects the value, or removes the
        selection of that facet when the value is already selected. Links
        always go back to the first page.
        """
        self.w(
            u"""<aside id="aside-main-left" class="col-xs-2 cwjs-aside">
                   <div class="panel panel-default contextFreeBox facet_filterbox">
                      <div class="panel-heading">
                         <div class="panel-title">Facettes</div>
                      </div>
        """
        )
        # facetValueSelected / facetValueDisabled in class
        facet_item = (
            u'<div class="facetValue facetCheckBox">'
            "    <span>"
            '      <a href="{}">{} {}</a>'
            "    </span>"
            "</div>"
        )
        base_params = self._cw.form.copy()
        base_params.pop("page", None)
        for attribute in self.facets_to_display:
            param = "es_{}".format(attribute)
            self.w(u'<div class="facetBody vocabularyFacet">')
            self.w(u'<div class="facetTitle">{}</div>'.format(attribute))
            for (tag, count, selected) in response.facets[attribute]:
                # start from the request parameters for every value, so that
                # one link does not depend on the values rendered before it
                url_params = base_params.copy()
                if selected:
                    url_params.pop(param, None)
                else:
                    url_params[param] = str(tag)
                url = self._cw.build_url(**url_params)
                content = xml_escape(str(tag))
                if selected:
                    content = '<div class="facet-active">{}</div>'.format(content)
                self.w(facet_item.format(xml_escape(url), content, count))
            self.w(u"</div>")
        self.w(u"</div></aside>")