Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding: utf-8 -*-
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""cubicweb-elasticsearch entity's classes"""
import collections
from cubicweb import view, neg_role
from cubicweb.predicates import is_instance
from cubicweb.appobject import AppObject
from cubicweb_elasticsearch import es
def deep_update(d1, d2):
    """Recursively merge mapping ``d2`` into ``d1`` and return ``d1``.

    For every key of ``d2``:

    * if the value is a mapping, it is merged into the mapping already
      stored under that key in ``d1`` (a fresh ``dict`` is used when ``d1``
      has no such key or holds a non-mapping value there);
    * otherwise the value replaces whatever ``d1`` held for that key.

    ``d1`` is modified in place; ``d2`` is left untouched. Non-mapping
    values (lists included) are stored by reference, not copied.
    """
    # ``collections.Mapping`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back to the old location on Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping

    # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
    for key, value in d2.items():
        if isinstance(value, Mapping):
            target = d1.get(key)
            if not isinstance(target, Mapping):
                # Nothing mergeable under this key: start from an empty dict
                # instead of recursing into a scalar (which would crash).
                target = {}
            d1[key] = deep_update(target, value)
        else:
            d1[key] = value
    return d1
class EsRegistry(AppObject):
    """Base application object whose subclasses are registered in the
    ``'es'`` registry.
    """
    __registry__ = 'es'
class Indexer(EsRegistry):
    """Application object (regid ``'indexer'``) giving access to the
    Elasticsearch connection and index configured for the instance.
    """
    __regid__ = 'indexer'

    # Default index settings: the ``default`` analyzer uses the ``standard``
    # tokenizer followed by the filters listed below, two of which
    # (``my_ascii_folding`` and ``french_snowball``) are defined here.
    settings = {
        'settings': {
            'analysis': {
                'analyzer': {
                    'default': {
                        'filter': [
                            'standard',
                            'my_ascii_folding',
                            'lowercase',
                            'french_snowball',
                        ],
                        'tokenizer': 'standard',
                    },
                },
                'filter': {
                    'my_ascii_folding': {
                        'preserve_original': True,
                        'type': 'asciifolding',
                    },
                    'french_snowball': {
                        'type': 'snowball',
                        'language': 'French',
                    },
                },
            },
        },
    }

    @property
    def index_name(self):
        """Index name read from the ``index-name`` configuration option."""
        config = self._cw.vreg.config
        return config['index-name']

    def get_connection(self):
        """Call :meth:`create_index` with default arguments, then return the
        result of ``es.get_connection`` for the instance configuration.
        """
        self.create_index()
        config = self._cw.vreg.config
        return es.get_connection(config)

    def create_index(self, index_name=None, custom_settings=None):
        """Ask ``es.create_index`` to create the index.

        :param index_name: name of the index; defaults to
            :attr:`index_name` when empty or ``None``.
        :param custom_settings: optional mapping merged on top of a copy of
            the class-level :attr:`settings`; when ``None`` the class-level
            settings are passed as they are.

        Nothing is done when ``es.get_connection`` returns ``None``.
        """
        if not index_name:
            index_name = self.index_name
        if custom_settings is not None:
            # Merge into a fresh dict so the class-level defaults are not
            # modified by the custom values.
            merged = deep_update({}, self.settings)
            settings = deep_update(merged, custom_settings)
        else:
            settings = self.settings
        es_cnx = es.get_connection(self._cw.vreg.config)
        if es_cnx is None:
            return
        es.create_index(es_cnx, index_name, settings)
class IFullTextIndexSerializable(view.EntityAdapter):
    """Entity adapter producing a plain python dictionary describing the
    adapted entity, suitable for direct serialization (e.g. to JSON).
    """
    __regid__ = 'IFullTextIndexSerializable'
    __select__ = is_instance('Any')

    def serialize(self, complete=False):
        """Return a dictionary describing the adapted entity.

        The dictionary holds the ``cw_etype``, ``eid`` and ``cwuri`` of the
        entity, updated with the content of its ``cw_attr_cache``.

        :param complete: when true, ``entity.complete()`` is called before
            the entity is read (presumably to fill the attribute cache --
            see the cubicweb entity API).
        """
        adapted = self.entity
        if complete:
            adapted.complete()
        data = {}
        for attr in ('cw_etype', 'eid', 'cwuri'):
            data[attr] = getattr(adapted, attr)
        # Cached attributes come last, so they win on key collision.
        data.update(adapted.cw_attr_cache)
        # TODO take a look at what's in entity.cw_relation_cache
        return data
class File(IFullTextIndexSerializable):
    """Serializer for ``File`` entities, which delegates to the entity the
    file is attached to through a fulltext container relation.
    """
    __select__ = IFullTextIndexSerializable.__select__ & is_instance('File')

    def serialize(self, complete=False):
        """Return the serialization of the first container entity found.

        The fulltext container relations of the ``File`` schema are walked
        in order; the first related entity encountered is adapted to
        ``IFullTextIndexSerializable`` and its serialization is returned.
        An empty dictionary is returned when no such entity exists.

        This could become a generic implementation of fulltext containers
        indexation, but only a single parent document can be returned for
        now, which is fine for files.
        """
        file_schema = self._cw.vreg.schema['File']
        for rschema, role in file_schema.fulltext_containers():
            # The container sits on the opposite end of the relation.
            rset = self.entity.related(rschema.type, role=neg_role(role))
            for parent in rset.entities():
                serializer = parent.cw_adapt_to('IFullTextIndexSerializable')
                return serializer.serialize(complete)
        return {}