Skip to content
Snippets Groups Projects
Commit 0f08b2192be6 authored by Arthur Lutz
Browse files

[es] index only fulltextindex=True attributes

parent cc6df274a4ca
No related branches found
No related tags found
No related merge requests found
......@@ -16,7 +16,8 @@
from cubicweb.utils import admincnx
from cubicweb.toolsutils import Command
from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS
from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS, fulltext_indexable_rql
__docformat__ = "restructuredtext en"
......@@ -27,7 +28,7 @@
# TODO progress bar
def bulk_actions(rset, index_name, etype, dry_run=False):
for entity in rset.entities():
serializer = entity.cw_adapt_to('ISerializable')
serializer = entity.cw_adapt_to('IFullTextIndexSerializable')
json = serializer.serialize()
if not dry_run:
yield {'_op_type': 'index',
......@@ -39,6 +40,7 @@
class IndexInES(Command):
"""Index content in ElasticSearch.
<instance id>
......@@ -53,7 +55,8 @@
('bulk', {'type': 'yn', 'default': False,
'help': 'set to True if you want to insert in bulk in ES'}),
('debug', {'type': 'yn', 'default': False,
'help': 'set to True if you want to print out debug info and progress'}),
'help': 'set to True if you want to print'
'out debug info and progress'}),
]
def run(self, args):
......@@ -76,8 +79,7 @@
print(u'found indexable_types {}'.format(
','.join(indexable_types(schema))))
for etype in indexable_types(schema):
rset = cnx.execute(
'Any X WHERE X is %(etype)s' % {'etype': etype})
rset = cnx.execute(fulltext_indexable_rql(etype, schema))
if len(rset) == 0:
continue
if self.config.debug:
......@@ -92,8 +94,8 @@
pass
else:
for entity in rset.entities():
# TODO add specific IFTIES adapter
serializer = entity.cw_adapt_to('ISerializable')
serializer = entity.cw_adapt_to(
'IFullTextIndexSerializable')
json = serializer.serialize()
if not self.config.bulk:
if not self.config.dry_run:
......@@ -105,4 +107,5 @@
if self.config.debug:
print(u'no elasticsearch configuration found, skipping')
CWCTL.register(IndexInES)
......@@ -16,3 +16,37 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""cubicweb-elasticsearch entity's classes"""
from cubicweb import view
from cubicweb.predicates import is_instance
class IFullTextIndexSerializable(view.EntityAdapter):
    """Adapter reducing an entity to a plain dictionary for full-text indexing.

    The resulting structure only holds bare python values keyed by name, so
    it may be handed directly to a serializer such as JSON.
    """

    __regid__ = 'IFullTextIndexSerializable'
    __select__ = is_instance('Any')

    def serialize(self, complete=False):
        """Return the dictionary describing the adapted entity.

        :param complete: when true, ``entity.complete()`` is called before
            any value is read.
        :return: a dict with the ``cw_etype``, ``cw_source`` and ``eid``
            keys, plus one key per attribute listed by the entity schema's
            ``indexable_attributes()`` that is present in the entity's
            attribute cache.
        """
        adapted = self.entity
        if complete:
            adapted.complete()
        document = {
            'cw_etype': adapted.cw_etype,
            'cw_source': adapted.cw_metainformation()['source']['uri'],
            'eid': adapted.eid,
        }
        for attr_schema in adapted.e_schema.indexable_attributes():
            name = attr_schema.type
            try:
                cached = adapted.cw_attr_cache[name]
            except KeyError:
                # Attribute missing from the cache (the original note here
                # reads "Bytes"): leave it out of the document.
                continue
            else:
                document[name] = cached
        return document
def registration_callback(vreg):
    """Register the ``IFullTextIndexSerializable`` adapter on ``vreg``."""
    vreg.register(IFullTextIndexSerializable)
......@@ -15,6 +15,8 @@
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from rql.utils import rqlvar_maker
INDEXABLE_TYPES = None
......@@ -53,3 +55,30 @@
indexable_types.append(eschema.type)
INDEXABLE_TYPES = indexable_types
return indexable_types
def fulltext_indexable_rql(etype, schema, eid=None):
    """Generate RQL selecting the full-text indexable attributes of ``etype``.

    The query selects the entity variable followed by one variable per
    attribute returned by ``schema.eschema(etype).indexable_attributes()``,
    then ``creation_date`` and ``modification_date``. Its shape is::

        Any <X>,<v1>,...,<vn> WHERE <X> is <etype>[,<X> eid <eid>],<X> <attr1> <v1>,...

    where the variable names are taken, in order, from ``rqlvar_maker()``.

    :param etype: name of the entity type to query.
    :param schema: schema object providing ``eschema(etype)``.
    :param eid: defaults to None (no restriction); set it to an eid to get
        the RQL for a single element (used in hooks).
    :return: the RQL query as a string.
    """
    varmaker = rqlvar_maker()
    main_var = next(varmaker)
    restrictions = ['WHERE %s is %s' % (main_var, etype)]
    # Only None means "no eid restriction": a falsy eid such as 0 must not
    # silently widen the query to every entity of the type.
    if eid is not None:
        restrictions.append('%s eid %i' % (main_var, eid))
    attributes = [rschema.type
                  for rschema in schema.eschema(etype).indexable_attributes()]
    # The dates are always selected, after the indexable attributes.
    attributes.extend(('creation_date', 'modification_date'))
    selected_vars = []
    for attr in attributes:
        var = next(varmaker)
        restrictions.append('%s %s %s' % (main_var, attr, var))
        selected_vars.append(var)
    # TODO inlined relations ?
    return 'Any %s,%s %s' % (main_var, ','.join(selected_vars),
                             ','.join(restrictions))
......@@ -25,7 +25,7 @@
from cubicweb.server import hook
from cubicweb.predicates import score_entity
from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS
from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS, fulltext_indexable_rql
log = logging.getLogger(__name__)
......@@ -58,8 +58,10 @@
ignore=400)
except (ConnectionError, ProtocolError):
log.debug('Failed to index in hook, could not connect to ES')
# serializer = self.entity.cw_adapt_to('IFTISerializable')
serializer = self.entity.cw_adapt_to('ISerializable')
indexable_entity = self._cw.execute(fulltext_indexable_rql(self.entity.cw_etype,
self.entity._cw.vreg.schema,
eid=self.entity.eid)).one()
serializer = indexable_entity.cw_adapt_to('IFullTextIndexSerializable')
json = serializer.serialize()
try:
# TODO option pour coté async ?
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment