diff --git a/ccplugin.py b/ccplugin.py
index cc6df274a4ca975f55875e6618dd723fba8a002f_Y2NwbHVnaW4ucHk=..0f08b2192be6077634645669d688cd68481cd66f_Y2NwbHVnaW4ucHk= 100644
--- a/ccplugin.py
+++ b/ccplugin.py
@@ -16,7 +16,8 @@
 from cubicweb.utils import admincnx
 from cubicweb.toolsutils import Command
 
-from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS
+from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS, fulltext_indexable_rql
+
 
 __docformat__ = "restructuredtext en"
 
@@ -27,7 +28,7 @@
 # TODO progress bar
 def bulk_actions(rset, index_name, etype, dry_run=False):
     for entity in rset.entities():
-        serializer = entity.cw_adapt_to('ISerializable')
+        serializer = entity.cw_adapt_to('IFullTextIndexSerializable')
         json = serializer.serialize()
         if not dry_run:
             yield {'_op_type': 'index',
@@ -39,6 +40,7 @@
 
 
 class IndexInES(Command):
+
     """Index content in ElasticSearch.
 
     <instance id>
@@ -53,7 +55,8 @@
         ('bulk', {'type': 'yn', 'default': False,
                   'help': 'set to True if you want to insert in bulk in ES'}),
         ('debug', {'type': 'yn', 'default': False,
-                   'help': 'set to True if you want to print out debug info and progress'}),
+                   'help': 'set to True if you want to print '
+                   'out debug info and progress'}),
     ]
 
     def run(self, args):
@@ -76,8 +79,7 @@
                     print(u'found indexable_types {}'.format(
                         ','.join(indexable_types(schema))))
                 for etype in indexable_types(schema):
-                    rset = cnx.execute(
-                        'Any X WHERE X is %(etype)s' % {'etype': etype})
+                    rset = cnx.execute(fulltext_indexable_rql(etype, schema))
                     if len(rset) == 0:
                         continue
                     if self.config.debug:
@@ -92,8 +94,8 @@
                         pass
                     else:
                         for entity in rset.entities():
-                            # TODO add specific IFTIES adapter
-                            serializer = entity.cw_adapt_to('ISerializable')
+                            serializer = entity.cw_adapt_to(
+                                'IFullTextIndexSerializable')
                             json = serializer.serialize()
                             if not self.config.bulk:
                                 if not self.config.dry_run:
@@ -105,4 +107,5 @@
                 if self.config.debug:
                     print(u'no elasticsearch configuration found, skipping')
 
+
 CWCTL.register(IndexInES)
diff --git a/entities.py b/entities.py
index cc6df274a4ca975f55875e6618dd723fba8a002f_ZW50aXRpZXMucHk=..0f08b2192be6077634645669d688cd68481cd66f_ZW50aXRpZXMucHk= 100644
--- a/entities.py
+++ b/entities.py
@@ -16,3 +16,47 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """cubicweb-elasticsearch entity's classes"""
+from cubicweb import view
+from cubicweb.predicates import is_instance
+
+
+class IFullTextIndexSerializable(view.EntityAdapter):
+    """Adapter to serialize an entity to a bare python structure that may be
+    directly serialized to e.g. JSON.
+    """
+
+    __regid__ = 'IFullTextIndexSerializable'
+    __select__ = is_instance('Any')
+
+    def serialize(self, complete=False):
+        """Return a dict describing the adapted entity.
+
+        The dict always holds ``cw_etype``, ``cw_source`` and ``eid``, plus
+        one key per indexable attribute found in the entity's attribute
+        cache.
+
+        :param complete: when true, call ``entity.complete()`` before
+            reading the attribute cache.
+        """
+        entity = self.entity
+        if complete:
+            entity.complete()
+        data = {
+            'cw_etype': entity.cw_etype,
+            'cw_source': entity.cw_metainformation()['source']['uri'],
+            'eid': entity.eid,
+        }
+        for rschema in entity.e_schema.indexable_attributes():
+            attr = rschema.type
+            try:
+                value = entity.cw_attr_cache[attr]
+            except KeyError:
+                # attribute not in the cache (noted as "Bytes"): skip it
+                continue
+            data[attr] = value
+        return data
+
+
+def registration_callback(vreg):
+    """Register the adapter defined in this module on ``vreg``."""
+    vreg.register(IFullTextIndexSerializable)
diff --git a/es.py b/es.py
index cc6df274a4ca975f55875e6618dd723fba8a002f_ZXMucHk=..0f08b2192be6077634645669d688cd68481cd66f_ZXMucHk= 100644
--- a/es.py
+++ b/es.py
@@ -15,6 +15,8 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
+from rql.utils import rqlvar_maker
+
 INDEXABLE_TYPES = None
 
 
@@ -53,3 +55,34 @@
             indexable_types.append(eschema.type)
     INDEXABLE_TYPES = indexable_types
     return indexable_types
+
+
+def fulltext_indexable_rql(etype, schema, eid=None):
+    """Generate RQL with fulltext_indexable attributes for a given entity type.
+
+    The query selects the entity variable, then one variable per attribute
+    returned by ``schema.eschema(etype).indexable_attributes()``, then
+    ``creation_date`` and ``modification_date``.
+
+    :param etype: entity type name used in the ``is`` restriction
+    :param schema: schema object providing ``eschema(etype)``
+    :param eid: defaults to None, set it to an eid to get RQL for a single
+        element (used in hooks)
+    :return: the RQL query string
+    """
+    varmaker = rqlvar_maker()
+    V = next(varmaker)
+    restrictions = ['WHERE %s is %s' % (V, etype)]
+    # test against None so that any eid given by the caller restricts the query
+    if eid is not None:
+        restrictions.append('%s eid %i' % (V, eid))
+    attrs = [rschema.type
+             for rschema in schema.eschema(etype).indexable_attributes()]
+    attrs.extend(('creation_date', 'modification_date'))
+    selected = []
+    for attr in attrs:
+        var = next(varmaker)
+        restrictions.append('%s %s %s' % (V, attr, var))
+        selected.append(var)
+    # TODO inlined relations ?
+    return 'Any %s,%s %s' % (V, ','.join(selected), ','.join(restrictions))
diff --git a/hooks.py b/hooks.py
index cc6df274a4ca975f55875e6618dd723fba8a002f_aG9va3MucHk=..0f08b2192be6077634645669d688cd68481cd66f_aG9va3MucHk= 100644
--- a/hooks.py
+++ b/hooks.py
@@ -25,7 +25,7 @@
 from cubicweb.server import hook
 from cubicweb.predicates import score_entity
 
-from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS
+from cubes.elasticsearch.es import indexable_types, INDEX_SETTINGS, fulltext_indexable_rql
 
 log = logging.getLogger(__name__)
 
@@ -58,8 +58,10 @@
                                   ignore=400)
             except (ConnectionError, ProtocolError):
                 log.debug('Failed to index in hook, could not connect to ES')
-            # serializer = self.entity.cw_adapt_to('IFTISerializable')
-            serializer = self.entity.cw_adapt_to('ISerializable')
+            indexable_entity = self._cw.execute(fulltext_indexable_rql(self.entity.cw_etype,
+                                                                       self.entity._cw.vreg.schema,
+                                                                       eid=self.entity.eid)).one()
+            serializer = indexable_entity.cw_adapt_to('IFullTextIndexSerializable')
             json = serializer.serialize()
             try:
                 # TODO option pour coté async ?