# HG changeset patch # User Arthur Lutz <arthur.lutz@logilab.fr> # Date 1463673544 -7200 # Thu May 19 17:59:04 2016 +0200 # Node ID a0e41f6bd19bd43783c641dadf85b42c84d9ab28 # Parent 4674c8a25531d94c845ecd43806cbf61188c27e3 [ccplugin/hooks] use eid as id to avoid duplicates diff --git a/ccplugin.py b/ccplugin.py --- a/ccplugin.py +++ b/ccplugin.py @@ -68,7 +68,10 @@ json = serializer.serialize() # TODO remove non indexable data or (better) serialize only if not self.config.dry_run: - es.index(index=index_name, doc_type=etype, body=json) + es.index(index=index_name, + id=entity.eid, + doc_type=etype, + body=json) # TODO optimize with elasticsearch.helpers.bulk # or elasticsearch.helpers.parallel_bulk # or elasticsearch.helpers.streaming_bulk diff --git a/hooks.py b/hooks.py --- a/hooks.py +++ b/hooks.py @@ -53,6 +53,7 @@ try: # TODO option pour coté async ? es.index(index=index_name, + id=self.entity.eid, doc_type=self.entity.cw_etype, body=json) except (ConnectionError, ProtocolError):