Commit 74ebe1cc authored by Denis Laxalde's avatar Denis Laxalde
Browse files

Rename post321_import module into dataimport

Also move ext_dump_relations and dump_relations function from sobjects
to avoid circular import and try to gather dataimport-related utilities
in a single module.

We still keep a dummy post321_import module as dowstream projects still
import it (noticeably cubicweb-seda).

Closes #17133718.
parent 8da083e5256c
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
"""SKOS dataimport utilities."""
from itertools import imap
from six import text_type
from cubicweb import schema
from cubicweb.dataimport.importer import (ExtEntity, ExtEntitiesImporter, RelationMapping,
cwuri2eid, use_extid_as_cwuri)
class SKOSRelationMapping(RelationMapping):
def __getitem__(self, rtype):
if rtype in ('exact_match', 'close_match'):
rql = 'Any S,O WHERE S {0} O'.format(rtype)
return set(tuple(x) for x in self.cnx.execute(rql))
return super(SKOSRelationMapping, self).__getitem__(rtype)
def ext_dump_relations(cnx, extid2eid, extentity):
"""Wrapper around :func:`dump_relations` but receiving extentity and extdid2eid mapping as
argument, to attempt to work around store's limitation on inlined relation.
"""
eid = extid2eid[extentity.extid]
etype = cnx.entity_metas(eid)['type']
relations = []
rschema = cnx.vreg.schema.rschema
for subj, rtype, obj in dump_relations(cnx, eid, etype):
if rschema(rtype).inlined:
if subj is None:
if obj is None:
obj = extentity.extid
else:
extid2eid[obj] = obj
extentity.values.setdefault(rtype, set()).add(obj)
else:
raise Exception("can't dump inlined object relation {0}".format(rtype))
else:
relations.append((subj, rtype, obj))
return relations
def dump_relations(cnx, eid, etype):
"""Return a list of relation 3-uples `(subject_eid, relation, object_eid)` with None instead of
`subject_eid` or `object_eid` depending on whether the entity type corresponding to `eid` is
subject or object.
"""
eschema = cnx.vreg.schema.eschema(etype)
relations = []
for rschema, _, role in eschema.relation_definitions():
if rschema.rule: # computed relation
continue
rtype = rschema.type
if rtype in schema.VIRTUAL_RTYPES or rtype in schema.META_RTYPES:
continue
if role == 'subject':
for object_eid, in cnx.execute('Any Y WHERE X %s Y, X eid %%(x)s' % rtype, {'x': eid}):
if object_eid == eid:
object_eid = None
relations.append((None, rtype, object_eid))
else:
for subject_eid, in cnx.execute('Any Y WHERE Y %s X, X eid %%(x)s' % rtype, {'x': eid}):
if subject_eid == eid:
subject_eid = None
relations.append((subject_eid, rtype, None))
return relations
def store_skos_extentities(cnx, store, entities, import_log,
source=None, raise_on_error=False,
extid2eid=None, extid_as_cwuri=True):
"""Add SKOS external entities to the store. Don't commit/flush any data.
Arguments:
* `cnx`, RQL connection to the CubicWeb instance
* `store`, store to use for the import
* `entities`, iterable (usualy a generator) on external entities to import
* `import_log`, import log instance to give to the store
* `source`, optional source in which existing entities will be looked for
(default to the system source)
* `raise_on_error`, boolean flag telling if we should fail on error or simply log it (default
to `False`)
* `extid_as_cwuri`, boolean flag telling if we should use the external entities'extid as `cwuri`
attribute of imported entities (default to `True`)
"""
# only consider the system source for schemes and labels
if source is None:
source_eid = cnx.repo.system_source.eid
else:
source_eid = source.eid
if extid2eid is None:
extid2eid = cwuri2eid(cnx, ('ConceptScheme', 'Label'), source_eid=source_eid)
# though concepts and external URIs may come from any source
extid2eid.update(cwuri2eid(cnx, ('Concept', 'ExternalUri')))
# plug function that turn previously known external uris by newly inserted concepts
restore_relations = {}
# pylint: disable=dangerous-default-value
def externaluri_to_concept(extentity, cnx=cnx, extid2eid=extid2eid,
restore_relations=restore_relations):
try:
eid = extid2eid[extentity.extid]
except KeyError:
pass
else:
if extentity.etype == 'Concept' and cnx.entity_metas(eid)['type'] == 'ExternalUri':
# We have replaced the external uri by the new concept. As entities.extid column is
# unique, we've to drop the external uri before inserting the concept, so we:
# 1. record every relations from/to the external uri,
# 2. remove it,
# 3. insert the concept and
# 4. reinsert relations using the concept instead
#
# 1. record relations from/to the external uri
restore_relations[extentity.extid] = ext_dump_relations(cnx, extid2eid, extentity)
# 2. remove the external uri entity
cnx.execute('DELETE ExternalUri X WHERE X eid %(x)s', {'x': eid})
# 3. drop its extid from the mapping to trigger insertion of the concept by the
# importer
del extid2eid[extentity.extid]
# 4. will be done in SKOSExtEntitiesImporter
return extentity
entities = imap(externaluri_to_concept, entities)
# plug function to detect the concept scheme
concept_schemes = []
def record_scheme(extentity):
if extentity.etype == 'ConceptScheme':
concept_schemes.append(extentity.extid)
return extentity
entities = imap(record_scheme, entities)
etypes_order_hint = ('ConceptScheme', 'Concept', 'Label')
existing_relations = SKOSRelationMapping(cnx, source)
importer = SKOSExtEntitiesImporter(schema=cnx.vreg.schema, store=store,
extid2eid=extid2eid, existing_relations=existing_relations,
restore_relations=restore_relations,
etypes_order_hint=etypes_order_hint, import_log=import_log,
raise_on_error=raise_on_error)
if extid_as_cwuri:
set_cwuri = use_extid_as_cwuri(importer.extid2eid)
entities = set_cwuri(entities)
importer.import_entities(entities)
stats = (importer.created, importer.updated)
return stats, concept_schemes
class SKOSExtEntitiesImporter(ExtEntitiesImporter):
"""Override ExtEntitiesImporter to handle creation of additionnal relations to newly created
concepts that replace a former external uri, and to create ExternalUri entities for URIs used in
exact_match / close_match relations which have no known entity in the repository yet.
"""
def __init__(self, *args, **kwargs):
self.restore_relations = kwargs.pop('restore_relations')
super(SKOSExtEntitiesImporter, self).__init__(*args, **kwargs)
def prepare_insert_entity(self, extentity):
eid = super(SKOSExtEntitiesImporter, self).prepare_insert_entity(extentity)
# (4.) restore relations formerly from/to an equivalent external uri
try:
relations = self.restore_relations.pop(extentity.extid)
except KeyError:
return eid
for subject_eid, rtype, object_eid in relations:
if subject_eid is None:
subject_eid = eid
if object_eid is None:
object_eid = eid
self.store.prepare_insert_relation(subject_eid, rtype, object_eid)
return eid
def prepare_insert_deferred_relations(self, deferred):
# create missing targets for exact_match and close_match relations
for rtype in ('exact_match', 'close_match'):
relations = deferred.get(rtype, ())
for _, object_uri in relations:
if object_uri not in self.extid2eid:
extentity = ExtEntity('ExternalUri', object_uri,
values=dict(uri=text_type(object_uri),
cwuri=text_type(object_uri)))
self.prepare_insert_entity(extentity)
return super(SKOSExtEntitiesImporter, self).prepare_insert_deferred_relations(deferred)
......@@ -23,7 +23,7 @@ from cubicweb import _
from cubicweb.server import hook
from cubicweb.predicates import is_instance
from cubes.skos.sobjects import dump_relations
from cubes.skos.dataimport import dump_relations
class CreateRelationsOp(hook.DataOperationMixIn, hook.Operation):
......
# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
"""SKOS Import code"""
from warnings import warn
from itertools import imap
from .dataimport import * # noqa F401
from six import text_type
from cubicweb.dataimport.importer import (ExtEntity, ExtEntitiesImporter, RelationMapping,
cwuri2eid, use_extid_as_cwuri)
from cubes.skos.sobjects import ext_dump_relations
class SKOSRelationMapping(RelationMapping):
def __getitem__(self, rtype):
if rtype in ('exact_match', 'close_match'):
rql = 'Any S,O WHERE S {0} O'.format(rtype)
return set(tuple(x) for x in self.cnx.execute(rql))
return super(SKOSRelationMapping, self).__getitem__(rtype)
def store_skos_extentities(cnx, store, entities, import_log,
source=None, raise_on_error=False,
extid2eid=None, extid_as_cwuri=True):
"""Add SKOS external entities to the store. Don't commit/flush any data.
Arguments:
* `cnx`, RQL connection to the CubicWeb instance
* `store`, store to use for the import
* `entities`, iterable (usualy a generator) on external entities to import
* `import_log`, import log instance to give to the store
* `source`, optional source in which existing entities will be looked for
(default to the system source)
* `raise_on_error`, boolean flag telling if we should fail on error or simply log it (default
to `False`)
* `extid_as_cwuri`, boolean flag telling if we should use the external entities'extid as `cwuri`
attribute of imported entities (default to `True`)
"""
# only consider the system source for schemes and labels
if source is None:
source_eid = cnx.repo.system_source.eid
else:
source_eid = source.eid
if extid2eid is None:
extid2eid = cwuri2eid(cnx, ('ConceptScheme', 'Label'), source_eid=source_eid)
# though concepts and external URIs may come from any source
extid2eid.update(cwuri2eid(cnx, ('Concept', 'ExternalUri')))
# plug function that turn previously known external uris by newly inserted concepts
restore_relations = {}
# pylint: disable=dangerous-default-value
def externaluri_to_concept(extentity, cnx=cnx, extid2eid=extid2eid,
restore_relations=restore_relations):
try:
eid = extid2eid[extentity.extid]
except KeyError:
pass
else:
if extentity.etype == 'Concept' and cnx.entity_metas(eid)['type'] == 'ExternalUri':
# We have replaced the external uri by the new concept. As entities.extid column is
# unique, we've to drop the external uri before inserting the concept, so we:
# 1. record every relations from/to the external uri,
# 2. remove it,
# 3. insert the concept and
# 4. reinsert relations using the concept instead
#
# 1. record relations from/to the external uri
restore_relations[extentity.extid] = ext_dump_relations(cnx, extid2eid, extentity)
# 2. remove the external uri entity
cnx.execute('DELETE ExternalUri X WHERE X eid %(x)s', {'x': eid})
# 3. drop its extid from the mapping to trigger insertion of the concept by the
# importer
del extid2eid[extentity.extid]
# 4. will be done in SKOSExtEntitiesImporter
return extentity
entities = imap(externaluri_to_concept, entities)
# plug function to detect the concept scheme
concept_schemes = []
def record_scheme(extentity):
if extentity.etype == 'ConceptScheme':
concept_schemes.append(extentity.extid)
return extentity
entities = imap(record_scheme, entities)
etypes_order_hint = ('ConceptScheme', 'Concept', 'Label')
existing_relations = SKOSRelationMapping(cnx, source)
importer = SKOSExtEntitiesImporter(schema=cnx.vreg.schema, store=store,
extid2eid=extid2eid, existing_relations=existing_relations,
restore_relations=restore_relations,
etypes_order_hint=etypes_order_hint, import_log=import_log,
raise_on_error=raise_on_error)
if extid_as_cwuri:
set_cwuri = use_extid_as_cwuri(importer.extid2eid)
entities = set_cwuri(entities)
importer.import_entities(entities)
stats = (importer.created, importer.updated)
return stats, concept_schemes
class SKOSExtEntitiesImporter(ExtEntitiesImporter):
"""Override ExtEntitiesImporter to handle creation of additionnal relations to newly created
concepts that replace a former external uri, and to create ExternalUri entities for URIs used in
exact_match / close_match relations which have no known entity in the repository yet.
"""
def __init__(self, *args, **kwargs):
self.restore_relations = kwargs.pop('restore_relations')
super(SKOSExtEntitiesImporter, self).__init__(*args, **kwargs)
def prepare_insert_entity(self, extentity):
eid = super(SKOSExtEntitiesImporter, self).prepare_insert_entity(extentity)
# (4.) restore relations formerly from/to an equivalent external uri
try:
relations = self.restore_relations.pop(extentity.extid)
except KeyError:
return eid
for subject_eid, rtype, object_eid in relations:
if subject_eid is None:
subject_eid = eid
if object_eid is None:
object_eid = eid
self.store.prepare_insert_relation(subject_eid, rtype, object_eid)
return eid
def prepare_insert_deferred_relations(self, deferred):
# create missing targets for exact_match and close_match relations
for rtype in ('exact_match', 'close_match'):
relations = deferred.get(rtype, ())
for _, object_uri in relations:
if object_uri not in self.extid2eid:
extentity = ExtEntity('ExternalUri', object_uri,
values=dict(uri=text_type(object_uri),
cwuri=text_type(object_uri)))
self.prepare_insert_entity(extentity)
return super(SKOSExtEntitiesImporter, self).prepare_insert_deferred_relations(deferred)
warn(
'[1.3.0] "post321_import" module got renamed into "dataimport"',
DeprecationWarning,
)
......@@ -31,7 +31,7 @@ from itertools import imap
from logilab.common.deprecation import deprecated
from cubicweb import schema, dataimport
from cubicweb import dataimport
from cubicweb.dataimport.importer import (
ExtEntity,
HTMLImportLog,
......@@ -42,55 +42,7 @@ from cubicweb.server.sources import datafeed
from cubes.skos import (LABELS_RDF_MAPPING, register_skos_rdf_input_mapping,
rdfio, lcsv, to_unicode)
def ext_dump_relations(cnx, extid2eid, extentity):
"""Wrapper around :func:`dump_relations` but receiving extentity and extdid2eid mapping as
argument, to attempt to work around store's limitation on inlined relation.
"""
eid = extid2eid[extentity.extid]
etype = cnx.entity_metas(eid)['type']
relations = []
rschema = cnx.vreg.schema.rschema
for subj, rtype, obj in dump_relations(cnx, eid, etype):
if rschema(rtype).inlined:
if subj is None:
if obj is None:
obj = extentity.extid
else:
extid2eid[obj] = obj
extentity.values.setdefault(rtype, set()).add(obj)
else:
raise Exception("can't dump inlined object relation {0}".format(rtype))
else:
relations.append((subj, rtype, obj))
return relations
def dump_relations(cnx, eid, etype):
"""Return a list of relation 3-uples `(subject_eid, relation, object_eid)` with None instead of
`subject_eid` or `object_eid` depending on whether the entity type corresponding to `eid` is
subject or object.
"""
eschema = cnx.vreg.schema.eschema(etype)
relations = []
for rschema, _, role in eschema.relation_definitions():
if rschema.rule: # computed relation
continue
rtype = rschema.type
if rtype in schema.VIRTUAL_RTYPES or rtype in schema.META_RTYPES:
continue
if role == 'subject':
for object_eid, in cnx.execute('Any Y WHERE X %s Y, X eid %%(x)s' % rtype, {'x': eid}):
if object_eid == eid:
object_eid = None
relations.append((None, rtype, object_eid))
else:
for subject_eid, in cnx.execute('Any Y WHERE Y %s X, X eid %%(x)s' % rtype, {'x': eid}):
if subject_eid == eid:
subject_eid = None
relations.append((subject_eid, rtype, None))
return relations
from .dataimport import store_skos_extentities
class RDFSKOSImportService(Service):
......@@ -275,9 +227,6 @@ def import_skos_extentities(cnx, entities, import_log,
return res
from .post321_import import store_skos_extentities # noqa E402
@deprecated('[skos 0.9] use lcsv_extentities then import_skos_extentities instead')
def _skos_import_lcsv(cnx, stream, import_log, scheme_uri, delimiter,
encoding, language_code=None, **kwargs):
......
......@@ -20,6 +20,8 @@ from contextlib import contextmanager
from cubicweb.devtools import testlib
from cubes.skos.dataimport import dump_relations
@contextmanager
def silent_logging(logger=None):
......@@ -32,7 +34,6 @@ def silent_logging(logger=None):
class DumpRelationsTC(testlib.CubicWebTC):
def test(self):
from cubes.skos.sobjects import dump_relations
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'preexisting')
concept = scheme.add_concept(u'something',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment