Commit 3101ac4e authored by Denis Laxalde
Browse files

Rely on cubicweb-skos's extid2eid mapping during LCSV data import

We drop the monkeypatches and hacks introduced in 49d118aa8254 in 2016,
which were supposed to be transient while waiting for a cubicweb-skos
release that came out a long time ago now.

Now we build the extid2eid dict before importing LCSV concept schemes
and pass it to store_skos_extentities() so that it will be re-used for
all files instead of re-built. Both lcsv_import and lcsv_check functions
are updated, as the latter is used in test_dataimport.py as a kind of
"mock" but must respect the former's interface. Also note that we now
have to update the 'extid2eid' dict after creating a new concept scheme
(with init_seda_scheme()): while this sounds perfectly reasonable, I
don't completely understand how this worked without it before...

We update the requirement on cubicweb-skos so that the latest release is
used (some previous release would probably work as well, but I'm not
sure which one, and it's not so easy to test).
parent aa9acf6aa53a
......@@ -22,7 +22,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
BuildRequires: %{python} %{python}-setuptools
Requires: cubicweb >= 3.25.3
Requires: cubicweb-eac
Requires: cubicweb-skos >= 0.12.1
Requires: cubicweb-skos >= 1.3.0
Requires: cubicweb-compound >= 0.7
Requires: cubicweb-relationwidget >= 0.4l
Requires: cubicweb-squareui
......
......@@ -17,7 +17,7 @@ __depends__ = {
'cubicweb': '>= 3.25.4, < 3.27',
'six': '>= 1.4.0',
'cubicweb-eac': None,
'cubicweb-skos': '>= 0.12.1',
'cubicweb-skos': '>= 1.3.0',
'cubicweb-compound': '>= 0.7',
'cubicweb-relationwidget': '>= 0.4',
'cubicweb-squareui': None,
......
......@@ -25,7 +25,10 @@ from six import text_type
from cubicweb.server.checkintegrity import reindex_entities
from cubicweb.dataimport.stores import NoHookRQLObjectStore
from cubicweb.dataimport.importer import SimpleImportLog
from cubicweb.dataimport.importer import (
SimpleImportLog,
cwuri2eid,
)
from cubes.skos import lcsv, sobjects as skos
......@@ -105,16 +108,16 @@ LCSV_FILES = [(title, rtype, etype,
)]
def lcsv_import(cnx, store, fname, scheme_uri):
def lcsv_import(cnx, store, fname, scheme_uri, **kwargs):
"""Actually import LCSV data file."""
with open(fname) as stream:
extentities = skos.lcsv_extentities(stream, scheme_uri, ';', 'utf-8')
import_log = SimpleImportLog(fname)
skos.store_skos_extentities(cnx, store, extentities, import_log,
raise_on_error=True, extid_as_cwuri=False)
raise_on_error=True, extid_as_cwuri=False, **kwargs)
def lcsv_check(cnx, store, fname, scheme_uri, separator=';'):
def lcsv_check(cnx, store, fname, scheme_uri, separator=';', **kwargs):
"""Simply check data file consistency."""
counter = count()
......@@ -161,22 +164,16 @@ def get_store(cnx):
def import_seda_schemes(cnx, lcsv_import=lcsv_import, lcsv_files=LCSV_FILES):
"""Import all LCSV data files defined in LCSV_FILES."""
orig_cwuri2eid = post321_import.cwuri2eid
try:
_import_seda_schemes(cnx, lcsv_import, lcsv_files)
finally:
post321_import.cwuri2eid = orig_cwuri2eid
def _import_seda_schemes(cnx, lcsv_import=lcsv_import, lcsv_files=LCSV_FILES):
"""Import all LCSV data files defined in LCSV_FILES."""
feed_extid2eid_cache(cnx)
extid2eid = cwuri2eid(cnx, ('ConceptScheme', 'Label'))
# concepts and external URIs may come from any source
extid2eid.update(cwuri2eid(cnx, ('Concept', 'ExternalUri')))
store = get_store(cnx)
for title, rtypes, etypes, fname in lcsv_files:
if not cnx.find('ConceptScheme', title=title):
print('importing', title.encode('utf-8'))
scheme = init_seda_scheme(cnx, title)
lcsv_import(cnx, store, fname, scheme.cwuri)
extid2eid[scheme.cwuri] = scheme.eid
lcsv_import(cnx, store, fname, scheme.cwuri, extid2eid=extid2eid)
if not isinstance(rtypes, tuple):
rtypes = (rtypes,)
for rtype in rtypes:
......@@ -193,23 +190,3 @@ def _import_seda_schemes(cnx, lcsv_import=lcsv_import, lcsv_files=LCSV_FILES):
if not isinstance(store, NoHookRQLObjectStore):
# when using the massive store, we need explicit reindexation
reindex_entities(cnx.repo.schema, cnx, etypes=['Concept', 'ConceptScheme'])
# hack to avoid recomputing extid2eid mapping for each lcsv file, this is costly with massive store
# since index may have been removed
from logilab.common.decorators import monkeypatch # noqa
from cubicweb.dataimport.importer import cwuri2eid as orig_cwuri2eid # noqa
from cubes.skos import post321_import # noqa
EXTID2EID_CACHE = None
def feed_extid2eid_cache(cnx):
global EXTID2EID_CACHE
EXTID2EID_CACHE = orig_cwuri2eid(cnx, ('ConceptScheme', 'Label'))
# though concepts and external URIs may come from any source
EXTID2EID_CACHE.update(patched_cwuri2eid(cnx, ('Concept', 'ExternalUri')))
def patched_cwuri2eid(cnx, etypes, source_eid=None):
return EXTID2EID_CACHE
......@@ -14,7 +14,7 @@ Architecture: all
Depends:
python-cubicweb (>= 3.25.3),
cubicweb-eac,
cubicweb-skos (>= 0.12.1),
cubicweb-skos (>= 1.3.0),
cubicweb-compound (>= 0.7),
cubicweb-relationwidget (>= 0.4),
cubicweb-squareui,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment