Commit 99d2aa82 authored by Denis Laxalde's avatar Denis Laxalde
Browse files

Handle LCSV format in "skos-import" command

Add a --format option to the "skos-import" ccplugin command; accepted
values are "rdf" (the default) and "lcsv".

The lcsv format import behaves a bit differently than RDF import:

* only one file is accepted,
* a concept scheme is created beforehand to hold concepts to be imported
  from the LCSV file,
* this scheme is dropped if the import fails or is
  interrupted,
* since lcsv_extentities() accepts a file object, we have to handle file
  opening and closing ourselves, hence the extentities_generator()
  wrapper function.

Closes extranet #37463080.
parent c39e8b4e1da6
......@@ -18,6 +18,7 @@
from __future__ import print_function
import logging
from os import path
from cubicweb.toolsutils import Command, underline_title
from cubicweb.cwctl import CWCTL
......@@ -25,7 +26,7 @@ from cubicweb.utils import admincnx
from cubicweb.dataimport.importer import SimpleImportLog
from cubes.skos import rdfio
from cubes.skos.sobjects import graph_extentities, import_skos_extentities
from cubes.skos.sobjects import graph_extentities, import_skos_extentities, lcsv_extentities
def _massive_store_factory(cnx, **kwargs):
......@@ -67,6 +68,12 @@ class ImportSkosData(Command):
name = 'skos-import'
min_args = 2
options = (
('format',
{'type': 'choice',
'choices': ('rdf', 'lcsv'),
'default': 'rdf',
'help': 'format of input data (rdf or lcsv)'
}),
('cw-store',
{'short': 's',
'type': 'choice', 'choices': ('rql', 'nohook', 'massive'), 'default': 'rql',
......@@ -76,7 +83,7 @@ class ImportSkosData(Command):
('rdf-store',
{'short': 'r',
'type': 'choice', 'choices': ('librdf', 'rdflib'), 'default': 'rdflib',
'help': 'RDF store type: librdf or rdflib.'
'help': 'RDF store type: librdf or rdflib (only with --format rdf).'
}),
)
......@@ -94,19 +101,54 @@ class ImportSkosData(Command):
def run(self, args):
print(u'\n%s' % underline_title('Importing Skos dataset'))
appid = args[0]
graph = self.rdf_store_factories[self.get('rdf-store')]()
for filepath in args[1:]:
print(u'loading {} into RDF graph'.format(filepath))
graph.load(filepath)
connection = admincnx(appid)
drop_scheme = None
if self.get('format') == 'lcsv':
try:
fpath, = args[1:]
except TypeError:
raise Exception('LCSV format expects exactly one input file')
with connection as cnx:
title = path.basename(fpath).decode('utf-8')
scheme = cnx.create_entity('ConceptScheme', title=title)
cnx.commit()
scheme_uri = scheme.cwuri
def drop_scheme(cnx):
    """Drop the ConceptScheme created prior to LCSV import.

    The scheme is targeted through the eid of the entity created for this
    import rather than through its title.  The title is only the base name
    of the input file, and nothing visible here guarantees it is unique:
    if another scheme carries the same title (e.g. the same file was
    imported before), a lookup by title followed by ``rset.one()`` would
    raise during cleanup instead of removing the scheme.

    Nothing happens when the scheme no longer exists.  The caller is
    responsible for committing the deletion.
    """
    cnx.execute('DELETE ConceptScheme X WHERE X eid %(eid)s',
                {'eid': scheme.eid})
def extentities_generator():
    """Yield the ext-entities read from the LCSV file at `fpath`.

    `lcsv_extentities()` works on an open file object, so this wrapper
    opens the file itself and closes it once iteration stops.
    """
    stream = open(fpath)
    try:
        entities = lcsv_extentities(stream, scheme_uri, language_code=u'fr')
        for entity in entities:
            yield entity
    finally:
        stream.close()
extentities = extentities_generator()
else:
graph = self.rdf_store_factories[self.get('rdf-store')]()
for filepath in args[1:]:
print(u'loading {} into RDF graph'.format(filepath))
graph.load(filepath)
extentities = graph_extentities(graph)
import_log = LoggingImportLog()
with admincnx(appid) as cnx:
with connection as cnx:
store = self.cw_store_factories[self.get('cw-store')](cnx)
try:
(created, updated), conceptschemes = import_skos_extentities(
cnx, graph_extentities(graph), import_log, store=store)
cnx, extentities, import_log, store=store)
cnx.commit()
except BaseException as exc:
cnx.rollback()
if drop_scheme is not None:
drop_scheme(cnx)
cnx.commit()
print(u"Aborting due to: '%s'" % exc)
else:
print(u'Created: %d\nUpdated: %d' % (len(created), len(updated)))
......
......@@ -86,6 +86,27 @@ class ImportSkosDataCommandTC(testlib.CubicWebTC):
'-s', 'massive')
self._test_base()
def test_lcsv(self):
    """Import an LCSV file and check the scheme created to hold its content.

    The scheme is expected to be titled after the input file's base name
    and to have 5 entities linked to it through `in_scheme`.
    """
    expected_title = u'lcsv_example_shortened.csv'
    csv_path = self.datapath('lcsv_example_shortened.csv')
    self.run_import_skos(csv_path, '--format', 'lcsv')
    with self.admin_access.cnx() as cnx:
        schemes = cnx.find('ConceptScheme', title=expected_title)
        self.assertTrue(schemes)
        imported = schemes.one()
        self.assertEqual(imported.title, expected_title)
        self.assertEqual(len(imported.reverse_in_scheme), 5)
def test_lcsv_rollback(self):
    """Check that a failing LCSV import leaves no ConceptScheme behind.

    The input file lacks the prolog column, so the command is expected to
    print an abort message; afterwards no scheme titled after the file
    must be found.
    """
    abort_message = u"Aborting due to: 'missing prolog column (#)'"
    csv_path = self.datapath('lcsv_example_missing_prolog.csv')
    self.run_import_skos(csv_path, '--format', 'lcsv',
                         stdout_check=abort_message)
    with self.admin_access.cnx() as cnx:
        leftovers = cnx.find('ConceptScheme',
                             title=u'lcsv_example_missing_prolog.csv')
        self.assertFalse(leftovers)
if __name__ == '__main__':
from unittest import main
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment