Commit 6fb3267d authored by Sylvain Thénault's avatar Sylvain Thénault
Browse files

Move seda schemes initialization to a dataimport module

with some code cleanups, and refactor it so that we can ensure data file validity
from tests.
parent d975d2435d7d
# coding: utf-8
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""cubicweb-seda data import tools"""
from __future__ import print_function
from itertools import count
from os.path import join, dirname
from six import text_type
from cubicweb.dataimport.stores import NoHookRQLObjectStore
from cubicweb.dataimport.importer import SimpleImportLog
from cubes.skos import lcsv, sobjects as skos
# Description of the concept schemes to create, one 4-item record per scheme,
# unpacked by `import_seda_schemes` as (title, rtypes, etypes, fname):
#
# * title: unicode title of the ConceptScheme entity to create,
# * rtypes: name(s) of the relation type(s) the scheme is attached to, given
#   either as a single string or as a tuple of strings,
# * etypes: name(s) of the entity type(s) the scheme is attached to, given
#   either as a single string or as a (possibly empty) tuple of strings,
# * fname: name of the LCSV data file holding the scheme's content.
LCSV_FILES = (
# schemes extracted from SEDA 2 XSD
(u'SEDA 2 : Actions',
'seda_final_action', 'SEDAStorageRule',
'final_action_storage_code_type.csv'),
(u'SEDA 2 : Unités de mesure',
'seda_unit', ('SEDAWidth', 'SEDAHeight', 'SEDADepth',
'SEDADiameter', 'SEDALength', 'SEDAThickness'),
'measurement_units_type.csv'),
(u'SEDA 2 : Unités de poids',
'seda_unit', 'SEDAWeight',
'measurement_weight_units_type.csv'),
(u'SEDA 2 : Types de mot-clé',
'seda_keyword_type_to', (),
'code_keyword_type.csv'),
(u'SEDA : Niveaux de description',
'seda_description_level', (),
'level_type.csv'),
# schemes extracted from SEDA 2 XSD, completed to support earlier SEDA versions
(u'SEDA : Sort final',
'seda_final_action', 'SEDAAppraisalRule',
'final_action_appraisal_code_type.csv'),
# schemes extracted from earlier SEDA versions
(u"SEDA : Durée d'utilité administrative",
'seda_rule', 'SEDASeqAppraisalRuleRule',
'dua.csv'),
(u"SEDA : Codes de restriction d'accès",
'seda_rule', 'SEDASeqAccessRuleRule',
'access_control.csv'),
# other schemes
(u'Types MIME',
'seda_mime_type_to', (),
'mime_types.csv'),
(u"Types d'évènement",
'seda_event_type_to', (),
'event_types.csv'),
(u'Encodages (extraits du schéma UN/CEFACT)',
'seda_encoding_to', (),
'encodings.csv'),
(u'Formats de fichier (PRONOM)',
'seda_format_id_to', (),
'file_formats.csv'),
(u'Niveau de classification (IGI 1300)',
'seda_classification_level', (),
'classification_levels.csv'),
(u'Langues (ISO-639-3)',
('seda_language_to', 'seda_description_language_to'), (),
'languages.csv'),
)
def lcsv_import(cnx, store, fname, scheme_uri):
    """Import the content of the LCSV data file `fname` using `store`.

    The file is looked up in the 'migration/data' directory located next to
    this module. External entities are built from it for the scheme
    identified by `scheme_uri`, then handed to the skos cube for storage;
    errors are raised rather than only logged (`raise_on_error=True`).
    """
    data_path = join(dirname(__file__), 'migration', 'data', fname)
    with open(data_path) as data_file:
        # ';' and 'utf-8' are presumably the CSV delimiter and the file
        # encoding -- confirm against skos.lcsv_extentities
        ext_entities = skos.lcsv_extentities(data_file, scheme_uri, ';', 'utf-8')
        log = SimpleImportLog(fname)
        skos.store_skos_extentities(cnx, store, ext_entities, log,
                                    raise_on_error=True, extid_as_cwuri=False)
def lcsv_check(cnx, store, fname, scheme_uri):
    """Parse the LCSV data file `fname` without importing anything.

    It takes the same arguments as `lcsv_import`, so it can be given to
    `import_seda_schemes` in its place; `cnx`, `store` and `scheme_uri` are
    not used. The file is read from the 'migration/data' directory located
    next to this module, and every triple produced by the converter is
    consumed and discarded.
    """
    sequence = count()

    def numbered_uri(val):
        # prefix the value with the next number of a running counter
        return text_type(next(sequence)) + val

    data_path = join(dirname(__file__), 'migration', 'data', fname)
    with open(data_path) as data_file:
        converter = lcsv.LCSV2RDF(data_file, ';', 'utf-8',
                                  # XXX drop once skos is released
                                  uri_generator=numbered_uri, uri_cls=text_type)
        # exhaust the generator so that the whole file gets parsed
        for _triple in converter.triples():
            pass
def import_seda_schemes(cnx, lcsv_import=lcsv_import):
    """Create every concept scheme described in LCSV_FILES that is missing.

    A scheme is skipped when a ConceptScheme with the same title already
    exists. Otherwise the scheme entity is created, `lcsv_import` is called
    with (cnx, store, fname, scheme cwuri) to load its data file, and the
    scheme is linked to its relation and entity types.

    `lcsv_import` may be replaced by any callable accepting the same
    arguments (e.g. `lcsv_check`).
    """
    store = NoHookRQLObjectStore(cnx)
    for title, rtypes, etypes, fname in LCSV_FILES:
        if cnx.find('ConceptScheme', title=title):
            # already there, nothing to do for this scheme
            continue
        print('importing', title.encode('utf-8'))
        if title.startswith('SEDA :'):
            description = u'edition 2009'
        else:
            description = None
        scheme = cnx.create_entity('ConceptScheme', title=title,
                                   description=description)
        lcsv_import(cnx, store, fname, scheme.cwuri)
        # a single name may be given instead of a tuple of names
        rtype_names = rtypes if isinstance(rtypes, tuple) else (rtypes,)
        for rtype_name in rtype_names:
            relation_type = cnx.find('CWRType', name=rtype_name).one()
            scheme.cw_set(scheme_relation_type=relation_type)
        etype_names = etypes if isinstance(etypes, tuple) else (etypes,)
        for etype_name in etype_names:
            entity_type = cnx.find('CWEType', name=etype_name).one()
            scheme.cw_set(scheme_entity_type=entity_type)
    store.flush()
    store.commit()
    store.finish()
......@@ -18,108 +18,8 @@
from __future__ import print_function
from os.path import join, dirname
from cubicweb.dataimport.stores import NoHookRQLObjectStore
from cubicweb.dataimport.importer import SimpleImportLog
from cubes.skos.sobjects import lcsv_extentities, store_skos_extentities
def lcsv_import(cnx, store, fname, scheme_uri):
    """Import the content of the LCSV data file `fname` using `store`.

    The file is looked up in the 'data' directory located next to this
    module. External entities are built from it for the scheme identified by
    `scheme_uri` and handed to `store_skos_extentities`; errors are raised
    rather than only logged (`raise_on_error=True`).
    """
    import_log = SimpleImportLog(fname)
    # use a context manager so the data file is closed even if import fails
    with open(join(dirname(__file__), 'data', fname)) as stream:
        entities = lcsv_extentities(stream, scheme_uri, ';', 'utf-8')
        # keep the store call inside the block: entities may be read lazily
        # from the stream
        store_skos_extentities(cnx, store, entities, import_log,
                               raise_on_error=True, extid_as_cwuri=False)
def import_seda_schemes(cnx):
    """Create the SEDA concept schemes that do not exist yet.

    Each record below is (title, rtypes, etypes, fname): the scheme's title,
    the relation type name(s) and entity type name(s) it is attached to
    (either a single string or a tuple of strings) and the name of the LCSV
    data file holding its content. A scheme is skipped when a ConceptScheme
    with the same title already exists.
    """
    store = NoHookRQLObjectStore(cnx)
    for title, rtypes, etypes, fname in (
            # schemes extracted from SEDA 2 XSD
            (u'SEDA 2 : Actions',
             'seda_final_action',
             'SEDAStorageRule',
             'final_action_storage_code_type.csv'),
            (u'SEDA 2 : Unités de mesure',
             'seda_unit',
             ('SEDAWidth', 'SEDAHeight', 'SEDADepth',
              'SEDADiameter', 'SEDALength', 'SEDAThickness'),
             'measurement_units_type.csv'),
            (u'SEDA 2 : Unités de poids',
             'seda_unit',
             'SEDAWeight',
             'measurement_weight_units_type.csv'),
            (u'SEDA 2 : Types de mot-clé',
             'seda_keyword_type_to',
             (),
             'code_keyword_type.csv'),
            (u'SEDA : Niveaux de description',
             'seda_description_level',
             (),
             'level_type.csv'),
            # schemes extracted from SEDA 2 XSD, completed to support earlier SEDA versions
            (u'SEDA : Sort final',
             'seda_final_action',
             'SEDAAppraisalRule',
             'final_action_appraisal_code_type.csv'),
            # schemes extracted from earlier SEDA versions
            (u"SEDA : Durée d'utilité administrative",
             'seda_rule',
             'SEDASeqAppraisalRuleRule',
             'dua.csv'),
            (u"SEDA : Codes de restriction d'accès",
             'seda_rule',
             'SEDASeqAccessRuleRule',
             'access_control.csv'),
            # other schemes
            (u'Types MIME',
             'seda_mime_type_to',
             (),
             'mime_types.csv'),
            (u"Types d'évènement",
             'seda_event_type_to',
             (),
             'event_types.csv'),
            (u'Encodages (extraits du schéma UN/CEFACT)',
             'seda_encoding_to',
             (),
             'encodings.csv'),
            (u'Formats de fichier (PRONOM)',
             'seda_format_id_to',
             (),
             'file_formats.csv'),
            (u'Niveau de classification (IGI 1300)',
             'seda_classification_level',
             (),
             'classification_levels.csv'),
            (u'Langues (ISO-639-3)',
             ('seda_language_to', 'seda_description_language_to'),
             (),
             'languages.csv'),
    ):
        if not cnx.find('ConceptScheme', title=title):
            print('importing', title.encode('utf-8'))
            # match 'SEDA :' rather than 'SEDA' so that the 'SEDA 2 : ...'
            # schemes are not tagged with the 2009 edition
            description = u'edition 2009' if title.startswith('SEDA :') else None
            scheme = cnx.create_entity('ConceptScheme', title=title,
                                       description=description)
            lcsv_import(cnx, store, fname, scheme.cwuri)
            # a single name may be given instead of a tuple of names
            if not isinstance(rtypes, tuple):
                rtypes = (rtypes,)
            for rtype in rtypes:
                rtype_e = cnx.find('CWRType', name=rtype).one()
                scheme.cw_set(scheme_relation_type=rtype_e)
            if not isinstance(etypes, tuple):
                etypes = (etypes,)
            for etype in etypes:
                etype_e = cnx.find('CWEType', name=etype).one()
                scheme.cw_set(scheme_entity_type=etype_e)
    store.flush()
    store.commit()
    store.finish()
# Concept schemes are not created when the instance runs in 'test' mode.
running_tests = config.mode == 'test'
if not running_tests:
    from cubes.seda.dataimport import import_seda_schemes
    print('-> creating SEDA concept schemes')
    import_seda_schemes(cnx)
# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
"""cubicweb-seda unit tests for dataimport"""
from cubicweb.devtools.testlib import CubicWebTC
from cubes.seda import dataimport
class ConcepSchemeImportTC(CubicWebTC):
    """Check the concept schemes creation and the validity of the data files."""

    def test_import_seda_schemes(self):
        # `lcsv_check` replaces the actual import function, so data files
        # are only parsed
        with self.admin_access.client_cnx() as cnx:
            dataimport.import_seda_schemes(cnx, lcsv_import=dataimport.lcsv_check)
            schemes = cnx.find('ConceptScheme')
            self.assertEqual(len(schemes), 14)
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    from unittest import main
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment