Commit 32a99430 authored by Laura Médioni's avatar Laura Médioni
Browse files

[views] Allow the import of a "hierarchical csv" file

Example:

Concept

\\tSub-concept

Fellow-concept

Separators may be configured.
parent 9cdfdbb8af9f
......@@ -52,14 +52,49 @@ class ConceptScheme(AnyEntity):
"""Add a top-concept to this scheme"""
return _add_concept(self, label, language_code, **kwargs)
def add_concepts_from_file(self, source_file, encoding, language_code, separator='\t'):
    """Create the concepts listed in `source_file` inside this ConceptScheme.

    `source_file` is iterated line by line and must hold one concept per line. Each line
    starts with N occurrences of `separator` giving its hierarchical level: no separator
    means a top-concept of the scheme, N separators mean a narrower concept of the closest
    preceding concept that has N - 1 separators. Blank lines and lines holding nothing but
    separators are skipped.

    Example (separator = u';')::

        titi
        ;toto
        ;;tata

    --> titi is a top-concept of the scheme, toto is a narrower concept of titi and tata is
    a narrower concept of toto.

    A separator only counts at the very beginning of the line::

        titi
         ;toto

    --> two top-concepts are created, 'titi' and ';toto' (the leading space is not a
    separator, so the second line has level 0 and its label is only whitespace-stripped).

    :param source_file: iterable of lines; byte strings are decoded with `encoding`, lines
        that are already text are used as they are
    :param encoding: name of the encoding used to decode byte lines
    :param language_code: language code handed over to `add_concept` for every label
    :param separator: the single character marking one hierarchical level
    :raises ValueError: when a line is more than one level deeper than the concept before it
    """
    # This ordered list behaves like a stack. It holds every ancestor from the concept scheme
    # (level -1, never popped) down to the most recent broader concept, so its last element is
    # always the current parent. De-indenting pops the levels that are left behind.
    broaderconcepts = [(-1, self)]
    for line in source_file:
        # Only strip on the right: leading separators carry the hierarchical level.
        line = line.rstrip()
        if isinstance(line, bytes):
            line = line.decode(encoding)
        if not line:
            continue
        # Count separators up to the first non-separator character. The bound check keeps a
        # line made only of separators from running past the end of the string.
        nb_sep = 0
        while nb_sep < len(line) and line[nb_sep] == separator:
            nb_sep += 1
        value = line[nb_sep:].strip()
        if not value:
            # Separators without any label: there is no concept to create.
            continue
        # can't have "concept\n tab tab subconcept"
        if nb_sep - broaderconcepts[-1][0] > 1:
            raise ValueError('Inconsistent concept indentation')
        # It is possible to de-indent several levels at once. nb_sep is never negative, so
        # the concept scheme (level -1) always stays in the list.
        while nb_sep <= broaderconcepts[-1][0]:
            broaderconcepts.pop()
        concept = broaderconcepts[-1][1].add_concept(value, language_code)
        broaderconcepts.append((nb_sep, concept))
class Concept(AnyEntity):
......
APPELLATION DES LOIS ET DES RAPPORTS
LOIS RAPPORTS JURISPRUDENCE
APPELLATION DE DECISIONS DE JURISPRUDENCE
ARRET BERKANI
ARRET TERNON
APPELLATION DES RAPPORTS
RAPPORT ARTHUIS
LOLF
LOV
LISTE DES MOTS OUTILS
CHAPITRE MOTS OUTILS
MOTS OUTILS
ABATTEMENT
ACCORD
......@@ -15,8 +15,29 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from StringIO import StringIO
from cubicweb.devtools.testlib import CubicWebTC
def get_narrower_concepts(concept):
    """Return a dictionary mapping the title of each narrower concept of `concept` to the
    dictionary of its own narrower concepts (recursive).

    A concept without narrower concepts yields an empty dictionary.
    """
    # A distinct loop name keeps the `concept` argument from being shadowed.
    return {narrower.dc_title(): get_narrower_concepts(narrower)
            for narrower in concept.narrower_concept}
def build_result_dict(scheme):
    """Return the concept hierarchy of `scheme` as nested dictionaries keyed by concept title.

    Each top-concept title maps to the result of `get_narrower_concepts` for that concept.
    """
    return {top.dc_title(): get_narrower_concepts(top)
            for top in scheme.top_concepts}
class ConceptSchemeTC(CubicWebTC):
def test_top_concepts(self):
......@@ -32,6 +53,96 @@ class ConceptSchemeTC(CubicWebTC):
self.assertEqual(set(x.eid for x in scheme.top_concepts),
set((c1.eid,)))
def test_add_concepts_from_file_ok(self):
    # Import the tab-indented CSV fixture and compare the whole resulting concept tree.
    with self.admin_access.client_cnx() as cnx:
        created = cnx.create_entity('ConceptScheme', title=u'rapport')
        cnx.commit()
    fixture_path = self.datapath('hierarchical_csv_example_shortened.csv')
    with self.admin_access.web_request() as req:
        # Fetch the scheme again through the request before working on it.
        imported = req.entity_from_eid(created.eid)
        with open(fixture_path) as stream:
            imported.add_concepts_from_file(stream, u'utf-8', u'fr', u'\t')
        result = build_result_dict(imported)
        # The leaves are named separately to keep the expected tree readable.
        jurisprudence = {u'ARRET BERKANI': {}, u'ARRET TERNON': {}}
        reports = {u'RAPPORT ARTHUIS': {}, u'LOLF': {}, u'LOV': {}}
        stop_words = {u'ABATTEMENT': {}, u'ACCORD': {}}
        expected = {
            u'APPELLATION DES LOIS ET DES RAPPORTS': {
                u'LOIS RAPPORTS JURISPRUDENCE': {
                    u'APPELLATION DE DECISIONS DE JURISPRUDENCE': jurisprudence,
                    u'APPELLATION DES RAPPORTS': reports,
                },
            },
            u'LISTE DES MOTS OUTILS': {
                u'CHAPITRE MOTS OUTILS': {u'MOTS OUTILS': stop_words},
            },
        }
        self.assertEqual(result, expected)
def test_add_concepts_from_file_sep_inside_concept(self):
    # A separator found after the label has started stays part of the label ('ti\tti').
    with self.admin_access.client_cnx() as cnx:
        created = cnx.create_entity('ConceptScheme', title=u'rapport')
        cnx.commit()
    stream = StringIO('toto\n\tti\tti\n\ttata')
    with self.admin_access.web_request() as req:
        imported = req.entity_from_eid(created.eid)
        imported.add_concepts_from_file(stream, u'utf-8', u'fr', u'\t')
        result = build_result_dict(imported)
        narrower_of_toto = {u'ti\tti': {}, u'tata': {}}
        self.assertEqual(result, {u'toto': narrower_of_toto})
def test_add_concepts_from_file_wrong_indentation(self):
    # Jumping from one separator to three must be rejected with a ValueError.
    with self.admin_access.client_cnx() as cnx:
        created = cnx.create_entity('ConceptScheme', title=u'rapport')
        cnx.commit()
    stream = StringIO('toto\n\ttiti\n\t\t\ttata')
    with self.admin_access.web_request() as req:
        imported = req.entity_from_eid(created.eid)
        with self.assertRaises(ValueError) as caught:
            imported.add_concepts_from_file(stream, u'utf-8', u'fr', u'\t')
        error_message = str(caught.exception)
        self.assertEqual(error_message, 'Inconsistent concept indentation')
def test_add_concepts_from_file_multiple_deindentation(self):
    # 'tutu' goes back from two separators to none in a single step.
    with self.admin_access.client_cnx() as cnx:
        created = cnx.create_entity('ConceptScheme', title=u'rapport')
        cnx.commit()
    stream = StringIO('toto\n\ttiti\n\t\ttata\ntutu')
    with self.admin_access.web_request() as req:
        imported = req.entity_from_eid(created.eid)
        imported.add_concepts_from_file(stream, u'utf-8', u'fr', u'\t')
        result = build_result_dict(imported)
        expected = {
            u'toto': {u'titi': {u'tata': {}}},
            u'tutu': {},
        }
        self.assertEqual(result, expected)
def test_add_concepts_from_file_sep_colon(self):
    # NOTE: despite the test name, the separator passed here is a comma (u',').
    # Labels surrounded by spaces (' tati ', ' tutu ') are expected without them.
    with self.admin_access.client_cnx() as cnx:
        created = cnx.create_entity('ConceptScheme', title=u'rapport')
        cnx.commit()
    stream = StringIO('toto\n,titi\n,,tata\n,, tati \n tutu \n,tuti\n,,titu\n,toti\n')
    with self.admin_access.web_request() as req:
        imported = req.entity_from_eid(created.eid)
        imported.add_concepts_from_file(stream, u'utf-8', u'fr', u',')
        result = build_result_dict(imported)
        under_toto = {u'titi': {u'tata': {}, u'tati': {}}}
        under_tutu = {u'tuti': {u'titu': {}}, u'toti': {}}
        self.assertEqual(result, {u'toto': under_toto, u'tutu': under_tutu})
def test_add_concepts_from_file_sep_space(self):
    # Same kind of hierarchy with a space as separator; 'titi iti' keeps its inner space.
    with self.admin_access.client_cnx() as cnx:
        created = cnx.create_entity('ConceptScheme', title=u'rapport')
        cnx.commit()
    stream = StringIO('toto\n titi iti\n tata\n tati \ntutu \n tuti\n titu\n toti\n')
    with self.admin_access.web_request() as req:
        imported = req.entity_from_eid(created.eid)
        imported.add_concepts_from_file(stream, u'utf-8', u'fr', u' ')
        result = build_result_dict(imported)
        under_toto = {u'titi iti': {u'tata': {}, u'tati': {}}}
        under_tutu = {u'tuti': {u'titu': {}}, u'toti': {}}
        self.assertEqual(result, {u'toto': under_toto, u'tutu': under_tutu})
class ConceptTC(CubicWebTC):
def setUp(self):
......
......@@ -23,7 +23,7 @@ from cubicweb.devtools.testlib import CubicWebTC
class ViewsTC(CubicWebTC):
def test_scheme_concepts_import(self):
def test_flat_scheme_concepts_import(self):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'musique')
cnx.commit()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment