Commit e51ddfcb authored by Denis Laxalde
Browse files

[py3] Feed extentities generator with an encoded stream

We now explicitly open files in bytes mode to produce the stream
for LCSV and SKOS data import. Similarly, when an in-memory object is
used, it is now a BytesIO. This changes nothing on Python 2, but it is
required to keep things working on Python 3, where a file opened in
text mode yields str rather than the encoded bytes the importers expect.
parent 4dd85c74c276
......@@ -17,6 +17,7 @@
"""
from __future__ import print_function
import io
import logging
import sys
......@@ -130,7 +131,7 @@ class ImportSkosData(Command):
def extentities_generator():
"""ExtEntity generator function holding control on `fpath` file."""
with open(fpath) as stream:
with io.open(fpath, 'rb') as stream:
for extentity in lcsv_extentities(stream, scheme_uri,
language_code=u'fr'):
yield extentity
......
......@@ -16,7 +16,8 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.
from functools import wraps
from StringIO import StringIO
import io
from io import BytesIO
from logilab.common.testlib import require_module
......@@ -76,7 +77,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
csv_file = self.datapath('thesaurus_interdoc_7_hierageneTab_shortened.csv')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
with open(csv_file) as sourcefile:
with io.open(csv_file, 'rb') as sourcefile:
scheme.add_concepts_from_file(sourcefile, u'utf-8', u'fr', u',')
result = build_result_dict(scheme)
expected = {
......@@ -133,7 +134,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
csv_file = self.datapath('hierarchical_csv_example_shortened.csv')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
with open(csv_file) as sourcefile:
with io.open(csv_file, 'rb') as sourcefile:
scheme.add_concepts_from_file(sourcefile, u'utf-8', u'fr', u'\t')
result = build_result_dict(scheme)
expected = {
......@@ -160,7 +161,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'rapport')
cnx.commit()
rapport = StringIO('toto\n\tti\tti\n\ttata')
rapport = BytesIO(b'toto\n\tti\tti\n\ttata')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
scheme.add_concepts_from_file(rapport, u'utf-8', u'fr', u'\t')
......@@ -173,7 +174,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'rapport')
cnx.commit()
with open(self.datapath('bad_encoding.csv')) as fobj:
with io.open(self.datapath('bad_encoding.csv'), 'rb') as fobj:
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
with self.assertRaises(CSVDecodeError) as cm:
......@@ -191,7 +192,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'rapport')
cnx.commit()
rapport = StringIO('toto\n\ttiti\n\t\t\ttata')
rapport = BytesIO(b'toto\n\ttiti\n\t\t\ttata')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
with self.assertRaises(CSVIndentationError) as cm:
......@@ -202,7 +203,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'rapport')
cnx.commit()
rapport = StringIO('toto\n\ttiti\n\t\ttata\ntutu')
rapport = BytesIO(b'toto\n\ttiti\n\t\ttata\ntutu')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
scheme.add_concepts_from_file(rapport, u'utf-8', u'fr', u'\t')
......@@ -215,7 +216,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'rapport')
cnx.commit()
rapport = StringIO('toto\n,titi\n,,tata\n,, tati \n tutu \n,tuti\n,,titu\n,toti\n')
rapport = BytesIO(b'toto\n,titi\n,,tata\n,, tati \n tutu \n,tuti\n,,titu\n,toti\n')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
scheme.add_concepts_from_file(rapport, u'utf-8', u'fr', u',')
......@@ -228,7 +229,7 @@ class ConceptSchemeTC(testlib.CubicWebTC):
with self.admin_access.client_cnx() as cnx:
scheme = cnx.create_entity('ConceptScheme', title=u'rapport')
cnx.commit()
rapport = StringIO('toto\n titi iti\n tata\n tati \ntutu \n tuti\n titu\n toti\n')
rapport = BytesIO(b'toto\n titi iti\n tata\n tati \ntutu \n tuti\n titu\n toti\n')
with self.admin_access.web_request() as req:
scheme = req.entity_from_eid(scheme.eid)
scheme.add_concepts_from_file(rapport, u'utf-8', u'fr', u' ')
......
......@@ -15,6 +15,8 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import io
from cubicweb.devtools.testlib import BaseTestCase as TestCase
from cubicweb_skos import lcsv
......@@ -24,20 +26,20 @@ from cubicweb_skos.rdfio import unicode_with_language as ul
class LCSV2RDFTC(TestCase):
def test_missing_prolog_column(self):
stream = open(self.datapath('lcsv_example_missing_prolog.csv'))
stream = io.open(self.datapath('lcsv_example_missing_prolog.csv'), 'rb')
with self.assertRaises(lcsv.InvalidLCSVFile) as cm:
lcsv.LCSV2RDF(stream, '\t', 'utf-8')
self.assertIn("missing prolog column", str(cm.exception))
def test_missing_id_column(self):
stream = open(self.datapath('lcsv_example_missing_id.csv'))
stream = io.open(self.datapath('lcsv_example_missing_id.csv'), 'rb')
with self.assertRaises(lcsv.InvalidLCSVFile) as cm:
lcsv.LCSV2RDF(stream, '\t', 'utf-8')
self.assertIn("missing $id column", str(cm.exception))
def test_lcsv_parsing(self):
fpath = self.datapath('lcsv_example_shortened.csv')
lcsv2rdf = lcsv.LCSV2RDF(open(fpath), '\t', 'utf-8',
lcsv2rdf = lcsv.LCSV2RDF(io.open(fpath, 'rb'), '\t', 'utf-8',
uri_generator=lambda x: x, default_lang='es')
self.assertEqual(set(list(lcsv2rdf.triples())),
set([
......@@ -81,7 +83,7 @@ class LCSV2RDFTC(TestCase):
def test_lcsv_parsing_sniff(self):
fpath = self.datapath('lcsv_example_shortened.csv')
lcsv2rdf = lcsv.LCSV2RDF(open(fpath),
lcsv2rdf = lcsv.LCSV2RDF(io.open(fpath, 'rb'),
uri_generator=lambda x: x, default_lang='es')
self.assertEqual(len(list(lcsv2rdf.triples())), 18)
......
......@@ -15,6 +15,7 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import io
import logging
from contextlib import contextmanager
......@@ -185,14 +186,14 @@ class LCSVImportTC(testlib.CubicWebTC):
def test_import_lcsv(self):
with self.admin_access.repo_cnx() as cnx:
cnx.call_service('lcsv.skos.import', scheme_uri=self.scheme_uri,
stream=open(self.datapath('lcsv_example_shortened.csv')),
stream=io.open(self.datapath('lcsv_example_shortened.csv'), 'rb'),
delimiter='\t', encoding='utf-8', language_code=u'es')
self._check_imported_lcsv(cnx, 'es')
def test_import_lcsv_without_language_code(self):
with self.admin_access.repo_cnx() as cnx:
cnx.call_service('lcsv.skos.import', scheme_uri=self.scheme_uri,
stream=open(self.datapath('lcsv_example_shortened.csv')),
stream=io.open(self.datapath('lcsv_example_shortened.csv'), 'rb'),
delimiter='\t', encoding='utf-8')
self._check_imported_lcsv(cnx, None)
......
......@@ -16,6 +16,7 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import io
from io import BytesIO
from tempfile import NamedTemporaryFile
......@@ -37,8 +38,9 @@ class ViewsTC(testlib.CubicWebTC):
scheme = req.find('ConceptScheme', title=u'musique').one()
# simply test the form properly render and is well formed
self.view('skos.scheme.import', rset=scheme.as_rset(), req=req, template=None)
content = u'\n\nélectro\nhip-hop\nrap\njazz\nclassique\n'
req.form = self.fake_form('skos.scheme.import', {
'stream': ('filename.txt', BytesIO('\n\nélectro\nhip-hop\nrap\njazz\nclassique\n')),
'stream': ('filename.txt', BytesIO(content.encode('utf-8'))),
'encoding': u'utf-8',
'language_code': u'fr',
'format': u'simple',
......@@ -60,7 +62,7 @@ class ViewsTC(testlib.CubicWebTC):
self.view('skos.scheme.import', rset=scheme.as_rset(), req=req,
template=None)
fname = 'lcsv_example_shortened.csv'
posted = {'stream': (fname, open(self.datapath(fname))),
posted = {'stream': (fname, io.open(self.datapath(fname), 'rb')),
'encoding': u'utf-8',
'language_code': u'fr',
'delimiter': u'tab',
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment