Commit e1c17d46 authored by Denis Laxalde's avatar Denis Laxalde
Browse files

Use a csv.Sniffer to detect the delimiter of LCSV files

This makes the "delimiter" parameter of LCSV2RDF and lcsv_extentities()
optional.
parent 922de1bbdf39
......@@ -14,6 +14,7 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import csv
from itertools import count
from six import text_type
......@@ -38,9 +39,13 @@ class LCSV2RDF(object):
reference : W3C unofficial draft, http://jenit.github.io/linked-csv/
"""
def __init__(self, stream, delimiter, encoding='utf-8',
def __init__(self, stream, delimiter=None, encoding='utf-8',
uri_generator=None, uri_cls=text_type, default_lang=None):
""" check stream validity and init attribute"""
if delimiter is None:
sample = stream.read(1024)
stream.seek(0)
delimiter = csv.Sniffer().sniff(sample).delimiter
self.source_file = ucsvreader(stream, encoding=encoding, delimiter=delimiter)
# raise an error value if the line doesn't contain a #
if uri_generator is None:
......
......@@ -156,7 +156,7 @@ class SKOSParser(datafeed.DataFeedParser):
source=self.source, raise_on_error=raise_on_error, **kwargs)
def lcsv_extentities(stream, scheme_uri, delimiter, encoding='utf-8', language_code=None):
def lcsv_extentities(stream, scheme_uri, delimiter=None, encoding='utf-8', language_code=None):
"""Return external entities generator from SKOS LCSV stream or URL (by transforming it to RDF
first).
......@@ -171,8 +171,9 @@ def lcsv_extentities(stream, scheme_uri, delimiter, encoding='utf-8', language_c
"""
graph = rdfio.RDFLibRDFGraph()
# add LCSV statements to the RDF graph
lcsv2rdf = lcsv.LCSV2RDF(stream, delimiter, encoding=encoding, default_lang=language_code,
uri_cls=graph.uri, uri_generator=lambda x: str(uuid4()) + x)
lcsv2rdf = lcsv.LCSV2RDF(stream, delimiter=delimiter, encoding=encoding,
default_lang=language_code, uri_cls=graph.uri,
uri_generator=lambda x: str(uuid4()) + x)
for (subj, pred, obj) in lcsv2rdf.triples():
graph.add(subj, pred, obj)
......
......@@ -81,6 +81,12 @@ class LCSV2RDFTC(TestCase):
'#4')
]))
def test_lcsv_parsing_sniff(self):
    """Parse the sample LCSV file without passing a delimiter.

    ``LCSV2RDF`` is built with no ``delimiter`` argument, and the test
    checks that 18 triples are produced.
    """
    fpath = self.datapath('lcsv_example_shortened.csv')
    # Keep the file open until every triple has been consumed, then make
    # sure it is closed instead of leaking the file handle.
    with open(fpath) as stream:
        lcsv2rdf = lcsv.LCSV2RDF(stream,
                                 uri_generator=lambda x: x, default_lang='es')
        triples = list(lcsv2rdf.triples())
    self.assertEqual(len(triples), 18)
if __name__ == "__main__":
from unittest import main
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment