# HG changeset patch
# User Fabien Amarger <fabien.amarger@logilab.fr>
# Date 1712237865 -7200
#      Thu Apr 04 15:37:45 2024 +0200
# Node ID 1327a35441b13f0dde2e7f146d617ccac33ed5ef
# Parent  4ae10c14846ba5c43a47c35beeffb5b185f24125
feat(schema): Add content_type to DataService

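The content type stored on the DataService is now passed as the RDF parse
format for the downloaded data, instead of being guessed from the
Content-Type header of the HTTP response.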

diff --git a/cubicweb_rodolf/__pkginfo__.py b/cubicweb_rodolf/__pkginfo__.py
--- a/cubicweb_rodolf/__pkginfo__.py
+++ b/cubicweb_rodolf/__pkginfo__.py
@@ -3,7 +3,7 @@
 modname = "cubicweb_rodolf"
 distname = "cubicweb-rodolf"
 
-numversion = (0, 3, 2)
+numversion = (0, 4, 0)
 version = ".".join(str(num) for num in numversion)
 
 license = "LGPL"
diff --git a/cubicweb_rodolf/migration/0.4.0_Any.py b/cubicweb_rodolf/migration/0.4.0_Any.py
new file mode 100644
--- /dev/null
+++ b/cubicweb_rodolf/migration/0.4.0_Any.py
@@ -0,0 +1,1 @@
+add_attribute("DataService", "content_type")
diff --git a/cubicweb_rodolf/process_helpers.py b/cubicweb_rodolf/process_helpers.py
--- a/cubicweb_rodolf/process_helpers.py
+++ b/cubicweb_rodolf/process_helpers.py
@@ -3,7 +3,6 @@
 
 from rdflib import Graph, ConjunctiveGraph
 
-from cubicweb.rdf import RDF_MIMETYPE_TO_FORMAT
 
 from rdf_data_manager import delete_graph, upload_graph
 
@@ -14,10 +13,11 @@
 UPLOAD_DELAY = 10
 
 
-def get_graph_from_url(download_url: str, log: logging.Logger) -> Graph:
+def get_graph_from_url(
+    download_url: str, content_type: str, log: logging.Logger
+) -> Graph:
     response = requests.get(
         download_url,
-        headers={"Accept": ";".join(RDF_MIMETYPE_TO_FORMAT.keys())},
         allow_redirects=True,
         timeout=4,
     )
@@ -27,15 +27,9 @@
             f" {response.status_code} {response.text}"
         )
         response.raise_for_status()
-    content_type = response.headers["Content-Type"]
-    rdf_parse_format = content_type
-    for mime_type, rdf_format in RDF_MIMETYPE_TO_FORMAT.items():
-        if mime_type in content_type:
-            rdf_parse_format = rdf_format
-            break
 
     graph = ConjunctiveGraph()
-    graph.parse(data=response.text, format=rdf_parse_format)
+    graph.parse(data=response.text, format=content_type)
     return graph
 
 
diff --git a/cubicweb_rodolf/schema.py b/cubicweb_rodolf/schema.py
--- a/cubicweb_rodolf/schema.py
+++ b/cubicweb_rodolf/schema.py
@@ -24,6 +24,7 @@
     SubjectRelation,
 )
 from cubicweb.schema import WorkflowableEntityType
+from cubicweb.rdf import RDF_MIMETYPE_TO_FORMAT
 
 
 class ImportProcedure(EntityType):
@@ -46,6 +47,11 @@
         default="daily",
     )
     description = String()
+    content_type = String(
+        required=True,
+        vocabulary=RDF_MIMETYPE_TO_FORMAT.keys(),
+        default="text/turtle",
+    )
 
 
 class ImportRecipe(EntityType):
diff --git a/cubicweb_rodolf/sobjects/process_type_registry.py b/cubicweb_rodolf/sobjects/process_type_registry.py
--- a/cubicweb_rodolf/sobjects/process_type_registry.py
+++ b/cubicweb_rodolf/sobjects/process_type_registry.py
@@ -18,7 +18,7 @@
         import_recipe = import_process.import_recipe[0]
         import_procedure = import_process.import_procedure[0]
         dataservice = import_recipe.dataservice[0]
-        graph = get_graph_from_url(dataservice.data_url, log)
+        graph = get_graph_from_url(dataservice.data_url, dataservice.content_type, log)
         dataset_file = import_process.update_dataset(graph)
         upload_graph_to_virtuoso_endpoint(
             import_procedure,
@@ -36,6 +36,6 @@
     def __call__(self, import_process: ImportProcess, log: logging.Logger) -> Graph:
         import_recipe = import_process.import_recipe[0]
         dataservice = import_recipe.dataservice[0]
-        graph = get_graph_from_url(dataservice.data_url, log)
+        graph = get_graph_from_url(dataservice.data_url, dataservice.content_type, log)
         import_process.update_dataset(graph)
         return graph