diff --git a/cubicweb_rodolf/__pkginfo__.py b/cubicweb_rodolf/__pkginfo__.py index 4ae10c14846ba5c43a47c35beeffb5b185f24125_Y3ViaWN3ZWJfcm9kb2xmL19fcGtnaW5mb19fLnB5..1327a35441b13f0dde2e7f146d617ccac33ed5ef_Y3ViaWN3ZWJfcm9kb2xmL19fcGtnaW5mb19fLnB5 100644 --- a/cubicweb_rodolf/__pkginfo__.py +++ b/cubicweb_rodolf/__pkginfo__.py @@ -3,7 +3,7 @@ modname = "cubicweb_rodolf" distname = "cubicweb-rodolf" -numversion = (0, 3, 2) +numversion = (0, 4, 0) version = ".".join(str(num) for num in numversion) license = "LGPL" diff --git a/cubicweb_rodolf/migration/0.4.0_Any.py b/cubicweb_rodolf/migration/0.4.0_Any.py new file mode 100644 index 0000000000000000000000000000000000000000..1327a35441b13f0dde2e7f146d617ccac33ed5ef_Y3ViaWN3ZWJfcm9kb2xmL21pZ3JhdGlvbi8wLjQuMF9BbnkucHk= --- /dev/null +++ b/cubicweb_rodolf/migration/0.4.0_Any.py @@ -0,0 +1,1 @@ +add_attribute("DataService", "content_type") diff --git a/cubicweb_rodolf/process_helpers.py b/cubicweb_rodolf/process_helpers.py index 4ae10c14846ba5c43a47c35beeffb5b185f24125_Y3ViaWN3ZWJfcm9kb2xmL3Byb2Nlc3NfaGVscGVycy5weQ==..1327a35441b13f0dde2e7f146d617ccac33ed5ef_Y3ViaWN3ZWJfcm9kb2xmL3Byb2Nlc3NfaGVscGVycy5weQ== 100644 --- a/cubicweb_rodolf/process_helpers.py +++ b/cubicweb_rodolf/process_helpers.py @@ -3,7 +3,6 @@ from rdflib import Graph, ConjunctiveGraph -from cubicweb.rdf import RDF_MIMETYPE_TO_FORMAT from rdf_data_manager import delete_graph, upload_graph @@ -14,6 +13,8 @@ UPLOAD_DELAY = 10 -def get_graph_from_url(download_url: str, log: logging.Logger) -> Graph: +def get_graph_from_url( + download_url: str, content_type: str, log: logging.Logger +) -> Graph: response = requests.get( download_url, @@ -18,6 +19,5 @@ response = requests.get( download_url, - headers={"Accept": ";".join(RDF_MIMETYPE_TO_FORMAT.keys())}, allow_redirects=True, timeout=4, ) @@ -27,11 +27,5 @@ f" {response.status_code} {response.text}" ) response.raise_for_status() - content_type = response.headers["Content-Type"] - rdf_parse_format = content_type - for mime_type, rdf_format in RDF_MIMETYPE_TO_FORMAT.items(): - if mime_type in content_type: - rdf_parse_format = rdf_format - break graph = ConjunctiveGraph() @@ -36,6 +30,6 @@ graph = ConjunctiveGraph() - graph.parse(data=response.text, format=rdf_parse_format) + graph.parse(data=response.text, format=content_type) return graph diff --git a/cubicweb_rodolf/schema.py b/cubicweb_rodolf/schema.py index 4ae10c14846ba5c43a47c35beeffb5b185f24125_Y3ViaWN3ZWJfcm9kb2xmL3NjaGVtYS5weQ==..1327a35441b13f0dde2e7f146d617ccac33ed5ef_Y3ViaWN3ZWJfcm9kb2xmL3NjaGVtYS5weQ== 100644 --- a/cubicweb_rodolf/schema.py +++ b/cubicweb_rodolf/schema.py @@ -24,6 +24,7 @@ SubjectRelation, ) from cubicweb.schema import WorkflowableEntityType +from cubicweb.rdf import RDF_MIMETYPE_TO_FORMAT class ImportProcedure(EntityType): @@ -46,6 +47,11 @@ default="daily", ) description = String() + content_type = String( + required=True, + vocabulary=RDF_MIMETYPE_TO_FORMAT.keys(), + default="text/turtle", + ) class ImportRecipe(EntityType): diff --git a/cubicweb_rodolf/sobjects/process_type_registry.py b/cubicweb_rodolf/sobjects/process_type_registry.py index 4ae10c14846ba5c43a47c35beeffb5b185f24125_Y3ViaWN3ZWJfcm9kb2xmL3NvYmplY3RzL3Byb2Nlc3NfdHlwZV9yZWdpc3RyeS5weQ==..1327a35441b13f0dde2e7f146d617ccac33ed5ef_Y3ViaWN3ZWJfcm9kb2xmL3NvYmplY3RzL3Byb2Nlc3NfdHlwZV9yZWdpc3RyeS5weQ== 100644 --- a/cubicweb_rodolf/sobjects/process_type_registry.py +++ b/cubicweb_rodolf/sobjects/process_type_registry.py @@ -18,7 +18,7 @@ import_recipe = import_process.import_recipe[0] import_procedure = import_process.import_procedure[0] dataservice = import_recipe.dataservice[0] - graph = get_graph_from_url(dataservice.data_url, log) + graph = get_graph_from_url(dataservice.data_url, dataservice.content_type, log) dataset_file = import_process.update_dataset(graph) upload_graph_to_virtuoso_endpoint( import_procedure, @@ -36,6 +36,6 @@ def __call__(self, import_process: ImportProcess, log: logging.Logger) -> Graph: import_recipe = import_process.import_recipe[0] dataservice = import_recipe.dataservice[0] - graph = get_graph_from_url(dataservice.data_url, log) + graph = get_graph_from_url(dataservice.data_url, dataservice.content_type, log) import_process.update_dataset(graph) return graph