Commit fb9bb00f authored by Denis Laxalde's avatar Denis Laxalde
Browse files

[entities,hooks] Add a complete support for resources

In particular, rely on a specific adapter and a `ckan_resource_id` attribute
similarly to dataset-like entity types.

This allows to completely support file upload and update on CKAN instance
(though we always upload data upon metadata modifications).

Closes #4753964.
parent e7ff8cdf0adf
......@@ -18,8 +18,9 @@
import re
import unicodedata
from urllib2 import urlopen
from cubicweb.predicates import relation_possible
from cubicweb.predicates import relation_possible, adaptable
from cubicweb.view import EntityAdapter
from cubes.ckanpublish.utils import ckan_post
......@@ -39,9 +40,9 @@ def slugify(value):
return re.sub('[-\s]+', '-', value)
class CKANPublishableAdapter(EntityAdapter):
class CKANDatasetAdapter(EntityAdapter):
"""Adapter for entity that can be mapped to a CKAN dataset"""
__regid__ = 'ICKANPublishable'
__regid__ = 'ICKANDataset'
__abstract__ = True
__select__ = (EntityAdapter.__select__ &
relation_possible('ckan_dataset_id', role='subject'))
......@@ -128,30 +129,53 @@ class CKANPublishableAdapter(EntityAdapter):
"""
return None
def dataset_resources(self):
"""May return a list of entities adaptable as IDownloadable to be set
as resources of the CKAN dataset.
class CKANResourceAdapter(EntityAdapter):
"""Adapter for entity that can be mapped to a CKAN resource"""
__regid__ = 'ICKANResource'
__abstract__ = True
__select__ = (EntityAdapter.__select__ &
relation_possible('ckan_resource_id', role='subject'))
@property
def dataset(self):
"""The dataset-like entity (adaptable as ICKANDataset) associated with
this resource.
"""
return []
raise NotImplementedError()
def ckan_resources(self):
"""Yield dicts of CKAN dataset resource info"""
resources_url = self.ckan_package_resources()
for entity in self.dataset_resources():
adapted = entity.cw_adapt_to('IDownloadable')
if adapted is None:
self.warning(
'invalid resource %r, could not adapt to IDownloadable',
entity)
continue
url = adapted.download_url()
if url in resources_url:
self.info('skipping resource %s and it seems to be already '
'on CKAN side', entity)
continue
yield {'url': url,
'name': adapted.download_file_name(),
'mimetype': adapted.download_content_type(),
'created': str(entity.creation_date),
'last_modified': str(entity.modification_date),
}
def ckan_metadata(self):
"""Return a dict of metadata about the resource"""
metadata = {
'created': self.entity.creation_date.isoformat(),
'last_modified': self.entity.modification_date.isoformat(),
}
return metadata
def read(self):
"""Read resource content (file-like interface)."""
raise NotImplementedError()
class DownloadableCKANResourceAdapter(CKANResourceAdapter):
"""Adapter for downloadable entities that can be mapped to a CKAN
resource.
"""
__abstract__ = True
__select__ = CKANResourceAdapter.__select__ & adaptable('IDownloadable')
def ckan_metadata(self):
"""Basic metadata extended with IDownloadable"""
metadata = super(DownloadableCKANResourceAdapter, self).ckan_metadata()
idownload = self.entity.cw_adapt_to('IDownloadable')
metadata.update(
{'name': idownload.download_file_name(),
'mimetype': idownload.download_content_type(),
}
)
return metadata
def read(self):
"""Read content using IDownloadable adapter from URL."""
idownload = self.entity.cw_adapt_to('IDownloadable')
return urlopen(idownload.download_url())
......@@ -26,10 +26,10 @@ from cubes.ckanpublish.utils import (ckan_post, CKANPostError,
ckan_instance_configured)
def _ckan_action(config, eid, action, data=None):
def _ckan_action(config, eid, action, **kwargs):
"""Run `ckan_post` and eventually raise ValidationError."""
try:
return ckan_post(config, action, data=data)
return ckan_post(config, action, **kwargs)
except (CKANPostError, RequestException) as exc:
raise ValidationError(eid, {'ckan_dataset_id': unicode(exc)})
......@@ -38,7 +38,7 @@ def create_dataset(config, eid, data):
"""Create a CKAN dataset and set `ckan_dataset_id` attribute or
respective entity. Return the dataset id.
"""
res = _ckan_action(config, eid, 'package_create', data)
res = _ckan_action(config, eid, 'package_create', data=data)
return res['id']
......@@ -54,28 +54,31 @@ def delete_dataset(config, eid, datasetid):
_ckan_action(config, eid, 'package_delete', data={'id': datasetid})
def add_dataset_resource(config, eid, datasetid, resource_data):
def create_dataset_resource(config, eid, datasetid, metadata, data):
"""Add a resource to an existing CKAN dataset"""
resource_data['package_id'] = datasetid
return _ckan_action(config, eid, 'resource_create', data=resource_data)
metadata['package_id'] = datasetid
res = _ckan_action(config, eid, 'resource_create', data=metadata,
files=[('upload', data)])
return res['id']
def update_dataset_resource(config, eid, resourceid, metadata, data):
"""Update an existing CKAN resource."""
metadata['id'] = resourceid
_ckan_action(config, eid, 'resource_update', data=metadata,
files=[('upload', data)])
def delete_dataset_resources(config, eid, datasetid):
"""Delete resources of a CKAN dataset"""
res = _ckan_action(config, eid, 'package_show', data={'id': datasetid})
resources = res['resources']
deleted = set([])
for resource in resources:
_ckan_action(config, eid, 'resource_delete', {'id': resource['id']})
deleted.add(resource['id'])
return deleted
def delete_dataset_resource(config, eid, resourceid):
"""Delete a CKAN resource"""
_ckan_action(config, eid, 'resource_delete', data={'id': resourceid})
class DeleteCKANDataSetHook(hook.Hook):
"""Delete CKAN dataset upon deletion of the corresponding entity"""
__regid__ = 'ckanpublish.delete-ckan-dataset'
__select__ = (hook.Hook.__select__ & ckan_instance_configured &
adaptable('ICKANPublishable') &
adaptable('ICKANDataset') &
score_entity(lambda x: x.ckan_dataset_id))
events = ('before_delete_entity', )
......@@ -87,7 +90,7 @@ class AddOrUpdateCKANDataSetHook(hook.Hook):
"""Add or update a CKAN dataset upon addition or update of an entity"""
__regid__ = 'ckanpublish.add-update-ckan-dataset'
__select__ = (hook.Hook.__select__ & ckan_instance_configured &
adaptable('ICKANPublishable'))
adaptable('ICKANDataset'))
events = ('after_add_entity', 'after_update_entity', )
def __call__(self):
......@@ -103,12 +106,10 @@ class CKANDatasetOp(hook.DataOperationMixIn, hook.Operation):
datasetid = entity.ckan_dataset_id
config = self.cnx.vreg.config
if self.cnx.deleted_in_transaction(eid):
deleted = delete_dataset_resources(config, eid, datasetid)
self.info('deleted CKAN resources %s', ', '.join(deleted))
delete_dataset(config, eid, datasetid)
self.info('deleted CKAN dataset %s', datasetid)
else:
cpublish = entity.cw_adapt_to('ICKANPublishable')
cpublish = entity.cw_adapt_to('ICKANDataset')
data = cpublish.ckan_data()
if datasetid is not None:
update_dataset(config, eid, datasetid, data)
......@@ -120,8 +121,60 @@ class CKANDatasetOp(hook.DataOperationMixIn, hook.Operation):
'SET X ckan_dataset_id %(dsid)s WHERE X eid %(eid)s',
{'eid': eid, 'dsid': datasetid})
self.info('created CKAN dataset %s', datasetid)
for resource_data in cpublish.ckan_resources():
resource_id = add_dataset_resource(config, eid, datasetid,
resource_data)
self.info('add resource %s to CKAN dataset %s' %
(resource_id, datasetid))
class DeleteCKANResourceHook(hook.Hook):
"""Delete CKAN resource upon deletion of the corresponding entity"""
__regid__ = 'ckanpublish.delete-ckan-resource'
__select__ = (hook.Hook.__select__ & ckan_instance_configured &
adaptable('ICKANResource') &
score_entity(lambda x: x.ckan_resource_id))
events = ('before_delete_entity', )
def __call__(self):
CKANResourceOp.get_instance(self._cw).add_data(self.entity.eid)
class AddOrUpdateCKANResourceHook(hook.Hook):
"""Add or update a CKAN resource upon addition or update of an entity"""
__regid__ = 'ckanpublish.add-update-ckan-resource'
__select__ = (hook.Hook.__select__ & ckan_instance_configured &
adaptable('ICKANResource'))
events = ('after_add_entity', 'after_update_entity', )
def __call__(self):
CKANResourceOp.get_instance(self._cw).add_data(self.entity.eid)
class CKANResourceOp(hook.DataOperationMixIn, hook.Operation):
"""Operation to create, update or delete a CKAN resource"""
def precommit_event(self):
for eid in self.get_data():
entity = self.cnx.entity_from_eid(eid)
resourceid = entity.ckan_resource_id
iresource = entity.cw_adapt_to('ICKANResource')
config = self.cnx.vreg.config
if self.cnx.deleted_in_transaction(eid) and resourceid is not None:
delete_dataset_resource(config, eid, resourceid)
self.info('deleted resource %s', resourceid)
else:
metadata = iresource.ckan_metadata()
data = iresource.read()
if resourceid is None:
dataset = iresource.dataset
assert dataset, 'no dataset for resource #%d' % eid
if not dataset.ckan_dataset_id:
self.error('skipping resource #%d as its dataset %#d is '
'not in the CKAN instance', eid, dataset.eid)
continue
resourceid = create_dataset_resource(
config, eid, dataset.ckan_dataset_id, metadata, data)
self.cnx.execute(
'SET X ckan_resource_id %(rid)s WHERE X eid %(eid)s',
{'eid': eid, 'rid': resourceid})
self.info('added resource %s', resourceid)
else:
update_dataset_resource(
config, eid, resourceid, metadata, data)
self.info('updated resource %s', resourceid)
from cubicweb.predicates import is_instance
from cubicweb.predicates import is_instance, relation_possible
from cubes.ckanpublish.entities import CKANPublishableAdapter
from cubes.ckanpublish.entities import (CKANDatasetAdapter,
DownloadableCKANResourceAdapter)
class CWDataSetCKANPublish(CKANPublishableAdapter):
__select__ = CKANPublishableAdapter.__select__ & is_instance('CWDataSet')
class CWDatasetCKANDatasetAdapter(CKANDatasetAdapter):
__select__ = CKANDatasetAdapter.__select__ & is_instance('CWDataSet')
def dataset_maintainer(self):
if self.entity.maintainer:
return self.entity.maintainer[0]
def dataset_resources(self):
return self.entity.resources
class FileCKANResourceAdapter(DownloadableCKANResourceAdapter):
__select__ = (DownloadableCKANResourceAdapter.__select__ &
is_instance('File') &
relation_possible('resources', role='object'))
@property
def dataset(self):
if self.entity.reverse_resources:
return self.entity.reverse_resources[0]
......@@ -11,3 +11,11 @@ class CWDataSet(EntityType):
)
maintainer = SubjectRelation('CWUser', cardinality='?*')
resources = SubjectRelation('File', cardinality='*?', composite='subject')
class ckan_resource_id(RelationDefinition):
__permissions__={'read': ('managers', 'users', 'guests'),
'add': (),
'update': ()}
subject = 'File'
object = 'String'
......@@ -29,24 +29,24 @@ class CKANPublishHooksTC(CubicWebTC):
def tearDown(self):
with self.admin_access.repo_cnx() as cnx:
# Delete Table linked to a CKAN dataset, so that the latter gets
# deleted.
# Delete entities linked to a CKAN dataset as well as their
# resources, so that the CKAN dataset and resources get deleted.
# However, datasets will still have to be purge from the web ui.
cnx.execute('DELETE CWDataSet X WHERE EXISTS(X ckan_dataset_id I)')
cnx.execute('DELETE File X WHERE EXISTS(X ckan_resource_id I)')
cnx.commit()
super(CKANPublishHooksTC, self).tearDown()
def test_entity_creation(self):
def test_dataset(self):
with self.admin_access.repo_cnx() as cnx:
entity = cnx.create_entity('CWDataSet', name=u'buz buz ?!',
description=u'opendata buzzzz')
cnx.commit()
yield self._check_entity_create, cnx, entity
yield self._check_entity_update, cnx, entity
yield self._check_entity_resources, cnx, entity
yield self._check_entity_delete, cnx, entity
yield self._check_dataset_create, cnx, entity
yield self._check_dataset_update, cnx, entity
yield self._check_dataset_delete, cnx, entity
def _check_entity_create(self, cnx, entity):
def _check_dataset_create(self, cnx, entity):
self.set_description('entity creation')
self.assertIsNotNone(entity.ckan_dataset_id)
result = ckan_post(self.ckan_config, 'package_show',
......@@ -54,12 +54,12 @@ class CKANPublishHooksTC(CubicWebTC):
self.assertEqual(result['name'], '%s-buz-buz' % entity.eid)
self.assertEqual(result['title'], entity.name)
self.assertEqual(result['notes'], entity.description)
cpublish = entity.cw_adapt_to('ICKANPublishable')
cpublish = entity.cw_adapt_to('ICKANDataset')
organization_id = cpublish.ckan_get_organization_id(
self.dataset_owner_org)
self.assertEqual(result['owner_org'], organization_id)
def _check_entity_update(self, cnx, entity):
def _check_dataset_update(self, cnx, entity):
self.set_description('entity update')
entity.cw_set(description=u'no this is actually serious')
cnx.commit()
......@@ -75,40 +75,58 @@ class CKANPublishHooksTC(CubicWebTC):
self.assertEqual(result['maintainer'], 'T. Oto')
self.assertEqual(result['maintainer_email'], 'to@t.o')
def _check_entity_resources(self, cnx, entity):
self.set_description('entity resources')
resource = cnx.create_entity('File', data=Binary('yui'),
data_format=u'text/plain',
data_name=u'blurp',
reverse_resources=entity)
cnx.commit()
result = ckan_post(self.ckan_config, 'package_show',
{'id': entity.ckan_dataset_id})
resources = result['resources']
self.assertEqual(len(resources), 1)
r0 = resources[0]
iresource = resource.cw_adapt_to('IDownloadable')
self.assertEqual(r0['url'], iresource.download_url())
# Update the entity (should trigger another push to CKAN, but no new
# resource).
entity.cw_set(description=u'blurp')
cnx.commit()
result = ckan_post(self.ckan_config, 'package_show',
{'id': entity.ckan_dataset_id})
resources = result['resources']
self.assertEqual(len(resources), 1)
def _check_entity_delete(self, cnx, entity):
def _check_dataset_delete(self, cnx, entity):
self.set_description('entity deletion')
ckanid = entity.ckan_dataset_id
result = ckan_post(self.ckan_config, 'package_show',
{'id': ckanid})
resource_id = result['resources'][0]['id']
entity.cw_delete()
cnx.commit()
result = ckan_post(self.ckan_config, 'package_show',
{'id': ckanid})
self.assertEqual(result['state'], 'deleted')
def test_resources(self):
with self.admin_access.repo_cnx() as cnx:
dataset = cnx.create_entity('CWDataSet', name=u'blurp',
description=u'flop')
resource = cnx.create_entity('File', data=Binary('yui'),
data_format=u'text/plain',
data_name=u'blurp',
reverse_resources=dataset)
cnx.commit()
yield self._check_resource_creation, cnx, dataset, resource
yield self._check_resource_update, cnx, resource
yield self._check_resource_delete, cnx, dataset, resource
def _check_resource_creation(self, cnx, dataset, resource):
self.set_description('resource creation')
self.assertIsNotNone(resource.ckan_resource_id)
result = ckan_post(self.ckan_config, 'package_show',
{'id': dataset.ckan_dataset_id})
resources = result['resources']
self.assertEqual(len(resources), 1)
result = ckan_post(self.ckan_config, 'resource_show',
{'id': resource.ckan_resource_id})
self.assertEqual(result['created'],
resource.creation_date.isoformat())
self.assertEqual(result['last_modified'],
resource.modification_date.isoformat())
self.assertEqual(result['name'], u'blurp')
self.assertEqual(result['mimetype'], 'text/plain')
def _check_resource_update(self, cnx, resource):
self.set_description('resource update')
resource.cw_set(data_name=u'gloups')
cnx.commit()
result = ckan_post(self.ckan_config, 'resource_show',
{'id': resource.ckan_resource_id})
self.assertEqual(result['name'], u'gloups')
def _check_resource_delete(self, cnx, dataset, resource):
self.set_description('resource deletion')
ckanid = dataset.ckan_dataset_id
resource_id = resource.ckan_resource_id
resource.cw_delete()
cnx.commit()
result = ckan_post(self.ckan_config, 'resource_show',
{'id': resource_id})
self.assertEqual(result['state'], 'deleted')
......
......@@ -27,14 +27,16 @@ class CKANPostError(Exception):
"""CKAN post action error"""
def ckan_post(config, action, data=None):
def ckan_post(config, action, data=None, files=None):
base = config['ckan-baseurl']
if not base.endswith('/'):
base += '/'
url = urljoin(base, 'api/3/action/' + action)
headers = {'Authorization': config['ckan-api-key'],
'Content-Type': 'application/json'}
resp = requests.post(url, headers=headers, data=json.dumps(data or {}))
headers = {'Authorization': config['ckan-api-key']}
if files is None:
data = json.dumps(data or {})
headers['Content-Type'] = 'application/json'
resp = requests.post(url, headers=headers, data=data, files=files)
try:
jresp = resp.json()
except ValueError:
......@@ -43,7 +45,11 @@ def ckan_post(config, action, data=None):
if resp.ok:
return jresp['result']
else:
error = jresp['error']
try:
error = jresp['error']
except TypeError:
# Sometimes, jresp is not as dict.
error = jresp
raise CKANPostError('action %s failed: %s' % (action, error))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment