# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

"""cubicweb-ckanpublish entity's classes"""

import re
import unicodedata

from cubicweb.predicates import relation_possible
from cubicweb.view import EntityAdapter

from cubes.ckanpublish.utils import ckan_post


# Patterns used by `slugify`, compiled once at import time. Raw strings keep
# the `\w` / `\s` escapes from being interpreted by the string parser.
_SLUG_STRIP_RE = re.compile(r'[^\w\s-]')
_SLUG_HYPHENATE_RE = re.compile(r'[-\s]+')


def slugify(value):
    """Converts to lowercase, removes non-word characters (alphanumerics
    and underscores) and converts spaces to hyphens. Also strips leading and
    trailing whitespace.

    `value` is first coerced to `unicode`, NFKD-normalized and encoded to
    ASCII, dropping whatever cannot be encoded. Any run of whitespace
    and/or hyphens is then collapsed into a single hyphen.

    Adapted from django.utils.text and novaclient.utils.
    """
    if not isinstance(value, unicode):
        value = unicode(value)
    # Decompose accented characters so that their ASCII base letter survives
    # the lossy ASCII encoding below.
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
    value = unicode(_SLUG_STRIP_RE.sub('', value).strip().lower())
    return _SLUG_HYPHENATE_RE.sub('-', value)


class CKANPublishableAdapter(EntityAdapter):
    """Adapter for entity that can be mapped to a CKAN dataset"""
    __regid__ = 'ICKANPublishable'
    __select__ = (EntityAdapter.__select__ &
                  relation_possible('ckan_dataset_id', role='subject'))

    @property
    def ckan_name(self):
        """name field suitable for CKAN (must be unique, hence the eid
        prefix)

        Built as ``<eid>-<slug>`` where the slug is derived from the value of
        the entity's main attribute.
        """
        mainattr = self.entity.e_schema.main_attribute().type
        name = getattr(self.entity, mainattr)
        # `slugify` already lower-cases its result, no need to do it again.
        return '%s-%s' % (self.entity.eid, slugify(name))

    def ckan_notes(self):
        """Build the CKAN dataset notes attribute

        Return the plain text rendering of the entity's `description`
        attribute, or None when the entity type has no `description` subject
        relation.
        """
        if 'description' not in self.entity.e_schema.subject_relations():
            return None
        # XXX CKAN uses Markdown syntax for notes field.
        return self.entity.printable_value('description', format='text/plain')

    def ckan_package_resources(self):
        """Return the list of resources URL for dataset

        The dataset is looked up through the `package_show` CKAN action using
        the entity's `ckan_dataset_id`; an empty list is returned when the
        call yields no result.
        """
        data = {'id': self.entity.ckan_dataset_id}
        res = ckan_post(self._cw.vreg.config, 'package_show', data)
        if not res:
            return []
        return [resource['url'] for resource in res['resources']]

    def ckan_get_organization_id(self, orgname):
        """Retrieve the ID of an organization given its name

        Raise an Exception when the `organization_list` CKAN action returns
        no result for `orgname`.
        """
        data = {'organizations': [orgname], 'all_fields': True}
        res = ckan_post(self._cw.vreg.config, 'organization_list', data)
        if not res:
            raise Exception('no organization named %s in CKAN instance' %
                            orgname)
        return res[0]['id']

    def dataset_title(self):
        """Title of the CKAN dataset"""
        return self.entity.dc_title()

    def ckan_data(self):
        """Return a dict with all data to build a CKAN dataset from entity

        The owner organization is the one named by the `ckan-organization`
        configuration option. Maintainer fields stay None unless
        `dataset_maintainer` returns an entity; the email is only set when
        that entity has a primary email.
        """
        data = {'name': self.ckan_name,
                'title': self.dataset_title(),
                'notes': self.ckan_notes(),
                'maintainer': None,
                'maintainer_email': None,
                }
        orgname = self._cw.vreg.config.get('ckan-organization')
        data['owner_org'] = self.ckan_get_organization_id(orgname)
        maintainer = self.dataset_maintainer()
        if maintainer:
            data['maintainer'] = maintainer.dc_long_title()
            if maintainer.primary_email:
                data['maintainer_email'] = maintainer.primary_email[0].address
        data['tags'] = list(self.dataset_tags())
        data['extras'] = list(self.dataset_extras())
        return data

    def dataset_extras(self):
        """Extra fields for the dataset

        Default implementation returns an empty list.
        """
        return []

    def dataset_tags(self):
        """Yield tag data for entity

        Yield one ``{'name': <slug>}`` dict per entity found through the
        `reverse_tags` relation, when the entity type can be object of the
        `tags` relation; otherwise yield nothing.
        """
        if self.entity.e_schema.has_relation('tags', role='object'):
            for tag in self.entity.reverse_tags:
                yield {'name': slugify(tag.name)}

    def dataset_maintainer(self):
        """May return a CWUser entity corresponding to the maintainer of
        dataset-like entity.

        Default implementation returns None.
        """
        return None

    def dataset_resources(self):
        """May return a list of entities adaptable as IDownloadable to be
        set as resources of the CKAN dataset.

        Default implementation returns an empty list.
        """
        return []

    def ckan_resources(self):
        """Yield dicts of CKAN dataset resource info

        Entities from `dataset_resources` that cannot be adapted to
        IDownloadable are skipped with a warning. Those whose download URL is
        already listed by `ckan_package_resources` are skipped as well.
        """
        resources_url = self.ckan_package_resources()
        for entity in self.dataset_resources():
            adapted = entity.cw_adapt_to('IDownloadable')
            if adapted is None:
                self.warning(
                    'invalid resource %r, could not adapt to IDownloadable',
                    entity)
                continue
            url = adapted.download_url()
            if url in resources_url:
                self.info('skipping resource %s as it seems to be already '
                          'on CKAN side', entity)
                continue
            yield {'url': url,
                   'name': adapted.download_file_name(),
                   'mimetype': adapted.download_content_type(),
                   'created': str(entity.creation_date),
                   'last_modified': str(entity.modification_date),
                   }