Commit bc051843 authored by Nicolas Chauvat
Browse files

add sioc parser to enable import/export of blog posts via sioc

parent 4acad691ab73
......@@ -27,7 +27,7 @@ class BlogIFeedAdapter(EntityAdapter):
vtitle=self.entity.dc_title())
class BlogISiocContainerAdapter(EntityAdapter):
__regid__ = 'ISiocContainer'
__regid__ = 'ISIOCContainer'
__select__ = EntityAdapter.__select__ & is_instance('Blog')
def isioc_type(self):
......@@ -84,7 +84,7 @@ class BlogEntryICalendarViewsAdapter(EntityAdapter):
class BlogEntryISiocItemAdapter(EntityAdapter):
__regid__ = 'ISiocItem'
__regid__ = 'ISIOCItem'
__select__ = EntityAdapter.__select__ & is_instance('BlogEntry')
def isioc_content(self):
......
# XML <-> yams equivalence
from cubicweb.xy import xy
# Entity types: a Blog maps to sioc:Weblog, a BlogEntry to sioc:BlogPost.
xy.add_equivalence('Blog', 'sioc:Weblog')
xy.add_equivalence('BlogEntry', 'sioc:BlogPost')
# BlogEntry attributes: the first argument is "<entity type> <attribute>".
xy.add_equivalence('BlogEntry title', 'dcterms:title')
xy.add_equivalence('BlogEntry content', 'sioc:content')
# -*- coding: utf-8 -*-
import sys
from datetime import datetime
from lxml import etree
import feedparser
import rdflib
from cubes.datafeed.sobjects import DataFeedParser
SIOC = 'http://rdfs.org/sioc/ns#'
RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
DCTERMS = 'http://purl.org/dc/terms/'
def get_subject(g, pred, obj):
    """Return the single subject of the triples matching ``(?, pred, obj)``.

    :param g: graph-like object exposing ``subjects(pred, obj)``
    :param pred: predicate to match
    :param obj: object to match
    :raises AssertionError: if there is no matching subject, or more than one
    """
    subjects = list(g.subjects(pred, obj))
    # Raise explicitly rather than with an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would turn an empty match into an
    # IndexError and silently accept an ambiguous one.
    if len(subjects) != 1:
        raise AssertionError('expected exactly one subject for (%r, %r), '
                             'found %d' % (pred, obj, len(subjects)))
    return subjects[0]
def get_object(g, subj, pred):
    """Return the single object of the triples matching ``(subj, pred, ?)``.

    :param g: graph-like object exposing ``objects(subj, pred)``
    :param subj: subject to match
    :param pred: predicate to match
    :raises AssertionError: if there is no matching object, or more than one
    """
    objects = list(g.objects(subj, pred))
    # Raise explicitly rather than with an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would turn an empty match into an
    # IndexError and silently accept an ambiguous one.
    if len(objects) != 1:
        raise AssertionError('expected exactly one object for (%r, %r), '
                             'found %d' % (subj, pred, len(objects)))
    return objects[0]
def parse_blogpost_sioc(url):
    """Yield one dict per ``sioc:BlogPost`` resource of the RDF data at `url`.

    Each dict carries three unicode values: ``uri`` (the post resource),
    ``title`` (its ``dcterms:title``) and ``content`` (its ``sioc:content``).
    A post must have exactly one title and one content; otherwise
    `get_object` fails.
    """
    graph = rdflib.ConjunctiveGraph()
    graph.parse(url)
    type_pred = rdflib.URIRef(RDF + 'type')
    blogpost_class = rdflib.URIRef(SIOC + 'BlogPost')
    title_pred = rdflib.URIRef(DCTERMS + 'title')
    content_pred = rdflib.URIRef(SIOC + 'content')
    # Only the subject of each matching triple is needed.
    for post, _pred, _obj in graph.triples((None, type_pred, blogpost_class)):
        yield {
            'uri': unicode(post),
            'title': unicode(get_object(graph, post, title_pred)),
            'content': unicode(get_object(graph, post, content_pred)),
        }
def parse_blogpost_rss(url):
    """Yield one dict per entry of the feed that feedparser reads from `url`.

    Keys are ``uri`` (the entry id), ``title``, ``content`` (the entry
    description) and ``creation_date``, a datetime built from the first six
    fields of the entry's ``date_parsed``.
    """
    for entry in feedparser.parse(url).entries:
        yield {
            'uri': entry.id,
            'title': entry.title,
            'content': entry.description,
            'creation_date': datetime(*entry.date_parsed[:6]),
        }
class BlogPostParser(DataFeedParser):
    """Abstract datafeed parser storing parsed items as BlogEntry entities.

    Concrete subclasses supply ``parse(url)``, an iterable of dicts holding
    a ``uri`` key plus the attribute values of the BlogEntry.
    """
    __abstract__ = True

    def process(self, url):
        """Create or update a BlogEntry for each item parsed from `url`.

        Progress is echoed on stdout: ``+`` for a created entry, ``.`` for
        an updated one.
        """
        for attrs in self.parse(url):
            # 'uri' identifies the post and is not an entity attribute, so
            # it is removed from the dict before the dict is used as kwargs.
            exturi = self.sget_externaluri(attrs.pop('uri'))
            if not exturi.same_as:
                sys.stdout.write('+')
                self.create_blogpost(attrs, exturi)
            else:
                sys.stdout.write('.')
                self.update_blogpost(exturi.same_as[0], attrs)
            sys.stdout.flush()

    def create_blogpost(self, item, uri):
        """Create a BlogEntry from `item`, link it to `uri` via ``same_as``
        and return it.
        """
        created = self._cw.create_entity('BlogEntry', **item)
        created.set_relations(same_as=uri)
        # The attributes were already given at creation time, hence None.
        return self.update_blogpost(created, None)

    def update_blogpost(self, entity, item):
        """Set the attributes in `item` (if any) on `entity` and return it."""
        if not item:
            return entity
        entity.set_attributes(**item)
        return entity
class BlogPostSiocParser(BlogPostParser):
    """Blog post parser whose items come from `parse_blogpost_sioc`."""
    __regid__ = 'blogpost-sioc'
    # staticmethod keeps `self.parse(url)` from passing `self` to the
    # module-level function.
    parse = staticmethod(parse_blogpost_sioc)
class BlogPostRSSParser(BlogPostParser):
    """Blog post parser whose items come from `parse_blogpost_rss`."""
    __regid__ = 'blogpost-rss'
    # staticmethod keeps `self.parse(url)` from passing `self` to the
    # module-level function.
    parse = staticmethod(parse_blogpost_rss)
if __name__ == '__main__':
    # Manual check from the command line:
    #   <this file> <name of a module-level parse function> <url>
    # Pretty-prints every item the chosen function yields for the url.
    import sys
    from pprint import pprint
    func_name = sys.argv[1]
    source_url = sys.argv[2]
    parse_func = globals()[func_name]
    pprint(list(parse_func(source_url)))
# -*- coding: utf-8 -*-
import sys
# NOTE(review): `rql` and `commit` are not defined in this file; presumably
# they are provided by the shell that runs this script -- confirm.
feeds = rql('Any A WHERE A is DataFeed').entities()
for feed in feeds:
    if 'reset' not in sys.argv:
        print('----- processing %r with %s' % (feed.title, feed.parser))
        feed.pull_data()
        # Blank line after the pull.
        print('')
    else:
        # With 'reset' on the command line, only clear the retrieval
        # timestamp; no data is pulled.
        feed.set_attributes(latest_retrieval=None)
commit()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment