Skip to content
Snippets Groups Projects
Commit 72715685d5e3 authored by Sylvain Thénault
Browse files

make feedparser, rdflib, datafeed only recommends

parent 852e7c11f252
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
from datetime import datetime from datetime import datetime
from lxml.html import fromstring, tostring from lxml.html import fromstring, tostring
import feedparser
import rdflib try:
import feedparser
except ImportError:
feedparser = None
...@@ -10,3 +13,10 @@ ...@@ -10,3 +13,10 @@
from cubes.datafeed.sobjects import DataFeedParser try:
import rdflib
except ImportError:
rdflib = None
else:
RDF = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
SIOC = rdflib.Namespace('http://rdfs.org/sioc/ns#')
DCTERMS = rdflib.Namespace('http://purl.org/dc/terms/')
...@@ -12,7 +22,8 @@ ...@@ -12,7 +22,8 @@
RDF = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') try:
SIOC = rdflib.Namespace('http://rdfs.org/sioc/ns#') from cubes.datafeed.sobjects import DataFeedParser
DCTERMS = rdflib.Namespace('http://purl.org/dc/terms/') except ImportError:
DataFeedParser = None
def get_subject(g, pred, obj): def get_subject(g, pred, obj):
subjects = list(g.subjects(pred, obj)) subjects = list(g.subjects(pred, obj))
...@@ -135,7 +146,27 @@ ...@@ -135,7 +146,27 @@
AVATAR_CACHE[screen_name] = user['profile_image_url'] AVATAR_CACHE[screen_name] = user['profile_image_url']
return AVATAR_CACHE[screen_name] return AVATAR_CACHE[screen_name]
class BlogPostParser(DataFeedParser): if DataFeedParser is not None:
__abstract__ = True class BlogPostParser(DataFeedParser):
entity_type = 'BlogEntry' __abstract__ = True
entity_type = 'BlogEntry'
def process(self, url):
for item in self.parse(url):
author = item.pop('author', None)
avatar = item.pop('avatar', None)
euri = self.sget_entity('ExternalUri', uri=item.pop('uri'))
if euri.same_as:
sys.stdout.write('.')
post = self.update_blogpost(euri.same_as[0], item)
else:
sys.stdout.write('+')
post = self.create_blogpost(item, euri)
if author:
account = self.sget_entity('UserAccount', name=author)
self.sget_relation(post.eid, 'has_creator', account.eid)
if avatar:
auri = self.sget_entity('ExternalUri', uri=avatar)
self.sget_relation(account.eid, 'has_avatar', auri.eid)
sys.stdout.flush()
...@@ -141,20 +172,10 @@ ...@@ -141,20 +172,10 @@
def process(self, url): def create_blogpost(self, item, uri):
for item in self.parse(url): entity = self._cw.create_entity(self.entity_type, **item)
author = item.pop('author', None) entity.set_relations(same_as=uri)
avatar = item.pop('avatar', None) return entity
euri = self.sget_entity('ExternalUri', uri=item.pop('uri'))
if euri.same_as: def update_blogpost(self, entity, item):
sys.stdout.write('.') entity.set_attributes(**item)
post = self.update_blogpost(euri.same_as[0], item) return entity
else:
sys.stdout.write('+')
post = self.create_blogpost(item, euri)
if author:
account = self.sget_entity('UserAccount', name=author)
self.sget_relation(post.eid, 'has_creator', account.eid)
if avatar:
auri = self.sget_entity('ExternalUri', uri=avatar)
self.sget_relation(account.eid, 'has_avatar', auri.eid)
sys.stdout.flush()
...@@ -160,10 +181,6 @@ ...@@ -160,10 +181,6 @@
def create_blogpost(self, item, uri): if rdflib is not None:
entity = self._cw.create_entity(self.entity_type, **item) class BlogPostSiocParser(BlogPostParser):
entity.set_relations(same_as=uri) __regid__ = 'blogpost-sioc'
return entity parse = staticmethod(parse_blogpost_sioc)
def update_blogpost(self, entity, item):
entity.set_attributes(**item)
return entity
...@@ -169,5 +186,6 @@ ...@@ -169,5 +186,6 @@
class BlogPostSiocParser(BlogPostParser): if feedparser is not None:
__regid__ = 'blogpost-sioc' class BlogPostRSSParser(BlogPostParser):
parse = staticmethod(parse_blogpost_sioc) __regid__ = 'blogpost-rss'
parse = staticmethod(parse_blogpost_rss)
...@@ -173,5 +191,6 @@ ...@@ -173,5 +191,6 @@
class BlogPostRSSParser(BlogPostParser): class MicroBlogPostRSSParser(BlogPostParser):
__regid__ = 'blogpost-rss' __regid__ = 'microblogpost-rss'
parse = staticmethod(parse_blogpost_rss) entity_type = 'MicroBlogEntry'
parse = staticmethod(parse_microblogpost_rss)
...@@ -177,8 +196,4 @@ ...@@ -177,8 +196,4 @@
class MicroBlogPostRSSParser(BlogPostParser):
__regid__ = 'microblogpost-rss'
entity_type = 'MicroBlogEntry'
parse = staticmethod(parse_microblogpost_rss)
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys
...@@ -189,4 +204,3 @@ ...@@ -189,4 +204,3 @@
parser = globals()[name] parser = globals()[name]
pprint(list(parser(url))) pprint(list(parser(url)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment