| Revision 5 (by dpavlin, 2008/03/15 21:57:40) |
strip HTML from entry content |
-- Composite row type describing a single feed entry.
-- getfeed() is declared as "returns setof blog", so each entry it emits
-- must supply these attributes.
create type blog as (
    feed    text,
    title   text,
    content text,
    pubdate timestamptz,
    author  text,
    link    text
);
-- getfeed(uri text) returns setof blog
--
-- Parses the feed found at the given URI with XML::Feed and emits one
-- "blog" row per feed entry:
--   feed    - title of the feed itself (same for every row of one call)
--   title   - entry title
--   content - entry body with markup removed and whitespace runs collapsed
--   pubdate - value of the entry's issued() accessor
--   author  - entry author
--   link    - entry link
--
-- Raises an error carrying XML::Feed->errstr when the feed cannot be parsed.
-- Declared in plperlu (untrusted PL/Perl) because it loads external Perl
-- modules.
create or replace function getfeed(text)
returns setof blog
language plperlu
as $$
use URI;        # URI->new is called below; load it explicitly rather than
                # relying on another module having pulled it in
use XML::Feed;

my ($uri) = @_;

my $feed = XML::Feed->parse(URI->new($uri)) or die XML::Feed->errstr;

# Anonymous sub on purpose: a PL/Perl function body is itself compiled as an
# anonymous subroutine, and the PostgreSQL documentation warns against
# declaring named subs nested inside it.
my $strip_html = sub {
    my $text = shift;
    return undef unless defined $text;   # entry without a body -> SQL NULL
    $text =~ s/<\/?[^>]+>//gs;           # drop anything that looks like a tag
    $text =~ s/\s\s+/ /gs;               # collapse whitespace runs to one space
    return $text;
};

# The feed title does not change between entries, so look it up once.
my $feed_title = scalar $feed->title;

for my $entry ($feed->entries) {
    my $content = $entry->content;
    my $body    = $content ? scalar $content->body : undef;

    # scalar() matters here: inside the hash constructor the accessors would
    # otherwise run in list context, and one that returned an empty list
    # would shift every following key/value pair. In scalar context a
    # missing value is simply undef, which becomes NULL in the row.
    return_next({
        feed    => $feed_title,
        title   => scalar $entry->title,
        content => scalar $strip_html->($body),
        link    => scalar $entry->link,
        pubdate => scalar $entry->issued,
        author  => scalar $entry->author,
    });
}

# Rows were already emitted via return_next; finish the set with undef.
return undef;
$$;
-- my_feeds: entries of both subscribed feeds presented as one relation.
-- The branches are combined with plain "union" (not "union all"), so rows
-- that are identical in every column appear only once.
create view my_feeds as
    select
        blog_entries.feed,
        blog_entries.author,
        blog_entries.title,
        blog_entries.content,
        blog_entries.pubdate,
        blog_entries.link
    from getfeed('http://blog.rot13.org/index.xml') as blog_entries
union
    select
        wiki_entries.feed,
        wiki_entries.author,
        wiki_entries.title,
        wiki_entries.content,
        wiki_entries.pubdate,
        wiki_entries.link
    from getfeed('http://saturn.ffzg.hr/noauth/feed/workspace/rot13?category=Recent%20Changes;type=Atom') as wiki_entries
;
-- Client-side character encoding for this session. It should match the
-- terminal this script is run from; change the value below if your terminal
-- is not ISO-8859-2.
set client_encoding to 'iso-8859-2';
-- Materialize the view: copy the rows my_feeds yields right now into a
-- regular table, so later queries read the stored copy.
-- "create table ... as" is the form PostgreSQL recommends over
-- "select ... into"; the explicit column list pins the table layout instead
-- of inheriting whatever "*" expands to.
create table feeds as
select
    feed,
    author,
    title,
    content,
    pubdate,
    link
from my_feeds
;
-- Show the 30 most recent stored entries, with content cut to an 80-character
-- preview.
select
    feed,
    author,
    title,
    -- substr() positions are 1-based: starting at 0 spends one unit of the
    -- length on a position before the string and yields only 79 characters.
    substr(content, 1, 80),
    pubdate,
    link
from feeds
order by pubdate desc
limit 30
;