-- Revision 5 (by dpavlin, 2008/03/15 21:57:40): strip HTML from entry content
-- Composite row type describing a single feed entry; getfeed() returns a set of these.
create type blog as (
	feed    text,        -- title of the feed the entry came from
	title   text,        -- entry title
	content text,        -- entry body with markup removed
	pubdate timestamptz, -- entry issue date
	author  text,        -- entry author
	link    text         -- entry link
);

-- getfeed(uri): parse the feed found at the given URI with XML::Feed and
-- return one row of type blog per feed entry.
--
--   argument: text  URI of the feed to fetch
--   returns:  setof blog (feed title, entry title, content with HTML tags
--             removed, issue date, author, link)
--   raises:   an error carrying XML::Feed's error string when parsing fails
--
-- Declared as plperlu because the body loads the XML::Feed Perl module.
create or replace function getfeed(text)
returns setof blog
language plperlu
as $$
	my ($uri) = @_;

	use XML::Feed;
	use Scalar::Util qw(blessed);

	my $feed = XML::Feed->parse(URI->new($uri)) or die XML::Feed->errstr;

	# Anonymous subs called through coderefs: a PL/Perl body is itself wrapped
	# in a subroutine, so a named sub declared here would be a nested named
	# sub, which the PL/Perl documentation warns against.

	# Remove HTML tags and collapse runs of whitespace into a single space.
	# An undefined body is passed through so the column becomes NULL.
	my $strip_html = sub {
		my $t = shift;
		return undef unless defined $t;
		$t =~ s/<\/?[^>]+>//gs;
		$t =~ s/\s\s+/ /gs;
		return $t;
	};

	# Render the issue date for the timestamptz column. Default DateTime
	# stringification omits the UTC offset, so the value would be read in the
	# session time zone; append the offset whenever the object carries a real
	# (non-floating) time zone. Anything else is stringified as before.
	my $format_date = sub {
		my $dt = shift;
		return undef unless defined $dt;
		return "$dt" unless blessed($dt) && $dt->can('time_zone') && $dt->can('strftime');
		return "$dt" if $dt->time_zone->is_floating;
		return $dt->strftime('%Y-%m-%dT%H:%M:%S%z');
	};

	# The feed title is the same for every entry; look it up once.
	my $feed_title = $feed->title;

	for my $entry ($feed->entries) {
		return_next({
			feed	=> $feed_title,
			title	=> $entry->title,
			content	=> $strip_html->( $entry->content->body ),
			link	=> $entry->link,
			pubdate	=> $format_date->( $entry->issued ),
			author	=> $entry->author,
		});
	}

	# All rows were already emitted through return_next.
	return undef;
$$;

-- my_feeds: entries of both feeds combined into one result set.
-- Every query against this view calls getfeed() for each URL.
-- "union" (rather than "union all") also removes identical rows.
create view my_feeds as
select
	feed,
	author,
	title,
	content,
	pubdate,
	link
from getfeed('http://blog.rot13.org/index.xml')
union
select
	feed,
	author,
	title,
	content,
	pubdate,
	link
from getfeed('http://saturn.ffzg.hr/noauth/feed/workspace/rot13?category=Recent%20Changes;type=Atom')
;

-- Client encoding for this session; adjust it when your terminal is not iso-8859-2.
set client_encoding to 'iso-8859-2';

-- Materialize the view: copy the current rows of my_feeds into a new table
-- "feeds" so later queries do not have to go through the view again.
-- Columns are listed explicitly (same order as the view) so the table shape
-- does not change silently if the view definition does.
select
	feed,
	author,
	title,
	content,
	pubdate,
	link
into feeds
from my_feeds
;

-- Show the 30 most recent entries with an 80-character content preview.
select
	feed,
	author,
	title,
	-- string positions are 1-based: starting at 0 would yield only 79 characters
	substr(content, 1, 80),
	pubdate,
	link
from feeds
-- descending order sorts NULLs first by default; keep undated entries at the end
order by pubdate desc nulls last
limit 30
;