/[Grep]/lib/Grep/Source/Feed.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Source/Feed.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 72 - (hide annotations)
Fri Feb 23 09:54:28 2007 UTC (17 years, 3 months ago) by dpavlin
File size: 1367 byte(s)
Another great refactoring: added a new Source object which implements
searching within a feed (which can now be anything, as long as it produces
fields which somewhat resemble an RSS feed). Source plugins implement just the
(site- or source-format-specific) fetching of items.

Sample implementation of a MoinMoin scraper, which fetches full pages from the
wiki for results, so it has a performance impact on the remote wiki; be kind to it.
1 dpavlin 72 # Dobrica Pavlinusic, <dpavlin@rot13.org> 02/22/07 19:42:14 CET
2    
3     use strict;
4     use warnings;
5    
6     package Grep::Source::Feed;
7    
8     use Data::Dump qw/dump/;
9    
=head2 fetch

  Grep::Source::Feed->fetch( $source );

Downloads C<< $source->uri >> over HTTP, parses the response body with
L<XML::Feed> and calls C<< $source->add_record >> once for every entry in the
feed.

C<$source> must provide C<feed>, C<uri> and C<add_record> methods, and
C<< $source->feed >> must be a C<Grep::Model::Feed> object; its C<cookie>
(when set) is sent as the C<Cookie> request header and its C<title> is used
in error messages.

Each record is passed the keys C<in_feed>, C<title>, C<link>, C<content>,
C<summary>, C<category>, C<author>, C<issued> and C<modified>.  The two dates
are formatted as C<YYYY-MM-DD HH:MM:SS>, or are C<undef> when the entry does
not carry them.

Dies when there is no URI, when the feed object is of the wrong kind, when
the HTTP request fails, when the response body is empty, or when the body
cannot be parsed as a feed.  Returns nothing.

=cut

sub fetch {
    my $self = shift;

    my $parent = shift;

    # Separate scalar assignments: an accessor returning an empty list must
    # not shift $uri into $feed.
    my $feed = $parent->feed;
    my $uri  = $parent->uri;
    die "no uri" unless ($uri);

    # Check blessed() before calling ->isa, so that an undef or unblessed
    # feed produces this diagnostic instead of "Can't call method "isa" ...".
    require Scalar::Util;
    die "feed is not a Grep::Model::Feed but ",
        ( ref($feed) || ( defined $feed ? 'plain scalar' : 'undef' ) )
        unless Scalar::Util::blessed($feed) && $feed->isa('Grep::Model::Feed');

    # Neither module is loaded at the top of this file; load them on demand
    # so this method does not depend on some other module having done it.
    require LWP::UserAgent;
    require XML::Feed;

    my $ua = LWP::UserAgent->new;

    # Only send a Cookie header when the feed actually has one.
    my $cookie = $feed->cookie;
    $ua->default_header( 'Cookie' => $cookie )
        if defined $cookie && length $cookie;

    my $r = $ua->get($uri);
    die $feed->title . " returned " . $r->status_line . " for $uri\n"
        unless ( $r->is_success );

    my $content = $r->content;
    die "No content returned from $uri\n"
        unless defined $content && length($content) > 1;

    my $xml_feed = XML::Feed->parse( \$content )
        or die $feed->title, " returned ", XML::Feed->errstr, "\n";

    warn "getting entries from ", $xml_feed->title, "\n";

    my $date_format = "%Y-%m-%d %H:%M:%S";

    for my $entry ( $xml_feed->entries ) {

        # Fetch each date once; the accessors are otherwise called twice.
        my $issued   = $entry->issued;
        my $modified = $entry->modified;

        # Every accessor is forced into scalar context: inside this
        # key => value list an accessor returning an empty list or several
        # values would otherwise shift all following pairs.
        $parent->add_record(
            in_feed  => $feed,
            title    => scalar( $entry->title ),
            link     => scalar( $entry->link ),
            content  => scalar( $entry->content->body ),
            summary  => scalar( $entry->summary->body ),
            category => scalar( $entry->category ),
            author   => scalar( $entry->author ),
            issued   => $issued   ? scalar( $issued->strftime($date_format) )   : undef,
            modified => $modified ? scalar( $modified->strftime($date_format) ) : undef,
        );

    }

    return;
}
54    
55     1;

  ViewVC Help
Powered by ViewVC 1.1.26