/[Grep]/lib/Grep/Action/AddFeed.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Action/AddFeed.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 86 - (show annotations)
Fri Feb 23 21:16:44 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 3199 byte(s)
added hooks to Grep::Source->save to keep useful snippets of html in /tmp/grep (if writable)
1 use strict;
2 use warnings;
3
4 =head1 NAME
5
6 Grep::Action::AddFeed
7
8 =cut
9
10 package Grep::Action::AddFeed;
11 use base qw/Grep::Action::CreateFeed/;
12
13 use Feed::Find;
14 use LWP::UserAgent;
15
16 use Data::Dump qw/dump/;
17
=head2 canonicalize_uri

Replace the first whole-word occurrence of C<grep> in the URI argument with
C<%s> and record a canonicalization note on the C<uri> field when a
replacement was made.

Returns the (possibly modified) value. An undefined value is returned
unchanged.

=cut

sub canonicalize_uri {
    my $self  = shift;
    my $value = shift;

    # Nothing to canonicalize; bailing out here also avoids "uninitialized"
    # warnings from the string interpolation and substitution below.
    return $value unless defined $value;

    warn "uri: $value";

    # Only the first occurrence is replaced, so the URI carries exactly one
    # placeholder.
    if ( $value =~ s/\bgrep\b/%s/ ) {
        $self->canonicalization_note( uri => 'Replaced grep with %s' );
    }

    return $value;
}
33
=head2 canonicalize_cookie

Remove a leading C<Cookie:> header name from the value and replace each
line break (CR, LF or CRLF run) with a single space. A canonicalization note
is recorded on the C<cookie> field for every change made.

Returns the (possibly modified) value. An undefined value is returned
unchanged.

=cut

# disabled for now: the xx_ prefix keeps this sub from matching the
# canonicalize_cookie name given in the POD heading above
sub xx_canonicalize_cookie {
    my $self  = shift;
    my $value = shift;

    # Nothing to canonicalize; also avoids "uninitialized" warnings below.
    return $value unless defined $value;

    warn "cookie: $value";

    # Notes belong to the cookie field (they were previously filed under uri).
    $self->canonicalization_note( cookie => 'Removed Cookie: header' )
        if $value =~ s/^Cookie:\s+//;

    # Collapse each run of CR/LF so a CRLF pair yields one space, not two.
    $self->canonicalization_note( cookie => 'Converted EOL to space' )
        if $value =~ s/[\r\n]+/ /g;

    return $value;
}
=head2 take_action

Fetch the C<uri> argument (with its C<%s> placeholder filled in with the
search moniker C<grep>), optionally sending the C<cookie> argument as a
C<Cookie> header, and decide how to register the source:

=over 4

=item *

If the response Content-Type mentions C<xml>, the URI is assumed to be a feed
and the parent C<take_action> is called unchanged.

=item *

Otherwise the HTML is searched for feed links. The first link that contains
the search moniker is stored (moniker replaced by C<%s>) as the new C<uri>
argument and the parent C<take_action> is called.

=item *

Otherwise C<< Grep::Source->content_class >> is asked for a scraper class; if
one is returned it is stored in the C<source> argument and the parent
C<take_action> is called.

=back

If none of the above applies, or the fetch fails, an error is set on the
action result.

=cut

sub take_action {
    my $self = shift;
    my @ARGS = @_;

    my $ua = LWP::UserAgent->new;

    # Arguments may carry '&' and ';' written as {x!26} and {x!3b};
    # turn those back into the literal characters.
    my $escaped_char = qr/\{x!(26|3b)\}/i;

    my $cookie = $self->argument_value('cookie');
    if ( defined $cookie && length $cookie ) {
        if ( $cookie =~ s/$escaped_char/chr(hex($1))/ge ) {
            $self->argument_value( 'cookie', $cookie );
        }

        Jifty->log->debug("using cookie: $cookie");
        $ua->default_header( 'Cookie' => $cookie );
    }

    my $search_moniker = 'grep';

    my $uri = $self->argument_value('uri');
    return $self->result->error('No URI supplied')
        unless defined $uri && length $uri;
    $uri =~ s/$escaped_char/chr(hex($1))/ge;

    Jifty->log->debug("trying to find feed on $uri");

    # Fill in the placeholder by literal substitution. The URI is user input
    # and must not be used as a sprintf format: percent-escapes such as %3D
    # or %20b would be interpreted as conversions and corrupt the URL.
    ( my $fetch_uri = $uri ) =~ s/%s/$search_moniker/g;

    my $r = $ua->get($fetch_uri);

    return $self->result->error( $r->status_line . " from $uri" )
        unless $r->is_success;

    my $ct = $r->header('Content-type');
    if ( !defined $ct ) {
        warn "can't get Content-type";
        $ct = '';    # keep the match and log line below warning-free
    }
    my $content = $r->content;

    Jifty->log->debug( "got ", length($content), " bytes $ct" );

    if ( $ct =~ /xml/ ) {
        Grep::Source->save( 'addfeed.xml', $content );
        $self->result->message("Assuming $uri is feed from $ct");
        return $self->SUPER::take_action(@ARGS);
    }

    Grep::Source->save( 'addfeed.html', $content );

    # Resolve relative feed links against the response's own base URI.
    # Chopping the last path segment off $uri (without a trailing slash)
    # resolves relative links one directory too high and turns a URI without
    # a path, e.g. http://example.com, into "http:/".
    my $base_uri = $r->base || $fetch_uri;

    my @feeds = Feed::Find->find_in_html( \$content, "$base_uri" );

    if (@feeds) {

        Jifty->log->info( "found possible feeds: ", dump(@feeds) );

        # Keep only feeds that mention the search moniker and turn its first
        # occurrence back into the %s placeholder.
        my @search_feeds =
            map { ( my $t = $_ ) =~ s/\b\Q$search_moniker\E\b/%s/; $t }
            grep { /\b\Q$search_moniker\E\b/ } @feeds;

        if ( my $feed_uri = shift @search_feeds ) {

            $self->result->message( 'Found ' . @feeds . " feeds, using first: $feed_uri" );
            $self->argument_value( 'uri', $feed_uri );

            Jifty->log->debug("calling parent take_action with new uri $feed_uri");

            return $self->SUPER::take_action(@ARGS);
        }

        Jifty->log->debug( "found feeds ", dump(@feeds), " but none of them has search moniker!" );
    }

    Jifty->log->debug("no feeds found, trying content_class detection");

    my $source = Grep::Source->new();

    if ( my $class = $source->content_class($content) ) {

        Jifty->log->debug("$class registred for feed $uri");

        $self->argument_value( 'source', "$class" );
        $self->result->message("Found $class scraper for $uri");
        return $self->SUPER::take_action(@ARGS);
    }

    $self->result->error('No feeds found on supplied URI');
    return 0;
}
150
151 1;
152

  ViewVC Help
Powered by ViewVC 1.1.26