/[Grep]/lib/Grep/Action/AddFeed.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Action/AddFeed.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 102 - (hide annotations)
Sun Mar 4 22:16:23 2007 UTC (17 years, 3 months ago) by dpavlin
File size: 3226 byte(s)
removed all debug warn(s) or move them to $self->log->debug
1 dpavlin 21 use strict;
2     use warnings;
3    
4     =head1 NAME
5    
6     Grep::Action::AddFeed
7    
8     =cut
9    
10     package Grep::Action::AddFeed;
11     use base qw/Grep::Action::CreateFeed/;
12    
13     use Feed::Find;
14     use LWP::UserAgent;
15 dpavlin 86
16 dpavlin 21 use Data::Dump qw/dump/;
17    
18     =head2 canonicalize_uri
19    
20     Replace C<grep> with C<%s> in URI arguments
21    
22     =cut
23    
sub canonicalize_uri {
    my ( $self, $uri ) = @_;

    $self->log->debug("canonicalize uri $uri");

    # Only the first whole-word "grep" is swapped for the %s placeholder;
    # the note is recorded only when that substitution actually happened.
    my $replaced = ( $uri =~ s/\bgrep\b/%s/ );
    $self->canonicalization_note( uri => 'Replaced grep with %s' )
        if $replaced;

    return $uri;
}
33    
34 dpavlin 26 =head2 canonicalize_cookie
35    
36     Remove C<Cookie:> header from beginning and replace EOL with space.
37    
38     =cut
39    
40     # disabled for now
sub xx_canonicalize_cookie {
    my $self  = shift;
    my $value = shift;

    # Notes are filed under the 'cookie' field, since that is the value
    # being canonicalized here (they were previously attached to 'uri').
    $self->canonicalization_note( cookie => 'Removed Cookie: header' )
        if $value =~ s/^Cookie:\s+//;

    # Each CR or LF character becomes a single space.
    $self->canonicalization_note( cookie => 'Converted EOL to space' )
        if $value =~ s/[\n\r]/ /g;

    return $value;
}
=head2 take_action

Fetch the submitted C<uri> (with the C<%s> placeholder filled in by the
search moniker C<grep>) and work out how it can be used as a feed, trying
in this order:

=over 4

=item *

the response itself, when its Content-type mentions C<xml>

=item *

feeds advertised in the returned HTML whose address contains the search
moniker (the first such feed is used, with the moniker replaced by C<%s>)

=item *

a scraper class reported by C<< Grep::Source->content_class >>

=back

In each successful case the parent C<take_action> is called with the
original arguments and its return value is passed back. Otherwise an error
is recorded on the action result.

=cut

sub take_action {
    my $self = shift;

    my @ARGS = @_;

    my $ua = LWP::UserAgent->new;

    # {x!26} and {x!3b} markers are turned back into '&' and ';'.
    my $cookie = $self->argument_value('cookie');
    if ( defined $cookie
        && $cookie =~ s/\{x!(26|3b)\}/chr(hex($1))/gei )
    {
        $self->argument_value( 'cookie', $cookie );
    }

    # Send a Cookie header only when there is something to send.
    if ( defined $cookie && length $cookie ) {
        Jifty->log->debug("using cookie: $cookie");
        $ua->default_header( 'Cookie' => $cookie );
    }

    my $search_moniker = 'grep';

    my $uri = $self->argument_value('uri');
    return $self->result->error('No URI supplied')
        unless defined $uri && length $uri;

    $uri =~ s/\{x!(26|3b)\}/chr(hex($1))/gei;

    Jifty->log->debug("trying to find feed on $uri");

    # Fill in the %s placeholder by plain substitution. Using the URI as a
    # sprintf format would also interpret URL-encoded sequences such as
    # %2F or %3D as conversions and corrupt the address.
    ( my $fetch_uri = $uri ) =~ s/%s/$search_moniker/g;

    my $r = $ua->get($fetch_uri);

    return $self->result->error( $r->status_line . " from $uri" )
        unless $r->is_success;

    my $ct = $r->header('Content-type') or warn "can't get Content-type";
    $ct = '' unless defined $ct;    # keep later matches/logging warning-free
    my $content = $r->content;

    Jifty->log->debug( "got ", length($content), " bytes $ct" );

    # Case 1: the URI itself already returns XML, so treat it as the feed.
    if ( $ct =~ /xml/ ) {
        Grep::Source->save( 'addfeed.xml', $content );
        $self->result->message("Assuming $uri is feed from $ct");
        return $self->SUPER::take_action(@ARGS);
    }

    Grep::Source->save( 'addfeed.html', $content );

    # Case 2: look for feeds advertised inside the HTML page.
    # NOTE(review): the base URI is the page URI minus its last path
    # segment; confirm this is what Feed::Find expects for resolving
    # relative links.
    my $base_uri = $uri;
    $base_uri =~ s!/[^/]+$!!;

    my @feeds = Feed::Find->find_in_html( \$content, $base_uri );

    if (@feeds) {

        Jifty->log->info( "found possible feeds: ", dump(@feeds) );

        # Keep only feeds whose address contains the moniker and turn the
        # first occurrence into the %s placeholder.
        my @search_feeds = map {
            my $t = $_;
            $t =~ s/\b\Q$search_moniker\E\b/%s/;
            $t
        } grep { /\b\Q$search_moniker\E\b/ } @feeds;

        if ( my $feed_uri = shift @search_feeds ) {

            $self->result->message(
                'Found ' . @feeds . " feeds, using first: $feed_uri" );
            $self->argument_value( 'uri', $feed_uri );

            Jifty->log->debug(
                "calling parent take_action with new uri $feed_uri");

            return $self->SUPER::take_action(@ARGS);

        }
        else {
            Jifty->log->debug( "found feeds ", dump(@feeds),
                " but none of them has search moniker!" );
        }
    }

    # Case 3: fall back to a scraper class chosen from the page content.
    Jifty->log->debug("no feeds found, trying content_class detection");

    my $source = Grep::Source->new();

    if ( my $class = $source->content_class($content) ) {

        Jifty->log->debug("$class registred for feed $uri");

        $self->argument_value( 'source', "$class" );
        $self->result->message("Found $class scraper for $uri");
        return $self->SUPER::take_action(@ARGS);

    }
    else {

        $self->result->error('No feeds found on supplied URI');
        return 0;

    }

}
150    
151     1;
152    

  ViewVC Help
Powered by ViewVC 1.1.26