--- trunk/bin/irc-logger.pl 2008/03/09 22:12:06 111 +++ trunk/bin/irc-logger.pl 2008/03/14 17:44:23 129 @@ -2,7 +2,7 @@ use strict; $|++; -use POE qw(Component::IRC Component::Server::HTTP); +use POE qw(Component::IRC Component::Server::HTTP Component::Client::HTTP); use HTTP::Status; use DBI; use Regexp::Common qw /URI/; @@ -18,6 +18,7 @@ use Carp qw/confess/; use XML::Feed; use DateTime::Format::Flexible; +use Encode; =head1 NAME @@ -47,6 +48,8 @@ ## CONFIG +my $debug = 0; + my $irc_config = { nick => 'irc-logger', server => 'irc.freenode.net', @@ -61,9 +64,9 @@ my $CHANNEL = '#razmjenavjestina'; if ( $HOSTNAME =~ m/llin/ ) { - $irc_config->{nick} = 'irc-logger-dev'; + $irc_config->{nick} = 'irc-logger-llin'; # $irc_config = { -# nick => 'irc-logger-dev', +# nick => 'irc-logger-llin', # server => 'localhost', # port => 6668, # }; @@ -75,7 +78,7 @@ my @channels = ( $CHANNEL ); -warn "# config = ", dump( $irc_config ), $/; +warn "## config = ", dump( $irc_config ) if $debug; my $NICK = $irc_config->{nick} or die "no nick?"; @@ -106,18 +109,20 @@ GetOptions( 'import-dircproxy:s' => \$import_dircproxy, 'log:s' => \$log_path, + 'debug!' => \$debug, ); #$SIG{__DIE__} = sub { # confess "fatal error"; #}; -open(STDOUT, '>', $log_path) || warn "can't redirect log to $log_path: $!"; - sub _log { - print strftime($TIMESTAMP,localtime()) . ' ' . join(" ",@_) . $/; + print strftime($TIMESTAMP,localtime()) . ' ' . join(" ",map { ref($_) ? dump( $_ ) : $_ } @_) . $/; } +open(STDOUT, '>', $log_path) && warn "log to $log_path: $!\n"; + + # HTML formatters my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); @@ -160,6 +165,13 @@ }, }; +# POE IRC +my $poe_irc = POE::Component::IRC->spawn( %$irc_config ) or + die "can't start ", dump( $irc_config ), ": $!"; + +my $irc = $poe_irc->session_id(); +_log "IRC session_id $irc"; + my $dbh = DBI->connect($DSN,"","", { RaiseError => 1, AutoCommit => 1 }) || die $DBI::errstr; $dbh->do( qq{ set client_encoding = 'UTF-8' } ); @@ -593,7 +605,7 @@ $a->{me} ||= 0; $a->{time} ||= strftime($TIMESTAMP,localtime()); - _log + _log "ARCHIVE", $a->{channel}, " ", $a->{me} ? "***" . $a->{nick} : "<" . $a->{nick} . ">", " " . $a->{message}; @@ -643,27 +655,47 @@ my $_stat; +POE::Component::Client::HTTP->spawn( + Alias => 'rss-fetch', + Timeout => 30, +); + +=head2 rss_parse_xml + + rss_parse_xml({ + url => 'http://www.example.com/rss', + send_rss_msgs => 42, + }); + +=cut + +sub rss_parse_xml { + my ($kernel,$args) = @_; -sub rss_fetch { - my ($args) = @_; + warn "## rss_parse_xml ",dump( @_ ) if $debug; # how many messages to send out when feed is seen for the first time? - my $send_rss_msgs = 1; + my $send_rss_msgs = $args->{send_rss_msgs}; + $send_rss_msgs = 1 if ! defined $send_rss_msgs; - _log "RSS fetch", $args->{url}; + warn "## RSS fetch first $send_rss_msgs items from", $args->{url} if $debug; - my $feed = XML::Feed->parse(URI->new( $args->{url} )); + my $feed = XML::Feed->parse( \$args->{xml} ); if ( ! $feed ) { - _log("can't fetch RSS ", $args->{url}); + _log "can't fetch RSS ", $args->{url}, XML::Feed->errstr; return; } + $_stat->{rss}->{url2link}->{ $args->{url} } = $feed->link; + my ( $total, $updates ) = ( 0, 0 ); for my $entry ($feed->entries) { $total++; + my $seen_times = $_stat->{rss}->{seen}->{$args->{channel}}->{$feed->link}->{$entry->id}++; # seen allready? - next if $_stat->{rss}->{seen}->{$args->{channel}}->{$feed->link}->{$entry->id}++ > 0; + warn "## $seen_times ",$entry->id if $debug; + next if $seen_times > 0; sub prefix { my ($txt,$var) = @_; @@ -689,18 +721,26 @@ $msg .= prefix( ' | ' , $entry->title ); $msg .= prefix( ' | ' , $link ); # $msg .= prefix( ' id ' , $entry->id ); + if ( my $tags = $entry->category ) { + $tags =~ s!^\s+!!; + $tags =~ s!\s*$! !; + $tags =~ s!,?\s+!// !g; + $msg .= prefix( ' ' , $tags ); + } - if ( $args->{kernel} && $send_rss_msgs ) { + if ( $seen_times == 0 && $send_rss_msgs ) { $send_rss_msgs--; if ( ! $args->{private} ) { # FIXME bug! should be save_message -# save_message( channel => $args->{channel}, me => 1, nick => $NICK, message => $msg ); - $sth_insert_log->execute( $args->{channel}, 1, $NICK, $msg, 'now()' ); + save_message( channel => $args->{channel}, me => 1, nick => $NICK, message => $msg ); +# $sth_insert_log->execute( $args->{channel}, 1, $NICK, $msg, 'now()' ); } my ( $type, $to ) = ( 'notice', $args->{channel} ); ( $type, $to ) = ( 'privmsg', $args->{nick} ) if $args->{private}; - _log(">> $type $to |", $msg); - $args->{kernel}->post( irc => $type => $to, $msg ); + + _log(">> RSS $type to $to:", $msg); + $kernel->post( $irc => $type => $to => $msg ); + $updates++; } } @@ -710,29 +750,37 @@ $sql .= qq{where id = } . $args->{id}; eval { $dbh->do( $sql ) }; - _log "RSS got $total items of which $updates new"; + _log "RSS $updates/$total new items from", $args->{url}; return $updates; } sub rss_fetch_all { - my $kernel = shift; + my ( $kernel, $send_rss_msgs ) = @_; + warn "## rss_fetch_all -- send_rss_msgs: $send_rss_msgs\n" if $debug; my $sql = qq{ select id, url, name, channel, nick, private from feeds where active is true }; # limit to newer feeds only if we are not sending messages out - $sql .= qq{ and last_update + delay < now() } if $kernel; + $sql .= qq{ and last_update + delay < now() } if defined ( $_stat->{rss}->{fetch} ); my $sth = $dbh->prepare( $sql ); $sth->execute(); warn "# ",$sth->rows," active RSS feeds\n"; my $count = 0; while (my $row = $sth->fetchrow_hashref) { - $row->{kernel} = $kernel if $kernel; - $count += rss_fetch( $row ); + $row->{send_rss_msgs} = $send_rss_msgs if defined $send_rss_msgs; + $_stat->{rss}->{fetch}->{ $row->{url} } = $row; + $kernel->post( + 'rss-fetch', + 'request', + 'rss_response', + HTTP::Request->new( GET => $row->{url} ), + ); + warn "## queued rss-fetch ", dump( $row ) if $debug; } - return "OK, fetched $count posts from " . $sth->rows . " feeds"; + return "OK, scheduled " . $sth->rows . " feeds for refresh"; } @@ -740,26 +788,13 @@ my $kernel = shift; $_stat->{rss}->{last_poll} ||= time(); my $dt = time() - $_stat->{rss}->{last_poll}; - warn "## rss_check_updates $dt > $rss_min_delay\n"; if ( $dt > $rss_min_delay ) { + warn "## rss_check_updates $dt > $rss_min_delay\n"; $_stat->{rss}->{last_poll} = time(); _log rss_fetch_all( $kernel ); } } -# seed rss seen cache so we won't send out all items on startup -_log rss_fetch_all; - -# -# POE handing part -# - -my $poe_irc = POE::Component::IRC->spawn( %$irc_config ) or - die "can't start ", dump( $irc_config ), ": $!"; - -my $irc = $poe_irc->session_id(); -_log "session_id $irc"; - POE::Session->create( inline_states => { _start => sub { $_[KERNEL]->post( $irc => register => 'all' ); @@ -770,12 +805,13 @@ my $poco_object = $sender->get_heap(); _log "connected to",$poco_object->server_name(); $kernel->post( $sender => join => $_ ) for @channels; + # seen RSS cache, so don't send out messages + _log rss_fetch_all( $kernel, 0 ); undef; }, - irc_255 => sub { # server is done blabbing - $_[KERNEL]->post( $irc => join => $CHANNEL); - $_[KERNEL]->post( $irc => privmsg => 'nickserv', "IDENTIFY $NICK" ); - }, +# irc_255 => sub { # server is done blabbing +# $_[KERNEL]->post( $irc => join => $CHANNEL); +# }, irc_public => sub { my $kernel = $_[KERNEL]; my $nick = (split /!/, $_[ARG0])[0]; @@ -825,6 +861,7 @@ my $nick = (split /!/, $_[ARG0])[0]; my $msg = $_[ARG2]; my $channel = $_[ARG1]->[0]; + warn "# ARG = ",dump( @_[ARG0,ARG1,ARG2] ) if $debug; my $res = "unknown command '$msg', try /msg $NICK help!"; my @out; @@ -953,10 +990,6 @@ } } elsif ($msg =~ m/^rss-update/) { $res = rss_fetch_all( $_[KERNEL] ); - } elsif ($msg =~ m/^rss-clean/) { - $_stat->{rss} = undef; - $dbh->do( qq{ update feeds set last_update = now() - delay } ); - $res = "OK, cleaned RSS cache"; } elsif ($msg =~ m/^rss-list/) { my $sth = $dbh->prepare(qq{ select url,name,last_update,active,channel,nick,private from feeds }); $sth->execute; @@ -964,7 +997,7 @@ $_[KERNEL]->post( $irc => privmsg => $nick, join(' | ',@row) ); } $res = ''; - } elsif ($msg =~ m!^rss-(add|remove|stop|start)(?:-(private))?\s+(http://\S+)\s*(.*)!) { + } elsif ($msg =~ m!^rss-(add|remove|stop|start|clean)(?:-(private))?\s+(http://\S+)\s*(.*)!) { my ( $command, $sub, $url, $arg ) = ( $1,$2,$3,$4 ); my $channel = $1 if ( $arg =~ s/\s*(#\S+)\s*// ); @@ -972,9 +1005,10 @@ my $sql = { add => qq{ insert into feeds (url,name,channel,nick,private) values (?,?,?,?,?) }, -# remove => qq{ delete from feeds where url = ? and name = ? }, + remove => qq{ delete from feeds where url = ? and nick = ? }, start => qq{ update feeds set active = true where url = ? }, stop => qq{ update feeds set active = false where url = ? }, + clean => qq{ update feeds set last_update = now() - delay where url = ? }, }; if ( $command eq 'add' && ! $channel ) { @@ -984,17 +1018,41 @@ my @data = ( $url ); if ( $command eq 'add' ) { push @data, ( $arg, $channel, $nick, $sub eq 'private' ? 1 : 0 ); + } elsif ( $command eq 'remove' ) { + push @data, $nick; } warn "## $command SQL $q with ",dump( @data ),"\n"; eval { $sth->execute( @data ) }; if ($@) { $res = "ERROR: $@"; } else { - $res = "OK, RSS [$command|$sub|$url|$arg]"; + $res = "OK, RSS executed $command" . + ( $sub ? "-$sub " : ' ' ) . + ( $channel ? "on $channel " : '' ) . + "url $url"; + if ( $command eq 'clean' ) { + my $seen = $_stat->{rss}->{seen} || die "no seen?"; + my $want_link = $_stat->{rss}->{url2link}->{$url} || warn "no url2link($url)"; + foreach my $c ( keys %$seen ) { + my $c_hash = $seen->{$c} || die "no seen->{$c}"; + die "not HASH with rss links but ", dump($c_hash) unless ref($c_hash) eq 'HASH'; + foreach my $link ( keys %$c_hash ) { + next unless $link eq $want_link; + _log "RSS removed seen $c $url $link"; + } + } + } elsif ( $command eq 'add' ) { + rss_fetch_all( $_[KERNEL] ); + } } } else { $res = "ERROR: don't know what to do with: $msg"; } + } elsif ($msg =~ m/^rss-clean/) { + # this makes sense because we didn't catch rss-clean http://... before! + $_stat->{rss} = undef; + $dbh->do( qq{ update feeds set last_update = now() - delay } ); + $res = rss_fetch_all( $_[KERNEL] ); } if ($res) { @@ -1013,13 +1071,22 @@ irc_376 => sub { _log "<< motd", $_[ARG0], "end"; }, +# irc_433 => sub { +# print "# irc_433: ",$_[ARG1], "\n"; +# warn "## indetify $NICK\n"; +# $_[KERNEL]->post( $irc => privmsg => 'nickserv', "IDENTIFY $NICK" ); +# }, +# irc_451 # please register irc_477 => sub { _log "<< irc_477: ",$_[ARG1]; - $_[KERNEL]->post( $irc => privmsg => 'nickserv', "register $NICK" ); + _log ">> IDENTIFY $NICK"; + $_[KERNEL]->post( $irc => privmsg => 'NickServ', "IDENTIFY $NICK" ); }, irc_505 => sub { _log "<< irc_505: ",$_[ARG1]; - $_[KERNEL]->post( $irc => privmsg => 'nickserv', "register $NICK" ); + _log ">> register $NICK"; + $_[KERNEL]->post( $irc => privmsg => 'NickServ', "register $NICK" ); +# $_[KERNEL]->post( $irc => privmsg => 'nickserv', "IDENTIFY $NICK" ); # $_[KERNEL]->post( $irc => privmsg => 'nickserv', "set hide email on" ); # $_[KERNEL]->post( $irc => privmsg => 'nickserv', "set email dpavlin\@rot13.org" ); }, @@ -1036,20 +1103,21 @@ sleep($sleep_on_error); $_[KERNEL]->post( $irc => connect => {} ); }, -# irc_433 => sub { -# print "# irc_433: ",$_[ARG1], "\n"; -# warn "## indetify $NICK\n"; -# $_[KERNEL]->post( $irc => privmsg => 'nickserv', "IDENTIFY $NICK" ); -# }, -# irc_451 # please register irc_notice => sub { - _log "<< notice",$_[ARG0]; - if ( $_[ARG0] =~ m!/msg\s+NickServ\s+IDENTIFY!i ) { + _log "<< notice from ", $_[ARG0], $_[ARG1], $_[ARG2]; + my $m = $_[ARG2]; + if ( $m =~ m!/msg.*(NickServ).*(IDENTIFY)!i ) { + _log ">> suggested to $1 $2"; + $_[KERNEL]->post( $irc => privmsg => $1, "$2 $NICK" ); + } elsif ( $m =~ m!\Q$NICK\E.*registered!i ) { + _log ">> registreted, so IDENTIFY"; $_[KERNEL]->post( $irc => privmsg => 'nickserv', "IDENTIFY $NICK" ); + } else { + warn "## ignore $m\n" if $debug; } }, irc_snotice => sub { - _log "<< snotice",$_[ARG0]; + _log "<< snotice", $_[ARG0]; #dump( $_[ARG0],$_[ARG1], $_[ARG2] ); if ( $_[ARG0] =~ m!/(QUOTE)\s+(PASS\s+\d+)!i ) { warn ">> $1 | $2\n"; $_[KERNEL]->post( $irc => lc($1) => $2); @@ -1064,11 +1132,26 @@ ""; 0; # false for signals }, + rss_response => sub { + my ($request_packet, $response_packet) = @_[ARG0, ARG1]; + my $request_object = $request_packet->[0]; + my $response_object = $response_packet->[0]; + + my $row = delete( $_stat->{rss}->{fetch}->{ $request_object->uri } ); + if ( $row ) { + $row->{xml} = $response_object->content; + rss_parse_xml( $_[KERNEL], $row ); + } else { + warn "## can't find rss->fetch for ", $request_object->uri; + } + }, }, ); # http server +_log "WEB archive at $url"; + my $httpd = POE::Component::Server::HTTP->new( Port => $http_port, PreHandler => { @@ -1115,7 +1198,7 @@ $style .= ".col-${max_color} { background: $c }\n"; $max_color++; } -warn "defined $max_color colors for users...\n"; +_log "WEB defined $max_color colors for users..."; sub root_handler { my ($request, $response) = @_; @@ -1237,7 +1320,7 @@ $feed->add_entry( $feed_entry ); } else { - _log "unknown rss request $r_url"; + _log "WEB unknown rss request $r_url"; $feed->title( "unknown $r_url" ); foreach my $c ( @commands ) { my $feed_entry = XML::Feed::Entry->new($type); @@ -1340,7 +1423,7 @@

See history of all messages.

}; - $response->content( $html ); + $response->content( decode('utf-8',$html) ); warn "<< ", $request->method, " ", $request->uri, " created ", length($html), " bytes\n"; return RC_OK; }