--- trunk/bin/irc-logger.pl 2008/03/14 14:45:04 123 +++ trunk/bin/irc-logger.pl 2008/04/01 19:04:32 132 @@ -18,8 +18,6 @@ use Carp qw/confess/; use XML::Feed; use DateTime::Format::Flexible; -use IPC::DirQueue; -use File::Slurp; use Encode; =head1 NAME @@ -59,8 +57,6 @@ ircname => 'Anna the bot: try /msg irc-logger help', }; -my $queue_dir = './queue'; - my $HOSTNAME = `hostname -f`; chomp($HOSTNAME); @@ -113,7 +109,6 @@ GetOptions( 'import-dircproxy:s' => \$import_dircproxy, 'log:s' => \$log_path, - 'queue:s' => \$queue_dir, 'debug!' => \$debug, ); @@ -127,14 +122,6 @@ open(STDOUT, '>', $log_path) && warn "log to $log_path: $!\n"; -# queue - -if ( ! -d $queue_dir ) { - warn "## creating queue directory $queue_dir"; - mkdir $queue_dir or die "can't create queue directory $queue_dir: $!"; -} - -my $dq = IPC::DirQueue->new({ dir => $queue_dir }); # HTML formatters @@ -618,12 +605,13 @@ $a->{me} ||= 0; $a->{time} ||= strftime($TIMESTAMP,localtime()); - _log + _log "ARCHIVE", $a->{channel}, " ", $a->{me} ? "***" . $a->{nick} : "<" . $a->{nick} . ">", " " . $a->{message}; - $sth_insert_log->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); + eval { $sth_insert_log->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); }; + _log "ERROR: can't archive ", $a->{message} if $@; add_tag( id => $dbh->last_insert_id(undef,undef,"log",undef), %$a ); } @@ -673,15 +661,25 @@ Timeout => 30, ); +=head2 rss_parse_xml + + rss_parse_xml({ + url => 'http://www.example.com/rss', + send_rss_msgs => 42, + }); + +=cut + sub rss_parse_xml { - my ($args) = @_; + my ($kernel,$args) = @_; - warn "## rss_parse_xml ",dump( @_ ) if $debug; + warn "## rss_parse_xml ",dump( $args ) if $debug; # how many messages to send out when feed is seen for the first time? - my $send_rss_msgs = 1; + my $send_rss_msgs = $args->{send_rss_msgs}; + $send_rss_msgs = 1 if ! defined $send_rss_msgs; - _log "RSS fetch", $args->{url}; + warn "## RSS fetch first $send_rss_msgs items from", $args->{url} if $debug; my $feed = XML::Feed->parse( \$args->{xml} ); if ( ! $feed ) { @@ -724,7 +722,10 @@ $msg .= prefix( ' | ' , $entry->title ); $msg .= prefix( ' | ' , $link ); # $msg .= prefix( ' id ' , $entry->id ); + my @categories = $entry->category; + warn "## category = ", dump( @categories ) if $debug; if ( my $tags = $entry->category ) { + $tags = join(' ', @$tags) if ref($tags) eq 'ARRAY'; $tags =~ s!^\s+!!; $tags =~ s!\s*$! !; $tags =~ s!,?\s+!// !g; @@ -741,14 +742,8 @@ my ( $type, $to ) = ( 'notice', $args->{channel} ); ( $type, $to ) = ( 'privmsg', $args->{nick} ) if $args->{private}; - _log("RSS generated $type to $to:", $msg); - # XXX enqueue message to send later - sub enqueue_post { - my $post = dump( @_ ); - warn "## queue_post $post\n" if $debug; - $dq->enqueue_string( $post ); - } - enqueue_post( $type => $to => $msg ); + _log(">> RSS $type to $to:", $msg); + $kernel->post( $irc => $type => $to => $msg ); $updates++; } @@ -759,13 +754,14 @@ $sql .= qq{where id = } . $args->{id}; eval { $dbh->do( $sql ) }; - _log "RSS got $total items of which $updates new"; + _log "RSS $updates/$total new items from", $args->{url}; return $updates; } sub rss_fetch_all { - my $kernel = shift; + my ( $kernel, $send_rss_msgs ) = @_; + warn "## rss_fetch_all -- send_rss_msgs: $send_rss_msgs\n" if $debug; my $sql = qq{ select id, url, name, channel, nick, private from feeds @@ -778,7 +774,7 @@ warn "# ",$sth->rows," active RSS feeds\n"; my $count = 0; while (my $row = $sth->fetchrow_hashref) { - warn "## queued rss-fetch for ", $row->{url} if $debug; + $row->{send_rss_msgs} = $send_rss_msgs if defined $send_rss_msgs; $_stat->{rss}->{fetch}->{ $row->{url} } = $row; $kernel->post( 'rss-fetch', @@ -786,6 +782,7 @@ 'rss_response', HTTP::Request->new( GET => $row->{url} ), ); + warn "## queued rss-fetch ", dump( $row ) if $debug; } return "OK, scheduled " . $sth->rows . " feeds for refresh"; } @@ -800,16 +797,201 @@ $_stat->{rss}->{last_poll} = time(); _log rss_fetch_all( $kernel ); } - # XXX send queue messages - while ( my $job = $dq->pickup_queued_job() ) { - my $data = read_file( $job->get_data_path ) || die "can't load ", $job->get_data_path, ": $!"; -# $kernel->post( $irc => $type => $to, $msg ); - my @data = eval $data; - _log "IRC post from queue:", @data; - $kernel->post( $irc => @data ); - $job->finish; - warn "## done queued job: ",dump( @data ) if $debug; +} + +sub process_command { + my ( $kernel, $nick, $channel, $msg ) = @_; + + my $res = "unknown command '$msg', try /msg $NICK help!"; + + if ($msg =~ m/^help/i) { + + $res = "usage: /msg $NICK comand | commands: stat - user/message stat | last - show backtrace | grep foobar - find foobar"; + + } elsif ($msg =~ m/^(privmsg|notice)\s+(\S+)\s+(.*)$/i) { + + _log ">> /$1 $2 $3"; + $kernel->post( $irc => $1 => $2, $3 ); + $res = ''; + + } elsif ($msg =~ m/^stat.*?\s*(\d*)/i) { + + my $nr = $1 || 10; + + my $sth = $dbh->prepare(qq{ + select + trim(both '_' from nick) as nick, + count(*) as count, + sum(length(message)) as len + from log + group by trim(both '_' from nick) + order by len desc,count desc + limit $nr + }); + $sth->execute(); + $res = "Top $nr users: "; + my @users; + while (my $row = $sth->fetchrow_hashref) { + push @users,$row->{nick} . ': ' . $row->{count} . '/' . $row->{len} . '=' . sprintf("%.2f", $row->{len}/$row->{count}); + } + $res .= join(" | ", @users); + } elsif ($msg =~ m/^last.*?\s*(\d*)/i) { + + my $limit = $1 || meta( $nick, $channel, 'last-size' ) || 10; + + foreach my $res (get_from_log( limit => $limit )) { + _log "last: $res"; + $kernel->post( $irc => privmsg => $nick, $res ); + } + + $res = ''; + + } elsif ($msg =~ m/^(search|grep)\s+(.*)\s*$/i) { + + my $what = $2; + + foreach my $res (get_from_log( + limit => 20, + search => $what, + )) { + _log "search [$what]: $res"; + $kernel->post( $irc => privmsg => $nick, $res ); + } + + $res = ''; + + } elsif ($msg =~ m/^(?:count|poll)\s+(.*)(?:\s+(\d+))?\s*$/i) { + + my ($what,$limit) = ($1,$2); + $limit ||= 100; + + my $stat; + + foreach my $res (get_from_log( + limit => $limit, + search => $what, + full_rows => 1, + )) { + while ($res->{message} =~ s/\Q$what\E(\+|\-)//) { + $stat->{vote}->{$1}++; + $stat->{from}->{ $res->{nick} }++; + } + } + + my @nicks; + foreach my $nick (sort { $stat->{from}->{$a} <=> $stat->{from}->{$b} } keys %{ $stat->{from} }) { + push @nicks, $nick . ( $stat->{from}->{$nick} == 1 ? '' : + "(" . $stat->{from}->{$nick} . ")" + ); + } + + $res = + "$what ++ " . ( $stat->{vote}->{'+'} || 0 ) . + " : " . ( $stat->{vote}->{'-'} || 0 ) . " --" . + " from " . ( join(", ", @nicks) || 'nobody' ); + + $kernel->post( $irc => notice => $nick, $res ); + + } elsif ($msg =~ m/^ping/) { + $res = "ping = " . dump( $_stat->{ping} ); + } elsif ($msg =~ m/^conf(?:ig)*\s*(last-size|twitter)*\s*(.*)/) { + if ( ! defined( $1 ) ) { + my $sth = $dbh->prepare(qq{ select name,value,changed from meta where nick = ? and channel = ? }); + $sth->execute( $nick, $channel ); + $res = "config for $nick on $channel"; + while ( my ($n,$v) = $sth->fetchrow_array ) { + $res .= " | $n = $v"; + } + } elsif ( ! $2 ) { + my $val = meta( $nick, $channel, $1 ); + $res = "current $1 = " . ( $val ? $val : 'undefined' ); + } else { + my $validate = { + 'last-size' => qr/^\d+/, + 'twitter' => qr/^\w+\s+\w+/, + }; + + my ( $op, $val ) = ( $1, $2 ); + + if ( my $regex = $validate->{$op} ) { + if ( $val =~ $regex ) { + meta( $nick, $channel, $op, $val ); + $res = "saved $op = $val"; + } else { + $res = "config option $op = $val doesn't validate against $regex"; + } + } else { + $res = "config option $op doesn't exist"; + } + } + } elsif ($msg =~ m/^rss-update/) { + $res = rss_fetch_all( $kernel ); + } elsif ($msg =~ m/^rss-list/) { + my $sth = $dbh->prepare(qq{ select url,name,last_update,active,channel,nick,private from feeds }); + $sth->execute; + while (my @row = $sth->fetchrow_array) { + $kernel->post( $irc => privmsg => $nick, join(' | ',@row) ); + } + $res = ''; + } elsif ($msg =~ m!^rss-(add|remove|stop|start|clean)(?:-(private))?\s+(http://\S+)\s*(.*)!) { + my ( $command, $sub, $url, $arg ) = ( $1,$2,$3,$4 ); + + my $channel = $1 if ( $arg =~ s/\s*(#\S+)\s*// ); + $channel = $nick if $sub eq 'private'; + + my $sql = { + add => qq{ insert into feeds (url,name,channel,nick,private) values (?,?,?,?,?) }, + remove => qq{ delete from feeds where url = ? and nick = ? }, + start => qq{ update feeds set active = true where url = ? }, + stop => qq{ update feeds set active = false where url = ? }, + clean => qq{ update feeds set last_update = now() - delay where url = ? }, + }; + + if ( $command eq 'add' && ! $channel ) { + $res = "ERROR: got '$msg' which doesn't have #channel in it, ignoring!"; + } elsif (my $q = $sql->{$command} ) { + my $sth = $dbh->prepare( $q ); + my @data = ( $url ); + if ( $command eq 'add' ) { + push @data, ( $arg, $channel, $nick, $sub eq 'private' ? 1 : 0 ); + } elsif ( $command eq 'remove' ) { + push @data, $nick; + } + warn "## $command SQL $q with ",dump( @data ),"\n"; + eval { $sth->execute( @data ) }; + if ($@) { + $res = "ERROR: $@"; + } else { + $res = "OK, RSS executed $command" . + ( $sub ? "-$sub " : ' ' ) . + ( $channel ? "on $channel " : '' ) . + "url $url"; + if ( $command eq 'clean' ) { + my $seen = $_stat->{rss}->{seen} || die "no seen?"; + my $want_link = $_stat->{rss}->{url2link}->{$url} || warn "no url2link($url)"; + foreach my $c ( keys %$seen ) { + my $c_hash = $seen->{$c} || die "no seen->{$c}"; + die "not HASH with rss links but ", dump($c_hash) unless ref($c_hash) eq 'HASH'; + foreach my $link ( keys %$c_hash ) { + next unless $link eq $want_link; + _log "RSS removed seen $c $url $link"; + } + } + } elsif ( $command eq 'add' ) { + rss_fetch_all( $kernel ); + } + } + } else { + $res = "ERROR: don't know what to do with: $msg"; + } + } elsif ($msg =~ m/^rss-clean/) { + # this makes sense because we didn't catch rss-clean http://... before! + $_stat->{rss} = undef; + $dbh->do( qq{ update feeds set last_update = now() - delay } ); + $res = rss_fetch_all( $kernel ); } + + return $res; } POE::Session->create( inline_states => { @@ -822,8 +1004,8 @@ my $poco_object = $sender->get_heap(); _log "connected to",$poco_object->server_name(); $kernel->post( $sender => join => $_ ) for @channels; - # seen RSS cache - _log rss_fetch_all( $kernel ); + # seen RSS cache, so don't send out messages + _log rss_fetch_all( $kernel, 0 ); undef; }, # irc_255 => sub { # server is done blabbing @@ -876,194 +1058,13 @@ irc_msg => sub { my $kernel = $_[KERNEL]; my $nick = (split /!/, $_[ARG0])[0]; - my $msg = $_[ARG2]; my $channel = $_[ARG1]->[0]; + my $msg = $_[ARG2]; warn "# ARG = ",dump( @_[ARG0,ARG1,ARG2] ) if $debug; - my $res = "unknown command '$msg', try /msg $NICK help!"; - my @out; - _log "<< $msg"; - if ($msg =~ m/^help/i) { - - $res = "usage: /msg $NICK comand | commands: stat - user/message stat | last - show backtrace | grep foobar - find foobar"; - - } elsif ($msg =~ m/^(privmsg|notice)\s+(\S+)\s+(.*)$/i) { - - _log ">> /$1 $2 $3"; - $_[KERNEL]->post( $irc => $1 => $2, $3 ); - $res = ''; - - } elsif ($msg =~ m/^stat.*?\s*(\d*)/i) { - - my $nr = $1 || 10; - - my $sth = $dbh->prepare(qq{ - select - trim(both '_' from nick) as nick, - count(*) as count, - sum(length(message)) as len - from log - group by trim(both '_' from nick) - order by len desc,count desc - limit $nr - }); - $sth->execute(); - $res = "Top $nr users: "; - my @users; - while (my $row = $sth->fetchrow_hashref) { - push @users,$row->{nick} . ': ' . $row->{count} . '/' . $row->{len} . '=' . sprintf("%.2f", $row->{len}/$row->{count}); - } - $res .= join(" | ", @users); - } elsif ($msg =~ m/^last.*?\s*(\d*)/i) { - - my $limit = $1 || meta( $nick, $channel, 'last-size' ) || 10; - - foreach my $res (get_from_log( limit => $limit )) { - _log "last: $res"; - $_[KERNEL]->post( $irc => privmsg => $nick, $res ); - } - - $res = ''; - - } elsif ($msg =~ m/^(search|grep)\s+(.*)\s*$/i) { - - my $what = $2; - - foreach my $res (get_from_log( - limit => 20, - search => $what, - )) { - _log "search [$what]: $res"; - $_[KERNEL]->post( $irc => privmsg => $nick, $res ); - } - - $res = ''; - - } elsif ($msg =~ m/^(?:count|poll)\s+(.*)(?:\s+(\d+))?\s*$/i) { - - my ($what,$limit) = ($1,$2); - $limit ||= 100; - - my $stat; - - foreach my $res (get_from_log( - limit => $limit, - search => $what, - full_rows => 1, - )) { - while ($res->{message} =~ s/\Q$what\E(\+|\-)//) { - $stat->{vote}->{$1}++; - $stat->{from}->{ $res->{nick} }++; - } - } - - my @nicks; - foreach my $nick (sort { $stat->{from}->{$a} <=> $stat->{from}->{$b} } keys %{ $stat->{from} }) { - push @nicks, $nick . ( $stat->{from}->{$nick} == 1 ? '' : - "(" . $stat->{from}->{$nick} . ")" - ); - } - - $res = - "$what ++ " . ( $stat->{vote}->{'+'} || 0 ) . - " : " . ( $stat->{vote}->{'-'} || 0 ) . " --" . - " from " . ( join(", ", @nicks) || 'nobody' ); - - $_[KERNEL]->post( $irc => notice => $nick, $res ); - - } elsif ($msg =~ m/^ping/) { - $res = "ping = " . dump( $_stat->{ping} ); - } elsif ($msg =~ m/^conf(?:ig)*\s*(last-size|twitter)*\s*(.*)/) { - if ( ! defined( $1 ) ) { - my $sth = $dbh->prepare(qq{ select name,value,changed from meta where nick = ? and channel = ? }); - $sth->execute( $nick, $channel ); - $res = "config for $nick on $channel"; - while ( my ($n,$v) = $sth->fetchrow_array ) { - $res .= " | $n = $v"; - } - } elsif ( ! $2 ) { - my $val = meta( $nick, $channel, $1 ); - $res = "current $1 = " . ( $val ? $val : 'undefined' ); - } else { - my $validate = { - 'last-size' => qr/^\d+/, - 'twitter' => qr/^\w+\s+\w+/, - }; - - my ( $op, $val ) = ( $1, $2 ); - - if ( my $regex = $validate->{$op} ) { - if ( $val =~ $regex ) { - meta( $nick, $channel, $op, $val ); - $res = "saved $op = $val"; - } else { - $res = "config option $op = $val doesn't validate against $regex"; - } - } else { - $res = "config option $op doesn't exist"; - } - } - } elsif ($msg =~ m/^rss-update/) { - $res = rss_fetch_all( $_[KERNEL] ); - } elsif ($msg =~ m/^rss-list/) { - my $sth = $dbh->prepare(qq{ select url,name,last_update,active,channel,nick,private from feeds }); - $sth->execute; - while (my @row = $sth->fetchrow_array) { - $_[KERNEL]->post( $irc => privmsg => $nick, join(' | ',@row) ); - } - $res = ''; - } elsif ($msg =~ m!^rss-(add|remove|stop|start|clean)(?:-(private))?\s+(http://\S+)\s*(.*)!) { - my ( $command, $sub, $url, $arg ) = ( $1,$2,$3,$4 ); - - my $channel = $1 if ( $arg =~ s/\s*(#\S+)\s*// ); - $channel = $nick if $sub eq 'private'; - - my $sql = { - add => qq{ insert into feeds (url,name,channel,nick,private) values (?,?,?,?,?) }, -# remove => qq{ delete from feeds where url = ? and name = ? }, - start => qq{ update feeds set active = true where url = ? }, - stop => qq{ update feeds set active = false where url = ? }, - clean => qq{ update feeds set last_update = now() - delay where url = ? }, - }; - - if ( $command eq 'add' && ! $channel ) { - $res = "ERROR: got '$msg' which doesn't have #channel in it, ignoring!"; - } elsif (my $q = $sql->{$command} ) { - my $sth = $dbh->prepare( $q ); - my @data = ( $url ); - if ( $command eq 'add' ) { - push @data, ( $arg, $channel, $nick, $sub eq 'private' ? 1 : 0 ); - } - warn "## $command SQL $q with ",dump( @data ),"\n"; - eval { $sth->execute( @data ) }; - if ($@) { - $res = "ERROR: $@"; - } else { - $res = "OK, RSS executed $command " . ( $sub ? "-$sub" : '' ) ."on $channel url $url"; - if ( $command eq 'clean' ) { - my $seen = $_stat->{rss}->{seen} || die "no seen?"; - my $want_link = $_stat->{rss}->{url2link}->{$url} || warn "no url2link($url)"; - foreach my $c ( keys %$seen ) { - my $c_hash = $seen->{$c} || die "no seen->{$c}"; - die "not HASH with rss links but ", dump($c_hash) unless ref($c_hash) eq 'HASH'; - foreach my $link ( keys %$c_hash ) { - next unless $link eq $want_link; - _log "RSS removed seen $c $url $link"; - } - } - } - } - } else { - $res = "ERROR: don't know what to do with: $msg"; - } - } elsif ($msg =~ m/^rss-clean/) { - # this makes sense because we didn't catch rss-clean http://... before! - $_stat->{rss} = undef; - $dbh->do( qq{ update feeds set last_update = now() - delay } ); - $res = "OK, cleaned RSS cache"; - } + my $res = process_command( $_[KERNEL], $nick, $channel, $msg ); if ($res) { _log ">> [$nick] $res"; @@ -1135,12 +1136,8 @@ }, _child => sub {}, _default => sub { - _log sprintf "sID:%s %s %s", - $_[SESSION]->ID, $_[ARG0], - ref($_[ARG1]) eq "ARRAY" ? join(",", map { ref($_) eq "ARRAY" ? join(";", @{$_}) : $_ } @{ $_[ARG1] }) : - $_[ARG1] ? $_[ARG1] : - ""; - 0; # false for signals + _log '_default SID:', $_[SESSION]->ID, $_[ARG0], dump( $_[ARG1] ); + 0; # false for signals }, rss_response => sub { my ($request_packet, $response_packet) = @_[ARG0, ARG1]; @@ -1150,7 +1147,7 @@ my $row = delete( $_stat->{rss}->{fetch}->{ $request_object->uri } ); if ( $row ) { $row->{xml} = $response_object->content; - rss_parse_xml( $row ); + rss_parse_xml( $_[KERNEL], $row ); } else { warn "## can't find rss->fetch for ", $request_object->uri; }