--- trunk/bin/irc-logger.pl 2008/02/29 22:11:07 83 +++ trunk/bin/irc-logger.pl 2008/03/08 00:14:41 100 @@ -20,8 +20,6 @@ =item --log=irc-logger.log -Name of log file - =back =head1 DESCRIPTION @@ -46,15 +44,8 @@ $CHANNEL = '#irc-logger' if ($HOSTNAME =~ m/llin/); my $IRC_ALIAS = "log"; -my %FOLLOWS = - ( - ACCESS => "/var/log/apache/access.log", - ERROR => "/var/log/apache/error.log", - ); - my $DSN = 'DBI:Pg:dbname=' . $NICK; -my $ENCODING = 'ISO-8859-2'; my $TIMESTAMP = '%Y-%m-%d %H:%M:%S'; my $sleep_on_error = 5; @@ -62,18 +53,18 @@ # number of last tags to keep in circular buffer my $last_x_tags = 50; +# don't pull rss feeds more often than this +my $rss_min_delay = 60; + my $http_port = $NICK =~ m/-dev/ ? 8001 : 8000; my $url = "http://$HOSTNAME:$http_port"; ## END CONFIG - - -use POE qw(Component::IRC Wheel::FollowTail Component::Server::HTTP); +use POE qw(Component::IRC Component::Server::HTTP); use HTTP::Status; use DBI; -use Encode qw/from_to is_utf8/; use Regexp::Common qw /URI/; use CGI::Simple; use HTML::TagCloud; @@ -99,14 +90,14 @@ 'log:s' => \$log_path, ); -$SIG{__DIE__} = sub { - confess "fatal error"; -}; +#$SIG{__DIE__} = sub { +# confess "fatal error"; +#}; open(STDOUT, '>', $log_path) || warn "can't redirect log to $log_path: $!"; sub _log { - print strftime($TIMESTAMP,localtime()), ' ', join(" ",@_), $/; + print strftime($TIMESTAMP,localtime()) . ' ' . join(" ",@_) . $/; } # HTML formatters @@ -126,11 +117,11 @@ # protect HTML from wiki modifications sub e { my $t = shift; - return 'uri_unescape{' . uri_escape($t) . '}'; + return 'uri_unescape{' . uri_escape($t, '^a-zA-Z0-9') . '}'; } $m =~ s/($escape_re)/$escape{$1}/gs; - $m =~ s#($RE{URI}{HTTP})#e(qq{$1})#egs || + $m =~ s#($RE{URI}{HTTP})#e(qq{$1})#egs; $m =~ s#\/(\w+)\/#$1#gs; $m =~ s#$tag_regex#e(qq{$1})#egs; $m =~ s#\*(\w+)\*#$1#gs; @@ -152,9 +143,10 @@ }; my $dbh = DBI->connect($DSN,"","", { RaiseError => 1, AutoCommit => 1 }) || die $DBI::errstr; +$dbh->do( qq{ set client_encoding = 'UTF-8' } ); my $sql_schema = { - log => ' + log => qq{ create table log ( id serial, time timestamp default now(), @@ -168,17 +160,34 @@ create index log_time on log(time); create index log_channel on log(channel); create index log_nick on log(nick); - ', - meta => ' + }, + meta => q{ create table meta ( nick text not null, channel text not null, name text not null, value text, - changed timestamp default now(), + changed timestamp default 'now()', primary key(nick,channel,name) ); - ', + }, + feeds => qq{ +create table feeds ( + id serial, + url text not null, + name text, + delay interval not null default '5 min', + active boolean default true, + channel text not null, + nick text not null, + private boolean default false, + last_update timestamp default 'now()', + polls int default 0, + updates int default 0 +); +create unique index feeds_url on feeds(url); +insert into feeds (url,name,channel,nick) values ('http://wiki.razmjenavjestina.org/feed/workspace/razmjenavjestina?category=Recent%20Changes','wiki','$CHANNEL','dpavlin'); + }, }; foreach my $table ( keys %$sql_schema ) { @@ -221,9 +230,9 @@ if ( $@ || ! $sth->rows ) { $sth = $dbh->prepare(qq{ insert into meta (value,nick,channel,name,changed) values (?,?,?,?,now()) }); $sth->execute( $value, $nick, $channel, $name ); - _log "created $nick/$channel/$name = $value"; + warn "## created $nick/$channel/$name = $value\n"; } else { - _log "updated $nick/$channel/$name = $value "; + warn "## updated $nick/$channel/$name = $value\n"; } return $value; @@ -233,7 +242,7 @@ my $sth = $dbh->prepare(qq{ select value,changed from meta where nick = ? and channel = ? and name = ? }); $sth->execute( $nick, $channel, $name ); my ($v,$c) = $sth->fetchrow_array; - _log "fetched $nick/$channel/$name = $v [$c]"; + warn "## fetched $nick/$channel/$name = $v [$c]\n"; return ($v,$c) if wantarray; return $v; @@ -242,7 +251,7 @@ -my $sth = $dbh->prepare(qq{ +my $sth_insert_log = $dbh->prepare(qq{ insert into log (channel, me, nick, message, time) values (?,?,?,?,?) @@ -330,25 +339,26 @@ my @where; my @args; + my $msg; if (my $search = $args->{search}) { $search =~ s/^\s+//; $search =~ s/\s+$//; push @where, 'message ilike ? or nick ilike ?'; push @args, ( ( '%' . $search . '%' ) x 2 ); - _log "search for '$search'"; + $msg = "Search for '$search'"; } if ($args->{tag} && $tags->{ $args->{tag} }) { push @where, 'id in (' . join(',', @{ $tags->{ $args->{tag} } }) . ')'; - _log "search for tags $args->{tag}"; + $msg = "Search for tags $args->{tag}"; } if (my $date = $args->{date} ) { $date = check_date( $date ); push @where, 'date(time) = ?'; push @args, $date; - _log "search for date $date"; + $msg = "search for date $date"; } $sql .= " where " . join(" and ", @where) if @where; @@ -362,6 +372,8 @@ eval { $sth->execute( @args ) }; return if $@; + my $nr_results = $sth->rows; + my $last_row = { date => '', time => '', @@ -382,10 +394,14 @@ return @rows if ($args->{full_rows}); - my @msgs = ( - "Showing " . ($#rows + 1) . " messages..." + $msg .= ' produced ' . ( + $nr_results == 0 ? 'no results' : + $nr_results == 0 ? 'one result' : + $nr_results . ' results' ); + my @msgs = ( $msg ); + if ($context) { my @ids = @rows; @rows = (); @@ -494,7 +510,6 @@ return unless ($arg->{id} && $arg->{message}); my $m = $arg->{message}; - from_to('UTF-8', 'iso-8859-2', $m) if (is_utf8($m)); my @tags; @@ -563,9 +578,7 @@ $a->{me} ? "***" . $a->{nick} : "<" . $a->{nick} . ">", " " . $a->{message}; - from_to($a->{message}, 'UTF-8', $ENCODING); - - $sth->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); + $sth_insert_log->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); add_tag( id => $dbh->last_insert_id(undef,undef,"log",undef), %$a ); } @@ -604,27 +617,130 @@ exit; } +# +# RSS follow +# + +my $_rss; + + +sub rss_fetch { + my ($args) = @_; + + # how many messages to send out when feed is seen for the first time? + my $send_rss_msgs = 1; + + _log "RSS fetch", $args->{url}; + + my $feed = XML::Feed->parse(URI->new( $args->{url} )); + if ( ! $feed ) { + _log("can't fetch RSS ", $args->{url}); + return; + } + + my ( $total, $updates ) = ( 0, 0 ); + for my $entry ($feed->entries) { + $total++; + + # seen allready? + next if $_rss->{$args->{channel}}->{$feed->link}->{$entry->id}++ > 0; + + sub prefix { + my ($txt,$var) = @_; + $var =~ s/\s+/ /gs; + $var =~ s/^\s+//g; + $var =~ s/\s+$//g; + return $txt . $var if $var; + } + + # fix absolute and relative links to feed entries + my $link = $entry->link; + if ( $link =~ m!^/! ) { + my $host = $args->{url}; + $host =~ s!^(http://[^/]+).*$!$1!; #!vim + $link = "$host/$link"; + } elsif ( $link !~ m!^http! ) { + $link = $args->{url} . $link; + } + + my $msg; + $msg .= prefix( 'From: ' , $args->{name} || $feed->title ); + $msg .= prefix( ' by ' , $entry->author ); + $msg .= prefix( ' | ' , $entry->title ); + $msg .= prefix( ' | ' , $link ); +# $msg .= prefix( ' id ' , $entry->id ); + + if ( $args->{kernel} && $send_rss_msgs ) { + $send_rss_msgs--; + $sth_insert_log->execute( $args->{channel}, 1, $NICK, $msg, 'now()' ); + my ( $type, $to ) = ( 'notice', $args->{channel} ); + ( $type, $to ) = ( 'privmsg', $args->{nick} ) if $args->{private}; + _log(">> $type $to |", $msg); + $args->{kernel}->post( $IRC_ALIAS => $type => $to, $msg ); + $updates++; + } + } + + my $sql = qq{ update feeds set last_update = now(), polls = polls + 1 }; + $sql .= qq{, updates = updates + $updates } if $updates; + $sql .= qq{where id = } . $args->{id}; + eval { $dbh->do( $sql ) }; + + _log "RSS got $total items of which $updates new"; + + return $updates; +} + +sub rss_fetch_all { + my $kernel = shift; + my $sql = qq{ + select id, url, name, channel, nick, private + from feeds + where active is true + }; + # limit to newer feeds only if we are not sending messages out + $sql .= qq{ and last_update + delay < now() } if $kernel; + my $sth = $dbh->prepare( $sql ); + $sth->execute(); + warn "# ",$sth->rows," active RSS feeds\n"; + my $count = 0; + while (my $row = $sth->fetchrow_hashref) { + $row->{kernel} = $kernel if $kernel; + $count += rss_fetch( $row ); + } + return "OK, fetched $count posts from " . $sth->rows . " feeds"; +} + + +sub rss_check_updates { + my $kernel = shift; + $_rss->{last_poll} ||= time(); + my $dt = time() - $_rss->{last_poll}; + warn "## rss_check_updates $dt > $rss_min_delay\n"; + if ( $dt > $rss_min_delay ) { + $_rss->{last_poll} = time(); + _log rss_fetch_all( $kernel ); + } +} + +# seed rss seen cache so we won't send out all items on startup +_log rss_fetch_all; # # POE handing part # -my $SKIPPING = 0; # if skipping, how many we've done -my $SEND_QUEUE; # cache my $ping; # ping stats POE::Component::IRC->new($IRC_ALIAS); -POE::Session->create( inline_states => - {_start => sub { +POE::Session->create( inline_states => { + _start => sub { $_[KERNEL]->post($IRC_ALIAS => register => 'all'); $_[KERNEL]->post($IRC_ALIAS => connect => $CONNECT); }, irc_255 => sub { # server is done blabbing $_[KERNEL]->post($IRC_ALIAS => join => $CHANNEL); - $_[KERNEL]->post($IRC_ALIAS => join => '#logger'); - $_[KERNEL]->yield("heartbeat"); # start heartbeat -# $_[KERNEL]->yield("my_add", $_) for keys %FOLLOWS; $_[KERNEL]->post( $IRC_ALIAS => privmsg => 'nickserv', "IDENTIFY $NICK" ); }, irc_public => sub { @@ -635,6 +751,7 @@ save_message( channel => $channel, me => 0, nick => $nick, message => $msg); meta( $nick, $channel, 'last-msg', $msg ); + rss_check_updates( $kernel ); }, irc_ctcp_action => sub { my $kernel = $_[KERNEL]; @@ -655,15 +772,16 @@ }, irc_ping => sub { - warn "pong ", $_[ARG0], $/; + _log( "pong ", $_[ARG0] ); $ping->{ $_[ARG0] }++; + rss_check_updates( $_[KERNEL] ); }, irc_invite => sub { my $kernel = $_[KERNEL]; my $nick = (split /!/, $_[ARG0])[0]; my $channel = $_[ARG1]; - warn "invited to $channel by $nick"; + _log "invited to $channel by $nick"; $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, "how nice of you to invite me to $channel, I'll be right there..." ); $_[KERNEL]->post($IRC_ALIAS => join => $channel); @@ -674,7 +792,6 @@ my $nick = (split /!/, $_[ARG0])[0]; my $msg = $_[ARG2]; my $channel = $_[ARG1]->[0]; - from_to($msg, 'UTF-8', $ENCODING); my $res = "unknown command '$msg', try /msg $NICK help!"; my @out; @@ -718,7 +835,6 @@ foreach my $res (get_from_log( limit => $limit )) { _log "last: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -733,7 +849,6 @@ search => $what, )) { _log "search [$what]: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -803,14 +918,58 @@ $res = "config option $op doesn't exist"; } } + } elsif ($msg =~ m/^rss-update/) { + $res = rss_fetch_all( $_[KERNEL] ); + } elsif ($msg =~ m/^rss-clean/) { + $_rss = undef; + $dbh->do( qq{ update feeds set last_update = now() - delay } ); + $res = "OK, cleaned RSS cache"; + } elsif ($msg =~ m/^rss-list/) { + my $sth = $dbh->prepare(qq{ select url,name,last_update,active,channel,nick,private from feeds }); + $sth->execute; + while (my @row = $sth->fetchrow_array) { + $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, join(' | ',@row) ); + } + $res = ''; + } elsif ($msg =~ m!^rss-(add|remove|stop|start)(?:-(private))?\s+(http://\S+)\s*(.*)!) { + my ( $command, $sub, $url, $arg ) = ( $1,$2,$3,$4 ); + + my $channel = $1 if ( $arg =~ s/\s*(#\S+)\s*// ); + $channel = $nick if $sub eq 'private'; + + my $sql = { + add => qq{ insert into feeds (url,name,channel,nick,private) values (?,?,?,?,?) }, +# remove => qq{ delete from feeds where url = ? and name = ? }, + start => qq{ update feeds set active = true where url = ? }, + stop => qq{ update feeds set active = false where url = ? }, + }; + + if ( $command eq 'add' && ! $channel ) { + $res = "ERROR: got '$msg' which doesn't have #channel in it, ignoring!"; + } elsif (my $q = $sql->{$command} ) { + my $sth = $dbh->prepare( $q ); + my @data = ( $url ); + if ( $command eq 'add' ) { + push @data, ( $arg, $channel, $nick, $sub eq 'private' ? 1 : 0 ); + } + warn "## $command SQL $q with ",dump( @data ),"\n"; + eval { $sth->execute( @data ) }; + if ($@) { + $res = "ERROR: $@"; + } else { + $res = "OK, RSS [$command|$sub|$url|$arg]"; + } + } else { + $res = "ERROR: don't know what to do with: $msg"; + } } if ($res) { _log ">> [$nick] $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } + rss_check_updates( $_[KERNEL] ); }, irc_477 => sub { _log "# irc_477: ",$_[ARG1]; @@ -849,66 +1008,6 @@ ""; 0; # false for signals }, - my_add => sub { - my $trailing = $_[ARG0]; - my $session = $_[SESSION]; - POE::Session->create - (inline_states => - {_start => sub { - $_[HEAP]->{wheel} = - POE::Wheel::FollowTail->new - ( - Filename => $FOLLOWS{$trailing}, - InputEvent => 'got_line', - ); - }, - got_line => sub { - $_[KERNEL]->post($session => my_tailed => - time, $trailing, $_[ARG0]); - }, - }, - ); - - }, - my_tailed => sub { - my ($time, $file, $line) = @_[ARG0..ARG2]; - ## $time will be undef on a probe, or a time value if a real line - - ## PoCo::IRC has throttling built in, but no external visibility - ## so this is reaching "under the hood" - $SEND_QUEUE ||= - $_[KERNEL]->alias_resolve($IRC_ALIAS)->get_heap->{send_queue}; - - ## handle "no need to keep skipping" transition - if ($SKIPPING and @$SEND_QUEUE < 1) { - $_[KERNEL]->post($IRC_ALIAS => privmsg => $CHANNEL => - "[discarded $SKIPPING messages]"); - $SKIPPING = 0; - } - - ## handle potential message display - if ($time) { - if ($SKIPPING or @$SEND_QUEUE > 3) { # 3 msgs per 10 seconds - $SKIPPING++; - } else { - my @time = localtime $time; - $_[KERNEL]->post($IRC_ALIAS => privmsg => $CHANNEL => - sprintf "%02d:%02d:%02d: %s: %s", - ($time[2] + 11) % 12 + 1, $time[1], $time[0], - $file, $line); - } - } - - ## handle re-probe/flush if skipping - if ($SKIPPING) { - $_[KERNEL]->delay($_[STATE] => 0.5); # $time will be undef - } - - }, - my_heartbeat => sub { - $_[KERNEL]->yield(my_tailed => time, "heartbeat", "beep"); - $_[KERNEL]->delay($_[STATE] => 10); - } }, ); @@ -983,7 +1082,7 @@ my $search = $q->param('search') || $q->param('grep') || ''; - if ($request->url =~ m#/rss(?:/(tags|last-tag)\w*(?:=(\d+))?)?#i) { + if ($request->url =~ m#/rss(?:/(tags|last-tag|follow.*)\w*(?:=(\d+))?)?#i) { my $show = lc($1); my $nr = $2; @@ -995,6 +1094,7 @@ #warn "create $type feed from ",dump( @last_tags ); my $feed = XML::Feed->new( $type ); + $feed->link( $url ); if ( $show eq 'tags' ) { $nr ||= 50; @@ -1021,7 +1121,6 @@ $nr = $last_x_tags if $nr > $last_x_tags; $feed->title( "last $nr tagged messages from $CHANNEL" ); - $feed->link( $url ); $feed->description( "collects messages which have tags// in them" ); foreach my $m ( @last_tags ) { @@ -1036,7 +1135,6 @@ my $message = $filter->{message}->( $m->{message} ); $message .= "
\n" unless $message =~ m!<(/p|br/?)>!; # warn "## message = $message\n"; - from_to( $message, $ENCODING, 'UTF-8' ); #$feed_entry->summary( $feed_entry->content( @@ -1050,8 +1148,32 @@ } + } elsif ( $show =~ m/^follow/ ) { + + $feed->title( "Feeds which this bot follows" ); + + my $sth = $dbh->prepare( qq{ select * from feeds order by last_update desc } ); + $sth->execute; + while (my $row = $sth->fetchrow_hashref) { + my $feed_entry = XML::Feed::Entry->new($type); + $feed_entry->title( $row->{name} ); + $feed_entry->link( $row->{url} ); + $feed_entry->issued( DateTime::Format::Flexible->build( $row->{last_update} ) ); + $feed_entry->content( + '' . dump( $row ) . ']]>' + ); + $feed->add_entry( $feed_entry ); + } + + my $feed_entry = XML::Feed::Entry->new($type); + $feed_entry->title( "Internal stats" ); + $feed_entry->content( + '' . dump( $_rss ) . ']]>' + ); + $feed->add_entry( $feed_entry ); + } else { - warn "!! unknown rss request for $show\n"; + _log "unknown rss request ",$request->url; return RC_DENY; } @@ -1063,7 +1185,7 @@ warn "$@"; } - $response->content_type("text/html; charset=$ENCODING"); + $response->content_type("text/html; charset=UTF-8"); my $html = qq{$NICK