--- trunk/bin/irc-logger.pl 2007/12/16 19:03:35 72 +++ trunk/bin/irc-logger.pl 2008/03/07 10:30:57 92 @@ -20,8 +20,6 @@ =item --log=irc-logger.log -Name of log file - =back =head1 DESCRIPTION @@ -32,7 +30,8 @@ ## CONFIG -my $HOSTNAME = `hostname`; +my $HOSTNAME = `hostname -f`; +chomp($HOSTNAME); my $NICK = 'irc-logger'; $NICK .= '-dev' if ($HOSTNAME =~ m/llin/); @@ -45,31 +44,28 @@ $CHANNEL = '#irc-logger' if ($HOSTNAME =~ m/llin/); my $IRC_ALIAS = "log"; -my %FOLLOWS = - ( - ACCESS => "/var/log/apache/access.log", - ERROR => "/var/log/apache/error.log", - ); - my $DSN = 'DBI:Pg:dbname=' . $NICK; -my $ENCODING = 'ISO-8859-2'; my $TIMESTAMP = '%Y-%m-%d %H:%M:%S'; my $sleep_on_error = 5; -my $http_port = $NICK =~ m/-dev/ ? 8001 : 8000; -my $http_hostname = `hostname`; -chomp( $http_hostname ); +# number of last tags to keep in circular buffer +my $last_x_tags = 50; -## END CONFIG +# don't pull rss feeds more often than this +my $rss_min_delay = 60; +$rss_min_delay = 15; +my $http_port = $NICK =~ m/-dev/ ? 8001 : 8000; + +my $url = "http://$HOSTNAME:$http_port"; +## END CONFIG -use POE qw(Component::IRC Wheel::FollowTail Component::Server::HTTP); +use POE qw(Component::IRC Component::Server::HTTP); use HTTP::Status; use DBI; -use Encode qw/from_to is_utf8/; use Regexp::Common qw /URI/; use CGI::Simple; use HTML::TagCloud; @@ -95,14 +91,14 @@ 'log:s' => \$log_path, ); -$SIG{__DIE__} = sub { - confess "fatal error"; -}; +#$SIG{__DIE__} = sub { +# confess "fatal error"; +#}; open(STDOUT, '>', $log_path) || warn "can't redirect log to $log_path: $!"; sub _log { - print strftime($TIMESTAMP,localtime()), ' ', join(" ",@_), $/; + print strftime($TIMESTAMP,localtime()) . ' ' . join(" ",@_) . $/; } # HTML formatters @@ -128,7 +124,7 @@ $m =~ s/($escape_re)/$escape{$1}/gs; $m =~ s#($RE{URI}{HTTP})#e(qq{$1})#egs || $m =~ s#\/(\w+)\/#$1#gs; - $m =~ s#$tag_regex#e(qq{$1})#egs; + $m =~ s#$tag_regex#e(qq{$1})#egs; $m =~ s#\*(\w+)\*#$1#gs; $m =~ s#_(\w+)_#$1#gs; @@ -148,9 +144,10 @@ }; my $dbh = DBI->connect($DSN,"","", { RaiseError => 1, AutoCommit => 1 }) || die $DBI::errstr; +$dbh->do( qq{ set client_encoding = 'UTF-8' } ); my $sql_schema = { - log => ' + log => qq{ create table log ( id serial, time timestamp default now(), @@ -164,17 +161,31 @@ create index log_time on log(time); create index log_channel on log(channel); create index log_nick on log(nick); - ', - meta => ' + }, + meta => q{ create table meta ( nick text not null, channel text not null, name text not null, value text, - changed timestamp default now(), + changed timestamp default 'now()', primary key(nick,channel,name) ); - ', + }, + feeds => qq{ +create table feeds ( + id serial, + url text not null, + name text, + delay interval not null default '5 min', + active boolean default true, + last_update timestamp default 'now()', + polls int default 0, + updates int default 0 +); +create unique index feeds_url on feeds(url); +insert into feeds (url,name) values ('http://wiki.razmjenavjestina.org/feed/workspace/razmjenavjestina?category=Recent%20Changes','wiki'); + }, }; foreach my $table ( keys %$sql_schema ) { @@ -238,7 +249,7 @@ -my $sth = $dbh->prepare(qq{ +my $sth_insert_log = $dbh->prepare(qq{ insert into log (channel, me, nick, message, time) values (?,?,?,?,?) @@ -482,7 +493,6 @@ =cut -my $last_x_tags = 5; my @last_tags; sub add_tag { @@ -491,7 +501,6 @@ return unless ($arg->{id} && $arg->{message}); my $m = $arg->{message}; - from_to('UTF-8', 'iso-8859-2', $m) if (is_utf8($m)); my @tags; @@ -500,14 +509,14 @@ next if (! $tag || $tag =~ m/https?:/i); push @{ $tags->{$tag} }, $arg->{id}; #warn "+tag $tag: $arg->{id}\n"; - $cloud->add($tag, "?tag=$tag", scalar @{$tags->{$tag}} + 1); + $cloud->add($tag, "$url?tag=$tag", scalar @{$tags->{$tag}} + 1); push @tags, $tag; } if ( @tags ) { - shift @last_tags if $#last_tags == $last_x_tags; - push @last_tags, { tags => [ @tags ], %$arg }; + pop @last_tags if $#last_tags == $last_x_tags; + unshift @last_tags, { tags => [ @tags ], %$arg }; } } @@ -519,14 +528,14 @@ =cut sub seed_tags { - my $sth = $dbh->prepare(qq{ select id,message,nick,me,time from log where message like '%//%' }); + my $sth = $dbh->prepare(qq{ select id,message,nick,me,time from log where message like '%//%' order by time asc }); $sth->execute; while (my $row = $sth->fetchrow_hashref) { add_tag( %$row ); } foreach my $tag (keys %$tags) { - $cloud->add($tag, "?tag=$tag", scalar @{$tags->{$tag}} + 1); + $cloud->add($tag, "$url?tag=$tag", scalar @{$tags->{$tag}} + 1); } } @@ -560,9 +569,7 @@ $a->{me} ? "***" . $a->{nick} : "<" . $a->{nick} . ">", " " . $a->{message}; - from_to($a->{message}, 'UTF-8', $ENCODING); - - $sth->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); + $sth_insert_log->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); add_tag( id => $dbh->last_insert_id(undef,undef,"log",undef), %$a ); } @@ -601,27 +608,115 @@ exit; } +# +# RSS follow +# + +my $_rss; + + +sub rss_fetch { + my ($args) = @_; + + # how many messages to send out when feed is seen for the first time? + my $send_rss_msgs = 1; + + _log "RSS fetch", $args->{url}; + + my $feed = XML::Feed->parse(URI->new( $args->{url} )); + if ( ! $feed ) { + _log("can't fetch RSS ", $args->{url}); + return; + } + + my ( $total, $updates ) = ( 0, 0 ); + for my $entry ($feed->entries) { + $total++; + + # seen allready? + next if $_rss->{$feed->link}->{seen}->{$entry->id}++ > 0; + + sub prefix { + my ($txt,$var) = @_; + $var =~ s/^\s+//g; + return $txt . $var if $var; + } + + my $msg; + $msg .= prefix( 'From: ' , $args->{name} || $feed->title ); + $msg .= prefix( ' by ' , $entry->author ); + $msg .= prefix( ' | ' , $entry->title ); + $msg .= prefix( ' | ' , $entry->link ); +# $msg .= prefix( ' id ' , $entry->id ); + + if ( $args->{kernel} && $send_rss_msgs ) { + $send_rss_msgs--; + _log('>>', $msg); + $sth_insert_log->execute( $CHANNEL, 1, $NICK, $msg, 'now()' ); + $args->{kernel}->post( $IRC_ALIAS => notice => $CHANNEL, $msg ); + $updates++; + } + } + + my $sql = qq{ update feeds set last_update = now(), polls = polls + 1 }; + $sql .= qq{, updates = updates + $updates } if $updates; + $sql .= qq{where id = } . $args->{id}; + eval { $dbh->do( $sql ) }; + + _log "RSS got $total items of which $updates new"; + + return $updates; +} + +sub rss_fetch_all { + my $kernel = shift; + my $sql = qq{ + select id, url, name + from feeds + where active is true + }; + # limit to newer feeds only if we are not sending messages out + $sql .= qq{ and last_update + delay < now() } if $kernel; + my $sth = $dbh->prepare( $sql ); + $sth->execute(); + warn "# ",$sth->rows," active RSS feeds\n"; + my $count = 0; + while (my $row = $sth->fetchrow_hashref) { + $row->{kernel} = $kernel if $kernel; + $count += rss_fetch( $row ); + } + return "OK, fetched $count posts from " . $sth->rows . " feeds"; +} + + +sub rss_check_updates { + my $kernel = shift; + my $last_t = $_rss->{last_poll} || time(); + my $t = time(); + if ( $t - $last_t > $rss_min_delay ) { + $_rss->{last_poll} = $t; + _log rss_fetch_all( $kernel ); + } +} + +# seed rss seen cache so we won't send out all items on startup +_log rss_fetch_all; # # POE handing part # -my $SKIPPING = 0; # if skipping, how many we've done -my $SEND_QUEUE; # cache my $ping; # ping stats POE::Component::IRC->new($IRC_ALIAS); -POE::Session->create( inline_states => - {_start => sub { +POE::Session->create( inline_states => { + _start => sub { $_[KERNEL]->post($IRC_ALIAS => register => 'all'); $_[KERNEL]->post($IRC_ALIAS => connect => $CONNECT); }, irc_255 => sub { # server is done blabbing $_[KERNEL]->post($IRC_ALIAS => join => $CHANNEL); - $_[KERNEL]->post($IRC_ALIAS => join => '#logger'); - $_[KERNEL]->yield("heartbeat"); # start heartbeat -# $_[KERNEL]->yield("my_add", $_) for keys %FOLLOWS; $_[KERNEL]->post( $IRC_ALIAS => privmsg => 'nickserv', "IDENTIFY $NICK" ); }, irc_public => sub { @@ -652,15 +747,16 @@ }, irc_ping => sub { - warn "pong ", $_[ARG0], $/; + _log( "pong ", $_[ARG0] ); $ping->{ $_[ARG0] }++; + rss_check_updates( $_[KERNEL] ); }, irc_invite => sub { my $kernel = $_[KERNEL]; my $nick = (split /!/, $_[ARG0])[0]; my $channel = $_[ARG1]; - warn "invited to $channel by $nick"; + _log "invited to $channel by $nick"; $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, "how nice of you to invite me to $channel, I'll be right there..." ); $_[KERNEL]->post($IRC_ALIAS => join => $channel); @@ -671,7 +767,6 @@ my $nick = (split /!/, $_[ARG0])[0]; my $msg = $_[ARG2]; my $channel = $_[ARG1]->[0]; - from_to($msg, 'UTF-8', $ENCODING); my $res = "unknown command '$msg', try /msg $NICK help!"; my @out; @@ -715,7 +810,6 @@ foreach my $res (get_from_log( limit => $limit )) { _log "last: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -730,7 +824,6 @@ search => $what, )) { _log "search [$what]: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -800,14 +893,44 @@ $res = "config option $op doesn't exist"; } } + } elsif ($msg =~ m/^rss-update/) { + $res = rss_fetch_all( $_[KERNEL] ); + } elsif ($msg =~ m/^rss-clean/) { + $_rss = undef; + $dbh->do( qq{ update feeds set last_update = now() - delay } ); + $res = "OK, cleaned RSS cache"; + } elsif ($msg =~ m/^rss-list/) { + my $sth = $dbh->prepare(qq{ select url,name,last_update,active from feeds }); + $sth->execute; + while (my @row = $sth->fetchrow_array) { + $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, join(' | ',@row) ); + } + $res = ''; + } elsif ($msg =~ m!^rss-(add|remove|stop|start)\s+(http://\S+)\s*(.*)!) { + my $sql = { + add => qq{ insert into feeds (url,name) values (?,?) }, +# remove => qq{ delete from feeds where url = ? and name = ? }, + start => qq{ update feeds set active = true where url = ? }, + stop => qq{ update feeds set active = false where url = ? }, + + }; + if (my $q = $sql->{$1} ) { + my $sth = $dbh->prepare( $q ); + my @data = ( $2 ); + push @data, $3 if ( $q =~ s/\?//g == 2 ); + warn "## $1 SQL $q with ",dump( @data ),"\n"; + eval { $sth->execute( @data ) }; + } + + $res = "OK, RSS $1 : $2 - $3"; } if ($res) { _log ">> [$nick] $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } + rss_check_updates( $_[KERNEL] ); }, irc_477 => sub { _log "# irc_477: ",$_[ARG1]; @@ -846,66 +969,6 @@ ""; 0; # false for signals }, - my_add => sub { - my $trailing = $_[ARG0]; - my $session = $_[SESSION]; - POE::Session->create - (inline_states => - {_start => sub { - $_[HEAP]->{wheel} = - POE::Wheel::FollowTail->new - ( - Filename => $FOLLOWS{$trailing}, - InputEvent => 'got_line', - ); - }, - got_line => sub { - $_[KERNEL]->post($session => my_tailed => - time, $trailing, $_[ARG0]); - }, - }, - ); - - }, - my_tailed => sub { - my ($time, $file, $line) = @_[ARG0..ARG2]; - ## $time will be undef on a probe, or a time value if a real line - - ## PoCo::IRC has throttling built in, but no external visibility - ## so this is reaching "under the hood" - $SEND_QUEUE ||= - $_[KERNEL]->alias_resolve($IRC_ALIAS)->get_heap->{send_queue}; - - ## handle "no need to keep skipping" transition - if ($SKIPPING and @$SEND_QUEUE < 1) { - $_[KERNEL]->post($IRC_ALIAS => privmsg => $CHANNEL => - "[discarded $SKIPPING messages]"); - $SKIPPING = 0; - } - - ## handle potential message display - if ($time) { - if ($SKIPPING or @$SEND_QUEUE > 3) { # 3 msgs per 10 seconds - $SKIPPING++; - } else { - my @time = localtime $time; - $_[KERNEL]->post($IRC_ALIAS => privmsg => $CHANNEL => - sprintf "%02d:%02d:%02d: %s: %s", - ($time[2] + 11) % 12 + 1, $time[1], $time[0], - $file, $line); - } - } - - ## handle re-probe/flush if skipping - if ($SKIPPING) { - $_[KERNEL]->delay($_[STATE] => 0.5); # $time will be undef - } - - }, - my_heartbeat => sub { - $_[KERNEL]->yield(my_tailed => time, "heartbeat", "beep"); - $_[KERNEL]->delay($_[STATE] => 10); - } }, ); @@ -913,6 +976,11 @@ my $httpd = POE::Component::Server::HTTP->new( Port => $http_port, + PreHandler => { + '/' => sub { + $_[0]->header(Connection => 'close') + } + }, ContentHandler => { '/' => \&root_handler }, Headers => { Server => 'irc-logger' }, ); @@ -958,6 +1026,11 @@ my ($request, $response) = @_; $response->code(RC_OK); + # this doesn't seem to work, so moved to PreHandler + #$response->header(Connection => 'close'); + + return RC_OK if $request->uri =~ m/favicon.ico$/; + my $q; if ( $request->method eq 'POST' ) { @@ -970,37 +1043,92 @@ my $search = $q->param('search') || $q->param('grep') || ''; - if ($request->url =~ m#/rss#i) { + if ($request->url =~ m#/rss(?:/(tags|last-tag|follow.*)\w*(?:=(\d+))?)?#i) { + my $show = lc($1); + my $nr = $2; + my $type = 'RSS'; # Atom $response->content_type( 'application/' . lc($type) . '+xml' ); my $html = ''; - warn "create $type feed from ",dump( @last_tags ); + #warn "create $type feed from ",dump( @last_tags ); my $feed = XML::Feed->new( $type ); + $feed->link( $url ); - $feed->title( "last $last_x_tags from $CHANNEL" ); -# $feed->link( "http://$http_hostname:$http_port" ); - $feed->description( "collects messages which have tags// in them" ); - - foreach my $m ( @last_tags ) { - warn dump( $m ); - #my $tags = join(' ', @{$m->{tags}} ); + if ( $show eq 'tags' ) { + $nr ||= 50; + $feed->title( "tags from $CHANNEL" ); + $feed->link( "$url/tags" ); + $feed->description( "tag cloud created from messages on channel $CHANNEL which have tags// in them" ); my $feed_entry = XML::Feed::Entry->new($type); - $feed_entry->title( $m->{nick} . '@' . $m->{time} ); - $feed_entry->author( $m->{nick} ); -# $feed_entry->link( ); - $feed_entry->issued( DateTime::Format::Flexible->build( $m->{time} ) ); - $feed_entry->summary( - '{nick}->( $m->{nick} ) . -# '' . $m->{nick} . ' ' . - $filter->{message}->( $m->{message} ) . - ']]>' + $feed_entry->title( "$nr tags from $CHANNEL" ); + $feed_entry->author( $NICK ); + $feed_entry->link( '/#tags' ); + + $feed_entry->content( + qq{} + . $cloud->css + . qq{} + . $cloud->html( $nr ) + . qq{]]>} ); - $feed_entry->category( join(', ', @{$m->{tags}}) ); $feed->add_entry( $feed_entry ); + + } elsif ( $show eq 'last-tag' ) { + + $nr ||= $last_x_tags; + $nr = $last_x_tags if $nr > $last_x_tags; + + $feed->title( "last $nr tagged messages from $CHANNEL" ); + $feed->description( "collects messages which have tags// in them" ); + + foreach my $m ( @last_tags ) { +# warn dump( $m ); + #my $tags = join(' ', @{$m->{tags}} ); + my $feed_entry = XML::Feed::Entry->new($type); + $feed_entry->title( $m->{nick} . '@' . $m->{time} ); + $feed_entry->author( $m->{nick} ); + $feed_entry->link( '/#' . $m->{id} ); + $feed_entry->issued( DateTime::Format::Flexible->build( $m->{time} ) ); + + my $message = $filter->{message}->( $m->{message} ); + $message .= "
\n" unless $message =~ m!<(/p|br/?)>!; +# warn "## message = $message\n"; + + #$feed_entry->summary( + $feed_entry->content( + "" + ); + $feed_entry->category( join(', ', @{$m->{tags}}) ); + $feed->add_entry( $feed_entry ); + + $nr--; + last if $nr <= 0; + + } + + } elsif ( $show =~ m/^follow/ ) { + + $feed->title( "Feeds which this bot follows" ); + + my $sth = $dbh->prepare( qq{ select * from feeds order by last_update desc } ); + $sth->execute; + while (my $row = $sth->fetchrow_hashref) { + my $feed_entry = XML::Feed::Entry->new($type); + $feed_entry->title( $row->{name} ); + $feed_entry->link( $row->{url} ); + $feed_entry->issued( DateTime::Format::Flexible->build( $row->{last_update} ) ); + $feed_entry->content( + '' . dump( $row ) . ']]>' + ); + $feed->add_entry( $feed_entry ); + } + + } else { + _log "unknown rss request ",$request->url; + return RC_DENY; } $response->content( $feed->as_xml ); @@ -1011,21 +1139,24 @@ warn "$@"; } - $response->content_type("text/html; charset=$ENCODING"); + $response->content_type("text/html; charset=UTF-8"); my $html = - qq{$NICK} . - qq{ + qq{$NICK} + . qq{ - } . - $cloud->html(500) . - qq{

}; - if ($request->url =~ m#/history#) { + } + . $cloud->html(500) + . qq{

}; + + if ($request->url =~ m#/tags?#) { + # nop + } elsif ($request->url =~ m#/history#) { my $sth = $dbh->prepare(qq{ select date(time) as date,count(*) as nr,sum(length(message)) as len from log @@ -1057,9 +1188,9 @@ $cal->weekdays('MON','TUE','WED','THU','FRI'); ($l_yyyy,$l_mm) = ($yyyy,$mm); } - $cal->setcontent($dd, qq{ - $row->{nr}
$row->{len} - }); + $cal->setcontent($dd, qq[ + $row->{nr}
$row->{len} + ]) if $cal; } $html .= qq{} . $cal->as_HTML() . qq{}; @@ -1074,7 +1205,7 @@ fmt => { date => sub { my $date = shift || return; - qq{


$date
}; + qq{
$date
}; }, time => '%s ', time_channel => '%s %s ', @@ -1093,6 +1224,7 @@ }; $response->content( $html ); + warn "<< ", $request->method, " ", $request->uri, " created ", length($html), " bytes\n"; return RC_OK; }