--- trunk/bin/irc-logger.pl 2008/02/20 20:28:15 80 +++ trunk/bin/irc-logger.pl 2008/03/06 22:57:16 86 @@ -22,6 +22,10 @@ Name of log file +=item --follow=file.log + +Follows new messages in file + =back =head1 DESCRIPTION @@ -46,14 +50,12 @@ $CHANNEL = '#irc-logger' if ($HOSTNAME =~ m/llin/); my $IRC_ALIAS = "log"; -my %FOLLOWS = - ( - ACCESS => "/var/log/apache/access.log", - ERROR => "/var/log/apache/error.log", - ); +# default log to follow and announce messages +my $follows_path = 'follows.log'; my $DSN = 'DBI:Pg:dbname=' . $NICK; +# log output encoding my $ENCODING = 'ISO-8859-2'; my $TIMESTAMP = '%Y-%m-%d %H:%M:%S'; @@ -62,14 +64,16 @@ # number of last tags to keep in circular buffer my $last_x_tags = 50; +# don't pull rss feeds more often than this +my $rss_min_delay = 60; +$rss_min_delay = 15; + my $http_port = $NICK =~ m/-dev/ ? 8001 : 8000; my $url = "http://$HOSTNAME:$http_port"; ## END CONFIG - - use POE qw(Component::IRC Wheel::FollowTail Component::Server::HTTP); use HTTP::Status; use DBI; @@ -96,6 +100,7 @@ my $log_path; GetOptions( 'import-dircproxy:s' => \$import_dircproxy, + 'follows:s' => \$follows_path, 'log:s' => \$log_path, ); @@ -106,9 +111,29 @@ open(STDOUT, '>', $log_path) || warn "can't redirect log to $log_path: $!"; sub _log { - print strftime($TIMESTAMP,localtime()), ' ', join(" ",@_), $/; + my $out = strftime($TIMESTAMP,localtime()) . ' ' . join(" ",@_) . $/; + from_to( $out, 'UTF-8', $ENCODING ); + print $out; } +# LOG following + +my %FOLLOWS = + ( +# ACCESS => "/var/log/apache/access.log", +# ERROR => "/var/log/apache/error.log", + ); + +sub add_follow_path { + my $path = shift; + my $name = $path; + $name =~ s/\..*$//; + warn "# using $path to announce messages from $name\n"; + $FOLLOWS{$name} = $path; +} + +add_follow_path( $follows_path ) if ( -e $follows_path ); + # HTML formatters my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); @@ -152,9 +177,10 @@ }; my $dbh = DBI->connect($DSN,"","", { RaiseError => 1, AutoCommit => 1 }) || die $DBI::errstr; +$dbh->do( qq{ set client_encoding = 'UTF-8' } ); my $sql_schema = { - log => ' + log => qq{ create table log ( id serial, time timestamp default now(), @@ -168,17 +194,31 @@ create index log_time on log(time); create index log_channel on log(channel); create index log_nick on log(nick); - ', - meta => ' + }, + meta => q{ create table meta ( nick text not null, channel text not null, name text not null, value text, - changed timestamp default now(), + changed timestamp default 'now()', primary key(nick,channel,name) ); - ', + }, + feeds => qq{ +create table feeds ( + id serial, + url text not null, + name text, + delay interval not null default '5 min', + active boolean default true, + last_update timestamp default 'now()', + polls int default 0, + updates int default 0 +); +create unique index feeds_url on feeds(url); +insert into feeds (url,name) values ('http://wiki.razmjenavjestina.org/feed/workspace/razmjenavjestina?category=Recent%20Changes','wiki'); + }, }; foreach my $table ( keys %$sql_schema ) { @@ -563,8 +603,6 @@ $a->{me} ? "***" . $a->{nick} : "<" . $a->{nick} . ">", " " . $a->{message}; - from_to($a->{message}, 'UTF-8', $ENCODING); - $sth->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); add_tag( id => $dbh->last_insert_id(undef,undef,"log",undef), %$a ); } @@ -604,6 +642,94 @@ exit; } +# +# RSS follow +# + +my $_rss; + + +sub rss_fetch { + my ($args) = @_; + + # how many messages to send out when feed is seen for the first time? + my $send_rss_msgs = 1; + + my $feed = XML::Feed->parse(URI->new( $args->{url} )); + if ( ! $feed ) { + _log("can't fetch RSS ", $args->{url}); + return; + } + my $updates = 0; + for my $entry ($feed->entries) { + + # seen allready? + return if $_rss->{$feed->link}->{seen}->{$entry->id}++ > 0; + + sub prefix { + my ($txt,$var) = @_; + $var =~ s/^\s+//g; + return $txt . $var if $var; + } + + my $msg; + $msg .= prefix( 'From: ' , $feed->title ); + $msg .= prefix( ' by ' , $entry->author ); + $msg .= prefix( ' -- ' , $entry->link ); +# $msg .= prefix( ' id ' , $entry->id ); + + if ( $args->{kernel} && $send_rss_msgs ) { + warn "# sending to $CHANNEL\n"; + $send_rss_msgs--; + $args->{kernel}->post( $IRC_ALIAS => notice => $CHANNEL, $msg ); + $updates++; + save_message( channel => $CHANNEL, me => 1, nick => $NICK, message => $msg ); + _log('RSS', $msg); + } + } + + my $sql = qq{ update feeds set last_update = now(), polls = polls + 1 }; + $sql .= qq{, updates = updates + $updates } if $updates; + $sql .= qq{where id = } . $args->{id}; + eval { $dbh->do( $sql ) }; + + return $updates; +} + +sub rss_fetch_all { + my $kernel = shift; + my $sql = qq{ + select id, url, name + from feeds + where active is true + }; + # limit to newer feeds only if we are not sending messages out + $sql .= qq{ and last_update + delay < now() } if $kernel; + my $sth = $dbh->prepare( $sql ); + $sth->execute(); + warn "# ",$sth->rows," active RSS feeds\n"; + my $count = 0; + while (my $row = $sth->fetchrow_hashref) { + warn "+++ fetch RSS feed: ",dump( $row ); + $row->{kernel} = $kernel if $kernel; + $count += rss_fetch( $row ); + } + return "OK, fetched $count posts from " . $sth->rows . " feeds"; +} + +my $rss_last_poll = time(); + +sub rss_check_updates { + my $kernel = shift; + my $t = time(); + if ( $rss_last_poll - $t > $rss_min_delay ) { + $rss_last_poll = $t; + _log rss_fetch_all( $kernel ); + } +} + +# seed rss seen cache so we won't send out all items on startup +_log rss_fetch_all; # # POE handing part @@ -615,8 +741,8 @@ POE::Component::IRC->new($IRC_ALIAS); -POE::Session->create( inline_states => - {_start => sub { +POE::Session->create( inline_states => { + _start => sub { $_[KERNEL]->post($IRC_ALIAS => register => 'all'); $_[KERNEL]->post($IRC_ALIAS => connect => $CONNECT); }, @@ -624,7 +750,7 @@ $_[KERNEL]->post($IRC_ALIAS => join => $CHANNEL); $_[KERNEL]->post($IRC_ALIAS => join => '#logger'); $_[KERNEL]->yield("heartbeat"); # start heartbeat -# $_[KERNEL]->yield("my_add", $_) for keys %FOLLOWS; + $_[KERNEL]->yield("my_add", $_) for keys %FOLLOWS; $_[KERNEL]->post( $IRC_ALIAS => privmsg => 'nickserv', "IDENTIFY $NICK" ); }, irc_public => sub { @@ -655,15 +781,16 @@ }, irc_ping => sub { - warn "pong ", $_[ARG0], $/; + _log( "pong ", $_[ARG0] ); $ping->{ $_[ARG0] }++; + rss_check_updates( $_[KERNEL] ); }, irc_invite => sub { my $kernel = $_[KERNEL]; my $nick = (split /!/, $_[ARG0])[0]; my $channel = $_[ARG1]; - warn "invited to $channel by $nick"; + _log "invited to $channel by $nick"; $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, "how nice of you to invite me to $channel, I'll be right there..." ); $_[KERNEL]->post($IRC_ALIAS => join => $channel); @@ -674,7 +801,6 @@ my $nick = (split /!/, $_[ARG0])[0]; my $msg = $_[ARG2]; my $channel = $_[ARG1]->[0]; - from_to($msg, 'UTF-8', $ENCODING); my $res = "unknown command '$msg', try /msg $NICK help!"; my @out; @@ -718,7 +844,6 @@ foreach my $res (get_from_log( limit => $limit )) { _log "last: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -733,7 +858,6 @@ search => $what, )) { _log "search [$what]: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -803,14 +927,34 @@ $res = "config option $op doesn't exist"; } } + } elsif ($msg =~ m/^rss-update/) { + $res = rss_fetch_all( $_[KERNEL] ); + } elsif ($msg =~ m/^rss-clean/) { + $_rss = undef; + $res = "OK, cleaned RSS cache"; + } elsif ($msg =~ m!^rss-(add|remove|stop|start)\s+(http://\S+)\s*(.*)!) { + my $sql = { + add => qq{ insert into feeds (url,name) values (?,?) }, +# remove => qq{ delete from feeds where url = ? and name = ? }, + start => qq{ update feeds set active = true where url = ? -- ? }, + stop => qq{ update feeds set active = false where url = ? -- ? }, + + }; + if (my $q = $sql->{$1} ) { + my $sth = $dbh->prepare( $q ); + warn "## SQL $q ( $2 | $3 )\n"; + eval { $sth->execute( $2, $3 ) }; + } + + $res ||= "OK, RSS $1 : $2 - $3"; } if ($res) { _log ">> [$nick] $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } + rss_check_updates( $_[KERNEL] ); }, irc_477 => sub { _log "# irc_477: ",$_[ARG1]; @@ -861,10 +1005,11 @@ Filename => $FOLLOWS{$trailing}, InputEvent => 'got_line', ); + warn "+++ following $trailing at $FOLLOWS{$trailing}\n"; }, got_line => sub { - $_[KERNEL]->post($session => my_tailed => - time, $trailing, $_[ARG0]); + warn "+++ $trailing : $_[ARG0]\n"; + $_[KERNEL]->post($session => my_tailed => time, $trailing, $_[ARG0]); }, }, ); @@ -916,6 +1061,11 @@ my $httpd = POE::Component::Server::HTTP->new( Port => $http_port, + PreHandler => { + '/' => sub { + $_[0]->header(Connection => 'close') + } + }, ContentHandler => { '/' => \&root_handler }, Headers => { Server => 'irc-logger' }, ); @@ -961,6 +1111,9 @@ my ($request, $response) = @_; $response->code(RC_OK); + # this doesn't seem to work, so moved to PreHandler + #$response->header(Connection => 'close'); + return RC_OK if $request->uri =~ m/favicon.ico$/; my $q; @@ -975,7 +1128,7 @@ my $search = $q->param('search') || $q->param('grep') || ''; - if ($request->url =~ m#/rss(?:/(tags|last-tag?)\w+(?:=(\d+))?)?#i) { + if ($request->url =~ m#/rss(?:/(tags|last-tag|follow.*)\w*(?:=(\d+))?)?#i) { my $show = lc($1); my $nr = $2; @@ -987,6 +1140,7 @@ #warn "create $type feed from ",dump( @last_tags ); my $feed = XML::Feed->new( $type ); + $feed->link( $url ); if ( $show eq 'tags' ) { $nr ||= 50; @@ -1013,7 +1167,6 @@ $nr = $last_x_tags if $nr > $last_x_tags; $feed->title( "last $nr tagged messages from $CHANNEL" ); - $feed->link( $url ); $feed->description( "collects messages which have tags// in them" ); foreach my $m ( @last_tags ) { @@ -1028,7 +1181,6 @@ my $message = $filter->{message}->( $m->{message} ); $message .= "
\n" unless $message =~ m!<(/p|br/?)>!; # warn "## message = $message\n"; - from_to( $message, $ENCODING, 'UTF-8' ); #$feed_entry->summary( $feed_entry->content( @@ -1042,8 +1194,25 @@ } + } elsif ( $show =~ m/^follow/ ) { + + $feed->title( "Feeds which this bot follows" ); + + my $sth = $dbh->prepare( qq{ select * from feeds order by last_update desc } ); + $sth->execute; + while (my $row = $sth->fetchrow_hashref) { + my $feed_entry = XML::Feed::Entry->new($type); + $feed_entry->title( $row->{name} ); + $feed_entry->link( $row->{url} ); + $feed_entry->issued( DateTime::Format::Flexible->build( $row->{last_update} ) ); + $feed_entry->content( + '' . dump( $row ) . ']]>' + ); + $feed->add_entry( $feed_entry ); + } + } else { - warn "!! unknown rss request for $show\n"; + _log "unknown rss request ",$request->url; return RC_DENY; } @@ -1055,7 +1224,7 @@ warn "$@"; } - $response->content_type("text/html; charset=$ENCODING"); + $response->content_type("text/html; charset=UTF-8"); my $html = qq{$NICK