--- trunk/bin/irc-logger.pl 2008/02/29 22:11:07 83 +++ trunk/bin/irc-logger.pl 2008/03/07 09:50:53 90 @@ -20,8 +20,6 @@ =item --log=irc-logger.log -Name of log file - =back =head1 DESCRIPTION @@ -46,15 +44,8 @@ $CHANNEL = '#irc-logger' if ($HOSTNAME =~ m/llin/); my $IRC_ALIAS = "log"; -my %FOLLOWS = - ( - ACCESS => "/var/log/apache/access.log", - ERROR => "/var/log/apache/error.log", - ); - my $DSN = 'DBI:Pg:dbname=' . $NICK; -my $ENCODING = 'ISO-8859-2'; my $TIMESTAMP = '%Y-%m-%d %H:%M:%S'; my $sleep_on_error = 5; @@ -62,18 +53,19 @@ # number of last tags to keep in circular buffer my $last_x_tags = 50; +# don't pull rss feeds more often than this +my $rss_min_delay = 60; +$rss_min_delay = 15; + my $http_port = $NICK =~ m/-dev/ ? 8001 : 8000; my $url = "http://$HOSTNAME:$http_port"; ## END CONFIG - - -use POE qw(Component::IRC Wheel::FollowTail Component::Server::HTTP); +use POE qw(Component::IRC Component::Server::HTTP); use HTTP::Status; use DBI; -use Encode qw/from_to is_utf8/; use Regexp::Common qw /URI/; use CGI::Simple; use HTML::TagCloud; @@ -99,14 +91,14 @@ 'log:s' => \$log_path, ); -$SIG{__DIE__} = sub { - confess "fatal error"; -}; +#$SIG{__DIE__} = sub { +# confess "fatal error"; +#}; open(STDOUT, '>', $log_path) || warn "can't redirect log to $log_path: $!"; sub _log { - print strftime($TIMESTAMP,localtime()), ' ', join(" ",@_), $/; + print strftime($TIMESTAMP,localtime()) . ' ' . join(" ",@_) . $/; } # HTML formatters @@ -152,9 +144,10 @@ }; my $dbh = DBI->connect($DSN,"","", { RaiseError => 1, AutoCommit => 1 }) || die $DBI::errstr; +$dbh->do( qq{ set client_encoding = 'UTF-8' } ); my $sql_schema = { - log => ' + log => qq{ create table log ( id serial, time timestamp default now(), @@ -168,17 +161,31 @@ create index log_time on log(time); create index log_channel on log(channel); create index log_nick on log(nick); - ', - meta => ' + }, + meta => q{ create table meta ( nick text not null, channel text not null, name text not null, value text, - changed timestamp default now(), + changed timestamp default 'now()', primary key(nick,channel,name) ); - ', + }, + feeds => qq{ +create table feeds ( + id serial, + url text not null, + name text, + delay interval not null default '5 min', + active boolean default true, + last_update timestamp default 'now()', + polls int default 0, + updates int default 0 +); +create unique index feeds_url on feeds(url); +insert into feeds (url,name) values ('http://wiki.razmjenavjestina.org/feed/workspace/razmjenavjestina?category=Recent%20Changes','wiki'); + }, }; foreach my $table ( keys %$sql_schema ) { @@ -242,7 +249,7 @@ -my $sth = $dbh->prepare(qq{ +my $sth_insert_log = $dbh->prepare(qq{ insert into log (channel, me, nick, message, time) values (?,?,?,?,?) @@ -494,7 +501,6 @@ return unless ($arg->{id} && $arg->{message}); my $m = $arg->{message}; - from_to('UTF-8', 'iso-8859-2', $m) if (is_utf8($m)); my @tags; @@ -563,9 +569,7 @@ $a->{me} ? "***" . $a->{nick} : "<" . $a->{nick} . ">", " " . $a->{message}; - from_to($a->{message}, 'UTF-8', $ENCODING); - - $sth->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); + $sth_insert_log->execute($a->{channel}, $a->{me}, $a->{nick}, $a->{message}, $a->{time}); add_tag( id => $dbh->last_insert_id(undef,undef,"log",undef), %$a ); } @@ -604,27 +608,113 @@ exit; } +# +# RSS follow +# + +my $_rss; + + +sub rss_fetch { + my ($args) = @_; + + # how many messages to send out when feed is seen for the first time? + my $send_rss_msgs = 1; + + _log "RSS fetch", $args->{url}; + + my $feed = XML::Feed->parse(URI->new( $args->{url} )); + if ( ! $feed ) { + _log("can't fetch RSS ", $args->{url}); + return; + } + my ( $total, $updates ) = ( 0, 0 ); + for my $entry ($feed->entries) { + $total++; + + # seen allready? + return if $_rss->{$feed->link}->{seen}->{$entry->id}++ > 0; + + sub prefix { + my ($txt,$var) = @_; + $var =~ s/^\s+//g; + return $txt . $var if $var; + } + + my $msg; + $msg .= prefix( 'From: ' , $args->{name} || $feed->title ); + $msg .= prefix( ' by ' , $entry->author ); + $msg .= prefix( ' -- ' , $entry->link ); +# $msg .= prefix( ' id ' , $entry->id ); + + if ( $args->{kernel} && $send_rss_msgs ) { + $send_rss_msgs--; + _log('RSS', $msg); + $sth_insert_log->execute( $CHANNEL, 1, $NICK, $msg, undef ); + $args->{kernel}->post( $IRC_ALIAS => notice => $CHANNEL, $msg ); + $updates++; + } + } + + my $sql = qq{ update feeds set last_update = now(), polls = polls + 1 }; + $sql .= qq{, updates = updates + $updates } if $updates; + $sql .= qq{where id = } . $args->{id}; + eval { $dbh->do( $sql ) }; + + _log "RSS got $total items of which $updates new"; + + return $updates; +} + +sub rss_fetch_all { + my $kernel = shift; + my $sql = qq{ + select id, url, name + from feeds + where active is true + }; + # limit to newer feeds only if we are not sending messages out + $sql .= qq{ and last_update + delay < now() } if $kernel; + my $sth = $dbh->prepare( $sql ); + $sth->execute(); + warn "# ",$sth->rows," active RSS feeds\n"; + my $count = 0; + while (my $row = $sth->fetchrow_hashref) { + $row->{kernel} = $kernel if $kernel; + $count += rss_fetch( $row ); + } + return "OK, fetched $count posts from " . $sth->rows . " feeds"; +} + + +sub rss_check_updates { + my $kernel = shift; + my $last_t = $_rss->{last_poll} || time(); + my $t = time(); + if ( $t - $last_t > $rss_min_delay ) { + $_rss->{last_poll} = $t; + _log rss_fetch_all( $kernel ); + } +} + +# seed rss seen cache so we won't send out all items on startup +_log rss_fetch_all; # # POE handing part # -my $SKIPPING = 0; # if skipping, how many we've done -my $SEND_QUEUE; # cache my $ping; # ping stats POE::Component::IRC->new($IRC_ALIAS); -POE::Session->create( inline_states => - {_start => sub { +POE::Session->create( inline_states => { + _start => sub { $_[KERNEL]->post($IRC_ALIAS => register => 'all'); $_[KERNEL]->post($IRC_ALIAS => connect => $CONNECT); }, irc_255 => sub { # server is done blabbing $_[KERNEL]->post($IRC_ALIAS => join => $CHANNEL); - $_[KERNEL]->post($IRC_ALIAS => join => '#logger'); - $_[KERNEL]->yield("heartbeat"); # start heartbeat -# $_[KERNEL]->yield("my_add", $_) for keys %FOLLOWS; $_[KERNEL]->post( $IRC_ALIAS => privmsg => 'nickserv', "IDENTIFY $NICK" ); }, irc_public => sub { @@ -655,15 +745,16 @@ }, irc_ping => sub { - warn "pong ", $_[ARG0], $/; + _log( "pong ", $_[ARG0] ); $ping->{ $_[ARG0] }++; + rss_check_updates( $_[KERNEL] ); }, irc_invite => sub { my $kernel = $_[KERNEL]; my $nick = (split /!/, $_[ARG0])[0]; my $channel = $_[ARG1]; - warn "invited to $channel by $nick"; + _log "invited to $channel by $nick"; $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, "how nice of you to invite me to $channel, I'll be right there..." ); $_[KERNEL]->post($IRC_ALIAS => join => $channel); @@ -674,7 +765,6 @@ my $nick = (split /!/, $_[ARG0])[0]; my $msg = $_[ARG2]; my $channel = $_[ARG1]->[0]; - from_to($msg, 'UTF-8', $ENCODING); my $res = "unknown command '$msg', try /msg $NICK help!"; my @out; @@ -718,7 +808,6 @@ foreach my $res (get_from_log( limit => $limit )) { _log "last: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -733,7 +822,6 @@ search => $what, )) { _log "search [$what]: $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } @@ -803,14 +891,35 @@ $res = "config option $op doesn't exist"; } } + } elsif ($msg =~ m/^rss-update/) { + $res = rss_fetch_all( $_[KERNEL] ); + } elsif ($msg =~ m/^rss-clean/) { + $_rss = undef; + $dbh->do( qq{ update feeds set last_update = now() - delay } ); + $res = "OK, cleaned RSS cache"; + } elsif ($msg =~ m!^rss-(add|remove|stop|start)\s+(http://\S+)\s*(.*)!) { + my $sql = { + add => qq{ insert into feeds (url,name) values (?,?) }, +# remove => qq{ delete from feeds where url = ? and name = ? }, + start => qq{ update feeds set active = true where url = ? -- ? }, + stop => qq{ update feeds set active = false where url = ? -- ? }, + + }; + if (my $q = $sql->{$1} ) { + my $sth = $dbh->prepare( $q ); + warn "## SQL $q ( $2 | $3 )\n"; + eval { $sth->execute( $2, $3 ) }; + } + + $res = "OK, RSS $1 : $2 - $3"; } if ($res) { _log ">> [$nick] $res"; - from_to($res, $ENCODING, 'UTF-8'); $_[KERNEL]->post( $IRC_ALIAS => privmsg => $nick, $res ); } + rss_check_updates( $_[KERNEL] ); }, irc_477 => sub { _log "# irc_477: ",$_[ARG1]; @@ -849,66 +958,6 @@ ""; 0; # false for signals }, - my_add => sub { - my $trailing = $_[ARG0]; - my $session = $_[SESSION]; - POE::Session->create - (inline_states => - {_start => sub { - $_[HEAP]->{wheel} = - POE::Wheel::FollowTail->new - ( - Filename => $FOLLOWS{$trailing}, - InputEvent => 'got_line', - ); - }, - got_line => sub { - $_[KERNEL]->post($session => my_tailed => - time, $trailing, $_[ARG0]); - }, - }, - ); - - }, - my_tailed => sub { - my ($time, $file, $line) = @_[ARG0..ARG2]; - ## $time will be undef on a probe, or a time value if a real line - - ## PoCo::IRC has throttling built in, but no external visibility - ## so this is reaching "under the hood" - $SEND_QUEUE ||= - $_[KERNEL]->alias_resolve($IRC_ALIAS)->get_heap->{send_queue}; - - ## handle "no need to keep skipping" transition - if ($SKIPPING and @$SEND_QUEUE < 1) { - $_[KERNEL]->post($IRC_ALIAS => privmsg => $CHANNEL => - "[discarded $SKIPPING messages]"); - $SKIPPING = 0; - } - - ## handle potential message display - if ($time) { - if ($SKIPPING or @$SEND_QUEUE > 3) { # 3 msgs per 10 seconds - $SKIPPING++; - } else { - my @time = localtime $time; - $_[KERNEL]->post($IRC_ALIAS => privmsg => $CHANNEL => - sprintf "%02d:%02d:%02d: %s: %s", - ($time[2] + 11) % 12 + 1, $time[1], $time[0], - $file, $line); - } - } - - ## handle re-probe/flush if skipping - if ($SKIPPING) { - $_[KERNEL]->delay($_[STATE] => 0.5); # $time will be undef - } - - }, - my_heartbeat => sub { - $_[KERNEL]->yield(my_tailed => time, "heartbeat", "beep"); - $_[KERNEL]->delay($_[STATE] => 10); - } }, ); @@ -983,7 +1032,7 @@ my $search = $q->param('search') || $q->param('grep') || ''; - if ($request->url =~ m#/rss(?:/(tags|last-tag)\w*(?:=(\d+))?)?#i) { + if ($request->url =~ m#/rss(?:/(tags|last-tag|follow.*)\w*(?:=(\d+))?)?#i) { my $show = lc($1); my $nr = $2; @@ -995,6 +1044,7 @@ #warn "create $type feed from ",dump( @last_tags ); my $feed = XML::Feed->new( $type ); + $feed->link( $url ); if ( $show eq 'tags' ) { $nr ||= 50; @@ -1021,7 +1071,6 @@ $nr = $last_x_tags if $nr > $last_x_tags; $feed->title( "last $nr tagged messages from $CHANNEL" ); - $feed->link( $url ); $feed->description( "collects messages which have tags// in them" ); foreach my $m ( @last_tags ) { @@ -1036,7 +1085,6 @@ my $message = $filter->{message}->( $m->{message} ); $message .= "
\n" unless $message =~ m!<(/p|br/?)>!; # warn "## message = $message\n"; - from_to( $message, $ENCODING, 'UTF-8' ); #$feed_entry->summary( $feed_entry->content( @@ -1050,8 +1098,25 @@ } + } elsif ( $show =~ m/^follow/ ) { + + $feed->title( "Feeds which this bot follows" ); + + my $sth = $dbh->prepare( qq{ select * from feeds order by last_update desc } ); + $sth->execute; + while (my $row = $sth->fetchrow_hashref) { + my $feed_entry = XML::Feed::Entry->new($type); + $feed_entry->title( $row->{name} ); + $feed_entry->link( $row->{url} ); + $feed_entry->issued( DateTime::Format::Flexible->build( $row->{last_update} ) ); + $feed_entry->content( + '' . dump( $row ) . ']]>' + ); + $feed->add_entry( $feed_entry ); + } + } else { - warn "!! unknown rss request for $show\n"; + _log "unknown rss request ",$request->url; return RC_DENY; } @@ -1063,7 +1128,7 @@ warn "$@"; } - $response->content_type("text/html; charset=$ENCODING"); + $response->content_type("text/html; charset=UTF-8"); my $html = qq{$NICK