#!/usr/bin/perl
use warnings;
use strict;
#
# emulate GROUP BY availabe in relational database for shell pipes
#
# Dobrica Pavlinusic <dpavlin@rot13.org>, http://www.rot13.org/~dpavlin/
#
# SYNTAX: groupby.pl [-d delimiter_char] [-f](sum|count|min|max):]\d+,...
#
# by default, delimiter is space and -f flag is optional -- similar to cut(1)
# you can also use matematical operations between columns
#
# - show memory usage of processes grouped by process name
# ps axv | awk '{ print $10,($6+$7+$8) * 1024 }' | groupby.pl sum:2,1,count:1 | sort -k1 -nr | sum.pl -h
#
# - same example with added minimum and maximum usage
# ps axv | awk '{ print $10,($6+$7+$8) * 1024 }' | groupby.pl sum:2,1,count:1,min:2,max:2 | sort -rn | align | head -10
#
# - stats of pppd packages traffic
# sudo grep pppd /var/log/debug | egrep '(rcvd|sent)' | ./groupby.pl 6-8 | sort -rn
#
# - you can also use do calculations of fields in-line
# ps axv | ./groupby.pl 'sum:(($6+$7+$8)*1024),10,count:10,min:($6+$7+$8),max:($6+$7+$8)' | sort -k1 -nr | head | align | sum.pl -h
#
# - OpenVZ examples:
# vzps -E axv --no-headers | ./groupby.pl 'sum:($7+$8+$9*1024),count:1,1' | sort -rn | ./sum.pl -h
#
# - example how to join some other data (convert OpenVZ VEID to hostname)
# vzps -E axv --no-headers | ./groupby.pl 'sum:($7+$8+$9*1024),1,count:1' --join 'sudo vzlist -H -o veid,hostname' --on 2 | sort -rn | align | ./sum.pl -h
use Data::Dump qw/dump/;
use Getopt::Long;
use Carp qw/confess/;
use IPC::System::Simple qw/capture/;
my $debug = 0;
my $delimiter;
my $cols;
my $join;
my $join_on = 1;
GetOptions(
'd|delimiter=s' => \$delimiter,
'f|fields=s' => \$cols,
'j|join=s' => \$join,
'on|join-on=i' => \$join_on,
'debug+' => \$debug,
);
if ( defined $delimiter ) {
$delimiter = "\Q$delimiter\E";
} else {
$delimiter = '\s+';
}
$cols ||= shift @ARGV;
die "usage: $0 [1|sum:3,1,count:1,max:1,min:1]\n" unless $cols;
# rewrite dummy 1- to full form
if ( $cols !~ m/,/ ) {
$cols = "count:$cols,show:$cols";
$cols .= '-' unless $cols =~ m/-/;
}
my $group_by;
my @col_op;
my $sum_cols; # check count of summed columns
my @sum_cols;
my @show_cols;
my $extremes;
foreach my $col ( split(/,/,$cols) ) {
if ( $col =~ m/^count:(.+)$/ ) {
# FIXME check if key is valid
confess "count:$1 can be used only once in $cols\n" if $group_by;
push @col_op, { count => $1 };
$group_by = $1;
} elsif ( $col =~ m/^sum:(.+)$/i ) {
push @col_op, { sum => $1 };
push @sum_cols, $1;
$sum_cols->{$1}++;
warn "column $1 sum created more than once in $cols\n" if $sum_cols->{$1} > 1;
} elsif ( $col =~ m/^(min|max):(.+)$/ ) {
push @col_op, { 'extreme' => $1 };
$extremes->{ $1 }->{col} = $2;
} else {
$col =~ s/^show://i;
push @col_op, { show => $col };
push @show_cols, $col;
}
}
my $col;
warn "## $cols col_op = ",dump( @col_op )," debug level: $debug\n" if $debug > 1;
warn "# group by $group_by show ", dump( @show_cols ), " sum ",dump( @sum_cols ),$/ if $debug;
# convert column range to arrray of column values
sub col_data_range {
my ( $range, $want_data, $line ) = @_;
warn "## col_data_range",dump( $range, $want_data, $line ),$/ if $debug > 1;
my @cols;
if ($range =~ m/^(\d+)-$/) {
push @cols,$_ foreach ( $1 .. $#$line );
} elsif ($range =~ m/^-(\d+)$/) {
push @cols,$_ foreach ( 0 .. $1 );
} elsif ($range =~ m/^(\d+)-(\d+)$/) {
push @cols,$_ foreach ( $1 .. $2 );
} elsif ($range =~ m/^(\d+)$/) {
push @cols,$1;
} elsif ( $range =~ m/^\((.+[+\-\/\*].+)\)$/ ) {
my $code = $1;
if ( $want_data ) {
sub col_val {
my ($line,$col) = @_;
my $val = $line->[$col];
return 0 unless $val =~ m/^\d+$/;
confess "can't find column $col in ",dump( $line ) unless defined $val;
return $val;
}
$code =~ s/\$(\d+)/col_val($line,$1)/ge;
my $val = eval $code;
confess "eval $code from $range returned $@\n" if $@;
warn "# $range => $code = $val\n" if $debug;
# fake new column
push @$line, $val;
push @cols, $#$line;
} else {
push @cols, $code;
}
} else {
confess "can't parse range $range\n";
}
if ( $want_data ) {
warn "## $range -> cols ", dump( @cols ), " line " . dump( $line ) . " => " . dump( @$line[ @cols ] ), $/ if $debug > 1;
return @$line[ @cols ] if wantarray;
return join(' ', @$line[ @cols ]);
} else {
warn "## $range -> cols ", dump( @cols ) if $debug > 1;
return @cols;
}
}
while(<STDIN>) {
chomp;
s/^\s+//;
my $line = [ '', split(/$delimiter/,$_) ];
# implement count for columns
my $key;
if ($group_by =~ m/,/) {
foreach my $c (split(/,/,$group_by)) {
$key .= join('!', col_data_range($c, 1, $line));
}
} else {
$key = join('!', col_data_range($group_by, 1, $line));
}
$col->{count}->{$key}++;
warn "## key:$key count:$col->{count}->{$key}\n" if $debug > 1;
# implement sum for columns
foreach my $c ( @sum_cols ) {
$col->{sum}->{$key}->{$c} += $_ foreach col_data_range( $c, 1, $line );
}
# save columns to show
$col->{show}->{$key}->{$_} = col_data_range( $_, 1, $line ) foreach @show_cols;
# max/min
foreach my $op ( keys %$extremes ) {
my $col = $extremes->{$op}->{col};
my $val = col_data_range( $col, 1, $line );
$extremes->{$op}->{val}->{$key} = $val if ! defined $extremes->{$op}->{val}->{$key};
next unless defined $val; #$val =~ m/^\d+$/;
my $code = '$val ' . ( $op eq 'min' ? '<' : '>' ) . ' $extremes->{$op}->{val}->{$key}';
warn "# $code val = ",dump( $val ) if $debug;
$extremes->{$op}->{val}->{$key} = $val if eval $code;
confess "$@\t$code\n" if $@;
}
}
if ( $debug ) {
warn "# col = ",dump( $col ),$/;
warn "# extremes = ",dump( $extremes );
}
my $join_data;
warn "# join on $join_on from $join\n" if $debug;
map {
s/^\s+//;
s/\s+$//;
my @v = split(/\s+/,$_,2);
$join_data->{ $v[0] } = $v[1];
} capture( $join ) if $join;
warn "# $join join_data = ",dump( $join_data ) if $debug;
foreach my $c (sort keys %{$col->{count}}) {
my @out;
foreach my $op ( @col_op ) {
if ( my $val = $op->{sum} ) {
push @out, $col->{sum}->{$c}->{$val};
} elsif ( $op->{count} ) {
push @out, $col->{count}->{$c};
} elsif ( $val = $op->{show} ) {
my $data = $col->{show}->{$c}->{$val};
if ( $join_data && $#out == ( $join_on - 2 ) ) {
push @out, $join_data->{ $data } || $data;
} else {
push @out, $data;
}
} elsif ( my $op = $op->{extreme} ) {
push @out, $extremes->{$op}->{val}->{$c};
}
warn "## op ",dump( $op ), " out ",dump( $out[$#out] ),$/ if $debug > 1;
}
print join(' ',@out),$/;
}