#!/usr/bin/perl use warnings; use strict; # # emulate GROUP BY availabe in relational database for shell pipes # # Dobrica Pavlinusic , http://www.rot13.org/~dpavlin/ # # SYNTAX: groupby.pl [-d delimiter_char] [-f](sum|count|min|max):]\d+,... # # by default, delimiter is space and -f flag is optional -- similar to cut(1) # you can also use matematical operations between columns # # - show memory usage of processes grouped by process name # ps axv | awk '{ print $10,($6+$7+$8) * 1024 }' | groupby.pl sum:2,1,count:1 | sort -k1 -nr | sum.pl -h # # - same example with added minimum and maximum usage # ps axv | awk '{ print $10,($6+$7+$8) * 1024 }' | groupby.pl sum:2,1,count:1,min:2,max:2 | sort -rn | align | head -10 # # - stats of pppd packages traffic # sudo grep pppd /var/log/debug | egrep '(rcvd|sent)' | ./groupby.pl 6-8 | sort -rn # # - you can also use do calculations of fields in-line # ps axv | ./groupby.pl 'sum:(($6+$7+$8)*1024),10,count:10,min:($6+$7+$8),max:($6+$7+$8)' | sort -k1 -nr | head | align | sum.pl -h # # - OpenVZ examples: # vzps -E axv --no-headers | ./groupby.pl 'sum:($7+$8+$9*1024),count:1,1' | sort -rn | ./sum.pl -h # # - example how to join some other data (convert OpenVZ VEID to hostname) # vzps -E axv --no-headers | ./groupby.pl 'sum:($7+$8+$9*1024),1,count:1' --join 'sudo vzlist -H -o veid,hostname' --on 2 | sort -rn | align | ./sum.pl -h use Data::Dump qw/dump/; use Getopt::Long; use Carp qw/confess/; use IPC::System::Simple qw/capture/; my $debug = 0; my $delimiter; my $cols; my $join; my $join_on = 1; GetOptions( 'd|delimiter=s' => \$delimiter, 'f|fields=s' => \$cols, 'j|join=s' => \$join, 'on|join-on=i' => \$join_on, 'debug+' => \$debug, ); if ( defined $delimiter ) { $delimiter = "\Q$delimiter\E"; } else { $delimiter = '\s+'; } $cols ||= shift @ARGV; die "usage: $0 [1|sum:3,1,count:1,max:1,min:1]\n" unless $cols; # rewrite dummy 1- to full form if ( $cols !~ m/,/ ) { $cols = "count:$cols,show:$cols"; $cols .= '-' unless $cols =~ m/-/; } my $group_by; my @col_op; my $sum_cols; # check count of summed columns my @sum_cols; my @show_cols; my $extremes; foreach my $col ( split(/,/,$cols) ) { if ( $col =~ m/^count:(.+)$/ ) { # FIXME check if key is valid confess "count:$1 can be used only once in $cols\n" if $group_by; push @col_op, { count => $1 }; $group_by = $1; } elsif ( $col =~ m/^sum:(.+)$/i ) { push @col_op, { sum => $1 }; push @sum_cols, $1; $sum_cols->{$1}++; warn "column $1 sum created more than once in $cols\n" if $sum_cols->{$1} > 1; } elsif ( $col =~ m/^(min|max):(.+)$/ ) { push @col_op, { 'extreme' => $1 }; $extremes->{ $1 }->{col} = $2; } else { $col =~ s/^show://i; push @col_op, { show => $col }; push @show_cols, $col; } } my $col; warn "## $cols col_op = ",dump( @col_op )," debug level: $debug\n" if $debug > 1; warn "# group by $group_by show ", dump( @show_cols ), " sum ",dump( @sum_cols ),$/ if $debug; # convert column range to arrray of column values sub col_data_range { my ( $range, $want_data, $line ) = @_; warn "## col_data_range",dump( $range, $want_data, $line ),$/ if $debug > 1; my @cols; if ($range =~ m/^(\d+)-$/) { push @cols,$_ foreach ( $1 .. $#$line ); } elsif ($range =~ m/^-(\d+)$/) { push @cols,$_ foreach ( 0 .. $1 ); } elsif ($range =~ m/^(\d+)-(\d+)$/) { push @cols,$_ foreach ( $1 .. $2 ); } elsif ($range =~ m/^(\d+)$/) { push @cols,$1; } elsif ( $range =~ m/^\((.+[+\-\/\*].+)\)$/ ) { my $code = $1; if ( $want_data ) { sub col_val { my ($line,$col) = @_; my $val = $line->[$col]; return 0 unless $val =~ m/^\d+$/; confess "can't find column $col in ",dump( $line ) unless defined $val; return $val; } $code =~ s/\$(\d+)/col_val($line,$1)/ge; my $val = eval $code; confess "eval $code from $range returned $@\n" if $@; warn "# $range => $code = $val\n" if $debug; # fake new column push @$line, $val; push @cols, $#$line; } else { push @cols, $code; } } else { confess "can't parse range $range\n"; } if ( $want_data ) { warn "## $range -> cols ", dump( @cols ), " line " . dump( $line ) . " => " . dump( @$line[ @cols ] ), $/ if $debug > 1; return @$line[ @cols ] if wantarray; return join(' ', @$line[ @cols ]); } else { warn "## $range -> cols ", dump( @cols ) if $debug > 1; return @cols; } } while() { chomp; s/^\s+//; my $line = [ '', split(/$delimiter/,$_) ]; # implement count for columns my $key; if ($group_by =~ m/,/) { foreach my $c (split(/,/,$group_by)) { $key .= join('!', col_data_range($c, 1, $line)); } } else { $key = join('!', col_data_range($group_by, 1, $line)); } $col->{count}->{$key}++; warn "## key:$key count:$col->{count}->{$key}\n" if $debug > 1; # implement sum for columns foreach my $c ( @sum_cols ) { $col->{sum}->{$key}->{$c} += $_ foreach col_data_range( $c, 1, $line ); } # save columns to show $col->{show}->{$key}->{$_} = col_data_range( $_, 1, $line ) foreach @show_cols; # max/min foreach my $op ( keys %$extremes ) { my $col = $extremes->{$op}->{col}; my $val = col_data_range( $col, 1, $line ); $extremes->{$op}->{val}->{$key} = $val if ! defined $extremes->{$op}->{val}->{$key}; next unless defined $val; #$val =~ m/^\d+$/; my $code = '$val ' . ( $op eq 'min' ? '<' : '>' ) . ' $extremes->{$op}->{val}->{$key}'; warn "# $code val = ",dump( $val ) if $debug; $extremes->{$op}->{val}->{$key} = $val if eval $code; confess "$@\t$code\n" if $@; } } if ( $debug ) { warn "# col = ",dump( $col ),$/; warn "# extremes = ",dump( $extremes ); } my $join_data; warn "# join on $join_on from $join\n" if $debug; map { s/^\s+//; s/\s+$//; my @v = split(/\s+/,$_,2); $join_data->{ $v[0] } = $v[1]; } capture( $join ) if $join; warn "# $join join_data = ",dump( $join_data ) if $debug; foreach my $c (sort keys %{$col->{count}}) { my @out; foreach my $op ( @col_op ) { if ( my $val = $op->{sum} ) { push @out, $col->{sum}->{$c}->{$val}; } elsif ( $op->{count} ) { push @out, $col->{count}->{$c}; } elsif ( $val = $op->{show} ) { my $data = $col->{show}->{$c}->{$val}; if ( $join_data && $#out == ( $join_on - 2 ) ) { push @out, $join_data->{ $data } || $data; } else { push @out, $data; } } elsif ( my $op = $op->{extreme} ) { push @out, $extremes->{$op}->{val}->{$c}; } warn "## op ",dump( $op ), " out ",dump( $out[$#out] ),$/ if $debug > 1; } print join(' ',@out),$/; }