| 1 |
178 |
dpavlin |
#!/usr/bin/perl |
| 2 |
53 |
dpavlin |
|
| 3 |
185 |
dpavlin |
use warnings; |
| 4 |
|
|
use strict; |
| 5 |
|
|
|
| 6 |
178 |
dpavlin |
# |
| 7 |
|
|
# emulate GROUP BY availabe in relational database for shell pipes |
| 8 |
|
|
# |
| 9 |
185 |
dpavlin |
# Dobrica Pavlinusic <dpavlin@rot13.org>, http://www.rot13.org/~dpavlin/ |
| 10 |
|
|
# |
| 11 |
184 |
dpavlin |
# SYNTAX: groupby.pl [-d delimiter_char] [-f](sum|count|min|max):]\d+,... |
| 12 |
181 |
dpavlin |
# |
| 13 |
184 |
dpavlin |
# by default, delimiter is space and -f flag is optional -- similar to cut(1) |
| 14 |
185 |
dpavlin |
# you can also use matematical operations between columns |
| 15 |
184 |
dpavlin |
# |
| 16 |
178 |
dpavlin |
# - show memory usage of processes grouped by process name |
| 17 |
185 |
dpavlin |
# ps axv | awk '{ print $10,($6+$7+$8) * 1024 }' | groupby.pl sum:2,1,count:1 | sort -k1 -nr | sum.pl -h |
| 18 |
181 |
dpavlin |
# |
| 19 |
183 |
dpavlin |
# - same example with added minimum and maximum usage |
| 20 |
185 |
dpavlin |
# ps axv | awk '{ print $10,($6+$7+$8) * 1024 }' | groupby.pl sum:2,1,count:1,min:2,max:2 | sort -rn | align | head -10 |
| 21 |
183 |
dpavlin |
# |
| 22 |
181 |
dpavlin |
# - stats of pppd packages traffic |
| 23 |
|
|
# sudo grep pppd /var/log/debug | egrep '(rcvd|sent)' | ./groupby.pl 6-8 | sort -rn |
| 24 |
|
|
# |
| 25 |
185 |
dpavlin |
# - you can also use do calculations of fields in-line |
| 26 |
|
|
# ps axv | ./groupby.pl 'sum:(($6+$7+$8)*1024),10,count:10,min:($6+$7+$8),max:($6+$7+$8)' | sort -k1 -nr | head | align | sum.pl -h |
| 27 |
187 |
dpavlin |
# |
| 28 |
|
|
# - OpenVZ examples: |
| 29 |
|
|
# vzps -E axv --no-headers | ./groupby.pl 'sum:($7+$8+$9*1024),count:1,1' | sort -rn | ./sum.pl -h |
| 30 |
|
|
# |
| 31 |
|
|
# - example how to join some other data (convert OpenVZ VEID to hostname) |
| 32 |
|
|
# vzps -E axv --no-headers | ./groupby.pl 'sum:($7+$8+$9*1024),1,count:1' --join 'sudo vzlist -H -o veid,hostname' --on 2 | sort -rn | align | ./sum.pl -h |
| 33 |
178 |
dpavlin |
|
| 34 |
|
|
use Data::Dump qw/dump/; |
| 35 |
182 |
dpavlin |
use Getopt::Long; |
| 36 |
185 |
dpavlin |
use Carp qw/confess/; |
| 37 |
187 |
dpavlin |
use IPC::System::Simple qw/capture/; |
| 38 |
57 |
dpavlin |
|
| 39 |
178 |
dpavlin |
my $debug = 0; |
| 40 |
182 |
dpavlin |
my $delimiter; |
| 41 |
184 |
dpavlin |
my $cols; |
| 42 |
187 |
dpavlin |
my $join; |
| 43 |
|
|
my $join_on = 1; |
| 44 |
178 |
dpavlin |
|
| 45 |
182 |
dpavlin |
GetOptions( |
| 46 |
184 |
dpavlin |
'd|delimiter=s' => \$delimiter, |
| 47 |
|
|
'f|fields=s' => \$cols, |
| 48 |
187 |
dpavlin |
'j|join=s' => \$join, |
| 49 |
|
|
'on|join-on=i' => \$join_on, |
| 50 |
|
|
'debug+' => \$debug, |
| 51 |
182 |
dpavlin |
); |
| 52 |
|
|
|
| 53 |
|
|
if ( defined $delimiter ) { |
| 54 |
|
|
$delimiter = "\Q$delimiter\E"; |
| 55 |
|
|
} else { |
| 56 |
|
|
$delimiter = '\s+'; |
| 57 |
|
|
} |
| 58 |
|
|
|
| 59 |
184 |
dpavlin |
$cols ||= shift @ARGV; |
| 60 |
|
|
die "usage: $0 [1|sum:3,1,count:1,max:1,min:1]\n" unless $cols; |
| 61 |
180 |
dpavlin |
|
| 62 |
181 |
dpavlin |
# rewrite dummy 1- to full form |
| 63 |
|
|
if ( $cols !~ m/,/ ) { |
| 64 |
|
|
$cols = "count:$cols,show:$cols"; |
| 65 |
|
|
$cols .= '-' unless $cols =~ m/-/; |
| 66 |
|
|
} |
| 67 |
|
|
|
| 68 |
178 |
dpavlin |
my $group_by; |
| 69 |
|
|
my @col_op; |
| 70 |
|
|
my $sum_cols; # check count of summed columns |
| 71 |
|
|
my @sum_cols; |
| 72 |
|
|
my @show_cols; |
| 73 |
183 |
dpavlin |
my $extremes; |
| 74 |
178 |
dpavlin |
foreach my $col ( split(/,/,$cols) ) { |
| 75 |
|
|
if ( $col =~ m/^count:(.+)$/ ) { |
| 76 |
|
|
# FIXME check if key is valid |
| 77 |
185 |
dpavlin |
confess "count:$1 can be used only once in $cols\n" if $group_by; |
| 78 |
178 |
dpavlin |
push @col_op, { count => $1 }; |
| 79 |
|
|
$group_by = $1; |
| 80 |
185 |
dpavlin |
} elsif ( $col =~ m/^sum:(.+)$/i ) { |
| 81 |
178 |
dpavlin |
push @col_op, { sum => $1 }; |
| 82 |
|
|
push @sum_cols, $1; |
| 83 |
|
|
$sum_cols->{$1}++; |
| 84 |
|
|
warn "column $1 sum created more than once in $cols\n" if $sum_cols->{$1} > 1; |
| 85 |
185 |
dpavlin |
} elsif ( $col =~ m/^(min|max):(.+)$/ ) { |
| 86 |
|
|
push @col_op, { 'extreme' => $1 }; |
| 87 |
|
|
$extremes->{ $1 }->{col} = $2; |
| 88 |
178 |
dpavlin |
} else { |
| 89 |
|
|
$col =~ s/^show://i; |
| 90 |
|
|
push @col_op, { show => $col }; |
| 91 |
|
|
push @show_cols, $col; |
| 92 |
|
|
} |
| 93 |
|
|
} |
| 94 |
|
|
|
| 95 |
185 |
dpavlin |
my $col; |
| 96 |
178 |
dpavlin |
|
| 97 |
185 |
dpavlin |
warn "## $cols col_op = ",dump( @col_op )," debug level: $debug\n" if $debug > 1; |
| 98 |
|
|
warn "# group by $group_by show ", dump( @show_cols ), " sum ",dump( @sum_cols ),$/ if $debug; |
| 99 |
178 |
dpavlin |
|
| 100 |
185 |
dpavlin |
# convert column range to arrray of column values |
| 101 |
|
|
sub col_data_range { |
| 102 |
|
|
my ( $range, $want_data, $line ) = @_; |
| 103 |
|
|
warn "## col_data_range",dump( $range, $want_data, $line ),$/ if $debug > 1; |
| 104 |
|
|
my @cols; |
| 105 |
|
|
if ($range =~ m/^(\d+)-$/) { |
| 106 |
|
|
push @cols,$_ foreach ( $1 .. $#$line ); |
| 107 |
|
|
} elsif ($range =~ m/^-(\d+)$/) { |
| 108 |
|
|
push @cols,$_ foreach ( 0 .. $1 ); |
| 109 |
|
|
} elsif ($range =~ m/^(\d+)-(\d+)$/) { |
| 110 |
|
|
push @cols,$_ foreach ( $1 .. $2 ); |
| 111 |
|
|
} elsif ($range =~ m/^(\d+)$/) { |
| 112 |
|
|
push @cols,$1; |
| 113 |
|
|
} elsif ( $range =~ m/^\((.+[+\-\/\*].+)\)$/ ) { |
| 114 |
|
|
my $code = $1; |
| 115 |
|
|
if ( $want_data ) { |
| 116 |
|
|
sub col_val { |
| 117 |
|
|
my ($line,$col) = @_; |
| 118 |
|
|
my $val = $line->[$col]; |
| 119 |
|
|
return 0 unless $val =~ m/^\d+$/; |
| 120 |
|
|
confess "can't find column $col in ",dump( $line ) unless defined $val; |
| 121 |
|
|
return $val; |
| 122 |
|
|
} |
| 123 |
|
|
$code =~ s/\$(\d+)/col_val($line,$1)/ge; |
| 124 |
|
|
my $val = eval $code; |
| 125 |
|
|
confess "eval $code from $range returned $@\n" if $@; |
| 126 |
|
|
warn "# $range => $code = $val\n" if $debug; |
| 127 |
|
|
# fake new column |
| 128 |
|
|
push @$line, $val; |
| 129 |
|
|
push @cols, $#$line; |
| 130 |
180 |
dpavlin |
} else { |
| 131 |
185 |
dpavlin |
push @cols, $code; |
| 132 |
180 |
dpavlin |
} |
| 133 |
185 |
dpavlin |
} else { |
| 134 |
|
|
confess "can't parse range $range\n"; |
| 135 |
|
|
} |
| 136 |
|
|
|
| 137 |
|
|
if ( $want_data ) { |
| 138 |
|
|
warn "## $range -> cols ", dump( @cols ), " line " . dump( $line ) . " => " . dump( @$line[ @cols ] ), $/ if $debug > 1; |
| 139 |
|
|
return @$line[ @cols ] if wantarray; |
| 140 |
|
|
return join(' ', @$line[ @cols ]); |
| 141 |
|
|
} else { |
| 142 |
|
|
warn "## $range -> cols ", dump( @cols ) if $debug > 1; |
| 143 |
180 |
dpavlin |
return @cols; |
| 144 |
|
|
} |
| 145 |
185 |
dpavlin |
} |
| 146 |
180 |
dpavlin |
|
| 147 |
185 |
dpavlin |
while(<STDIN>) { |
| 148 |
|
|
chomp; |
| 149 |
|
|
s/^\s+//; |
| 150 |
|
|
my $line = [ '', split(/$delimiter/,$_) ]; |
| 151 |
|
|
|
| 152 |
178 |
dpavlin |
# implement count for columns |
| 153 |
57 |
dpavlin |
|
| 154 |
178 |
dpavlin |
my $key; |
| 155 |
|
|
if ($group_by =~ m/,/) { |
| 156 |
|
|
foreach my $c (split(/,/,$group_by)) { |
| 157 |
185 |
dpavlin |
$key .= join('!', col_data_range($c, 1, $line)); |
| 158 |
178 |
dpavlin |
} |
| 159 |
53 |
dpavlin |
} else { |
| 160 |
185 |
dpavlin |
$key = join('!', col_data_range($group_by, 1, $line)); |
| 161 |
53 |
dpavlin |
} |
| 162 |
178 |
dpavlin |
|
| 163 |
185 |
dpavlin |
$col->{count}->{$key}++; |
| 164 |
|
|
warn "## key:$key count:$col->{count}->{$key}\n" if $debug > 1; |
| 165 |
178 |
dpavlin |
|
| 166 |
|
|
# implement sum for columns |
| 167 |
185 |
dpavlin |
foreach my $c ( @sum_cols ) { |
| 168 |
|
|
$col->{sum}->{$key}->{$c} += $_ foreach col_data_range( $c, 1, $line ); |
| 169 |
|
|
} |
| 170 |
178 |
dpavlin |
|
| 171 |
|
|
# save columns to show |
| 172 |
185 |
dpavlin |
$col->{show}->{$key}->{$_} = col_data_range( $_, 1, $line ) foreach @show_cols; |
| 173 |
183 |
dpavlin |
|
| 174 |
|
|
# max/min |
| 175 |
|
|
foreach my $op ( keys %$extremes ) { |
| 176 |
185 |
dpavlin |
my $col = $extremes->{$op}->{col}; |
| 177 |
|
|
my $val = col_data_range( $col, 1, $line ); |
| 178 |
|
|
$extremes->{$op}->{val}->{$key} = $val if ! defined $extremes->{$op}->{val}->{$key}; |
| 179 |
|
|
next unless defined $val; #$val =~ m/^\d+$/; |
| 180 |
|
|
my $code = '$val ' . ( $op eq 'min' ? '<' : '>' ) . ' $extremes->{$op}->{val}->{$key}'; |
| 181 |
|
|
warn "# $code val = ",dump( $val ) if $debug; |
| 182 |
|
|
$extremes->{$op}->{val}->{$key} = $val if eval $code; |
| 183 |
|
|
confess "$@\t$code\n" if $@; |
| 184 |
183 |
dpavlin |
} |
| 185 |
53 |
dpavlin |
} |
| 186 |
|
|
|
| 187 |
178 |
dpavlin |
if ( $debug ) { |
| 188 |
185 |
dpavlin |
warn "# col = ",dump( $col ),$/; |
| 189 |
|
|
warn "# extremes = ",dump( $extremes ); |
| 190 |
178 |
dpavlin |
} |
| 191 |
|
|
|
| 192 |
187 |
dpavlin |
my $join_data; |
| 193 |
|
|
warn "# join on $join_on from $join\n" if $debug; |
| 194 |
|
|
map { |
| 195 |
|
|
s/^\s+//; |
| 196 |
|
|
s/\s+$//; |
| 197 |
|
|
my @v = split(/\s+/,$_,2); |
| 198 |
|
|
$join_data->{ $v[0] } = $v[1]; |
| 199 |
|
|
} capture( $join ) if $join; |
| 200 |
|
|
|
| 201 |
|
|
warn "# $join join_data = ",dump( $join_data ) if $debug; |
| 202 |
|
|
|
| 203 |
185 |
dpavlin |
foreach my $c (sort keys %{$col->{count}}) { |
| 204 |
178 |
dpavlin |
my @out; |
| 205 |
|
|
foreach my $op ( @col_op ) { |
| 206 |
185 |
dpavlin |
if ( my $val = $op->{sum} ) { |
| 207 |
|
|
push @out, $col->{sum}->{$c}->{$val}; |
| 208 |
178 |
dpavlin |
} elsif ( $op->{count} ) { |
| 209 |
185 |
dpavlin |
push @out, $col->{count}->{$c}; |
| 210 |
|
|
} elsif ( $val = $op->{show} ) { |
| 211 |
187 |
dpavlin |
my $data = $col->{show}->{$c}->{$val}; |
| 212 |
|
|
if ( $join_data && $#out == ( $join_on - 2 ) ) { |
| 213 |
|
|
push @out, $join_data->{ $data } || $data; |
| 214 |
|
|
} else { |
| 215 |
|
|
push @out, $data; |
| 216 |
|
|
} |
| 217 |
183 |
dpavlin |
} elsif ( my $op = $op->{extreme} ) { |
| 218 |
|
|
push @out, $extremes->{$op}->{val}->{$c}; |
| 219 |
178 |
dpavlin |
} |
| 220 |
185 |
dpavlin |
warn "## op ",dump( $op ), " out ",dump( $out[$#out] ),$/ if $debug > 1; |
| 221 |
178 |
dpavlin |
} |
| 222 |
|
|
print join(' ',@out),$/; |
| 223 |
53 |
dpavlin |
} |