75 |
sub human { |
sub human { |
76 |
my $s = shift; |
my $s = shift; |
77 |
|
|
78 |
if ( $s =~ m/(\d+)\s*($multiplier_regex)/i) { |
if ( $s =~ m/^\s*(\d+)\s*($multiplier_regex)\s*$/i) { |
79 |
my ( $v, $m ) = ( $1, lc($2) ); |
my ( $v, $m ) = ( $1, lc($2) ); |
80 |
my $factor = $multiplier->{$m}; |
my $factor = $multiplier->{$m}; |
81 |
confess "can't find multiplier $m" unless defined $factor; |
confess "can't find multiplier $m" unless defined $factor; |
86 |
return; |
return; |
87 |
} |
} |
88 |
|
|
89 |
|
# strip_prefix( @strings ) -> @strings
#
# Removes a shared leading string from a list of labels, comparing
# case-insensitively.
#
# The first argument is the reference string. For every remaining argument
# the longest leading part it shares with the reference is recorded; the
# shared prefix that occurs most often is then stripped from the start of
# every argument (arguments that do not start with it are left alone).
# Ties between equally frequent prefixes go to the longer prefix, then to
# string order, so the result does not depend on hash ordering.
#
# Returns the arguments unchanged when fewer than two strings are given, or
# when some argument shares no leading characters with the reference; the
# latter case also emits a warning.
sub strip_prefix {
    my @data      = @_;
    my $reference = shift @data;

    # a single string (or none) has nothing to share a prefix with
    return @_ unless @data;

    my %seen;    # shared prefix => number of strings sharing exactly it

    foreach my $d (@data) {
        my $chomp = length($reference);

        # shrink the candidate length until both strings agree on it
        $chomp--
            while $chomp > 0
            && lc( substr( $reference, 0, $chomp ) ) ne
               lc( substr( $d,         0, $chomp ) );

        if ( $chomp == 0 ) {
            warn "no common prefix in ", dump(@_);
            return @_;
        }

        $seen{ substr( $reference, 0, $chomp ) }++;
    }

    warn "prefixes found = ", dump( \%seen );

    # most frequent first; explicit tie-breakers keep the choice stable
    my @ranked = sort {
               $seen{$b} <=> $seen{$a}
            || length($b) <=> length($a)
            || $a cmp $b
    } keys %seen;

    # %seen is non-empty here, so there is always a winner -- no truthiness
    # test, which would wrongly reject a prefix such as "0"
    my $strip = $ranked[0];

    warn "most common prefix: '$strip'\n";

    return map { my $v = $_; $v =~ s/^\Q$strip\E//i; $v } @_;
}
117 |
|
|
118 |
while (<$csv_fh>) { |
while (<$csv_fh>) { |
119 |
$csv_parser->parse($_); |
$csv_parser->parse($_); |
120 |
my @fields = $csv_parser->fields; |
my @fields = $csv_parser->fields; |
155 |
$h->{ $l . '_' . $j } = $v; |
$h->{ $l . '_' . $j } = $v; |
156 |
} |
} |
157 |
|
|
158 |
$split_stats->{$v}->{$j}++; |
$split_stats->{$v}->{pos}->{$j}++; |
159 |
$split_stats->{$v}->{sum}++; |
$split_stats->{$v}->{sum}++; |
160 |
push @{ $split_stats->{$v}->{rec}->{$#dump + 1} }, $j; |
push @{ $split_stats->{$v}->{rec}->{$#dump + 1} }, $j; |
161 |
} |
} |
199 |
} |
} |
200 |
} |
} |
201 |
|
|
202 |
#warn "split_stats = ", dump( $split_stats ), "\n"; |
warn "split_stats = ", dump( $split_stats ), "\n"; |
203 |
|
|
204 |
|
# Collect the label_0 value of every record in @dump. The `||` means a
# record whose label_0 is missing -- or otherwise false, e.g. '' or '0' --
# aborts the run with a dump of that record.
my @all = map { $_->{label_0} || die "no label_0 for ",dump($_) } @dump; |
205 |

# debug output: the labels before stripping
warn "all = ",dump(@all); |
206 |

# strip_prefix() is handed every label and returns one value per label
my @stripped = strip_prefix( @all ); |
207 |

# write the results back into the records, matching them up by index
$dump[$_]->{label_0} = $stripped[$_] foreach ( 0 .. $#stripped ); |
208 |
|
|
209 |
warn "dump = ", dump( @dump ), "\n"; |
warn "dump = ", dump( @dump ), "\n"; |
210 |
|
|