NOTE(review): line breaks were restored from a whitespace-collapsed copy of
this patch; tabs and blank context lines are a best-effort reconstruction,
so apply with `patch -l` if the context does not match exactly.

--- links/csv2js.pl	2007/08/23 13:27:14	6
+++ links/csv2js.pl	2007/08/23 13:27:56	7
@@ -75,7 +75,7 @@
 
 sub human {
 	my $s = shift;
-	if ( $s =~ m/(\d+)\s*($multiplier_regex)/i) {
+	if ( $s =~ m/^\s*(\d+)\s*($multiplier_regex)\s*$/i) {
 		my ( $v, $m ) = ( $1, lc($2) );
 		my $factor = $multiplier->{$m};
 		confess "can't find multiplier $m" unless defined $factor;
@@ -86,6 +86,52 @@
 	return;
 }
 
+# Strip the most frequent leading string from a list of labels.
+#
+# The first argument is the reference label.  For every other label the
+# longest case-insensitive prefix it shares with the reference is counted;
+# the prefix seen most often is then removed (case-insensitively) from the
+# start of every argument.  Returns the resulting list in the original order.
+#
+# The arguments are returned unchanged when there is nothing to compare
+# against, or when some label shares no prefix at all with the reference.
+sub strip_prefix {
+	my @labels = @_;
+	my $reference = shift @labels;
+
+	# a single label (or none) has no common prefix worth stripping
+	return @_ unless defined $reference && @labels;
+
+	my %seen;
+
+	foreach my $label ( @labels ) {
+		my $len = length($reference);
+		# shrink until both strings agree on their first $len characters
+		$len-- while (
+			$len > 0
+			&&
+			lc(substr( $reference, 0, $len )) ne lc(substr( $label, 0, $len ))
+		);
+		if ( $len == 0 ) {
+			warn "no common prefix in ",dump( @_ );
+			return @_;
+		}
+
+		$seen{ substr( $reference, 0, $len ) }++;
+	}
+	warn "prefixes found = ",dump( \%seen );
+
+	# Most frequent prefix wins.  All candidates are prefixes of $reference,
+	# so ties go to the shortest one: it is shared by more labels, and the
+	# result no longer depends on hash ordering.
+	my ( $strip ) = sort {
+		$seen{$b} <=> $seen{$a} || length($a) <=> length($b)
+	} keys %seen;
+
+	warn "most common prefix: '$strip'\n";
+	return map { my $v = $_; $v =~ s/^\Q$strip\E//i; $v; } @_;
+}
+
 while (<$csv_fh>) {
 	$csv_parser->parse($_);
 	my @fields = $csv_parser->fields;
@@ -126,7 +172,7 @@
 				$h->{ $l . '_' . $j } = $v;
 			}
 
-			$split_stats->{$v}->{$j}++;
+			$split_stats->{$v}->{pos}->{$j}++;
 			$split_stats->{$v}->{sum}++;
 			push @{ $split_stats->{$v}->{rec}->{$#dump + 1} }, $j;
 		}
@@ -170,7 +216,12 @@
 	}
 }
 
-#warn "split_stats = ", dump( $split_stats ), "\n";
+warn "split_stats = ", dump( $split_stats ), "\n";
+
+my @all = map { $_->{label_0} || die "no label_0 for ",dump($_) } @dump;
+warn "all = ",dump(@all);
+my @stripped = strip_prefix( @all );
+$dump[$_]->{label_0} = $stripped[$_] foreach ( 0 .. $#stripped );
 
 warn "dump = ", dump( @dump ), "\n";
 