--- links/csv2js.pl	2007/08/23 13:27:56	7
+++ links/csv2js.pl	2007/08/23 14:08:06	8
@@ -111,10 +111,48 @@
 	warn "prefixes found = ",dump($p);
 	my @sorted = sort { $p->{$b} <=> $p->{$a} } keys %$p;
 	my $strip = shift @sorted || return @_;
-	warn "longest preffix: '$strip'\n";
+	warn "longest prefix: '$strip' (stripped)\n";
 	return map { my $v = $_; $v =~ s/^\Q$strip\E//i; $v; } @_;
 }
 
+# group_by( $group, $detail, $what, @data )
+#
+# Calls $what->( $value ) on every element of @data; the callback must
+# return a ( $by, $rest ) pair. @data is indexed in parallel with the
+# file-level @dump: for element $i, $by is stored in $dump[$i]->{$group}
+# and $rest in $dump[$i]->{$detail}. Elements for which the callback
+# returns no defined $by are left ungrouped.
+# Dies (confess) unless $what is a CODE reference.
+sub group_by {
+	my ( $group, $detail ) = ( shift, shift );
+	my $what = shift;
+	confess "expected CODE as third argument!" unless ref($what) eq 'CODE';
+	my @data = @_;
+
+	my $stat;
+	my @details;
+
+	foreach my $i ( 0 .. $#data ) {
+		my $v = $data[$i];
+		my ( $by, $rest ) = $what->($v);
+#		warn "## group_by: $i $v -> $by\n";
+		# callback could not classify this element; don't file it under undef
+		next unless defined $by;
+		push @{ $stat->{$by} }, $i;
+		$details[$i] = $rest;
+	}
+
+	warn "group_by '$group' stats = ",dump( $stat );
+
+	foreach my $g ( keys %$stat ) {
+		foreach my $r ( @{ $stat->{$g} } ) {
+			warn "## $group $g $r\n";
+			$dump[$r]->{$group} = $g;
+			$dump[$r]->{$detail} = $details[$r];
+		}
+	}
+}
+
 while (<$csv_fh>) {
 	$csv_parser->parse($_);
 	my @fields = $csv_parser->fields;
@@ -145,10 +183,6 @@
 
 		my $v = clean( $sv[$j] );
 
-		if ( $j == 0 ) {
-			$h->{ $l . '_short' } = $v;
-		}
-
 		if ( my $human = human( $v ) ) {
 			$h->{ $l . '_' . $j . '_human' } = $human;
 		} else {
@@ -201,11 +235,17 @@
 
 warn "split_stats = ", dump( $split_stats ), "\n";
 
+#
+# strip prefix from label_0
+#
 my @all = map { $_->{label_0} || die "no label_0 for ",dump($_) } @dump;
 warn "all = ",dump(@all);
 my @stripped = strip_prefix( @all );
 $dump[$_]->{label_0} = $stripped[$_] foreach ( 0 .. $#stripped );
 
+# group products by manufacturers (first word of label; the rest is the player name)
+group_by( qw/manufacturer player_name/, sub { $_[0] =~ m/^(\S+)\s+(.+)/ ? ( $1, $2 ) : () }, @stripped );
+
 warn "dump = ", dump( @dump ), "\n";
 
 print "features: .", join(', .', keys %$split_stats), "\n";