143 |
} |
} |
144 |
} |
} |
145 |
|
|
146 |
|
# Convert a number written in European notation into a plain Perl number.
#
# A value of the form "1.234,56" (dots as thousands separators, comma as
# the decimal mark) has its dots removed and its comma replaced by a
# decimal point, and is then numified.
#
#   $v - value to convert; may be undef
#
# Returns the converted number when the value matches that notation,
# otherwise the value unchanged (undef stays undef).
sub number {
    my $v = shift;

    # Callers test the result for definedness, so undef is an expected
    # input: pass it through instead of matching a regex against it,
    # which would emit an "uninitialized value" warning.
    return $v unless defined $v;

    if ( $v =~ m/^([\d.]+),(\d+)$/ ) {
        my ( $i, $f ) = ( $1, $2 );

        # drop thousands separators from the integer part
        $i =~ s/\.//g;

        # "+ 0" forces numeric context so a real number is returned
        my $new = ( $i . '.' . $f ) + 0;
        warn "## number $v -> $new\n";
        return $new;
    }

    return $v;
}
157 |
|
|
158 |
|
# Truncate a number to its leading "digit" in base $step while keeping its
# magnitude, e.g. round(1234) == 1000 and round(950, 100) == 900.
#
#   $v    - value to truncate (normalised through number() first)
#   $step - base used for scaling; defaults to 10 when omitted or false
#
# Returns the truncated value.
sub round {
    my $v    = number( shift );
    my $step = shift || 10;

    # Dividing by a base in (0, 1] never shrinks $v, so the loop below
    # would never terminate; fall back to the default base instead.
    $step = 10 if $step > 0 && $step <= 1;

    my $f = 1;
    while ( $v > $step ) {
        $f *= $step;
        $v /= $step;
    }

    my $new = int($v) * $f;
    warn "## round step: $step v: $v f: $f => $new\n";
    return $new;
}

# Classify every element of the file-level @dump into a numeric range.
#
# The field $name of each element is read through number(), the observed
# minimum/maximum are split into roughly $groups steps with rounded
# boundaries, and each element gets a new field "${name}_range" holding
# "<start>-<end>" of the range its value falls into.
#
#   $name   - key of the numeric field inside each @dump element
#   $groups - desired number of ranges; must be a positive number
#
# Dies when $groups is not positive or when an element has no value for
# $name. Returns nothing; the result is the modification of @dump.
sub numeric_range {
    my ( $name, $groups ) = ( shift, shift );

    # a zero group count would divide by zero, a negative one would
    # produce a negative step and an endless loop further down
    die "numeric_range: number of groups must be positive"
        unless defined $groups && $groups > 0;

    my $min;
    my $max;
    my @numbers;

    foreach my $i ( 0 .. $#dump ) {
        my $v = number( $dump[$i]->{$name} );
        die "element $i doesn't have $name in ",dump( $dump[$i] ) unless defined $v;

        # Test for undef explicitly: a legitimate value of 0 is false and
        # must not be mistaken for "nothing seen yet".
        $min = $v if !defined $min || $v < $min;
        $max = $v if !defined $max || $v > $max;

        push @numbers, $v;
    }

    # nothing to classify
    return unless @numbers;

    my $top   = $max;            # largest value actually seen
    my $range = $max - $min;
    my $step  = $range / $groups;

    warn "## numeric_range $min - $max / $step step into $groups groups\n";

    # Rounding truncates steps below 1 to 0, and a zero step would make
    # the range-building loop below spin forever. Keep the unrounded
    # fractional step in that case, or 1 when all values are equal.
    my $rounded = round($step);
    if ( $rounded > 0 ) {
        $step = $rounded;
    }
    elsif ( $step <= 0 ) {
        $step = 1;
    }

    $min = round($min);
    $max = round($max, $step) + $step;

    # Boundaries start at the rounded minimum and advance by $step. The
    # rounded $max can end up below the largest value, so keep going
    # until the last boundary lies above it; otherwise the largest
    # values would be left without a range.
    my @ranges;
    my $v = $min;
    while ( $v <= $max or !@ranges or $ranges[-1] <= $top ) {
        push @ranges, $v;
        $v += $step;
    }

    warn "## round $min - $max / $step produced ranges: ",dump( @ranges ),"\n";

    # @numbers is index-aligned with @dump: every element either pushed
    # a value above or caused a die.
    foreach my $i ( 0 .. $#numbers ) {
        my $n = $numbers[$i];

        my $start = 0;
        foreach my $r ( @ranges ) {
            if ( $n < $r ) {
                $dump[$i]->{ $name . '_range' } = $start . '-' . $r;
                last;
            }
            $start = $r;
        }
    }

    return;
}
225 |
|
|
226 |
|
|
227 |
while (<$csv_fh>) { |
while (<$csv_fh>) { |
228 |
$csv_parser->parse($_); |
$csv_parser->parse($_); |
229 |
my @fields = $csv_parser->fields; |
my @fields = $csv_parser->fields; |
#
# split prefix from label_0
#
# NOTE(review): this section was reconstructed from a side-by-side diff in
# which every statement appeared twice and @stripped was declared twice.
# The right-hand (new) column is kept — confirm against the repository.
my @stripped = strip_prefix( map { $_->{label_0} } @dump );

# write the stripped labels back; @stripped is index-aligned with @dump
$dump[$_]->{label_0} = $stripped[$_] foreach ( 0 .. $#stripped );

# group products by manufacturers
# (the callback splits a label at its first run of whitespace)
group_by( qw/manufacturer player_name/, sub { $_[0] =~ m/^(\S+)\s+(.+)/; ($1,$2) }, @stripped );

# create price ranges
numeric_range( 'gotovina', 5 );

warn "dump = ", dump( @dump ), "\n";

print "features: .", join(', .', keys %$split_stats), "\n";