7 |
use Text::CSV::Separator qw(get_separator); |
use Text::CSV::Separator qw(get_separator); |
8 |
use Carp qw/confess/; |
use Carp qw/confess/; |
9 |
use LWP::Simple; |
use LWP::Simple; |
10 |
use Imager; |
use Number::Bytes::Human qw/format_bytes/; |
11 |
|
|
12 |
use JSON::Syck; |
use JSON::Syck; |
13 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
52 |
|
|
53 |
my $multiplier = { |
my $multiplier = { |
54 |
kb => 1024, |
kb => 1024, |
55 |
mb => 1024 * 1204, |
mb => 1024 * 1024, |
56 |
gb => 1024 * 1024 * 1024, |
gb => 1024 * 1024 * 1024, |
57 |
}; |
}; |
58 |
|
|
72 |
return shift @out; |
return shift @out; |
73 |
} |
} |
74 |
|
|
75 |
|
sub human { |
76 |
|
my $s = shift; |
77 |
|
|
78 |
|
if ( $s =~ m/^\s*(\d+)\s*($multiplier_regex)\s*$/i) { |
79 |
|
my ( $v, $m ) = ( $1, lc($2) ); |
80 |
|
my $factor = $multiplier->{$m}; |
81 |
|
confess "can't find multiplier $m" unless defined $factor; |
82 |
|
my $new = format_bytes( $v * $factor, bs => 1024 ); |
83 |
|
warn "## [$s] $v * $factor ($m) -> $new\n"; |
84 |
|
return $new; |
85 |
|
} |
86 |
|
return; |
87 |
|
} |
88 |
|
|
89 |
|
sub strip_prefix { |
90 |
|
my @data = @_; |
91 |
|
my $prefix = shift @data; |
92 |
|
|
93 |
|
my $p; |
94 |
|
|
95 |
|
foreach my $d ( @data ) { |
96 |
|
my $chomp = length($prefix); |
97 |
|
# find end of common string |
98 |
|
$chomp-- while( |
99 |
|
lc(substr( $prefix, 0, $chomp )) ne lc(substr( $d, 0, $chomp )) |
100 |
|
&& |
101 |
|
$chomp > 0 |
102 |
|
); |
103 |
|
if ( $chomp == 0 ) { |
104 |
|
warn "no common prefix in ",dump( @_ ); |
105 |
|
return @_; |
106 |
|
} |
107 |
|
|
108 |
|
my $prefix = substr( $prefix, 0, $chomp ); |
109 |
|
$p->{$prefix}++; |
110 |
|
} |
111 |
|
warn "prefixes found = ",dump($p); |
112 |
|
my @sorted = sort { $p->{$b} <=> $p->{$a} } keys %$p; |
113 |
|
my $strip = shift @sorted || return @_; |
114 |
|
warn "longest prefix: '$strip' (stripped)\n"; |
115 |
|
return map { my $v = $_; $v =~ s/^\Q$strip\E//i; $v; } @_; |
116 |
|
} |
117 |
|
|
118 |
|
sub group_by { |
119 |
|
my ( $group, $detail ) = ( shift, shift ); |
120 |
|
my $what = shift; |
121 |
|
confess "expected CODE as first argument!" unless ref($what) eq 'CODE'; |
122 |
|
my @data = @_; |
123 |
|
|
124 |
|
my $stat; |
125 |
|
my @details; |
126 |
|
|
127 |
|
foreach my $i ( 0 .. $#data ) { |
128 |
|
my $v = $data[$i]; |
129 |
|
my ( $by, $rest ) = $what->($v); |
130 |
|
# warn "## group_by: $i $v -> $by\n"; |
131 |
|
push @{ $stat->{$by} }, $i; |
132 |
|
$details[$i] = $rest; |
133 |
|
} |
134 |
|
|
135 |
|
warn "group_by '$group' stats = ",dump( $stat ); |
136 |
|
|
137 |
|
foreach my $g ( keys %$stat ) { |
138 |
|
foreach my $r ( @{ $stat->{$g} } ) { |
139 |
|
warn "## $group $g $r\n"; |
140 |
|
$dump[$r]->{$group} = $g; |
141 |
|
$dump[$r]->{$detail} = $details[$r]; |
142 |
|
} |
143 |
|
} |
144 |
|
} |
145 |
|
|
146 |
while (<$csv_fh>) { |
while (<$csv_fh>) { |
147 |
$csv_parser->parse($_); |
$csv_parser->parse($_); |
148 |
my @fields = $csv_parser->fields; |
my @fields = $csv_parser->fields; |
173 |
|
|
174 |
my $v = clean( $sv[$j] ); |
my $v = clean( $sv[$j] ); |
175 |
|
|
176 |
if ( $j == 0 ) { |
if ( my $human = human( $v ) ) { |
177 |
$h->{ $l . '_short' } = $v; |
$h->{ $l . '_' . $j . '_human' } = $human; |
178 |
|
} else { |
179 |
|
$h->{ $l . '_' . $j } = $v; |
180 |
} |
} |
181 |
|
|
182 |
if ( $v =~ m/(\d+)\s*($multiplier_regex)/) { |
$split_stats->{$v}->{pos}->{$j}++; |
|
my $new = $1 * $multiplier_regex->{$2}; |
|
|
warn "## $v -> $new\n"; |
|
|
$v = $new; |
|
|
} |
|
|
|
|
|
$split_stats->{$v}->{$j}++; |
|
183 |
$split_stats->{$v}->{sum}++; |
$split_stats->{$v}->{sum}++; |
184 |
push @{ $split_stats->{$v}->{rec}->{$#dump + 1} }, $j; |
push @{ $split_stats->{$v}->{rec}->{$#dump + 1} }, $j; |
185 |
} |
} |
201 |
if ( mirror( $url, $img_orig_path ) != RC_NOT_MODIFIED ) { |
if ( mirror( $url, $img_orig_path ) != RC_NOT_MODIFIED ) { |
202 |
warn "$url -> $img_orig_path\n"; |
warn "$url -> $img_orig_path\n"; |
203 |
} |
} |
204 |
system('convert', '-geometry', '320x200', $img_orig_path, $img_thumb_path ) if -e $img_thumb_path; |
system('convert', '-geometry', '320x200', $img_orig_path, $img_thumb_path ) if ! -e $img_thumb_path; |
205 |
|
|
206 |
$h->{'image-url'} = $img_orig_path; |
$h->{'image-url'} = $img_orig_path; |
207 |
$h->{'image-thumb-url'} = $img_thumb_path; |
$h->{'image-thumb-url'} = $img_thumb_path; |
223 |
} |
} |
224 |
} |
} |
225 |
|
|
226 |
#warn "split_stats = ", dump( $split_stats ), "\n"; |
warn "split_stats = ", dump( $split_stats ), "\n"; |
227 |
|
|
228 |
|
# |
229 |
|
# split prefix from label_0 |
230 |
|
# |
231 |
|
my @all = map { $_->{label_0} || die "no label_0 for ",dump($_) } @dump; |
232 |
|
warn "all = ",dump(@all); |
233 |
|
my @stripped = strip_prefix( @all ); |
234 |
|
$dump[$_]->{label_0} = $stripped[$_] foreach ( 0 .. $#stripped ); |
235 |
|
|
236 |
|
# group products by manufacturers |
237 |
|
group_by( qw/manufacturer player_name/, sub { $_[0] =~ m/^(\S+)\s+(.+)/; ($1,$2) }, @stripped ); |
238 |
|
|
239 |
warn "dump = ", dump( @dump ), "\n"; |
warn "dump = ", dump( @dump ), "\n"; |
240 |
|
|