1 |
dpavlin |
198 |
package Sack::Merge; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use Digest::MD5 qw(md5); |
7 |
|
|
use Time::HiRes qw(time); |
8 |
|
|
use Data::Dump qw(dump); |
9 |
|
|
|
10 |
|
|
our $out; |
11 |
|
|
sub out { $out } |
12 |
|
|
|
13 |
|
|
our $nr = 0; |
14 |
|
|
our $md5_nr; |
15 |
|
|
our $digest_fh; |
16 |
|
|
our @digest_offset; |
17 |
|
|
|
18 |
|
|
sub add { |
19 |
|
|
my ( $self, $new ) = @_; |
20 |
|
|
|
21 |
|
|
my $t_merge = time(); |
22 |
|
|
|
23 |
|
|
my $tick = 0; |
24 |
|
|
|
25 |
|
|
my $missing; |
26 |
|
|
|
27 |
|
|
foreach my $k1 ( keys %$new ) { |
28 |
|
|
|
29 |
|
|
foreach my $k2 ( keys %{ $new->{$k1} } ) { |
30 |
|
|
|
31 |
|
|
my $n = delete $new->{$k1}->{$k2}; |
32 |
|
|
|
33 |
|
|
if ( $k1 =~ m{#} ) { |
34 |
|
|
my $md5 = md5 $k2; |
35 |
|
|
if ( defined $md5_nr->{$md5} ) { |
36 |
|
|
$k2 = $md5_nr->{$md5}; |
37 |
|
|
} else { |
38 |
|
|
open( $digest_fh, '>', '/tmp/sack.digest' ) unless $digest_fh; |
39 |
|
|
$digest_offset[ $nr ] = tell( $digest_fh ); |
40 |
|
|
print $digest_fh "$k2\n"; |
41 |
|
|
|
42 |
|
|
$k2 = $md5_nr->{$md5} = $nr; |
43 |
|
|
$nr++; |
44 |
|
|
} |
45 |
|
|
} |
46 |
|
|
|
47 |
|
|
my $ref = ref $out->{$k1}->{$k2}; |
48 |
|
|
|
49 |
|
|
if ( ! defined $out->{$k1}->{$k2} ) { |
50 |
|
|
$out->{$k1}->{$k2} = $n; |
51 |
|
|
} elsif ( $k1 =~ m{\+} ) { |
52 |
|
|
# warn "## agregate $k1 $k2"; |
53 |
|
|
$out->{$k1}->{$k2} += $n; |
54 |
|
|
} elsif ( $ref eq 'ARRAY' ) { |
55 |
|
|
if ( ref $n eq 'ARRAY' ) { |
56 |
|
|
push @{ $out->{$k1}->{$k2} }, $_ foreach @$n; |
57 |
|
|
} else { |
58 |
|
|
push @{ $out->{$k1}->{$k2} }, $n; |
59 |
|
|
} |
60 |
|
|
} elsif ( $ref eq '' ) { |
61 |
|
|
$out->{$k1}->{$k2} = [ $out->{$k1}->{$k2}, $n ]; |
62 |
|
|
} else { |
63 |
|
|
die "can't merge $k2 [$ref] from ",dump($n), " into ", dump($out->{$k1}->{$k2}); |
64 |
|
|
} |
65 |
|
|
|
66 |
|
|
if ( $tick++ % 1000 == 0 ) { |
67 |
|
|
print STDERR "."; |
68 |
|
|
} elsif ( $tick % 10000 == 0 ) { |
69 |
|
|
print STDERR $tick; |
70 |
|
|
} |
71 |
|
|
} |
72 |
|
|
} |
73 |
|
|
|
74 |
|
|
$t_merge = time - $t_merge; |
75 |
|
|
warn sprintf "\nmerged %d in %.4fs\n", $tick, $t_merge; |
76 |
|
|
|
77 |
|
|
return $tick; |
78 |
|
|
} |
79 |
|
|
|
80 |
|
|
1; |