1 |
package Sack::Shards; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use Storable; |
7 |
use Data::Dump qw(dump); |
8 |
|
9 |
use lib '/srv/Sack/lib'; |
10 |
use Sack; |
11 |
|
12 |
use lib "/srv/webpac2/lib/"; |
13 |
use WebPAC::Input::ISI; |
14 |
|
15 |
# Clear the package-level $subfields setting of WebPAC::Input::ISI so that
# subfield parsing is disabled. This is a package global, so the assignment
# applies to every use of WebPAC::Input::ISI in this process, not only to
# objects created by this module.
$WebPAC::Input::ISI::subfields = undef;
16 |
|
17 |
# Construct a Sack::Shards object and shard the whole input right away.
#
# Arguments are a key => value list:
#   limit - number of records per shard; required and must be a positive
#           number
#   every other pair is passed unchanged to WebPAC::Input::ISI->new
#
# The input object is kept in $self->{input}; shard() is then called for
# offsets 0, limit, 2*limit, ... while the offset does not exceed the
# record count reported by the input.
#
# Returns the new object.
# Dies when limit is missing, zero, negative or not a number.
sub new {
    my $class = shift;

    my $args = {@_};
    duration( 'sharding', dump($args) );

    my $self = bless $args, $class;

    # limit is removed from the hash so that it is not forwarded to the
    # input constructor below
    my $limit = delete $args->{limit} || die "no shard size limit?";

    # A negative or non-numeric limit would never move $offset past the
    # record count, so the loop below would not terminate.
    {
        no warnings 'numeric';
        die "shard size limit must be a positive number, got '$limit'"
            unless $limit > 0;
    }

    $self->{input} = WebPAC::Input::ISI->new(%$args);

    duration( "got ", $self->size, " records" );

    # NOTE(review): pos is not read anywhere in the visible code -- confirm
    # whether anything outside this file depends on it
    $self->{pos} = 1;

    # NOTE(review): the loop also runs when $offset equals the record count;
    # whether that extra shard is needed depends on whether fetch_rec
    # positions start at 0 or 1 -- confirm against WebPAC::Input::ISI
    for ( my $offset = 0; $offset <= $self->size; $offset += $limit ) {
        $self->shard( $offset, $limit );
    }

    duration('sharding finished');

    return $self;
}
42 |
|
43 |
# Accessor for the object stored under the 'input' key of $self.
sub input {
    my ($self) = @_;
    return $self->{input};
}
44 |
# Returns whatever the size method of the stored input object reports.
sub size {
    my ($self) = @_;
    return $self->{input}->size;
}
45 |
|
46 |
# Build one shard of input records, or load it from the on-disk cache.
#
#   $offset - position handed to fetch_rec for the first record
#   $limit  - number of consecutive positions to fetch
#
# The shard is cached in /tmp/sack/shard/<first>-<last>.<limit>, where both
# positions are zero-padded to the number of digits in the record count.
# When that file already exists its stored content is returned and the
# input is not read at all.
#
# Returns an array reference holding whatever fetch_rec returned for each
# requested position (or the previously stored structure on a cache hit).
# Dies when a cache directory cannot be created or the shard cannot be
# written.
#
# NOTE(review): the cache path depends only on range and limit, not on
# which input is being sharded -- confirm that stale files from another
# input cannot be picked up here.
sub shard {
    my ( $self, $offset, $limit ) = @_;

    my $name  = 'shard';
    my $total = $self->size;
    my $width = length $total;
    my $range = sprintf "%0${width}d-%0${width}d", $offset, $offset + $limit - 1;

    foreach my $dir ( '/tmp/sack', "/tmp/sack/$name" ) {
        next if -e $dir;
        next if mkdir $dir;

        # Another process may have created the directory in the meantime;
        # only give up when it is still missing.
        my $error = $!;
        die "can't create $dir: $error" unless -d $dir;
    }

    my $path = "/tmp/sack/$name/$range.$limit";

    if ( -e $path ) {
        warn "retrive $path ", -s $path, " bytes\n";
        return retrieve($path);
    }

    my $pos  = $offset;
    my $data = [];

    foreach ( 1 .. $limit ) {
        push @$data, $self->input->fetch_rec( $pos++ );
    }

    # the number logged here is the last index of @$data, not its length
    warn "shard $range / ", $#{$data}, "\n";

    # store returns undef on I/O problems; without this check a failed
    # write would go unnoticed
    store( $data, $path ) or die "can't store $path: $!";
    warn "store $path ", -s $path, " bytes\n";

    # An empty input reports size 0; skip the percentage instead of dying
    # with a division by zero.
    warn sprintf( "%2.1f%% done\n", $offset * 100 / $total ) if $total;

    return $data;
}
80 |
|
81 |
1; |