1 |
dpavlin |
92 |
package Sack::From; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use Data::Dump qw(dump); |
7 |
|
|
|
8 |
|
|
use lib "/srv/webpac2/lib/"; |
9 |
|
|
use WebPAC::Input::ISI; |
10 |
|
|
|
11 |
|
|
$WebPAC::Input::ISI::subfields = undef; # disable parsing of subfields |
12 |
|
|
|
13 |
dpavlin |
131 |
use Storable; |
14 |
|
|
|
15 |
dpavlin |
95 |
sub new { |
16 |
|
|
my $class = shift; |
17 |
|
|
my $self = bless {@_}, $class; |
18 |
dpavlin |
92 |
|
19 |
dpavlin |
95 |
warn "from ",dump( @_ ); |
20 |
dpavlin |
92 |
|
21 |
dpavlin |
95 |
$self->{input} = WebPAC::Input::ISI->new( @_ ); |
22 |
dpavlin |
131 |
warn "got ", $self->size, " records for ", dump @_; |
23 |
dpavlin |
92 |
|
24 |
dpavlin |
95 |
$self->{pos} = 1; |
25 |
|
|
$self->{shard} = 0; |
26 |
|
|
|
27 |
|
|
return $self; |
28 |
|
|
} |
29 |
|
|
|
30 |
|
|
sub input { $_[0]->{input} } |
31 |
dpavlin |
131 |
sub size { $_[0]->{input}->size } |
32 |
dpavlin |
95 |
|
33 |
|
|
sub shard { |
34 |
|
|
my ($self,$limit) = @_; |
35 |
|
|
|
36 |
dpavlin |
131 |
my $path = "/tmp/sack.shard.$self->{shard}.$limit"; |
37 |
|
|
|
38 |
|
|
if ( -e $path ) { |
39 |
|
|
warn "retrive $path ", -s $path, " bytes"; |
40 |
|
|
return retrieve $path; |
41 |
|
|
} |
42 |
|
|
|
43 |
dpavlin |
95 |
my $start_pos = $self->{pos}; |
44 |
|
|
$self->{shard}++; |
45 |
dpavlin |
92 |
my $data; |
46 |
|
|
|
47 |
dpavlin |
95 |
foreach my $pos ( 1 .. $limit ) { |
48 |
|
|
push @$data, $self->input->fetch_rec( $self->{pos}++ ); |
49 |
dpavlin |
92 |
} |
50 |
|
|
|
51 |
dpavlin |
131 |
my $end_pos = $self->{pos} - 1; |
52 |
|
|
my $r_len = length $self->size; |
53 |
dpavlin |
95 |
|
54 |
dpavlin |
131 |
my $range = sprintf "%${r_len}d-%${r_len}d", $start_pos, $end_pos; |
55 |
|
|
|
56 |
|
|
warn "shard $self->{shard} range: $range / $self->size\n"; |
57 |
|
|
|
58 |
|
|
store $data, $path; |
59 |
|
|
warn "store $path ", -s $path, " bytes"; |
60 |
|
|
|
61 |
dpavlin |
92 |
return $data; |
62 |
|
|
} |
63 |
|
|
|
64 |
|
|
1; |