3 |
use warnings; |
use warnings; |
4 |
use strict; |
use strict; |
5 |
|
|
6 |
our $VERSION = '0.01'; |
our $VERSION = '0.04'; |
7 |
|
|
8 |
use Time::HiRes qw(time); |
use Time::HiRes qw(time); |
9 |
use Data::Dump qw(dump); |
use Data::Dump qw(dump); |
10 |
use File::Slurp; |
use File::Slurp; |
11 |
use Getopt::Long; |
use Getopt::Long; |
12 |
use IO::Socket::INET; |
use IO::Socket::INET; |
13 |
use Storable qw/freeze thaw/; |
use Storable qw/freeze thaw store/; |
14 |
|
|
15 |
|
|
16 |
my $debug = 0; |
my $debug = 0; |
18 |
my $limit = 5000; |
my $limit = 5000; |
19 |
my $offset = 0; |
my $offset = 0; |
20 |
my @views; |
my @views; |
21 |
my $listen = 0; # off |
my $port = 0; # interactive |
22 |
my @nodes; |
my @nodes; |
23 |
|
|
24 |
|
|
27 |
'offset=i' => \$offset, |
'offset=i' => \$offset, |
28 |
'limit=i' => \$limit, |
'limit=i' => \$limit, |
29 |
'view=s' => \@views, |
'view=s' => \@views, |
30 |
'listen|port=i' => \$listen, |
'listen|port=i' => \$port, |
31 |
'connect=s' => \@nodes, |
'connect=s' => \@nodes, |
32 |
'debug!' => \$debug, |
'debug!' => \$debug, |
33 |
) or die $!; |
) or die $!; |
40 |
our $prefix; |
our $prefix; |
41 |
sub BEGIN { |
sub BEGIN { |
42 |
$prefix = $0; |
$prefix = $0; |
43 |
if ( $prefix =~ s{^./}{} ) { |
if ( $prefix !~ m{^/} ) { |
44 |
chomp( my $pwd = `pwd` ); |
chomp( my $pwd = `pwd` ); |
45 |
$prefix = "$pwd/$prefix"; |
$prefix = "$pwd/$prefix"; |
46 |
} |
} |
47 |
$prefix =~ s{^(.*)/srv/Sack/bin.+$}{$1}; |
$prefix =~ s{^(.*)/srv/Sack/.+$}{$1}; |
48 |
warn "# prefix $prefix"; |
warn "# prefix $prefix"; |
49 |
|
|
50 |
$SIG{INT} = sub { |
$SIG{INT} = sub { |
51 |
my $signame = shift; |
my $signame = shift; |
52 |
send_nodes 'exit'; |
send_nodes 'exit'; |
53 |
|
#clean if $clean; # FIXME |
54 |
die "SIG$signame"; |
die "SIG$signame"; |
55 |
}; |
}; |
56 |
} |
} |
57 |
|
|
58 |
|
use lib "$prefix/srv/Sack/lib/"; |
59 |
|
use Sack::Digest; |
60 |
|
our $digest = Sack::Digest->new( port => $port, clean => 1 ); |
61 |
|
sub digest { $digest->to_int($_[0]) } |
62 |
|
|
63 |
use lib "$prefix/srv/webpac2/lib/"; |
use lib "$prefix/srv/webpac2/lib/"; |
64 |
use WebPAC::Input::ISI; |
use WebPAC::Input::ISI; |
74 |
our $num_records = $input->size; |
our $num_records = $input->size; |
75 |
|
|
76 |
sub report { |
sub report { |
77 |
my $description = shift; |
my $description = join(' ',@_); |
78 |
my $dt = time - $t; |
my $dt = time - $t; |
79 |
printf "%s in %1.4fs %.2f/s\n", $description, $dt, $input->size / $dt; |
printf "%s in %1.4fs %.2f/s\n", $description, $dt, $input->size / $dt; |
80 |
$t = time; |
$t = time; |
99 |
foreach my $node ( @nodes ) { |
foreach my $node ( @nodes ) { |
100 |
|
|
101 |
my $sock = IO::Socket::INET->new( |
my $sock = IO::Socket::INET->new( |
102 |
PeerAddr => $node, |
PeerAddr => '127.0.0.1', |
103 |
|
PeerPort => $node, |
104 |
Proto => 'tcp', |
Proto => 'tcp', |
105 |
); |
); |
106 |
|
|
109 |
next; |
next; |
110 |
} |
} |
111 |
|
|
112 |
print ">>>> $listen $node $header\n"; |
warn "[$port] >>>> $node $header\n"; |
113 |
print $sock "$header\n$content" || warn "can't send $header to $node: $!"; |
print $sock "$header\n$content" || warn "can't send $header to $node: $!"; |
114 |
|
|
115 |
$connected->{$node} = $sock; |
$connected->{$node} = $sock; |
121 |
|
|
122 |
my $sock = $connected->{$node}; |
my $sock = $connected->{$node}; |
123 |
if ( ! $sock ) { |
if ( ! $sock ) { |
124 |
warn "ERROR: lost connection to $node"; |
warn "[$port] ERROR lost connection to $node"; |
125 |
delete $connected->{$node}; |
delete $connected->{$node}; |
126 |
return; |
return; |
127 |
} |
} |
128 |
chomp( my $size = <$sock> ); |
chomp( my $size = <$sock> ); |
129 |
warn "<<<< $listen $node $size bytes\n"; |
warn "[$port] <<<< $node $size bytes\n" if $debug || $size > 1024; |
130 |
my $data; |
my $data; |
131 |
read $sock, $data, $size; |
read $sock, $data, $size; |
132 |
return $data; |
return $data; |
135 |
sub send_sock { |
sub send_sock { |
136 |
my ( $sock, $data ) = @_; |
my ( $sock, $data ) = @_; |
137 |
my $size = length $data; |
my $size = length $data; |
138 |
warn ">>>> $listen ", $sock->peerhost, " $size bytes\n"; |
warn "[$port] >>>> $size bytes\n" if $debug || $size > 1024; |
139 |
print $sock "$size\n$data" || warn "can't send $size bytes to ", $sock->peerhost; |
print $sock "$size\n$data" || warn "can't send $size bytes to ", $sock->peerhost; |
140 |
} |
} |
141 |
|
|
142 |
sub merge_out { |
sub merge_out { |
143 |
my $new = shift; |
my ( $from_node, $new ) = @_; |
144 |
|
|
145 |
|
warn "### merge $from_node"; |
146 |
|
|
147 |
|
my $from_port = $from_node; |
148 |
|
$from_port =~ s{.+:(\d+)$}{$1}; |
149 |
|
|
150 |
|
my $remote_digest = Sack::Digest->new( port => $from_port ); |
151 |
|
my ( $local, $remote ) = ( 0, 0 ); |
152 |
|
|
153 |
foreach my $k1 ( keys %$new ) { |
foreach my $k1 ( keys %$new ) { |
154 |
|
|
155 |
foreach my $k2 ( keys %{ $new->{$k1} } ) { |
foreach my $k2 ( keys %{ $new->{$k1} } ) { |
156 |
|
|
157 |
my $n = $new->{$k1}->{$k2}; |
my $n = delete $new->{$k1}->{$k2}; |
158 |
my $ref = ref $out->{$k1}->{$k2}; |
|
159 |
|
if ( $k1 =~ m{#} ) { |
160 |
|
die "ASSERT $k1 $k2" unless $k2 =~ m{^\d+$}; |
161 |
|
#warn "XXX $k1 $k2"; |
162 |
|
my $md5 = $remote_digest->{nr_md5}->[$k2] || warn "[$port] no2md5 $n not found in $from_port\n"; |
163 |
|
if ( my $local_k2 = $digest->{md5_nr}->{$md5} ) { |
164 |
|
$k2 = $local_k2; |
165 |
|
$local++; |
166 |
|
} else { |
167 |
|
$k2 = $digest->to_int( $remote_digest->{md5}->{$md5} ); |
168 |
|
$remote++; |
169 |
|
} |
170 |
|
} |
171 |
|
|
172 |
|
my $ref = ref $out->{$k1}->{$k2}; |
173 |
|
#warn "XXXX $k1 $k2 $ref"; |
174 |
if ( ! defined $out->{$k1}->{$k2} ) { |
if ( ! defined $out->{$k1}->{$k2} ) { |
175 |
$out->{$k1}->{$k2} = $n; |
$out->{$k1}->{$k2} = $n; |
176 |
} elsif ( $k1 =~ m{\+} ) { |
} elsif ( $k1 =~ m{\+} ) { |
190 |
} |
} |
191 |
} |
} |
192 |
|
|
193 |
|
warn "[$port] merge local $local remote $remote from $from_port\n"; |
194 |
warn "## merge out ", dump $out if $debug; |
warn "## merge out ", dump $out if $debug; |
195 |
} |
} |
196 |
|
|
209 |
foreach my $pos ( $offset + 1 .. $offset + $input->size ) { |
foreach my $pos ( $offset + 1 .. $offset + $input->size ) { |
210 |
my $rec = $cache->{$pos} ||= $input->fetch_rec( $pos ); |
my $rec = $cache->{$pos} ||= $input->fetch_rec( $pos ); |
211 |
if ( ! $rec ) { |
if ( ! $rec ) { |
212 |
warn "END at $pos"; |
print STDERR "END @ $pos"; |
213 |
last; |
last; |
214 |
} |
} |
215 |
|
|
216 |
eval "$code"; |
eval "$code"; |
217 |
if ( $@ ) { |
if ( $@ ) { |
218 |
warn "ABORT [$pos] $@\n"; |
warn "ABORT $pos $@\n"; |
219 |
last; |
last; |
220 |
} else { |
} else { |
221 |
$affected++; |
$affected++; |
222 |
} |
} |
223 |
|
|
224 |
|
$pos % 10000 == 0 ? print STDERR $pos : |
225 |
|
$pos % 1000 == 0 ? print STDERR "." : 0 ; |
226 |
}; |
}; |
227 |
|
|
228 |
report "$affected affected records $view"; |
report "\n[$port] RECS $affected $view"; |
229 |
|
|
230 |
warn "WARN no \$out defined!" unless defined $out; |
warn "WARN no \$out defined!" unless defined $out; |
231 |
|
|
232 |
|
$digest->sync; |
233 |
|
|
234 |
if ( $connected ) { |
if ( $connected ) { |
235 |
foreach my $node ( keys %$connected ) { |
foreach my $node ( keys %$connected ) { |
236 |
warn "# $listen get_node $node\n"; |
warn "[$port] get_node $node\n"; |
237 |
my $o = get_node $node; |
my $o = get_node $node; |
238 |
my $s = length $o; |
my $s = length $o; |
239 |
$o = thaw $o; |
$o = thaw $o; |
240 |
warn "# $listen merge $s bytes\n"; |
warn "[$port] merge $node $s bytes\n"; |
241 |
merge_out $o; |
merge_out $node => $o; |
242 |
} |
} |
243 |
} |
} |
244 |
} |
} |
256 |
run_code $view => $code; |
run_code $view => $code; |
257 |
|
|
258 |
if ( defined $out ) { |
if ( defined $out ) { |
|
my $dump = dump $out; |
|
|
my $len = length $dump; |
|
259 |
|
|
260 |
my $path = $view; |
my $path = $view; |
261 |
$path =~ s{views?/}{out/} || die "no view in $view"; |
$path =~ s{views?/}{out/} || die "no view in $view"; |
262 |
$path =~ s{\.pl}{}; |
$path =~ s{\.pl}{.storable}; |
|
|
|
|
print "OUT $view $offset/$limit $len bytes $path" |
|
|
, ( $len < 10000 ? " \$out = $dump" : ' SAVED ONLY' ) |
|
|
, "\n" |
|
|
; |
|
263 |
|
|
264 |
unlink "$path.last" if -e "$path.last"; |
unlink "$path.last" if -e "$path.last"; |
265 |
rename $path, "$path.last"; |
rename $path, "$path.last"; |
266 |
write_file $path, $dump; |
|
267 |
report "SAVE $path"; |
store $out => $path; |
268 |
|
report "[$port] SAVE $path $offset-$limit", -s $path, "bytes"; |
269 |
|
|
270 |
|
if ( -s $path < 4096 ) { |
271 |
|
print '$out = ', dump $digest->undigest_out($out); |
272 |
|
} |
273 |
} |
} |
274 |
|
|
275 |
} |
} |
276 |
|
|
277 |
} |
} |
278 |
|
|
279 |
if ( $listen ) { |
|
280 |
|
sub info_tabs { |
281 |
|
"$port\t$offset\t$limit\t$num_records\t$path\t" |
282 |
|
. join("\t", map { |
283 |
|
my $b = $_; |
284 |
|
$b =~ s{^.+\.$port\.([^/]+)$}{$1}; |
285 |
|
"$b " . -s $_ |
286 |
|
} glob "/dev/shm/sack.$port.*" ); |
287 |
|
} |
288 |
|
|
289 |
|
|
290 |
|
if ( $port ) { |
291 |
my $sock = IO::Socket::INET->new( |
my $sock = IO::Socket::INET->new( |
292 |
Listen => SOMAXCONN, |
Listen => SOMAXCONN, |
293 |
LocalAddr => '127.0.0.1', |
LocalAddr => '127.0.0.1', |
294 |
LocalPort => $listen, |
LocalPort => $port, |
295 |
Proto => 'tcp', |
Proto => 'tcp', |
296 |
Reuse => 1, |
Reuse => 1, |
297 |
) or die $!; |
) or die $!; |
298 |
|
|
299 |
while (1) { |
while (1) { |
300 |
|
|
301 |
warn "NODE $listen ready - path: $path offset: $offset limit: $limit #recs: $num_records\n"; |
warn "[$port] READY path: $path offset: $offset limit: $limit #recs: $num_records\n"; |
302 |
|
|
303 |
my $client = $sock->accept(); |
my $client = $sock->accept(); |
304 |
|
|
305 |
warn "<<<< $listen connect from ", $client->peerhost, $/; |
warn "[$port] <<<< connect from ", $client->peerhost, $/; |
306 |
|
|
307 |
my @header = split(/\s/, <$client>); |
my @header = split(/\s/, <$client>); |
308 |
warn "<<<< $listen header ",dump(@header),$/; |
warn "[$port] <<<< header ",dump(@header),$/; |
309 |
|
|
310 |
my $size = shift @header; |
my $size = shift @header; |
311 |
|
|
316 |
run_code $header[1] => $content; |
run_code $header[1] => $content; |
317 |
send_sock $client => freeze $out; |
send_sock $client => freeze $out; |
318 |
} elsif ( $header[0] eq 'info' ) { |
} elsif ( $header[0] eq 'info' ) { |
319 |
my $info = "$listen\t$offset\t$limit\t$num_records\t$path"; |
my $info = info_tabs; |
320 |
$info .= "\t" . eval $header[1] if $header[1]; |
warn "[$port] info $info\n"; |
|
warn "info $info\n"; |
|
321 |
send_sock $client => $info; |
send_sock $client => $info; |
322 |
} elsif ( $header[0] eq 'exit' ) { |
} elsif ( $header[0] eq 'exit' ) { |
323 |
warn "exit $listen"; |
warn "[$port] exit"; |
324 |
exit; |
exit; |
325 |
} else { |
} else { |
326 |
warn "WARN $listen unknown"; |
warn "[$port] UNKNOWN $header[0]"; |
327 |
} |
} |
328 |
|
|
329 |
} |
} |
330 |
} |
} |
331 |
|
|
332 |
|
sub info { |
333 |
|
send_nodes 'info' => $2; |
334 |
|
|
335 |
|
my @info = ( |
336 |
|
"port\toffset\tlimit\t#recs\tpath", |
337 |
|
"----\t------\t-----\t-----\t----", |
338 |
|
info_tabs, |
339 |
|
); |
340 |
|
|
341 |
|
push @info, get_node $_ foreach @nodes; |
342 |
|
|
343 |
|
print "[$port] INFO\n" |
344 |
|
, join("\n", @info) |
345 |
|
, "\n\n" ; |
346 |
|
|
347 |
|
return @info; |
348 |
|
} |
349 |
|
|
350 |
|
info; |
351 |
run_views; |
run_views; |
352 |
|
|
353 |
while ( 1 ) { |
while ( 1 ) { |
366 |
|
|
367 |
__HELP__ |
__HELP__ |
368 |
} elsif ( $cmd =~ m{^(vi|\\e|o)}i ) { |
} elsif ( $cmd =~ m{^(vi|\\e|o)}i ) { |
369 |
system "vi out/*"; |
#system "vi out/*"; |
370 |
|
$digest->sync; |
371 |
|
system "bin/storableedit.pl", (glob('out/*.storable'))[0]; |
372 |
} elsif ( $cmd =~ m{^i(?:nfo)?\s?(.+)?$}i ) { |
} elsif ( $cmd =~ m{^i(?:nfo)?\s?(.+)?$}i ) { |
373 |
print "# nodes: ", join(' ',@nodes), $/; |
info; |
|
|
|
|
send_nodes 'info' => $2; |
|
|
|
|
|
my @info = ( |
|
|
"node\toffset\tlimit\t#recs\tpath", |
|
|
"----\t------\t-----\t-----\t----", |
|
|
"0\t$offset\t$limit\t$num_records\t$path", |
|
|
); |
|
|
|
|
|
push @info, get_node $_ foreach @nodes; |
|
|
|
|
|
print "$_\n" foreach @info; |
|
|
|
|
374 |
} elsif ( $cmd =~ m{^(q|e|x)}i ) { |
} elsif ( $cmd =~ m{^(q|e|x)}i ) { |
375 |
warn "# exit"; |
warn "# exit"; |
376 |
send_nodes 'exit'; |
send_nodes 'exit'; |
377 |
exit; |
exit; |
378 |
} elsif ( $cmd =~ m{^(v|r)}i ) { |
} elsif ( $cmd =~ m{^(v|r)}i ) { |
379 |
run_views; |
run_views; |
380 |
|
} elsif ( $cmd =~ m{^n(ode)?\s*(\d+)}i ) { |
381 |
|
push @nodes, $1; |
382 |
|
info; |
383 |
} elsif ( $cmd ) { |
} elsif ( $cmd ) { |
384 |
warn "UNKNOWN ", dump $cmd; |
warn "UNKNOWN ", dump $cmd; |
385 |
} |
} |