| Revision 479 (by dpavlin, 2010/01/23 17:49:07) |
display progress while importing
|
#!/usr/bin/perl
# http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API
use warnings;
use strict;
use IO::Socket::INET;
use Storable qw();
use JSON;
use Data::Dump qw(dump);
use Time::HiRes qw(time);
use File::Path qw(make_path remove_tree);
use MongoDB;
# Database name: first command-line argument, falling back to 'pxelator'
# when the argument is missing or false.
my $name = shift(@ARGV) || 'pxelator';

# MongoDB handles: a connection constructed with no arguments, the database
# called $name on it, and that database's "audit" collection.
my $conn  = MongoDB::Connection->new();
my $db    = $conn->get_database($name);
my $audit = $db->get_collection('audit');
# Open a TCP connection to the CouchDB server.
#
# Arguments (both optional, so existing zero-argument calls keep working):
#   $host - peer address, defaults to '10.60.0.91'
#   $port - peer port, defaults to 5984
#
# Returns the connected IO::Socket::INET handle.
# Dies with the target address and the reason when the connection fails.
sub couchdb_socket {
    my ( $host, $port ) = @_;
    $host = '10.60.0.91' unless defined $host;
    $port = 5984         unless defined $port;

    my $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp',
    );

    # IO::Socket::INET puts its descriptive constructor error in $@;
    # fall back to $! only when $@ is empty.
    die "can't connect to CouchDB at $host:$port: " . ( $@ || $! )
        unless $sock;

    return $sock;
}
# Read one blank-line-terminated chunk from a handle; the script uses it to
# consume the HTTP response headers.
#
# Arguments:
#   $sock - readable handle
#
# Returns every line read up to and including the first line that consists
# only of CR/LF characters (or up to EOF if no such line is seen).
# Returns undef when nothing could be read at all.
sub get_chunk {
    my $sock = shift;
    my $chunk;

    # Read into a lexical: while(<$sock>) would assign to the global $_
    # without localizing it and clobber the caller's $_.
    while ( defined( my $line = <$sock> ) ) {
        $chunk .= $line;
        last if $line =~ /^[\n\r]+$/;
    }

    # warn "# $sock\n$chunk\n";
    return $chunk;
}
# Main import: stream /$name/_all_docs?include_docs=true from CouchDB and
# insert each row's "doc" into the MongoDB $audit collection, printing a
# progress line every 1000 documents and once more at the end.

my $sock = couchdb_socket;
print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";
get_chunk($sock);    # skip the HTTP response headers

# The first body line is expected to carry "total_rows":N; anything else
# (EOF, an error object) means there is nothing sensible to import.
my $total = <$sock>;
die "no response body from CouchDB for /$name/_all_docs\n"
    unless defined $total;
if ( $total =~ m{total_rows\D+(\d+)} ) {
    $total = $1;
}
else {
    die "can't find total_rows in CouchDB response: $total";
}

$| = 1;    # unbuffered STDOUT so the \r progress line shows up immediately
print "# $name total: $total\n";

my $start_t = time();
my $count   = 0;

# Print the progress line; guarded so a zero total or a zero elapsed time
# cannot cause a division by zero.
my $show_progress = sub {
    my $elapsed = time() - $start_t;
    printf " %d/%d %.2f%% %.2f/s\r", $count, $total,
        ( $total > 0   ? ( $count / $total ) * 100 : 0 ),
        ( $elapsed > 0 ? $count / $elapsed         : 0 );
};

while ( my $line = <$sock> ) {
    if ( $line =~ /"id":"([^"]+)"/ ) {
        $line =~ s/,[\r\n]+$//;    # cleanup JSON: drop row separator
        my $json = from_json($line);
        $audit->insert( $json->{doc} );
        $show_progress->() if ++$count % 1000 == 0;
    }
    elsif ( $line =~ /^\s*\]\}\s*$/ ) {
        next;    # closing "]}" of the rows array, not an unknown line
    }
    else {
        warn "UNKNOWN: $line";
    }
}

# Final progress line (the count is rarely a multiple of 1000), then a
# newline so later output does not overwrite it.
$show_progress->();
print "\n";