Revision 479 (by dpavlin, 2010/01/23 17:49:07) display progress while importing
#!/usr/bin/perl

# http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API

use warnings;
use strict;

use IO::Socket::INET;
use Storable qw();
use JSON;
use Data::Dump qw(dump);
use Time::HiRes qw(time);
use File::Path qw(make_path remove_tree);
use MongoDB;

# Database name: first command-line argument, or 'pxelator' when none
# (or a false value) is given.  The same name is used below for the
# CouchDB request and for the MongoDB database.
my $name = shift(@ARGV) || 'pxelator';

# Destination: the "audit" collection of the MongoDB database $name,
# reached through a connection created with no explicit arguments.
my $conn  = MongoDB::Connection->new();
my $db    = $conn->get_database($name);
my $audit = $db->get_collection('audit');

# Open a TCP connection to the CouchDB server.
#
# Optional arguments: host and port.  When omitted they default to
# 10.60.0.91 and 5984, so a bare call connects exactly where it did before.
#
# Returns the connected IO::Socket::INET handle; dies with the target
# address and the reason if the connection cannot be established.
sub couchdb_socket {
	my ( $host, $port ) = @_;
	$host = '10.60.0.91' unless defined $host;
	$port = 5984         unless defined $port;

	my $sock = IO::Socket::INET->new(
		PeerAddr => $host,
		PeerPort => $port,
		Proto => 'tcp',
	);

	# IO::Socket::INET puts its own description of a constructor failure
	# in $@; fall back to $! in case that is empty.
	die "can't connect to CouchDB at $host:$port: " . ( $@ || $! )
		unless $sock;

	return $sock;
}

# Read lines from $sock up to and including the first line that consists
# only of CR/LF characters (the blank line that ends a header block).
#
# Returns everything read, concatenated into one string, or undef if the
# handle was already at end-of-file.  Stops early at end-of-file if no
# blank line is seen.
#
# A lexical line variable is used instead of the implicit $_ so that the
# caller's $_ is never overwritten by this sub.
sub get_chunk {
	my $sock = shift;
	my $chunk;
	while ( defined( my $line = <$sock> ) ) {
		$chunk .= $line;
		last if $line =~ /^[\n\r]+$/;
	}
#	warn "# $sock\n$chunk\n";
	return $chunk;
}

# Stream every document of the CouchDB database $name into the MongoDB
# collection $audit, printing a progress line every 1000 documents.

my $sock = couchdb_socket;

print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";

# Discard the HTTP response headers (everything up to the first blank line).
get_chunk($sock);

# The first body line is expected to carry total_rows followed by the row
# count.  Capture the number instead of rewriting the line in place: that
# keeps the line ending out of $total and lets us stop with a useful
# message when the count is missing (e.g. an error response).
my $first_line = <$sock>;
my ($total) = defined $first_line ? $first_line =~ /total_rows\D+(\d+)/ : ();
die "can't find total_rows in response for /$name: "
	. ( defined $first_line ? $first_line : "(empty response)\n" )
	unless defined $total;

$|=1;
print "# $name total: $total\n";

my $start_t = time();
my $count = 0;

# Print a progress line that overwrites itself (it ends in \r).
# Both divisions are guarded so a zero total or zero elapsed time cannot
# abort the import with a division-by-zero error.
my $show_progress = sub {
	my $elapsed = time() - $start_t;
	printf " %d/%d %.2f%% %.2f/s\r", $count, $total
		, ( $total   ? ( $count / $total ) * 100 : 0 )
		, ( $elapsed ? $count / $elapsed : 0 );
};

# One row per line is expected; a row is recognised by its "id":"..." member.
while ( defined( my $line = <$sock> ) ) {
	if ( $line =~ /"id":"[^"]+"/ ) {

		$line =~ s/,[\r\n]+$//; # cleanup JSON
		my $json = from_json( $line );
		$audit->insert( $json->{doc} );
		$show_progress->() if ++$count % 1000 == 0;
	} elsif ( $line =~ /^\s*\]?\}?\s*$/ ) {
		# blank line, or the "]}" that presumably closes the rows array:
		# nothing to import and not worth a warning
		next;
	} else {
		warn "UNKNOWN: $line";
	}
}

# Final report so the last partial thousand is shown too, then leave the
# \r-terminated progress line so later output does not overwrite it.
$show_progress->();
print "\n";