# Patch for scripts/reblog2couchdb.pl, revision 2 -> 7.
#
# What the hunk below changes:
#   * $connect is renamed to $dbi and ";host=127.0.0.1;port=13306" is appended.
#   * The item query also selects feed columns, joins items_userdata and feeds,
#     restricts to items.id > ?, and is limited to 5000 rows.
#   * The id of the last processed row is kept in a CouchDB document named
#     '_sync' (key last_row_id); it is written back every 100 rows and once
#     more after the loop.
#   * The new row2doc helper moves the xml and content columns into document
#     attachments (item.xml, content.html) before storing the row as the
#     document data.
#
# NOTE(review): this copy of the patch had lost its line breaks and leading
# whitespace. The line structure was reinstated so that it agrees with the
# hunk header (53 old / 96 new lines); the positions of blank context lines
# and the tab indentation are inferred -- confirm against the repository
# before applying.
# NOTE(review): in the added row2doc, "my $a" is declared twice in the same
# scope. In the added update branch, retrieve/update are not wrapped in eval,
# so the following "warn $@ ? ..." appears to test the error left over from
# the failed create -- verify.
#
--- scripts/reblog2couchdb.pl	2008/08/05 13:28:54	2
+++ scripts/reblog2couchdb.pl	2008/08/05 16:36:57	7
@@ -9,53 +9,96 @@
 
 my $database = 'reblog';
 
-my $connect = "DBI:mysql:database=$database";
-#$connect = "DBI:mysql:database=$database;host=localhost;port=13306";
+my $dbi = "DBI:mysql:database=$database";
+$dbi .= ";host=127.0.0.1;port=13306"; # XXX over ssh
 
-my $dbh = DBI->connect($connect,"","") || die $DBI::errstr;
-
-# select all posts which have been read or unread
 my $sql = qq{
 	select
-		*
+		items.id as _id,
+		items.*,
+		feeds.url as feed_url,
+		feeds.title as feed_title,
+		feeds.link as feed_link,
+		feeds.description as feed_description
 	from items
--- join items_userdata on id=item_id
--- where label = 'read' and value_numeric = ?
--- limit 3
-	order by id asc
+	join items_userdata on items.id = item_id
+	join feeds on items.feed_id = feeds.id
+	where items.id > ?
+	order by items.id asc
+	limit 5000
 };
 
-my $sth = $dbh->prepare($sql) || die $dbh->errstr();
-$sth->execute( 1 ) || die $sth->errstr();
-
-print "found ",$sth->rows," items to process...";
 
 my $c = CouchDB::Client->new(uri => 'http://localhost:5984/');
 $c->testConnection or die "The server cannot be reached";
-print "Running version " . $c->serverInfo->{version} . "\n";
+print "CouchDB version " . $c->serverInfo->{version} . "\n";
 
 my $db = $c->newDB( $database );
 $db->create unless $c->dbExists( $database );
 
-my @docs = $db->listDocs;
-my $row_id = shift @docs || 0;
+my $status = $db->newDoc( '_sync' );
+eval { $status->retrieve };
+$status->create if $@;
+
+print "status ",dump( $status->{data} ), "\n";
+
+my $last_row = $status->{data}->{last_row_id} || 0;
+
+sub commit_last_row {
+	warn "commit_last_row $last_row\n";
+	$status->{data}->{last_row_id} = $last_row;
+	$status->update;
+}
+
+my $dbh = DBI->connect($dbi,"","") || die $DBI::errstr;
+
+print "Fetching items from $dbi id > $last_row\n";
+
+my $sth = $dbh->prepare($sql) || die $dbh->errstr();
+$sth->execute( $last_row ) || die $sth->errstr();
+
+warn dump( $sth->{NAME} );
+
+print "found ",$sth->rows," items to process...";
 
 my $pk = 'id';
 
+my $count = 0;
+
 while (my $row = $sth->fetchrow_hashref() ) {
-	my $_id = $row->{$pk};
+	my $_id = $row->{_id} || die "row needs _id";
 	my $doc = $db->newDoc( $_id );
-	$doc->{data} = $row;
+
+	sub row2doc {
+		my ( $row, $doc ) = @_;
+		my $a = delete( $row->{xml} );
+		$doc->addAttachment( 'item.xml', 'application/xhtml+xml', $a ) if $a;
+		my $a = delete( $row->{content} );
+		$doc->addAttachment( 'content.html', 'text/html', $a ) if $a;
+		$doc->{data} = $row;
+		return $doc;
+	}
+
+	row2doc( $row, $doc );
+
 	eval { $doc->create };
 	if ( $@ ) {
-		warn "can't create $_id $@";
-		eval { $doc->update };
-		warn $@ ? "can't update $_id $@" : "updated $_id";
+		$doc->retrieve;
+		row2doc( $row, $doc )->update;
+#		eval { $doc->update };
+		warn $@ ? "ERROR $_id $@\n" : "updated $_id\n";
 	} else {
-		warn "created ",dump( $row );
+		warn "created $_id\n";
 	}
+
+	$last_row = $row->{id};
+	$count++;
+
+	commit_last_row if $count % 100 == 0 # checkpoint every 100 records
 }
 
+commit_last_row;
+
 __END__
 
 $sql = qq{