# Patch summary (text before the first '---' header is not part of the diff):
#   * adds a "node=s" option stored in $node_url
#   * when $node_url is set, $db is a HyperEstraier::Node; otherwise it is a
#     HyperEstraier::Database opened at /tmp/casket with INT/QUIT handlers
#     that sync the database before exiting
#   * the final sync/optimize pass runs only when $node_url is not set
#   * put_doc() is called without the PDCLEAN flag when $node_url is set
# NOTE(review): set_auth('admin', 'admin') hard-codes credentials.
# NOTE(review): line breaks, indentation and blank context lines were
#   reconstructed to match the hunk line counts; verify against the
#   repository before applying.
--- trunk/perl/scripts/est-spider 2005/09/03 19:16:48 5
+++ trunk/perl/scripts/est-spider 2005/09/10 09:31:53 18
@@ -6,6 +6,9 @@
 use HyperEstraier;
 use Text::Iconv;
 
+# do we use Node API?
+my $node_url;
+
 my $collection; # name which will be inserted
 my $path_add; # add additional info in path
 my $verbose;
@@ -19,6 +22,7 @@
 	"verbose!" => \$verbose,
 	"debug!" => \$verbose,
 	"exclude=s" => \$exclude,
+	"node=s" => \$node_url,
 );
 
 my $dir = shift @ARGV || die "usage: $0 [dir]";
@@ -36,31 +40,38 @@
 
 print STDERR "using $pdftotext to convert pdf into html\n" if ($pdftotext && $verbose);
 
+my $db;
+if ($node_url) {
+	$db = HyperEstraier::Node->new($node_url);
+	$db->set_auth('admin', 'admin');
+} else {
+	# open the database
+	$db = HyperEstraier::Database->new();
+	$db->open('/tmp/casket', $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
+
+	sub signal {
+		my($sig) = @_;
+		print "\nCaught a SIG$sig--syncing database and shutting down\n";
+		$db->sync();
+		exit(0);
+	}
-# open the database
-my $db = HyperEstraier::Database->new();
-$db->open('/tmp/casket', $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
-
-sub signal {
-	my($sig) = @_;
-	print "\nCaught a SIG$sig--syncing database and shutting down\n";
-	$db->sync();
-	exit(0);
+	$SIG{'INT'} = \&signal;
+	$SIG{'QUIT'} = \&signal;
 }
 
-$SIG{'INT'} = \&signal;
-$SIG{'QUIT'} = \&signal;
-
 find({
 	wanted => \&file,
 	follow => 1,
 	no_chdir => 1
 }, $dir);
 
-print "--- sync\n";
-$db->sync();
+unless ($node_url) {
+	print "--- sync\n";
+	$db->sync();
 
-print "--- optimize...\n";
-$db->optimize(0);
+	print "--- optimize...\n";
+	$db->optimize(0);
+}
 exit;
 
 sub dump_contents($$$$) {
@@ -98,7 +109,11 @@
 # print $doc->dump_draft if ($verbose);
 
 	# register the document object to the database
-	$db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
+	if ($node_url) {
+		$db->put_doc($doc);
+	} else {
+		$db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
+	}
 
 }
 