/[hyperestraier_wrappers]/trunk/perl/scripts/est-spider
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/perl/scripts/est-spider

Parent Directory | Revision Log | View Patch

revision 5 by dpavlin, Sat Sep 3 19:16:48 2005 UTC revision 18 by dpavlin, Sat Sep 10 09:31:53 2005 UTC
# Line 6  use File::Which; Line 6  use File::Which;
6  use HyperEstraier;  use HyperEstraier;
7  use Text::Iconv;  use Text::Iconv;
8    
9    # do we use Node API?
10    my $node_url;
11    
12  my $collection;         # name which will be inserted  my $collection;         # name which will be inserted
13  my $path_add;           # add additional info in path  my $path_add;           # add additional info in path
14  my $verbose;  my $verbose;
# Line 19  my $result = GetOptions( Line 22  my $result = GetOptions(
22          "verbose!" => \$verbose,          "verbose!" => \$verbose,
23          "debug!" => \$verbose,          "debug!" => \$verbose,
24          "exclude=s" => \$exclude,          "exclude=s" => \$exclude,
25            "node=s" => \$node_url,
26  );  );
27    
28  my $dir = shift @ARGV || die "usage: $0 [dir]";  my $dir = shift @ARGV || die "usage: $0 [dir]";
# Line 36  select(STDOUT); $|=1; Line 40  select(STDOUT); $|=1;
40    
41  print STDERR "using $pdftotext to convert pdf into html\n" if ($pdftotext && $verbose);  print STDERR "using $pdftotext to convert pdf into html\n" if ($pdftotext && $verbose);
42    
43    my $db;
44    if ($node_url) {
45            $db = HyperEstraier::Node->new($node_url);
46            $db->set_auth('admin', 'admin');
47    } else {
48            # open the database
49            $db = HyperEstraier::Database->new();
50            $db->open('/tmp/casket', $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
51    
52            sub signal {
53                    my($sig) = @_;
54                    print "\nCaught a SIG$sig--syncing database and shutting down\n";
55                    $db->sync();
56                    exit(0);
57            }
58    
59  # open the database          $SIG{'INT'}  = \&signal;
60  my $db = HyperEstraier::Database->new();          $SIG{'QUIT'} = \&signal;
 $db->open('/tmp/casket', $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);  
   
 sub signal {  
         my($sig) = @_;  
         print "\nCaught a SIG$sig--syncing database and shutting down\n";  
         $db->sync();  
         exit(0);  
61  }  }
62    
 $SIG{'INT'}  = \&signal;  
 $SIG{'QUIT'} = \&signal;  
   
63  find({ wanted => \&file,  find({ wanted => \&file,
64          follow => 1,          follow => 1,
65          no_chdir => 1          no_chdir => 1
66  }, $dir);  }, $dir);
67    
68  print "--- sync\n";  unless ($node_url) {
69  $db->sync();          print "--- sync\n";
70            $db->sync();
71    
72  print "--- optimize...\n";          print "--- optimize...\n";
73  $db->optimize(0);          $db->optimize(0);
74    }
75  exit;  exit;
76    
77  sub dump_contents($$$$) {  sub dump_contents($$$$) {
# Line 98  sub dump_contents($$$$) { Line 109  sub dump_contents($$$$) {
109  #       print $doc->dump_draft if ($verbose);  #       print $doc->dump_draft if ($verbose);
110    
111          # register the document object to the database          # register the document object to the database
112          $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);          if ($node_url) {
113                    $db->put_doc($doc);
114            } else {
115                    $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
116            }
117    
118  }  }
119    

Legend:
Removed from v.5  
changed lines
  Added in v.18

  ViewVC Help
Powered by ViewVC 1.1.26