/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 21 by dpavlin, Sun Dec 5 22:24:09 2004 UTC revision 22 by dpavlin, Tue Dec 7 16:05:43 2004 UTC
# Line 4  use 5.008004; Line 4  use 5.008004;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.10';  our $VERSION = '0.20';
8    
9  use Carp;  use Carp;
10  use File::Temp qw/ tempdir /;  use File::Temp qw/ tempdir /;
11  use BerkeleyDB;  use BerkeleyDB;
12  #use YAML;  use Storable qw(store retrieve freeze thaw);
13    use YAML;
14    
15  =head1 NAME  =head1 NAME
16    
# Line 18  SWISH::PlusPlus - Perl extension for ful Line 19  SWISH::PlusPlus - Perl extension for ful
19  =head1 SYNOPSIS  =head1 SYNOPSIS
20    
21    use SWISH::PlusPlus;    use SWISH::PlusPlus;
22    blah blah blah  
23      my $i = new SWISH::PlusPlus(
24            index_dir => '/tmp/foo',
25      );
26      $i->add( 42 => 'meaning of life' );
27    
28      print $i->search("meaning");  # returns 42
29    
30  =head1 DESCRIPTION  =head1 DESCRIPTION
31    
# Line 164  sub check_bin { Line 171  sub check_bin {
171  Quick way to add simple data to index.  Quick way to add simple data to index.
172    
173    $i->index_document($path, $data);    $i->index_document($path, $data);
174    $i->index_document( 42 => 'meaning of life' );    $i->index_document(
175            42 => 'meaning of life',
176            1984 => 'Oh!',
177      );
178    
179  C<$path> value is really path, so you don't want to use directory  C<$path> value is really path, so you don't want to use directory
180  separators (slashes, /) in it probably.  separators (slashes, /) in it probably.
# Line 208  This is thin wrapper round L<_create_doc Line 218  This is thin wrapper round L<_create_doc
218  sub add {  sub add {
219          my $self = shift;          my $self = shift;
220    
221          $self->_create_doc(@_);          return $self->_create_doc(@_);
222    }
223    
224          return 1;  
225    =head2 delete
226    
227    Delete document from index.
228    
229      $i->delete("document/path");
230    
231    If deletion is succesfull returns revision of deleted document, otherwise
232    undef.
233    
234    =cut
235    
236    sub delete {
237            my $self = shift;
238    
239            my $path = shift || carp "empty path?";
240    
241            print STDERR "## delete: $path\n" if ($self->{'debug'});
242    
243            my $rev = $self->{'meta_db'}->{"R$path"};
244            if ($rev) {
245                    $self->{'_deleted'}->{$path} = $rev;
246                    $self->{'_deleted_counter'}++;
247                    print STDERR "## deleted revision $rev, counter: ",$self->{'_deleted_counter'}++,"\n" if ($self->{'debug'});
248                    return $rev;
249            }
250    
251            return undef;
252  }  }
253    
254    
255  =head2 search  =head2 search
256    
257  Search your index using any valid SWISH++ query.  Search your index using any valid SWISH++ query.
# Line 249  sub search { Line 288  sub search {
288                  ' |';                  ' |';
289          print STDERR "## search: $open_cmd\n" if ($self->{'debug'});          print STDERR "## search: $open_cmd\n" if ($self->{'debug'});
290    
291            my %r;
292    
293          open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!";          open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!";
294          my $l;          my $l;
295          while($l = <SEARCH>) {          while($l = <SEARCH>) {
296                  next if ($l =~ /^#/);                  next if ($l =~ /^#/);
297                  chomp($l);                  chomp($l);
298                  print STDERR "## $l\n" if ($self->{'debug'});                  print STDERR "## $l\n" if ($self->{'debug'});
299                  my ($rank,$path,$size,$title) = split(/ /,$l,4);                  my ($rank,$path,$size,$rev,$title) = split(/ /,$l,5);
300                  $path =~ s#^\./##; # strip from path                  $path =~ s#^\./##; # strip from path
301    
302                    # get current revision
303                    $r{$path} = $self->{'meta_db'}->{"R$path"};
304    
305                    # skip if old revision
306                    next if ($r{$path} > $rev);
307    
308                    print STDERR "## current revision $rev\n" if ($self->{'debug'});
309    
310                  push @results, {                  push @results, {
311                          rank => $rank,                          rank => $rank,
312                          path => $path,                          path => $path,
313                          size => $size,                          size => $size,
314                          title => $title,                          title => $title,
315                  }                  } unless ($self->{'_deleted'}->{$path} && $self->{'_deleted'}->{$path} <= $rev);
316          }          }
317    
318          close(SEARCH) || confess "can't close search";          close(SEARCH) || confess "can't close search";
# Line 276  sub search { Line 326  sub search {
326    
327  Return stored meta property from result or result path.  Return stored meta property from result or result path.
328    
329    print $i->property('path', 'title');    print $i->property('path', 'meta name');
330    print $i->property($res->{'path'}, 'title');    print $i->property($res->{'path'}, 'meta name');
331      print $i->property('path');
332      print $i->property($res->{'path'});
333    
334    Returns one meta property (if meta name is specified) or whole hash with
335    all meta properties.
336    
337  =cut  =cut
338    
339  sub property {  sub property {
340          my $self = shift;          my $self = shift;
341    
342          my ($path,$meta) = @_;          my $path = shift || return;
343            my $meta = shift;
344    
345          if ($path =~ m/^HASH/) {          if ($path =~ m/^HASH/) {
346                  $path = $path->{'path'} || confess "can't find path in input data";                  $path = $path->{'path'} || confess "can't find path in input data";
347          }          }
348    
349          my $val = $self->{'meta_db'}->{"$path-$meta"};          my $val = $self->{'meta_db'}->{"M$path"};
350    
351            # FIXME should we die here like swish-e does?
352            return unless ($val);
353    
354            $val = thaw($val);
355    
356            print STDERR "## property $path $meta: ",(Dump($val) || 'undef'),"\n" if ($self->{'debug'});
357    
358            return $val->{$meta} if ($meta);
359    
         print STDERR "## property $path-$meta: ",($val || 'undef'),"\n" if ($self->{'debug'});  
360          return $val;          return $val;
361  }  }
362    
# Line 349  sub _init_indexer { Line 413  sub _init_indexer {
413    
414          chdir $tmp_dir || confess "can't chdir to ".$tmp_dir.": $!";          chdir $tmp_dir || confess "can't chdir to ".$tmp_dir.": $!";
415    
416          print STDERR "## tmp_dir: $tmp_dir" if ($self->{'debug'});          print STDERR "## tmp_dir: $tmp_dir\n" if ($self->{'debug'});
417    
418          my $opt = "-v " . ($self->{'debug'} || '0');          my $opt = "-v " . ($self->{'debug'} || '0');
419    
420            my $index_dir = $self->{'index_dir'} || confess "no index_dir?";
421            my $index_file = $index_dir . '/index';
422    
423            if (-e $index_file && ! -z $index_file) {
424                    $opt .= ' -I ';
425                    $self->{'_incremental'} = 1;
426                    print STDERR "## using incremental indexing for $index_file\n" if ($self->{'debug'});
427            } else {
428                    $self->{'_incremental'} = 0;
429            }
430    
431          unless ($self->{'use_stopwrods'}) {          unless ($self->{'use_stopwrods'}) {
432                  open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n";                  open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n";
433                  print STOP "  ";                  print STOP "  ";
# Line 360  sub _init_indexer { Line 435  sub _init_indexer {
435                  $opt .= " -s _stopwords_";                  $opt .= " -s _stopwords_";
436          }          }
437    
438          my $index_dir = $self->{'index_dir'} || confess "no index_dir?";          my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$index_file.' -';
   
         my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$index_dir.'/index -';  
439    
440          print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'});          print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'});
441    
# Line 389  Create temporary file and pass it's name Line 462  Create temporary file and pass it's name
462          }          }
463    );    );
464    
 To delete document, just omit body and meta data.  
   
465  =cut  =cut
466    
467  sub _create_doc {  sub _create_doc {
# Line 405  sub _create_doc { Line 476  sub _create_doc {
476          my $id = $arg->{'path'} || confess "no path?";          my $id = $arg->{'path'} || confess "no path?";
477          $path .= "/$id";          $path .= "/$id";
478    
479          print STDERR "## _create_doc: $path\n" if ($self->{'debug'});          my $rev = $self->{'rev'}++;
480    
481            print STDERR "## _create_doc: $path [$rev]\n" if ($self->{'debug'});
482    
483          open(TMP, '>', $path) || die "can't create temp file $path: $!";          open(TMP, '>', $path) || die "can't create temp file $path: $!";
484    
485          print TMP '<html><head>';          print TMP '<html><head>';
486    
487          $arg->{'body'} ||= '';          my $body = $arg->{'body'};
488    
489            if (defined($body)) {
490                    $self->{'meta_db'}->{"B$id"} = $body;
491            } else {
492                    $body = '';
493            }
494    
495            my $title = $arg->{'title'};
496    
497          if ($arg->{'meta'}) {          if ($arg->{'meta'}) {
498                  foreach my $name (keys %{$arg->{'meta'}}) {                  foreach my $name (keys %{$arg->{'meta'}}) {
499                          my $content = $arg->{'meta'}->{$name};                          my $content = $arg->{'meta'}->{$name};
500                          print TMP qq{<meta name="$name" content="$content">};                          print TMP qq{<meta name="$name" content="$content">};
501                          $arg->{'body'} .= " $content" if ($self->{'meta_in_body'});                          $body .= " $content" if ($self->{'meta_in_body'});
                         $self->{'meta_db'}->{"$id-$name"} = $content;  
502                  }                  }
503                    $arg->{'meta'}->{'title'} = $title;
504                    $self->{'meta_db'}->{"M$id"} = freeze($arg->{'meta'});
505          }          }
506    
         my $title = $arg->{'title'};  
507          if (defined($title)) {          if (defined($title)) {
508                  print TMP "<title>$title</title>";                  $title = "$rev $title";
509                  $arg->{'body'} .= " $title" if ($self->{'meta_in_body'});                  $body .= " $title" if ($self->{'meta_in_body'});
510                  $self->{'meta_db'}->{"$id-title"} = $title;          } else {
511                    $title = "$rev $id";
512          }          }
513    
514          print TMP '</head><body>' . $arg->{'body'} . '</body></html>';          # dump html
515            print TMP "<title>$title</title></head><body>$body</body></html>";
516                    
517          close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";          close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";
518    
519          print { $self->{'_index_fh'} } "$id\n";          print { $self->{'_index_fh'} } "$id\n" || confess "can't pass document $id to indexer: $!";
520            
521            $self->{'meta_db'}->{"R$id"} = $rev;
522    
523            # FIXME this is probably not the right place to update global
524            # maximum revision, but it keeps database in sane state
525            $self->{'meta_db'}->{"Crev"} = $rev;
526  }  }
527    
528  =head2 _close_index  =head2 _close_index
# Line 449  You have to close index before searching Line 538  You have to close index before searching
538  sub _close_index {  sub _close_index {
539          my $self = shift;          my $self = shift;
540    
541            $self->_store_deleted;
542    
543          return unless ($self->{'_index_fh'});          return unless ($self->{'_index_fh'});
544    
545          print STDERR "## close index\n" if ($self->{'debug'});          print STDERR "## close index\n" if ($self->{'debug'});
# Line 456  sub _close_index { Line 547  sub _close_index {
547          close($self->{'_index_fh'}) || confess "can't close index: $!";          close($self->{'_index_fh'}) || confess "can't close index: $!";
548          undef $self->{'_index_fh'};          undef $self->{'_index_fh'};
549    
550            if ($self->{'_incremental'}) {
551                    print STDERR "## move new index over old\n" if ($self->{'debug'});
552                    rename $self->{'index_dir'}.'/index.new',$self->{'index_dir'}.'/index' || die "can't move new index over old one: $!";
553            }
554    
555          return 1;          return 1;
556  }  }
557    
# Line 489  sub _tie_meta_db  { Line 585  sub _tie_meta_db  {
585                  -Flags    => $flags                  -Flags    => $flags
586          or confess "cannot open $file: $! $BerkeleyDB::Error\n" ;          or confess "cannot open $file: $! $BerkeleyDB::Error\n" ;
587    
588            $self->{'rev'} = $self->{'meta_db'}->{'Crev'} || 0;
589    
590            my $delref = $self->{'meta_db'}->{'Cdeleted'};
591            if ($delref) {
592                    $self->{'_deleted'} = thaw($delref);
593    
594                    print "## deleted ",keys %{$self->{'_deleted'}}," records\n" if ($self->{'debug'});
595            } else {
596                    $self->{'_deleted'} = {};
597            }
598    
599            $self->{'_deleted_counter'} = 0;
600          return 1;          return 1;
601  }  }
602    
# Line 513  sub _untie_meta_db { Line 621  sub _untie_meta_db {
621          return 1;          return 1;
622  }  }
623    
624    
625    =head2 _store_deleted
626    
627    Save hash of deleted files using L<Storable>.
628    
629      $i->_store_deleted;
630    
631    =cut
632    
633    sub _store_deleted {
634            my $self = shift;
635    
636            return if (! $self->{'_deleted_counter'});
637    
638            print STDERR "## save deleted ",Dump($self->{'_deleted'}) if ($self->{'debug'});
639    
640            my $d = freeze($self->{'_deleted'});
641    
642            $self->_tie_meta_db(DB_CREATE);
643    
644            $self->{'meta_db'}->{'Cdeleted'} = $d ||
645                    carp "can't store deleted: $!";
646    
647            # reset counter
648            $self->{'_deleted_counter'} = 0;
649    }
650    
651  1;  1;
652  __END__  __END__
653    

Legend:
Removed from v.21  
changed lines
  Added in v.22

  ViewVC Help
Powered by ViewVC 1.1.26