/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 9 by dpavlin, Sun Dec 5 00:59:50 2004 UTC revision 14 by dpavlin, Sun Dec 5 15:35:53 2004 UTC
# Line 4  use 5.008004; Line 4  use 5.008004;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.03';  our $VERSION = '0.06';
8    
9  use Carp;  use Carp;
10  use File::Temp qw/ tempdir /;  use File::Temp qw/ tempdir /;
# Line 31  performance. However, this module is not Line 31  performance. However, this module is not
31    
32  =head1 METHODS  =head1 METHODS
33    
34  =head2 open  =head2 new
35    
36  Create new indexing object.  Create new indexing object.
37    
38    my $i = SWISH::PlusPlus->open(    my $i = SWISH::PlusPlus->new(
39          index_dir => '/path/to/index',          index_dir => '/path/to/index',
40          index => 'index++',          index => 'index++',
41          search => 'search++',          search => 'search++',
# Line 44  Create new indexing object. Line 44  Create new indexing object.
44          use_stopwords => 1,          use_stopwords => 1,
45    );    );
46    
47  Options to open are following:  Options to new are following:
48    
49  =over 5  =over 5
50    
# Line 81  Use built-in SWISH++ stop words. By defa Line 81  Use built-in SWISH++ stop words. By defa
81    
82  =cut  =cut
83    
84  sub open {  sub new {
85          my $class = shift;          my $class = shift;
86          my $self = {@_};          my $self = {@_};
87          bless($self, $class);          bless($self, $class);
# Line 90  sub open { Line 90  sub open {
90                  croak "need $_" unless $self->{$_};                  croak "need $_" unless $self->{$_};
91          }          }
92    
93          if (! -e $self->{'index_dir'}) {          my $index_dir = $self->{'index_dir'};
94                  mkdir $self->{'index_dir'} || confess "can't create index ",$self->{'index'},": $!";  
95            my $cwd;
96            chomp($cwd = `pwd`);
97            $self->{'cwd'} = $cwd || carp "can't get cwd!";
98            
99            if ($index_dir !~ m#^/#) {
100                    $index_dir = "$cwd/$index_dir";
101                    print STDERR "## full path to index_dir: $index_dir\n" if ($self->{'debug'});
102                    $self->{'index_dir'} = $index_dir;
103            }
104    
105            if (! -e $index_dir) {
106                    mkdir $index_dir || confess "can't create index ",$self->{'index'},": $!";
107          }          }
108    
109          # default executables          # default executables
110          $self->{'index'} ||= 'index';          $self->{'index'} ||= 'index';
111          $self->{'search'} ||= 'search';          $self->{'search'} ||= 'search';
112    
113          print STDERR "## open index_dir: ",$self->{'index_dir'}," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'});          print STDERR "## new index_dir: ",$index_dir," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'});
114    
115          $self ? return $self : return undef;          $self ? return $self : return undef;
116  }  }
# Line 106  sub open { Line 118  sub open {
118    
119  =head2 check_bin  =head2 check_bin
120    
121  Check if swish++ binaries specified in L<open> are available and verify  Check if swish++ binaries specified in L<new> are available and verify
122  version signature.  version signature.
123    
124    if ($i->check_bin) {    if ($i->check_bin) {
# Line 134  sub check_bin { Line 146  sub check_bin {
146          confess $self->{'search'}," binary is not SWISH++" unless ($s =~ m/^SWISH\+\+/);          confess $self->{'search'}," binary is not SWISH++" unless ($s =~ m/^SWISH\+\+/);
147    
148          if ($i eq $s) {          if ($i eq $s) {
149                    $i =~ s/^SWISH\+\+\s+// || confess "can't strip SWISH++ from version";
150                  $self->{'version'} = $i;                  $self->{'version'} = $i;
151                  return 1;                  return 1;
152          } else  {          } else  {
# Line 208  sub search { Line 221  sub search {
221    
222          my $query = shift || return;          my $query = shift || return;
223    
224          $self->_close_index;          $self->finish_update;
225    
226          my @results;          my @results;
227    
# Line 218  sub search { Line 231  sub search {
231          my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |';          my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |';
232          print STDERR "## search $open_cmd\n" if ($self->{'debug'});          print STDERR "## search $open_cmd\n" if ($self->{'debug'});
233    
234          CORE::open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!";          open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!";
235          while(<SEARCH>) {          while(<SEARCH>) {
236                  next if (/^#/);                  next if (/^#/);
237                  chomp;                  chomp;
# Line 239  sub search { Line 252  sub search {
252          return @results;          return @results;
253  }  }
254    
255    =head2 finish_update
256    
257    This method will close the index.
258    
259      $i->finish_update;
260    
261    It will be called on DESTROY when $i goes out of scope.
262    
263    =cut
264    
265    sub finish_update {
266            my $self = shift;
267    
268            print STDERR "## finish_update\n" if ($self->{'debug'});
269    
270            $self->_close_index;
271    }
272    
273    sub DESTROY {
274            my $self = shift;
275            $self->finish_update;
276    }
277    
278  =head1 PRIVATE METHODS  =head1 PRIVATE METHODS
279    
280  Private methods implement internals for creating temporary file needed for  Private methods implement internals for creating temporary file needed for
# Line 259  It will also create empty file C<_stopwo Line 295  It will also create empty file C<_stopwo
295  sub _init_indexer {  sub _init_indexer {
296          my $self = shift;          my $self = shift;
297    
298          $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";          return if ($self->{'_index_fh'});
299    
300          chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";          my $tmp_dir = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";
301            $self->{'tmp_dir'} = $tmp_dir;
302    
303          my $opt = "-v 4";          chdir $tmp_dir || confess "can't chdir to ".$tmp_dir.": $!";
304    
305            print STDERR "## tmp_dir: $tmp_dir" if ($self->{'debug'});
306    
307            my $opt = "-v " . ($self->{'debug'} || '0');
308    
309          unless ($self->{'use_stopwrods'}) {          unless ($self->{'use_stopwrods'}) {
310                  CORE::open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n";                  open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n";
311                  print STOP "  ";                  print STOP "  ";
312                  close(STOP);                  close(STOP);
313                  $opt .= " -s _stopwords_";                  $opt .= " -s _stopwords_";
# Line 274  sub _init_indexer { Line 315  sub _init_indexer {
315    
316          my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';          my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';
317    
318            print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'});
319    
320          CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!";          open($self->{'_index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!";
321    
322            chdir $self->{'cwd'} || confess "can't chdir to ".$self->{'cwd'}.": $!";
323    
324          return $self->{'index_fh'};          return $self->{'_index_fh'};
325  }  }
326    
327  =head2 _create_doc  =head2 _create_doc
# Line 305  sub _create_doc { Line 348  sub _create_doc {
348          my $arg = {@_};          my $arg = {@_};
349    
350          # open indexer if needed          # open indexer if needed
351          $self->{'index_fh'} ||= $self->_init_indexer;          $self->_init_indexer;
352    
353          my $path = $self->{'tmp_dir'} || confess "no tmp_dir?";          my $path = $self->{'tmp_dir'} || confess "no tmp_dir?";
354            $path .= '/' . $arg->{'path'};
355    
356            print STDERR "## _create_doc: $path\n" if ($self->{'debug'});
357    
358          CORE::open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!";          open(TMP, '>', $path) || die "can't create temp file $path: $!";
359    
360          print TMP '<html><head>';          print TMP '<html><head>';
361    
362          $arg->{'body'} ||= '';          $arg->{'body'} ||= '';
363    
364          if ($arg->{'meta'}) {          if ($arg->{'meta'}) {
365                  confess "not yet implemented";                  foreach my $name (keys %{$arg->{'meta'}}) {
366                            my $content = $arg->{'meta'}->{$name};
367                            print TMP qq{<meta name="$name" content="$content">};
368                            $arg->{'body'} .= " $content" if ($self->{'meta_in_body'});
369                    }
370          }          }
371    
372          if (defined($arg->{'title'})) {          if (defined($arg->{'title'})) {
# Line 328  sub _create_doc { Line 378  sub _create_doc {
378                    
379          close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";          close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";
380    
381          print { $self->{'index_fh'} } $arg->{'path'}."\n";          print { $self->{'_index_fh'} } $arg->{'path'}."\n";
382  }  }
383    
384  =head2 _close_index  =head2 _close_index
# Line 344  You have to close index before searching Line 394  You have to close index before searching
394  sub _close_index {  sub _close_index {
395          my $self = shift;          my $self = shift;
396    
397          return unless ($self->{'index_fh'});          return unless ($self->{'_index_fh'});
398    
399          print STDERR "## close index\n" if ($self->{'debug'});          print STDERR "## close index\n" if ($self->{'debug'});
400    
401          close($self->{'index_fh'});          close($self->{'_index_fh'});
402          undef $self->{'index_fh'};          undef $self->{'_index_fh'};
403  }  }
404    
405  1;  1;
# Line 366  None by default. Line 416  None by default.
416  Debian version of swish++ is often old (version 5 at moment of this writing  Debian version of swish++ is often old (version 5 at moment of this writing
417  while version 6 is available in source code), so this module by default  while version 6 is available in source code), so this module by default
418  uses executable names B<index> and B<search> for self-compiled version  uses executable names B<index> and B<search> for self-compiled version
419  instead of one from Debian package. See L<open> how to specify Debian  instead of one from Debian package. See L<new> how to specify Debian
420  default binaries B<index++> and B<search++>.  default binaries B<index++> and B<search++>.
421    
422  =head2 SWISH++  =head2 SWISH++
# Line 390  Compilation of SWISH++ is easy process w Line 440  Compilation of SWISH++ is easy process w
440  pages. To see my very relaxed sample configuration take a look at C<swish++>  pages. To see my very relaxed sample configuration take a look at C<swish++>
441  directory included in distribution.  directory included in distribution.
442    
443    =head2 SWISH++ config
444    
445    The C<config.h> located in the C<swish++> directory of this distribution is a relaxed
446    SWISH++ configuration that will index all words passed to it. This
447    configuration is needed for the B<date test> because the default configuration
448    doesn't recognize 2004-12-05 as a date. Keep in mind that your index size
449    might explode.
450    
451  =head1 SEE ALSO  =head1 SEE ALSO
452    
453  C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/>  C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/>

Legend:
Removed from v.9  
changed lines
  Added in v.14

  ViewVC Help
Powered by ViewVC 1.1.26