--- trunk/PlusPlus.pm 2004/12/03 13:31:43 1
+++ trunk/PlusPlus.pm 2004/12/03 19:35:02 4
@@ -4,9 +4,10 @@
 use strict;
 use warnings;
 
-our $VERSION = '0.01';
+our $VERSION = '0.02';
 
 use Carp;
+use File::Temp qw/ tempdir /;
 
 =head1 NAME
 
@@ -20,8 +21,12 @@
 =head1 DESCRIPTION
 
 This is perl module to use SWISH++ indexer by Paul J. Lucas. SWISH++ is
-rewrite of swish-e in C++ with blazingly fast performance, but without
-support for properties (which this module tries to fix)
+rewrite of swish-e in C++ which is extremely fast (thanks to mmap), but without
+support for properties (which this module tries to fix).
+
+Implementation of this module is crafted after L<Plucene::Simple> and it
+should be easy to replace Plucene with this module for increased
+performance. However, this module is not a plug-in replacement.
 
 =head1 METHODS
 
@@ -29,39 +34,189 @@
 
 Create new indexing object.
 
-  my $i = new SWISH::PlusPlus(
-    index => '/path/to/index',
+  my $i = SWISH::PlusPlus->open(
+    index_dir => '/path/to/index',
+    index => 'index++',
+    search => 'search++',
   );
 
 Options to open are following:
 
 =over 5
 
+=item C<index_dir>
+
+Path to directory in which index will be created.
+
 =item C<index>
 
-path to directory in which index will be created.
+Full or partial path to SWISH++ index executable. By default, it's B<index>
+for self-compiled version. If you use Debian GNU/Linux package specify
+B<index++>. See C<Debian>.
+
+=item C<search>
+
+Full or partial path to SWISH++ search executable. By default, it's B<search>.
 
 =back
 
 =cut
 
-sub new {
+sub open {
     my $class = shift;
     my $self = {@_};
     bless($self, $class);
 
-    foreach (qw(index)) {
+    foreach (qw(index_dir)) {
         croak "need $_" unless $self->{$_};
     }
 
-    if (! -e $self->{'index'}) {
-        mkdir $self->{'index'} || confess "can't create index ",$self->{'index'},": $!";
+    if (! -e $self->{'index_dir'}) {
+        mkdir($self->{'index_dir'}) || confess "can't create index ",$self->{'index_dir'},": $!"; # parens: || must test mkdir's result, not its argument
     }
 
+    # default executables
+    $self->{'index'} ||= 'index';
+    $self->{'search'} ||= 'search';
+
     $self ? return $self : return undef;
 }
 
+=head2 check_bin
+
+Check if swish++ binaries specified in L</open> are available and verify
+version signature.
+
+  if ($i->check_bin) {
+    print "swish++ binaries found\n";
+  };
+
+It will also setup property
+
+  $i->{'version'}
+
+which you can examine to see version.
+
+=cut
+
+sub check_bin {
+    my $self = shift;
+
+    my $i = `$self->{'index'} -V 2>&1` || confess "can't find '",$self->{'index'},"' binary";
+    my $s = `$self->{'search'} -V 2>&1` || confess "can't find '",$self->{'search'},"' binary";
+
+    chomp $i;
+    chomp $s;
+
+    confess $self->{'index'}," binary is not SWISH++" unless ($i =~ m/^SWISH\+\+/);
+    confess $self->{'search'}," binary is not SWISH++" unless ($s =~ m/^SWISH\+\+/);
+
+    if ($i eq $s) {
+        $self->{'version'} = $i;
+        return 1;
+    } else {
+        carp "version difference: index is $i while search is $s";
+        return;
+    }
+
+}
+
+=head2 index_document
+
+Quick way to add simple data to index.
+
+  $i->index_document($key, $data);
+  $i->index_document( 42 => 'meaning of life' );
+
+=cut
+
+sub index_document {
+    my $self = shift;
+
+    my %doc = @_;
+
+    foreach my $id (keys %doc) {
+        $self->_create_doc(
+            path => $id,
+            body => $doc{$id},
+        );
+    }
+
+    return 1;
+}
+
+=head1 PRIVATE METHODS
+
+Private methods implement internals for creating temporary file needed for
+swish++. You should have no need to call them directly, and they are here
+just to have documentation.
+
+=head2 _init_index
+
+Create temporary directory in which files for indexing will be created and
+start index process.
+
+  $i->_init_index || die "can't start indexer";
+
+=cut
+
+sub _init_index {
+    my $self = shift;
+
+    $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";
+
+    my $opt = "-v 4";
+
+    my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; # honour the configured index executable
+
+    chdir($self->{'tmp_dir'}) || confess "can't chdir to ".$self->{'tmp_dir'}.": $!"; # parens: || must test chdir's result, not its argument
+
+    CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!";
+
+    return $self->{'index_fh'};
+}
+
+=head2 _create_doc
+
+Create temporary file and pass its name to swish++
+
+  $i->_create_doc(
+    path => 'path/to/store/in/index',
+    body => 'data to store in body tag',
+    meta => {
+      'meta name' => 'data for this meta',
+      'another' => 'again more data',
+    }
+  );
+
+=cut
+
+sub _create_doc {
+    my $self = shift;
+
+    my $arg = {@_};
+
+    # open indexer if needed
+    $self->{'index_fh'} ||= $self->_init_index;
+
+    my $path = $self->{'tmp_dir'} || confess "no tmp_dir?";
+
+    CORE::open(my $tmp, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!"; # lexical handle is closed automatically if we confess below
+
+    print {$tmp} '';
+
+    if ($arg->{'meta'}) {
+        confess "not yet implemented";
+    }
+
+    print {$tmp} '' . ($arg->{'body'} || '') . '';
+
+    close($tmp) || confess "can't close tmp file ".$arg->{'path'}.": $!";
+
+    print { $self->{'index_fh'} } $arg->{'path'}."\n";
+}
+
 1;
 __END__
 
@@ -69,6 +224,16 @@
 
 None by default.
 
+=head1 RELATED
+
+=head2 Debian
+
+Debian version of swish++ is often old (version 5 at moment of this writing
+while version 6 is available in source code), so this module by default
+uses executable names B<index> and B<search> for self-compiled version
+instead of one from Debian package. See L</open> how to specify Debian
+default binaries B<index++> and B<search++>.
+
 =head1 SEE ALSO
 
 Mention other useful documentation such as the documentation of