/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 4 by dpavlin, Fri Dec 3 19:35:02 2004 UTC revision 8 by dpavlin, Sat Dec 4 17:49:20 2004 UTC
# Line 8  our $VERSION = '0.02'; Line 8  our $VERSION = '0.02';
8    
9  use Carp;  use Carp;
10  use File::Temp qw/ tempdir /;  use File::Temp qw/ tempdir /;
11    #use YAML;
12    
13  =head1 NAME  =head1 NAME
14    
# Line 38  Create new indexing object. Line 39  Create new indexing object.
39          index_dir => '/path/to/index',          index_dir => '/path/to/index',
40          index => 'index++',          index => 'index++',
41          search => 'search++',          search => 'search++',
42            debug => 1,
43    );    );
44    
45  Options to open are following:  Options to open are following:
# Line 58  B<index++>. See C<Debian>. Line 60  B<index++>. See C<Debian>.
60    
61  Full or partial path to SWISH++ search executable. By default, it's B<search>.  Full or partial path to SWISH++ search executable. By default, it's B<search>.
62    
63    =item C<debug>
64    
65    This option (off by default) will produce a lot of debugging output on
66    C<STDERR> prefixed by C<##>.
67    
68  =back  =back
69    
70  =cut  =cut
# Line 79  sub open { Line 86  sub open {
86          $self->{'index'} ||= 'index';          $self->{'index'} ||= 'index';
87          $self->{'search'} ||= 'search';          $self->{'search'} ||= 'search';
88    
89            print STDERR "## open index_dir: ",$self->{'index_dir'}," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'});
90    
91          $self ? return $self : return undef;          $self ? return $self : return undef;
92  }  }
93    
# Line 146  sub index_document { Line 155  sub index_document {
155          return 1;          return 1;
156  }  }
157    
158    =head2 search
159    
160    Search your index.
161    
162      my @results = $i->search("swish query");
163    
164    Returns a list of hash references, one per hit, each with the keys C<rank>, C<path>, C<size> and C<title>.
165    
166    =cut
167    
168    sub search {
               # Run the configured search executable against
               # <index_dir>/index and return one hash reference per
               # result line, with keys rank, path, size and title.
169            my $self = shift;
170    
               # Bare return (empty list / undef) when no query is given.
               # NOTE(review): because of ||, any false query -- including
               # the string "0" -- is treated as missing.
171            my $query = shift || return;
172    
               # Close the indexing handle, if one is open, before reading
               # the index.
173            $self->_close_index;
174    
175            my @results;
176    
177            # Escape double quotes so the query stays inside the "..." used
               # in the command line below.
               # NOTE(review): only '"' is escaped; backslash, backtick and
               # '$' in the query still reach the shell unescaped.
178            $query =~ s/"/\\"/g;
179    
               # Trailing '|' makes this a read-from-command pipe open.
180            my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |';
181            print STDERR "## search $open_cmd\n" if ($self->{'debug'});
182    
               # CORE::open is spelled out because this package defines its
               # own open().
183            CORE::open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!";
184            while(<SEARCH>) {
                       # Skip lines beginning with '#' (presumably header and
                       # summary lines from the search program -- confirm).
185                    next if (/^#/);
186                    chomp;
187                    print STDERR "## $_\n" if ($self->{'debug'});
                       # Limit of 4 fields: everything after the third space
                       # stays in $title, so titles may contain spaces.
188                    my ($rank,$path,$size,$title) = split(/ /,$_,4);
189                    push @results, {
190                            rank => $rank,
191                            path => $path,
192                            size => $size,
193                            title => $title,
194                    }
195            }
196    
               # NOTE(review): close() on a pipe also returns false when the
               # command exits non-zero, so this confess may fire for a
               # failing search run, not just for an I/O error -- confirm
               # how the search binary exits on "no results".
197            close(SEARCH) || confess "can't close search";
198    
199            #print STDERR "## results: ",Dump(@results),"\n" if ($self->{'debug'});
200    
201            return @results;
202    }
203    
204  =head1 PRIVATE METHODS  =head1 PRIVATE METHODS
205    
206  Private methods implement internals for creating temporary file needed for  Private methods implement internals for creating temporary file needed for
# Line 168  sub _init_index { Line 223  sub _init_index {
223    
224          my $opt = "-v 4";          my $opt = "-v 4";
225    
226          my $open_cmd = '| index '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';          my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';
227    
228          chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";          chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";
229    
# Line 190  Create temporary file and pass it's name Line 245  Create temporary file and pass it's name
245          }          }
246    );    );
247    
248    To delete document, just omit body and meta data.
249    
250  =cut  =cut
251    
252  sub _create_doc {  sub _create_doc {
# Line 217  sub _create_doc { Line 274  sub _create_doc {
274          print { $self->{'index_fh'} } $arg->{'path'}."\n";          print { $self->{'index_fh'} } $arg->{'path'}."\n";
275  }  }
276    
277    =head2 _close_index
278    
279    Close index after indexing.
280    
281      $i->_close_index;
282    
283    You have to close index before searching.
284    
285    =cut
286    
287    sub _close_index {
               # Close the handle stored in $self->{'index_fh'} and clear the
               # slot. Does nothing when no handle is stored.
288            my $self = shift;
289    
               # Nothing to close: return early.
290            return unless ($self->{'index_fh'});
291    
292            print STDERR "## close index\n" if ($self->{'debug'});
293    
               # NOTE(review): the result of close() is not checked; if
               # index_fh is a pipe to the indexer (presumably opened in
               # _init_index -- confirm), a failed indexer run goes unnoticed.
294            close($self->{'index_fh'});
               # Clear the slot so the guard above short-circuits later calls.
295            undef $self->{'index_fh'};
296    }
297    
298  1;  1;
299  __END__  __END__
300    
# Line 234  uses executable names B<index> and B<sea Line 312  uses executable names B<index> and B<sea
312  instead of one from Debian package. See L<open> how to specify Debian  instead of one from Debian package. See L<open> how to specify Debian
313  default binaries B<index++> and B<search++>.  default binaries B<index++> and B<search++>.
314    
315  =head1 SEE ALSO  =head2 SWISH++
316    
317  Mention other useful documentation such as the documentation of  Aside from being a very good rewrite in C++, SWISH++ is faster because it has
318  related modules or operating system documentation (such as man pages  clever heuristics about which data in input files are words to index and
319  in UNIX), or any relevant external documentation such as RFCs or  which are not. It's based on English language and might be best choice if
320  standards.  you plan to install large amount of long text documents.
321    
322    However, if you plan to index all data from structured storage (e.g. RDBMS)
323    you might want B<all> words from data to end up in index as opposed to just
324    those which look like English words. This is especially important if you
325    don't plan to index English texts with this module.
326    
327    With distribution-built versions of SWISH++ you might have problems with
328    disappearing words. To overcome this problem, you will have to compile and
329    configure SWISH++ yourself (because language characteristics are
330    compilation-time option).
331    
332    Compilation of SWISH++ is an easy process, well described on the project's web
333    pages. To see my very relaxed sample configuration take a look at C<swish++>
334    directory included in distribution.
335    
336  If you have a mailing list set up for your module, mention it here.  =head1 SEE ALSO
337    
338  If you have a web site set up for your module, mention it here.  C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/>
339    
340  =head1 AUTHOR  =head1 AUTHOR
341    
342  Dobrica Pavlinusic, E<lt>dpavlin@E<gt>  Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
343    
344  =head1 COPYRIGHT AND LICENSE  =head1 COPYRIGHT AND LICENSE
345    

Legend:
Removed from v.4  
changed lines
  Added in v.8

  ViewVC Help
Powered by ViewVC 1.1.26