/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1 by dpavlin, Fri Dec 3 13:31:43 2004 UTC revision 4 by dpavlin, Fri Dec 3 19:35:02 2004 UTC
# Line 4  use 5.008004; Line 4  use 5.008004;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.01';  our $VERSION = '0.02';
8    
9  use Carp;  use Carp;
10    use File::Temp qw/ tempdir /;
11    
12  =head1 NAME  =head1 NAME
13    
# Line 20  SWISH::PlusPlus - Perl extension SWISH++ Line 21  SWISH::PlusPlus - Perl extension SWISH++
21  =head1 DESCRIPTION  =head1 DESCRIPTION
22    
23  This is perl module to use SWISH++ indexer by Paul J. Lucas. SWISH++ is  This is perl module to use SWISH++ indexer by Paul J. Lucas. SWISH++ is
24  rewrite of swish-e in C++ with blazingly fast performance, but without  rewrite of swish-e in C++ which is extremely fast (thanks to mmap), but without
25  support for properties (which this module tries to fix)  support for properties (which this module tries to fix).
26    
27    Implementation of this module is crafted after L<Plucene::Simple> and it
28    should be easy to replace Plucene with this module for increased
29    performance. However, this module is not a plug-in replacement.
30    
31  =head1 METHODS  =head1 METHODS
32    
# Line 29  support for properties (which this modul Line 34  support for properties (which this modul
34    
35  Create new indexing object.  Create new indexing object.
36    
37    my $i = new SWISH::PlusPlus(    my $i = SWISH::PlusPlus->open(
38          index => '/path/to/index',          index_dir => '/path/to/index',
39            index => 'index++',
40            search => 'search++',
41    );    );
42    
43  Options to open are following:  Options to open are following:
44    
45  =over 5  =over 5
46    
47    =item C<index_dir>
48    
49    Path to directory in which index will be created.
50    
51  =item C<index>  =item C<index>
52    
53  path to directory in which index will be created.  Full or partial path to SWISH++ index executable. By default, it's B<index>
54    for self-compiled version. If you use Debian GNU/Linux package specify
55    B<index++>. See C<Debian>.
56    
57    =item C<search>
58    
59    Full or partial path to SWISH++ search executable. By default, it's B<search>.
60    
61  =back  =back
62    
63  =cut  =cut
64    
# Constructor: bless the given key => value options into a new object, make
# sure the index directory exists and fill in default executable names.
#
# Options (flat key => value list):
#   index_dir - directory holding the index (required; created if missing)
#   index     - SWISH++ index executable  (defaults to 'index')
#   search    - SWISH++ search executable (defaults to 'search')
#
# Returns the blessed object. Croaks when index_dir is missing, confesses
# when the directory cannot be created.
sub open {
        my $class = shift;
        my $self = {@_};
        bless($self, $class);

        foreach (qw(index_dir)) {
                croak "need $_" unless $self->{$_};
        }

        if (! -e $self->{'index_dir'}) {
                # parenthesised call and low-precedence "or": with "||" the test
                # binds to mkdir's argument and a failed mkdir goes unnoticed
                mkdir($self->{'index_dir'})
                        or confess "can't create index ",$self->{'index_dir'},": $!";
        }

        # default executables
        $self->{'index'} ||= 'index';
        $self->{'search'} ||= 'search';

        # a blessed reference is always true, so there is no failure branch here
        return $self;
}
84    
85    
86    =head2 check_bin
87    
88    Check if swish++ binaries specified in L<open> are available and verify
89    version signature.
90    
91      if ($i->check_bin) {
92            print "swish++ binaries found\n";
93      };
94    
95    It will also setup property
96    
97      $i->{'version'}
98    
99    which you can examine to see version.
100    
101    =cut
102    
# Run both configured SWISH++ executables with -V, make sure each of them
# identifies itself as SWISH++ and that both report the same version line.
#
# On success the version line is stored in $self->{'version'} and 1 is
# returned. If the two lines differ a warning is issued via carp and
# nothing is returned. Confesses when an executable produces no output or
# its output does not start with "SWISH++".
sub check_bin {
        my $self = shift;

        my @tools = qw(index search);
        my %banner;

        # first make sure both executables answer at all
        foreach my $tool (@tools) {
                my $out = `$self->{$tool} -V 2>&1`;
                confess "can't find '",$self->{$tool},"' binary" unless $out;
                $banner{$tool} = $out;
        }

        # then verify the signature of each answer
        foreach my $tool (@tools) {
                chomp $banner{$tool};
                next if ($banner{$tool} =~ m/^SWISH\+\+/);
                confess $self->{$tool}," binary is not SWISH++";
        }

        my ($i, $s) = @banner{@tools};

        unless ($i eq $s) {
                carp "version difference: index is $i while search is $s";
                return;
        }

        $self->{'version'} = $i;
        return 1;
}
124    
125    =head2 index_document
126    
127    Quick way to add simple data to index.
128    
129      $i->index_document($key, $data);
130      $i->index_document( 42 => 'meaning of life' );
131    
132    =cut
133    
# Convenience wrapper: the arguments are key => data pairs; every pair is
# handed to _create_doc with the key as document path and the data as
# body. Always returns 1.
sub index_document {
        my ($self, %body_of) = @_;

        while (my ($key, $data) = each %body_of) {
                $self->_create_doc(path => $key, body => $data);
        }

        return 1;
}
148    
149    =head1 PRIVATE METHODS
150    
151    Private methods implement internals for creating temporary file needed for
152    swish++. You should have no need to call them directly, and they are here
153    just to have documentation.
154    
155    =head2 _init_index
156    
157    Create temporary directory in which files for indexing will be created and
158    start index process.
159    
160      $i->_init_index || die "can't start indexer";
161    
162    =cut
163    
# Create a temporary working directory, chdir into it and start the SWISH++
# indexer so that it reads names of files to index from a pipe.
#
# Sets $self->{'tmp_dir'} and $self->{'index_fh'} and returns the pipe
# handle. Confesses when the temporary directory cannot be created or
# entered, or when the indexer cannot be started.
#
# Side effect: the working directory of the whole process is changed to
# the temporary directory.
sub _init_index {
        my $self = shift;

        require File::Spec;

        $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";

        # use the configured executable; fall back to the former hard-coded name
        my $indexer = $self->{'index'} || 'index';

        # resolve index location before leaving the current directory, otherwise
        # a relative index_dir would be looked up inside the temporary directory
        my $index_file = File::Spec->rel2abs($self->{'index_dir'}).'/index';

        my @cmd = ($indexer, '-v', '4', '-e', 'html:*', '-i', $index_file, '-');

        # parenthesised call and "or": "chdir $dir || confess" never reports failure
        chdir($self->{'tmp_dir'}) or confess "can't chdir to ".$self->{'tmp_dir'}.": $!";

        # list form of pipe open runs the indexer without a shell: paths need no
        # quoting and a missing executable makes open itself fail
        CORE::open($self->{'index_fh'}, '|-', @cmd) or confess "can't start index with @cmd: $!";

        return $self->{'index_fh'};
}
179    
180    =head2 _create_doc
181    
182    Create temporary file and pass its name to swish++
183    
184      $i->_create_doc(
185            path => 'path/to/store/in/index',
186            body => 'data to store in body tag',
187            meta => {
188                    'meta name' => 'data for this meta',
189                    'another' => 'again more data',
190            }
191      );
192    
193    =cut
194    
# Write one document as a small HTML file below the temporary directory and
# pass its name to the running indexer.
#
# Arguments (flat key => value list):
#   path - name of the document, relative to the temporary directory;
#          missing parent directories are created
#   body - text placed inside the <body> element (undef becomes empty)
#   meta - not supported yet; any true value makes the method confess
#
# Returns 1. Dies or confesses when the file cannot be created, written or
# announced to the indexer.
#
# NOTE(review): body is written verbatim, without HTML escaping, exactly as
# before; confirm whether markup characters in the data need escaping.
sub _create_doc {
        my $self = shift;

        my $arg = {@_};

        # refuse unsupported input before anything is written to disk
        if ($arg->{'meta'}) {
                confess "not yet implemented";
        }

        # open indexer if needed
        $self->{'index_fh'} ||= $self->_init_index;

        my $tmp_dir = $self->{'tmp_dir'} || confess "no tmp_dir?";

        require File::Basename;
        require File::Path;

        # create the file explicitly inside tmp_dir instead of relying on the
        # current directory still being the one selected by _init_index
        my $file = $tmp_dir.'/'.$arg->{'path'};
        my $dir = File::Basename::dirname($file);
        File::Path::mkpath($dir) unless (-d $dir);

        CORE::open(my $tmp_fh, '>', $file) || die "can't create temp file ".$arg->{'path'}.": $!";

        # defined-test keeps a body of "0", which "|| ''" would throw away
        my $body = defined($arg->{'body'}) ? $arg->{'body'} : '';

        print {$tmp_fh} '<html><body>' . $body . '</body></html>';

        close($tmp_fh) || confess "can't close tmp file ".$arg->{'path'}.": $!";

        # the indexer runs inside tmp_dir, so it gets the relative name
        print { $self->{'index_fh'} } $arg->{'path'}."\n"
                or confess "can't pass ".$arg->{'path'}." to indexer: $!";

        return 1;
}
219    
220  1;  1;
221  __END__  __END__
222    
# Line 69  __END__ Line 224  __END__
224    
225  None by default.  None by default.
226    
227    =head1 RELATED
228    
229    =head2 Debian
230    
231    Debian version of swish++ is often old (version 5 at moment of this writing
232    while version 6 is available in source code), so this module by default
233    uses executable names B<index> and B<search> for self-compiled version
234    instead of one from Debian package. See L<open> how to specify Debian
235    default binaries B<index++> and B<search++>.
236    
237  =head1 SEE ALSO  =head1 SEE ALSO
238    
239  Mention other useful documentation such as the documentation of  Mention other useful documentation such as the documentation of

Legend:
Removed from v.1  
changed lines
  Added in v.4

  ViewVC Help
Powered by ViewVC 1.1.26