/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4 - (hide annotations)
Fri Dec 3 19:35:02 2004 UTC (19 years, 3 months ago) by dpavlin
File size: 5585 byte(s)
simple indexing works

1 dpavlin 1 package SWISH::PlusPlus;
2    
3     use 5.008004;
4     use strict;
5     use warnings;
6    
7 dpavlin 3 our $VERSION = '0.02';
8 dpavlin 1
9     use Carp;
10 dpavlin 4 use File::Temp qw/ tempdir /;
11 dpavlin 1
12     =head1 NAME
13    
14     SWISH::PlusPlus - Perl extension for SWISH++
15    
16     =head1 SYNOPSIS
17    
18     use SWISH::PlusPlus;
19     blah blah blah
20    
21     =head1 DESCRIPTION
22    
23     This is a Perl module for using the SWISH++ indexer by Paul J. Lucas. SWISH++ is
24 dpavlin 3 a rewrite of swish-e in C++ which is extremely fast (thanks to mmap), but without
25     support for properties (which this module tries to fix).
26 dpavlin 1
27 dpavlin 3 Implementation of this module is crafted after L<Plucene::Simple> and it
28     should be easy to replace Plucene with this module for increased
29     performance. However, this module is not a drop-in replacement.
30    
31 dpavlin 1 =head1 METHODS
32    
33     =head2 open
34    
35     Create new indexing object.
36    
37 dpavlin 3 my $i = SWISH::PlusPlus->open(
38     index_dir => '/path/to/index',
39     index => 'index++',
40     search => 'search++',
41 dpavlin 1 );
42    
43     Options to open are as follows:
44    
45     =over 5
46    
47 dpavlin 3 =item C<index_dir>
48    
49     Path to directory in which index will be created.
50    
51 dpavlin 1 =item C<index>
52    
53 dpavlin 3 Full or partial path to SWISH++ index executable. By default, it's B<index>
54     for self-compiled version. If you use Debian GNU/Linux package specify
55     B<index++>. See C<Debian>.
56 dpavlin 1
57 dpavlin 3 =item C<search>
58    
59     Full or partial path to SWISH++ search executable. By default, it's B<search>.
60    
61 dpavlin 1 =back
62    
63     =cut
64    
# Constructor: build an object from key => value options and bless it into
# the invoking class.
#
# Options:
#   index_dir - directory holding the index (required; created if missing)
#   index     - indexer executable, defaults to 'index'
#   search    - search executable, defaults to 'search'
#
# Returns the blessed object. Croaks when index_dir is not given; confesses
# when index_dir cannot be created or exists but is not a directory.
sub open {
    my $class = shift;
    my $self  = bless {@_}, $class;

    foreach my $required (qw(index_dir)) {
        croak "need $required" unless $self->{$required};
    }

    my $index_dir = $self->{'index_dir'};
    if (! -e $index_dir) {
        # "or", not "||": the high-precedence operator binds to the mkdir
        # argument, so a failed mkdir would go unnoticed.
        mkdir($index_dir)
            or confess "can't create index directory $index_dir: $!";
    }
    elsif (! -d $index_dir) {
        confess "index_dir $index_dir exists but is not a directory";
    }

    # default executables
    $self->{'index'}  ||= 'index';
    $self->{'search'} ||= 'search';

    return $self;
}
84    
85    
86 dpavlin 3 =head2 check_bin
87    
88     Check if swish++ binaries specified in L<open> are available and verify
89     version signature.
90    
91     if ($i->check_bin) {
92     print "swish++ binaries found\n";
93     };
94    
95     It will also set the property
96    
97     $i->{'version'}
98    
99     which you can examine to see version.
100    
101     =cut
102    
# Run both configured executables with -V, make sure each one identifies
# itself as SWISH++ and compare the two version banners.
#
# Returns 1 and stores the banner in $self->{'version'} when both banners
# are identical; warns and returns nothing when they differ. Confesses when
# an executable produces no output or its output does not start with
# "SWISH++".
sub check_bin {
    my ($self) = @_;

    # Collect both banners first; validation happens only afterwards.
    my %banner;
    for my $tool (qw(index search)) {
        my $bin = $self->{$tool};
        $banner{$tool} = `$bin -V 2>&1` || confess "can't find '",$bin,"' binary";
    }
    chomp @banner{qw(index search)};

    for my $tool (qw(index search)) {
        confess $self->{$tool}," binary is not SWISH++"
            unless $banner{$tool} =~ m/^SWISH\+\+/;
    }

    my ($index_version, $search_version) = @banner{qw(index search)};

    if ($index_version ne $search_version) {
        carp "version difference: index is $index_version while search is $search_version";
        return;
    }

    $self->{'version'} = $index_version;
    return 1;
}
124    
125 dpavlin 4 =head2 index_document
126    
127     Quick way to add simple data to index.
128    
129     $i->index_document($key, $data);
130     $i->index_document( 42 => 'meaning of life' );
131    
132     =cut
133    
# Index every key => data pair passed in: the key becomes the document
# path and the data becomes the document body. Always returns 1.
sub index_document {
    my ($self, %body_for) = @_;

    while ( my ($doc_id, $body) = each %body_for ) {
        $self->_create_doc( path => $doc_id, body => $body );
    }

    return 1;
}
148    
149     =head1 PRIVATE METHODS
150    
151     Private methods implement internals for creating temporary file needed for
152     swish++. You should have no need to call them directly, and they are here
153     just to have documentation.
154    
155     =head2 _init_index
156    
157     Create temporary directory in which files for indexing will be created and
158     start index process.
159    
160     $i->_init_index || die "can't start indexer";
161    
162     =cut
163    
# Create a temporary working directory, chdir into it and start the indexer
# executable as a child process that is fed through a pipe.
#
# Side effects: sets $self->{'tmp_dir'} and $self->{'index_fh'} and changes
# the current working directory of this process to the temporary directory.
#
# Returns the write end of the pipe to the indexer. Confesses when the
# temporary directory cannot be created or entered, or when the indexer
# cannot be started.
sub _init_index {
    my $self = shift;

    # Resolve index_dir before the chdir below; a relative path would
    # otherwise be interpreted relative to the temporary directory.
    require File::Spec;
    my $index_file = File::Spec->rel2abs( $self->{'index_dir'} ) . '/index';

    $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";

    # Same options the original shell string carried, passed as a list so
    # that no shell is involved and paths with spaces or metacharacters
    # survive. The executable is the one configured in open(), not a
    # hard-coded name.
    my @cmd = (
        $self->{'index'} || 'index',
        '-v', '4',
        '-e', 'html:*',
        '-i', $index_file,
        '-',
    );

    # "or", not "||": the high-precedence operator would bind to the
    # directory name and hide a failed chdir.
    chdir( $self->{'tmp_dir'} )
        or confess "can't chdir to ".$self->{'tmp_dir'}.": $!";

    CORE::open( my $index_fh, '|-', @cmd )
        or confess "can't start index with @cmd: $!";

    $self->{'index_fh'} = $index_fh;

    return $self->{'index_fh'};
}
179    
180     =head2 _create_doc
181    
182     Create temporary file and pass its name to swish++
183    
184     $i->_create_doc(
185     path => 'path/to/store/in/index',
186     body => 'data to store in body tag',
187     meta => {
188     'meta name' => 'data for this meta',
189     'another' => 'again more data',
190     }
191     );
192    
193     =cut
194    
# Write one document as a small HTML file inside the temporary directory
# and send its path to the indexer over $self->{'index_fh'}.
#
# Arguments (key => value):
#   path - name of the document, relative to the temporary directory
#   body - text placed inside the <body> element (optional)
#   meta - not supported yet; passing it is a fatal error
#
# Starts the indexer via _init_index when no handle is open yet.
# Returns 1 on success. Croaks when path is missing; confesses on any
# I/O failure or when meta is given.
sub _create_doc {
    my $self = shift;

    my $arg = {@_};

    my $doc_path = $arg->{'path'};
    croak "need path" unless defined $doc_path && length $doc_path;

    # Reject unsupported input before anything is written, so that no
    # half-written file or open handle is left behind.
    if ($arg->{'meta'}) {
        confess "not yet implemented";
    }

    # open indexer if needed
    $self->{'index_fh'} ||= $self->_init_index;

    my $tmp_dir = $self->{'tmp_dir'} || confess "no tmp_dir?";

    # Anchor relative names in tmp_dir instead of relying on the current
    # working directory still being the one set by _init_index.
    my $file = $doc_path =~ m{\A/} ? $doc_path : "$tmp_dir/$doc_path";

    # defined-check rather than "||" so that a body of "0" is kept
    my $body = defined $arg->{'body'} ? $arg->{'body'} : '';

    CORE::open(my $tmp_fh, '>', $file)
        or confess "can't create temp file $file: $!";

    print {$tmp_fh} '<html><body>' . $body . '</body></html>'
        or confess "can't write tmp file $file: $!";

    close($tmp_fh) or confess "can't close tmp file $file: $!";

    # The indexer receives the name exactly as the caller passed it.
    print { $self->{'index_fh'} } $doc_path."\n"
        or confess "can't pass $doc_path to indexer: $!";

    return 1;
}
219    
220 dpavlin 1 1;
221     __END__
222    
223     =head2 EXPORT
224    
225     None by default.
226    
227 dpavlin 3 =head1 RELATED
228    
229     =head2 Debian
230    
231     Debian version of swish++ is often old (version 5 at moment of this writing
232     while version 6 is available in source code), so this module by default
233     uses executable names B<index> and B<search> for self-compiled version
234     instead of one from Debian package. See L<open> how to specify Debian
235     default binaries B<index++> and B<search++>.
236    
237 dpavlin 1 =head1 SEE ALSO
238    
239     Mention other useful documentation such as the documentation of
240     related modules or operating system documentation (such as man pages
241     in UNIX), or any relevant external documentation such as RFCs or
242     standards.
243    
244     If you have a mailing list set up for your module, mention it here.
245    
246     If you have a web site set up for your module, mention it here.
247    
248     =head1 AUTHOR
249    
250     Dobrica Pavlinusic, E<lt>dpavlin@E<gt>
251    
252     =head1 COPYRIGHT AND LICENSE
253    
254     Copyright (C) 2004 by Dobrica Pavlinusic
255    
256     This library is free software; you can redistribute it and/or modify
257     it under the same terms as Perl itself, either Perl version 5.8.4 or,
258     at your option, any later version of Perl 5 you may have available.
259    
260    
261     =cut

  ViewVC Help
Powered by ViewVC 1.1.26