/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4 - (show annotations)
Fri Dec 3 19:35:02 2004 UTC (19 years, 3 months ago) by dpavlin
File size: 5585 byte(s)
simple indexing works

1 package SWISH::PlusPlus;
2
3 use 5.008004;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.02';
8
9 use Carp;
10 use File::Temp qw/ tempdir /;
11
12 =head1 NAME
13
14 SWISH::PlusPlus - Perl extension for SWISH++
15
16 =head1 SYNOPSIS
17
18 use SWISH::PlusPlus;
19 blah blah blah
20
21 =head1 DESCRIPTION
22
23 This is a Perl module for using the SWISH++ indexer by Paul J. Lucas. SWISH++ is a
24 rewrite of swish-e in C++ which is extremely fast (thanks to mmap), but without
25 support for properties (which this module tries to fix).
26
27 The implementation of this module is modeled after L<Plucene::Simple>, and it
28 should be easy to replace Plucene with this module for increased
29 performance. However, this module is not a drop-in replacement.
30
31 =head1 METHODS
32
33 =head2 open
34
35 Create new indexing object.
36
37 my $i = SWISH::PlusPlus->open(
38 index_dir => '/path/to/index',
39 index => 'index++',
40 search => 'search++',
41 );
42
43 The options to open are as follows:
44
45 =over 5
46
47 =item C<index_dir>
48
49 Path to directory in which index will be created.
50
51 =item C<index>
52
53 Full or partial path to SWISH++ index executable. By default, it's B<index>
54 for self-compiled version. If you use Debian GNU/Linux package specify
55 B<index++>. See C<Debian>.
56
57 =item C<search>
58
59 Full or partial path to SWISH++ search executable. By default, it's B<search>.
60
61 =back
62
63 =cut
64
# Constructor: build an indexing object from the named options
# (index_dir, index, search) and make sure the index directory exists.
#
# Parameters (key => value list):
#   index_dir - directory holding the index (required)
#   index     - SWISH++ index executable  (defaults to 'index')
#   search    - SWISH++ search executable (defaults to 'search')
#
# Returns the blessed object.
# Croaks when index_dir is missing; confesses when the directory does not
# exist and cannot be created (this includes the case where index_dir
# exists but is not a directory).
sub open {
    my $class = shift;
    my $self  = {@_};
    bless($self, $class);

    foreach my $required (qw(index_dir)) {
        croak "need $required" unless $self->{$required};
    }

    if (! -d $self->{'index_dir'}) {
        # Low-precedence "or" so the failure handler applies to mkdir's
        # result; with "||" it would bind to the directory name and a failed
        # mkdir would go unnoticed.
        mkdir($self->{'index_dir'})
            or confess "can't create index directory ", $self->{'index_dir'}, ": $!";
    }

    # default executables
    $self->{'index'}  ||= 'index';
    $self->{'search'} ||= 'search';

    return $self;
}
84
85
86 =head2 check_bin
87
88 Check if swish++ binaries specified in L<open> are available and verify
89 version signature.
90
91 if ($i->check_bin) {
92 print "swish++ binaries found\n";
93 };
94
95 It will also set the property
96
97 $i->{'version'}
98
99 which you can examine to see the version.
100
101 =cut
102
# Run both configured executables with -V, verify that each one identifies
# itself as SWISH++ and that both report the same version string.
#
# Returns 1 and stores the version string in $self->{'version'} when the
# versions match; carps and returns nothing when they differ.
# Confesses when a binary produces no output or its output does not start
# with "SWISH++".
sub check_bin {
    my $self = shift;

    my @tools = qw(index search);
    my %banner;

    # First pass: collect the version banner of every tool (stderr included).
    for my $tool (@tools) {
        $banner{$tool} = `$self->{$tool} -V 2>&1`
            || confess "can't find '", $self->{$tool}, "' binary";
    }
    chomp @banner{@tools};

    # Second pass: each banner has to carry the SWISH++ signature.
    for my $tool (@tools) {
        confess $self->{$tool}, " binary is not SWISH++"
            unless $banner{$tool} =~ m/^SWISH\+\+/;
    }

    unless ($banner{'index'} eq $banner{'search'}) {
        carp "version difference: index is $banner{'index'} while search is $banner{'search'}";
        return;
    }

    $self->{'version'} = $banner{'index'};
    return 1;
}
124
125 =head2 index_document
126
127 Quick way to add simple data to index.
128
129 $i->index_document($key, $data);
130 $i->index_document( 42 => 'meaning of life' );
131
132 =cut
133
# Index a list of key => data pairs: every key becomes the document path
# and its value the document body, handed to _create_doc one pair at a time.
# Always returns 1.
sub index_document {
    my ($self, %body_for) = @_;

    while (my ($key, $text) = each %body_for) {
        $self->_create_doc(path => $key, body => $text);
    }

    return 1;
}
148
149 =head1 PRIVATE METHODS
150
151 Private methods implement the internals for creating the temporary files needed by
152 swish++. You should have no need to call them directly; they are listed here
153 only for the sake of documentation.
154
155 =head2 _init_index
156
157 Create temporary directory in which files for indexing will be created and
158 start index process.
159
160 $i->_init_index || die "can't start indexer";
161
162 =cut
163
# Create the temporary directory that will hold the files to index, chdir
# into it and start the SWISH++ indexer reading file names from its
# standard input.
#
# Reads:   $self->{'index'} (executable, falls back to 'index') and
#          $self->{'index_dir'}.
# Sets:    $self->{'tmp_dir'} and $self->{'index_fh'}.
# Returns: the write handle connected to the indexer's standard input.
# Confesses when the temporary directory cannot be created or entered, or
# when the indexer cannot be started.
#
# NOTE: this changes the working directory of the whole process.
sub _init_index {
    my $self = shift;

    $self->{'tmp_dir'} = tempdir( CLEANUP => 1 )
        or confess "can't create temporary directory: $!";

    # Resolve index_dir before leaving the current directory; a relative
    # path would otherwise end up pointing inside the temporary directory.
    require File::Spec;
    my $index_file = File::Spec->rel2abs( $self->{'index_dir'} ) . '/index';

    # Use the executable configured in open() instead of a fixed name.
    my @cmd = (
        $self->{'index'} || 'index',
        '-v', '4',
        '-e', 'html:*',
        '-i', $index_file,
        '-',
    );

    # Low-precedence "or" so that a failed chdir is actually reported.
    chdir( $self->{'tmp_dir'} )
        or confess "can't chdir to " . $self->{'tmp_dir'} . ": $!";

    # List-form pipe open: no shell is involved, so paths containing spaces
    # or shell metacharacters are passed to the indexer unchanged.
    CORE::open( my $fh, '|-', @cmd )
        or confess "can't start index with @cmd: $!";

    $self->{'index_fh'} = $fh;
    return $self->{'index_fh'};
}
179
180 =head2 _create_doc
181
182 Create a temporary file and pass its name to swish++
183
184 $i->_create_doc(
185 path => 'path/to/store/in/index',
186 body => 'data to store in body tag',
187 meta => {
188 'meta name' => 'data for this meta',
189 'another' => 'again more data',
190 }
191 );
192
193 =cut
194
# Write one document as a small HTML file below the temporary directory and
# send its (relative) name to the running indexer.
#
# Parameters (key => value list):
#   path - name under which the document is stored in the index (required)
#   body - text placed inside the <body> tag (optional)
#   meta - hash of meta data; not yet implemented, confesses when given
#
# Returns 1 on success.
# Confesses on a missing path, a missing temporary directory, or any
# failure to write the file or to talk to the indexer.
sub _create_doc {
    my $self = shift;

    my $arg = {@_};

    # Validate first, so that a bad call leaves no half-written file behind.
    confess "need path"
        unless defined $arg->{'path'} && length $arg->{'path'};
    confess "not yet implemented" if $arg->{'meta'};

    # open indexer if needed
    $self->{'index_fh'} ||= $self->_init_index;

    my $tmp_dir = $self->{'tmp_dir'} || confess "no tmp_dir?";

    # Build the file name from tmp_dir explicitly instead of relying on the
    # current working directory, and create any directories the path needs.
    my $file = $tmp_dir . '/' . $arg->{'path'};
    require File::Basename;
    require File::Path;
    my $dir = File::Basename::dirname($file);
    File::Path::mkpath($dir) unless -d $dir;

    # "defined" rather than "||" so that a body of "0" is kept.
    my $body = defined $arg->{'body'} ? $arg->{'body'} : '';

    CORE::open( my $tmp_fh, '>', $file )
        or confess "can't create temp file $file: $!";
    print {$tmp_fh} '<html><body>' . $body . '</body></html>'
        or confess "can't write tmp file $file: $!";
    close($tmp_fh)
        or confess "can't close tmp file $file: $!";

    # The indexer was started inside tmp_dir, so it gets the relative name.
    print { $self->{'index_fh'} } $arg->{'path'} . "\n"
        or confess "can't send " . $arg->{'path'} . " to indexer: $!";

    return 1;
}
219
220 1;
221 __END__
222
223 =head2 EXPORT
224
225 None by default.
226
227 =head1 RELATED
228
229 =head2 Debian
230
231 The Debian version of swish++ is often old (version 5 at the time of this writing,
232 while version 6 is available as source code), so by default this module
233 uses the executable names B<index> and B<search> of a self-compiled version
234 instead of the ones from the Debian package. See L<open> for how to specify the Debian
235 default binaries B<index++> and B<search++>.
236
237 =head1 SEE ALSO
238
239 Mention other useful documentation such as the documentation of
240 related modules or operating system documentation (such as man pages
241 in UNIX), or any relevant external documentation such as RFCs or
242 standards.
243
244 If you have a mailing list set up for your module, mention it here.
245
246 If you have a web site set up for your module, mention it here.
247
248 =head1 AUTHOR
249
250 Dobrica Pavlinusic, E<lt>dpavlin@E<gt>
251
252 =head1 COPYRIGHT AND LICENSE
253
254 Copyright (C) 2004 by Dobrica Pavlinusic
255
256 This library is free software; you can redistribute it and/or modify
257 it under the same terms as Perl itself, either Perl version 5.8.4 or,
258 at your option, any later version of Perl 5 you may have available.
259
260
261 =cut

  ViewVC Help
Powered by ViewVC 1.1.26