7 |
our $VERSION = '0.02'; |
our $VERSION = '0.02'; |
8 |
|
|
9 |
use Carp; |
use Carp; |
10 |
|
use File::Temp qw/ tempdir /; |
11 |
|
|
12 |
=head1 NAME |
=head1 NAME |
13 |
|
|
122 |
|
|
123 |
} |
} |
124 |
|
|
125 |
|
=head2 index_document |
126 |
|
|
127 |
|
Quick way to add simple data to index. |
128 |
|
|
129 |
|
$i->index_document($key, $data); |
130 |
|
$i->index_document( 42 => 'meaning of life' ); |
131 |
|
|
132 |
|
=cut |
133 |
|
|
134 |
|
sub index_document {
    # Everything after the invocant is treated as id => content pairs.
    my ($self, %body_for) = @_;

    # Hand each pair to _create_doc, which writes the temporary file and
    # queues its name for the indexer.
    $self->_create_doc(
        path => $_,
        body => $body_for{$_},
    ) for keys %body_for;

    # Always report success; failures surface as exceptions from _create_doc.
    return 1;
}
148 |
|
|
149 |
|
=head1 PRIVATE METHODS |
150 |
|
|
151 |
|
Private methods implement internals for creating temporary files needed for |
152 |
|
swish++. You should have no need to call them directly, and they are here |
153 |
|
just to have documentation. |
154 |
|
|
155 |
|
=head2 _init_index |
156 |
|
|
157 |
|
Create temporary directory in which files for indexing will be created and |
158 |
|
start index process. |
159 |
|
|
160 |
|
$i->_init_index || die "can't start indexer"; |
161 |
|
|
162 |
|
=cut |
163 |
|
|
164 |
|
sub _init_index {
    my $self = shift;

    # Scratch directory that will hold the generated documents; CLEANUP => 1
    # asks File::Temp to remove it when the program exits.
    $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";

    my $opt = "-v 4";

    # The trailing '-' makes the indexer take file names from its standard
    # input, which is what _create_doc prints to the returned handle.
    # NOTE(review): index_dir is interpolated unquoted into a shell command and
    # the command runs after the chdir below, so a relative or space-containing
    # index_dir will misbehave -- confirm the caller always stores an absolute,
    # shell-safe path.
    my $index_cmd = 'index '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';

    # Kept only so the error message below reads the same as before.
    my $open_cmd = '| '.$index_cmd;

    # Parentheses are required: "chdir $dir || confess ..." parses as
    # chdir($dir || confess ...), which never reports a failed chdir.
    chdir($self->{'tmp_dir'}) || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";

    # Three-argument pipe open: same shell command as before, but the mode
    # can no longer be influenced by the contents of the command string.
    # CORE::open is spelled out, as in the original, to name the builtin explicitly.
    CORE::open($self->{'index_fh'}, '|-', $index_cmd) || confess "can't start index with $open_cmd: $!";

    # Write handle connected to the indexer's standard input.
    return $self->{'index_fh'};
}
179 |
|
|
180 |
|
=head2 _create_doc |
181 |
|
|
182 |
|
Create temporary file and pass its name to swish++ |
183 |
|
|
184 |
|
$i->_create_doc( |
185 |
|
path => 'path/to/store/in/index', |
186 |
|
body => 'data to store in body tag', |
187 |
|
meta => { |
188 |
|
'meta name' => 'data for this meta', |
189 |
|
'another' => 'again more data', |
190 |
|
} |
191 |
|
); |
192 |
|
|
193 |
|
=cut |
194 |
|
|
195 |
|
sub _create_doc {
    my $self = shift;

    # Named arguments: path (required), body (optional), meta (unsupported).
    my $arg = {@_};

    my $name = $arg->{'path'};
    confess "no path?" unless defined $name && length $name;

    # Reject unsupported input before anything is started or written, so a
    # failed call leaves no half-written file behind.
    if ($arg->{'meta'}) {
        confess "not yet implemented";
    }

    # open indexer if needed
    $self->{'index_fh'} ||= $self->_init_index;

    my $tmp_dir = $self->{'tmp_dir'} || confess "no tmp_dir?";

    # Create the file inside tmp_dir explicitly instead of relying on the
    # current directory still being the one _init_index changed into.
    # rel2abs leaves an already-absolute path untouched. Intermediate
    # directories are not created, so open fails if they do not exist.
    require File::Spec;
    my $file = File::Spec->rel2abs($name, $tmp_dir);

    # Only an undefined body becomes empty; a body of "0" must be kept.
    my $body = defined $arg->{'body'} ? $arg->{'body'} : '';

    # Lexical handle instead of the package-global bareword TMP.
    CORE::open(my $tmp_fh, '>', $file) || confess "can't create temp file ".$name.": $!";

    print {$tmp_fh} '<html>' . '<body>' . $body . '</body></html>'
        or confess "can't write tmp file ".$name.": $!";

    # Buffered write errors surface at close, so it is checked too.
    close($tmp_fh) || confess "can't close tmp file ".$name.": $!";

    # The indexer gets the name exactly as the caller passed it, one per line.
    print { $self->{'index_fh'} } $name."\n"
        or confess "can't pass ".$name." to indexer: $!";

    return 1;
}
219 |
|
|
220 |
1; |
1; |
221 |
__END__ |
__END__ |
222 |
|
|
234 |
instead of one from Debian package. See L<open> how to specify Debian |
instead of one from Debian package. See L<open> how to specify Debian |
235 |
default binaries B<index++> and B<search++>. |
default binaries B<index++> and B<search++>. |
236 |
|
|
237 |
=head1 SEE ALSO |
=head2 SWISH++ |
238 |
|
|
239 |
Mention other useful documentation such as the documentation of |
Aside from very good rewrite in C++, SWISH++ is faster because it has |
240 |
related modules or operating system documentation (such as man pages |
clever heuristics about which data in input files are words to index and |
241 |
in UNIX), or any relevant external documentation such as RFCs or |
which are not. It's based on English language and might be best choice if |
242 |
standards. |
you plan to install large amount of long text documents. |
243 |
|
|
244 |
|
However, if you plan to index all data from structured storage (e.g. RDBMS) |
245 |
|
you might want B<all> words from data to end up in index as opposed to just |
246 |
|
those which look like English words. This is especially important if you |
247 |
|
don't plan to index English texts with this module. |
248 |
|
|
249 |
|
With distribution build versions of SWISH++ you might have problems with |
250 |
|
disappearing words. To overcome this problem, you will have to compile and |
251 |
|
configure SWISH++ yourself (because language characteristics are |
252 |
|
compilation-time option). |
253 |
|
|
254 |
|
Compilation of SWISH++ is easy process well described on project's web |
255 |
|
pages. To see my very relaxed sample configuration take a look at C<swish++> |
256 |
|
directory included in distribution. |
257 |
|
|
258 |
If you have a mailing list set up for your module, mention it here. |
=head1 SEE ALSO |
259 |
|
|
260 |
If you have a web site set up for your module, mention it here. |
C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/> |
261 |
|
|
262 |
=head1 AUTHOR |
=head1 AUTHOR |
263 |
|
|
264 |
Dobrica Pavlinusic, E<lt>dpavlin@E<gt> |
Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> |
265 |
|
|
266 |
=head1 COPYRIGHT AND LICENSE |
=head1 COPYRIGHT AND LICENSE |
267 |
|
|