4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
|
7 |
our $VERSION = '0.05'; |
our $VERSION = '0.10'; |
8 |
|
|
9 |
use Carp; |
use Carp; |
10 |
use File::Temp qw/ tempdir /; |
use File::Temp qw/ tempdir /; |
11 |
|
use BerkeleyDB; |
12 |
#use YAML; |
#use YAML; |
13 |
|
|
14 |
=head1 NAME |
=head1 NAME |
91 |
croak "need $_" unless $self->{$_}; |
croak "need $_" unless $self->{$_}; |
92 |
} |
} |
93 |
|
|
94 |
if (! -e $self->{'index_dir'}) { |
my $index_dir = $self->{'index_dir'}; |
95 |
mkdir $self->{'index_dir'} || confess "can't create index ",$self->{'index'},": $!"; |
|
96 |
|
my $cwd; |
97 |
|
chomp($cwd = `pwd`); |
98 |
|
$self->{'cwd'} = $cwd || carp "can't get cwd!"; |
99 |
|
|
100 |
|
if ($index_dir !~ m#^/#) { |
101 |
|
$index_dir = "$cwd/$index_dir"; |
102 |
|
print STDERR "## full path to index_dir: $index_dir\n" if ($self->{'debug'}); |
103 |
|
$self->{'index_dir'} = $index_dir; |
104 |
|
} |
105 |
|
|
106 |
|
if (! -e $index_dir) { |
107 |
|
mkdir $index_dir || confess "can't create index ",$self->{'index'},": $!"; |
108 |
} |
} |
109 |
|
|
110 |
# default executables |
# default executables |
111 |
$self->{'index'} ||= 'index'; |
$self->{'index'} ||= 'index'; |
112 |
$self->{'search'} ||= 'search'; |
$self->{'search'} ||= 'search'; |
113 |
|
|
114 |
print STDERR "## new index_dir: ",$self->{'index_dir'}," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'}); |
print STDERR "## new index_dir: ",$index_dir," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'}); |
115 |
|
|
116 |
$self ? return $self : return undef; |
$self ? return $self : return undef; |
117 |
} |
} |
147 |
confess $self->{'search'}," binary is not SWISH++" unless ($s =~ m/^SWISH\+\+/); |
confess $self->{'search'}," binary is not SWISH++" unless ($s =~ m/^SWISH\+\+/); |
148 |
|
|
149 |
if ($i eq $s) { |
if ($i eq $s) { |
150 |
|
$i =~ s/^SWISH\+\+\s+// || confess "can't strip SWISH++ from version"; |
151 |
$self->{'version'} = $i; |
$self->{'version'} = $i; |
152 |
return 1; |
return 1; |
153 |
} else { |
} else { |
222 |
|
|
223 |
my $query = shift || return; |
my $query = shift || return; |
224 |
|
|
225 |
$self->_close_index; |
$self->finish_update; |
226 |
|
$self->_tie_meta_db(DB_RDONLY); |
227 |
|
|
228 |
my @results; |
my @results; |
229 |
|
|
230 |
# escape double quotes in query for shell |
# escape double quotes in query for shell |
231 |
$query =~ s/"/\\"/g; |
$query =~ s/"/\\"/g; |
232 |
|
|
233 |
my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |'; |
my $open_cmd = $self->{'search'} . |
234 |
print STDERR "## search $open_cmd\n" if ($self->{'debug'}); |
' -i ' . $self->{'index_dir'}.'/index' . |
235 |
|
' "' . $query . '"'. |
236 |
|
' |'; |
237 |
|
print STDERR "## search: $open_cmd\n" if ($self->{'debug'}); |
238 |
|
|
239 |
open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; |
open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; |
240 |
while(<SEARCH>) { |
my $l; |
241 |
next if (/^#/); |
while($l = <SEARCH>) { |
242 |
chomp; |
next if ($l =~ /^#/); |
243 |
print STDERR "## $_\n" if ($self->{'debug'}); |
chomp($l); |
244 |
my ($rank,$path,$size,$title) = split(/ /,$_,4); |
print STDERR "## $l\n" if ($self->{'debug'}); |
245 |
|
my ($rank,$path,$size,$title) = split(/ /,$l,4); |
246 |
|
$path =~ s#^\./##; # strip from path |
247 |
push @results, { |
push @results, { |
248 |
rank => $rank, |
rank => $rank, |
249 |
path => $path, |
path => $path, |
259 |
return @results; |
return @results; |
260 |
} |
} |
261 |
|
|
262 |
|
=head2 property |
263 |
|
|
264 |
|
Return stored meta property from result or result path. |
265 |
|
|
266 |
|
print $i->property('path', 'title'); |
267 |
|
print $i->property($res->{'path'}, 'title'); |
268 |
|
|
269 |
|
=cut |
270 |
|
|
271 |
|
# Return a stored meta property for a document.
#
#   $i->property($path, $name);
#   $i->property($result, $name);   # $result: hash ref with a 'path' key
#
# The value is looked up in $self->{'meta_db'} under the key "$path-$name".
# Returns the stored value, or undef when nothing is stored under that key.
# Confesses when a hash ref is passed that has no (true) 'path' element.
sub property {
	my $self = shift;

	my ($path,$meta) = @_;

	# Test with ref() instead of matching the stringified value: a plain
	# path that merely begins with "HASH" must not be dereferenced.
	if (ref($path) eq 'HASH') {
		$path = $path->{'path'} || confess "can't find path in input data";
	}

	my $val = $self->{'meta_db'}->{"$path-$meta"};

	# only a genuinely missing value is reported as 'undef' (0 and '' are values)
	print STDERR "## property $path-$meta: ",(defined($val) ? $val : 'undef'),"\n" if ($self->{'debug'});

	return $val;
}
285 |
|
|
286 |
|
=head2 finish_update |
287 |
|
|
288 |
|
This method closes the index and, once the index has been closed, the meta property database.
289 |
|
|
290 |
|
$i->finish_update; |
291 |
|
|
292 |
|
It will be called on DESTROY when $i goes out of scope. |
293 |
|
|
294 |
|
=cut |
295 |
|
|
296 |
|
# Finish an update run.
#
# Calls _close_index and, only when that returns a true value, goes on to
# call _untie_meta_db. The result of that chain is the return value.
sub finish_update {
	my ($self) = @_;

	if ($self->{'debug'}) {
		print STDERR "## finish_update\n";
	}

	return $self->_close_index && $self->_untie_meta_db;
}
303 |
|
|
304 |
|
# Destructor: finish any pending update when the object is released.
sub DESTROY {
	my ($self) = @_;

	$self->finish_update;
}
308 |
|
|
309 |
=head1 PRIVATE METHODS |
=head1 PRIVATE METHODS |
310 |
|
|
311 |
Private methods implement internals for creating temporary file needed for |
Private methods implement internals for creating temporary file needed for |
326 |
sub _init_indexer { |
sub _init_indexer { |
327 |
my $self = shift; |
my $self = shift; |
328 |
|
|
329 |
$self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!"; |
return if ($self->{'_index_fh'}); |
330 |
|
|
331 |
|
my $tmp_dir = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!"; |
332 |
|
$self->{'tmp_dir'} = $tmp_dir; |
333 |
|
|
334 |
chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!"; |
chdir $tmp_dir || confess "can't chdir to ".$tmp_dir.": $!"; |
335 |
|
|
336 |
my $opt = "-v 4"; |
print STDERR "## tmp_dir: $tmp_dir" if ($self->{'debug'}); |
337 |
|
|
338 |
|
my $opt = "-v " . ($self->{'debug'} || '0'); |
339 |
|
|
340 |
unless ($self->{'use_stopwrods'}) { |
unless ($self->{'use_stopwrods'}) { |
341 |
open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n"; |
open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n"; |
344 |
$opt .= " -s _stopwords_"; |
$opt .= " -s _stopwords_"; |
345 |
} |
} |
346 |
|
|
347 |
my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; |
my $index_dir = $self->{'index_dir'} || confess "no index_dir?"; |
348 |
|
|
349 |
|
my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$index_dir.'/index -'; |
350 |
|
|
351 |
|
print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'}); |
352 |
|
|
353 |
open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; |
open($self->{'_index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; |
354 |
|
|
355 |
|
chdir $self->{'cwd'} || confess "can't chdir to ".$self->{'cwd'}.": $!"; |
356 |
|
|
357 |
return $self->{'index_fh'}; |
$self->_tie_meta_db(DB_CREATE); |
358 |
|
|
359 |
|
return $self->{'_index_fh'}; |
360 |
|
} |
361 |
|
|
362 |
|
=head2 _tie_meta_db |
363 |
|
|
364 |
|
Open BerkeleyDB database with meta properties. |
365 |
|
|
366 |
|
$i->_tie_meta_db(DB_CREATE); |
367 |
|
$i->_tie_meta_db(DB_RDONLY); |
368 |
|
|
369 |
|
} |
370 |
|
|
371 |
|
=cut |
372 |
|
|
373 |
|
# Tie $self->{'meta_db'} to the BerkeleyDB::Hash file "<index_dir>/meta.db".
#
#   $i->_tie_meta_db(DB_CREATE);
#   $i->_tie_meta_db(DB_RDONLY);
#
# Does nothing (bare return) when the database is already tied with the same
# flags. Otherwise any existing tie is released first and the file is opened
# with the requested flags. Returns 1 on success; confesses when no flags are
# given or when the database cannot be opened.
sub _tie_meta_db {
	my $self = shift;

	my $flags = shift || confess "need DB_CREATE or DB_RDONLY";

	# already open in the requested mode
	return if ($self->{'_meta_db_flags'} && $self->{'_meta_db_flags'} == $flags);

	print STDERR "## _tie_meta_db($flags)\n" if ($self->{'debug'});

	$self->_untie_meta_db;

	my $file = $self->{'index_dir'}.'/meta.db';

	tie %{$self->{'meta_db'}}, "BerkeleyDB::Hash",
		-Filename => $file,
		-Flags => $flags
	or confess "cannot open $file: $! $BerkeleyDB::Error\n" ;

	# Remember the flags only once the tie has succeeded; otherwise a caller
	# that traps the failure would hit the early return above on its next
	# attempt and carry on without a tied database.
	$self->{'_meta_db_flags'} = $flags;

	return 1;
}
394 |
|
|
395 |
|
=head2 _untie_meta_db |
396 |
|
|
397 |
|
Close BerkeleyDB database with meta properties. |
398 |
|
|
399 |
|
$i->_untie_meta_db |
400 |
|
|
401 |
|
=cut |
402 |
|
|
403 |
|
# Release the tie on $self->{'meta_db'} and forget the flags it was opened
# with. Bare return when there is no meta_db; returns 1 after untying.
# Confesses if untie reports failure.
sub _untie_meta_db {
	my ($self) = @_;

	# nothing to release
	if (! $self->{'meta_db'}) {
		return;
	}

	if ($self->{'debug'}) {
		print STDERR "## _untie_meta_db\n";
	}

	untie(%{ $self->{'meta_db'} }) || confess "can't untie!";

	# drop both the hash and the remembered open flags
	undef $self->{$_} for ('meta_db', '_meta_db_flags');

	return 1;
}
415 |
|
|
416 |
=head2 _create_doc |
=head2 _create_doc |
437 |
my $arg = {@_}; |
my $arg = {@_}; |
438 |
|
|
439 |
# open indexer if needed |
# open indexer if needed |
440 |
$self->{'index_fh'} ||= $self->_init_indexer; |
$self->_init_indexer; |
441 |
|
|
442 |
my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; |
my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; |
443 |
|
my $id = $arg->{'path'} || confess "no path?"; |
444 |
|
$path .= "/$id"; |
445 |
|
|
446 |
|
print STDERR "## _create_doc: $path\n" if ($self->{'debug'}); |
447 |
|
|
448 |
open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!"; |
open(TMP, '>', $path) || die "can't create temp file $path: $!"; |
449 |
|
|
450 |
print TMP '<html><head>'; |
print TMP '<html><head>'; |
451 |
|
|
456 |
my $content = $arg->{'meta'}->{$name}; |
my $content = $arg->{'meta'}->{$name}; |
457 |
print TMP qq{<meta name="$name" content="$content">}; |
print TMP qq{<meta name="$name" content="$content">}; |
458 |
$arg->{'body'} .= " $content" if ($self->{'meta_in_body'}); |
$arg->{'body'} .= " $content" if ($self->{'meta_in_body'}); |
459 |
|
$self->{'meta_db'}->{"$id-$name"} = $content; |
460 |
} |
} |
461 |
} |
} |
462 |
|
|
463 |
if (defined($arg->{'title'})) { |
my $title = $arg->{'title'}; |
464 |
print TMP '<title>' . ($arg->{'title'} || '') . '</title>'; |
if (defined($title)) { |
465 |
$arg->{'body'} .= " ".$arg->{'title'} if ($self->{'meta_in_body'}); |
print TMP "<title>$title</title>"; |
466 |
|
$arg->{'body'} .= " $title" if ($self->{'meta_in_body'}); |
467 |
|
$self->{'meta_db'}->{"$id-title"} = $title; |
468 |
} |
} |
469 |
|
|
470 |
print TMP '</head><body>' . $arg->{'body'} . '</body></html>'; |
print TMP '</head><body>' . $arg->{'body'} . '</body></html>'; |
471 |
|
|
472 |
close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!"; |
close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!"; |
473 |
|
|
474 |
print { $self->{'index_fh'} } $arg->{'path'}."\n"; |
print { $self->{'_index_fh'} } "$id\n"; |
475 |
} |
} |
476 |
|
|
477 |
=head2 _close_index |
=head2 _close_index |
487 |
# Close the indexer handle stored in $self->{'_index_fh'}.
#
# Bare return when no handle is stored. Otherwise the handle is closed and
# 1 is returned; a failing close confesses. The stored handle is cleared in
# either case.
sub _close_index {
	my $self = shift;

	my $fh = $self->{'_index_fh'};
	return unless ($fh);

	print STDERR "## close index\n" if ($self->{'debug'});

	# Forget the handle before closing it, so that a failed close is not
	# attempted a second time (e.g. from DESTROY).
	$self->{'_index_fh'} = undef;

	unless (close($fh)) {
		# For a piped handle close() also fails when the child exits
		# non-zero; $! is then 0 and the reason is only available in $?.
		my ($errno, $status) = ($! + 0, $?);
		confess $errno
			? "can't close index: $!"
			: "can't close index: indexer failed (wait status $status)";
	}

	return 1;
}
499 |
|
|
500 |
1; |
1; |