16 |
use Fcntl; |
use Fcntl; |
17 |
use WAIT::Filter; |
use WAIT::Filter; |
18 |
use Carp; |
use Carp; |
19 |
use vars qw(%FUNC); |
use vars qw(%FUNC $VERSION); |
20 |
|
|
21 |
|
$VERSION = "1.801"; # others test if we are loaded by checking $VERSION |
22 |
|
|
23 |
# The dictionary has three different key types: |
# The dictionary has three different key types: |
24 |
# 'o'.$word |
# 'o'.$word |
249 |
# inverse document frequency gives the score for a term. This sort |
# inverse document frequency gives the score for a term. This sort |
250 |
# order can be exploited for tuning of single term queries. |
# order can be exploited for tuning of single term queries. |
251 |
|
|
252 |
|
for my $did (keys %$post) { # sanity check |
253 |
|
unless ($self->{db}->{"m". $did}) { |
254 |
|
warn "Warning from WAIT: DIVZERO threat from did[$did] post[$post->{$did}]"; |
255 |
|
$self->{db}->{"m". $did} = 1; # fails if we have not opened for writing |
256 |
|
} |
257 |
|
} |
258 |
for my $did (sort { $post->{$b} / $self->{db}->{'m'. $b} |
for my $did (sort { $post->{$b} / $self->{db}->{'m'. $b} |
259 |
<=> |
<=> |
260 |
$post->{$a} / $self->{db}->{'m'. $a} |
$post->{$a} / $self->{db}->{'m'. $a} |
282 |
|
|
283 |
grep $occ{$_}++, &{$self->{func}}(@_); |
grep $occ{$_}++, &{$self->{func}}(@_); |
284 |
|
|
285 |
|
# Be prepared for "Odd number of elements in hash assignment" |
286 |
|
local $SIG{__WARN__} = sub { |
287 |
|
my $warning = shift; |
288 |
|
chomp $warning; |
289 |
|
warn "Catching warning[$warning] during delete of key[$key]"; |
290 |
|
}; |
291 |
for (keys %occ) {# may reorder posting list |
for (keys %occ) {# may reorder posting list |
292 |
my %post = unpack 'w*', $db->{'p'.$_}; |
my %post = unpack 'w*', $db->{'p'.$_}; |
293 |
delete $post{$key}; |
delete $post{$key}; |
481 |
} |
} |
482 |
|
|
483 |
for (my $i=1; $i<@res; $i+=2) { |
for (my $i=1; $i<@res; $i+=2) { |
484 |
$res[$i] /= $self->{db}->{'m'. $res[$i-1]} / $idf; |
# $res[$i] /= $self->{db}->{'m'. $res[$i-1]} / $idf; |
485 |
|
# The line above was badly written: it allows two DIV_ZERO problems, since a zero 'm' entry or a zero $idf each triggers a division by zero. |
486 |
|
my $maxtf = $self->{db}->{"m". $res[$i-1]}; |
487 |
|
unless ($maxtf) { |
488 |
|
warn "WAIT-Warning: Averting DIVZERO for i[$i] \$res[\$i-1][$res[$i-1]] term[$term]"; |
489 |
|
$maxtf = 1; |
490 |
|
} |
491 |
|
$res[$i] = ($res[$i] / $maxtf) * $idf; |
492 |
} |
} |
493 |
|
|
494 |
return @res |
return @res |
692 |
if ($self->{mode} & O_RDWR) { |
if ($self->{mode} & O_RDWR) { |
693 |
print STDERR "Flushing $self->{cached} postings\n" if $self->{cached}; |
print STDERR "Flushing $self->{cached} postings\n" if $self->{cached}; |
694 |
while (my($key, $value) = each %{$self->{cache}}) { |
while (my($key, $value) = each %{$self->{cache}}) { |
695 |
|
$self->{db}->{"p". $key} ||= ""; |
696 |
if ($self->{reorg}) { |
if ($self->{reorg}) { |
697 |
$self->{db}->{'p'.$key} = $self->sort_postings($self->{db}->{'p'.$key} |
$self->{db}->{'p'.$key} = $self->sort_postings($self->{db}->{'p'.$key} |
698 |
. $value); |
. $value); |