/[wait]/cvs-head/lib/WAIT/InvertedIndex.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/lib/WAIT/InvertedIndex.pm

Parent Directory | Revision Log | View Patch

revision 29 by ulpfr, Sat Nov 11 16:58:53 2000 UTC | revision 30 by laperla, Sun Nov 12 01:23:47 2000 UTC
# Line 16  use DB_File; Line 16  use DB_File;
16  use Fcntl;  use Fcntl;
17  use WAIT::Filter;  use WAIT::Filter;
18  use Carp;  use Carp;
19  use vars qw(%FUNC);  use vars qw(%FUNC $VERSION);
20    
21    $VERSION = "1.801"; # others test if we are loaded by checking $VERSION
22    
23  # The dictionary has three different key types:  # The dictionary has three different key types:
24  #  'o'.$word  #  'o'.$word
# Line 247  sub sort_postings { Line 249  sub sort_postings {
249    # inverse document frequence gives the score for a term.  This sort    # inverse document frequence gives the score for a term.  This sort
250    # order can be exploited for tuning of single term queries.    # order can be exploited for tuning of single term queries.
251    
252      for my $did (keys %$post) { # sanity check
253        unless ($self->{db}->{"m". $did}) {
254          warn "Warning from WAIT: DIVZERO threat from did[$did] post[$post->{$did}]";
255          $self->{db}->{"m". $did} = 1; # fails if we have not opened for writing
256        }
257      }
258    for my $did (sort {    $post->{$b} / $self->{db}->{'m'. $b}    for my $did (sort {    $post->{$b} / $self->{db}->{'m'. $b}
259                                        <=>                                        <=>
260                           $post->{$a} / $self->{db}->{'m'. $a}                           $post->{$a} / $self->{db}->{'m'. $a}
# Line 274  sub delete { Line 282  sub delete {
282    
283    grep $occ{$_}++, &{$self->{func}}(@_);    grep $occ{$_}++, &{$self->{func}}(@_);
284    
285      # Be prepared for "Odd number of elements in hash assignment"
286      local $SIG{__WARN__} = sub {
287        my $warning = shift;
288        chomp $warning;
289        warn "Catching warning[$warning] during delete of key[$key]";
290      };
291    for (keys %occ) {# may reorder posting list    for (keys %occ) {# may reorder posting list
292      my %post = unpack 'w*', $db->{'p'.$_};      my %post = unpack 'w*', $db->{'p'.$_};
293      delete $post{$key};      delete $post{$key};
# Line 467  sub search_raw { Line 481  sub search_raw {
481      }      }
482    
483      for (my $i=1; $i<@res; $i+=2) {      for (my $i=1; $i<@res; $i+=2) {
484        $res[$i] /= $self->{db}->{'m'. $res[$i-1]} / $idf;        # $res[$i] /= $self->{db}->{'m'. $res[$i-1]} / $idf;
485          # above was written badly, allows two DIV_ZERO problems.
486          my $maxtf = $self->{db}->{"m". $res[$i-1]};
487          unless ($maxtf) {
488            warn "WAIT-Warning: Averting DIVZERO for i[$i] \$res[\$i-1][$res[$i-1]] term[$term]";
489            $maxtf = 1;
490          }
491          $res[$i] = ($res[$i] / $maxtf) * $idf;
492      }      }
493    
494      return @res      return @res
# Line 671  sub sync { Line 692  sub sync {
692    if ($self->{mode} & O_RDWR) {    if ($self->{mode} & O_RDWR) {
693      print STDERR "Flushing $self->{cached} postings\n" if $self->{cached};      print STDERR "Flushing $self->{cached} postings\n" if $self->{cached};
694      while (my($key, $value) = each %{$self->{cache}}) {      while (my($key, $value) = each %{$self->{cache}}) {
695          $self->{db}->{"p". $key} ||= "";
696        if ($self->{reorg}) {        if ($self->{reorg}) {
697          $self->{db}->{'p'.$key} = $self->sort_postings($self->{db}->{'p'.$key}          $self->{db}->{'p'.$key} = $self->sort_postings($self->{db}->{'p'.$key}
698                                                     . $value);                                                     . $value);

Legend:
Removed from v.29  
changed lines
  Added in v.30

  ViewVC Help
Powered by ViewVC 1.1.26