/[wait]/branches/CPAN/lib/WAIT/InvertedIndex.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /branches/CPAN/lib/WAIT/InvertedIndex.pm

Parent Directory | Revision Log | View Patch

revision 12 by unknown, Fri Apr 28 15:41:10 2000 UTC revision 13 by ulpfr, Fri Apr 28 15:42:44 2000 UTC
# Line 1  Line 1 
1  #                              -*- Mode: Perl -*-  #                              -*- Mode: Cperl -*-
2  # InvertedIndex.pm --  # InvertedIndex.pm --
3  # ITIID           : $ITI$ $Header $__Header$  # ITIID           : $ITI$ $Header $__Header$
4  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
5  # Created On      : Thu Aug  8 13:05:10 1996  # Created On      : Thu Aug  8 13:05:10 1996
# Line 7  Line 7 
7  # Last Modified On: Sun Nov 22 18:44:42 1998  # Last Modified On: Sun Nov 22 18:44:42 1998
8  # Language        : CPerl  # Language        : CPerl
9  # Status          : Unknown, Use with caution!  # Status          : Unknown, Use with caution!
10  #  #
11  # Copyright (c) 1996-1997, Ulrich Pfeifer  # Copyright (c) 1996-1997, Ulrich Pfeifer
12  #  #
13    
14  package WAIT::InvertedIndex;  package WAIT::InvertedIndex;
15  use strict;  use strict;
# Line 63  sub _split_pos { Line 63  sub _split_pos {
63  sub _xfiltergen {  sub _xfiltergen {
64    my $filter = pop @_;    my $filter = pop @_;
65    
66    if ($filter eq 'stop') {      # avoid the slow stopword elimination  # Oops, we cannot overrule the user's choice. Other filters may kill
67      return _xfiltergen(@_);            # it's cheaper to look them up afterwards  # stopwords, such as isotr clobbers "isn't" to "isnt".
68    }  
69    #  if ($filter eq 'stop') {      # avoid the slow stopword elimination
70    #    return _xfiltergen(@_);            # it's cheaper to look them up afterwards
71    #  }
72    if (@_) {    if (@_) {
73      if ($filter =~ /^split(\d*)/) {      if ($filter =~ /^split(\d*)/) {
74        if ($1) {        if ($1) {
# Line 148  sub insert { Line 151  sub insert {
151    my $self  = shift;    my $self  = shift;
152    my $key   = shift;    my $key   = shift;
153    my %occ;    my %occ;
154      
155    defined $self->{db} or $self->open;    defined $self->{db} or $self->open;
156    grep $occ{$_}++, &{$self->{func}}(@_);    grep $occ{$_}++, &{$self->{func}}(@_);
157    my ($word, $noc);    my ($word, $noc);
# Line 160  sub insert { Line 163  sub insert {
163      } else {      } else {
164        $self->{cdict}->{$O,$word} = 1;        $self->{cdict}->{$O,$word} = 1;
165        $self->{cache}->{$word}  = pack 'w2', $key, $noc;        $self->{cache}->{$word}  = pack 'w2', $key, $noc;
166      }      }
167      $self->{cached}++;      $self->{cached}++;
168    }    }
169    $self->sync if $self->{cached} > 100_000;    $self->sync if $self->{cached} > 100_000;
# Line 277  sub parse { Line 280  sub parse {
280    &{$self->{func}}(@_);    &{$self->{func}}(@_);
281  }  }
282    
283    sub keys {
284      my $self  = shift;
285    
286      defined $self->{db} or $self->open;
287      keys %{$self->{db}};
288    }
289    
290  sub search_prefix {  sub search_prefix {
291    my $self  = shift;    my $self  = shift;
292    
# Line 298  sub search_raw { Line 308  sub search_raw {
308    for (keys %occ) {    for (keys %occ) {
309      if (defined $self->{db}->{$_}) {      if (defined $self->{db}->{$_}) {
310        my %post = unpack 'w*', $self->{db}->{$_};        my %post = unpack 'w*', $self->{db}->{$_};
311        my $idf = log($self->{records}/$self->{db}->{$O,$_});        my $idf = log($self->{records}/($self->{db}->{$O,$_} || 1));
312        my $did;        my $did;
313        for $did (keys %post) {        for $did (keys %post) {
314          $score{$did} = 0 unless defined $score{$did}; # perl -w          $score{$did} = 0 unless defined $score{$did}; # perl -w
# Line 314  sub sync { Line 324  sub sync {
324    my $self = shift;    my $self = shift;
325    
326    if ($self->{mode} & O_RDWR) {    if ($self->{mode} & O_RDWR) {
327      print STDERR "\aFlushing $self->{cached} postings\n";      print STDERR "Flushing $self->{cached} postings\n";
328      while (my($key, $value) = each %{$self->{cache}}) {      while (my($key, $value) = each %{$self->{cache}}) {
329        $self->{db}->{$key} .= $value;        $self->{db}->{$key} .= $value;
330        #delete $self->{cache}->{$key};        #delete $self->{cache}->{$key};

Legend:
Removed from v.12  
changed lines
  Added in v.13

  ViewVC Help
Powered by ViewVC 1.1.26