/[wait]/branches/CPAN/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /branches/CPAN/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 12 by unknown, Fri Apr 28 15:41:10 2000 UTC revision 13 by ulpfr, Fri Apr 28 15:42:44 2000 UTC
# Line 1  Line 1 
1  #                              -*- Mode: Perl -*-  #                              -*- Mode: Cperl -*-
2  # Table.pm --  # Table.pm --
3  # ITIID           : $ITI$ $Header $__Header$  # ITIID           : $ITI$ $Header $__Header$
4  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
5  # Created On      : Thu Aug  8 13:05:10 1996  # Created On      : Thu Aug  8 13:05:10 1996
6  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
7  # Last Modified On: Sun Nov 22 18:44:37 1998  # Last Modified On: Sun May 30 20:42:30 1999
8  # Language        : CPerl  # Language        : CPerl
9  # Update Count    : 51  # Update Count    : 56
10  # Status          : Unknown, Use with caution!  # Status          : Unknown, Use with caution!
11  #  #
12  # Copyright (c) 1996-1997, Ulrich Pfeifer  # Copyright (c) 1996-1997, Ulrich Pfeifer
13  #  #
14    
15  =head1 NAME  =head1 NAME
16    
# Line 25  WAIT::Table -- Module for maintaining Ta Line 25  WAIT::Table -- Module for maintaining Ta
25  =cut  =cut
26    
27  package WAIT::Table;  package WAIT::Table;
28    
29    use WAIT::Table::Handle ();
30  require WAIT::Parse::Base;  require WAIT::Parse::Base;
31    
32  use strict;  use strict;
33  use Carp;  use Carp;
34    # use autouse Carp => qw( croak($) );
35  use DB_File;  use DB_File;
36  use Fcntl;  use Fcntl;
37    
# Line 35  my $USE_RECNO = 0; Line 39  my $USE_RECNO = 0;
39    
40  =head2 Creating a Table.  =head2 Creating a Table.
41    
42  The constructor WAIT::Table-<gt>new is normally called via the  The constructor WAIT::Table-E<gt>new is normally called via the
43  create_table method of a database handle. This is not enforced, but  create_table method of a database handle. This is not enforced, but
44  creating a table doesn not make any sense unless the table is  creating a table does not make any sense unless the table is
45  registered by the database because the latter implements persistence  registered by the database because the latter implements persistence
46  of the meta data. Registering is done automatically by letting the  of the meta data. Registering is done automatically by letting the
47  database handle create a table.  database handle the creation of a table.
48    
49    my $db = create WAIT::Database name => 'sample';    my $db = WAIT::Database->create(name => 'sample');
50    my $tb = $db->create_table (name     => 'test',    my $tb = $db->create_table(name     => 'test',
51                                attr     => ['docid', 'headline'],                               access   => $access,
52                                layout   => $layout,                               layout   => $layout,
53                                access   => $access,                               attr     => ['docid', 'headline'],
54                               );                              );
55    
56  The constructor returns a handle for the table. This handle is hidden by the  The constructor returns a handle for the table. This handle is hidden by the
57  table module, to prevent direct access if called via Table.  table module, to prevent direct access if called via Table.
58    
59  =over 10  =over 10
60    
61  =item C<access> => I<accesobj>  =item C<access> => I<accessobj>
62    
63  A reference to a acces object for the external parts (attributes) of  A reference to an access object for the external parts (attributes) of
64  tuples. As you may remember, the WAIT System does not enforce that  tuples. As you may remember, the WAIT System does not enforce that
65  objects are completely stored inside the system to avoid duplication.  objects are completely stored inside the system to avoid duplication.
66  There is no (strong) point in storing all you HTML-Documents inside  There is no (strong) point in storing all your HTML documents inside
67  the system when indexing your WWW-Server.  the system when indexing your WWW-Server.
68    
69    The access object is designed to work like as a tied hash. You pass
70    the refernce to the object, not the tied hash though. An example
71    implementation of an access class that works for manpages is
72    WAIT::Document::Nroff.
73    
74    The implementation needs to take into account that WAIT will keep this
75    object in a Data::Dumper or Storable database and re-use it when sman
76    is run. So it is not good enough if we can produce the index with it
77    now, when we create or actively access the table, WAIT also must be
78    able to retrieve documents on its own, when we are in a different
79    context. This happens specifically in a retrieval. To get this working
80    seemlessly, the access-defining class must implement a close method.
81    This method will be called before the Data::Dumper dump takes place.
82    In that moment the access-defining class must get rid of all data
83    structures that cannot be reconstructed via the Data::Dumper dump,
84    such as database handles or C pointers.
85    
86  =item C<file> => I<fname>  =item C<file> => I<fname>
87    
88  The filename of the records file. Files for indexes will have I<fname>  The filename of the records file. Files for indexes will have I<fname>
89  as prefix. I<Mandatory>  as prefix. I<Mandatory>, but usually taken care of by the
90    WAIT::Database handle when the constructor is called via
91    WAIT::Database::create_table().
92    
93  =item C<name> => I<name>  =item C<name> => I<name>
94    
# Line 73  The name of this table. I<Mandatory> Line 96  The name of this table. I<Mandatory>
96    
97  =item C<attr> => [ I<attr> ... ]  =item C<attr> => [ I<attr> ... ]
98    
99  A reference to an array of attribute names. I<Mandatory>  A reference to an array of attribute names. WAIT will keep the
100    contents of these attributes in its table. I<Mandatory>
101    
102  =item C<djk> => [ I<attr> ... ]  =item C<djk> => [ I<attr> ... ]
103    
104  A reference to an array of attribute names which make up the  A reference to an array of attribute names which make up the
105  I<disjointness key>. Don't think about it - i's of no use yet;  I<disjointness key>. Don't think about it - it's of no use yet;
106    
107  =item C<layout> => I<layoutobj>  =item C<layout> => I<layoutobj>
108    
109  A reference to an external parser object. Defaults to anew instance of  A reference to an external parser object. Defaults to a new instance
110  C<WAIT::Parse::Base>  of C<WAIT::Parse::Base>. For an example implementation see
111    WAIT::Parse::Nroff. A layout class can be implemented as a singleton
112    class if you so like.
113    
114    =item C<keyset> => I<keyset>
115    
116  =item C<access> => I<accesobj>  The set of attributes needed to identify a record. Defaults to all
117    attributes.
118    
119  A reference to a acces object for the external parts of tuples.  =item C<invindex> => I<inverted index>
120    
121    A reference to an anon array defining attributes of each record that
122    need to be indexed. See the source of smakewhatis for how to set this
123    up.
124    
125  =back  =back
126    
# Line 98  sub new { Line 131  sub new {
131    my %parm = @_;    my %parm = @_;
132    my $self = {};    my $self = {};
133    
134      # Check for mandatory attrs early
135      $self->{name}     = $parm{name}     or croak "No name specified";
136      $self->{attr}     = $parm{attr}     or croak "No attributes specified";
137    
138    # Do that before we eventually add '_weight' to attributes.    # Do that before we eventually add '_weight' to attributes.
139    $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];    $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];
140    
141    $self->{mode}     = O_CREAT | O_RDWR;    $self->{mode}     = O_CREAT | O_RDWR;
142    
143    # Determine and set up subclass    # Determine and set up subclass
144    $type = ref($type) || $type;    $type = ref($type) || $type;
145    if (defined $parm{djk}) {    if (defined $parm{djk}) {
# Line 119  sub new { Line 158  sub new {
158    }    }
159    
160    $self->{file}     = $parm{file}     or croak "No file specified";    $self->{file}     = $parm{file}     or croak "No file specified";
161    if (-d  $self->{file} or !mkdir($self->{file}, 0775)) {    if (-d  $self->{file}){
162        warn "Warning: Directory '$self->{file}' already exists\n";
163      } elsif (!mkdir($self->{file}, 0775)) {
164      croak "Could not 'mkdir $self->{file}': $!\n";      croak "Could not 'mkdir $self->{file}': $!\n";
165    }    }
   $self->{name}     = $parm{name}     or croak "No name specified";  
   $self->{attr}     = $parm{attr}     or croak "No attributes specified";  
166    $self->{djk}      = $parm{djk}      if defined $parm{djk};    $self->{djk}      = $parm{djk}      if defined $parm{djk};
167    $self->{layout}   = $parm{layout} || new WAIT::Parse::Base;    $self->{layout}   = $parm{layout} || new WAIT::Parse::Base;
168    $self->{access}   = $parm{access} if defined $parm{access};    $self->{access}   = $parm{access} if defined $parm{access};
# Line 142  sub new { Line 181  sub new {
181      my $att  = shift @{$parm{invindex}};      my $att  = shift @{$parm{invindex}};
182      my @spec = @{shift @{$parm{invindex}}};      my @spec = @{shift @{$parm{invindex}}};
183      my @opt;      my @opt;
184        
185      if (ref($spec[0])) {      if (ref($spec[0])) {
186        carp "Secondary pipelines are deprecated\n";        carp "Secondary pipelines are deprecated\n";
187        @opt = %{shift @spec};        @opt = %{shift @spec};
# Line 168  table! Line 207  table!
207    
208  sub create_index {  sub create_index {
209    my $self= shift;    my $self= shift;
210      
211    croak "Cannot create index for table aready populated"    croak "Cannot create index for table aready populated"
212      if $self->{nextk} > 1;      if $self->{nextk} > 1;
213      
214    require WAIT::Index;    require WAIT::Index;
215      
216    my $name = join '-', @_;    my $name = join '-', @_;
217    $self->{indexes}->{$name} =    $self->{indexes}->{$name} =
218      new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;      new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
# Line 196  set attributes specified when the table Line 235  set attributes specified when the table
235    
236  =item C<pipeline>  =item C<pipeline>
237    
238  A piplines specification is a reference to and array of method names  A piplines specification is a reference to an array of method names
239  (from package C<WAIT::Filter>) which are to applied in sequence to the  (from package C<WAIT::Filter>) which are to be applied in sequence to
240  contents of the named attribute. The attribute name may not be in the  the contents of the named attribute. The attribute name may not be in
241  attribute list.  the attribute list.
242    
243  =item C<predicate>  =item C<predicate>
244    
245  An indication which predicate the index implements. This may be  An indication which predicate the index implements. This may be
246  e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for  e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
247  query processing. Currently there is no standard set of predicate  query processing. Currently there is no standard set of predicate
248  names. The predicate defaults to the last member of the ppline if  names. The predicate defaults to the last member of the pipeline if
249  omitted.  omitted.
250    
251  =back  =back
# Line 224  sub create_inverted_index { Line 263  sub create_inverted_index {
263    croak "No pipeline specified"  unless $parm{pipeline};    croak "No pipeline specified"  unless $parm{pipeline};
264    
265    $parm{predicate} ||= $parm{pipeline}->[-1];    $parm{predicate} ||= $parm{pipeline}->[-1];
266      
267    croak "Cannot create index for table aready populated"    croak "Cannot create index for table aready populated"
268      if $self->{nextk} > 1;      if $self->{nextk} > 1;
269      
270    require WAIT::InvertedIndex;    require WAIT::InvertedIndex;
271    
272    # backward compatibility stuff    # backward compatibility stuff
# Line 235  sub create_inverted_index { Line 274  sub create_inverted_index {
274    for (qw(attribute pipeline predicate)) {    for (qw(attribute pipeline predicate)) {
275      delete $opt{$_};      delete $opt{$_};
276    }    }
277      
278    my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});    my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
279    my $idx = new WAIT::InvertedIndex(file   => $self->{file}.'/'.$name,    my $idx = new WAIT::InvertedIndex(file   => $self->{file}.'/'.$name,
280                                      filter => [@{$parm{pipeline}}], # clone                                      filter => [@{$parm{pipeline}}], # clone
# Line 340  sub open { Line 379  sub open {
379  sub fetch_extern {  sub fetch_extern {
380    my $self  = shift;    my $self  = shift;
381    
382    print "#@_", $self->{'access'}->{Mode}, "\n";    # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
383    if (exists $self->{'access'}) {    if (exists $self->{'access'}) {
384      mrequire ref($self->{'access'});      mrequire ref($self->{'access'});
385      $self->{'access'}->FETCH(@_);      $self->{'access'}->FETCH(@_);
# Line 358  sub _find_index { Line 397  sub _find_index {
397    my (@att) = @_;    my (@att) = @_;
398    my %att;    my %att;
399    my $name;    my $name;
400      
401    @att{@att} = @att;    @att{@att} = @att;
402    
403    KEY: for $name (keys %{$self->{indexes}}) {    KEY: for $name (keys %{$self->{indexes}}) {
# Line 375  sub have { Line 414  sub have {
414    my $self  = shift;    my $self  = shift;
415    my %parm  = @_;    my %parm  = @_;
416    
417    my $index = $self->_find_index(keys %parm);    my $index = $self->_find_index(keys %parm) or return; # no index-no have
418    croak "No index found" unless $index;  
419    defined $self->{db} or $self->open;    defined $self->{db} or $self->open;
420    return $index->have(@_);    return $index->have(@_);
421  }  }
# Line 387  sub insert { Line 426  sub insert {
426    
427    defined $self->{db} or $self->open;    defined $self->{db} or $self->open;
428    
429      # We should move all writing methods to a subclass to check only once
430      $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
431    
432    my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));    my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
433    my $key;    my $key;
434    my @deleted = keys %{$self->{deleted}};    my @deleted = keys %{$self->{deleted}};
# Line 416  sub insert { Line 458  sub insert {
458          $idx->remove($key, %parm);          $idx->remove($key, %parm);
459        }        }
460        return undef;        return undef;
461      }      }
462    }    }
463    if (defined $self->{inverted}) {    if (defined $self->{inverted}) {
464      my $att;      my $att;
# Line 432  sub insert { Line 474  sub insert {
474    
475  sub sync {  sub sync {
476    my $self  = shift;    my $self  = shift;
477      
478    for (values %{$self->{indexes}}) {    for (values %{$self->{indexes}}) {
479      map $_->sync, $_;      map $_->sync, $_;
480    }    }
# Line 449  sub fetch { Line 491  sub fetch {
491    my $key   = shift;    my $key   = shift;
492    
493    return () if exists $self->{deleted}->{$key};    return () if exists $self->{deleted}->{$key};
494      
495    defined $self->{db} or $self->open;    defined $self->{db} or $self->open;
496    if ($USE_RECNO) {    if ($USE_RECNO) {
497      $self->unpack($self->{db}->[$key]);      $self->unpack($self->{db}->[$key]);
# Line 538  sub close { Line 580  sub close {
580    1;    1;
581  }  }
582    
583    sub DESTROY {
584      my $self = shift;
585    
586      warn "Table handle destroyed without closing it first"
587        if $self->{db} and $self->{mode}&O_RDWR;
588    }
589    
590  sub open_scan {  sub open_scan {
591    my $self = shift;    my $self = shift;
592    my $code = shift;    my $code = shift;
# Line 644  sub hilight_positions { Line 693  sub hilight_positions {
693    my %pos;    my %pos;
694    
695    if (defined $raw) {    if (defined $raw) {
696      for (@{$self->{inverted}->{$attr}}) {      for (@{$self->{inverted}->{$attr}}) { # objects of type
697                                              # WAIT::InvertedIndex for
698                                              # this index field $attr
699        my $name = $_->name;        my $name = $_->name;
700        if (exists $raw->{$name}) {        if (exists $raw->{$name}) {
701          my %qt;          my %qt;
# Line 678  sub hilight_positions { Line 729  sub hilight_positions {
729  }  }
730    
731  sub hilight {  sub hilight {
732    my ($tb, $text, $query, $raw) = @_;    my ($tb, $buf, $qplain, $qraw) = @_;
733    my $type = $tb->layout();    my $layout = $tb->layout();
734    
735    my @result;    my @result;
736    
737    $query ||= {};    $qplain ||= {};
738    $raw   ||= {};    $qraw   ||= {};
739    my @ttxt = $type->tag($text);    my @ttxt = $layout->tag($buf);
740    while (@ttxt) {    while (@ttxt) {
741      no strict 'refs';      no strict 'refs';
742      my %tag = %{shift @ttxt};      my %tag = %{shift @ttxt};
# Line 692  sub hilight { Line 744  sub hilight {
744      my $fld;      my $fld;
745    
746      my %hl;      my %hl;
747      for $fld (grep defined $tag{$_}, keys %$query, keys %$raw) {      for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
748        my $hp = $tb->hilight_positions($fld, $txt,        my $hp = $tb->hilight_positions($fld, $txt,
749                                        $query->{$fld}, $raw->{$fld});                                        $qplain->{$fld}, $qraw->{$fld});
750        for (keys %$hp) {        for (keys %$hp) {
751          if (exists $hl{$_}) {   # -w ;-(          if (exists $hl{$_}) {   # -w ;-(
752            $hl{$_} = max($hl{$_}, $hp->{$_});            $hl{$_} = max($hl{$_}, $hp->{$_});
# Line 720  sub hilight { Line 772  sub hilight {
772  }  }
773    
774  1;  1;
   

Legend:
Removed from v.12  
changed lines
  Added in v.13

  ViewVC Help
Powered by ViewVC 1.1.26