/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 89 - (hide annotations)
Mon May 24 20:57:08 2004 UTC (20 years ago) by dpavlin
File size: 25290 byte(s)
revert to DB_File from BerkeleyDB

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 dpavlin 89 # Last Modified On: Wed Jan 23 14:15:15 2002
8 ulpfr 10 # Language : CPerl
9 dpavlin 89 # Update Count : 152
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 ulpfr 13
29     use WAIT::Table::Handle ();
30 ulpfr 10 require WAIT::Parse::Base;
31 ulpfr 13
32 ulpfr 10 use strict;
33     use Carp;
34 ulpfr 13 # use autouse Carp => qw( croak($) );
35 dpavlin 89 use DB_File;
36 ulpfr 10 use Fcntl;
37 ulpfr 19 use LockFile::Simple ();
38 ulpfr 10
39     my $USE_RECNO = 0;
40    
41     =head2 Creating a Table.
42    
43 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
44 ulpfr 10 create_table method of a database handle. This is not enforced, but
45 ulpfr 13 creating a table does not make any sense unless the table is
46 ulpfr 10 registered by the database because the latter implements persistence
47     of the meta data. Registering is done automatically by letting the
48 ulpfr 13 database handle the creation of a table.
49 ulpfr 10
50 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
51     my $tb = $db->create_table(name => 'test',
52     access => $access,
53     layout => $layout,
54     attr => ['docid', 'headline'],
55     );
56 ulpfr 10
57     The constructor returns a handle for the table. This handle is hidden by the
58     table module, to prevent direct access if called via Table.
59    
60     =over 10
61    
62 ulpfr 13 =item C<access> => I<accessobj>
63 ulpfr 10
64 ulpfr 13 A reference to an access object for the external parts (attributes) of
65 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
66     objects are completely stored inside the system to avoid duplication.
67 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
68 ulpfr 10 the system when indexing your WWW-Server.
69    
70 ulpfr 13 The access object is designed to work like as a tied hash. You pass
71     the refernce to the object, not the tied hash though. An example
72     implementation of an access class that works for manpages is
73     WAIT::Document::Nroff.
74    
75     The implementation needs to take into account that WAIT will keep this
76     object in a Data::Dumper or Storable database and re-use it when sman
77     is run. So it is not good enough if we can produce the index with it
78     now, when we create or actively access the table, WAIT also must be
79     able to retrieve documents on its own, when we are in a different
80     context. This happens specifically in a retrieval. To get this working
81     seemlessly, the access-defining class must implement a close method.
82     This method will be called before the Data::Dumper dump takes place.
83     In that moment the access-defining class must get rid of all data
84     structures that cannot be reconstructed via the Data::Dumper dump,
85     such as database handles or C pointers.
86    
87 ulpfr 10 =item C<file> => I<fname>
88    
89     The filename of the records file. Files for indexes will have I<fname>
90 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
91     WAIT::Database handle when the constructor is called via
92     WAIT::Database::create_table().
93 ulpfr 10
94     =item C<name> => I<name>
95    
96     The name of this table. I<Mandatory>
97    
98     =item C<attr> => [ I<attr> ... ]
99    
100 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
101     contents of these attributes in its table. I<Mandatory>
102 ulpfr 10
103     =item C<djk> => [ I<attr> ... ]
104    
105     A reference to an array of attribute names which make up the
106 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
107 ulpfr 10
108     =item C<layout> => I<layoutobj>
109    
110 ulpfr 13 A reference to an external parser object. Defaults to a new instance
111     of C<WAIT::Parse::Base>. For an example implementation see
112     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113     class if you so like.
114 ulpfr 10
115 ulpfr 13 =item C<keyset> => I<keyset>
116 ulpfr 10
117 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
118     attributes.
119 ulpfr 10
120 ulpfr 13 =item C<invindex> => I<inverted index>
121    
122     A reference to an anon array defining attributes of each record that
123     need to be indexed. See the source of smakewhatis for how to set this
124     up.
125    
126 ulpfr 10 =back
127    
128     =cut
129    
130     sub new {
131     my $type = shift;
132     my %parm = @_;
133     my $self = {};
134    
135 ulpfr 13 # Check for mandatory attrs early
136     $self->{name} = $parm{name} or croak "No name specified";
137     $self->{attr} = $parm{attr} or croak "No attributes specified";
138    
139 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
140     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141 ulpfr 13
142 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
143 ulpfr 13
144 ulpfr 10 # Determine and set up subclass
145     $type = ref($type) || $type;
146     if (defined $parm{djk}) {
147     if (@{$parm{djk}} == @{$parm{attr}}) {
148     # All attributes in DK (sloppy test here!)
149     $type .= '::Independent';
150     require WAIT::Table::Independent;
151     } else {
152     $type .= '::Disjoint';
153     require WAIT::Table::Disjoint;
154     }
155     # Add '_weight' to attributes
156     my %attr;
157     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159     }
160    
161     $self->{file} = $parm{file} or croak "No file specified";
162 dpavlin 89 if (-d $self->{file}){
163     warn "Warning: Directory '$self->{file}' already exists\n";
164     } elsif (!mkdir($self->{file}, 0775)) {
165     croak "Could not 'mkdir $self->{file}': $!\n";
166 ulpfr 10 }
167 ulpfr 19
168 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
169     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
170     $self->{access} = $parm{access} if defined $parm{access};
171     $self->{nextk} = 1; # next record to insert; first record unused
172     $self->{deleted} = {}; # no deleted records yet
173     $self->{indexes} = {};
174    
175     bless $self, $type;
176 ulpfr 35
177     # Checking for readers is not necessary, but let's go with the
178     # generic method.
179     $self->getlock(O_RDWR|O_CREAT); # dies when failing
180    
181 ulpfr 10 # Call create_index() and create_index() for compatibility
182     for (@{$self->{keyset}||[]}) {
183     #carp "Specification of indexes at table create time is deprecated";
184     $self->create_index(@$_);
185     }
186     while (@{$parm{invindex}||[]}) {
187     # carp "Specification of inverted indexes at table create time is deprecated";
188     my $att = shift @{$parm{invindex}};
189     my @spec = @{shift @{$parm{invindex}}};
190     my @opt;
191 ulpfr 13
192 ulpfr 10 if (ref($spec[0])) {
193     carp "Secondary pipelines are deprecated\n";
194     @opt = %{shift @spec};
195     }
196     $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197     }
198 ulpfr 19
199 ulpfr 10 $self;
200     # end of backwarn compatibility stuff
201     }
202    
203     =head2 Creating an index
204    
205     $tb->create_index('docid');
206    
207     =item C<create_index>
208    
209     must be called with a list of attributes. This must be a subset of the
210     attributes specified when the table was created. Currently this
211     method must be called before the first tuple is inserted in the
212     table!
213    
214     =cut
215    
216     sub create_index {
217     my $self= shift;
218 ulpfr 13
219 ulpfr 10 croak "Cannot create index for table aready populated"
220     if $self->{nextk} > 1;
221 ulpfr 13
222 ulpfr 10 require WAIT::Index;
223 ulpfr 13
224 ulpfr 10 my $name = join '-', @_;
225     $self->{indexes}->{$name} =
226 dpavlin 89 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227 ulpfr 10 }
228    
229     =head2 Creating an inverted index
230    
231     $tb->create_inverted_index
232     (attribute => 'au',
233     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234     predicate => 'plain',
235     );
236    
237     =over 5
238    
239     =item C<attribute>
240    
241     The attribute to build the index on. This attribute may not be in the
242     set attributes specified when the table was created.
243    
244     =item C<pipeline>
245    
246 ulpfr 13 A piplines specification is a reference to an array of method names
247     (from package C<WAIT::Filter>) which are to be applied in sequence to
248     the contents of the named attribute. The attribute name may not be in
249     the attribute list.
250 ulpfr 10
251     =item C<predicate>
252    
253     An indication which predicate the index implements. This may be
254     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255     query processing. Currently there is no standard set of predicate
256 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
257 ulpfr 10 omitted.
258    
259     =back
260    
261     Currently this method must be called before the first tuple is
262     inserted in the table!
263    
264     =cut
265    
266     sub create_inverted_index {
267     my $self = shift;
268     my %parm = @_;
269    
270     croak "No attribute specified" unless $parm{attribute};
271     croak "No pipeline specified" unless $parm{pipeline};
272    
273     $parm{predicate} ||= $parm{pipeline}->[-1];
274 ulpfr 13
275 ulpfr 10 croak "Cannot create index for table aready populated"
276     if $self->{nextk} > 1;
277 ulpfr 13
278 ulpfr 10 require WAIT::InvertedIndex;
279    
280     # backward compatibility stuff
281     my %opt = %parm;
282     for (qw(attribute pipeline predicate)) {
283     delete $opt{$_};
284     }
285 ulpfr 13
286 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287     my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288     filter => [@{$parm{pipeline}}], # clone
289     name => $name,
290     attr => $parm{attribute},
291     %opt, # backward compatibility stuff
292     );
293     # We will have to use $parm{predicate} here
294     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295     }
296    
297     sub dir {
298     $_[0]->{file};
299     }
300    
301     =head2 C<$tb-E<gt>layout>
302    
303     Returns the reference to the associated parser object.
304    
305     =cut
306    
307     sub layout { $_[0]->{layout} }
308    
309     =head2 C<$tb-E<gt>fields>
310    
311     Returns the array of attribute names.
312    
313     =cut
314    
315    
316     sub fields { keys %{$_[0]->{inverted}}}
317    
318     =head2 C<$tb-E<gt>drop>
319    
320     Must be called via C<WAIT::Database::drop_table>
321    
322     =cut
323    
324     sub drop {
325     my $self = shift;
326 ulpfr 35
327     unless ($self->{write_lock}){
328     warn "Cannot drop table without write lock. Nothing done";
329     return;
330     }
331    
332 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
333     $self->close; # just make sure
334 ulpfr 35
335 ulpfr 10 my $file = $self->{file};
336    
337     for (values %{$self->{indexes}}) {
338     $_->drop;
339     }
340 dpavlin 89 unlink "$file/records";
341     rmdir "$file/read" or warn "Could not rmdir '$file/read'";
342    
343     # $self->unlock;
344     ! (!-e $file or rmdir $file);
345 ulpfr 10 } else {
346     croak ref($self)."::drop called directly";
347     }
348     }
349    
350     sub mrequire ($) {
351     my $module = shift;
352    
353     $module =~ s{::}{/}g;
354     $module .= '.pm';
355     require $module;
356     }
357    
358     sub open {
359     my $self = shift;
360     my $file = $self->{file} . '/records';
361    
362     mrequire ref($self); # that's tricky eh?
363     if (defined $self->{'layout'}) {
364     mrequire ref($self->{'layout'});
365     }
366     if (defined $self->{'access'}) {
367     mrequire ref($self->{'access'});
368     }
369     if (exists $self->{indexes}) {
370     require WAIT::Index;
371     for (values %{$self->{indexes}}) {
372     $_->{mode} = $self->{mode};
373     }
374     }
375     if (exists $self->{inverted}) {
376     my ($att, $idx);
377     for $att (keys %{$self->{inverted}}) {
378     for $idx (@{$self->{inverted}->{$att}}) {
379     $idx->{mode} = $self->{mode};
380     }
381     }
382     require WAIT::InvertedIndex;
383     }
384 ulpfr 42
385     $self->getlock($self->{mode});
386    
387 ulpfr 10 unless (defined $self->{dbh}) {
388     if ($USE_RECNO) {
389 dpavlin 89 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
390     $self->{mode}, 0664, $DB_RECNO);
391 ulpfr 10 } else {
392     $self->{dbh} =
393 dpavlin 89 tie(%{$self->{db}}, 'DB_File', $file,
394     $self->{mode}, 0664, $DB_BTREE);
395 ulpfr 10 }
396     }
397 ulpfr 35
398    
399 ulpfr 10 $self;
400     }
401    
402     sub fetch_extern {
403     my $self = shift;
404    
405 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
406 ulpfr 10 if (exists $self->{'access'}) {
407     mrequire ref($self->{'access'});
408     $self->{'access'}->FETCH(@_);
409     }
410     }
411    
412     sub fetch_extern_by_id {
413     my $self = shift;
414    
415     $self->fetch_extern($self->fetch(@_));
416     }
417    
418     sub _find_index {
419     my $self = shift;
420     my (@att) = @_;
421     my %att;
422     my $name;
423 ulpfr 13
424 ulpfr 10 @att{@att} = @att;
425    
426     KEY: for $name (keys %{$self->{indexes}}) {
427     my @iat = split /-/, $name;
428     for (@iat) {
429     next KEY unless exists $att{$_};
430     }
431     return $self->{indexes}->{$name};
432     }
433     return undef;
434     }
435    
436     sub have {
437     my $self = shift;
438     my %parm = @_;
439    
440 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
441    
442 ulpfr 10 defined $self->{db} or $self->open;
443     return $index->have(@_);
444     }
445    
446     sub insert {
447     my $self = shift;
448     my %parm = @_;
449    
450     defined $self->{db} or $self->open;
451    
452 ulpfr 13 # We should move all writing methods to a subclass to check only once
453     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
454    
455 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
456     my $key;
457     my @deleted = keys %{$self->{deleted}};
458 ulpfr 19 my $gotkey = 0;
459 ulpfr 10
460     if (@deleted) {
461     $key = pop @deleted;
462     delete $self->{deleted}->{$key};
463 ulpfr 19 # Sanity check
464     if ($key && $key>0) {
465     $gotkey=1;
466 ulpfr 10 } else {
467 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
468     "key[%s]: Please rebuild index\n",
469     $key
470     ));
471     }
472     }
473     unless ($gotkey) {
474 ulpfr 10 $key = $self->{nextk}++;
475     }
476     if ($USE_RECNO) {
477     $self->{db}->[$key] = $tuple;
478     } else {
479     $self->{db}->{$key} = $tuple;
480     }
481     for (values %{$self->{indexes}}) {
482     unless ($_->insert($key, %parm)) {
483     # duplicate key, undo changes
484     if ($key == $self->{nextk}-1) {
485     $self->{nextk}--;
486     } else {
487 ulpfr 19 # warn "setting key[$key] deleted during insert";
488 ulpfr 10 $self->{deleted}->{$key}=1;
489     }
490     my $idx;
491     for $idx (values %{$self->{indexes}}) {
492     last if $idx eq $_;
493     $idx->remove($key, %parm);
494     }
495     return undef;
496 ulpfr 13 }
497 ulpfr 10 }
498     if (defined $self->{inverted}) {
499     my $att;
500     for $att (keys %{$self->{inverted}}) {
501     if (defined $parm{$att}) {
502     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
503     #map $_->sync, @{$self->{inverted}->{$att}}
504     }
505     }
506     }
507     $key
508     }
509    
510     sub sync {
511     my $self = shift;
512 ulpfr 13
513 ulpfr 10 for (values %{$self->{indexes}}) {
514     map $_->sync, $_;
515     }
516     if (defined $self->{inverted}) {
517     my $att;
518     for $att (keys %{$self->{inverted}}) {
519     map $_->sync, @{$self->{inverted}->{$att}}
520     }
521     }
522     }
523    
524     sub fetch {
525     my $self = shift;
526     my $key = shift;
527    
528     return () if exists $self->{deleted}->{$key};
529 ulpfr 13
530 ulpfr 10 defined $self->{db} or $self->open;
531     if ($USE_RECNO) {
532     $self->unpack($self->{db}->[$key]);
533     } else {
534     $self->unpack($self->{db}->{$key});
535     }
536     }
537    
538     sub delete_by_key {
539     my $self = shift;
540     my $key = shift;
541    
542 ulpfr 19 unless ($key) {
543     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
544     return;
545     }
546    
547 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
548     my %tuple = $self->fetch($key);
549     for (values %{$self->{indexes}}) {
550     $_->delete($key, %tuple);
551     }
552     if (defined $self->{inverted}) {
553     # User *must* provide the full record for this or the entries
554     # in the inverted index will not be removed
555     %tuple = (%tuple, @_);
556     my $att;
557     for $att (keys %{$self->{inverted}}) {
558     if (defined $tuple{$att}) {
559     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
560     }
561     }
562     }
563 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
564 ulpfr 10 ++$self->{deleted}->{$key};
565     }
566    
567     sub delete {
568     my $self = shift;
569     my $tkey = $self->have(@_);
570 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
571 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
572     }
573    
574     sub unpack {
575 laperla 31 my($self, $tuple) = @_;
576 ulpfr 10
577 laperla 31 unless (defined $tuple){
578     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
579     warn("Debug: somebody called unpack without argument tuple!");
580     return;
581     }
582    
583 ulpfr 10 my $att;
584     my @result;
585     my @tuple = split /$;/, $tuple;
586    
587     for $att (@{$self->{attr}}) {
588     push @result, $att, shift @tuple;
589     }
590     @result;
591     }
592    
593 ulpfr 19 sub set {
594     my ($self, $iattr, $value) = @_;
595    
596 laperla 31 unless ($self->{write_lock}){
597     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
598     return;
599 ulpfr 24 }
600 laperla 77
601     # in the rare case that they haven't written a single record yet, we
602     # make sure, the inverted inherits our $self->{mode}:
603     defined $self->{db} or $self->open;
604    
605 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
606 dpavlin 86 require WAIT::InvertedIndex;
607     if ($^V gt v5.003) { # avoid bug in perl up to 5.003_05
608 ulpfr 19 my $idx;
609     for $idx (@{$self->{inverted}->{$att}}) {
610     $idx->set($iattr, $value);
611     }
612     } else {
613     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
614     }
615     }
616    
617     1;
618     }
619    
620 ulpfr 10 sub close {
621     my $self = shift;
622    
623     if (exists $self->{'access'}) {
624     eval {$self->{'access'}->close}; # dont bother if not opened
625     }
626 laperla 31 if ($WAIT::Index::VERSION) {
627     for (values %{$self->{indexes}}) {
628     $_->close();
629     }
630 ulpfr 10 }
631 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
632     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
633     # if WAIT::InvertedIndex has not been loaded, they cannot have
634     # been altered so far
635 ulpfr 10 my $att;
636     for $att (keys %{$self->{inverted}}) {
637     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
638     my $idx;
639     for $idx (@{$self->{inverted}->{$att}}) {
640     $idx->close;
641     }
642     } else {
643     map $_->close(), @{$self->{inverted}->{$att}};
644     }
645     }
646     }
647     if ($self->{dbh}) {
648     delete $self->{dbh};
649    
650     if ($USE_RECNO) {
651     untie @{$self->{db}};
652     } else {
653     untie %{$self->{db}};
654     }
655     delete $self->{db};
656     }
657    
658 ulpfr 19 $self->unlock;
659    
660 ulpfr 10 1;
661     }
662    
663 ulpfr 35 # Locking
664     #
665     # We allow multiple readers to coexists. But write access excludes
666     # all read access and vice versa. In practice read access on tables
667     # open for writing will mostly work ;-)
668    
669     # If a "write" lock is requested, an existing "read" lock will be
670     # released. If a "read" lock ist requested, an existing "write" lock
671     # will be released. Requiring a lock already hold has no effect.
672 laperla 41
673 ulpfr 35 sub getlock {
674     my ($self, $mode) = @_;
675 laperla 41
676     # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
677     #
678     my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
679 dpavlin 89 my $file = $self->{file} . '/records';
680     my $lockdir = $self->{file} . '/read';
681 ulpfr 35
682     unless (-d $lockdir) {
683     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
684     }
685    
686     if ($mode & O_RDWR) { # Get a write lock. Release it again
687     # and die if there is any valid
688     # readers.
689    
690     # Have a write lock already
691     return $self if $self->{write_lock};
692    
693     if ($self->{read_lock}) { # We are a becoming a writer now. So
694     # we release the read lock to avoid
695     # blocking ourselves.
696     $self->{read_lock}->release;
697     delete $self->{read_lock};
698     }
699    
700     # Get the preliminary write lock
701 dpavlin 89 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
702     or die "Can't lock '$self->{file}/write'";
703 ulpfr 35
704     # If we actually want to write we must check if there are any
705     # readers. The write lock is confirmed if wen cannot find any
706     # valid readers.
707    
708     local *DIR;
709     opendir DIR, $lockdir or
710     die "Could not opendir '$lockdir': $!";
711     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
712     # Check if the locks are still valid. Since we are protected by
713     # a write lock, we could use a plain file. But we want to use
714     # the stale testing from LockFile::Simple.
715     if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
716     warn "Removing stale lockfile '$lockdir/$lockfile'";
717     $lck->release;
718     } else { # Found an active reader, rats!
719     $self->{write_lock}->release;
720     die "Cannot write table '$file' while it's in use";
721     }
722     }
723     closedir DIR;
724     } else {
725     # Have a read lock already
726     return $self if $self->{read_lock};
727    
728     # Get the preliminary write lock to protect the directory
729 ulpfr 66 # operations.
730 ulpfr 35
731 dpavlin 89 my $write_lock = $lockmgr->lock($self->{file} . '/read/write')
732     or die "Can't lock '$self->{file}/read/write'";
733 ulpfr 35
734     # Find a new read slot. Maybe the plain file would be better?
735     my $id = time;
736     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
737     $id++;
738     }
739    
740     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
741     or die "Can't lock '$lockdir/$id'";
742    
743     # We are a reader now. So we release the write lock
744 ulpfr 66 $write_lock->release;
745 ulpfr 35 }
746     return $self;
747     }
748    
749 ulpfr 19 sub unlock {
750     my $self = shift;
751    
752     # Either we have a read or a write lock (or we close the table already)
753     # unless ($self->{read_lock} || $self->{write_lock}) {
754     # warn "WAIT::Table::unlock: Table aparently hold's no lock"
755     # }
756     if ($self->{write_lock}) {
757     $self->{write_lock}->release();
758     delete $self->{write_lock};
759     }
760     if ($self->{read_lock}) {
761     $self->{read_lock}->release();
762     delete $self->{read_lock};
763     }
764    
765     }
766    
767 ulpfr 13 sub DESTROY {
768     my $self = shift;
769    
770 ulpfr 35 if ($self->{write_lock} || $self->{read_lock}) {
771     warn "Table handle destroyed without closing it first";
772     $self->unlock;
773     }
774 ulpfr 13 }
775    
776 ulpfr 10 sub open_scan {
777     my $self = shift;
778     my $code = shift;
779    
780     $self->{dbh} or $self->open;
781     require WAIT::Scan;
782     new WAIT::Scan $self, $self->{nextk}-1, $code;
783     }
784    
785     sub open_index_scan {
786     my $self = shift;
787     my $attr = shift;
788     my $code = shift;
789     my $name = join '-', @$attr;
790    
791     if (defined $self->{indexes}->{$name}) {
792     $self->{indexes}->{$name}->open_scan($code);
793     } else {
794     croak "No such index '$name'";
795     }
796     }
797    
798     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
799    
800     sub prefix {
801     my ($self , $attr, $prefix) = @_;
802     my %result;
803    
804     defined $self->{db} or $self->open; # require layout
805    
806     for (@{$self->{inverted}->{$attr}}) {
807     my $result = $_->prefix($prefix);
808     if (defined $result) {
809     $result{$_->name} = $result;
810     }
811     }
812     bless \%result, 'WAIT::Query::Raw';
813     }
814    
815     sub intervall {
816     my ($self, $attr, $lb, $ub) = @_;
817     my %result;
818    
819     defined $self->{db} or $self->open; # require layout
820    
821     for (@{$self->{inverted}->{$attr}}) {
822     my $result = $_->intervall($lb, $ub);
823     if (defined $result) {
824     $result{$_->name} = $result;
825     }
826     }
827     bless \%result, 'WAIT::Query::Raw';
828     }
829    
830     sub search {
831 ulpfr 19 my $self = shift;
832     my ($query, $attr, $cont, $raw);
833     if (ref $_[0]) {
834     $query = shift;
835    
836     $attr = $query->{attr};
837     $cont = $query->{cont};
838     $raw = $query->{raw};
839     } else {
840     require Carp;
841     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
842     $attr = shift;
843     $cont = shift;
844     $raw = shift;
845     $query = {
846     attr => $attr,
847     cont => $cont,
848     raw => $raw,
849     };
850     }
851    
852 ulpfr 10 my %result;
853    
854     defined $self->{db} or $self->open; # require layout
855    
856     if ($raw) {
857     for (@{$self->{inverted}->{$attr}}) {
858     my $name = $_->name;
859     if (exists $raw->{$name} and @{$raw->{$name}}) {
860     my $scale = 1/scalar(@{$raw->{$name}});
861 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
862 ulpfr 10 my ($key, $val);
863     while (($key, $val) = each %r) {
864     if (exists $result{$key}) {
865     $result{$key} += $val*$scale;
866     } else {
867     $result{$key} = $val*$scale;
868     }
869     }
870     }
871     }
872     }
873     if (defined $cont and $cont ne '') {
874     for (@{$self->{inverted}->{$attr}}) {
875 ulpfr 19 my %r = $_->search($query, $cont);
876 ulpfr 10 my ($key, $val);
877     while (($key, $val) = each %r) {
878     if (exists $result{$key}) {
879     $result{$key} += $val;
880     } else {
881     $result{$key} = $val;
882     }
883     }
884     }
885     }
886     # sanity check for deleted documents.
887     # this should not be necessary !@#$
888     for (keys %result) {
889     delete $result{$_} if $self->{deleted}->{$_}
890     }
891     %result;
892     }
893    
894     sub hilight_positions {
895     my ($self, $attr, $text, $query, $raw) = @_;
896     my %pos;
897    
898     if (defined $raw) {
899 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
900     # WAIT::InvertedIndex for
901     # this index field $attr
902 ulpfr 10 my $name = $_->name;
903     if (exists $raw->{$name}) {
904     my %qt;
905     grep $qt{$_}++, @{$raw->{$name}};
906     for ($_->parse_pos($text)) {
907     if (exists $qt{$_->[0]}) {
908     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
909     }
910     }
911     }
912     }
913     }
914     if (defined $query) {
915     for (@{$self->{inverted}->{$attr}}) {
916     my %qt;
917    
918     grep $qt{$_}++, $_->parse($query);
919     for ($_->parse_pos($text)) {
920     if (exists $qt{$_->[0]}) {
921     if (exists $pos{$_->[1]}) { # perl -w ;-)
922     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
923     } else {
924     $pos{$_->[1]} = length($_->[0]);
925     }
926     }
927     }
928     }
929     }
930    
931     \%pos;
932     }
933    
934     sub hilight {
935 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
936     my $layout = $tb->layout();
937    
938 ulpfr 10 my @result;
939    
940 ulpfr 13 $qplain ||= {};
941     $qraw ||= {};
942     my @ttxt = $layout->tag($buf);
943 ulpfr 10 while (@ttxt) {
944     no strict 'refs';
945     my %tag = %{shift @ttxt};
946     my $txt = shift @ttxt;
947     my $fld;
948    
949     my %hl;
950 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
951 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
952 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
953 ulpfr 10 for (keys %$hp) {
954     if (exists $hl{$_}) { # -w ;-(
955     $hl{$_} = max($hl{$_}, $hp->{$_});
956     } else {
957     $hl{$_} = $hp->{$_};
958     }
959     }
960     }
961     my $pos;
962     my $qt = {_qt => 1, %tag};
963     my $pl = \%tag;
964     my $last = length($txt);
965     my @tmp;
966     for $pos (sort {$b <=> $a} keys %hl) {
967     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
968     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
969     $last = $pos;
970     }
971     push @result, $pl, substr($txt,0,$last);
972     push @result, @tmp;
973     }
974     @result; # no speed necessary
975     }
976    
977     1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26