/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (show annotations)
Mon May 24 13:44:01 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 25308 byte(s)
move cvs-head to trunk

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Sat Apr 27 17:20:31 2002
8 # Language : CPerl
9 # Update Count : 172
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use BerkeleyDB;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-e $self->{file}){
163 warn "Warning: file '$self->{file}' already exists\n";
164 }
165
166 $self->{djk} = $parm{djk} if defined $parm{djk};
167 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
168 $self->{access} = $parm{access} if defined $parm{access};
169 $self->{nextk} = 1; # next record to insert; first record unused
170 $self->{deleted} = {}; # no deleted records yet
171 $self->{indexes} = {};
172
173 bless $self, $type;
174
175 # Checking for readers is not necessary, but let's go with the
176 # generic method.
177 $self->getlock(O_RDWR|O_CREAT); # dies when failing
178
179 # Call create_index() and create_index() for compatibility
180 for (@{$self->{keyset}||[]}) {
181 #carp "Specification of indexes at table create time is deprecated";
182 $self->create_index(@$_);
183 }
184 while (@{$parm{invindex}||[]}) {
185 # carp "Specification of inverted indexes at table create time is deprecated";
186 my $att = shift @{$parm{invindex}};
187 my @spec = @{shift @{$parm{invindex}}};
188 my @opt;
189
190 if (ref($spec[0])) {
191 carp "Secondary pipelines are deprecated\n";
192 @opt = %{shift @spec};
193 }
194 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
195 }
196
197 $self;
198 # end of backwarn compatibility stuff
199 }
200
201 =head2 Creating an index
202
203 $tb->create_index('docid');
204
205 =item C<create_index>
206
207 must be called with a list of attributes. This must be a subset of the
208 attributes specified when the table was created. Currently this
209 method must be called before the first tuple is inserted in the
210 table!
211
212 =cut
213
214 sub create_index {
215 my $self= shift;
216
217 croak "Cannot create index for table aready populated"
218 if $self->{nextk} > 1;
219
220 require WAIT::Index;
221
222 my $name = join '-', @_;
223 $self->{indexes}->{$name} =
224 new WAIT::Index file => $self->{file}, name => $name, attr => $_;
225 }
226
227 =head2 Creating an inverted index
228
229 $tb->create_inverted_index
230 (attribute => 'au',
231 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
232 predicate => 'plain',
233 );
234
235 =over 5
236
237 =item C<attribute>
238
239 The attribute to build the index on. This attribute may not be in the
240 set attributes specified when the table was created.
241
242 =item C<pipeline>
243
244 A piplines specification is a reference to an array of method names
245 (from package C<WAIT::Filter>) which are to be applied in sequence to
246 the contents of the named attribute. The attribute name may not be in
247 the attribute list.
248
249 =item C<predicate>
250
251 An indication which predicate the index implements. This may be
252 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
253 query processing. Currently there is no standard set of predicate
254 names. The predicate defaults to the last member of the pipeline if
255 omitted.
256
257 =back
258
259 Currently this method must be called before the first tuple is
260 inserted in the table!
261
262 =cut
263
264 sub create_inverted_index {
265 my $self = shift;
266 my %parm = @_;
267
268 croak "No attribute specified" unless $parm{attribute};
269 croak "No pipeline specified" unless $parm{pipeline};
270
271 $parm{predicate} ||= $parm{pipeline}->[-1];
272
273 croak "Cannot create index for table aready populated"
274 if $self->{nextk} > 1;
275
276 require WAIT::InvertedIndex;
277
278 # backward compatibility stuff
279 my %opt = %parm;
280 for (qw(attribute pipeline predicate)) {
281 delete $opt{$_};
282 }
283
284 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
285 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
286 filter => [@{$parm{pipeline}}], # clone
287 name => $name,
288 attr => $parm{attribute},
289 %opt, # backward compatibility stuff
290 );
291 # We will have to use $parm{predicate} here
292 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
293 }
294
295 sub dir {
296 $_[0]->{file};
297 }
298
299 =head2 C<$tb-E<gt>layout>
300
301 Returns the reference to the associated parser object.
302
303 =cut
304
305 sub layout { $_[0]->{layout} }
306
307 =head2 C<$tb-E<gt>fields>
308
309 Returns the array of attribute names.
310
311 =cut
312
313
314 sub fields { keys %{$_[0]->{inverted}}}
315
316 =head2 C<$tb-E<gt>drop>
317
318 Must be called via C<WAIT::Database::drop_table>
319
320 =cut
321
322 sub drop {
323 my $self = shift;
324
325 unless ($self->{write_lock}){
326 warn "Cannot drop table without write lock. Nothing done";
327 return;
328 }
329
330 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
331 $self->close; # just make sure
332
333 my $file = $self->{file};
334
335 for (values %{$self->{indexes}}) {
336 $_->drop;
337 }
338 rmdir "$file.read" or warn "Could not rmdir '$file/read'";
339 unlink "$file";
340
341 } else {
342 croak ref($self)."::drop called directly";
343 }
344 }
345
346 sub mrequire ($) {
347 my $module = shift;
348
349 $module =~ s{::}{/}g;
350 $module .= '.pm';
351 require $module;
352 }
353
354 sub open {
355 my $self = shift;
356 my $file = $self->{file} . '/records';
357
358 mrequire ref($self); # that's tricky eh?
359 if (defined $self->{'layout'}) {
360 mrequire ref($self->{'layout'});
361 }
362 if (defined $self->{'access'}) {
363 mrequire ref($self->{'access'});
364 }
365 if (exists $self->{indexes}) {
366 require WAIT::Index;
367 for (values %{$self->{indexes}}) {
368 $_->{mode} = $self->{mode};
369 }
370 }
371 if (exists $self->{inverted}) {
372 my ($att, $idx);
373 for $att (keys %{$self->{inverted}}) {
374 for $idx (@{$self->{inverted}->{$att}}) {
375 $idx->{mode} = $self->{mode};
376 }
377 }
378 require WAIT::InvertedIndex;
379 }
380
381 $self->getlock($self->{mode});
382
383 my $dbmode = ($self->{mode} & O_CREAT) ? DB_CREATE : 0;
384 unless (defined $self->{dbh}) {
385 if ($USE_RECNO) {
386 tie(%{$self->{db}}, 'BerkeleyDB::Recno',
387 -Filename => $self->{file},
388 -Subname => 'records',
389 -Flags => $dbmode);
390 } else {
391 $self->{dbh} =
392 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
393 -Filename => $self->{file},
394 -Subname => 'records',
395 -Mode => 0664,
396 -Flags => $dbmode);
397 }
398 }
399
400
401 $self;
402 }
403
404 sub fetch_extern {
405 my $self = shift;
406
407 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
408 if (exists $self->{'access'}) {
409 mrequire ref($self->{'access'});
410 $self->{'access'}->FETCH(@_);
411 }
412 }
413
414 sub fetch_extern_by_id {
415 my $self = shift;
416
417 $self->fetch_extern($self->fetch(@_));
418 }
419
420 sub _find_index {
421 my $self = shift;
422 my (@att) = @_;
423 my %att;
424 my $name;
425
426 @att{@att} = @att;
427
428 KEY: for $name (keys %{$self->{indexes}}) {
429 my @iat = split /-/, $name;
430 for (@iat) {
431 next KEY unless exists $att{$_};
432 }
433 return $self->{indexes}->{$name};
434 }
435 return undef;
436 }
437
438 sub have {
439 my $self = shift;
440 my %parm = @_;
441
442 my $index = $self->_find_index(keys %parm) or return; # no index-no have
443
444 defined $self->{db} or $self->open;
445 return $index->have(@_);
446 }
447
448 sub insert {
449 my $self = shift;
450 my %parm = @_;
451
452 defined $self->{db} or $self->open;
453
454 # We should move all writing methods to a subclass to check only once
455 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
456
457 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
458 my $key;
459 my @deleted = keys %{$self->{deleted}};
460 my $gotkey = 0;
461
462 if (@deleted) {
463 $key = pop @deleted;
464 delete $self->{deleted}->{$key};
465 # Sanity check
466 if ($key && $key>0) {
467 $gotkey=1;
468 } else {
469 warn(sprintf("WAIT database inconsistency during insert ".
470 "key[%s]: Please rebuild index\n",
471 $key
472 ));
473 }
474 }
475 unless ($gotkey) {
476 $key = $self->{nextk}++;
477 }
478 if ($USE_RECNO) {
479 $self->{db}->[$key] = $tuple;
480 } else {
481 $self->{db}->{$key} = $tuple;
482 }
483 for (values %{$self->{indexes}}) {
484 unless ($_->insert($key, %parm)) {
485 # duplicate key, undo changes
486 if ($key == $self->{nextk}-1) {
487 $self->{nextk}--;
488 } else {
489 # warn "setting key[$key] deleted during insert";
490 $self->{deleted}->{$key}=1;
491 }
492 my $idx;
493 for $idx (values %{$self->{indexes}}) {
494 last if $idx eq $_;
495 $idx->remove($key, %parm);
496 }
497 return undef;
498 }
499 }
500 if (defined $self->{inverted}) {
501 my $att;
502 for $att (keys %{$self->{inverted}}) {
503 if (defined $parm{$att}) {
504 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
505 #map $_->sync, @{$self->{inverted}->{$att}}
506 }
507 }
508 }
509 $key
510 }
511
512 sub sync {
513 my $self = shift;
514
515 for (values %{$self->{indexes}}) {
516 map $_->sync, $_;
517 }
518 if (defined $self->{inverted}) {
519 my $att;
520 for $att (keys %{$self->{inverted}}) {
521 map $_->sync, @{$self->{inverted}->{$att}}
522 }
523 }
524 }
525
526 sub fetch {
527 my $self = shift;
528 my $key = shift;
529
530 return () if exists $self->{deleted}->{$key};
531
532 defined $self->{db} or $self->open;
533 if ($USE_RECNO) {
534 $self->unpack($self->{db}->[$key]);
535 } else {
536 $self->unpack($self->{db}->{$key});
537 }
538 }
539
540 sub delete_by_key {
541 my $self = shift;
542 my $key = shift;
543
544 unless ($key) {
545 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
546 return;
547 }
548
549 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
550 my %tuple = $self->fetch($key);
551 for (values %{$self->{indexes}}) {
552 $_->delete($key, %tuple);
553 }
554 if (defined $self->{inverted}) {
555 # User *must* provide the full record for this or the entries
556 # in the inverted index will not be removed
557 %tuple = (%tuple, @_);
558 my $att;
559 for $att (keys %{$self->{inverted}}) {
560 if (defined $tuple{$att}) {
561 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
562 }
563 }
564 }
565 # warn "setting key[$key] deleted during delete_by_key";
566 ++$self->{deleted}->{$key};
567 }
568
569 sub delete {
570 my $self = shift;
571 my $tkey = $self->have(@_);
572 # warn "tkey[$tkey]\@_[@_]";
573 defined $tkey && $self->delete_by_key($tkey, @_);
574 }
575
576 sub unpack {
577 my($self, $tuple) = @_;
578
579 unless (defined $tuple){
580 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
581 warn("Debug: somebody called unpack without argument tuple!");
582 return;
583 }
584
585 my $att;
586 my @result;
587 my @tuple = split /$;/, $tuple;
588
589 for $att (@{$self->{attr}}) {
590 push @result, $att, shift @tuple;
591 }
592 @result;
593 }
594
595 sub set {
596 my ($self, $iattr, $value) = @_;
597
598 unless ($self->{write_lock}){
599 warn "Cannot set iattr[$iattr] without write lock. Nothing done";
600 return;
601 }
602
603 # in the rare case that they haven't written a single record yet, we
604 # make sure, the inverted inherits our $self->{mode}:
605 defined $self->{db} or $self->open;
606
607 for my $att (keys %{$self->{inverted}}) {
608 require WAIT::InvertedIndex;
609 if ($^V gt v5.003) { # avoid bug in perl up to 5.003_05
610 my $idx;
611 for $idx (@{$self->{inverted}->{$att}}) {
612 $idx->set($iattr, $value);
613 }
614 } else {
615 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
616 }
617 }
618
619 1;
620 }
621
622 sub close {
623 my $self = shift;
624
625 if (exists $self->{'access'}) {
626 eval {$self->{'access'}->close}; # dont bother if not opened
627 }
628 if ($WAIT::Index::VERSION) {
629 for (values %{$self->{indexes}}) {
630 $_->close();
631 }
632 }
633 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
634 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
635 # if WAIT::InvertedIndex has not been loaded, they cannot have
636 # been altered so far
637 my $att;
638 for $att (keys %{$self->{inverted}}) {
639 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
640 my $idx;
641 for $idx (@{$self->{inverted}->{$att}}) {
642 $idx->close;
643 }
644 } else {
645 map $_->close(), @{$self->{inverted}->{$att}};
646 }
647 }
648 }
649 if ($self->{dbh}) {
650 delete $self->{dbh};
651
652 if ($USE_RECNO) {
653 untie @{$self->{db}};
654 } else {
655 untie %{$self->{db}};
656 }
657 delete $self->{db};
658 }
659
660 $self->unlock;
661
662 1;
663 }
664
665 # Locking
666 #
667 # We allow multiple readers to coexists. But write access excludes
668 # all read access and vice versa. In practice read access on tables
669 # open for writing will mostly work ;-)
670
671 # If a "write" lock is requested, an existing "read" lock will be
672 # released. If a "read" lock ist requested, an existing "write" lock
673 # will be released. Requiring a lock already hold has no effect.
674
675 sub getlock {
676 my ($self, $mode) = @_;
677
678 # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
679 #
680 my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
681 my $file = $self->{file};
682 my $lockdir = $self->{file} . '.read';
683
684 unless (-d $lockdir) {
685 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
686 }
687
688 if ($mode & O_RDWR) { # Get a write lock. Release it again
689 # and die if there is any valid
690 # readers.
691
692 # Have a write lock already
693 return $self if $self->{write_lock};
694
695 if ($self->{read_lock}) { # We are a becoming a writer now. So
696 # we release the read lock to avoid
697 # blocking ourselves.
698 $self->{read_lock}->release;
699 delete $self->{read_lock};
700 }
701
702 # Get the preliminary write lock
703 $self->{write_lock} = $lockmgr->lock($self->{file} . '.write')
704 or die "Can't lock '$self->{file}.write'";
705
706 # If we actually want to write we must check if there are any
707 # readers. The write lock is confirmed if wen cannot find any
708 # valid readers.
709
710 local *DIR;
711 opendir DIR, $lockdir or
712 die "Could not opendir '$lockdir': $!";
713 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
714 # Check if the locks are still valid. Since we are protected by
715 # a write lock, we could use a plain file. But we want to use
716 # the stale testing from LockFile::Simple.
717 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
718 warn "Removing stale lockfile '$lockdir/$lockfile'";
719 $lck->release;
720 } else { # Found an active reader, rats!
721 $self->{write_lock}->release;
722 die "Cannot write table '$file' while it's in use";
723 }
724 }
725 closedir DIR;
726 } else {
727 # Have a read lock already
728 return $self if $self->{read_lock};
729
730 # Get the preliminary write lock to protect the directory
731 # operations.
732
733 my $write_lock = $lockmgr->lock($self->{file} . '.read/write')
734 or die "Can't lock '$self->{file}.read/write'";
735
736 # Find a new read slot. Maybe the plain file would be better?
737 my $id = time;
738 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
739 $id++;
740 }
741
742 $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
743 or die "Can't lock '$lockdir/$id'";
744
745 # We are a reader now. So we release the write lock
746 $write_lock->release;
747 }
748 return $self;
749 }
750
751 sub unlock {
752 my $self = shift;
753
754 # Either we have a read or a write lock (or we close the table already)
755 # unless ($self->{read_lock} || $self->{write_lock}) {
756 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
757 # }
758 if ($self->{write_lock}) {
759 $self->{write_lock}->release();
760 delete $self->{write_lock};
761 }
762 if ($self->{read_lock}) {
763 $self->{read_lock}->release();
764 delete $self->{read_lock};
765 }
766
767 }
768
769 sub DESTROY {
770 my $self = shift;
771
772 if ($self->{write_lock} || $self->{read_lock}) {
773 warn "Table handle destroyed without closing it first";
774 $self->unlock;
775 }
776 }
777
778 sub open_scan {
779 my $self = shift;
780 my $code = shift;
781
782 $self->{dbh} or $self->open;
783 require WAIT::Scan;
784 new WAIT::Scan $self, $self->{nextk}-1, $code;
785 }
786
787 sub open_index_scan {
788 my $self = shift;
789 my $attr = shift;
790 my $code = shift;
791 my $name = join '-', @$attr;
792
793 if (defined $self->{indexes}->{$name}) {
794 $self->{indexes}->{$name}->open_scan($code);
795 } else {
796 croak "No such index '$name'";
797 }
798 }
799
800 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
801
802 sub prefix {
803 my ($self , $attr, $prefix) = @_;
804 my %result;
805
806 defined $self->{db} or $self->open; # require layout
807
808 for (@{$self->{inverted}->{$attr}}) {
809 my $result = $_->prefix($prefix);
810 if (defined $result) {
811 $result{$_->name} = $result;
812 }
813 }
814 bless \%result, 'WAIT::Query::Raw';
815 }
816
817 sub intervall {
818 my ($self, $attr, $lb, $ub) = @_;
819 my %result;
820
821 defined $self->{db} or $self->open; # require layout
822
823 for (@{$self->{inverted}->{$attr}}) {
824 my $result = $_->intervall($lb, $ub);
825 if (defined $result) {
826 $result{$_->name} = $result;
827 }
828 }
829 bless \%result, 'WAIT::Query::Raw';
830 }
831
832 sub search {
833 my $self = shift;
834 my ($query, $attr, $cont, $raw);
835 if (ref $_[0]) {
836 $query = shift;
837
838 $attr = $query->{attr};
839 $cont = $query->{cont};
840 $raw = $query->{raw};
841 } else {
842 require Carp;
843 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
844 $attr = shift;
845 $cont = shift;
846 $raw = shift;
847 $query = {
848 attr => $attr,
849 cont => $cont,
850 raw => $raw,
851 };
852 }
853
854 my %result;
855
856 defined $self->{db} or $self->open; # require layout
857
858 if ($raw) {
859 for (@{$self->{inverted}->{$attr}}) {
860 my $name = $_->name;
861 if (exists $raw->{$name} and @{$raw->{$name}}) {
862 my $scale = 1/scalar(@{$raw->{$name}});
863 my %r = $_->search_raw($query, @{$raw->{$name}});
864 my ($key, $val);
865 while (($key, $val) = each %r) {
866 if (exists $result{$key}) {
867 $result{$key} += $val*$scale;
868 } else {
869 $result{$key} = $val*$scale;
870 }
871 }
872 }
873 }
874 }
875 if (defined $cont and $cont ne '') {
876 for (@{$self->{inverted}->{$attr}}) {
877 my %r = $_->search($query, $cont);
878 my ($key, $val);
879 while (($key, $val) = each %r) {
880 if (exists $result{$key}) {
881 $result{$key} += $val;
882 } else {
883 $result{$key} = $val;
884 }
885 }
886 }
887 }
888 # sanity check for deleted documents.
889 # this should not be necessary !@#$
890 for (keys %result) {
891 delete $result{$_} if $self->{deleted}->{$_}
892 }
893 %result;
894 }
895
896 sub hilight_positions {
897 my ($self, $attr, $text, $query, $raw) = @_;
898 my %pos;
899
900 if (defined $raw) {
901 for (@{$self->{inverted}->{$attr}}) { # objects of type
902 # WAIT::InvertedIndex for
903 # this index field $attr
904 my $name = $_->name;
905 if (exists $raw->{$name}) {
906 my %qt;
907 grep $qt{$_}++, @{$raw->{$name}};
908 for ($_->parse_pos($text)) {
909 if (exists $qt{$_->[0]}) {
910 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
911 }
912 }
913 }
914 }
915 }
916 if (defined $query) {
917 for (@{$self->{inverted}->{$attr}}) {
918 my %qt;
919
920 grep $qt{$_}++, $_->parse($query);
921 for ($_->parse_pos($text)) {
922 if (exists $qt{$_->[0]}) {
923 if (exists $pos{$_->[1]}) { # perl -w ;-)
924 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
925 } else {
926 $pos{$_->[1]} = length($_->[0]);
927 }
928 }
929 }
930 }
931 }
932
933 \%pos;
934 }
935
936 sub hilight {
937 my ($tb, $buf, $qplain, $qraw) = @_;
938 my $layout = $tb->layout();
939
940 my @result;
941
942 $qplain ||= {};
943 $qraw ||= {};
944 my @ttxt = $layout->tag($buf);
945 while (@ttxt) {
946 no strict 'refs';
947 my %tag = %{shift @ttxt};
948 my $txt = shift @ttxt;
949 my $fld;
950
951 my %hl;
952 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
953 my $hp = $tb->hilight_positions($fld, $txt,
954 $qplain->{$fld}, $qraw->{$fld});
955 for (keys %$hp) {
956 if (exists $hl{$_}) { # -w ;-(
957 $hl{$_} = max($hl{$_}, $hp->{$_});
958 } else {
959 $hl{$_} = $hp->{$_};
960 }
961 }
962 }
963 my $pos;
964 my $qt = {_qt => 1, %tag};
965 my $pl = \%tag;
966 my $last = length($txt);
967 my @tmp;
968 for $pos (sort {$b <=> $a} keys %hl) {
969 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
970 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
971 $last = $pos;
972 }
973 push @result, $pl, substr($txt,0,$last);
974 push @result, @tmp;
975 }
976 @result; # no speed necessary
977 }
978
979 1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26