/[wait]/cvs-head/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31 - (show annotations)
Sun Nov 12 01:26:10 2000 UTC (23 years, 5 months ago) by laperla
File size: 23764 byte(s)
Table loads Index and doesn't load InvertedIndex anymore

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Fri May 19 14:51:14 2000
8 # Language : CPerl
9 # Update Count : 133
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use DB_File;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-d $self->{file}){
163 warn "Warning: Directory '$self->{file}' already exists\n";
164 } elsif (!mkdir($self->{file}, 0775)) {
165 croak "Could not 'mkdir $self->{file}': $!\n";
166 }
167
168 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
169 # aquire a write lock
170 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
171 or die "Can't lock '$self->{file}/write'";
172
173 $self->{djk} = $parm{djk} if defined $parm{djk};
174 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
175 $self->{access} = $parm{access} if defined $parm{access};
176 $self->{nextk} = 1; # next record to insert; first record unused
177 $self->{deleted} = {}; # no deleted records yet
178 $self->{indexes} = {};
179
180 bless $self, $type;
181 # Call create_index() and create_index() for compatibility
182 for (@{$self->{keyset}||[]}) {
183 #carp "Specification of indexes at table create time is deprecated";
184 $self->create_index(@$_);
185 }
186 while (@{$parm{invindex}||[]}) {
187 # carp "Specification of inverted indexes at table create time is deprecated";
188 my $att = shift @{$parm{invindex}};
189 my @spec = @{shift @{$parm{invindex}}};
190 my @opt;
191
192 if (ref($spec[0])) {
193 carp "Secondary pipelines are deprecated\n";
194 @opt = %{shift @spec};
195 }
196 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197 }
198
199 $self;
200 # end of backwarn compatibility stuff
201 }
202
203 =head2 Creating an index
204
205 $tb->create_index('docid');
206
207 =item C<create_index>
208
209 must be called with a list of attributes. This must be a subset of the
210 attributes specified when the table was created. Currently this
211 method must be called before the first tuple is inserted in the
212 table!
213
214 =cut
215
216 sub create_index {
217 my $self= shift;
218
219 croak "Cannot create index for table aready populated"
220 if $self->{nextk} > 1;
221
222 require WAIT::Index;
223
224 my $name = join '-', @_;
225 $self->{indexes}->{$name} =
226 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227 }
228
229 =head2 Creating an inverted index
230
231 $tb->create_inverted_index
232 (attribute => 'au',
233 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234 predicate => 'plain',
235 );
236
237 =over 5
238
239 =item C<attribute>
240
241 The attribute to build the index on. This attribute may not be in the
242 set attributes specified when the table was created.
243
244 =item C<pipeline>
245
246 A piplines specification is a reference to an array of method names
247 (from package C<WAIT::Filter>) which are to be applied in sequence to
248 the contents of the named attribute. The attribute name may not be in
249 the attribute list.
250
251 =item C<predicate>
252
253 An indication which predicate the index implements. This may be
254 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255 query processing. Currently there is no standard set of predicate
256 names. The predicate defaults to the last member of the pipeline if
257 omitted.
258
259 =back
260
261 Currently this method must be called before the first tuple is
262 inserted in the table!
263
264 =cut
265
266 sub create_inverted_index {
267 my $self = shift;
268 my %parm = @_;
269
270 croak "No attribute specified" unless $parm{attribute};
271 croak "No pipeline specified" unless $parm{pipeline};
272
273 $parm{predicate} ||= $parm{pipeline}->[-1];
274
275 croak "Cannot create index for table aready populated"
276 if $self->{nextk} > 1;
277
278 require WAIT::InvertedIndex;
279
280 # backward compatibility stuff
281 my %opt = %parm;
282 for (qw(attribute pipeline predicate)) {
283 delete $opt{$_};
284 }
285
286 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288 filter => [@{$parm{pipeline}}], # clone
289 name => $name,
290 attr => $parm{attribute},
291 %opt, # backward compatibility stuff
292 );
293 # We will have to use $parm{predicate} here
294 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295 }
296
297 sub dir {
298 $_[0]->{file};
299 }
300
301 =head2 C<$tb-E<gt>layout>
302
303 Returns the reference to the associated parser object.
304
305 =cut
306
307 sub layout { $_[0]->{layout} }
308
309 =head2 C<$tb-E<gt>fields>
310
311 Returns the array of attribute names.
312
313 =cut
314
315
316 sub fields { keys %{$_[0]->{inverted}}}
317
318 =head2 C<$tb-E<gt>drop>
319
320 Must be called via C<WAIT::Database::drop_table>
321
322 =cut
323
324 sub drop {
325 my $self = shift;
326 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
327 $self->close; # just make sure
328 my $file = $self->{file};
329
330 for (values %{$self->{indexes}}) {
331 $_->drop;
332 }
333 unlink "$file/records";
334 # $self->unlock;
335 ! (!-e $file or rmdir $file);
336 } else {
337 croak ref($self)."::drop called directly";
338 }
339 }
340
341 sub mrequire ($) {
342 my $module = shift;
343
344 $module =~ s{::}{/}g;
345 $module .= '.pm';
346 require $module;
347 }
348
349 sub open {
350 my $self = shift;
351 my $file = $self->{file} . '/records';
352
353 mrequire ref($self); # that's tricky eh?
354 if (defined $self->{'layout'}) {
355 mrequire ref($self->{'layout'});
356 }
357 if (defined $self->{'access'}) {
358 mrequire ref($self->{'access'});
359 }
360 if (exists $self->{indexes}) {
361 require WAIT::Index;
362 for (values %{$self->{indexes}}) {
363 $_->{mode} = $self->{mode};
364 }
365 }
366 if (exists $self->{inverted}) {
367 my ($att, $idx);
368 for $att (keys %{$self->{inverted}}) {
369 for $idx (@{$self->{inverted}->{$att}}) {
370 $idx->{mode} = $self->{mode};
371 }
372 }
373 require WAIT::InvertedIndex;
374 }
375 unless (defined $self->{dbh}) {
376 if ($USE_RECNO) {
377 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
378 $self->{mode}, 0664, $DB_RECNO);
379 } else {
380 $self->{dbh} =
381 tie(%{$self->{db}}, 'DB_File', $file,
382 $self->{mode}, 0664, $DB_BTREE);
383 }
384 }
385
386 # Locking
387 #
388 # We allow multiple readers to coexists. But write access excludes
389 # all read access vice versa. In practice read access on tables
390 # open for writing will mostly work ;-)
391
392 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
393
394 # aquire a write lock. We might hold one acquired in create() already
395 $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
396 or die "Can't lock '$self->{file}/write'";
397
398 my $lockdir = $self->{file} . '/read';
399 unless (-d $lockdir) {
400 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
401 }
402
403 if ($self->{mode} & O_RDWR) {
404 # this is a hack. We do not check for reopening ...
405 return $self if $self->{write_lock};
406
407 # If we actually want to write we must check if there are any readers
408 local *DIR;
409 opendir DIR, $lockdir or
410 die "Could not opendir '$lockdir': $!";
411 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
412 # check if the locks are still valid.
413 # Since we are protected by a write lock, we could use a pline file.
414 # But we want to use the stale testing from LockFile::Simple.
415 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
416 warn "Removing stale lockfile '$lockdir/$lockfile'";
417 $lck->release;
418 } else {
419 $self->{write_lock}->release;
420 die "Cannot write table '$file' while it's in use";
421 }
422 }
423 closedir DIR;
424 } else {
425 # this is a hack. We do not check for reopening ...
426 return $self if $self->{read_lock};
427
428 # We are a reader. So we release the write lock
429 my $id = time;
430 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
431 $id++;
432 }
433 $self->{read_lock} = $lockmgr->lock("$lockdir/$id");
434 $self->{write_lock}->release;
435 delete $self->{write_lock};
436 }
437
438 $self;
439 }
440
441 sub fetch_extern {
442 my $self = shift;
443
444 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
445 if (exists $self->{'access'}) {
446 mrequire ref($self->{'access'});
447 $self->{'access'}->FETCH(@_);
448 }
449 }
450
451 sub fetch_extern_by_id {
452 my $self = shift;
453
454 $self->fetch_extern($self->fetch(@_));
455 }
456
457 sub _find_index {
458 my $self = shift;
459 my (@att) = @_;
460 my %att;
461 my $name;
462
463 @att{@att} = @att;
464
465 KEY: for $name (keys %{$self->{indexes}}) {
466 my @iat = split /-/, $name;
467 for (@iat) {
468 next KEY unless exists $att{$_};
469 }
470 return $self->{indexes}->{$name};
471 }
472 return undef;
473 }
474
475 sub have {
476 my $self = shift;
477 my %parm = @_;
478
479 my $index = $self->_find_index(keys %parm) or return; # no index-no have
480
481 defined $self->{db} or $self->open;
482 return $index->have(@_);
483 }
484
485 sub insert {
486 my $self = shift;
487 my %parm = @_;
488
489 defined $self->{db} or $self->open;
490
491 # We should move all writing methods to a subclass to check only once
492 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
493
494 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
495 my $key;
496 my @deleted = keys %{$self->{deleted}};
497 my $gotkey = 0;
498
499 if (@deleted) {
500 $key = pop @deleted;
501 delete $self->{deleted}->{$key};
502 # Sanity check
503 if ($key && $key>0) {
504 $gotkey=1;
505 } else {
506 warn(sprintf("WAIT database inconsistency during insert ".
507 "key[%s]: Please rebuild index\n",
508 $key
509 ));
510 }
511 }
512 unless ($gotkey) {
513 $key = $self->{nextk}++;
514 }
515 if ($USE_RECNO) {
516 $self->{db}->[$key] = $tuple;
517 } else {
518 $self->{db}->{$key} = $tuple;
519 }
520 for (values %{$self->{indexes}}) {
521 unless ($_->insert($key, %parm)) {
522 # duplicate key, undo changes
523 if ($key == $self->{nextk}-1) {
524 $self->{nextk}--;
525 } else {
526 # warn "setting key[$key] deleted during insert";
527 $self->{deleted}->{$key}=1;
528 }
529 my $idx;
530 for $idx (values %{$self->{indexes}}) {
531 last if $idx eq $_;
532 $idx->remove($key, %parm);
533 }
534 return undef;
535 }
536 }
537 if (defined $self->{inverted}) {
538 my $att;
539 for $att (keys %{$self->{inverted}}) {
540 if (defined $parm{$att}) {
541 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
542 #map $_->sync, @{$self->{inverted}->{$att}}
543 }
544 }
545 }
546 $key
547 }
548
549 sub sync {
550 my $self = shift;
551
552 for (values %{$self->{indexes}}) {
553 map $_->sync, $_;
554 }
555 if (defined $self->{inverted}) {
556 my $att;
557 for $att (keys %{$self->{inverted}}) {
558 map $_->sync, @{$self->{inverted}->{$att}}
559 }
560 }
561 }
562
563 sub fetch {
564 my $self = shift;
565 my $key = shift;
566
567 return () if exists $self->{deleted}->{$key};
568
569 defined $self->{db} or $self->open;
570 if ($USE_RECNO) {
571 $self->unpack($self->{db}->[$key]);
572 } else {
573 $self->unpack($self->{db}->{$key});
574 }
575 }
576
577 sub delete_by_key {
578 my $self = shift;
579 my $key = shift;
580
581 unless ($key) {
582 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
583 return;
584 }
585
586 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
587 my %tuple = $self->fetch($key);
588 for (values %{$self->{indexes}}) {
589 $_->delete($key, %tuple);
590 }
591 if (defined $self->{inverted}) {
592 # User *must* provide the full record for this or the entries
593 # in the inverted index will not be removed
594 %tuple = (%tuple, @_);
595 my $att;
596 for $att (keys %{$self->{inverted}}) {
597 if (defined $tuple{$att}) {
598 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
599 }
600 }
601 }
602 # warn "setting key[$key] deleted during delete_by_key";
603 ++$self->{deleted}->{$key};
604 }
605
606 sub delete {
607 my $self = shift;
608 my $tkey = $self->have(@_);
609 # warn "tkey[$tkey]\@_[@_]";
610 defined $tkey && $self->delete_by_key($tkey, @_);
611 }
612
613 sub unpack {
614 my($self, $tuple) = @_;
615
616 unless (defined $tuple){
617 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
618 warn("Debug: somebody called unpack without argument tuple!");
619 return;
620 }
621
622 my $att;
623 my @result;
624 my @tuple = split /$;/, $tuple;
625
626 for $att (@{$self->{attr}}) {
627 push @result, $att, shift @tuple;
628 }
629 @result;
630 }
631
632 sub set {
633 my ($self, $iattr, $value) = @_;
634
635 unless ($self->{write_lock}){
636 warn "Cannot set iattr[$iattr] without write lock. Nothing done";
637 return;
638 }
639 for my $att (keys %{$self->{inverted}}) {
640 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
641 my $idx;
642 for $idx (@{$self->{inverted}->{$att}}) {
643 $idx->set($iattr, $value);
644 }
645 } else {
646 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
647 }
648 }
649
650 1;
651 }
652
653 sub close {
654 my $self = shift;
655
656 if (exists $self->{'access'}) {
657 eval {$self->{'access'}->close}; # dont bother if not opened
658 }
659 if ($WAIT::Index::VERSION) {
660 for (values %{$self->{indexes}}) {
661 $_->close();
662 }
663 }
664 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
665 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
666 # if WAIT::InvertedIndex has not been loaded, they cannot have
667 # been altered so far
668 my $att;
669 for $att (keys %{$self->{inverted}}) {
670 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
671 my $idx;
672 for $idx (@{$self->{inverted}->{$att}}) {
673 $idx->close;
674 }
675 } else {
676 map $_->close(), @{$self->{inverted}->{$att}};
677 }
678 }
679 }
680 if ($self->{dbh}) {
681 delete $self->{dbh};
682
683 if ($USE_RECNO) {
684 untie @{$self->{db}};
685 } else {
686 untie %{$self->{db}};
687 }
688 delete $self->{db};
689 }
690
691 $self->unlock;
692
693 1;
694 }
695
696 sub unlock {
697 my $self = shift;
698
699 # Either we have a read or a write lock (or we close the table already)
700 # unless ($self->{read_lock} || $self->{write_lock}) {
701 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
702 # }
703 if ($self->{write_lock}) {
704 $self->{write_lock}->release();
705 delete $self->{write_lock};
706 }
707 if ($self->{read_lock}) {
708 $self->{read_lock}->release();
709 delete $self->{read_lock};
710 }
711
712 }
713
714 sub DESTROY {
715 my $self = shift;
716
717 warn "Table handle destroyed without closing it first"
718 if $self->{write_lock} || $self->{read_lock};
719 }
720
721 sub open_scan {
722 my $self = shift;
723 my $code = shift;
724
725 $self->{dbh} or $self->open;
726 require WAIT::Scan;
727 new WAIT::Scan $self, $self->{nextk}-1, $code;
728 }
729
730 sub open_index_scan {
731 my $self = shift;
732 my $attr = shift;
733 my $code = shift;
734 my $name = join '-', @$attr;
735
736 if (defined $self->{indexes}->{$name}) {
737 $self->{indexes}->{$name}->open_scan($code);
738 } else {
739 croak "No such index '$name'";
740 }
741 }
742
743 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
744
745 sub prefix {
746 my ($self , $attr, $prefix) = @_;
747 my %result;
748
749 defined $self->{db} or $self->open; # require layout
750
751 for (@{$self->{inverted}->{$attr}}) {
752 my $result = $_->prefix($prefix);
753 if (defined $result) {
754 $result{$_->name} = $result;
755 }
756 }
757 bless \%result, 'WAIT::Query::Raw';
758 }
759
760 sub intervall {
761 my ($self, $attr, $lb, $ub) = @_;
762 my %result;
763
764 defined $self->{db} or $self->open; # require layout
765
766 for (@{$self->{inverted}->{$attr}}) {
767 my $result = $_->intervall($lb, $ub);
768 if (defined $result) {
769 $result{$_->name} = $result;
770 }
771 }
772 bless \%result, 'WAIT::Query::Raw';
773 }
774
775 sub search {
776 my $self = shift;
777 my ($query, $attr, $cont, $raw);
778 if (ref $_[0]) {
779 $query = shift;
780
781 $attr = $query->{attr};
782 $cont = $query->{cont};
783 $raw = $query->{raw};
784 } else {
785 require Carp;
786 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
787 $attr = shift;
788 $cont = shift;
789 $raw = shift;
790 $query = {
791 attr => $attr,
792 cont => $cont,
793 raw => $raw,
794 };
795 }
796
797 my %result;
798
799 defined $self->{db} or $self->open; # require layout
800
801 if ($raw) {
802 for (@{$self->{inverted}->{$attr}}) {
803 my $name = $_->name;
804 if (exists $raw->{$name} and @{$raw->{$name}}) {
805 my $scale = 1/scalar(@{$raw->{$name}});
806 my %r = $_->search_raw($query, @{$raw->{$name}});
807 my ($key, $val);
808 while (($key, $val) = each %r) {
809 if (exists $result{$key}) {
810 $result{$key} += $val*$scale;
811 } else {
812 $result{$key} = $val*$scale;
813 }
814 }
815 }
816 }
817 }
818 if (defined $cont and $cont ne '') {
819 for (@{$self->{inverted}->{$attr}}) {
820 my %r = $_->search($query, $cont);
821 my ($key, $val);
822 while (($key, $val) = each %r) {
823 if (exists $result{$key}) {
824 $result{$key} += $val;
825 } else {
826 $result{$key} = $val;
827 }
828 }
829 }
830 }
831 # sanity check for deleted documents.
832 # this should not be necessary !@#$
833 for (keys %result) {
834 delete $result{$_} if $self->{deleted}->{$_}
835 }
836 %result;
837 }
838
839 sub hilight_positions {
840 my ($self, $attr, $text, $query, $raw) = @_;
841 my %pos;
842
843 if (defined $raw) {
844 for (@{$self->{inverted}->{$attr}}) { # objects of type
845 # WAIT::InvertedIndex for
846 # this index field $attr
847 my $name = $_->name;
848 if (exists $raw->{$name}) {
849 my %qt;
850 grep $qt{$_}++, @{$raw->{$name}};
851 for ($_->parse_pos($text)) {
852 if (exists $qt{$_->[0]}) {
853 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
854 }
855 }
856 }
857 }
858 }
859 if (defined $query) {
860 for (@{$self->{inverted}->{$attr}}) {
861 my %qt;
862
863 grep $qt{$_}++, $_->parse($query);
864 for ($_->parse_pos($text)) {
865 if (exists $qt{$_->[0]}) {
866 if (exists $pos{$_->[1]}) { # perl -w ;-)
867 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
868 } else {
869 $pos{$_->[1]} = length($_->[0]);
870 }
871 }
872 }
873 }
874 }
875
876 \%pos;
877 }
878
879 sub hilight {
880 my ($tb, $buf, $qplain, $qraw) = @_;
881 my $layout = $tb->layout();
882
883 my @result;
884
885 $qplain ||= {};
886 $qraw ||= {};
887 my @ttxt = $layout->tag($buf);
888 while (@ttxt) {
889 no strict 'refs';
890 my %tag = %{shift @ttxt};
891 my $txt = shift @ttxt;
892 my $fld;
893
894 my %hl;
895 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
896 my $hp = $tb->hilight_positions($fld, $txt,
897 $qplain->{$fld}, $qraw->{$fld});
898 for (keys %$hp) {
899 if (exists $hl{$_}) { # -w ;-(
900 $hl{$_} = max($hl{$_}, $hp->{$_});
901 } else {
902 $hl{$_} = $hp->{$_};
903 }
904 }
905 }
906 my $pos;
907 my $qt = {_qt => 1, %tag};
908 my $pl = \%tag;
909 my $last = length($txt);
910 my @tmp;
911 for $pos (sort {$b <=> $a} keys %hl) {
912 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
913 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
914 $last = $pos;
915 }
916 push @result, $pl, substr($txt,0,$last);
917 push @result, @tmp;
918 }
919 @result; # no speed necessary
920 }
921
922 1;

Properties

Name Value
cvs2svn:cvs-rev 1.3

  ViewVC Help
Powered by ViewVC 1.1.26