/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 116 - (show annotations)
Wed Jul 14 09:48:26 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 23715 byte(s)
more fixes, more debug

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Wed Jan 23 14:15:15 2002
8 # Language : CPerl
9 # Update Count : 152
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28 our $VERSION = "2.000";
29
30 use WAIT::Table::Handle ();
31 require WAIT::Parse::Base;
32
33 use strict;
34 use Carp qw(cluck croak confess);
35 # use autouse Carp => qw( croak($) );
36 use BerkeleyDB;
37 use Fcntl;
38
39 =head2 Creating a Table.
40
41 The constructor WAIT::Table-E<gt>new is normally called via the
42 create_table method of a database handle. This is not enforced, but
43 creating a table does not make any sense unless the table is
44 registered by the database because the latter implements persistence
45 of the meta data. Registering is done automatically by letting the
46 database handle the creation of a table.
47
48 my $db = WAIT::Database->create(name => 'sample');
49 my $tb = $db->create_table(name => 'test',
50 access => $access,
51 layout => $layout,
52 attr => ['docid', 'headline'],
53 );
54
55 The constructor returns a handle for the table. This handle is hidden by the
56 table module, to prevent direct access if called via Table.
57
58 =over 10
59
60 =item C<access> => I<accessobj>
61
62 A reference to an access object for the external parts (attributes) of
63 tuples. As you may remember, the WAIT System does not enforce that
64 objects are completely stored inside the system to avoid duplication.
65 There is no (strong) point in storing all your HTML documents inside
66 the system when indexing your WWW-Server.
67
68 The access object is designed to work like as a tied hash. You pass
69 the refernce to the object, not the tied hash though. An example
70 implementation of an access class that works for manpages is
71 WAIT::Document::Nroff.
72
73 The implementation needs to take into account that WAIT will keep this
74 object in a Data::Dumper or Storable database and re-use it when sman
75 is run. So it is not good enough if we can produce the index with it
76 now, when we create or actively access the table, WAIT also must be
77 able to retrieve documents on its own, when we are in a different
78 context. This happens specifically in a retrieval. To get this working
79 seemlessly, the access-defining class must implement a close method.
80 This method will be called before the Data::Dumper dump takes place.
81 In that moment the access-defining class must get rid of all data
82 structures that cannot be reconstructed via the Data::Dumper dump,
83 such as database handles or C pointers.
84
85 =item C<path> => I<dir>
86
87 The path to database. Files for indexes will have I<path>
88 as prefix. I<Mandatory>, but usually taken care of by the
89 WAIT::Database handle when the constructor is called via
90 WAIT::Database::create_table().
91
92 =item C<name> => I<name>
93
94 The name of this table. I<Mandatory>
95
96 =item C<attr> => [ I<attr> ... ]
97
98 A reference to an array of attribute names. WAIT will keep the
99 contents of these attributes in its table. I<Mandatory>
100
101 =item C<djk> => [ I<attr> ... ]
102
103 A reference to an array of attribute names which make up the
104 I<disjointness key>. Don't think about it - it's of no use yet;
105
106 =item C<layout> => I<layoutobj>
107
108 A reference to an external parser object. Defaults to a new instance
109 of C<WAIT::Parse::Base>. For an example implementation see
110 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
111 class if you so like.
112
113 =item C<keyset> => I<keyset>
114
115 The set of attributes needed to identify a record. Defaults to all
116 attributes.
117
118 =item C<invindex> => I<inverted index>
119
120 A reference to an anon array defining attributes of each record that
121 need to be indexed. See the source of smakewhatis for how to set this
122 up.
123
124 =back
125
126 =cut
127
128 sub new {
129 my $type = shift;
130 my %parm = @_;
131 my $self = {};
132
133 # Check for mandatory attrs early
134 for my $x (qw(name attr env maindbfile tablename)) {
135 $self->{$x} = $parm{$x} or confess "No $x specified";
136 }
137
138 # Do that before we eventually add '_weight' to attributes.
139 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
140
141 $self->{mode} = O_CREAT | O_RDWR;
142
143 # Determine and set up subclass
144 $type = ref($type) || $type;
145 if (defined $parm{djk}) {
146 if (@{$parm{djk}} == @{$parm{attr}}) {
147 # All attributes in DK (sloppy test here!)
148 $type .= '::Independent';
149 require WAIT::Table::Independent;
150 } else {
151 $type .= '::Disjoint';
152 require WAIT::Table::Disjoint;
153 }
154 # Add '_weight' to attributes
155 my %attr;
156 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
157 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158 }
159
160 $self->{path} = $parm{path} or confess "No path specified";
161 bless $self, $type;
162
163 $self->{djk} = $parm{djk} if defined $parm{djk};
164 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
165 $self->{access} = $parm{access} if defined $parm{access};
166 $self->{nextk} = 1; # next record to insert; first record unused
167 $self->{deleted} = {}; # no deleted records yet
168 $self->{indexes} = {};
169
170 # Checking for readers is not necessary, but let's go with the
171 # generic method.
172
173 # Call create_index() and create_index() for compatibility
174 for (@{$self->{keyset}||[]}) {
175 #carp "Specification of indexes at table create time is deprecated";
176 $self->create_index(@$_);
177 }
178 while (@{$parm{invindex}||[]}) {
179 # carp "Specification of inverted indexes at table create time is deprecated";
180 my $att = shift @{$parm{invindex}};
181 my @spec = @{shift @{$parm{invindex}}};
182 my @opt = ();
183
184 if (ref($spec[0])) {
185 warn "Secondary pipelines are deprecated";
186 @opt = %{shift @spec};
187 }
188 $self->create_inverted_index(attribute => $att,
189 pipeline => \@spec,
190 @opt);
191 }
192
193 $self;
194 # end of backwarn compatibility stuff
195 }
196
197 for my $accessor (qw(maindbfile tablename)) {
198 no strict 'refs';
199 *{$accessor} = sub {
200 my($self) = @_;
201 return $self->{$accessor} if $self->{$accessor};
202 require Carp;
203 Carp::confess("accessor $accessor not there");
204 }
205 }
206
207 =head2 Creating an index
208
209 $tb->create_index('docid');
210
211 C<create_index>
212 must be called with a list of attributes. This must be a subset of the
213 attributes specified when the table was created. Currently this
214 method must be called before the first tuple is inserted in the
215 table!
216
217 =cut
218
219 sub create_index {
220 my $self= shift;
221
222 confess "Cannot create index for table aready populated"
223 if $self->{nextk} > 1;
224
225 require WAIT::Index;
226
227 my $name = join '-', @_;
228 #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
229 $self->{indexes}->{$name} =
230 WAIT::Index->new(
231 path => $self->path.'/'.$name,
232 subname => $name,
233 env => $self->{env},
234 maindbfile => $self->maindbfile,
235 tablename => $self->tablename,
236 attr => $_,
237 );
238 }
239
240 =head2 Creating an inverted index
241
242 $tb->create_inverted_index
243 (attribute => 'au',
244 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
245 predicate => 'plain',
246 );
247
248 =over 5
249
250 =item C<attribute>
251
252 The attribute to build the index on. This attribute may not be in the
253 set attributes specified when the table was created.
254
255 =item C<pipeline>
256
257 A piplines specification is a reference to an array of method names
258 (from package C<WAIT::Filter>) which are to be applied in sequence to
259 the contents of the named attribute. The attribute name may not be in
260 the attribute list.
261
262 =item C<predicate>
263
264 An indication which predicate the index implements. This may be
265 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
266 query processing. Currently there is no standard set of predicate
267 names. The predicate defaults to the last member of the pipeline if
268 omitted.
269
270 =back
271
272 Currently this method must be called before the first tuple is
273 inserted in the table!
274
275 =cut
276
277 sub create_inverted_index {
278 my $self = shift;
279 my %parm = @_;
280
281 confess "No attribute specified" unless $parm{attribute};
282 confess "No pipeline specified" unless $parm{pipeline};
283
284 $parm{predicate} ||= $parm{pipeline}->[-1];
285
286 confess "Cannot create index for table aready populated"
287 if $self->{nextk} > 1;
288
289 require WAIT::InvertedIndex;
290
291 # backward compatibility stuff
292 my %opt = %parm;
293 for (qw(attribute pipeline predicate)) {
294 delete $opt{$_};
295 }
296
297 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
298 my $idx = WAIT::InvertedIndex->new(path => $self->path.'/'.$name,
299 subname=> $name,
300 env => $self->{env},
301 maindbfile => $self->maindbfile,
302 tablename => $self->tablename,
303 filter => [@{$parm{pipeline}}], # clone
304 name => $name,
305 attr => $parm{attribute},
306 %opt, # backward compatibility stuff
307 );
308 # We will have to use $parm{predicate} here
309 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
310 }
311
312 sub dir {
313 $_[0]->path;
314 }
315
316 =head2 C<$tb-E<gt>layout>
317
318 Returns the reference to the associated parser object.
319
320 =cut
321
322 sub layout { $_[0]->{layout} }
323
324 =head2 C<$tb-E<gt>fields>
325
326 Returns the array of attribute names.
327
328 =cut
329
330
331 sub fields { keys %{$_[0]->{inverted}}}
332
333 =head2 C<$tb-E<gt>drop>
334
335 Must be called via C<WAIT::Database::drop_table>
336
337 =cut
338
339 sub drop {
340 my $self = shift;
341
342 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
343 $self->close; # just make sure
344
345 # my $path = $self->path;
346
347 for (values %{$self->{indexes}}) {
348 $_->drop;
349 }
350 # unlink "$path/records";
351 # rmdir "$path/read" or warn "Could not rmdir '$path/read'";
352
353 } else {
354 confess ref($self)."::drop called directly";
355 }
356 }
357
358 sub mrequire ($) {
359 my $module = shift;
360
361 $module =~ s{::}{/}g;
362 $module .= '.pm';
363 require $module;
364 }
365
366 sub path {
367 my($self) = @_;
368 return $self->{path} if $self->{path};
369 require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
370 require Carp;
371 confess("NO path attr");
372 }
373
374 sub open {
375 my $self = shift;
376 my $path = $self->path . '/records';
377
378 mrequire ref($self); # that's tricky eh?
379 if (defined $self->{'layout'}) {
380 mrequire ref($self->{'layout'});
381 }
382 if (defined $self->{'access'}) {
383 mrequire ref($self->{'access'});
384 }
385 if (exists $self->{indexes}) {
386 require WAIT::Index;
387 for my $Ind (values %{$self->{indexes}}) {
388 for my $x (qw(mode env maindbfile)) {
389 $Ind->{$x} = $self->{$x};
390 }
391 }
392 }
393 if (exists $self->{inverted}) {
394 my ($att, $idx);
395 for $att (keys %{$self->{inverted}}) {
396 for $idx (@{$self->{inverted}->{$att}}) {
397 for my $x (qw(mode env maindbfile)) {
398 $idx->{$x} = $self->{$x};
399 }
400 }
401 }
402 require WAIT::InvertedIndex;
403 }
404
405 # CONFUSION: WAIT knows two *modes*: read-only or read-write.
406 # BerkeleyDB means file permissions when talking about Mode.
407 # BerkeleyDB has the "Flags" attribute to specify
408 # read/write/lock/etc subsystems.
409
410 my $flags;
411 if ($self->{mode} & O_RDWR) {
412 $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
413 #warn "DEBUG: Flags on table $path set to 'writing'";
414 } else {
415 $flags = DB_RDONLY;
416 #warn "DEBUG: Flags on table $path set to 'readonly'";
417 }
418 unless (defined $self->{dbh}) {
419 my $subname = $self->tablename . "/records";
420 $self->{dbh} =
421 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
422 $self->{env} ? (Env => $self->{env}) : (),
423 # Filename => $file,
424 Filename => $self->maindbfile,
425 Subname => $subname,
426 Mode => 0664,
427 Flags => $flags,
428 $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
429 $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
430 )
431 or confess "Cannot tie: $BerkeleyDB::Error\nDEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
432 }
433 $self;
434 }
435
436 sub fetch_extern {
437 my $self = shift;
438
439 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
440 if (exists $self->{'access'}) {
441 mrequire ref($self->{'access'});
442 $self->{'access'}->FETCH(@_);
443 }
444 }
445
446 sub fetch_extern_by_id {
447 my $self = shift;
448
449 $self->fetch_extern($self->fetch(@_));
450 }
451
452 sub _find_index {
453 my $self = shift;
454 my (@att) = @_;
455 my %att;
456 my $name;
457
458 @att{@att} = @att;
459
460 KEY: for $name (keys %{$self->{indexes}}) {
461 my @iat = split /-/, $name;
462 for (@iat) {
463 next KEY unless exists $att{$_};
464 }
465 return $self->{indexes}->{$name};
466 }
467 return undef;
468 }
469
470 sub have {
471 my $self = shift;
472 my %parm = @_;
473
474 my $index = $self->_find_index(keys %parm) or return; # no index-no have
475
476 defined $self->{db} or $self->open;
477 return $index->have(@_);
478 }
479
480 sub insert {
481 my $self = shift;
482 my %parm = @_;
483
484 defined $self->{db} or $self->open;
485
486 # We should move all writing methods to a subclass to check only once
487 $self->{mode} & O_RDWR or confess "Cannot insert into table opened in RD_ONLY mode";
488
489 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
490 my $key;
491 my @deleted = keys %{$self->{deleted}};
492 my $gotkey = 0;
493
494 if (@deleted) {
495 $key = pop @deleted;
496 delete $self->{deleted}->{$key};
497 # Sanity check
498 if ($key && $key>0) {
499 $gotkey=1;
500 } else {
501 warn(sprintf("WAIT database inconsistency during insert ".
502 "key[%s]: Please rebuild index\n",
503 $key
504 ));
505 }
506 }
507 unless ($gotkey) {
508 $key = $self->{nextk}++;
509 }
510 $self->{db}->{$key} = $tuple;
511 for (values %{$self->{indexes}}) {
512 unless ($_->insert($key, %parm)) {
513 # duplicate key, undo changes
514 if ($key == $self->{nextk}-1) {
515 $self->{nextk}--;
516 } else {
517 # warn "setting key[$key] deleted during insert";
518 $self->{deleted}->{$key}=1;
519 }
520 my $idx;
521 for $idx (values %{$self->{indexes}}) {
522 last if $idx eq $_;
523 $idx->remove($key, %parm);
524 }
525 return undef;
526 }
527 }
528 if (defined $self->{inverted}) {
529 my $att;
530 for $att (keys %{$self->{inverted}}) {
531 if (defined $parm{$att}) {
532 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
533 #map $_->sync, @{$self->{inverted}->{$att}}
534 }
535 }
536 }
537 $key
538 }
539
540 sub sync {
541 my $self = shift;
542
543 for (values %{$self->{indexes}}) {
544 map $_->sync, $_;
545 }
546 if (defined $self->{inverted}) {
547 my $att;
548 for $att (keys %{$self->{inverted}}) {
549 map $_->sync, @{$self->{inverted}->{$att}}
550 }
551 }
552 }
553
554 sub fetch {
555 my $self = shift;
556 my $key = shift;
557
558 return () if exists $self->{deleted}->{$key};
559
560 defined $self->{db} or $self->open;
561 $self->unpack($self->{db}->{$key});
562 }
563
564 sub delete_by_key {
565 my $self = shift;
566 my $key = shift;
567
568 unless ($key) {
569 cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
570 return;
571 }
572
573 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
574 my %tuple = $self->fetch($key);
575 for (values %{$self->{indexes}}) {
576 $_->delete($key, %tuple);
577 }
578 if (defined $self->{inverted}) {
579 # User *must* provide the full record for this or the entries
580 # in the inverted index will not be removed
581 %tuple = (%tuple, @_);
582 my $att;
583 for $att (keys %{$self->{inverted}}) {
584 if (defined $tuple{$att}) {
585 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
586 }
587 }
588 }
589 # warn "setting key[$key] deleted during delete_by_key";
590 ++$self->{deleted}->{$key};
591 }
592
593 sub delete {
594 my $self = shift;
595 my $tkey = $self->have(@_);
596 # warn "tkey[$tkey]\@_[@_]";
597 defined $tkey && $self->delete_by_key($tkey, @_);
598 }
599
600 sub unpack {
601 my($self, $tuple) = @_;
602
603 unless (defined $tuple){
604 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
605 warn("Debug: somebody called unpack without argument tuple!");
606 return;
607 }
608
609 my $att;
610 my @result;
611 my @tuple = split /$;/, $tuple;
612
613 for $att (@{$self->{attr}}) {
614 push @result, $att, shift @tuple;
615 }
616 @result;
617 }
618
619 sub set {
620 my ($self, $iattr, $value) = @_;
621 # in the rare case that they haven't written a single record yet, we
622 # make sure, the inverted inherits our $self->{mode}:
623 defined $self->{db} or $self->open;
624
625 for my $att (keys %{$self->{inverted}}) {
626 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
627 my $idx;
628 for $idx (@{$self->{inverted}->{$att}}) {
629 $idx->set($iattr, $value);
630 }
631 } else {
632 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
633 }
634 }
635
636 1;
637 }
638
639 sub close {
640 my $self = shift;
641
642 #cluck("DEBUG: Closing A Table");
643
644 if (exists $self->{'access'}) {
645 eval {$self->{'access'}->close}; # dont bother if not opened
646 }
647 if ($WAIT::Index::VERSION) {
648 for (values %{$self->{indexes}}) {
649 $_->close();
650 }
651 }
652 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
653 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
654 # if WAIT::InvertedIndex has not been loaded, they cannot have
655 # been altered so far
656 my $att;
657 for $att (keys %{$self->{inverted}}) {
658 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
659 my $idx;
660 for $idx (@{$self->{inverted}->{$att}}) {
661 $idx->close;
662 }
663 } else {
664 map $_->close(), @{$self->{inverted}->{$att}};
665 }
666 }
667 }
668 if ($self->{dbh}) {
669 delete $self->{dbh};
670 }
671 untie %{$self->{db}};
672 for my $att (qw(env db path maindbfile)) {
673 delete $self->{$att};
674 #cluck "DEBUG: Deleted att $att";
675 }
676
677 1;
678 }
679
680 sub DESTROY {
681 my $self = shift;
682
683 delete $self->{env};
684
685 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
686
687 }
688
689 sub open_scan {
690 my $self = shift;
691 my $code = shift;
692
693 $self->{dbh} or $self->open;
694 require WAIT::Scan;
695 new WAIT::Scan $self, $self->{nextk}-1, $code;
696 }
697
698 sub open_index_scan {
699 my $self = shift;
700 my $attr = shift;
701 my $code = shift;
702 my $name = join '-', @$attr;
703
704 if (defined $self->{indexes}->{$name}) {
705 $self->{indexes}->{$name}->open_scan($code);
706 } else {
707 confess "No such index '$name'";
708 }
709 }
710
711 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
712
713 sub prefix {
714 my ($self , $attr, $prefix) = @_;
715 my %result;
716
717 defined $self->{db} or $self->open; # require layout
718
719 for (@{$self->{inverted}->{$attr}}) {
720 my $result = $_->prefix($prefix);
721 if (defined $result) {
722 $result{$_->name} = $result;
723 }
724 }
725 bless \%result, 'WAIT::Query::Raw';
726 }
727
728 sub intervall {
729 my ($self, $attr, $lb, $ub) = @_;
730 my %result;
731
732 defined $self->{db} or $self->open; # require layout
733
734 for (@{$self->{inverted}->{$attr}}) {
735 my $result = $_->intervall($lb, $ub);
736 if (defined $result) {
737 $result{$_->name} = $result;
738 }
739 }
740 bless \%result, 'WAIT::Query::Raw';
741 }
742
743 sub search_ref {
744 my $self = shift;
745 my ($query, $attr, $cont, $raw);
746 if (ref $_[0]) {
747 $query = shift;
748 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
749
750 $attr = $query->{attr};
751 $cont = $query->{cont};
752 $raw = $query->{raw};
753 } else {
754 cluck("Using three argument search interface is deprecated, use hashref interface instead");
755 $attr = shift;
756 $cont = shift;
757 $raw = shift;
758 $query = {
759 attr => $attr,
760 cont => $cont,
761 raw => $raw,
762 };
763 }
764
765 my %result;
766
767 defined $self->{db} or $self->open; # require layout
768
769 if ($raw) {
770 for (@{$self->{inverted}->{$attr}}) {
771 my $name = $_->name;
772 if (exists $raw->{$name} and @{$raw->{$name}}) {
773 my $scale = 1/scalar(@{$raw->{$name}});
774 my %r = $_->search_raw($query, @{$raw->{$name}});
775 my ($key, $val);
776 while (($key, $val) = each %r) {
777 if (exists $result{$key}) {
778 $result{$key} += $val*$scale;
779 } else {
780 $result{$key} = $val*$scale;
781 }
782 }
783 }
784 }
785 }
786 if (defined $cont and $cont ne '') {
787 for (@{$self->{inverted}->{$attr}}) {
788 my $r = $_->search_ref($query, $cont);
789 my ($key, $val);
790 while (($key, $val) = each %$r) {
791 if (exists $result{$key}) {
792 $result{$key} += $val;
793 } else {
794 $result{$key} = $val;
795 }
796 }
797 }
798 }
799 # sanity check for deleted documents.
800 # this should not be necessary !@#$
801 for (keys %result) {
802 delete $result{$_} if $self->{deleted}->{$_}
803 }
804 \%result;
805 }
806
807 sub parse_query {
808 my($self, $attr, $query) = @_;
809 return unless defined $query && length $query;
810 my %qt;
811 for (@{$self->{inverted}->{$attr}}) {
812 grep $qt{$_}++, $_->parse($query);
813 }
814 [keys %qt];
815 }
816
817 sub hilight_positions {
818 my ($self, $attr, $text, $query, $raw) = @_;
819 my %pos;
820
821 if (defined $raw) {
822 for (@{$self->{inverted}->{$attr}}) { # objects of type
823 # WAIT::InvertedIndex for
824 # this index field $attr
825 my $name = $_->name;
826 if (exists $raw->{$name}) {
827 my %qt;
828 grep $qt{$_}++, @{$raw->{$name}};
829 for ($_->parse_pos($text)) {
830 if (exists $qt{$_->[0]}) {
831 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
832 }
833 }
834 }
835 }
836 }
837 if (defined $query) {
838 for (@{$self->{inverted}->{$attr}}) {
839 my %qt;
840
841 grep $qt{$_}++, $_->parse($query);
842 for ($_->parse_pos($text)) {
843 if (exists $qt{$_->[0]}) {
844 if (exists $pos{$_->[1]}) { # perl -w ;-)
845 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
846 } else {
847 $pos{$_->[1]} = length($_->[0]);
848 }
849 }
850 }
851 }
852 }
853
854 \%pos;
855 }
856
857 sub hilight {
858 my ($tb, $buf, $qplain, $qraw) = @_;
859 my $layout = $tb->layout();
860
861 my @result;
862
863 $qplain ||= {};
864 $qraw ||= {};
865 my @ttxt = $layout->tag($buf);
866 while (@ttxt) {
867 no strict 'refs';
868 my %tag = %{shift @ttxt};
869 my $txt = shift @ttxt;
870 my $fld;
871
872 my %hl;
873 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
874 my $hp = $tb->hilight_positions($fld, $txt,
875 $qplain->{$fld}, $qraw->{$fld});
876 for (keys %$hp) {
877 if (exists $hl{$_}) { # -w ;-(
878 $hl{$_} = max($hl{$_}, $hp->{$_});
879 } else {
880 $hl{$_} = $hp->{$_};
881 }
882 }
883 }
884 my $pos;
885 my $qt = {_qt => 1, %tag};
886 my $pl = \%tag;
887 my $last = length($txt);
888 my @tmp;
889 for $pos (sort {$b <=> $a} keys %hl) {
890 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
891 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
892 $last = $pos;
893 }
894 push @result, $pl, substr($txt,0,$last);
895 push @result, @tmp;
896 }
897 @result; # no speed necessary
898 }
899
900 1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26