/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 114 - (show annotations)
Tue Jul 13 21:27:27 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 23692 byte(s)
enought for today, still not passing all tests

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Wed Jan 23 14:15:15 2002
8 # Language : CPerl
9 # Update Count : 152
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28 our $VERSION = "2.000";
29
30 use WAIT::Table::Handle ();
31 require WAIT::Parse::Base;
32
33 use strict;
34 use Carp qw(cluck croak);
35 # use autouse Carp => qw( croak($) );
36 use BerkeleyDB;
37 use Fcntl;
38
39 =head2 Creating a Table.
40
41 The constructor WAIT::Table-E<gt>new is normally called via the
42 create_table method of a database handle. This is not enforced, but
43 creating a table does not make any sense unless the table is
44 registered by the database because the latter implements persistence
45 of the meta data. Registering is done automatically by letting the
46 database handle the creation of a table.
47
48 my $db = WAIT::Database->create(name => 'sample');
49 my $tb = $db->create_table(name => 'test',
50 access => $access,
51 layout => $layout,
52 attr => ['docid', 'headline'],
53 );
54
55 The constructor returns a handle for the table. This handle is hidden by the
56 table module, to prevent direct access if called via Table.
57
58 =over 10
59
60 =item C<access> => I<accessobj>
61
62 A reference to an access object for the external parts (attributes) of
63 tuples. As you may remember, the WAIT System does not enforce that
64 objects are completely stored inside the system to avoid duplication.
65 There is no (strong) point in storing all your HTML documents inside
66 the system when indexing your WWW-Server.
67
68 The access object is designed to work like as a tied hash. You pass
69 the refernce to the object, not the tied hash though. An example
70 implementation of an access class that works for manpages is
71 WAIT::Document::Nroff.
72
73 The implementation needs to take into account that WAIT will keep this
74 object in a Data::Dumper or Storable database and re-use it when sman
75 is run. So it is not good enough if we can produce the index with it
76 now, when we create or actively access the table, WAIT also must be
77 able to retrieve documents on its own, when we are in a different
78 context. This happens specifically in a retrieval. To get this working
79 seemlessly, the access-defining class must implement a close method.
80 This method will be called before the Data::Dumper dump takes place.
81 In that moment the access-defining class must get rid of all data
82 structures that cannot be reconstructed via the Data::Dumper dump,
83 such as database handles or C pointers.
84
85 =item C<path> => I<dir>
86
87 The path to database. Files for indexes will have I<path>
88 as prefix. I<Mandatory>, but usually taken care of by the
89 WAIT::Database handle when the constructor is called via
90 WAIT::Database::create_table().
91
92 =item C<name> => I<name>
93
94 The name of this table. I<Mandatory>
95
96 =item C<attr> => [ I<attr> ... ]
97
98 A reference to an array of attribute names. WAIT will keep the
99 contents of these attributes in its table. I<Mandatory>
100
101 =item C<djk> => [ I<attr> ... ]
102
103 A reference to an array of attribute names which make up the
104 I<disjointness key>. Don't think about it - it's of no use yet;
105
106 =item C<layout> => I<layoutobj>
107
108 A reference to an external parser object. Defaults to a new instance
109 of C<WAIT::Parse::Base>. For an example implementation see
110 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
111 class if you so like.
112
113 =item C<keyset> => I<keyset>
114
115 The set of attributes needed to identify a record. Defaults to all
116 attributes.
117
118 =item C<invindex> => I<inverted index>
119
120 A reference to an anon array defining attributes of each record that
121 need to be indexed. See the source of smakewhatis for how to set this
122 up.
123
124 =back
125
126 =cut
127
128 sub new {
129 my $type = shift;
130 my %parm = @_;
131 my $self = {};
132
133 # Check for mandatory attrs early
134 for my $x (qw(name attr env maindbfile tablename)) {
135 $self->{$x} = $parm{$x} or croak "No $x specified";
136 }
137
138 # Do that before we eventually add '_weight' to attributes.
139 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
140
141 $self->{mode} = O_CREAT | O_RDWR;
142
143 # Determine and set up subclass
144 $type = ref($type) || $type;
145 if (defined $parm{djk}) {
146 if (@{$parm{djk}} == @{$parm{attr}}) {
147 # All attributes in DK (sloppy test here!)
148 $type .= '::Independent';
149 require WAIT::Table::Independent;
150 } else {
151 $type .= '::Disjoint';
152 require WAIT::Table::Disjoint;
153 }
154 # Add '_weight' to attributes
155 my %attr;
156 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
157 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158 }
159
160 $self->{path} = $parm{path} or croak "No path specified";
161 bless $self, $type;
162
163 $self->{djk} = $parm{djk} if defined $parm{djk};
164 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
165 $self->{access} = $parm{access} if defined $parm{access};
166 $self->{nextk} = 1; # next record to insert; first record unused
167 $self->{deleted} = {}; # no deleted records yet
168 $self->{indexes} = {};
169
170 # Checking for readers is not necessary, but let's go with the
171 # generic method.
172
173 # Call create_index() and create_index() for compatibility
174 for (@{$self->{keyset}||[]}) {
175 #carp "Specification of indexes at table create time is deprecated";
176 $self->create_index(@$_);
177 }
178 while (@{$parm{invindex}||[]}) {
179 # carp "Specification of inverted indexes at table create time is deprecated";
180 my $att = shift @{$parm{invindex}};
181 my @spec = @{shift @{$parm{invindex}}};
182 my @opt = ();
183
184 if (ref($spec[0])) {
185 warn "Secondary pipelines are deprecated";
186 @opt = %{shift @spec};
187 }
188 $self->create_inverted_index(attribute => $att,
189 pipeline => \@spec,
190 @opt);
191 }
192
193 $self;
194 # end of backwarn compatibility stuff
195 }
196
197 for my $accessor (qw(maindbfile tablename)) {
198 no strict 'refs';
199 *{$accessor} = sub {
200 my($self) = @_;
201 return $self->{$accessor} if $self->{$accessor};
202 require Carp;
203 Carp::confess("accessor $accessor not there");
204 }
205 }
206
207 =head2 Creating an index
208
209 $tb->create_index('docid');
210
211 C<create_index>
212 must be called with a list of attributes. This must be a subset of the
213 attributes specified when the table was created. Currently this
214 method must be called before the first tuple is inserted in the
215 table!
216
217 =cut
218
219 sub create_index {
220 my $self= shift;
221
222 croak "Cannot create index for table aready populated"
223 if $self->{nextk} > 1;
224
225 require WAIT::Index;
226
227 my $name = join '-', @_;
228 #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
229 $self->{indexes}->{$name} =
230 WAIT::Index->new(
231 path => $self->path.'/'.$name,
232 subname => $name,
233 env => $self->{env},
234 maindbfile => $self->maindbfile,
235 tablename => $self->tablename,
236 attr => $_,
237 );
238 }
239
240 =head2 Creating an inverted index
241
242 $tb->create_inverted_index
243 (attribute => 'au',
244 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
245 predicate => 'plain',
246 );
247
248 =over 5
249
250 =item C<attribute>
251
252 The attribute to build the index on. This attribute may not be in the
253 set attributes specified when the table was created.
254
255 =item C<pipeline>
256
257 A piplines specification is a reference to an array of method names
258 (from package C<WAIT::Filter>) which are to be applied in sequence to
259 the contents of the named attribute. The attribute name may not be in
260 the attribute list.
261
262 =item C<predicate>
263
264 An indication which predicate the index implements. This may be
265 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
266 query processing. Currently there is no standard set of predicate
267 names. The predicate defaults to the last member of the pipeline if
268 omitted.
269
270 =back
271
272 Currently this method must be called before the first tuple is
273 inserted in the table!
274
275 =cut
276
277 sub create_inverted_index {
278 my $self = shift;
279 my %parm = @_;
280
281 croak "No attribute specified" unless $parm{attribute};
282 croak "No pipeline specified" unless $parm{pipeline};
283
284 $parm{predicate} ||= $parm{pipeline}->[-1];
285
286 croak "Cannot create index for table aready populated"
287 if $self->{nextk} > 1;
288
289 require WAIT::InvertedIndex;
290
291 # backward compatibility stuff
292 my %opt = %parm;
293 for (qw(attribute pipeline predicate)) {
294 delete $opt{$_};
295 }
296
297 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
298 my $idx = WAIT::InvertedIndex->new(path => $self->path.'/'.$name,
299 subname=> $name,
300 env => $self->{env},
301 maindbfile => $self->maindbfile,
302 tablename => $self->tablename,
303 filter => [@{$parm{pipeline}}], # clone
304 name => $name,
305 attr => $parm{attribute},
306 %opt, # backward compatibility stuff
307 );
308 # We will have to use $parm{predicate} here
309 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
310 }
311
312 sub dir {
313 $_[0]->path;
314 }
315
316 =head2 C<$tb-E<gt>layout>
317
318 Returns the reference to the associated parser object.
319
320 =cut
321
322 sub layout { $_[0]->{layout} }
323
324 =head2 C<$tb-E<gt>fields>
325
326 Returns the array of attribute names.
327
328 =cut
329
330
331 sub fields { keys %{$_[0]->{inverted}}}
332
333 =head2 C<$tb-E<gt>drop>
334
335 Must be called via C<WAIT::Database::drop_table>
336
337 =cut
338
339 sub drop {
340 my $self = shift;
341
342 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
343 $self->close; # just make sure
344
345 # my $path = $self->path;
346
347 for (values %{$self->{indexes}}) {
348 $_->drop;
349 }
350 # unlink "$path/records";
351 # rmdir "$path/read" or warn "Could not rmdir '$path/read'";
352
353 } else {
354 croak ref($self)."::drop called directly";
355 }
356 }
357
358 sub mrequire ($) {
359 my $module = shift;
360
361 $module =~ s{::}{/}g;
362 $module .= '.pm';
363 require $module;
364 }
365
366 sub path {
367 my($self) = @_;
368 return $self->{path} if $self->{path};
369 require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
370 require Carp;
371 Carp::confess("NO path attr");
372 }
373
374 sub open {
375 my $self = shift;
376 my $path = $self->path . '/records';
377
378 mrequire ref($self); # that's tricky eh?
379 if (defined $self->{'layout'}) {
380 mrequire ref($self->{'layout'});
381 }
382 if (defined $self->{'access'}) {
383 mrequire ref($self->{'access'});
384 }
385 if (exists $self->{indexes}) {
386 require WAIT::Index;
387 for my $Ind (values %{$self->{indexes}}) {
388 for my $x (qw(mode env maindbfile)) {
389 $Ind->{$x} = $self->{$x};
390 }
391 }
392 }
393 if (exists $self->{inverted}) {
394 my ($att, $idx);
395 for $att (keys %{$self->{inverted}}) {
396 for $idx (@{$self->{inverted}->{$att}}) {
397 for my $x (qw(mode env maindbfile)) {
398 $idx->{$x} = $self->{$x};
399 }
400 }
401 }
402 require WAIT::InvertedIndex;
403 }
404
405 # CONFUSION: WAIT knows two *modes*: read-only or read-write.
406 # BerkeleyDB means file permissions when talking about Mode.
407 # BerkeleyDB has the "Flags" attribute to specify
408 # read/write/lock/etc subsystems.
409
410 my $flags;
411 if ($self->{mode} & O_RDWR) {
412 $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
413 #warn "DEBUG: Flags on table $path set to 'writing'";
414 } else {
415 $flags = DB_RDONLY;
416 #warn "DEBUG: Flags on table $path set to 'readonly'";
417 }
418 unless (defined $self->{dbh}) {
419 my $subname = $self->tablename . "/records";
420 $self->{dbh} =
421 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
422 $self->{env} ? (Env => $self->{env}) : (),
423 # Filename => $file,
424 Filename => $self->maindbfile,
425 Subname => $subname,
426 Mode => 0664,
427 Flags => $flags,
428 $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
429 $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
430 )
431 or die "Cannot tie: $BerkeleyDB::Error;
432 DEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
433 }
434 $self;
435 }
436
437 sub fetch_extern {
438 my $self = shift;
439
440 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
441 if (exists $self->{'access'}) {
442 mrequire ref($self->{'access'});
443 $self->{'access'}->FETCH(@_);
444 }
445 }
446
447 sub fetch_extern_by_id {
448 my $self = shift;
449
450 $self->fetch_extern($self->fetch(@_));
451 }
452
453 sub _find_index {
454 my $self = shift;
455 my (@att) = @_;
456 my %att;
457 my $name;
458
459 @att{@att} = @att;
460
461 KEY: for $name (keys %{$self->{indexes}}) {
462 my @iat = split /-/, $name;
463 for (@iat) {
464 next KEY unless exists $att{$_};
465 }
466 return $self->{indexes}->{$name};
467 }
468 return undef;
469 }
470
471 sub have {
472 my $self = shift;
473 my %parm = @_;
474
475 my $index = $self->_find_index(keys %parm) or return; # no index-no have
476
477 defined $self->{db} or $self->open;
478 return $index->have(@_);
479 }
480
481 sub insert {
482 my $self = shift;
483 my %parm = @_;
484
485 defined $self->{db} or $self->open;
486
487 # We should move all writing methods to a subclass to check only once
488 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
489
490 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
491 my $key;
492 my @deleted = keys %{$self->{deleted}};
493 my $gotkey = 0;
494
495 if (@deleted) {
496 $key = pop @deleted;
497 delete $self->{deleted}->{$key};
498 # Sanity check
499 if ($key && $key>0) {
500 $gotkey=1;
501 } else {
502 warn(sprintf("WAIT database inconsistency during insert ".
503 "key[%s]: Please rebuild index\n",
504 $key
505 ));
506 }
507 }
508 unless ($gotkey) {
509 $key = $self->{nextk}++;
510 }
511 $self->{db}->{$key} = $tuple;
512 for (values %{$self->{indexes}}) {
513 unless ($_->insert($key, %parm)) {
514 # duplicate key, undo changes
515 if ($key == $self->{nextk}-1) {
516 $self->{nextk}--;
517 } else {
518 # warn "setting key[$key] deleted during insert";
519 $self->{deleted}->{$key}=1;
520 }
521 my $idx;
522 for $idx (values %{$self->{indexes}}) {
523 last if $idx eq $_;
524 $idx->remove($key, %parm);
525 }
526 return undef;
527 }
528 }
529 if (defined $self->{inverted}) {
530 my $att;
531 for $att (keys %{$self->{inverted}}) {
532 if (defined $parm{$att}) {
533 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
534 #map $_->sync, @{$self->{inverted}->{$att}}
535 }
536 }
537 }
538 $key
539 }
540
541 sub sync {
542 my $self = shift;
543
544 for (values %{$self->{indexes}}) {
545 map $_->sync, $_;
546 }
547 if (defined $self->{inverted}) {
548 my $att;
549 for $att (keys %{$self->{inverted}}) {
550 map $_->sync, @{$self->{inverted}->{$att}}
551 }
552 }
553 }
554
555 sub fetch {
556 my $self = shift;
557 my $key = shift;
558
559 return () if exists $self->{deleted}->{$key};
560
561 defined $self->{db} or $self->open;
562 $self->unpack($self->{db}->{$key});
563 }
564
565 sub delete_by_key {
566 my $self = shift;
567 my $key = shift;
568
569 unless ($key) {
570 cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
571 return;
572 }
573
574 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
575 my %tuple = $self->fetch($key);
576 for (values %{$self->{indexes}}) {
577 $_->delete($key, %tuple);
578 }
579 if (defined $self->{inverted}) {
580 # User *must* provide the full record for this or the entries
581 # in the inverted index will not be removed
582 %tuple = (%tuple, @_);
583 my $att;
584 for $att (keys %{$self->{inverted}}) {
585 if (defined $tuple{$att}) {
586 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
587 }
588 }
589 }
590 # warn "setting key[$key] deleted during delete_by_key";
591 ++$self->{deleted}->{$key};
592 }
593
594 sub delete {
595 my $self = shift;
596 my $tkey = $self->have(@_);
597 # warn "tkey[$tkey]\@_[@_]";
598 defined $tkey && $self->delete_by_key($tkey, @_);
599 }
600
601 sub unpack {
602 my($self, $tuple) = @_;
603
604 unless (defined $tuple){
605 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
606 warn("Debug: somebody called unpack without argument tuple!");
607 return;
608 }
609
610 my $att;
611 my @result;
612 my @tuple = split /$;/, $tuple;
613
614 for $att (@{$self->{attr}}) {
615 push @result, $att, shift @tuple;
616 }
617 @result;
618 }
619
620 sub set {
621 my ($self, $iattr, $value) = @_;
622 # in the rare case that they haven't written a single record yet, we
623 # make sure, the inverted inherits our $self->{mode}:
624 defined $self->{db} or $self->open;
625
626 for my $att (keys %{$self->{inverted}}) {
627 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
628 my $idx;
629 for $idx (@{$self->{inverted}->{$att}}) {
630 $idx->set($iattr, $value);
631 }
632 } else {
633 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
634 }
635 }
636
637 1;
638 }
639
640 sub close {
641 my $self = shift;
642
643 #cluck("DEBUG: Closing A Table");
644
645 if (exists $self->{'access'}) {
646 eval {$self->{'access'}->close}; # dont bother if not opened
647 }
648 if ($WAIT::Index::VERSION) {
649 for (values %{$self->{indexes}}) {
650 $_->close();
651 }
652 }
653 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
654 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
655 # if WAIT::InvertedIndex has not been loaded, they cannot have
656 # been altered so far
657 my $att;
658 for $att (keys %{$self->{inverted}}) {
659 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
660 my $idx;
661 for $idx (@{$self->{inverted}->{$att}}) {
662 $idx->close;
663 }
664 } else {
665 map $_->close(), @{$self->{inverted}->{$att}};
666 }
667 }
668 }
669 if ($self->{dbh}) {
670 delete $self->{dbh};
671 }
672 untie %{$self->{db}};
673 for my $att (qw(env db path maindbfile)) {
674 delete $self->{$att};
675 #cluck "DEBUG: Deleted att $att";
676 }
677
678 1;
679 }
680
681 sub DESTROY {
682 my $self = shift;
683
684 delete $self->{env};
685
686 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
687
688 }
689
690 sub open_scan {
691 my $self = shift;
692 my $code = shift;
693
694 $self->{dbh} or $self->open;
695 require WAIT::Scan;
696 new WAIT::Scan $self, $self->{nextk}-1, $code;
697 }
698
699 sub open_index_scan {
700 my $self = shift;
701 my $attr = shift;
702 my $code = shift;
703 my $name = join '-', @$attr;
704
705 if (defined $self->{indexes}->{$name}) {
706 $self->{indexes}->{$name}->open_scan($code);
707 } else {
708 croak "No such index '$name'";
709 }
710 }
711
712 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
713
714 sub prefix {
715 my ($self , $attr, $prefix) = @_;
716 my %result;
717
718 defined $self->{db} or $self->open; # require layout
719
720 for (@{$self->{inverted}->{$attr}}) {
721 my $result = $_->prefix($prefix);
722 if (defined $result) {
723 $result{$_->name} = $result;
724 }
725 }
726 bless \%result, 'WAIT::Query::Raw';
727 }
728
729 sub intervall {
730 my ($self, $attr, $lb, $ub) = @_;
731 my %result;
732
733 defined $self->{db} or $self->open; # require layout
734
735 for (@{$self->{inverted}->{$attr}}) {
736 my $result = $_->intervall($lb, $ub);
737 if (defined $result) {
738 $result{$_->name} = $result;
739 }
740 }
741 bless \%result, 'WAIT::Query::Raw';
742 }
743
744 sub search_ref {
745 my $self = shift;
746 my ($query, $attr, $cont, $raw);
747 if (ref $_[0]) {
748 $query = shift;
749 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
750
751 $attr = $query->{attr};
752 $cont = $query->{cont};
753 $raw = $query->{raw};
754 } else {
755 cluck("Using three argument search interface is deprecated, use hashref interface instead");
756 $attr = shift;
757 $cont = shift;
758 $raw = shift;
759 $query = {
760 attr => $attr,
761 cont => $cont,
762 raw => $raw,
763 };
764 }
765
766 my %result;
767
768 defined $self->{db} or $self->open; # require layout
769
770 if ($raw) {
771 for (@{$self->{inverted}->{$attr}}) {
772 my $name = $_->name;
773 if (exists $raw->{$name} and @{$raw->{$name}}) {
774 my $scale = 1/scalar(@{$raw->{$name}});
775 my %r = $_->search_raw($query, @{$raw->{$name}});
776 my ($key, $val);
777 while (($key, $val) = each %r) {
778 if (exists $result{$key}) {
779 $result{$key} += $val*$scale;
780 } else {
781 $result{$key} = $val*$scale;
782 }
783 }
784 }
785 }
786 }
787 if (defined $cont and $cont ne '') {
788 for (@{$self->{inverted}->{$attr}}) {
789 my $r = $_->search_ref($query, $cont);
790 my ($key, $val);
791 while (($key, $val) = each %$r) {
792 if (exists $result{$key}) {
793 $result{$key} += $val;
794 } else {
795 $result{$key} = $val;
796 }
797 }
798 }
799 }
800 # sanity check for deleted documents.
801 # this should not be necessary !@#$
802 for (keys %result) {
803 delete $result{$_} if $self->{deleted}->{$_}
804 }
805 \%result;
806 }
807
808 sub parse_query {
809 my($self, $attr, $query) = @_;
810 return unless defined $query && length $query;
811 my %qt;
812 for (@{$self->{inverted}->{$attr}}) {
813 grep $qt{$_}++, $_->parse($query);
814 }
815 [keys %qt];
816 }
817
818 sub hilight_positions {
819 my ($self, $attr, $text, $query, $raw) = @_;
820 my %pos;
821
822 if (defined $raw) {
823 for (@{$self->{inverted}->{$attr}}) { # objects of type
824 # WAIT::InvertedIndex for
825 # this index field $attr
826 my $name = $_->name;
827 if (exists $raw->{$name}) {
828 my %qt;
829 grep $qt{$_}++, @{$raw->{$name}};
830 for ($_->parse_pos($text)) {
831 if (exists $qt{$_->[0]}) {
832 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
833 }
834 }
835 }
836 }
837 }
838 if (defined $query) {
839 for (@{$self->{inverted}->{$attr}}) {
840 my %qt;
841
842 grep $qt{$_}++, $_->parse($query);
843 for ($_->parse_pos($text)) {
844 if (exists $qt{$_->[0]}) {
845 if (exists $pos{$_->[1]}) { # perl -w ;-)
846 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
847 } else {
848 $pos{$_->[1]} = length($_->[0]);
849 }
850 }
851 }
852 }
853 }
854
855 \%pos;
856 }
857
858 sub hilight {
859 my ($tb, $buf, $qplain, $qraw) = @_;
860 my $layout = $tb->layout();
861
862 my @result;
863
864 $qplain ||= {};
865 $qraw ||= {};
866 my @ttxt = $layout->tag($buf);
867 while (@ttxt) {
868 no strict 'refs';
869 my %tag = %{shift @ttxt};
870 my $txt = shift @ttxt;
871 my $fld;
872
873 my %hl;
874 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
875 my $hp = $tb->hilight_positions($fld, $txt,
876 $qplain->{$fld}, $qraw->{$fld});
877 for (keys %$hp) {
878 if (exists $hl{$_}) { # -w ;-(
879 $hl{$_} = max($hl{$_}, $hp->{$_});
880 } else {
881 $hl{$_} = $hp->{$_};
882 }
883 }
884 }
885 my $pos;
886 my $qt = {_qt => 1, %tag};
887 my $pl = \%tag;
888 my $last = length($txt);
889 my @tmp;
890 for $pos (sort {$b <=> $a} keys %hl) {
891 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
892 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
893 $last = $pos;
894 }
895 push @result, $pl, substr($txt,0,$last);
896 push @result, @tmp;
897 }
898 @result; # no speed necessary
899 }
900
901 1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26