/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 108 - (hide annotations)
Tue Jul 13 17:41:12 2004 UTC (19 years, 10 months ago) by dpavlin
File size: 23742 byte(s)
beginning of version 2.0 using BerkeleyDB (non-functional for now)

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 dpavlin 89 # Last Modified On: Wed Jan 23 14:15:15 2002
8 ulpfr 10 # Language : CPerl
9 dpavlin 89 # Update Count : 152
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 dpavlin 108 our $VERSION = "2.000";
29 ulpfr 13
30     use WAIT::Table::Handle ();
31 ulpfr 10 require WAIT::Parse::Base;
32 ulpfr 13
33 ulpfr 10 use strict;
34     use Carp;
35 ulpfr 13 # use autouse Carp => qw( croak($) );
36 dpavlin 108 use BerkeleyDB;
37 ulpfr 10 use Fcntl;
38    
39     =head2 Creating a Table.
40    
41 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
42 ulpfr 10 create_table method of a database handle. This is not enforced, but
43 ulpfr 13 creating a table does not make any sense unless the table is
44 ulpfr 10 registered by the database because the latter implements persistence
45     of the meta data. Registering is done automatically by letting the
46 ulpfr 13 database handle the creation of a table.
47 ulpfr 10
48 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
49     my $tb = $db->create_table(name => 'test',
50     access => $access,
51     layout => $layout,
52     attr => ['docid', 'headline'],
53     );
54 ulpfr 10
55     The constructor returns a handle for the table. This handle is hidden by the
56     table module, to prevent direct access if called via Table.
57    
58     =over 10
59    
60 ulpfr 13 =item C<access> => I<accessobj>
61 ulpfr 10
62 ulpfr 13 A reference to an access object for the external parts (attributes) of
63 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
64     objects are completely stored inside the system to avoid duplication.
65 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
66 ulpfr 10 the system when indexing your WWW-Server.
67    
68 ulpfr 13 The access object is designed to work like as a tied hash. You pass
69     the refernce to the object, not the tied hash though. An example
70     implementation of an access class that works for manpages is
71     WAIT::Document::Nroff.
72    
73     The implementation needs to take into account that WAIT will keep this
74     object in a Data::Dumper or Storable database and re-use it when sman
75     is run. So it is not good enough if we can produce the index with it
76     now, when we create or actively access the table, WAIT also must be
77     able to retrieve documents on its own, when we are in a different
78     context. This happens specifically in a retrieval. To get this working
79     seemlessly, the access-defining class must implement a close method.
80     This method will be called before the Data::Dumper dump takes place.
81     In that moment the access-defining class must get rid of all data
82     structures that cannot be reconstructed via the Data::Dumper dump,
83     such as database handles or C pointers.
84    
85 ulpfr 10 =item C<file> => I<fname>
86    
87     The filename of the records file. Files for indexes will have I<fname>
88 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
89     WAIT::Database handle when the constructor is called via
90     WAIT::Database::create_table().
91 ulpfr 10
92     =item C<name> => I<name>
93    
94     The name of this table. I<Mandatory>
95    
96     =item C<attr> => [ I<attr> ... ]
97    
98 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
99     contents of these attributes in its table. I<Mandatory>
100 ulpfr 10
101     =item C<djk> => [ I<attr> ... ]
102    
103     A reference to an array of attribute names which make up the
104 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
105 ulpfr 10
106     =item C<layout> => I<layoutobj>
107    
108 ulpfr 13 A reference to an external parser object. Defaults to a new instance
109     of C<WAIT::Parse::Base>. For an example implementation see
110     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
111     class if you so like.
112 ulpfr 10
113 ulpfr 13 =item C<keyset> => I<keyset>
114 ulpfr 10
115 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
116     attributes.
117 ulpfr 10
118 ulpfr 13 =item C<invindex> => I<inverted index>
119    
120     A reference to an anon array defining attributes of each record that
121     need to be indexed. See the source of smakewhatis for how to set this
122     up.
123    
124 ulpfr 10 =back
125    
126     =cut
127    
128     sub new {
129     my $type = shift;
130     my %parm = @_;
131     my $self = {};
132    
133 ulpfr 13 # Check for mandatory attrs early
134 dpavlin 108 for my $x (qw(name attr env maindbfile tablename)) {
135     $self->{$x} = $parm{$x} or croak "No $x specified";
136     }
137 ulpfr 13
138 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
139     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
140 ulpfr 13
141 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
142 ulpfr 13
143 ulpfr 10 # Determine and set up subclass
144     $type = ref($type) || $type;
145     if (defined $parm{djk}) {
146     if (@{$parm{djk}} == @{$parm{attr}}) {
147     # All attributes in DK (sloppy test here!)
148     $type .= '::Independent';
149     require WAIT::Table::Independent;
150     } else {
151     $type .= '::Disjoint';
152     require WAIT::Table::Disjoint;
153     }
154     # Add '_weight' to attributes
155     my %attr;
156     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
157     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158     }
159    
160 dpavlin 108 $self->{path} = $parm{path} or croak "No path specified";
161     bless $self, $type;
162 ulpfr 19
163 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
164     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
165     $self->{access} = $parm{access} if defined $parm{access};
166     $self->{nextk} = 1; # next record to insert; first record unused
167     $self->{deleted} = {}; # no deleted records yet
168     $self->{indexes} = {};
169    
170 ulpfr 35 # Checking for readers is not necessary, but let's go with the
171     # generic method.
172    
173 ulpfr 10 # Call create_index() and create_index() for compatibility
174     for (@{$self->{keyset}||[]}) {
175     #carp "Specification of indexes at table create time is deprecated";
176     $self->create_index(@$_);
177     }
178     while (@{$parm{invindex}||[]}) {
179     # carp "Specification of inverted indexes at table create time is deprecated";
180     my $att = shift @{$parm{invindex}};
181     my @spec = @{shift @{$parm{invindex}}};
182 dpavlin 108 my @opt = ();
183 ulpfr 13
184 ulpfr 10 if (ref($spec[0])) {
185 dpavlin 108 warn "Secondary pipelines are deprecated";
186 ulpfr 10 @opt = %{shift @spec};
187     }
188 dpavlin 108 $self->create_inverted_index(attribute => $att,
189     pipeline => \@spec,
190     @opt);
191 ulpfr 10 }
192 ulpfr 19
193 ulpfr 10 $self;
194     # end of backwarn compatibility stuff
195     }
196    
197 dpavlin 108 for my $accessor (qw(maindbfile tablename)) {
198     no strict 'refs';
199     *{$accessor} = sub {
200     my($self) = @_;
201     return $self->{$accessor} if $self->{$accessor};
202     require Carp;
203     Carp::confess("accessor $accessor not there");
204     }
205     }
206    
207 ulpfr 10 =head2 Creating an index
208    
209     $tb->create_index('docid');
210    
211     =item C<create_index>
212    
213     must be called with a list of attributes. This must be a subset of the
214     attributes specified when the table was created. Currently this
215     method must be called before the first tuple is inserted in the
216     table!
217    
218     =cut
219    
220     sub create_index {
221     my $self= shift;
222 ulpfr 13
223 ulpfr 10 croak "Cannot create index for table aready populated"
224     if $self->{nextk} > 1;
225 ulpfr 13
226 ulpfr 10 require WAIT::Index;
227 ulpfr 13
228 ulpfr 10 my $name = join '-', @_;
229 dpavlin 108 #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
230 ulpfr 10 $self->{indexes}->{$name} =
231 dpavlin 108 WAIT::Index->new(
232     file => $self->file.'/'.$name,
233     subname => $name,
234     env => $self->{env},
235     maindbfile => $self->maindbfile,
236     tablename => $self->tablename,
237     attr => $_,
238     );
239 ulpfr 10 }
240    
241     =head2 Creating an inverted index
242    
243     $tb->create_inverted_index
244     (attribute => 'au',
245     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
246     predicate => 'plain',
247     );
248    
249     =over 5
250    
251     =item C<attribute>
252    
253     The attribute to build the index on. This attribute may not be in the
254     set attributes specified when the table was created.
255    
256     =item C<pipeline>
257    
258 ulpfr 13 A piplines specification is a reference to an array of method names
259     (from package C<WAIT::Filter>) which are to be applied in sequence to
260     the contents of the named attribute. The attribute name may not be in
261     the attribute list.
262 ulpfr 10
263     =item C<predicate>
264    
265     An indication which predicate the index implements. This may be
266     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
267     query processing. Currently there is no standard set of predicate
268 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
269 ulpfr 10 omitted.
270    
271     =back
272    
273     Currently this method must be called before the first tuple is
274     inserted in the table!
275    
276     =cut
277    
278     sub create_inverted_index {
279     my $self = shift;
280     my %parm = @_;
281    
282     croak "No attribute specified" unless $parm{attribute};
283     croak "No pipeline specified" unless $parm{pipeline};
284    
285     $parm{predicate} ||= $parm{pipeline}->[-1];
286 ulpfr 13
287 ulpfr 10 croak "Cannot create index for table aready populated"
288     if $self->{nextk} > 1;
289 ulpfr 13
290 ulpfr 10 require WAIT::InvertedIndex;
291    
292     # backward compatibility stuff
293     my %opt = %parm;
294     for (qw(attribute pipeline predicate)) {
295     delete $opt{$_};
296     }
297 ulpfr 13
298 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
299 dpavlin 108 my $idx = WAIT::InvertedIndex->new(file => $self->file.'/'.$name,
300     subname=> $name,
301     env => $self->{env},
302     maindbfile => $self->maindbfile,
303     tablename => $self->tablename,
304     filter => [@{$parm{pipeline}}], # clone
305     name => $name,
306     attr => $parm{attribute},
307     %opt, # backward compatibility stuff
308     );
309 ulpfr 10 # We will have to use $parm{predicate} here
310     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
311     }
312    
313     sub dir {
314 dpavlin 108 $_[0]->file;
315 ulpfr 10 }
316    
317     =head2 C<$tb-E<gt>layout>
318    
319     Returns the reference to the associated parser object.
320    
321     =cut
322    
323     sub layout { $_[0]->{layout} }
324    
325     =head2 C<$tb-E<gt>fields>
326    
327     Returns the array of attribute names.
328    
329     =cut
330    
331    
332     sub fields { keys %{$_[0]->{inverted}}}
333    
334     =head2 C<$tb-E<gt>drop>
335    
336     Must be called via C<WAIT::Database::drop_table>
337    
338     =cut
339    
340     sub drop {
341     my $self = shift;
342 ulpfr 35
343 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
344     $self->close; # just make sure
345 ulpfr 35
346 dpavlin 108 my $file = $self->file;
347 ulpfr 10
348     for (values %{$self->{indexes}}) {
349     $_->drop;
350     }
351 dpavlin 89 unlink "$file/records";
352     rmdir "$file/read" or warn "Could not rmdir '$file/read'";
353    
354 ulpfr 10 } else {
355     croak ref($self)."::drop called directly";
356     }
357     }
358    
359     sub mrequire ($) {
360     my $module = shift;
361    
362     $module =~ s{::}{/}g;
363     $module .= '.pm';
364     require $module;
365     }
366    
367 dpavlin 108 sub path {
368     my($self) = @_;
369     return $self->{path} if $self->{path};
370     require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
371     require Carp;
372     Carp::confess("NO file attr");
373     }
374    
375 ulpfr 10 sub open {
376     my $self = shift;
377 dpavlin 108 my $file = $self->file . '/records';
378 ulpfr 10
379     mrequire ref($self); # that's tricky eh?
380     if (defined $self->{'layout'}) {
381     mrequire ref($self->{'layout'});
382     }
383     if (defined $self->{'access'}) {
384     mrequire ref($self->{'access'});
385     }
386     if (exists $self->{indexes}) {
387     require WAIT::Index;
388 dpavlin 108 for my $Ind (values %{$self->{indexes}}) {
389     for my $x (qw(mode env maindbfile)) {
390     $Ind->{$x} = $self->{$x};
391     }
392 ulpfr 10 }
393     }
394     if (exists $self->{inverted}) {
395     my ($att, $idx);
396     for $att (keys %{$self->{inverted}}) {
397     for $idx (@{$self->{inverted}->{$att}}) {
398 dpavlin 108 for my $x (qw(mode env maindbfile)) {
399     $idx->{$x} = $self->{$x};
400     }
401 ulpfr 10 }
402     }
403     require WAIT::InvertedIndex;
404     }
405 ulpfr 42
406 dpavlin 108 # CONFUSION: WAIT knows two *modes*: read-only or read-write.
407     # BerkeleyDB means file permissions when talking about Mode.
408     # BerkeleyDB has the "Flags" attribute to specify
409     # read/write/lock/etc subsystems.
410 ulpfr 42
411 dpavlin 108 my $flags;
412     if ($self->{mode} & O_RDWR) {
413     $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
414     warn "Flags on table $file set to 'writing'";
415     } else {
416     $flags = DB_RDONLY;
417     # warn "Flags on table $file set to 'readonly'";
418     }
419 ulpfr 10 unless (defined $self->{dbh}) {
420 dpavlin 108 my $subname = $self->tablename . "/records";
421     $self->{dbh} =
422     tie(%{$self->{db}}, 'BerkeleyDB::Btree',
423     $self->{env} ? (Env => $self->{env}) : (),
424     # Filename => $file,
425     Filename => $self->maindbfile,
426     Subname => $subname,
427     Mode => 0664,
428     Flags => $flags,
429     $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
430     $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
431     )
432     or die "Cannot tie: $BerkeleyDB::Error;
433     DEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
434 ulpfr 10 }
435     $self;
436     }
437    
438     sub fetch_extern {
439     my $self = shift;
440    
441 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
442 ulpfr 10 if (exists $self->{'access'}) {
443     mrequire ref($self->{'access'});
444     $self->{'access'}->FETCH(@_);
445     }
446     }
447    
448     sub fetch_extern_by_id {
449     my $self = shift;
450    
451     $self->fetch_extern($self->fetch(@_));
452     }
453    
454     sub _find_index {
455     my $self = shift;
456     my (@att) = @_;
457     my %att;
458     my $name;
459 ulpfr 13
460 ulpfr 10 @att{@att} = @att;
461    
462     KEY: for $name (keys %{$self->{indexes}}) {
463     my @iat = split /-/, $name;
464     for (@iat) {
465     next KEY unless exists $att{$_};
466     }
467     return $self->{indexes}->{$name};
468     }
469     return undef;
470     }
471    
472     sub have {
473     my $self = shift;
474     my %parm = @_;
475    
476 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
477    
478 ulpfr 10 defined $self->{db} or $self->open;
479     return $index->have(@_);
480     }
481    
482     sub insert {
483     my $self = shift;
484     my %parm = @_;
485    
486     defined $self->{db} or $self->open;
487    
488 ulpfr 13 # We should move all writing methods to a subclass to check only once
489     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
490    
491 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
492     my $key;
493     my @deleted = keys %{$self->{deleted}};
494 ulpfr 19 my $gotkey = 0;
495 ulpfr 10
496     if (@deleted) {
497     $key = pop @deleted;
498     delete $self->{deleted}->{$key};
499 ulpfr 19 # Sanity check
500     if ($key && $key>0) {
501     $gotkey=1;
502 ulpfr 10 } else {
503 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
504     "key[%s]: Please rebuild index\n",
505     $key
506     ));
507     }
508     }
509     unless ($gotkey) {
510 ulpfr 10 $key = $self->{nextk}++;
511     }
512 dpavlin 108 $self->{db}->{$key} = $tuple;
513 ulpfr 10 for (values %{$self->{indexes}}) {
514     unless ($_->insert($key, %parm)) {
515     # duplicate key, undo changes
516     if ($key == $self->{nextk}-1) {
517     $self->{nextk}--;
518     } else {
519 ulpfr 19 # warn "setting key[$key] deleted during insert";
520 ulpfr 10 $self->{deleted}->{$key}=1;
521     }
522     my $idx;
523     for $idx (values %{$self->{indexes}}) {
524     last if $idx eq $_;
525     $idx->remove($key, %parm);
526     }
527     return undef;
528 ulpfr 13 }
529 ulpfr 10 }
530     if (defined $self->{inverted}) {
531     my $att;
532     for $att (keys %{$self->{inverted}}) {
533     if (defined $parm{$att}) {
534     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
535     #map $_->sync, @{$self->{inverted}->{$att}}
536     }
537     }
538     }
539     $key
540     }
541    
542     sub sync {
543     my $self = shift;
544 ulpfr 13
545 ulpfr 10 for (values %{$self->{indexes}}) {
546     map $_->sync, $_;
547     }
548     if (defined $self->{inverted}) {
549     my $att;
550     for $att (keys %{$self->{inverted}}) {
551     map $_->sync, @{$self->{inverted}->{$att}}
552     }
553     }
554     }
555    
556     sub fetch {
557     my $self = shift;
558     my $key = shift;
559    
560     return () if exists $self->{deleted}->{$key};
561 ulpfr 13
562 ulpfr 10 defined $self->{db} or $self->open;
563 dpavlin 108 $self->unpack($self->{db}->{$key});
564 ulpfr 10 }
565    
566     sub delete_by_key {
567     my $self = shift;
568     my $key = shift;
569    
570 ulpfr 19 unless ($key) {
571     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
572     return;
573     }
574    
575 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
576     my %tuple = $self->fetch($key);
577     for (values %{$self->{indexes}}) {
578     $_->delete($key, %tuple);
579     }
580     if (defined $self->{inverted}) {
581     # User *must* provide the full record for this or the entries
582     # in the inverted index will not be removed
583     %tuple = (%tuple, @_);
584     my $att;
585     for $att (keys %{$self->{inverted}}) {
586     if (defined $tuple{$att}) {
587     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
588     }
589     }
590     }
591 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
592 ulpfr 10 ++$self->{deleted}->{$key};
593     }
594    
595     sub delete {
596     my $self = shift;
597     my $tkey = $self->have(@_);
598 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
599 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
600     }
601    
602     sub unpack {
603 laperla 31 my($self, $tuple) = @_;
604 ulpfr 10
605 laperla 31 unless (defined $tuple){
606     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
607     warn("Debug: somebody called unpack without argument tuple!");
608     return;
609     }
610    
611 ulpfr 10 my $att;
612     my @result;
613     my @tuple = split /$;/, $tuple;
614    
615     for $att (@{$self->{attr}}) {
616     push @result, $att, shift @tuple;
617     }
618     @result;
619     }
620    
621 ulpfr 19 sub set {
622     my ($self, $iattr, $value) = @_;
623 laperla 77 # in the rare case that they haven't written a single record yet, we
624     # make sure, the inverted inherits our $self->{mode}:
625     defined $self->{db} or $self->open;
626    
627 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
628 dpavlin 108 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
629 ulpfr 19 my $idx;
630     for $idx (@{$self->{inverted}->{$att}}) {
631     $idx->set($iattr, $value);
632     }
633     } else {
634     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
635     }
636     }
637    
638     1;
639     }
640    
641 ulpfr 10 sub close {
642     my $self = shift;
643    
644 dpavlin 108 require Carp; Carp::cluck("------->Closing A Table<-------");
645    
646 ulpfr 10 if (exists $self->{'access'}) {
647     eval {$self->{'access'}->close}; # dont bother if not opened
648     }
649 laperla 31 if ($WAIT::Index::VERSION) {
650     for (values %{$self->{indexes}}) {
651     $_->close();
652     }
653 ulpfr 10 }
654 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
655     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
656     # if WAIT::InvertedIndex has not been loaded, they cannot have
657     # been altered so far
658 ulpfr 10 my $att;
659     for $att (keys %{$self->{inverted}}) {
660     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
661     my $idx;
662     for $idx (@{$self->{inverted}->{$att}}) {
663     $idx->close;
664     }
665     } else {
666     map $_->close(), @{$self->{inverted}->{$att}};
667     }
668     }
669     }
670     if ($self->{dbh}) {
671     delete $self->{dbh};
672     }
673 dpavlin 108 untie %{$self->{db}};
674     for my $att (qw(env db file maindbfile)) {
675     delete $self->{$att};
676     warn "----->Deleted att $att<-----";
677     }
678 ulpfr 10
679     1;
680     }
681    
682 dpavlin 108 sub DESTROY {
683 ulpfr 19 my $self = shift;
684    
685 dpavlin 108 delete $self->{env};
686 ulpfr 19
687 dpavlin 108 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
688 ulpfr 19
689 ulpfr 13 }
690    
691 ulpfr 10 sub open_scan {
692     my $self = shift;
693     my $code = shift;
694    
695     $self->{dbh} or $self->open;
696     require WAIT::Scan;
697     new WAIT::Scan $self, $self->{nextk}-1, $code;
698     }
699    
700     sub open_index_scan {
701     my $self = shift;
702     my $attr = shift;
703     my $code = shift;
704     my $name = join '-', @$attr;
705    
706     if (defined $self->{indexes}->{$name}) {
707     $self->{indexes}->{$name}->open_scan($code);
708     } else {
709     croak "No such index '$name'";
710     }
711     }
712    
713     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
714    
715     sub prefix {
716     my ($self , $attr, $prefix) = @_;
717     my %result;
718    
719     defined $self->{db} or $self->open; # require layout
720    
721     for (@{$self->{inverted}->{$attr}}) {
722     my $result = $_->prefix($prefix);
723     if (defined $result) {
724     $result{$_->name} = $result;
725     }
726     }
727     bless \%result, 'WAIT::Query::Raw';
728     }
729    
730     sub intervall {
731     my ($self, $attr, $lb, $ub) = @_;
732     my %result;
733    
734     defined $self->{db} or $self->open; # require layout
735    
736     for (@{$self->{inverted}->{$attr}}) {
737     my $result = $_->intervall($lb, $ub);
738     if (defined $result) {
739     $result{$_->name} = $result;
740     }
741     }
742     bless \%result, 'WAIT::Query::Raw';
743     }
744    
745 dpavlin 108 sub search_ref {
746 ulpfr 19 my $self = shift;
747     my ($query, $attr, $cont, $raw);
748     if (ref $_[0]) {
749     $query = shift;
750 dpavlin 108 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
751    
752 ulpfr 19 $attr = $query->{attr};
753     $cont = $query->{cont};
754     $raw = $query->{raw};
755     } else {
756     require Carp;
757     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
758     $attr = shift;
759     $cont = shift;
760     $raw = shift;
761     $query = {
762     attr => $attr,
763     cont => $cont,
764     raw => $raw,
765     };
766     }
767    
768 ulpfr 10 my %result;
769    
770     defined $self->{db} or $self->open; # require layout
771    
772     if ($raw) {
773     for (@{$self->{inverted}->{$attr}}) {
774     my $name = $_->name;
775     if (exists $raw->{$name} and @{$raw->{$name}}) {
776     my $scale = 1/scalar(@{$raw->{$name}});
777 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
778 ulpfr 10 my ($key, $val);
779     while (($key, $val) = each %r) {
780     if (exists $result{$key}) {
781     $result{$key} += $val*$scale;
782     } else {
783     $result{$key} = $val*$scale;
784     }
785     }
786     }
787     }
788     }
789     if (defined $cont and $cont ne '') {
790     for (@{$self->{inverted}->{$attr}}) {
791 dpavlin 108 my $r = $_->search_ref($query, $cont);
792 ulpfr 10 my ($key, $val);
793 dpavlin 108 while (($key, $val) = each %$r) {
794 ulpfr 10 if (exists $result{$key}) {
795     $result{$key} += $val;
796     } else {
797     $result{$key} = $val;
798     }
799     }
800     }
801     }
802     # sanity check for deleted documents.
803     # this should not be necessary !@#$
804     for (keys %result) {
805     delete $result{$_} if $self->{deleted}->{$_}
806     }
807 dpavlin 108 \%result;
808 ulpfr 10 }
809    
810 dpavlin 108 sub parse_query {
811     my($self, $attr, $query) = @_;
812     return unless defined $query && length $query;
813     my %qt;
814     for (@{$self->{inverted}->{$attr}}) {
815     grep $qt{$_}++, $_->parse($query);
816     }
817     [keys %qt];
818     }
819    
820 ulpfr 10 sub hilight_positions {
821     my ($self, $attr, $text, $query, $raw) = @_;
822     my %pos;
823    
824     if (defined $raw) {
825 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
826     # WAIT::InvertedIndex for
827     # this index field $attr
828 ulpfr 10 my $name = $_->name;
829     if (exists $raw->{$name}) {
830     my %qt;
831     grep $qt{$_}++, @{$raw->{$name}};
832     for ($_->parse_pos($text)) {
833     if (exists $qt{$_->[0]}) {
834     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
835     }
836     }
837     }
838     }
839     }
840     if (defined $query) {
841     for (@{$self->{inverted}->{$attr}}) {
842     my %qt;
843    
844     grep $qt{$_}++, $_->parse($query);
845     for ($_->parse_pos($text)) {
846     if (exists $qt{$_->[0]}) {
847     if (exists $pos{$_->[1]}) { # perl -w ;-)
848     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
849     } else {
850     $pos{$_->[1]} = length($_->[0]);
851     }
852     }
853     }
854     }
855     }
856    
857     \%pos;
858     }
859    
860     sub hilight {
861 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
862     my $layout = $tb->layout();
863    
864 ulpfr 10 my @result;
865    
866 ulpfr 13 $qplain ||= {};
867     $qraw ||= {};
868     my @ttxt = $layout->tag($buf);
869 ulpfr 10 while (@ttxt) {
870     no strict 'refs';
871     my %tag = %{shift @ttxt};
872     my $txt = shift @ttxt;
873     my $fld;
874    
875     my %hl;
876 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
877 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
878 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
879 ulpfr 10 for (keys %$hp) {
880     if (exists $hl{$_}) { # -w ;-(
881     $hl{$_} = max($hl{$_}, $hp->{$_});
882     } else {
883     $hl{$_} = $hp->{$_};
884     }
885     }
886     }
887     my $pos;
888     my $qt = {_qt => 1, %tag};
889     my $pl = \%tag;
890     my $last = length($txt);
891     my @tmp;
892     for $pos (sort {$b <=> $a} keys %hl) {
893     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
894     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
895     $last = $pos;
896     }
897     push @result, $pl, substr($txt,0,$last);
898     push @result, @tmp;
899     }
900     @result; # no speed necessary
901     }
902    
903     1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26