/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 117 - (hide annotations)
Fri Jul 15 18:58:37 2005 UTC (18 years, 10 months ago) by dpavlin
File size: 23719 byte(s)
fix for perl 5.8

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 dpavlin 89 # Last Modified On: Wed Jan 23 14:15:15 2002
8 ulpfr 10 # Language : CPerl
9 dpavlin 89 # Update Count : 152
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 dpavlin 108 our $VERSION = "2.000";
29 ulpfr 13
30     use WAIT::Table::Handle ();
31 ulpfr 10 require WAIT::Parse::Base;
32 ulpfr 13
33 ulpfr 10 use strict;
34 dpavlin 115 use Carp qw(cluck croak confess);
35 ulpfr 13 # use autouse Carp => qw( croak($) );
36 dpavlin 108 use BerkeleyDB;
37 ulpfr 10 use Fcntl;
38    
39     =head2 Creating a Table.
40    
41 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
42 ulpfr 10 create_table method of a database handle. This is not enforced, but
43 ulpfr 13 creating a table does not make any sense unless the table is
44 ulpfr 10 registered by the database because the latter implements persistence
45     of the meta data. Registering is done automatically by letting the
46 ulpfr 13 database handle the creation of a table.
47 ulpfr 10
48 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
49     my $tb = $db->create_table(name => 'test',
50     access => $access,
51     layout => $layout,
52     attr => ['docid', 'headline'],
53     );
54 ulpfr 10
55     The constructor returns a handle for the table. This handle is hidden by the
56     table module, to prevent direct access if called via Table.
57    
58     =over 10
59    
60 ulpfr 13 =item C<access> => I<accessobj>
61 ulpfr 10
62 ulpfr 13 A reference to an access object for the external parts (attributes) of
63 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
64     objects are completely stored inside the system to avoid duplication.
65 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
66 ulpfr 10 the system when indexing your WWW-Server.
67    
68 ulpfr 13 The access object is designed to work like as a tied hash. You pass
69     the refernce to the object, not the tied hash though. An example
70     implementation of an access class that works for manpages is
71     WAIT::Document::Nroff.
72    
73     The implementation needs to take into account that WAIT will keep this
74     object in a Data::Dumper or Storable database and re-use it when sman
75     is run. So it is not good enough if we can produce the index with it
76     now, when we create or actively access the table, WAIT also must be
77     able to retrieve documents on its own, when we are in a different
78     context. This happens specifically in a retrieval. To get this working
79     seemlessly, the access-defining class must implement a close method.
80     This method will be called before the Data::Dumper dump takes place.
81     In that moment the access-defining class must get rid of all data
82     structures that cannot be reconstructed via the Data::Dumper dump,
83     such as database handles or C pointers.
84    
85 dpavlin 114 =item C<path> => I<dir>
86 ulpfr 10
87 dpavlin 114 The path to database. Files for indexes will have I<path>
88 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
89     WAIT::Database handle when the constructor is called via
90     WAIT::Database::create_table().
91 ulpfr 10
92     =item C<name> => I<name>
93    
94     The name of this table. I<Mandatory>
95    
96     =item C<attr> => [ I<attr> ... ]
97    
98 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
99     contents of these attributes in its table. I<Mandatory>
100 ulpfr 10
101     =item C<djk> => [ I<attr> ... ]
102    
103     A reference to an array of attribute names which make up the
104 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
105 ulpfr 10
106     =item C<layout> => I<layoutobj>
107    
108 ulpfr 13 A reference to an external parser object. Defaults to a new instance
109     of C<WAIT::Parse::Base>. For an example implementation see
110     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
111     class if you so like.
112 ulpfr 10
113 ulpfr 13 =item C<keyset> => I<keyset>
114 ulpfr 10
115 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
116     attributes.
117 ulpfr 10
118 ulpfr 13 =item C<invindex> => I<inverted index>
119    
120     A reference to an anon array defining attributes of each record that
121     need to be indexed. See the source of smakewhatis for how to set this
122     up.
123    
124 ulpfr 10 =back
125    
126     =cut
127    
128     sub new {
129     my $type = shift;
130     my %parm = @_;
131     my $self = {};
132    
133 ulpfr 13 # Check for mandatory attrs early
134 dpavlin 108 for my $x (qw(name attr env maindbfile tablename)) {
135 dpavlin 116 $self->{$x} = $parm{$x} or confess "No $x specified";
136 dpavlin 108 }
137 ulpfr 13
138 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
139     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
140 ulpfr 13
141 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
142 ulpfr 13
143 ulpfr 10 # Determine and set up subclass
144     $type = ref($type) || $type;
145     if (defined $parm{djk}) {
146     if (@{$parm{djk}} == @{$parm{attr}}) {
147     # All attributes in DK (sloppy test here!)
148     $type .= '::Independent';
149     require WAIT::Table::Independent;
150     } else {
151     $type .= '::Disjoint';
152     require WAIT::Table::Disjoint;
153     }
154     # Add '_weight' to attributes
155     my %attr;
156     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
157     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158     }
159    
160 dpavlin 116 $self->{path} = $parm{path} or confess "No path specified";
161 dpavlin 108 bless $self, $type;
162 ulpfr 19
163 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
164     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
165     $self->{access} = $parm{access} if defined $parm{access};
166     $self->{nextk} = 1; # next record to insert; first record unused
167     $self->{deleted} = {}; # no deleted records yet
168     $self->{indexes} = {};
169    
170 ulpfr 35 # Checking for readers is not necessary, but let's go with the
171     # generic method.
172    
173 ulpfr 10 # Call create_index() and create_index() for compatibility
174     for (@{$self->{keyset}||[]}) {
175     #carp "Specification of indexes at table create time is deprecated";
176     $self->create_index(@$_);
177     }
178     while (@{$parm{invindex}||[]}) {
179     # carp "Specification of inverted indexes at table create time is deprecated";
180     my $att = shift @{$parm{invindex}};
181     my @spec = @{shift @{$parm{invindex}}};
182 dpavlin 108 my @opt = ();
183 ulpfr 13
184 ulpfr 10 if (ref($spec[0])) {
185 dpavlin 108 warn "Secondary pipelines are deprecated";
186 ulpfr 10 @opt = %{shift @spec};
187     }
188 dpavlin 108 $self->create_inverted_index(attribute => $att,
189     pipeline => \@spec,
190     @opt);
191 ulpfr 10 }
192 ulpfr 19
193 ulpfr 10 $self;
194     # end of backwarn compatibility stuff
195     }
196    
197 dpavlin 108 for my $accessor (qw(maindbfile tablename)) {
198     no strict 'refs';
199     *{$accessor} = sub {
200     my($self) = @_;
201     return $self->{$accessor} if $self->{$accessor};
202     require Carp;
203     Carp::confess("accessor $accessor not there");
204     }
205     }
206    
207 ulpfr 10 =head2 Creating an index
208    
209     $tb->create_index('docid');
210    
211 dpavlin 109 C<create_index>
212 ulpfr 10 must be called with a list of attributes. This must be a subset of the
213     attributes specified when the table was created. Currently this
214     method must be called before the first tuple is inserted in the
215     table!
216    
217     =cut
218    
219     sub create_index {
220     my $self= shift;
221 ulpfr 13
222 dpavlin 116 confess "Cannot create index for table aready populated"
223 ulpfr 10 if $self->{nextk} > 1;
224 ulpfr 13
225 ulpfr 10 require WAIT::Index;
226 ulpfr 13
227 ulpfr 10 my $name = join '-', @_;
228 dpavlin 108 #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
229 ulpfr 10 $self->{indexes}->{$name} =
230 dpavlin 108 WAIT::Index->new(
231 dpavlin 114 path => $self->path.'/'.$name,
232 dpavlin 108 subname => $name,
233     env => $self->{env},
234     maindbfile => $self->maindbfile,
235     tablename => $self->tablename,
236     attr => $_,
237     );
238 ulpfr 10 }
239    
240     =head2 Creating an inverted index
241    
242     $tb->create_inverted_index
243     (attribute => 'au',
244     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
245     predicate => 'plain',
246     );
247    
248     =over 5
249    
250     =item C<attribute>
251    
252     The attribute to build the index on. This attribute may not be in the
253     set attributes specified when the table was created.
254    
255     =item C<pipeline>
256    
257 ulpfr 13 A piplines specification is a reference to an array of method names
258     (from package C<WAIT::Filter>) which are to be applied in sequence to
259     the contents of the named attribute. The attribute name may not be in
260     the attribute list.
261 ulpfr 10
262     =item C<predicate>
263    
264     An indication which predicate the index implements. This may be
265     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
266     query processing. Currently there is no standard set of predicate
267 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
268 ulpfr 10 omitted.
269    
270     =back
271    
272     Currently this method must be called before the first tuple is
273     inserted in the table!
274    
275     =cut
276    
277     sub create_inverted_index {
278     my $self = shift;
279     my %parm = @_;
280    
281 dpavlin 116 confess "No attribute specified" unless $parm{attribute};
282     confess "No pipeline specified" unless $parm{pipeline};
283 ulpfr 10
284     $parm{predicate} ||= $parm{pipeline}->[-1];
285 ulpfr 13
286 dpavlin 116 confess "Cannot create index for table aready populated"
287 ulpfr 10 if $self->{nextk} > 1;
288 ulpfr 13
289 ulpfr 10 require WAIT::InvertedIndex;
290    
291     # backward compatibility stuff
292     my %opt = %parm;
293     for (qw(attribute pipeline predicate)) {
294     delete $opt{$_};
295     }
296 ulpfr 13
297 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
298 dpavlin 114 my $idx = WAIT::InvertedIndex->new(path => $self->path.'/'.$name,
299 dpavlin 108 subname=> $name,
300     env => $self->{env},
301     maindbfile => $self->maindbfile,
302     tablename => $self->tablename,
303     filter => [@{$parm{pipeline}}], # clone
304     name => $name,
305     attr => $parm{attribute},
306     %opt, # backward compatibility stuff
307     );
308 ulpfr 10 # We will have to use $parm{predicate} here
309     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
310     }
311    
312     sub dir {
313 dpavlin 111 $_[0]->path;
314 ulpfr 10 }
315    
316     =head2 C<$tb-E<gt>layout>
317    
318     Returns the reference to the associated parser object.
319    
320     =cut
321    
322     sub layout { $_[0]->{layout} }
323    
324     =head2 C<$tb-E<gt>fields>
325    
326     Returns the array of attribute names.
327    
328     =cut
329    
330    
331     sub fields { keys %{$_[0]->{inverted}}}
332    
333     =head2 C<$tb-E<gt>drop>
334    
335     Must be called via C<WAIT::Database::drop_table>
336    
337     =cut
338    
339     sub drop {
340     my $self = shift;
341 ulpfr 35
342 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
343     $self->close; # just make sure
344 ulpfr 35
345 dpavlin 114 # my $path = $self->path;
346 ulpfr 10
347     for (values %{$self->{indexes}}) {
348     $_->drop;
349     }
350 dpavlin 114 # unlink "$path/records";
351     # rmdir "$path/read" or warn "Could not rmdir '$path/read'";
352 dpavlin 89
353 ulpfr 10 } else {
354 dpavlin 115 confess ref($self)."::drop called directly";
355 ulpfr 10 }
356     }
357    
358     sub mrequire ($) {
359     my $module = shift;
360    
361     $module =~ s{::}{/}g;
362     $module .= '.pm';
363     require $module;
364     }
365    
366 dpavlin 108 sub path {
367     my($self) = @_;
368     return $self->{path} if $self->{path};
369     require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
370     require Carp;
371 dpavlin 115 confess("NO path attr");
372 dpavlin 108 }
373    
374 ulpfr 10 sub open {
375     my $self = shift;
376 dpavlin 111 my $path = $self->path . '/records';
377 ulpfr 10
378     mrequire ref($self); # that's tricky eh?
379     if (defined $self->{'layout'}) {
380     mrequire ref($self->{'layout'});
381     }
382     if (defined $self->{'access'}) {
383     mrequire ref($self->{'access'});
384     }
385     if (exists $self->{indexes}) {
386     require WAIT::Index;
387 dpavlin 108 for my $Ind (values %{$self->{indexes}}) {
388     for my $x (qw(mode env maindbfile)) {
389     $Ind->{$x} = $self->{$x};
390     }
391 ulpfr 10 }
392     }
393     if (exists $self->{inverted}) {
394     my ($att, $idx);
395     for $att (keys %{$self->{inverted}}) {
396     for $idx (@{$self->{inverted}->{$att}}) {
397 dpavlin 108 for my $x (qw(mode env maindbfile)) {
398     $idx->{$x} = $self->{$x};
399     }
400 ulpfr 10 }
401     }
402     require WAIT::InvertedIndex;
403     }
404 ulpfr 42
405 dpavlin 108 # CONFUSION: WAIT knows two *modes*: read-only or read-write.
406     # BerkeleyDB means file permissions when talking about Mode.
407     # BerkeleyDB has the "Flags" attribute to specify
408     # read/write/lock/etc subsystems.
409 ulpfr 42
410 dpavlin 108 my $flags;
411     if ($self->{mode} & O_RDWR) {
412     $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
413 dpavlin 113 #warn "DEBUG: Flags on table $path set to 'writing'";
414 dpavlin 108 } else {
415     $flags = DB_RDONLY;
416 dpavlin 113 #warn "DEBUG: Flags on table $path set to 'readonly'";
417 dpavlin 108 }
418 ulpfr 10 unless (defined $self->{dbh}) {
419 dpavlin 108 my $subname = $self->tablename . "/records";
420     $self->{dbh} =
421     tie(%{$self->{db}}, 'BerkeleyDB::Btree',
422     $self->{env} ? (Env => $self->{env}) : (),
423     # Filename => $file,
424     Filename => $self->maindbfile,
425     Subname => $subname,
426     Mode => 0664,
427     Flags => $flags,
428     $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
429     $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
430     )
431 dpavlin 115 or confess "Cannot tie: $BerkeleyDB::Error\nDEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
432 ulpfr 10 }
433     $self;
434     }
435    
436     sub fetch_extern {
437     my $self = shift;
438    
439 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
440 ulpfr 10 if (exists $self->{'access'}) {
441     mrequire ref($self->{'access'});
442     $self->{'access'}->FETCH(@_);
443     }
444     }
445    
446     sub fetch_extern_by_id {
447     my $self = shift;
448    
449     $self->fetch_extern($self->fetch(@_));
450     }
451    
452     sub _find_index {
453     my $self = shift;
454     my (@att) = @_;
455     my %att;
456     my $name;
457 ulpfr 13
458 ulpfr 10 @att{@att} = @att;
459    
460     KEY: for $name (keys %{$self->{indexes}}) {
461     my @iat = split /-/, $name;
462     for (@iat) {
463     next KEY unless exists $att{$_};
464     }
465     return $self->{indexes}->{$name};
466     }
467     return undef;
468     }
469    
470     sub have {
471     my $self = shift;
472     my %parm = @_;
473    
474 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
475    
476 ulpfr 10 defined $self->{db} or $self->open;
477     return $index->have(@_);
478     }
479    
480     sub insert {
481     my $self = shift;
482     my %parm = @_;
483    
484     defined $self->{db} or $self->open;
485    
486 ulpfr 13 # We should move all writing methods to a subclass to check only once
487 dpavlin 116 $self->{mode} & O_RDWR or confess "Cannot insert into table opened in RD_ONLY mode";
488 ulpfr 13
489 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
490     my $key;
491     my @deleted = keys %{$self->{deleted}};
492 ulpfr 19 my $gotkey = 0;
493 ulpfr 10
494     if (@deleted) {
495     $key = pop @deleted;
496     delete $self->{deleted}->{$key};
497 ulpfr 19 # Sanity check
498     if ($key && $key>0) {
499     $gotkey=1;
500 ulpfr 10 } else {
501 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
502     "key[%s]: Please rebuild index\n",
503     $key
504     ));
505     }
506     }
507     unless ($gotkey) {
508 ulpfr 10 $key = $self->{nextk}++;
509     }
510 dpavlin 108 $self->{db}->{$key} = $tuple;
511 ulpfr 10 for (values %{$self->{indexes}}) {
512     unless ($_->insert($key, %parm)) {
513     # duplicate key, undo changes
514     if ($key == $self->{nextk}-1) {
515     $self->{nextk}--;
516     } else {
517 ulpfr 19 # warn "setting key[$key] deleted during insert";
518 ulpfr 10 $self->{deleted}->{$key}=1;
519     }
520     my $idx;
521     for $idx (values %{$self->{indexes}}) {
522     last if $idx eq $_;
523     $idx->remove($key, %parm);
524     }
525     return undef;
526 ulpfr 13 }
527 ulpfr 10 }
528     if (defined $self->{inverted}) {
529     my $att;
530     for $att (keys %{$self->{inverted}}) {
531     if (defined $parm{$att}) {
532     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
533     #map $_->sync, @{$self->{inverted}->{$att}}
534     }
535     }
536     }
537     $key
538     }
539    
540     sub sync {
541     my $self = shift;
542 ulpfr 13
543 ulpfr 10 for (values %{$self->{indexes}}) {
544     map $_->sync, $_;
545     }
546     if (defined $self->{inverted}) {
547     my $att;
548     for $att (keys %{$self->{inverted}}) {
549     map $_->sync, @{$self->{inverted}->{$att}}
550     }
551     }
552     }
553    
554     sub fetch {
555     my $self = shift;
556     my $key = shift;
557    
558     return () if exists $self->{deleted}->{$key};
559 ulpfr 13
560 ulpfr 10 defined $self->{db} or $self->open;
561 dpavlin 108 $self->unpack($self->{db}->{$key});
562 ulpfr 10 }
563    
564     sub delete_by_key {
565     my $self = shift;
566     my $key = shift;
567    
568 ulpfr 19 unless ($key) {
569 dpavlin 113 cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
570 ulpfr 19 return;
571     }
572    
573 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
574     my %tuple = $self->fetch($key);
575     for (values %{$self->{indexes}}) {
576     $_->delete($key, %tuple);
577     }
578     if (defined $self->{inverted}) {
579     # User *must* provide the full record for this or the entries
580     # in the inverted index will not be removed
581     %tuple = (%tuple, @_);
582     my $att;
583     for $att (keys %{$self->{inverted}}) {
584     if (defined $tuple{$att}) {
585     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
586     }
587     }
588     }
589 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
590 ulpfr 10 ++$self->{deleted}->{$key};
591     }
592    
593     sub delete {
594     my $self = shift;
595     my $tkey = $self->have(@_);
596 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
597 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
598     }
599    
600     sub unpack {
601 laperla 31 my($self, $tuple) = @_;
602 ulpfr 10
603 laperla 31 unless (defined $tuple){
604     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
605     warn("Debug: somebody called unpack without argument tuple!");
606     return;
607     }
608    
609 ulpfr 10 my $att;
610     my @result;
611     my @tuple = split /$;/, $tuple;
612    
613     for $att (@{$self->{attr}}) {
614     push @result, $att, shift @tuple;
615     }
616     @result;
617     }
618    
619 ulpfr 19 sub set {
620     my ($self, $iattr, $value) = @_;
621 laperla 77 # in the rare case that they haven't written a single record yet, we
622     # make sure, the inverted inherits our $self->{mode}:
623     defined $self->{db} or $self->open;
624    
625 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
626 dpavlin 108 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
627 ulpfr 19 my $idx;
628     for $idx (@{$self->{inverted}->{$att}}) {
629     $idx->set($iattr, $value);
630     }
631     } else {
632     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
633     }
634     }
635    
636     1;
637     }
638    
639 ulpfr 10 sub close {
640     my $self = shift;
641    
642 dpavlin 113 #cluck("DEBUG: Closing A Table");
643 dpavlin 108
644 ulpfr 10 if (exists $self->{'access'}) {
645     eval {$self->{'access'}->close}; # dont bother if not opened
646     }
647 laperla 31 if ($WAIT::Index::VERSION) {
648     for (values %{$self->{indexes}}) {
649     $_->close();
650     }
651 ulpfr 10 }
652 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
653     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
654     # if WAIT::InvertedIndex has not been loaded, they cannot have
655     # been altered so far
656 ulpfr 10 my $att;
657     for $att (keys %{$self->{inverted}}) {
658     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
659     my $idx;
660     for $idx (@{$self->{inverted}->{$att}}) {
661     $idx->close;
662     }
663     } else {
664     map $_->close(), @{$self->{inverted}->{$att}};
665     }
666     }
667     }
668     if ($self->{dbh}) {
669     delete $self->{dbh};
670     }
671 dpavlin 108 untie %{$self->{db}};
672 dpavlin 111 for my $att (qw(env db path maindbfile)) {
673 dpavlin 108 delete $self->{$att};
674 dpavlin 113 #cluck "DEBUG: Deleted att $att";
675 dpavlin 108 }
676 ulpfr 10
677     1;
678     }
679    
680 dpavlin 108 sub DESTROY {
681 ulpfr 19 my $self = shift;
682    
683 dpavlin 108 delete $self->{env};
684 ulpfr 19
685 dpavlin 108 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
686 ulpfr 19
687 ulpfr 13 }
688    
689 ulpfr 10 sub open_scan {
690     my $self = shift;
691     my $code = shift;
692    
693     $self->{dbh} or $self->open;
694     require WAIT::Scan;
695     new WAIT::Scan $self, $self->{nextk}-1, $code;
696     }
697    
698     sub open_index_scan {
699     my $self = shift;
700     my $attr = shift;
701     my $code = shift;
702     my $name = join '-', @$attr;
703    
704     if (defined $self->{indexes}->{$name}) {
705     $self->{indexes}->{$name}->open_scan($code);
706     } else {
707 dpavlin 116 confess "No such index '$name'";
708 ulpfr 10 }
709     }
710    
711 dpavlin 117 eval {sub WAIT::Query::Raw::new {} } unless defined \&WAIT::Query::Raw::new;
712 ulpfr 10
713     sub prefix {
714     my ($self , $attr, $prefix) = @_;
715     my %result;
716    
717     defined $self->{db} or $self->open; # require layout
718    
719     for (@{$self->{inverted}->{$attr}}) {
720     my $result = $_->prefix($prefix);
721     if (defined $result) {
722     $result{$_->name} = $result;
723     }
724     }
725     bless \%result, 'WAIT::Query::Raw';
726     }
727    
728     sub intervall {
729     my ($self, $attr, $lb, $ub) = @_;
730     my %result;
731    
732     defined $self->{db} or $self->open; # require layout
733    
734     for (@{$self->{inverted}->{$attr}}) {
735     my $result = $_->intervall($lb, $ub);
736     if (defined $result) {
737     $result{$_->name} = $result;
738     }
739     }
740     bless \%result, 'WAIT::Query::Raw';
741     }
742    
743 dpavlin 108 sub search_ref {
744 ulpfr 19 my $self = shift;
745     my ($query, $attr, $cont, $raw);
746     if (ref $_[0]) {
747     $query = shift;
748 dpavlin 108 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
749    
750 ulpfr 19 $attr = $query->{attr};
751     $cont = $query->{cont};
752     $raw = $query->{raw};
753     } else {
754 dpavlin 113 cluck("Using three argument search interface is deprecated, use hashref interface instead");
755 ulpfr 19 $attr = shift;
756     $cont = shift;
757     $raw = shift;
758     $query = {
759     attr => $attr,
760     cont => $cont,
761     raw => $raw,
762     };
763     }
764    
765 ulpfr 10 my %result;
766    
767     defined $self->{db} or $self->open; # require layout
768    
769     if ($raw) {
770     for (@{$self->{inverted}->{$attr}}) {
771     my $name = $_->name;
772     if (exists $raw->{$name} and @{$raw->{$name}}) {
773     my $scale = 1/scalar(@{$raw->{$name}});
774 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
775 ulpfr 10 my ($key, $val);
776     while (($key, $val) = each %r) {
777     if (exists $result{$key}) {
778     $result{$key} += $val*$scale;
779     } else {
780     $result{$key} = $val*$scale;
781     }
782     }
783     }
784     }
785     }
786     if (defined $cont and $cont ne '') {
787     for (@{$self->{inverted}->{$attr}}) {
788 dpavlin 108 my $r = $_->search_ref($query, $cont);
789 ulpfr 10 my ($key, $val);
790 dpavlin 108 while (($key, $val) = each %$r) {
791 ulpfr 10 if (exists $result{$key}) {
792     $result{$key} += $val;
793     } else {
794     $result{$key} = $val;
795     }
796     }
797     }
798     }
799     # sanity check for deleted documents.
800     # this should not be necessary !@#$
801     for (keys %result) {
802     delete $result{$_} if $self->{deleted}->{$_}
803     }
804 dpavlin 108 \%result;
805 ulpfr 10 }
806    
807 dpavlin 108 sub parse_query {
808     my($self, $attr, $query) = @_;
809     return unless defined $query && length $query;
810     my %qt;
811     for (@{$self->{inverted}->{$attr}}) {
812     grep $qt{$_}++, $_->parse($query);
813     }
814     [keys %qt];
815     }
816    
817 ulpfr 10 sub hilight_positions {
818     my ($self, $attr, $text, $query, $raw) = @_;
819     my %pos;
820    
821     if (defined $raw) {
822 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
823     # WAIT::InvertedIndex for
824     # this index field $attr
825 ulpfr 10 my $name = $_->name;
826     if (exists $raw->{$name}) {
827     my %qt;
828     grep $qt{$_}++, @{$raw->{$name}};
829     for ($_->parse_pos($text)) {
830     if (exists $qt{$_->[0]}) {
831     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
832     }
833     }
834     }
835     }
836     }
837     if (defined $query) {
838     for (@{$self->{inverted}->{$attr}}) {
839     my %qt;
840    
841     grep $qt{$_}++, $_->parse($query);
842     for ($_->parse_pos($text)) {
843     if (exists $qt{$_->[0]}) {
844     if (exists $pos{$_->[1]}) { # perl -w ;-)
845     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
846     } else {
847     $pos{$_->[1]} = length($_->[0]);
848     }
849     }
850     }
851     }
852     }
853    
854     \%pos;
855     }
856    
857     sub hilight {
858 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
859     my $layout = $tb->layout();
860    
861 ulpfr 10 my @result;
862    
863 ulpfr 13 $qplain ||= {};
864     $qraw ||= {};
865     my @ttxt = $layout->tag($buf);
866 ulpfr 10 while (@ttxt) {
867     no strict 'refs';
868     my %tag = %{shift @ttxt};
869     my $txt = shift @ttxt;
870     my $fld;
871    
872     my %hl;
873 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
874 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
875 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
876 ulpfr 10 for (keys %$hp) {
877     if (exists $hl{$_}) { # -w ;-(
878     $hl{$_} = max($hl{$_}, $hp->{$_});
879     } else {
880     $hl{$_} = $hp->{$_};
881     }
882     }
883     }
884     my $pos;
885     my $qt = {_qt => 1, %tag};
886     my $pl = \%tag;
887     my $last = length($txt);
888     my @tmp;
889     for $pos (sort {$b <=> $a} keys %hl) {
890     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
891     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
892     $last = $pos;
893     }
894     push @result, $pl, substr($txt,0,$last);
895     push @result, @tmp;
896     }
897     @result; # no speed necessary
898     }
899    
900     1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26