1 |
# -*- Mode: Perl -*- |
2 |
# $Basename: Wais.pm $ |
3 |
# $Revision: 1.5 $ |
4 |
# Author : Ulrich Pfeifer |
5 |
# Created On : Mon Sep 16 11:08:04 1996 |
6 |
# Last Modified By: Ulrich Pfeifer |
7 |
# Last Modified On: Sat Apr 15 15:51:49 2000 |
8 |
# Language : CPerl |
9 |
# |
10 |
# (C) Copyright 1997-2000, Ulrich Pfeifer |
11 |
# |
12 |
|
13 |
package WAIT::Wais; |
14 |
|
15 |
require WAIT::Query::Wais; |
16 |
require WAIT::Database; |
17 |
use Fcntl; |
18 |
use strict; |
19 |
use vars qw(%DB %TB); |
20 |
|
21 |
my %FORMATTER; |
22 |
|
23 |
BEGIN { # check for available formatters |
24 |
%FORMATTER = qw(text WAIT::Format::Base); |
25 |
for my $inc (@INC) { |
26 |
if (-d "$inc/WAIT/Format") { |
27 |
for my $format ( <$inc/WAIT/Format/*.pm>) { |
28 |
my ($name) = ($format =~ /(\w+)\.pm$/); |
29 |
my $module = "WAIT::Format::$name"; |
30 |
$name = lc $name; |
31 |
$FORMATTER{$name} = $module; |
32 |
} |
33 |
} |
34 |
} |
35 |
} |
36 |
|
37 |
|
38 |
sub _database { |
39 |
my $path = shift; |
40 |
my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:); |
41 |
|
42 |
return $DB{"$dir/$dn"} if exists $DB{"$dir/$dn"}; |
43 |
$DB{"$dir/$dn"} = WAIT::Database->open(name => $dn, directory => $dir, |
44 |
mode => O_RDONLY); |
45 |
return $DB{"$dir/$dn"}; |
46 |
} |
47 |
|
48 |
sub _table { |
49 |
my $path = shift; |
50 |
|
51 |
return $TB{$path} if exists $TB{$path}; |
52 |
my $db = _database($path); |
53 |
my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:); |
54 |
$TB{$path} = $db->table(name => $tn); |
55 |
$TB{$path}; |
56 |
} |
57 |
|
58 |
sub Search { |
59 |
my (@requests) = @_; |
60 |
my $request; |
61 |
my $result = new WAIT::Wais::Result; |
62 |
for $request (@requests) { |
63 |
my $query = $request->{'query'}; |
64 |
my $database = $request->{'database'}; |
65 |
my $tag = $request->{'tag'} || $request->{'database'}; |
66 |
my ($dir, $dn, $tn) = ($database =~ m:(.*)/([^/]+)/([^/]+)$:); |
67 |
my $tb = _table($database); |
68 |
unless (defined $tb) { |
69 |
$result->add(Tag => $tag, Error => 'Could not open database'); |
70 |
return $result; |
71 |
} |
72 |
my $wquery; |
73 |
eval {$wquery = WAIT::Query::Wais::query($tb, $query)}; |
74 |
if ($@ ne '') { |
75 |
$result->add(Tag => $tag, Error => $@); |
76 |
return $result; |
77 |
} |
78 |
my %po = $wquery->execute(); |
79 |
$result->add(Tag => $tag, Database => $database, |
80 |
Table => $tb, Postings => \%po) |
81 |
} |
82 |
$result; |
83 |
} |
84 |
|
85 |
sub Retrieve { |
86 |
my %parm = @_; |
87 |
my $result = new WAIT::Wais::Result; |
88 |
my $tb = _table($parm{database}); |
89 |
|
90 |
unless (defined $tb) { |
91 |
$result->add(Tag => 'document', Error => 'Could not open database'); |
92 |
return $result; |
93 |
} |
94 |
|
95 |
my $did = ref($parm{docid})?$parm{docid}->did:$parm{docid}; |
96 |
|
97 |
my %rec = $tb->fetch($did); |
98 |
|
99 |
# another CPAN hack |
100 |
if ($rec{docid} =~ m(^data/)) { |
101 |
$rec{docid} = $tb->dir . '/' . $rec{docid}; |
102 |
} |
103 |
|
104 |
my $text = $tb->fetch_extern($rec{docid}); |
105 |
|
106 |
my @txt; |
107 |
$tb->open; |
108 |
if ($parm{query}) { |
109 |
@txt = WAIT::Query::Wais::query($tb,$parm{query})->hilight($text); |
110 |
} else { |
111 |
@txt = $tb->layout->tag($text); |
112 |
} |
113 |
|
114 |
if ($parm{lines}) { |
115 |
@txt = filter($parm{lines}, @txt); |
116 |
} |
117 |
|
118 |
my $type = lc $parm{type}; |
119 |
|
120 |
my $module = (exists $FORMATTER{$type})?$FORMATTER{$type}:$FORMATTER{text}; |
121 |
my $path = $module; |
122 |
$path =~ s(::)(/)g; |
123 |
|
124 |
require "$path.pm"; |
125 |
my $format = new $module; |
126 |
$text = $format->as_string(\@txt, sub {$tb->fetch($did)}); |
127 |
$result->add(Tag => 'document', Text => $text); |
128 |
} |
129 |
|
130 |
sub filter { |
131 |
my $filter = shift; |
132 |
my @result; |
133 |
my @context; |
134 |
my $lines = 0; |
135 |
my $clines = 0; |
136 |
my $elipsis = 0; |
137 |
|
138 |
while (@_) { |
139 |
my %tag = %{shift @_}; |
140 |
my $txt = shift @_; |
141 |
|
142 |
for (split /(\n)/, $txt) { |
143 |
if ($_ eq "\n") { |
144 |
if (exists $tag{_qt}) { |
145 |
#die "Weird!"; |
146 |
push @result, {_i=>1}, "[WEIRD]"; |
147 |
} elsif ($lines) { |
148 |
push @result, {}, $_; |
149 |
$lines--; |
150 |
} else { |
151 |
push @context, {}, $_; |
152 |
$clines++; |
153 |
} |
154 |
} else { |
155 |
if (exists $tag{_qt}) { |
156 |
push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis; |
157 |
push @result, @context, {%tag}, $_; |
158 |
delete $tag{_qt}; |
159 |
@context = (); $clines = 0; $elipsis=0; |
160 |
$lines = $filter+1; |
161 |
} elsif ($lines) { |
162 |
push @result, \%tag, $_; |
163 |
} else { |
164 |
push @context, \%tag, $_; |
165 |
} |
166 |
} |
167 |
if ($clines>$filter) { |
168 |
my (%tag, $txt); |
169 |
while ($clines>$filter) { |
170 |
%tag = %{shift @context}; |
171 |
$txt = shift @context; |
172 |
if ($txt =~ /\n/) { |
173 |
$clines--; |
174 |
$elipsis++; |
175 |
} |
176 |
} |
177 |
} |
178 |
} |
179 |
} |
180 |
@result; |
181 |
} |
182 |
|
183 |
package WAIT::Wais::Result; |
184 |
|
185 |
sub new { |
186 |
my $type = shift; |
187 |
my %par = @_; |
188 |
my $self = {'header' => [], 'diagnostics' => [], 'text' => ''}; |
189 |
|
190 |
bless $self, $type; |
191 |
} |
192 |
|
193 |
sub _header { |
194 |
my ($database, $did, $score) = @_; |
195 |
my $types; |
196 |
my $tb = WAIT::Wais::_table($database); |
197 |
my %rec = $tb->fetch($did); |
198 |
my $lines = $rec{'lines'} || 0; |
199 |
my $length = $rec{'size'} || 0; |
200 |
unless ($length) { |
201 |
($length) = ($rec{docid} =~ /(\d+)$/) |
202 |
} |
203 |
unless ($rec{docid} =~ m(^/)) { |
204 |
$rec{docid} = $tb->dir . '/' . $rec{docid}; |
205 |
} |
206 |
my $headline = $rec{headline} || ''; |
207 |
if (exists $rec{types}) { |
208 |
$types = [split ',', $rec{types}] |
209 |
} else { |
210 |
$types = [keys %FORMATTER]; |
211 |
} |
212 |
|
213 |
[$score, $lines, $length, $headline, $types, |
214 |
WAIT::Wais::Docid->new('wait',$database, $did)]; |
215 |
} |
216 |
|
217 |
sub add { |
218 |
my $self = shift; |
219 |
my %parm = @_; |
220 |
my $tag = $parm{Tag}; |
221 |
my $docid; |
222 |
|
223 |
if ($parm{Postings}) { |
224 |
my @result; |
225 |
my @left = @{$self->{'header'}}; |
226 |
my @right; |
227 |
for (keys %{$parm{Postings}}) { |
228 |
push @right, _header($parm{Database}, $_, $parm{Postings}->{$_}) |
229 |
} |
230 |
while (($#left >= $[) or ($#right >= $[)) { |
231 |
if ($#left < $[) { |
232 |
for (@right) { |
233 |
push @result, [$tag, @{$_}]; |
234 |
} |
235 |
last; |
236 |
} |
237 |
if ($#right < $[) { |
238 |
push @result, @left; |
239 |
last; |
240 |
} |
241 |
if ($left[0]->[1] > $right[0]->[0]) { |
242 |
push @result, shift @left; |
243 |
} else { |
244 |
push @result, [$tag, @{shift @right}]; |
245 |
} |
246 |
} |
247 |
$self->{'header'} = \@result; |
248 |
} |
249 |
if ($parm{Errors}) { |
250 |
my %diag = %{$parm{Errors}}; |
251 |
for (keys %diag) { |
252 |
push(@{$self->{'diagnostics'}}, [$tag, $_, $diag{$_}]); |
253 |
} |
254 |
} |
255 |
if ($parm{Text}) { |
256 |
$self->{'text'} .= $parm{Text}; |
257 |
} |
258 |
|
259 |
$self; |
260 |
} |
261 |
|
262 |
|
263 |
sub diagnostics { |
264 |
my $self = shift; |
265 |
|
266 |
@{$self->{'diagnostics'}}; |
267 |
} |
268 |
|
269 |
sub header { |
270 |
my $self = shift; |
271 |
|
272 |
@{$self->{'header'}}; |
273 |
} |
274 |
|
275 |
sub text { |
276 |
my $self = shift; |
277 |
|
278 |
$self->{'text'}; |
279 |
} |
280 |
|
281 |
package WAIT::Wais::Docid; |
282 |
|
283 |
sub new { |
284 |
my $type = shift; |
285 |
my ($server, $database, $dodid) = @_; |
286 |
my $self = join ';', $server, $database, $dodid; |
287 |
bless \$self, $type; |
288 |
} |
289 |
|
290 |
sub did { |
291 |
($_[0]->split)[2]; |
292 |
} |
293 |
|
294 |
sub split { |
295 |
my $self = shift; |
296 |
|
297 |
split /;/, $$self; |
298 |
} |
299 |
|
300 |
1; |