/[wait]/branches/unido/script/sman
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /branches/unido/script/sman

Parent Directory Parent Directory | Revision Log Revision Log


Revision 106 - (show annotations)
Tue Jul 13 12:22:09 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 12317 byte(s)
Changes made by Andreas J. Koenig <andreas.koenig(at)anima.de> for Unido project

1 #!perl -w
2 # -*- Mode: Perl -*-
3 # $Basename: sman $
4 # $Revision: 1.14 $
5 # Author : Ulrich Pfeifer
6 # Created On : Fri Aug 30 15:52:25 1996
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Mon May 8 11:03:46 2000
9 # Language : CPerl
10 #
11 # (C) Copyright 1996-2000, Ulrich Pfeifer
12 #
13
14 use strict;
15
16 use Term::ReadLine;
17 use Getopt::Long;
18 use Fcntl qw(O_RDONLY);
19 use Config;
20
21 require WAIT::Config;
22 require WAIT::Database;
23 require WAIT::Query::Base;
24 require WAIT::Query::Wais;
25
26
27 $SIG{PIPE} = 'IGNORE';
28 my %OPT = (database => 'DB',
29 dir => $WAIT::Config->{WAIT_home} || '/tmp',
30 table => 'man',
31 pager => $WAIT::Config->{'pager'} || 'more',
32 filter => 0,
33 max => 15,
34 );
35
36 GetOptions(\%OPT,
37 'database=s',
38 'dir=s',
39 'table=s',
40 'filter=i',
41 'max=i',
42 'pager:s') || die "
43 Usage: $0
44 [--database database]
45 [--dir dir ]
46 [--table table ]
47 [--filter integer ]
48 [--max integer ]
49 [--pager pager ]
50 ";
51
52 my $db = WAIT::Database->open(name => $OPT{database},
53 mode => O_RDONLY,
54 directory => $OPT{dir})
55 or die "Could not open database $OPT{database}: $@";
56
57 my $tb = $db->table(name => $OPT{table})
58 or die "Could not open table $OPT{table}: $@";
59
60 # not used: my $layout = $tb->layout; # a WAIT::Parse::Nroff object
61
62 my $term = new Term::ReadLine 'Simple Query Interface';
63
64 require WAIT::Format::Term;
65 my $format;
66 if ($Config::Config{'archname'} eq 'i586-linux') {
67 # for color xterm
68 $format = new WAIT::Format::Term query_s => "", query_e => "";
69 } else {
70 $format = new WAIT::Format::Term;
71 }
72
73 my $pager = ($OPT{pager}) ? \&pager : \&less;
74 my $OUT = $term->OUT;
75
76 my $st = 1;
77 print $OUT "Enter 'h' for help.\n";
78
79 # sman is basically offering three services: find the hits and show
80 # them (a query), show metadata for a hit (a view), show a hot (display)
81
82 my($query, @did);
83
84 while (defined ($_ = &myreadline("$st> "))) {
85 chomp; $st++;
86
87 my(%hits, $query_text);
88 if (/^$/) {
89 next;
90 } elsif (/^m (\d+)$/) {
91 $OPT{max} = $1;
92 } elsif (/^f\s*(\d+)?$/) {
93 $OPT{filter} = $1;
94 next;
95 } elsif (/^t$/i) {
96 if ($pager eq \&less) {
97 $pager = \&pager;
98 } else {
99 $pager = \&less;
100 }
101 next;
102 } elsif (/^(\d+)$/) {
103 if (defined $did[$1]) {
104 display($did[$1]); # <----------- display (full doc)
105 next;
106 }
107 } elsif (/^d\s*(\d+)/) {
108 if (defined $did[$1]) {
109 view($did[$1]); # <----------- view (metadata from WAIT)
110 next;
111 }
112 } elsif (/^q$/i) {
113 last;
114 } elsif (/^l$/i) {
115 # fall through
116 } elsif (/^[h?]$/i) {
117 help();
118 next;
119 } elsif (/^hh$/i) {
120 extended_help();
121 next;
122 } else { # <----------- A query (Display a list)
123 $query_text = $_;
124 eval {$query = WAIT::Query::Wais::query($tb, $_)};
125 if ($@ ne '') {
126 print $OUT "$_ => $query\n\$\@='$@'\n";
127 } elsif (ref($query)) {
128 %hits = $query->execute(top => $OPT{max}, picky => 1);
129 # the hash %hits has as keys document numbers and as values
130 # quality figures. The doc numbers are not what we have as docid
131 # to find the item in the access class, they are WAIT's private
132 # numbers.
133 } else {
134 next;
135 }
136 }
137
138 next unless %hits;
139 my $no = 1; # numbering the hits for the result table that is
140 # presented to the user
141
142 @did = (); # store the internal numbers (keys of %hits). The user
143 # will use $no in sman's interface to select a hit.
144
145 # the following loop uses the values of %hits to sort the results
146 # according to the quality and cut after a number of rows. After
147 # that %hits isn't needed anymore.
148 print "Query: $query_text\n";
149 for my $did (sort {$hits{$b} <=> $hits{$a}} keys %hits) {
150
151 my %tattr = $tb->fetch($did);
152 # the hash %tattr contains several attributes of the item we are
153 # referring to, namely the attributes that we named in the "attr"
154 # argument of the create_table statement in smakewhatis
155
156 printf $OUT "%2d %6.3f %s\n", $no, $hits{$did},
157 substr($tattr{headline} ||'',0,68);
158 $did[$no] = $did;
159 last if $no++ >= $OPT{max};
160
161 }
162
163 } continue {
164 # we don't do this since Andreas Koenig does not think of it as feature
165 # $term->SetHistory(grep length($_)>4, $term->GetHistory)
166 }
167 warn "Thank you for using sman\n";
168
169 $tb->close;
170 $db->close;
171
172 sub myreadline {
173 if (@ARGV) {
174 return shift @ARGV;
175 } else {
176 $term->readline(@_);
177 }
178 }
179 sub help {
180 my $idb = "\n\t'". join(q[', '], $tb->fields()) . "'";
181 print $OUT qq[Available commands:
182
183 <num> Show the document <num>
184 d <num> Show the db entry of document <num>
185 f <num> Display only <num> lines context
186 h,? Display this help message
187 hh Display query examples
188 m <num> Set maxhits to <num>
189 t Toggle display mode (term/less)
190 q Exit from $0
191 l redisplay last ranking
192 Other input is tried as wais query.
193 The following fields are known: $idb
194 ] ;
195 }
196
197 sub extended_help {
198 print q{
199 Here are some query examples:
200
201 information retrieval free text query
202 information or retrieval same as above
203 des=information retrieval `information' must be in the description
204 des=(information retrieval) one of them in description
205 des=(information or retrieval) same as above
206 des=(information and retrieval) both of them in description
207 des=(information not retrieval) `information' in description and
208 `retrieval' not in description
209 des=(information system*) wild-card search
210 au=ilia author names may be misspelled
211
212 You can build arbitary boolean combination of the above examples.
213 Field names may be abbreviated.
214 }
215 }
216
217 sub view {
218 my $did = shift;
219 my %tattr = $tb->fetch($did);
220 for (keys %tattr) {
221 print $OUT "$_ $tattr{$_}\n";
222 }
223 }
224
225 sub display {
226 my $did = shift;
227
228 return unless defined $query and defined $did;
229
230 print $OUT "Wais display document $did\n";
231 my %tattr = $tb->fetch($did);
232 my $tdid = $tattr{docid};
233 # WHAT DOES HE DO HERE? ULI???
234 # Re: some indexing scripts did use pathnames relative to the table directory
235 # especially the cpanwait script does this. uli
236 # if ($tdid !~ m(^/)) {
237 # $tdid = $tb->dir . '/' . $tdid;
238 # }
239
240 # The main task of all that follows from here is highlighting. WAIT
241 # is designed to make it possible to show the user why a certain
242 # document was chosen by the indexer.
243
244 my $buf = $tb->fetch_extern($tdid);
245 # This $buf can be an object that can have enough information to do
246 # highlighting without WAIT's help. If you prefer to implement your
247 # own highlighting, you can do so now with e.g. print
248 # $buf->highlight(query => $query)
249
250 # All you need to know to implement highlighting is how a
251 # WAIT::Query::Base object looks like (left as an exercise for the
252 # reader).
253
254 # The impatient reader may want to implement something without
255 # highlighting, in which case he does not need any info about the
256 # query object and can rightaway run e.g.
257 # print $buf->as_string
258
259 # Thus the impatient reader does not necessarily need the following
260 # heavy wizardry. Just to give you an idea what's going on: every
261 # word in the text must be compared to every word in the query if it
262 # is worth highlighting, and which part of the word is worth
263 # highlighting. This must be done differently for every field in the
264 # table and for every index defined for that field. Try to run a
265 # query with 100 words and you'll be amazed to see it really works.
266 # Or maybe it doesn't. You should be aware that the hilighting code
267 # is to be regarded as alpha. It is certainly the least tested part
268 # of WAIT so far.
269
270 if ($buf) {
271 my @txt = $query->hilight($buf);
272 # In this operation the following things melt into one piece:
273 # $query: The query entered by the user (Class isa WAIT::Query::Base)
274 # $tb: The table we queried (Class WAIT::Table)
275 # $buf: The document to display (User defined class or string)
276 # The steps taken are:
277 # 1.) $query calls "hilight" on $tb and passes
278 # filtered and raw search terms ($query->{Plain} and $query->{Raw}).
279 # 2.) $tb asks the layout object to tag the object which results
280 # in an array with alternating elements of tags (anon HASHes) and
281 # strings.
282 # 3.) $tb adds some markup on its own: {qt=>1} or some such
283
284 # The result of that process can optionally be sent through a
285 # filter, just to impress your friends with yet more heavy
286 # wizardry
287 if ($OPT{filter}) {
288 @txt = &filter(@txt);
289 }
290
291 # And then a formatter (in our case a terminal formatter) turns
292 # all the markup into escape sequences and strings that can in
293 # turn be sent through a pager for instance
294 &$pager($format->as_string(\@txt));
295 }
296
297 # Hey, that's it. The user out there is deeply impressed now. You
298 # can lean back again:-) He got a document that has some words
299 # hilighted and will probably read and enjoy it. Maybe he'll send
300 # you an email.
301 }
302
303 sub filter {
304 my @result;
305 my @context;
306 my $lines = 0;
307 my $clines = 0;
308 my $elipsis = 0;
309
310 print STDERR "Filter ...";
311 while (@_) {
312 my %tag = %{shift @_};
313 my $txt = shift @_;
314
315 for (split /(\n)/, $txt) {
316 if ($_ eq "\n") {
317 if (exists $tag{_qt}) {
318 #die "Weird!";
319 push @result, {_i=>1}, "[WEIRD]";
320 } elsif ($lines) {
321 push @result, {}, $_;
322 $lines--;
323 } else {
324 push @context, {}, $_;
325 $clines++;
326 }
327 } else {
328 if (exists $tag{_qt}) {
329 push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis;
330 push @result, @context, {%tag}, $_;
331 delete $tag{_qt};
332 @context = (); $clines = 0; $elipsis=0;
333 $lines = $OPT{filter}+1;
334 } elsif ($lines) {
335 push @result, \%tag, $_;
336 } else {
337 push @context, \%tag, $_;
338 }
339 }
340 if ($clines>$OPT{filter}) {
341 my (%tag, $txt);
342 while ($clines>$OPT{filter}) {
343 %tag = %{shift @context};
344 $txt = shift @context;
345 if ($txt =~ /\n/) {
346 $clines--;
347 $elipsis++;
348 }
349 }
350 }
351 }
352 }
353 print STDERR " done\n";
354 @result;
355 }
356
357 sub less {
358 my $flags;
359 if ($WAIT::Config->{pager} =~ /less/) {
360 $flags = '-r';
361 } elsif ($WAIT::Config->{pager} =~ /more/) {
362 $flags = '-c';
363 }
364 open(PAGER, "|$WAIT::Config->{pager} $flags") or die;
365 print PAGER @_;
366 close PAGER;
367 }
368
369 sub pager {
370 my @lines = split /\n/, $_[0];
371 my $line = 0;
372 for (@lines) {
373 print "$_\n"; $line++;
374 if ($line % 24 == 0) {
375 my $key = $term->readline("[return]");
376 return if $key =~ /^q/i;
377 }
378 }
379 }
380
381
382 __END__
383 ## ###################################################################
384 ## pod
385 ## ###################################################################
386
387 =head1 NAME
388
389 sman - Search and disply manuals interactive
390
391 =head1 SYNOPSIS
392
393 B<sman>
394 [B<-database> I<database name>]
395 [B<-dir> I<database directory>]
396 [B<-table> I<name>]
397 [B<-less>]
398 [B<-filter> I<num>]
399 [B<-max> I<num>]
400
401 =head1 DESCRIPTION
402
403 B<Sman> is an interactive search interface to your systems manual pages.
404
405 =head2 OPTIONS
406
407 =over 10
408
409 =item B<-database> I<database name>
410
411 Change the default database name to I<database name>.
412
413 =item B<-dir> I<database directory>
414
415 Change the default database directory to I<database directory>.
416
417 =item B<-table> I<name>
418
419 Use I<name> instead of C<man> as table name.
420
421 =item B<-pager> I<name>
422
423 Use I<name> instead of the default pager. If no I<name> is supplied a
424 buildin pager is used.
425
426 =item B<-filter> I<num>
427
428 Display only I<num> lines above and below an occurance of a search
429 term in the manual.
430
431 =item B<-max> I<num>
432
433 Display only I<num> hits. Default is to 10.
434
435 =head1 SEE ALSO
436
437 L<smakewhatis>.
438
439 =head1 AUTHOR
440
441 Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26