/[wait]/trunk/script/sman
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/script/sman

Parent Directory Parent Directory | Revision Log Revision Log


Revision 103 - (show annotations)
Tue Jun 29 22:21:50 2004 UTC (19 years, 10 months ago) by dpavlin
File size: 12681 byte(s)
make redisplay last ranking work

1 #!/usr/bin/perl -w
2 # -*- Mode: Perl -*-
3 # $Basename: sman $
4 # $Revision: 1.14 $
5 # Author : Ulrich Pfeifer
6 # Created On : Fri Aug 30 15:52:25 1996
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Mon May 8 11:03:46 2000
9 # Language : CPerl
10 #
11 # (C) Copyright 1996-2000, Ulrich Pfeifer
12 #
13
14 use strict;
15
16 use Term::ReadLine;
17 use Getopt::Long;
18 use Fcntl qw(O_RDONLY);
19 use Config;
20
21 use lib '/data/wait/lib';
22
23 require WAIT::Config;
24 require WAIT::Database;
25 require WAIT::Query::Base;
26 require WAIT::Query::Wais;
27
28
29 $SIG{PIPE} = 'IGNORE';
30 my %OPT = (database => 'DB',
31 dir => $WAIT::Config->{WAIT_home} || '/tmp',
32 table => 'man',
33 pager => $WAIT::Config->{'pager'} || 'more',
34 filter => 0,
35 max => 15,
36 );
37
38 GetOptions(\%OPT,
39 'database=s',
40 'dir=s',
41 'table=s',
42 'filter=i',
43 'max=i',
44 'pager:s') || die "
45 Usage: $0
46 [--database database]
47 [--dir dir ]
48 [--table table ]
49 [--filter integer ]
50 [--max integer ]
51 [--pager pager ]
52 ";
53
54 my $db = WAIT::Database->open(name => $OPT{database},
55 mode => O_RDONLY,
56 directory => $OPT{dir})
57 or die "Could not open database $OPT{database}: $@";
58
59 my $tb = $db->table(name => $OPT{table})
60 or die "Could not open table $OPT{table}: $@";
61
62 # not used: my $layout = $tb->layout; # a WAIT::Parse::Nroff object
63
64 my $term = new Term::ReadLine 'Simple Query Interface';
65
66 require WAIT::Format::Term;
67 my $format;
68 if ($Config::Config{'archname'} eq 'i586-linux') {
69 # for color xterm
70 $format = new WAIT::Format::Term query_s => "", query_e => "";
71 } else {
72 $format = new WAIT::Format::Term;
73 }
74
75 my $pager = ($OPT{pager}) ? \&less : \&pager;
76 my $OUT = $term->OUT;
77
78 my $st = 1;
79 print $OUT "Enter 'h' for help.\n";
80
81 # sman is basically offering three services: find the hits and show
82 # them (a query), show metadata for a hit (a view), show a hot (display)
83
84 my($query, @did);
85 my %hits;
86
87 while (defined ($_ = &myreadline("$st> "))) {
88 chomp; $st++;
89
90 my $query_text;
91 if (/^$/) {
92 next;
93 } elsif (/^m (\d+)$/) {
94 $OPT{max} = $1;
95 } elsif (/^f\s*(\d+)?$/) {
96 $OPT{filter} = $1;
97 next;
98 } elsif (/^t$/i) {
99 if ($pager eq \&less) {
100 $pager = \&pager;
101 } else {
102 $pager = \&less;
103 }
104 next;
105 } elsif (/^(\d+)$/) {
106 if (defined $did[$1]) {
107 display($did[$1]); # <----------- display (full doc)
108 next;
109 }
110 } elsif (/^d\s*(\d+)/) {
111 if (defined $did[$1]) {
112 view($did[$1]); # <----------- view (metadata from WAIT)
113 next;
114 }
115 } elsif (/^q$/i) {
116 last;
117 } elsif (/^l$/i) {
118 # fall through
119 } elsif (/^[h?]$/i) {
120 help();
121 next;
122 } elsif (/^hh$/i) {
123 extended_help();
124 next;
125 } else { # <----------- A query (Display a list)
126 $query_text = $_;
127 eval {$query = WAIT::Query::Wais::query($tb, $_)};
128 if ($@ ne '') {
129 print $OUT "$_ => $query\n\$\@='$@'\n";
130 } elsif (ref($query)) {
131 %hits = $query->execute(top => $OPT{max}, picky => 1);
132 # the hash %hits has as keys document numbers and as values
133 # quality figures. The doc numbers are not what we have as docid
134 # to find the item in the access class, they are WAIT's private
135 # numbers.
136 } else {
137 next;
138 }
139 }
140
141 next unless %hits;
142 my $no = 1; # numbering the hits for the result table that is
143 # presented to the user
144
145 @did = (); # store the internal numbers (keys of %hits). The user
146 # will use $no in sman's interface to select a hit.
147
148 # the following loop uses the values of %hits to sort the results
149 # according to the quality and cut after a number of rows. After
150 # that %hits isn't needed anymore.
151 print "Query: $query_text\n";
152 for my $did (sort {$hits{$b} <=> $hits{$a}} keys %hits) {
153
154 my %tattr = $tb->fetch($did);
155 # the hash %tattr contains several attributes of the item we are
156 # referring to, namely the attributes that we named in the "attr"
157 # argument of the create_table statement in smakewhatis
158
159 printf $OUT "%2d %6.3f %s\n", $no, $hits{$did},
160 substr($tattr{headline} ||'',0,68);
161 $did[$no] = $did;
162 last if $no++ >= $OPT{max};
163
164 }
165
166 } continue {
167 # we don't do this since Andreas Koenig does not think of it as feature
168 # $term->SetHistory(grep length($_)>4, $term->GetHistory)
169 }
170 warn "Thank you for using sman\n";
171
172 $tb->close;
173 $db->close;
174
175 sub myreadline {
176 if (@ARGV) {
177 return shift @ARGV;
178 } else {
179 $term->readline(@_);
180 }
181 }
182 sub help {
183 my $idb = "\n\t'". join(q[', '], $tb->fields()) . "'";
184 print $OUT qq[Available commands:
185
186 <num> Show the document <num>
187 d <num> Show the db entry of document <num>
188 f <num> Display only <num> lines context
189 h,? Display this help message
190 hh Display query examples
191 m <num> Set maxhits to <num>
192 t Toggle display mode (term/less)
193 q Exit from $0
194 l redisplay last ranking
195 Other input is tried as wais query.
196 The following fields for table '$OPT{table}' are known: $idb
197 ] ;
198 }
199
200 sub extended_help {
201 print q{
202 Here are some query examples:
203
204 information retrieval free text query
205 information or retrieval same as above
206 des=information retrieval `information' must be in the description
207 des=(information retrieval) one of them in description
208 des=(information or retrieval) same as above
209 des=(information and retrieval) both of them in description
210 des=(information not retrieval) `information' in description and
211 `retrieval' not in description
212 des=(information system*) wild-card search
213 au=ilia author names may be misspelled
214
215 You can build arbitary boolean combination of the above examples.
216 Field names may be abbreviated.
217 }
218 }
219
220 sub view {
221 my $did = shift;
222 my %tattr = $tb->fetch($did);
223 for (keys %tattr) {
224 print $OUT "$_ $tattr{$_}\n";
225 }
226 }
227
228 sub display {
229 my $did = shift;
230
231 return unless defined $query and defined $did;
232
233 print $OUT "Wais display document $did\n";
234 my %tattr = $tb->fetch($did);
235 my $tdid = $tattr{docid};
236 # WHAT DOES HE DO HERE? ULI???
237 # Re: some indexing scripts did use pathnames relative to the table directory
238 # especially the cpanwait script does this. uli
239 if ($tdid !~ m(^/)) {
240 $tdid = $tb->dir . '/' . $tdid;
241 }
242 # if original version doesn't exist maybe someone created gziped one?
243 # (this is used if you decided to compress data files after indexing,
244 # next indexing will pick names with gz extension anyway)
245 if (! -e $tdid && -e $tdid.".gz") {
246 $tdid .= ".gz";
247 }
248
249 # The main task of all that follows from here is highlighting. WAIT
250 # is designed to make it possible to show the user why a certain
251 # document was chosen by the indexer.
252
253 my $buf = $tb->fetch_extern($tdid);
254 # This $buf can be an object that can have enough information to do
255 # highlighting without WAIT's help. If you prefer to implement your
256 # own highlighting, you can do so now with e.g. print
257 # $buf->highlight(query => $query)
258
259 # All you need to know to implement highlighting is how a
260 # WAIT::Query::Base object looks like (left as an exercise for the
261 # reader).
262
263 # The impatient reader may want to implement something without
264 # highlighting, in which case he does not need any info about the
265 # query object and can rightaway run e.g.
266 # print $buf->as_string
267
268 # Thus the impatient reader does not necessarily need the following
269 # heavy wizardry. Just to give you an idea what's going on: every
270 # word in the text must be compared to every word in the query if it
271 # is worth highlighting, and which part of the word is worth
272 # highlighting. This must be done differently for every field in the
273 # table and for every index defined for that field. Try to run a
274 # query with 100 words and you'll be amazed to see it really works.
275 # Or maybe it doesn't. You should be aware that the hilighting code
276 # is to be regarded as alpha. It is certainly the least tested part
277 # of WAIT so far.
278
279 if ($buf) {
280 my @txt = $query->hilight($buf);
281 # In this operation the following things melt into one piece:
282 # $query: The query entered by the user (Class isa WAIT::Query::Base)
283 # $tb: The table we queried (Class WAIT::Table)
284 # $buf: The document to display (User defined class or string)
285 # The steps taken are:
286 # 1.) $query calls "hilight" on $tb and passes
287 # filtered and raw search terms ($query->{Plain} and $query->{Raw}).
288 # 2.) $tb asks the layout object to tag the object which results
289 # in an array with alternating elements of tags (anon HASHes) and
290 # strings.
291 # 3.) $tb adds some markup on its own: {qt=>1} or some such
292
293 # The result of that process can optionally be sent through a
294 # filter, just to impress your friends with yet more heavy
295 # wizardry
296 if ($OPT{filter}) {
297 @txt = &filter(@txt);
298 }
299
300 # And then a formatter (in our case a terminal formatter) turns
301 # all the markup into escape sequences and strings that can in
302 # turn be sent through a pager for instance
303 &$pager($format->as_string(\@txt));
304 }
305
306 # Hey, that's it. The user out there is deeply impressed now. You
307 # can lean back again:-) He got a document that has some words
308 # hilighted and will probably read and enjoy it. Maybe he'll send
309 # you an email.
310 }
311
312 sub filter {
313 my @result;
314 my @context;
315 my $lines = 0;
316 my $clines = 0;
317 my $elipsis = 0;
318
319 print STDERR "Filter ...";
320 while (@_) {
321 my %tag = %{shift @_};
322 my $txt = shift @_;
323
324 for (split /(\n)/, $txt) {
325 if ($_ eq "\n") {
326 if (exists $tag{_qt}) {
327 #die "Weird!";
328 push @result, {_i=>1}, "[WEIRD]";
329 } elsif ($lines) {
330 push @result, {}, $_;
331 $lines--;
332 } else {
333 push @context, {}, $_;
334 $clines++;
335 }
336 } else {
337 if (exists $tag{_qt}) {
338 push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis;
339 push @result, @context, {%tag}, $_;
340 delete $tag{_qt};
341 @context = (); $clines = 0; $elipsis=0;
342 $lines = $OPT{filter}+1;
343 } elsif ($lines) {
344 push @result, \%tag, $_;
345 } else {
346 push @context, \%tag, $_;
347 }
348 }
349 if ($clines>$OPT{filter}) {
350 my (%tag, $txt);
351 while ($clines>$OPT{filter}) {
352 %tag = %{shift @context};
353 $txt = shift @context;
354 if ($txt =~ /\n/) {
355 $clines--;
356 $elipsis++;
357 }
358 }
359 }
360 }
361 }
362 print STDERR " done\n";
363 @result;
364 }
365
366 sub less {
367 my $flags = '';
368 my $pager = $OPT{pager};
369 if ($pager =~ /less/) {
370 $flags = '-r';
371 } elsif ($pager =~ /more/) {
372 $flags = '-c';
373 } elsif ($pager =~ /lynx/) {
374 $flags = '-stdin';
375 }
376 open(PAGER, "| $pager $flags") or die;
377 print PAGER @_;
378 close PAGER;
379 }
380
381 sub pager {
382 my @lines = split /\n/, $_[0];
383 my $line = 0;
384 for (@lines) {
385 print "$_\n"; $line++;
386 if ($line % 24 == 0) {
387 my $key = $term->readline("[return]");
388 return if $key =~ /^q/i;
389 }
390 }
391 }
392
393
394 __END__
395 ## ###################################################################
396 ## pod
397 ## ###################################################################
398
399 =head1 NAME
400
401 sman - Search and disply manuals interactive
402
403 =head1 SYNOPSIS
404
405 B<sman>
406 [B<-database> I<database name>]
407 [B<-dir> I<database directory>]
408 [B<-table> I<name>]
409 [B<-less>]
410 [B<-filter> I<num>]
411 [B<-max> I<num>]
412
413 =head1 DESCRIPTION
414
415 B<Sman> is an interactive search interface to your systems manual pages.
416
417 =head2 OPTIONS
418
419 =over 10
420
421 =item B<-database> I<database name>
422
423 Change the default database name to I<database name>.
424
425 =item B<-dir> I<database directory>
426
427 Change the default database directory to I<database directory>.
428
429 =item B<-table> I<name>
430
431 Use I<name> instead of C<man> as table name.
432
433 =item B<-pager> I<name>
434
435 Use I<name> instead of the default pager. If no I<name> is supplied a
436 buildin pager is used.
437
438 =item B<-filter> I<num>
439
440 Display only I<num> lines above and below an occurance of a search
441 term in the manual.
442
443 =item B<-max> I<num>
444
445 Display only I<num> hits. Default is to 10.
446
447 =head1 SEE ALSO
448
449 L<smakewhatis>.
450
451 =head1 AUTHOR
452
453 Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
cvs2svn:cvs-rev 1.2
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26