/[wait]/trunk/script/sman
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/script/sman

Parent Directory Parent Directory | Revision Log Revision Log


Revision 109 - (hide annotations)
Tue Jul 13 17:50:27 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 12685 byte(s)
pod fixes

1 dpavlin 90 #!/usr/bin/perl -w
2 ulpfr 19 # -*- Mode: Perl -*-
3     # $Basename: sman $
4     # $Revision: 1.14 $
5     # Author : Ulrich Pfeifer
6     # Created On : Fri Aug 30 15:52:25 1996
7     # Last Modified By: Ulrich Pfeifer
8     # Last Modified On: Mon May 8 11:03:46 2000
9     # Language : CPerl
10     #
11     # (C) Copyright 1996-2000, Ulrich Pfeifer
12     #
13 ulpfr 10
14     use strict;
15    
16     use Term::ReadLine;
17     use Getopt::Long;
18 laperla 65 use Fcntl qw(O_RDONLY);
19 ulpfr 10 use Config;
20    
21 dpavlin 90 use lib '/data/wait/lib';
22 dpavlin 86
23 ulpfr 10 require WAIT::Config;
24     require WAIT::Database;
25     require WAIT::Query::Base;
26     require WAIT::Query::Wais;
27    
28    
29     $SIG{PIPE} = 'IGNORE';
30     my %OPT = (database => 'DB',
31     dir => $WAIT::Config->{WAIT_home} || '/tmp',
32     table => 'man',
33     pager => $WAIT::Config->{'pager'} || 'more',
34     filter => 0,
35     max => 15,
36     );
37    
38     GetOptions(\%OPT,
39     'database=s',
40     'dir=s',
41     'table=s',
42     'filter=i',
43     'max=i',
44 laperla 65 'pager:s') || die "
45     Usage: $0
46     [--database database]
47     [--dir dir ]
48     [--table table ]
49     [--filter integer ]
50     [--max integer ]
51     [--pager pager ]
52     ";
53 ulpfr 10
54     my $db = WAIT::Database->open(name => $OPT{database},
55     mode => O_RDONLY,
56     directory => $OPT{dir})
57     or die "Could not open database $OPT{database}: $@";
58    
59     my $tb = $db->table(name => $OPT{table})
60     or die "Could not open table $OPT{table}: $@";
61    
62     # not used: my $layout = $tb->layout; # a WAIT::Parse::Nroff object
63    
64     my $term = new Term::ReadLine 'Simple Query Interface';
65    
66     require WAIT::Format::Term;
67     my $format;
68     if ($Config::Config{'archname'} eq 'i586-linux') {
69     # for color xterm
70     $format = new WAIT::Format::Term query_s => "", query_e => "";
71     } else {
72     $format = new WAIT::Format::Term;
73     }
74    
75 dpavlin 98 my $pager = ($OPT{pager}) ? \&less : \&pager;
76 ulpfr 10 my $OUT = $term->OUT;
77    
78     my $st = 1;
79     print $OUT "Enter 'h' for help.\n";
80    
81     # sman is basically offering three services: find the hits and show
82     # them (a query), show metadata for a hit (a view), show a hot (display)
83    
84     my($query, @did);
85 dpavlin 105 my (%hits, $query_text);
86 ulpfr 10
87     while (defined ($_ = &myreadline("$st> "))) {
88     chomp; $st++;
89    
90     if (/^$/) {
91     next;
92     } elsif (/^m (\d+)$/) {
93     $OPT{max} = $1;
94     } elsif (/^f\s*(\d+)?$/) {
95     $OPT{filter} = $1;
96     next;
97     } elsif (/^t$/i) {
98     if ($pager eq \&less) {
99     $pager = \&pager;
100     } else {
101     $pager = \&less;
102     }
103     next;
104     } elsif (/^(\d+)$/) {
105     if (defined $did[$1]) {
106     display($did[$1]); # <----------- display (full doc)
107     next;
108     }
109     } elsif (/^d\s*(\d+)/) {
110     if (defined $did[$1]) {
111     view($did[$1]); # <----------- view (metadata from WAIT)
112     next;
113     }
114     } elsif (/^q$/i) {
115     last;
116     } elsif (/^l$/i) {
117     # fall through
118     } elsif (/^[h?]$/i) {
119     help();
120     next;
121     } elsif (/^hh$/i) {
122     extended_help();
123     next;
124     } else { # <----------- A query (Display a list)
125     $query_text = $_;
126     eval {$query = WAIT::Query::Wais::query($tb, $_)};
127     if ($@ ne '') {
128     print $OUT "$_ => $query\n\$\@='$@'\n";
129     } elsif (ref($query)) {
130 ulpfr 19 %hits = $query->execute(top => $OPT{max}, picky => 1);
131 ulpfr 10 # the hash %hits has as keys document numbers and as values
132     # quality figures. The doc numbers are not what we have as docid
133     # to find the item in the access class, they are WAIT's private
134     # numbers.
135     } else {
136     next;
137     }
138     }
139    
140     next unless %hits;
141     my $no = 1; # numbering the hits for the result table that is
142     # presented to the user
143    
144     @did = (); # store the internal numbers (keys of %hits). The user
145     # will use $no in sman's interface to select a hit.
146    
147     # the following loop uses the values of %hits to sort the results
148     # according to the quality and cut after a number of rows. After
149     # that %hits isn't needed anymore.
150     print "Query: $query_text\n";
151     for my $did (sort {$hits{$b} <=> $hits{$a}} keys %hits) {
152    
153     my %tattr = $tb->fetch($did);
154     # the hash %tattr contains several attributes of the item we are
155     # referring to, namely the attributes that we named in the "attr"
156     # argument of the create_table statement in smakewhatis
157    
158     printf $OUT "%2d %6.3f %s\n", $no, $hits{$did},
159     substr($tattr{headline} ||'',0,68);
160     $did[$no] = $did;
161     last if $no++ >= $OPT{max};
162    
163     }
164    
165     } continue {
166 ulpfr 19 # we don't do this since Andreas Koenig does not think of it as feature
167 ulpfr 10 # $term->SetHistory(grep length($_)>4, $term->GetHistory)
168     }
169 ulpfr 19 warn "Thank you for using sman\n";
170 ulpfr 10
171 ulpfr 19 $tb->close;
172     $db->close;
173    
174 ulpfr 10 sub myreadline {
175     if (@ARGV) {
176     return shift @ARGV;
177     } else {
178     $term->readline(@_);
179     }
180     }
181     sub help {
182     my $idb = "\n\t'". join(q[', '], $tb->fields()) . "'";
183     print $OUT qq[Available commands:
184    
185     <num> Show the document <num>
186     d <num> Show the db entry of document <num>
187     f <num> Display only <num> lines context
188     h,? Display this help message
189     hh Display query examples
190     m <num> Set maxhits to <num>
191     t Toggle display mode (term/less)
192     q Exit from $0
193     l redisplay last ranking
194     Other input is tried as wais query.
195 dpavlin 91 The following fields for table '$OPT{table}' are known: $idb
196 ulpfr 10 ] ;
197     }
198    
199     sub extended_help {
200     print q{
201     Here are some query examples:
202    
203     information retrieval free text query
204     information or retrieval same as above
205     des=information retrieval `information' must be in the description
206     des=(information retrieval) one of them in description
207     des=(information or retrieval) same as above
208     des=(information and retrieval) both of them in description
209     des=(information not retrieval) `information' in description and
210     `retrieval' not in description
211     des=(information system*) wild-card search
212     au=ilia author names may be misspelled
213    
214     You can build arbitary boolean combination of the above examples.
215     Field names may be abbreviated.
216     }
217     }
218    
219     sub view {
220     my $did = shift;
221     my %tattr = $tb->fetch($did);
222     for (keys %tattr) {
223     print $OUT "$_ $tattr{$_}\n";
224     }
225     }
226    
227     sub display {
228     my $did = shift;
229    
230     return unless defined $query and defined $did;
231    
232     print $OUT "Wais display document $did\n";
233     my %tattr = $tb->fetch($did);
234     my $tdid = $tattr{docid};
235     # WHAT DOES HE DO HERE? ULI???
236     # Re: some indexing scripts did use pathnames relative to the table directory
237     # especially the cpanwait script does this. uli
238 dpavlin 86 if ($tdid !~ m(^/)) {
239     $tdid = $tb->dir . '/' . $tdid;
240     }
241 dpavlin 96 # if original version doesn't exist maybe someone created gziped one?
242     # (this is used if you decided to compress data files after indexing,
243     # next indexing will pick names with gz extension anyway)
244     if (! -e $tdid && -e $tdid.".gz") {
245     $tdid .= ".gz";
246     }
247 ulpfr 10
248     # The main task of all that follows from here is highlighting. WAIT
249     # is designed to make it possible to show the user why a certain
250     # document was chosen by the indexer.
251    
252     my $buf = $tb->fetch_extern($tdid);
253     # This $buf can be an object that can have enough information to do
254     # highlighting without WAIT's help. If you prefer to implement your
255     # own highlighting, you can do so now with e.g. print
256     # $buf->highlight(query => $query)
257    
258     # All you need to know to implement highlighting is how a
259     # WAIT::Query::Base object looks like (left as an exercise for the
260     # reader).
261    
262     # The impatient reader may want to implement something without
263     # highlighting, in which case he does not need any info about the
264     # query object and can rightaway run e.g.
265     # print $buf->as_string
266    
267     # Thus the impatient reader does not necessarily need the following
268     # heavy wizardry. Just to give you an idea what's going on: every
269     # word in the text must be compared to every word in the query if it
270     # is worth highlighting, and which part of the word is worth
271     # highlighting. This must be done differently for every field in the
272     # table and for every index defined for that field. Try to run a
273     # query with 100 words and you'll be amazed to see it really works.
274     # Or maybe it doesn't. You should be aware that the hilighting code
275     # is to be regarded as alpha. It is certainly the least tested part
276     # of WAIT so far.
277    
278     if ($buf) {
279     my @txt = $query->hilight($buf);
280     # In this operation the following things melt into one piece:
281     # $query: The query entered by the user (Class isa WAIT::Query::Base)
282     # $tb: The table we queried (Class WAIT::Table)
283     # $buf: The document to display (User defined class or string)
284     # The steps taken are:
285     # 1.) $query calls "hilight" on $tb and passes
286     # filtered and raw search terms ($query->{Plain} and $query->{Raw}).
287     # 2.) $tb asks the layout object to tag the object which results
288     # in an array with alternating elements of tags (anon HASHes) and
289     # strings.
290     # 3.) $tb adds some markup on its own: {qt=>1} or some such
291    
292     # The result of that process can optionally be sent through a
293     # filter, just to impress your friends with yet more heavy
294     # wizardry
295     if ($OPT{filter}) {
296     @txt = &filter(@txt);
297     }
298    
299     # And then a formatter (in our case a terminal formatter) turns
300     # all the markup into escape sequences and strings that can in
301     # turn be sent through a pager for instance
302     &$pager($format->as_string(\@txt));
303     }
304    
305     # Hey, that's it. The user out there is deeply impressed now. You
306     # can lean back again:-) He got a document that has some words
307     # hilighted and will probably read and enjoy it. Maybe he'll send
308     # you an email.
309     }
310    
311     sub filter {
312     my @result;
313     my @context;
314     my $lines = 0;
315     my $clines = 0;
316     my $elipsis = 0;
317    
318     print STDERR "Filter ...";
319     while (@_) {
320     my %tag = %{shift @_};
321     my $txt = shift @_;
322    
323     for (split /(\n)/, $txt) {
324     if ($_ eq "\n") {
325     if (exists $tag{_qt}) {
326     #die "Weird!";
327     push @result, {_i=>1}, "[WEIRD]";
328     } elsif ($lines) {
329     push @result, {}, $_;
330     $lines--;
331     } else {
332     push @context, {}, $_;
333     $clines++;
334     }
335     } else {
336     if (exists $tag{_qt}) {
337     push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis;
338     push @result, @context, {%tag}, $_;
339     delete $tag{_qt};
340     @context = (); $clines = 0; $elipsis=0;
341     $lines = $OPT{filter}+1;
342     } elsif ($lines) {
343     push @result, \%tag, $_;
344     } else {
345     push @context, \%tag, $_;
346     }
347     }
348     if ($clines>$OPT{filter}) {
349     my (%tag, $txt);
350     while ($clines>$OPT{filter}) {
351     %tag = %{shift @context};
352     $txt = shift @context;
353     if ($txt =~ /\n/) {
354     $clines--;
355     $elipsis++;
356     }
357     }
358     }
359     }
360     }
361     print STDERR " done\n";
362     @result;
363     }
364    
365     sub less {
366 dpavlin 96 my $flags = '';
367 dpavlin 98 my $pager = $OPT{pager};
368     if ($pager =~ /less/) {
369 ulpfr 10 $flags = '-r';
370 dpavlin 98 } elsif ($pager =~ /more/) {
371 ulpfr 10 $flags = '-c';
372 dpavlin 98 } elsif ($pager =~ /lynx/) {
373     $flags = '-stdin';
374 ulpfr 10 }
375 dpavlin 98 open(PAGER, "| $pager $flags") or die;
376 ulpfr 10 print PAGER @_;
377     close PAGER;
378     }
379    
380     sub pager {
381     my @lines = split /\n/, $_[0];
382     my $line = 0;
383     for (@lines) {
384     print "$_\n"; $line++;
385     if ($line % 24 == 0) {
386     my $key = $term->readline("[return]");
387     return if $key =~ /^q/i;
388     }
389     }
390     }
391    
392    
393     __END__
394     ## ###################################################################
395     ## pod
396     ## ###################################################################
397    
398     =head1 NAME
399    
400     sman - Search and disply manuals interactive
401    
402     =head1 SYNOPSIS
403    
404     B<sman>
405     [B<-database> I<database name>]
406     [B<-dir> I<database directory>]
407     [B<-table> I<name>]
408     [B<-less>]
409     [B<-filter> I<num>]
410     [B<-max> I<num>]
411    
412     =head1 DESCRIPTION
413    
414     B<Sman> is an interactive search interface to your systems manual pages.
415    
416     =head2 OPTIONS
417    
418     =over 10
419    
420     =item B<-database> I<database name>
421    
422     Change the default database name to I<database name>.
423    
424     =item B<-dir> I<database directory>
425    
426     Change the default database directory to I<database directory>.
427    
428     =item B<-table> I<name>
429    
430     Use I<name> instead of C<man> as table name.
431    
432     =item B<-pager> I<name>
433    
434     Use I<name> instead of the default pager. If no I<name> is supplied a
435     buildin pager is used.
436    
437     =item B<-filter> I<num>
438    
439     Display only I<num> lines above and below an occurance of a search
440     term in the manual.
441    
442     =item B<-max> I<num>
443    
444     Display only I<num> hits. Default is to 10.
445    
446 dpavlin 109 =back
447    
448 ulpfr 10 =head1 SEE ALSO
449    
450     L<smakewhatis>.
451    
452     =head1 AUTHOR
453    
454     Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
cvs2svn:cvs-rev 1.2
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26