/[wait]/branches/unido/script/sman
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/unido/script/sman

Parent Directory Parent Directory | Revision Log Revision Log


Revision 106 - (hide annotations)
Tue Jul 13 12:22:09 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 12317 byte(s)
Changes made by Andreas J. Koenig <andreas.koenig(at)anima.de> for Unido project

1 dpavlin 106 #!perl -w
2     # -*- Mode: Perl -*-
3     # $Basename: sman $
4     # $Revision: 1.14 $
5     # Author : Ulrich Pfeifer
6     # Created On : Fri Aug 30 15:52:25 1996
7     # Last Modified By: Ulrich Pfeifer
8     # Last Modified On: Mon May 8 11:03:46 2000
9     # Language : CPerl
10     #
11     # (C) Copyright 1996-2000, Ulrich Pfeifer
12     #
13    
14     use strict;
15    
16     use Term::ReadLine;
17     use Getopt::Long;
18     use Fcntl qw(O_RDONLY);
19     use Config;
20    
21     require WAIT::Config;
22     require WAIT::Database;
23     require WAIT::Query::Base;
24     require WAIT::Query::Wais;
25    
26    
27     $SIG{PIPE} = 'IGNORE';
28     my %OPT = (database => 'DB',
29     dir => $WAIT::Config->{WAIT_home} || '/tmp',
30     table => 'man',
31     pager => $WAIT::Config->{'pager'} || 'more',
32     filter => 0,
33     max => 15,
34     );
35    
36     GetOptions(\%OPT,
37     'database=s',
38     'dir=s',
39     'table=s',
40     'filter=i',
41     'max=i',
42     'pager:s') || die "
43     Usage: $0
44     [--database database]
45     [--dir dir ]
46     [--table table ]
47     [--filter integer ]
48     [--max integer ]
49     [--pager pager ]
50     ";
51    
52     my $db = WAIT::Database->open(name => $OPT{database},
53     mode => O_RDONLY,
54     directory => $OPT{dir})
55     or die "Could not open database $OPT{database}: $@";
56    
57     my $tb = $db->table(name => $OPT{table})
58     or die "Could not open table $OPT{table}: $@";
59    
60     # not used: my $layout = $tb->layout; # a WAIT::Parse::Nroff object
61    
62     my $term = new Term::ReadLine 'Simple Query Interface';
63    
64     require WAIT::Format::Term;
65     my $format;
66     if ($Config::Config{'archname'} eq 'i586-linux') {
67     # for color xterm
68     $format = new WAIT::Format::Term query_s => "", query_e => "";
69     } else {
70     $format = new WAIT::Format::Term;
71     }
72    
73     my $pager = ($OPT{pager}) ? \&pager : \&less;
74     my $OUT = $term->OUT;
75    
76     my $st = 1;
77     print $OUT "Enter 'h' for help.\n";
78    
79     # sman is basically offering three services: find the hits and show
80     # them (a query), show metadata for a hit (a view), show a hot (display)
81    
82     my($query, @did);
83    
84     while (defined ($_ = &myreadline("$st> "))) {
85     chomp; $st++;
86    
87     my(%hits, $query_text);
88     if (/^$/) {
89     next;
90     } elsif (/^m (\d+)$/) {
91     $OPT{max} = $1;
92     } elsif (/^f\s*(\d+)?$/) {
93     $OPT{filter} = $1;
94     next;
95     } elsif (/^t$/i) {
96     if ($pager eq \&less) {
97     $pager = \&pager;
98     } else {
99     $pager = \&less;
100     }
101     next;
102     } elsif (/^(\d+)$/) {
103     if (defined $did[$1]) {
104     display($did[$1]); # <----------- display (full doc)
105     next;
106     }
107     } elsif (/^d\s*(\d+)/) {
108     if (defined $did[$1]) {
109     view($did[$1]); # <----------- view (metadata from WAIT)
110     next;
111     }
112     } elsif (/^q$/i) {
113     last;
114     } elsif (/^l$/i) {
115     # fall through
116     } elsif (/^[h?]$/i) {
117     help();
118     next;
119     } elsif (/^hh$/i) {
120     extended_help();
121     next;
122     } else { # <----------- A query (Display a list)
123     $query_text = $_;
124     eval {$query = WAIT::Query::Wais::query($tb, $_)};
125     if ($@ ne '') {
126     print $OUT "$_ => $query\n\$\@='$@'\n";
127     } elsif (ref($query)) {
128     %hits = $query->execute(top => $OPT{max}, picky => 1);
129     # the hash %hits has as keys document numbers and as values
130     # quality figures. The doc numbers are not what we have as docid
131     # to find the item in the access class, they are WAIT's private
132     # numbers.
133     } else {
134     next;
135     }
136     }
137    
138     next unless %hits;
139     my $no = 1; # numbering the hits for the result table that is
140     # presented to the user
141    
142     @did = (); # store the internal numbers (keys of %hits). The user
143     # will use $no in sman's interface to select a hit.
144    
145     # the following loop uses the values of %hits to sort the results
146     # according to the quality and cut after a number of rows. After
147     # that %hits isn't needed anymore.
148     print "Query: $query_text\n";
149     for my $did (sort {$hits{$b} <=> $hits{$a}} keys %hits) {
150    
151     my %tattr = $tb->fetch($did);
152     # the hash %tattr contains several attributes of the item we are
153     # referring to, namely the attributes that we named in the "attr"
154     # argument of the create_table statement in smakewhatis
155    
156     printf $OUT "%2d %6.3f %s\n", $no, $hits{$did},
157     substr($tattr{headline} ||'',0,68);
158     $did[$no] = $did;
159     last if $no++ >= $OPT{max};
160    
161     }
162    
163     } continue {
164     # we don't do this since Andreas Koenig does not think of it as feature
165     # $term->SetHistory(grep length($_)>4, $term->GetHistory)
166     }
167     warn "Thank you for using sman\n";
168    
169     $tb->close;
170     $db->close;
171    
172     sub myreadline {
173     if (@ARGV) {
174     return shift @ARGV;
175     } else {
176     $term->readline(@_);
177     }
178     }
179     sub help {
180     my $idb = "\n\t'". join(q[', '], $tb->fields()) . "'";
181     print $OUT qq[Available commands:
182    
183     <num> Show the document <num>
184     d <num> Show the db entry of document <num>
185     f <num> Display only <num> lines context
186     h,? Display this help message
187     hh Display query examples
188     m <num> Set maxhits to <num>
189     t Toggle display mode (term/less)
190     q Exit from $0
191     l redisplay last ranking
192     Other input is tried as wais query.
193     The following fields are known: $idb
194     ] ;
195     }
196    
197     sub extended_help {
198     print q{
199     Here are some query examples:
200    
201     information retrieval free text query
202     information or retrieval same as above
203     des=information retrieval `information' must be in the description
204     des=(information retrieval) one of them in description
205     des=(information or retrieval) same as above
206     des=(information and retrieval) both of them in description
207     des=(information not retrieval) `information' in description and
208     `retrieval' not in description
209     des=(information system*) wild-card search
210     au=ilia author names may be misspelled
211    
212     You can build arbitary boolean combination of the above examples.
213     Field names may be abbreviated.
214     }
215     }
216    
217     sub view {
218     my $did = shift;
219     my %tattr = $tb->fetch($did);
220     for (keys %tattr) {
221     print $OUT "$_ $tattr{$_}\n";
222     }
223     }
224    
225     sub display {
226     my $did = shift;
227    
228     return unless defined $query and defined $did;
229    
230     print $OUT "Wais display document $did\n";
231     my %tattr = $tb->fetch($did);
232     my $tdid = $tattr{docid};
233     # WHAT DOES HE DO HERE? ULI???
234     # Re: some indexing scripts did use pathnames relative to the table directory
235     # especially the cpanwait script does this. uli
236     # if ($tdid !~ m(^/)) {
237     # $tdid = $tb->dir . '/' . $tdid;
238     # }
239    
240     # The main task of all that follows from here is highlighting. WAIT
241     # is designed to make it possible to show the user why a certain
242     # document was chosen by the indexer.
243    
244     my $buf = $tb->fetch_extern($tdid);
245     # This $buf can be an object that can have enough information to do
246     # highlighting without WAIT's help. If you prefer to implement your
247     # own highlighting, you can do so now with e.g. print
248     # $buf->highlight(query => $query)
249    
250     # All you need to know to implement highlighting is how a
251     # WAIT::Query::Base object looks like (left as an exercise for the
252     # reader).
253    
254     # The impatient reader may want to implement something without
255     # highlighting, in which case he does not need any info about the
256     # query object and can rightaway run e.g.
257     # print $buf->as_string
258    
259     # Thus the impatient reader does not necessarily need the following
260     # heavy wizardry. Just to give you an idea what's going on: every
261     # word in the text must be compared to every word in the query if it
262     # is worth highlighting, and which part of the word is worth
263     # highlighting. This must be done differently for every field in the
264     # table and for every index defined for that field. Try to run a
265     # query with 100 words and you'll be amazed to see it really works.
266     # Or maybe it doesn't. You should be aware that the hilighting code
267     # is to be regarded as alpha. It is certainly the least tested part
268     # of WAIT so far.
269    
270     if ($buf) {
271     my @txt = $query->hilight($buf);
272     # In this operation the following things melt into one piece:
273     # $query: The query entered by the user (Class isa WAIT::Query::Base)
274     # $tb: The table we queried (Class WAIT::Table)
275     # $buf: The document to display (User defined class or string)
276     # The steps taken are:
277     # 1.) $query calls "hilight" on $tb and passes
278     # filtered and raw search terms ($query->{Plain} and $query->{Raw}).
279     # 2.) $tb asks the layout object to tag the object which results
280     # in an array with alternating elements of tags (anon HASHes) and
281     # strings.
282     # 3.) $tb adds some markup on its own: {qt=>1} or some such
283    
284     # The result of that process can optionally be sent through a
285     # filter, just to impress your friends with yet more heavy
286     # wizardry
287     if ($OPT{filter}) {
288     @txt = &filter(@txt);
289     }
290    
291     # And then a formatter (in our case a terminal formatter) turns
292     # all the markup into escape sequences and strings that can in
293     # turn be sent through a pager for instance
294     &$pager($format->as_string(\@txt));
295     }
296    
297     # Hey, that's it. The user out there is deeply impressed now. You
298     # can lean back again:-) He got a document that has some words
299     # hilighted and will probably read and enjoy it. Maybe he'll send
300     # you an email.
301     }
302    
303     sub filter {
304     my @result;
305     my @context;
306     my $lines = 0;
307     my $clines = 0;
308     my $elipsis = 0;
309    
310     print STDERR "Filter ...";
311     while (@_) {
312     my %tag = %{shift @_};
313     my $txt = shift @_;
314    
315     for (split /(\n)/, $txt) {
316     if ($_ eq "\n") {
317     if (exists $tag{_qt}) {
318     #die "Weird!";
319     push @result, {_i=>1}, "[WEIRD]";
320     } elsif ($lines) {
321     push @result, {}, $_;
322     $lines--;
323     } else {
324     push @context, {}, $_;
325     $clines++;
326     }
327     } else {
328     if (exists $tag{_qt}) {
329     push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis;
330     push @result, @context, {%tag}, $_;
331     delete $tag{_qt};
332     @context = (); $clines = 0; $elipsis=0;
333     $lines = $OPT{filter}+1;
334     } elsif ($lines) {
335     push @result, \%tag, $_;
336     } else {
337     push @context, \%tag, $_;
338     }
339     }
340     if ($clines>$OPT{filter}) {
341     my (%tag, $txt);
342     while ($clines>$OPT{filter}) {
343     %tag = %{shift @context};
344     $txt = shift @context;
345     if ($txt =~ /\n/) {
346     $clines--;
347     $elipsis++;
348     }
349     }
350     }
351     }
352     }
353     print STDERR " done\n";
354     @result;
355     }
356    
357     sub less {
358     my $flags;
359     if ($WAIT::Config->{pager} =~ /less/) {
360     $flags = '-r';
361     } elsif ($WAIT::Config->{pager} =~ /more/) {
362     $flags = '-c';
363     }
364     open(PAGER, "|$WAIT::Config->{pager} $flags") or die;
365     print PAGER @_;
366     close PAGER;
367     }
368    
369     sub pager {
370     my @lines = split /\n/, $_[0];
371     my $line = 0;
372     for (@lines) {
373     print "$_\n"; $line++;
374     if ($line % 24 == 0) {
375     my $key = $term->readline("[return]");
376     return if $key =~ /^q/i;
377     }
378     }
379     }
380    
381    
382     __END__
383     ## ###################################################################
384     ## pod
385     ## ###################################################################
386    
387     =head1 NAME
388    
389     sman - Search and disply manuals interactive
390    
391     =head1 SYNOPSIS
392    
393     B<sman>
394     [B<-database> I<database name>]
395     [B<-dir> I<database directory>]
396     [B<-table> I<name>]
397     [B<-less>]
398     [B<-filter> I<num>]
399     [B<-max> I<num>]
400    
401     =head1 DESCRIPTION
402    
403     B<Sman> is an interactive search interface to your systems manual pages.
404    
405     =head2 OPTIONS
406    
407     =over 10
408    
409     =item B<-database> I<database name>
410    
411     Change the default database name to I<database name>.
412    
413     =item B<-dir> I<database directory>
414    
415     Change the default database directory to I<database directory>.
416    
417     =item B<-table> I<name>
418    
419     Use I<name> instead of C<man> as table name.
420    
421     =item B<-pager> I<name>
422    
423     Use I<name> instead of the default pager. If no I<name> is supplied a
424     buildin pager is used.
425    
426     =item B<-filter> I<num>
427    
428     Display only I<num> lines above and below an occurance of a search
429     term in the manual.
430    
431     =item B<-max> I<num>
432    
433     Display only I<num> hits. Default is to 10.
434    
435     =head1 SEE ALSO
436    
437     L<smakewhatis>.
438    
439     =head1 AUTHOR
440    
441     Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26