/[wait]/branches/unido/script/index-html
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/unido/script/index-html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 106 - (hide annotations)
Tue Jul 13 12:22:09 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 4581 byte(s)
Changes made by Andreas J. Koenig <andreas.koenig(at)anima.de> for Unido project

1 dpavlin 106 #!/usr/local/perl5.005_56.Mar06/bin/perl -w
2     eval 'exec perl -w -S $0 "$@"'
3     if 0;
4    
5     use strict;
6    
7    
8     use FileHandle;
9     use Getopt::Long;
10    
11     require WAIT::Database;
12     require WAIT::Config;
13     require WAIT::Parse::HTML;
14     require WAIT::Document::Find;
15    
16    
# Default settings; every key can be overridden on the command line.
my %OPT = (
    database => 'DB',                                  # database name
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',  # directory holding the database
    table    => 'kbox',                                # table the documents are indexed into
    clean    => 0,                                     # --clean: drop the table, then exit
    remove   => 0,                                     # --remove: delete documents instead of inserting
);

GetOptions(\%OPT,
    'database=s',
    'dir=s',
    'table=s',
    'clean!',
    'remove',
) or die "Usage: ...\n";

my $db;
my $db_path = "$OPT{dir}/$OPT{database}";

# --clean: drop the table from an existing database and stop.  Nothing is
# indexed in this run.
if ($OPT{clean} and -d $db_path) {
    my $cleaned = eval {
        my $tmp = WAIT::Database->open(name        => $OPT{database},
                                       'directory' => $OPT{dir})
            or die "Could not open database $OPT{database}: $@";
        my $tbl = $tmp->table(name => $OPT{table});
        $tbl->drop if $tbl;
        $tmp->close;

        # Remove whatever drop() left behind.  File::Path is loaded here
        # because the script never imported rmtree, so the former bare
        # rmtree() call always died inside this eval without any message.
        my $table_path = "$db_path/$OPT{table}";
        if (-d $table_path) {
            require File::Path;
            File::Path::rmtree($table_path, 1, 1);
        }
        1;
    };
    # Cleaning stays best-effort, but a failure is no longer silent.
    warn "Could not clean table $OPT{table}: $@" unless $cleaned;
    exit;
}

# Open the database if its directory exists, otherwise create it.
if (-d $db_path) {
    $db = WAIT::Database->open(name        => $OPT{database},
                               'directory' => $OPT{dir})
        or die "Could not open database $OPT{database}: $@";
}
else {
    $db = WAIT::Database->create(name        => $OPT{database},
                                 'directory' => $OPT{dir})
        or die "Could not create database $OPT{database}: $@";
}
55    
# Parser object: used as the table layout and, in index(), to split a
# document into the fields of a record.
my $layout = WAIT::Parse::HTML->new;

# Filter pipelines handed to WAIT for the inverted indexes.  The leading hash
# carries separate pipelines under the keys 'prefix' and 'intervall'
# (presumably for prefix and interval searches -- confirm against the WAIT
# documentation).  $stem and $text differ only in the trailing 'Stem' filter.
my $stem = [{
                'prefix'    => ['isotr', 'isolc'],
                'intervall' => ['isotr', 'isolc'],
            },
            'decode_entities', 'isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text = [{
                'prefix'    => ['isotr', 'isolc'],
                'intervall' => ['isotr', 'isolc'],
            },
            'decode_entities', 'isotr', 'isolc', 'split2', 'stop'];
# NOTE(review): $sound is defined but not referenced anywhere else in this
# script.
my $sound = ['decode_entities', 'isotr', 'isolc', 'split2', 'Soundex'];

# %D is tied to WAIT::Document::Find with a file-name filter (anything
# containing ".htm") and a hard-coded document root.
my %D;
my $docroot = "/usr/local/etc/httpd/htdocs/berlin";
my $access  = tie(%D, 'WAIT::Document::Find', sub { $_[0] =~ /\.htm/; },
                  $docroot);
die "Could not tie WAIT::Document::Find to $docroot: $@"
    unless defined $access;

# Reuse the table when it already exists; otherwise create it with three
# inverted indexes: 'text' (stemmed) and 'title' (stemmed and unstemmed).
my $tb = $db->table(name => $OPT{table})
    || $db->create_table(
           name     => $OPT{table},
           attr     => ['docid', 'headline', 'size'],
           keyset   => [['docid']],
           layout   => $layout,
           access   => $access,
           invindex => [
               'text'  => $stem,
               'title' => $stem,
               'title' => $text,
           ],
       );
die "Could not open or create table $OPT{table}: $@" unless $tb;
90    
# Directories named on the command line, else the configured manpath.
# NOTE(review): @DIRS is filled in but never read again in this script; the
# loop below iterates over the tied hash %D instead.
my @DIRS = @ARGV ? @ARGV : @{$WAIT::Config->{manpath}};

# Hand every key/value pair of %D to index().  The leading '&' is required:
# a plain index(...) would call Perl's builtin string function, not the sub
# defined in this file.
while (my ($doc_path, $doc_body) = each %D) {
    &index($doc_path, $doc_body);
}
$db->close();
exit;

# Counter used by index() for its "ok [n]" progress line.  The declaration
# takes effect at compile time, so index() sees the variable even though
# this statement, placed after exit, is never reached at run time.
my $NO;
# index($did, $value)
#
# Insert one document into the table, or delete it when --remove is set.
#
#   $did   - document id; also used as a file name for the size check
#   $value - document text; undef is reported as "unavailable"
#
# A skipped document gets a one-word reason on STDOUT.  A processed one
# gets "ok [n]" on STDERR and its headline (at most 80 characters) on
# STDOUT.
sub index {
    my ($did, $value) = @_;

    # Inserting needs an unknown docid, removing needs a known one.
    my $known = $tb->have('docid' => $did);
    if ($known && !$OPT{remove}) {
        print "duplicate\n";
        return;
    }
    if (!$known && $OPT{remove}) {
        print "missing\n";
        return;
    }

    # Files shorter than 100 bytes are not indexed.
    if ((-s $did) < 100) {
        print "too small\n";
        return;
    }

    if (!defined $value) {
        print "unavailable\n";
        return;
    }
    printf STDERR "ok [%d]\n", ++$NO;

    my $record = $layout->split($value);
    $record->{size} = length($value);

    # Headline: the parsed title if there is one, else the docid, with
    # whitespace runs collapsed and leading whitespace removed.
    my $headline = $record->{title} || $did;
    $headline =~ s/\s+/ /g;
    $headline =~ s/^\s+//;
    printf "%s\n", substr($headline,0,80);

    # Same argument list for both operations.
    my @row = ('docid' => $did, headline => $headline, %{$record});
    return $OPT{remove} ? $tb->delete(@row) : $tb->insert(@row);
}
139    
140    
141     __END__
142     ## ###################################################################
143     ## pod
144     ## ###################################################################
145    
146     =head1 NAME
147    
148     smakewhatis - generate a manual database for sman
149    
150     =head1 SYNOPSIS
151    
152     B<smakewhatis>
153     [B<-database> I<database name>]
154     [B<-dir> I<database directory>]
155     [B<-table> I<name>]
156     [B<-remove>]
157     [I<mandir> ...]
158    
159     =head1 DESCRIPTION
160    
161     B<Smakewhatis> generates/updates databases for B<sman>(1). If
162     I<mandir>s are specified, these are used. Otherwise the configured
163     default directories are indexed.
164    
165     =head2 OPTIONS
166    
167     =over 10
168    
169     =item B<-database> I<database name>
170    
171     Change the default database name to I<database name>.
172    
173     =item B<-dir> I<database directory>
174    
175     Change the default database directory to I<database directory>.
176    
177     =item B<-table> I<name>
178    
179     Use I<name> instead of C<kbox> as table name.
180    
181     =item B<-clean>
182    
183     Drop the table from B<database> and exit; nothing is indexed in that run.
184    
185     =item B<-remove>
186    
187     Remove the selected directories from the database instead of
188     adding/updating. This works only for the manuals which are unchanged
189     since the indexing.
190    
191     =head1 SEE ALSO
192    
193     L<sman>.
194    
195     =head1 AUTHOR
196    
197     Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26