/[wait]/branches/unido/script/index-html
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /branches/unido/script/index-html

Parent Directory | Revision Log


Revision 106 - (show annotations)
Tue Jul 13 12:22:09 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 4581 byte(s)
Changes made by Andreas J. Koenig <andreas.koenig(at)anima.de> for Unido project

1 #!/usr/local/perl5.005_56.Mar06/bin/perl -w
2 eval 'exec perl -w -S $0 "$@"'
3 if 0;
4
5 use strict;
6
7
8 use FileHandle;
9 use Getopt::Long;
10
11 require WAIT::Database;
12 require WAIT::Config;
13 require WAIT::Parse::HTML;
14 require WAIT::Document::Find;
15
16
# Command-line defaults; GetOptions() below overwrites them in place.
#   database - name of the WAIT database inside 'dir'
#   dir      - directory holding the database; taken from the WAIT
#              configuration when that is set, otherwise /tmp
#   table    - name of the table the documents are indexed into
#   clean    - when true, the table is dropped and the script exits
#              (negatable on the command line as -noclean)
#   remove   - when true, documents are deleted instead of inserted
my %OPT = (
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'kbox',
    clean    => 0,
    remove   => 0,
);

# Abort with a usage line that actually lists the accepted options.
GetOptions(
    \%OPT,
    'database=s',
    'dir=s',
    'table=s',
    'clean!',
    'remove',
) or die "Usage: $0 [-database name] [-dir directory] [-table name] "
       . "[-[no]clean] [-remove]\n";
31
# Database handle used by the rest of the script.
my $db;

# The database lives in a directory named after it below -dir.
my $db_path = "$OPT{dir}/$OPT{database}";

# -clean: drop the table, remove what is left of its directory, then exit
# without indexing anything.
if ($OPT{clean} and -d $db_path) {
    my $cleaned = eval {
        my $tmp = WAIT::Database->open(name        => $OPT{database},
                                       'directory' => $OPT{dir})
            or die "Could not open database $OPT{database}: $@";
        my $tbl = $tmp->table(name => $OPT{table});
        $tbl->drop if $tbl;
        $tmp->close;

        my $table_path = "$db_path/$OPT{table}";
        if (-d $table_path) {
            # File::Path is not imported at file level, so load it here and
            # call rmtree by its full name.
            require File::Path;
            File::Path::rmtree($table_path, 1, 1);
        }
        1;
    };
    # Cleaning stays best-effort (the script still exits normally), but a
    # failure is reported instead of being silently discarded.
    warn "Could not clean table $OPT{table}: $@" unless $cleaned;
    exit;
}

# Create the database on first use, otherwise open the existing one.
if (-d $db_path) {
    $db = WAIT::Database->open(name        => $OPT{database},
                               'directory' => $OPT{dir})
        or die "Could not open database $OPT{database}: $@";
}
else {
    $db = WAIT::Database->create(name        => $OPT{database},
                                 'directory' => $OPT{dir})
        or die "Could not create database $OPT{database}: $@";
}
55
# Parser object; its split() method turns a document into a record hash.
my $layout = WAIT::Parse::HTML->new;

# Filter pipelines handed to the inverted indexes. Each is a list of
# filter names; the leading hash gives separate, shorter filter lists
# under the keys 'prefix' and 'intervall'.
# NOTE(review): presumably those two keys configure prefix and interval
# searches -- confirm against the WAIT::Table documentation.

# Pipeline ending in 'stop' and 'Stem'.
my $stem = [
    {
        'prefix'    => ['isotr', 'isolc'],
        'intervall' => ['isotr', 'isolc'],
    },
    'decode_entities', 'isotr', 'isolc', 'split2', 'stop', 'Stem',
];

# Same pipeline without the final 'Stem' filter.
my $text = [
    {
        'prefix'    => ['isotr', 'isolc'],
        'intervall' => ['isotr', 'isolc'],
    },
    'decode_entities', 'isotr', 'isolc', 'split2', 'stop',
];

# %D is tied to WAIT::Document::Find; the main loop iterates over it to
# obtain (path, content) pairs. Only paths matching /\.htm/ are accepted.
my %D;
my $doc_root = '/usr/local/etc/httpd/htdocs/berlin';

my $access = tie %D, 'WAIT::Document::Find',
    sub { $_[0] =~ /\.htm/; },
    $doc_root;
defined $access
    or die "Could not tie documents below $doc_root: $@";
73
74
# Open the table named by -table, creating it on first use.
#   attr     - attributes stored with every record
#   keyset   - records are keyed by 'docid'
#   layout   - the HTML parser object
#   access   - the tied document-finder object
#   invindex - field => filter-pipeline pairs; 'title' is listed twice on
#              purpose, once with the stemming pipeline and once without
my $tb = $db->table(name => $OPT{table})
    || $db->create_table(
        name     => $OPT{table},
        attr     => ['docid', 'headline', 'size'],
        keyset   => [['docid']],
        layout   => $layout,
        access   => $access,
        invindex => [
            'text'  => $stem,
            'title' => $stem,
            'title' => $text,
        ],
    );
die "Could not open or create table $OPT{table}\n" unless $tb;
90
# Directory arguments are not used for anything: the documents come from
# the tied hash %D. Say so instead of ignoring the arguments silently.
warn "$0: ignoring directory arguments: @ARGV\n" if @ARGV;

# Feed every (path, content) pair delivered by the tied hash to index().
# The leading & is required: a plain index(...) would call Perl's built-in
# string function rather than the sub defined in this file.
while (my ($path, $content) = each %D) {
    &index($path, $content);
}
$db->close();
exit;
103
# Running count of documents that got past all the checks in index().
my $NO;

# index($did, $value)
#   $did   - document id; also used as a file name for the size check
#   $value - content of the document, or undef when it is unavailable
#
# Inserts the document into the table, or deletes it when -remove was
# given. A document is skipped, with a one-word reason on STDOUT, when it
# is already present (insert mode), not present (remove mode), smaller
# than 100 bytes on disk, or has no content. Otherwise "ok [n]" goes to
# STDERR and the first 80 characters of the headline go to STDOUT.
sub index {
    my ($did, $value) = @_;

    my $present = $tb->have('docid' => $did) ? 1 : 0;

    if ($present and not $OPT{remove}) {
        print "duplicate\n";
        return;
    }
    if (not $present and $OPT{remove}) {
        print "missing\n";
        return;
    }

    if ((-s $did) < 100) {
        print "too small\n";
        return;
    }

    if (not defined $value) {
        print "unavailable\n";
        return;
    }

    $NO++;
    print STDERR "ok [$NO]\n";

    my $record = $layout->split($value);
    $record->{size} = length($value);

    # Fall back to the document id when there is no title, then squeeze
    # runs of whitespace and strip what is left at the front.
    my $headline = $record->{title} || $did;
    for ($headline) {
        s/\s+/ /g;
        s/^\s+//;
    }
    print substr($headline, 0, 80), "\n";

    # Both table methods take the same argument list.
    my $method = $OPT{remove} ? 'delete' : 'insert';
    return $tb->$method('docid' => $did, headline => $headline, %{$record});
}
139
140
141 __END__
142 ## ###################################################################
143 ## pod
144 ## ###################################################################
145
146 =head1 NAME
147
148 index-html - index HTML documents into a WAIT database
149
150 =head1 SYNOPSIS
151
152 B<index-html>
153 [B<-database> I<database name>]
154 [B<-dir> I<database directory>]
155 [B<-table> I<name>]
156 [B<-remove>]
157 [I<mandir> ...]
158
159 =head1 DESCRIPTION
160
161 B<Smakewhatis> generates/updates databases for B<sman>(1). If
162 I<mandir>s are specified, these are used. Otherwise the configured
163 default directories are indexed.
164
165 =head2 OPTIONS
166
167 =over 10
168
169 =item B<-database> I<database name>
170
171 Change the default database name to I<database name>.
172
173 =item B<-dir> I<database directory>
174
175 Change the default database directory to I<database directory>.
176
177 =item B<-table> I<name>
178
179 Use I<name> instead of C<kbox> as table name.
180
181 =item B<-clean>
182
183 Drop the table from B<database> and exit without indexing.
184
185 =item B<-remove>
186
187 Remove the selected directories from the database instead of
188 adding/updating. This works only for the manuals which are unchanged
189 since the indexing.
190
=back

191 =head1 SEE ALSO
192
193 L<sman>.
194
195 =head1 AUTHOR
196
197 Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26