/[wait]/branches/unido/script/index_mail
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /branches/unido/script/index_mail

Parent Directory | Revision Log


Revision 106 - (show annotations)
Tue Jul 13 12:22:09 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 4185 byte(s)
Changes made by Andreas J. Koenig <andreas.koenig(at)anima.de> for Unido project

#!/usr/bin/perl -w
# -*- Mode: Perl -*-
# $Basename: index_mail $
# $Revision: 1.3 $
# Author : Ulrich Pfeifer
# Created On : Fri Apr 7 13:45:50 2000
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Fri Dec 29 17:07:26 2000
# Language : CPerl
#
# (C) Copyright 2000, UUNET Deutschland GmbH, Germany
#
# Builds a WAIT table from the files named on the command line: every
# document is split into fields by WAIT::Parse::Overview and inserted
# into the table together with its document id and a headline derived
# from the subject.  See the POD after __END__ for the options.

use strict;
use File::Path;
use DB_File;
use Getopt::Long;
use Cwd;

require WAIT::Config;
require WAIT::Database;
require WAIT::Parse::Overview;
require WAIT::Document::Split;
require WAIT::InvertedIndex;

# Cache size for DB_File's default BTREE settings object.
$DB_BTREE->{'cachesize'} = 200_000;

# Option defaults; each may be overridden on the command line.
my %OPT = (
    clean    => 0,
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'mail',
);

GetOptions(\%OPT, 'clean!', 'database=s', 'dir=s', 'table=s')
    or die "Usage: $0 [-clean|-noclean] [-database dbname] "
         . "[-dir directory] [-table table] file ...\n";

my $db_path    = "$OPT{dir}/$OPT{database}";
my $table_path = "$db_path/$OPT{table}";

# With -clean, drop a previously built table and remove whatever is left
# of its directory before indexing starts.
if ($OPT{clean} and -d $db_path) {
    my $old_db = WAIT::Database->open(name      => $OPT{database},
                                      directory => $OPT{dir})
        or die "Could not open database $OPT{database}: $@\n";
    my $old_table = $old_db->table(name => $OPT{table});
    $old_table->drop if $old_table;
    rmtree($table_path, 1, 1) if -d $table_path;
    $old_db->close;
}

# Open the database if its directory exists, otherwise create it.
my $db;
if (-d $db_path) {
    $db = WAIT::Database->open(name      => $OPT{database},
                               directory => $OPT{dir})
        or die "Could not open database $OPT{database}: $@\n";
}
else {
    $db = WAIT::Database->create(name      => $OPT{database},
                                 directory => $OPT{dir})
        or die "Could not create database $OPT{database}: $@\n";
}

my $layout = WAIT::Parse::Overview->new;

# Filter pipelines handed to create_table() for the inverted indexes.
my $stem = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text = [
    {
        'prefix'    => ['isotr', 'isolc'],
        'intervall' => ['isotr', 'isolc'],
    },
    'isotr', 'isolc', 'split2', 'stop',
];

# %D exposes the documents found in the files given on the command line;
# the tie object is also passed to the table as its access method.
my %D;
my $access = tie(%D, 'WAIT::Document::Split', 'end', '$', @ARGV)
    or die "Couldn't tie to file: $!\n";

my $tb = $db->create_table(
    name     => $OPT{table},
    attr     => ['from', 'to', 'subject', 'article', 'docid', 'headline'],
    layout   => $layout,
    access   => $access,
    invindex => [
        'subject' => $stem, 'subject' => $text,
        'to'      => $text,
        'from'    => $text,
    ],
);
die "Couldn't create table $OPT{table}: $@\n" unless $tb;

while (my ($did, $value) = each %D) {
    my $record = $layout->split($value);

    # The headline is the subject with every run of whitespace collapsed
    # to a single blank.  A record without a subject keeps an undefined
    # headline; the guard only avoids the "uninitialized" warning.
    my $headline = $record->{subject};
    $headline =~ s/\s+/ /sg if defined $headline;

    $tb->insert('docid'  => $did,
                headline => $headline,
                %{$record});

    # NOTE(review): looks like progress output for selected docids only;
    # confirm what a docid containing " 0 0" denotes.
    print $did, "\n" if $did =~ / 0 0/;
}
$tb->set(top => 1);
$tb->close();
$db->close();

$WAIT::Config = $WAIT::Config; # make perl -w happy


__END__
## ###################################################################
## pod
## ###################################################################

=head1 NAME

index_mail - generate a WAIT index for .overview files

=head1 SYNOPSIS

B<index_mail>
[B<-clean>] [B<-noclean>]
[B<-database> I<dbname>]
[B<-dir> I<directory>]
[B<-table> I<table name>]
I<file> ...

=head1 DESCRIPTION

Indexes the documents contained in the files named on the command line
into a WAIT table. Each document is parsed with
L<WAIT::Parse::Overview>. The C<subject>, C<to> and C<from> fields are
indexed, and the subject, with runs of whitespace collapsed to single
blanks, is stored as the headline of the document.

=head1 OPTIONS

=over 5

=item B<-clean> / B<-noclean>

Clean the table before indexing. Default is B<off>.

=item B<-database> I<dbname>

Specify database name. Default is F<DB>.

=item B<-dir> I<directory>

Alternate directory where databases are located. Default is the
directory specified during configuration of WAIT, or F</tmp> if none
is configured.

=item B<-table> I<table name>

Specify an alternate table name. Default is C<mail>.

=back

=head1 AUTHOR

Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>


  ViewVC Help
Powered by ViewVC 1.1.26