/[wait]/trunk/script/index_mail
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/script/index_mail

Parent Directory Parent Directory | Revision Log Revision Log


Revision 48 - (hide annotations)
Fri Dec 29 16:09:58 2000 UTC (23 years, 4 months ago) by ulpfr
Original Path: cvs-head/script/index_mail
File size: 4166 byte(s)
Mail indexing works resonable

1 ulpfr 47 # -*- Mode: Perl -*-
2     # $Basename: index_mail $
3 ulpfr 48 # $Revision: 1.2 $
4 ulpfr 47 # Author : Ulrich Pfeifer
5     # Created On : Fri Apr 7 13:45:50 2000
6     # Last Modified By: Ulrich Pfeifer
7 ulpfr 48 # Last Modified On: Fri Dec 29 17:07:26 2000
8 ulpfr 47 # Language : CPerl
9     #
10     # (C) Copyright 2000, UUNET Deutschland GmbH, Germany
11     #
12    
13     use strict;
14     use File::Path;
15     use DB_File;
16     use Getopt::Long;
17     use Cwd;
18    
19     require WAIT::Config;
20     require WAIT::Database;
21     require WAIT::Parse::Overview;
22     require WAIT::Document::Split;
23     require WAIT::InvertedIndex;
24    
25    
# Set the cachesize field of the $DB_BTREE object exported by DB_File.
# NOTE(review): presumably picked up by WAIT when it opens its B-tree
# files further down -- confirm against WAIT::Database.
$DB_BTREE->{'cachesize'} = 200_000;

# Command line defaults.  'dir' falls back to /tmp when WAIT::Config
# carries no (true) WAIT_home entry.
my %OPT = (
  clean    => 0,
  database => 'DB',
  dir      => $WAIT::Config->{WAIT_home} || '/tmp',
  table    => 'mail',
);

# Recognised options are stored into %OPT.  On an unparsable command line
# GetOptions returns false and we stop with a usage summary instead of the
# former placeholder text.
GetOptions(
  \%OPT,
  'clean!',
  'database=s',
  'dir=s',
  'table=s',
) or die "Usage: $0 [-clean|-noclean] [-database dbname] [-dir directory]"
       . " [-table name] file ...\n";
40    
# -clean: when the database directory already exists, drop the table from
# the database and remove whatever is left of its directory on disk.
if ($OPT{clean} and -d "$OPT{dir}/$OPT{database}") {
  my $table_dir = "$OPT{dir}/$OPT{database}/$OPT{table}";

  # It is the database that is being opened here, so report it as such
  # (the message used to name the table).
  my $tmp = WAIT::Database->open(name        => $OPT{database},
                                 'directory' => $OPT{dir})
    or die "Could not open database $OPT{database}: $@\n";

  # The table may not exist yet; only drop it when the lookup succeeded.
  my $tbl = $tmp->table(name => $OPT{table});
  $tbl->drop if $tbl;

  # Legacy File::Path interface: rmtree($dir, $verbose, $safe).
  rmtree($table_dir, 1, 1) if -d $table_dir;

  $tmp->close;
}
51    
# Obtain the database handle: open the database when its directory is
# already present, otherwise create it.  Each failure message names the
# operation that actually failed and the database it was applied to.
my $db;
if (-d "$OPT{dir}/$OPT{database}") {
  $db = WAIT::Database->open(name        => $OPT{database},
                             'directory' => $OPT{dir})
    or die "Could not open database $OPT{database}: $@\n";
}
else {
  $db = WAIT::Database->create(name        => $OPT{database},
                               'directory' => $OPT{dir})
    or die "Could not create database $OPT{database}: $@\n";
}
63    
# Parser object used both as the table layout and to split each document
# into its fields.  Direct method call instead of the ambiguous indirect
# object form "new WAIT::Parse::Overview".
my $layout = WAIT::Parse::Overview->new;

# Filter pipelines handed to WAIT as inverted index definitions.  The
# entries are filter names interpreted by WAIT.
# NOTE(review): presumably applied left to right -- confirm against
# WAIT::InvertedIndex.
my $stem  = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];

# A leading hash reference carries separate pipelines under the keys
# 'prefix' and 'intervall' (spelling as expected by the caller of this
# structure; do not "correct" it here).
my $text  = [{
              'prefix'    => ['isotr', 'isolc'],
              'intervall' => ['isotr', 'isolc'],
             },
             'isotr', 'isolc', 'split2', 'stop'];

# NOTE(review): $sound is defined but not referenced by any index below.
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'];
73    
# Current working directory at start-up.
# NOTE(review): $cwd is not referenced again in this script.
my $cwd = cwd;

# Tie %D to the input named on the command line; the object returned by
# tie is kept because the table needs it as its access object.
my %D;
my $access = tie(%D, 'WAIT::Document::Split', 'end', '$', @ARGV)
  or die "Couldn't tie to file: $!\n";

# Attributes stored per document.
my @attributes = ('from', 'to', 'subject', 'article', 'docid', 'headline');

# Inverted indexes as an ordered attribute => pipeline list; 'subject'
# appears twice on purpose, once per pipeline.
my @inverted = (
  'subject' => $stem,
  'subject' => $text,
  'to'      => $text,
  'from'    => $text,
);

my $tb = $db->create_table(
  name     => $OPT{table},
  attr     => \@attributes,
  layout   => $layout,
  access   => $access,
  invindex => \@inverted,
) or die "Couldn't create table $OPT{table}: $@\n";
92    
# Walk every (document id, raw text) pair exposed through the tied hash,
# split it into fields and insert it into the table.
while (my ($did, $value) = each %D) {
  my $record   = $layout->split($value);
  my $headline = $record->{subject};

  # Collapse whitespace runs to a single blank.  Skipped for documents
  # without a subject so that -w does not warn about an undefined value;
  # the headline then stays undefined exactly as before.
  $headline =~ s/\s+/ /g if defined $headline;
  #printf "%s\n", substr($headline,0,80);

  # Fields from the parsed record come last, so a 'docid' or 'headline'
  # key in the record would take precedence over the values given here.
  $tb->insert('docid'  => $did,
              headline => $headline,
              %{$record});

  # NOTE(review): presumably a coarse progress indicator that fires only
  # for ids containing " 0 0" -- confirm the docid format.
  print $did, "\n" if $did =~ / 0 0/;
}

# NOTE(review): meaning of the 'top' setting is defined by WAIT::Table.
$tb->set(top => 1);
$tb->close();
$db->close();

$WAIT::Config = $WAIT::Config; # make perl -w happy
109    
110    
111     __END__
112     ## ###################################################################
113     ## pod
114     ## ###################################################################
115    
116     =head1 NAME
117    
index_mail - generate a WAIT index for .overview files
119    
120     =head1 SYNOPSIS
121    
122     B<index_mail>
123     [B<-clean>] [B<-noclean>]
124     [B<-database> I<dbname>]
125     [B<-dir> I<directory>]
[B<-table> I<table name>]
I<file> ...
127    
128     =head1 DESCRIPTION
129    
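Indexes the files named on the command line. They are read through
C<WAIT::Document::Split>, each document is parsed with
C<WAIT::Parse::Overview>, and the resulting fields are inserted into the
table together with the document id and a headline (the subject with
runs of whitespace collapsed). Inverted indexes are built for the
C<subject>, C<to> and C<from> attributes.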
132    
133     =head1 OPTIONS
134    
135     =over 5
136    
137     =item B<-clean> / B<-noclean>
138    
139     Clean the table before indexing. Default is B<off>.
140    
141     =item B<-database> I<dbname>
142    
143     Specify database name. Default is F<DB>.
144    
145     =item B<-dir> I<directory>
146    
Alternate directory where databases are located. Default is the
directory specified during configuration of WAIT, or F</tmp> if none
was configured.
149    
150     =item B<-table> I<table name>
151    
Specify an alternate table name. Default is C<mail>.

=back

154     =head1 AUTHOR
155    
156     Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
157    

Properties

Name Value
cvs2svn:cvs-rev 1.2

  ViewVC Help
Powered by ViewVC 1.1.26