/[swish]/trunk/mailman/index_mailman.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/mailman/index_mailman.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (hide annotations)
Sun Apr 18 06:31:38 2004 UTC (20 years, 1 month ago) by dpavlin
File MIME type: text/plain
File size: 5005 byte(s)
index MailMan archives

1 dpavlin 77 #!/usr/bin/perl -w
2     use strict;
3    
4     =pod
5    
6     This is an example program for use with swish-e's -S prog indexing method.
7    
8     This will scan and index a MailMan (http://www.list.org) mailing list archive.
9     This script is based on original index_hypermail.pl from swish-e distribution.
10    
11     You might use a config file such as:
12    
13     IndexDir /data/swish/mailman/index_mailman.pl
14     SwishProgParameters /var/lib/mailman/archives/public/
15    
16     MetaNames swishtitle name email
17     PropertyNames name email
18     PropertyNamesDate sent
19     IndexContents HTML2 .html
20     StoreDescription HTML2 <body> 100000
21     UndefinedMetaTags ignore
22    
23     ReplaceRules replace /var/lib/mailman/archives/public/ /pipermail/
24    
25     All examples assume that this is Debian system. if it's not, you might want
26     to change paths above.
27    
28     Index with the command:
29    
30     ./swish-e -c swish.conf -S prog
31    
32     See perldoc examples/swish.cgi for how to search this index. Here's a possible
33     config file for use with swish.cgi:
34    
35     >cat .swishcgi.conf
36    
37     return {
38     title => "Search the Foo List Archive",
39     swish_binary => '../swish-e',
40     display_props => [qw/ name email sent /],
41     sorts => [qw/swishrank swishtitle email sent/],
42     secondary_sort => [qw/sent desc/],
43     metanames => [qw/swishdefault swishtitle name email/],
44     name_labels => {
45     swishrank => 'Rank',
46     swishtitle => 'Subject Only',
47     name => "Poster's Name",
48     email => "Poster's Email",
49     sent => 'Message Date',
50     swishdefault => 'Subject & Body',
51     },
52    
53     highlight => {
54     package => 'PhraseHighlight',
55     show_words => 10, # Number of swish words words to show around highlighted word
56     max_words => 100, # If no words are found to highlighted then show this many words
57     occurrences => 6, # Limit number of occurrences of highlighted words
58     highlight_on => '<font style="background:#FFFF99">',
59     highlight_off => '</font>',
60     meta_to_prop_map => { # this maps search metatags to display properties
61     swishdefault => [ qw/swishtitle swishdescription/ ],
62     swishtitle => [ qw/swishtitle/ ],
63     email => [ qw/email/ ],
64     name => [ qw/name/ ],
65     swishdocpath => [ qw/swishdocpath/ ],
66     },
67     },
68     date_ranges => {
69     property_name => 'sent', # property name to limit by
70     time_periods => [
71     'All',
72     'Today',
73     'Yesterday',
74     'This Week',
75     'Last Week',
76     'Last 90 Days',
77     'This Month',
78     'Last Month',
79     ],
80    
81     line_break => 0,
82     default => 'All',
83     date_range => 1,
84     },
85     };
86    
87    
88    
89     =cut
90    
91    
92    
93     use File::Find; # for recursing a directory tree
94     use Date::Parse;
95    
96     # Recurse the directory(s) passed in on the command line
97    
98     find( { wanted => \&wanted, no_chdir => 1, follow => 1 }, @ARGV );
99    
100    
101     sub wanted {
102     return if -d;
103     return unless m!(^|/)\d+\.html$!;
104    
105     print STDERR $File::Find::name,"\n";
106    
107     my $mtime = (stat $File::Find::name )[9];
108    
109     my $html = format_message($File::Find::name );
110     return unless $html;
111    
112     my $size = length $html;
113    
114     my $name = $File::Find::name;
115     $name =~ s[^./][];
116    
117     print <<EOF;
118     Content-Length: $size
119     Last-Mtime: $mtime
120     Path-Name: $name
121    
122     EOF
123    
124     print $html;
125     }
126    
127    
128    
129     sub format_message {
130     my $file = shift;
131     local $_;
132    
133     unless ( open FH, "<$file" ) {
134     warn "Failed to open '$file'. Error: $!";
135     return;
136     }
137    
138     my %fields;
139     my $content = '';
140     my $body = '';
141    
142     while (<FH>) {
143     $content .= $_;
144     }
145    
146     if ($content =~ m{<I>(.+?)</I>}s) {
147     $fields{sent} = str2time $1;
148     }
149    
150     if ($content =~ m{<B>(.+?)</B>}s) {
151     $fields{name} = $1;
152     }
153    
154     if ($content =~ m{<A\s+HREF="mailto:.+?>(.+?)</A>}s) {
155     my $email = $1;
156     $email =~ s/^\s+//s;
157     $email =~ s/\s+$//s;
158     $fields{email} = $email;
159     }
160    
161     if ($content =~ m{<H1>(.+?)</H1>}s) {
162     my $subject = $1;
163     # if you have just one public mail list, you
164     # might want to remove it's name from subject!
165     #$subject =~ s/\[.+?\]//;
166     if ( $subject =~ s/\s*Re:\s*//i ) {
167     $subject .= ' (Re)';
168     }
169     $fields{subject} = $subject;
170     }
171    
172     if ($content =~ m{<!--beginarticle-->(.*)<!--endarticle-->}s) {
173     $body = $1;
174     }
175    
176     return join "\n",
177     '<html>',
178     '<head>',
179     '<title>',
180     ($fields{subject} || '' ),
181     '</title>',
182     map( { qq[<meta name="$_" content="$fields{$_}">] } keys %fields ),
183     '</head><body>',
184     $body,
185     '</body>',
186     '</html>',
187     '';
188     }
189    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26