/[swish]/trunk/mailman/index_mailman.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/mailman/index_mailman.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (show annotations)
Sun Apr 18 06:31:38 2004 UTC (20 years ago) by dpavlin
File MIME type: text/plain
File size: 5005 byte(s)
index MailMan archives

1 #!/usr/bin/perl -w
2 use strict;
3
4 =pod
5
6 This is an example program for use with swish-e's -S prog indexing method.
7
8 This will scan and index a MailMan (http://www.list.org) mailing list archive.
9 This script is based on original index_hypermail.pl from swish-e distribution.
10
11 You might use a config file such as:
12
13 IndexDir /data/swish/mailman/index_mailman.pl
14 SwishProgParameters /var/lib/mailman/archives/public/
15
16 MetaNames swishtitle name email
17 PropertyNames name email
18 PropertyNamesDate sent
19 IndexContents HTML2 .html
20 StoreDescription HTML2 <body> 100000
21 UndefinedMetaTags ignore
22
23 ReplaceRules replace /var/lib/mailman/archives/public/ /pipermail/
24
25 All examples assume that this is Debian system. if it's not, you might want
26 to change paths above.
27
28 Index with the command:
29
30 ./swish-e -c swish.conf -S prog
31
32 See perldoc examples/swish.cgi for how to search this index. Here's a possible
33 config file for use with swish.cgi:
34
35 >cat .swishcgi.conf
36
37 return {
38 title => "Search the Foo List Archive",
39 swish_binary => '../swish-e',
40 display_props => [qw/ name email sent /],
41 sorts => [qw/swishrank swishtitle email sent/],
42 secondary_sort => [qw/sent desc/],
43 metanames => [qw/swishdefault swishtitle name email/],
44 name_labels => {
45 swishrank => 'Rank',
46 swishtitle => 'Subject Only',
47 name => "Poster's Name",
48 email => "Poster's Email",
49 sent => 'Message Date',
50 swishdefault => 'Subject & Body',
51 },
52
53 highlight => {
54 package => 'PhraseHighlight',
55 show_words => 10, # Number of swish words words to show around highlighted word
56 max_words => 100, # If no words are found to highlighted then show this many words
57 occurrences => 6, # Limit number of occurrences of highlighted words
58 highlight_on => '<font style="background:#FFFF99">',
59 highlight_off => '</font>',
60 meta_to_prop_map => { # this maps search metatags to display properties
61 swishdefault => [ qw/swishtitle swishdescription/ ],
62 swishtitle => [ qw/swishtitle/ ],
63 email => [ qw/email/ ],
64 name => [ qw/name/ ],
65 swishdocpath => [ qw/swishdocpath/ ],
66 },
67 },
68 date_ranges => {
69 property_name => 'sent', # property name to limit by
70 time_periods => [
71 'All',
72 'Today',
73 'Yesterday',
74 'This Week',
75 'Last Week',
76 'Last 90 Days',
77 'This Month',
78 'Last Month',
79 ],
80
81 line_break => 0,
82 default => 'All',
83 date_range => 1,
84 },
85 };
86
87
88
89 =cut
90
91
92
93 use File::Find; # for recursing a directory tree
94 use Date::Parse;
95
96 # Recurse the directory(s) passed in on the command line
97
98 find( { wanted => \&wanted, no_chdir => 1, follow => 1 }, @ARGV );
99
100
101 sub wanted {
102 return if -d;
103 return unless m!(^|/)\d+\.html$!;
104
105 print STDERR $File::Find::name,"\n";
106
107 my $mtime = (stat $File::Find::name )[9];
108
109 my $html = format_message($File::Find::name );
110 return unless $html;
111
112 my $size = length $html;
113
114 my $name = $File::Find::name;
115 $name =~ s[^./][];
116
117 print <<EOF;
118 Content-Length: $size
119 Last-Mtime: $mtime
120 Path-Name: $name
121
122 EOF
123
124 print $html;
125 }
126
127
128
129 sub format_message {
130 my $file = shift;
131 local $_;
132
133 unless ( open FH, "<$file" ) {
134 warn "Failed to open '$file'. Error: $!";
135 return;
136 }
137
138 my %fields;
139 my $content = '';
140 my $body = '';
141
142 while (<FH>) {
143 $content .= $_;
144 }
145
146 if ($content =~ m{<I>(.+?)</I>}s) {
147 $fields{sent} = str2time $1;
148 }
149
150 if ($content =~ m{<B>(.+?)</B>}s) {
151 $fields{name} = $1;
152 }
153
154 if ($content =~ m{<A\s+HREF="mailto:.+?>(.+?)</A>}s) {
155 my $email = $1;
156 $email =~ s/^\s+//s;
157 $email =~ s/\s+$//s;
158 $fields{email} = $email;
159 }
160
161 if ($content =~ m{<H1>(.+?)</H1>}s) {
162 my $subject = $1;
163 # if you have just one public mail list, you
164 # might want to remove it's name from subject!
165 #$subject =~ s/\[.+?\]//;
166 if ( $subject =~ s/\s*Re:\s*//i ) {
167 $subject .= ' (Re)';
168 }
169 $fields{subject} = $subject;
170 }
171
172 if ($content =~ m{<!--beginarticle-->(.*)<!--endarticle-->}s) {
173 $body = $1;
174 }
175
176 return join "\n",
177 '<html>',
178 '<head>',
179 '<title>',
180 ($fields{subject} || '' ),
181 '</title>',
182 map( { qq[<meta name="$_" content="$fields{$_}">] } keys %fields ),
183 '</head><body>',
184 $body,
185 '</body>',
186 '</html>',
187 '';
188 }
189

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26