1 |
dpavlin |
237 |
#!/opt/isis/tcl/bin/tcl |
2 |
|
|
# vim:syntax=tcl |
3 |
|
|
|
4 |
|
|
# mail fix |
5 |
|
|
# split a message into parts |
6 |
|
|
# create a new message from selected headers and textified parts |
7 |
|
|
# keep original non-text parts in files |
8 |
|
|
# qualify cleaned message by dbacl |
9 |
|
|
|
10 |
|
|
# in mime.tcl, BE SURE to remove the comment from the line |
11 |
|
|
# error "termination string missing in $state(content)" |
12 |
|
|
# cf. BUG 631314 "infinite loop on bad data" |
13 |
|
|
# |
14 |
|
|
# unterminated multipart is very frequent in SPAM, |
15 |
|
|
# be VERY rare in non-automated mail |
16 |
|
|
# when not erroring out, mime.tcl may hang |
17 |
|
|
package require mime; |
18 |
|
|
namespace import ::mime::*; |
19 |
|
|
|
20 |
|
|
proc mail2txt file { exec sed -e {1,/^$/d} <$file >$file.txt } |
21 |
|
|
proc html2txt file { exec lynx -dump -force_html -nolog $file >$file.txt } |
22 |
|
|
proc winl2txt file { # stupid M$ "Cp1252" |
23 |
|
|
exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file \ |
24 |
|
|
| fmt -s >$file.txt |
25 |
|
|
} |
26 |
|
|
proc win2txt file { |
27 |
|
|
exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file >$file.txt |
28 |
|
|
} |
29 |
|
|
proc long2txt file { exec fmt -s <$file >$file.txt } |
30 |
|
|
proc txt2txt file { file rename $file $file.txt } |
31 |
|
|
proc pdf2txt file { exec pdftotext -raw $file - >$file.txt } |
32 |
|
|
proc rtf2txt file { |
33 |
|
|
exec unrtf --nopict --html $file 2>/dev/null \ |
34 |
|
|
| lynx -dump -force_html -nolist /proc/self/fd/0 >$file.txt |
35 |
|
|
} |
36 |
|
|
proc doc2txt file { exec antiword $file >$file.txt } |
37 |
|
|
|
38 |
|
|
# fix a part in file |
39 |
|
|
# - figure out real file type |
40 |
|
|
# - figure out an extension so that webserver will deliver with proper ctype |
41 |
|
|
# - in any case, rename or convert file to that extension |
42 |
|
|
# - for some "text" types, also create additional plain text version |
43 |
|
|
proc fixpart {file {ctype ""}} { |
44 |
|
|
# figure out file type |
45 |
|
|
set ftype "" |
46 |
|
|
# shouldn't harm to believe image |
47 |
|
|
switch -glob -- $ctype \ |
48 |
|
|
image/*gif { set ftype "GIF image" } \ |
49 |
|
|
image/*jpeg { set ftype "JPEG image" } \ |
50 |
|
|
image/*png { set ftype "PNG image" |
51 |
|
|
} |
52 |
|
|
if {"" == $ftype} { |
53 |
|
|
set ftype [exec file -b $file] |
54 |
|
|
} |
55 |
|
|
# figure out extension and fix from file type |
56 |
|
|
set ext txt |
57 |
|
|
set fix "" |
58 |
|
|
set nofix 0 |
59 |
|
|
switch -glob -- $ftype \ |
60 |
|
|
"*mail text*" { set fix [set ext mail]2txt } \ |
61 |
|
|
*HTML* { # maybe ASCII or ISO-8859 HTML document text, so check before ASCII |
62 |
|
|
set fix [set ext html]2txt } \ |
63 |
|
|
"*Non-ISO extended-ASCII*long lines" { # assume windows stuff |
64 |
|
|
set fix winl2txt } \ |
65 |
|
|
"*Non-ISO extended-ASCII*" { set fix win2txt } \ |
66 |
|
|
"ASCII*text*long lines" - "ISO-8859*text*long lines" { set fix long2txt } \ |
67 |
|
|
ASCII*text* - ISO-8859*text* {} \ |
68 |
|
|
PDF* { set fix [set ext pdf]2txt } \ |
69 |
|
|
"Rich Text Format*" { set fix [set ext rtf]2txt } \ |
70 |
|
|
"Microsoft Office Document*" { set fix [set ext doc]2txt } \ |
71 |
|
|
"*GIF image*" { set ext gif } \ |
72 |
|
|
"*JPEG image*" { set ext jpg } \ |
73 |
|
|
"*PC bitmap*" { set ext bmp } \ |
74 |
|
|
"*PNG image*" { set ext png } \ |
75 |
|
|
"*TIFF image*" { set ext tiff } \ |
76 |
|
|
Zip* { set ext zip } \ |
77 |
|
|
gzip* { set ext gz } \ |
78 |
|
|
*image* { set ext img } \ |
79 |
|
|
default { set ext bin } |
80 |
|
|
::puts stderr "file $file; c=\"$ctype\"; f=\"$ftype;\" e=$ext; fix=$fix" |
81 |
|
|
# if we don't have no fix, |
82 |
|
|
# or can not successfully apply a fix, we stick with the original |
83 |
|
|
if {"" == $fix || [catch {$fix $file}]} { |
84 |
|
|
if {"" != $fix} { |
85 |
|
|
::puts stderr "FIXPART $file:\t$::errorInfo" |
86 |
|
|
catch {file delete $file.txt} |
87 |
|
|
} |
88 |
|
|
file rename $file $file.$ext |
89 |
|
|
return $ext |
90 |
|
|
} |
91 |
|
|
# if the original is represented by a non-destructive txt fix, |
92 |
|
|
# delete it and keep only the fix |
93 |
|
|
if {"txt" == $ext} { |
94 |
|
|
file delete $file |
95 |
|
|
return txt |
96 |
|
|
} |
97 |
|
|
# we have a successfull non-equivalence fix -- keep both |
98 |
|
|
file rename $file $file.$ext |
99 |
|
|
list txt $ext |
100 |
|
|
} ;# fixpart |
101 |
|
|
|
102 |
|
|
|
103 |
|
|
# operate on array env |
104 |
|
|
# file => base filename |
105 |
|
|
# attc => saved attachments (list of extensions and original ctypes) |
106 |
|
|
# part => list of plaintext part tokens |
107 |
|
|
proc saveparts {envname msg} { |
108 |
|
|
upvar $envname env |
109 |
|
|
if [catch {getproperty $msg content} ctype] { |
110 |
|
|
set ctype "text/plain" |
111 |
|
|
} |
112 |
|
|
# multiparts recurse |
113 |
|
|
if ![catch {getproperty $msg parts} parts] { |
114 |
|
|
if {"multipart/alternative" == $ctype} { # select one |
115 |
|
|
set select [lindex $parts 0] |
116 |
|
|
foreach p $parts { |
117 |
|
|
if {![catch {getproperty $msg ctype} pc] && "text/plain" == $pc } { |
118 |
|
|
set select $p |
119 |
|
|
break |
120 |
|
|
} |
121 |
|
|
} |
122 |
|
|
set parts [list $select] |
123 |
|
|
} |
124 |
|
|
# TODO: also care for mp/apple-double |
125 |
|
|
foreach p $parts { |
126 |
|
|
saveparts env $p |
127 |
|
|
} |
128 |
|
|
return |
129 |
|
|
} |
130 |
|
|
# a leave is not plain or too long: save as $file.i, i=0,1.. |
131 |
|
|
set file $env(file).[expr [llength $env(attc)]/2] |
132 |
|
|
set fid [open $file w] |
133 |
|
|
fconfigure $fid -translation binary |
134 |
|
|
puts -nonewline $fid [getbody $msg] |
135 |
|
|
close $fid |
136 |
|
|
# check and fix the file, get primary and secondary extension |
137 |
|
|
foreach {ext sec} [fixpart $file $ctype] break |
138 |
|
|
# optionally create new part |
139 |
|
|
if {"txt" == $ext} { # create new text/plain part |
140 |
|
|
set fid [open $file.txt] |
141 |
|
|
fconfigure $fid -translation binary |
142 |
|
|
set body [read $fid 8000] |
143 |
|
|
close $fid |
144 |
|
|
set complete [expr 8000 >= [file size $file.txt]] |
145 |
|
|
set charset ISO-8859-1 |
146 |
|
|
if {"text/plain" == $ctype} { |
147 |
|
|
foreach {key val} [getproperty $msg params] { |
148 |
|
|
if {"charset" == $key} { set charset $val } |
149 |
|
|
} |
150 |
|
|
} |
151 |
|
|
lappend env(part) [initialize -canonical text/plain \ |
152 |
|
|
-param [list charset $charset] \ |
153 |
|
|
-encoding quoted-printable -string $body] |
154 |
|
|
if {$complete || "" != $sec} { |
155 |
|
|
file delete $file.txt |
156 |
|
|
} |
157 |
|
|
if {"" != $sec} { # use that as attachment |
158 |
|
|
set ext $sec |
159 |
|
|
} elseif {$complete} { # no attachment |
160 |
|
|
return |
161 |
|
|
} |
162 |
|
|
} |
163 |
|
|
lappend env(attc) $ext $ctype |
164 |
|
|
} ;# saveparts |
165 |
|
|
|
166 |
|
|
|
167 |
|
|
proc fixmsg {file dir link} { |
168 |
|
|
set msg [initialize -file $dir$file] |
169 |
|
|
array set env [list file $dir$file part "" attc ""] |
170 |
|
|
saveparts env $msg |
171 |
|
|
::puts stderr "$dir$file\t$env(attc)\t$env(part)" |
172 |
|
|
if {"" != $env(attc)} { # create referring part |
173 |
|
|
set i -1 |
174 |
|
|
if ![file isdirectory .attc] { file mkdir .attc } |
175 |
|
|
set body "" |
176 |
|
|
foreach {ext ctype} $env(attc) { |
177 |
|
|
set f $file.[incr i].$ext |
178 |
|
|
file rename $dir$f .attc |
179 |
|
|
append body "$link$f\t$ctype\n" |
180 |
|
|
} |
181 |
|
|
lappend env(part) [initialize -canonical text/plain \ |
182 |
|
|
-param {charset US-ASCII} -encoding 7bit -string $body] |
183 |
|
|
} |
184 |
|
|
if {1 == [llength $env(part)]} { |
185 |
|
|
set nmsg [lindex $env(part) 0] |
186 |
|
|
} else { |
187 |
|
|
set nmsg [initialize -canonical multipart/mixed -parts $env(part)] |
188 |
|
|
} |
189 |
|
|
foreach {key vallist} [getheader $msg] { |
190 |
|
|
foreach val $vallist { |
191 |
|
|
# ::puts stderr "$key $val" |
192 |
|
|
switch -- [string tolower $key] \ |
193 |
|
|
subject - date - from - to - cc - delivered-to - \ |
194 |
|
|
message-id - references - in-reply-to { |
195 |
|
|
setheader $nmsg $key $val -mode append |
196 |
|
|
} |
197 |
|
|
} |
198 |
|
|
} |
199 |
|
|
if ![file isdirectory .orig] { file mkdir .orig } |
200 |
|
|
file rename $dir$file .orig |
201 |
|
|
finalize $msg |
202 |
|
|
set fid [open $dir$file w] |
203 |
|
|
copymessage $nmsg $fid |
204 |
|
|
close $fid |
205 |
|
|
finalize $nmsg |
206 |
|
|
} ;# fixmsg |
207 |
|
|
|
208 |
|
|
|
209 |
|
|
proc main {argv} { |
210 |
|
|
foreach {file dir base link} $argv break |
211 |
|
|
if {"" != $base} { |
212 |
|
|
cd $base |
213 |
|
|
} |
214 |
|
|
if {"" != $dir} { |
215 |
|
|
set dir $dir/ |
216 |
|
|
} elseif {[file isdirectory tmp]} { |
217 |
|
|
set dir tmp/ |
218 |
|
|
} |
219 |
|
|
if {"-" == $file || "" == $file} { |
220 |
|
|
set file [clock format [clock seconds] -format %Y%m%d%H%M%S -gmt 1] |
221 |
|
|
set try 0 |
222 |
|
|
set base $file |
223 |
|
|
while {[catch {open $dir$file {WRONLY CREAT EXCL}} fid]} { |
224 |
|
|
if {99 == $try} { |
225 |
|
|
error "too many tries on $dir$file" |
226 |
|
|
} |
227 |
|
|
set file $base-[incr try] |
228 |
|
|
} |
229 |
|
|
while {![eof stdin]} { |
230 |
|
|
puts -nonewline $fid [read stdin 8192] |
231 |
|
|
} |
232 |
|
|
close $fid |
233 |
|
|
} |
234 |
|
|
set spam "" |
235 |
|
|
if [catch {fixmsg $file $dir $link} err] { |
236 |
|
|
# some errors have 99.99% SPAM probability |
237 |
|
|
switch -glob -- $err \ |
238 |
|
|
"termination string missing in multipart*" - \ |
239 |
|
|
"multiple Content-Type fields*" { |
240 |
|
|
set spam "FIXMSG $err" |
241 |
|
|
} |
242 |
|
|
::puts stderr "FIXMSG $file:\t$::errorInfo" |
243 |
|
|
} |
244 |
|
|
if ![file isdirectory .spam] { file mkdir .spam } |
245 |
|
|
if {"" == $spam} { |
246 |
|
|
catch {exec dbacl -c .cat/ok -c .cat/spam $dir$file} |
247 |
|
|
if {2 != [scan $::errorCode "CHILDSTATUS %d %d" pid code]} { |
248 |
|
|
::puts stderr "OOPS! could not scan '$::errorCode'" |
249 |
|
|
} elseif {2 == $code} { |
250 |
|
|
set spam "DBACL" |
251 |
|
|
} |
252 |
|
|
} |
253 |
|
|
if {"" != $spam} { |
254 |
|
|
::puts stderr "FIXSPAM\t$file\t$spam" |
255 |
|
|
file rename $dir$file .spam |
256 |
|
|
# collect some headers even if message parsing failed |
257 |
|
|
# for quick scan, ignore continuation lines |
258 |
|
|
set headers "$file\t$spam\n" |
259 |
|
|
set fid [open .spam/$file] |
260 |
|
|
while {[gets $fid line]} { |
261 |
|
|
if [regexp {^([\w\-]+):\s*(.*)} $line - key val] { |
262 |
|
|
switch -- [string tolower $key] \ |
263 |
|
|
from - to - subject - delivered-to { |
264 |
|
|
append headers $line "\n" |
265 |
|
|
} |
266 |
|
|
} |
267 |
|
|
} |
268 |
|
|
close $fid |
269 |
|
|
set fid [open .spam/.log {WRONLY APPEND CREAT}] |
270 |
|
|
puts $fid $headers ;# let puts append a newline |
271 |
|
|
close $fid |
272 |
|
|
return |
273 |
|
|
} |
274 |
|
|
if [file isdirectory new] { |
275 |
|
|
file rename $dir$file new |
276 |
|
|
} |
277 |
|
|
} |
278 |
|
|
|
279 |
|
|
main $argv |