1 |
#!/opt/isis/tcl/bin/tcl |
2 |
# vim:syntax=tcl |
3 |
|
4 |
# mail fix |
5 |
# split a message into parts |
6 |
# create a new message from selected headers and textified parts |
7 |
# keep original non-text parts in files |
8 |
# qualify cleaned message by dbacl |
9 |
|
10 |
# in mime.tcl, BE SURE to remove the comment from the line |
11 |
# error "termination string missing in $state(content)" |
12 |
# cf. BUG 631314 "infinite loop on bad data" |
13 |
# |
14 |
# unterminated multipart is very frequent in SPAM, |
15 |
# be VERY rare in non-automated mail |
16 |
# when not erroring out, mime.tcl may hang |
17 |
package require mime; |
18 |
namespace import ::mime::*; |
19 |
|
20 |
proc mail2txt file { exec sed -e {1,/^$/d} <$file >$file.txt } |
21 |
proc html2txt file { exec lynx -dump -force_html -nolog $file >$file.txt } |
22 |
proc winl2txt file { # stupid M$ "Cp1252" |
23 |
exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file \ |
24 |
| fmt -s >$file.txt |
25 |
} |
26 |
proc win2txt file { |
27 |
exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file >$file.txt |
28 |
} |
29 |
proc long2txt file { exec fmt -s <$file >$file.txt } |
30 |
proc txt2txt file { file rename $file $file.txt } |
31 |
proc pdf2txt file { exec pdftotext -raw $file - >$file.txt } |
32 |
proc rtf2txt file { |
33 |
exec unrtf --nopict --html $file 2>/dev/null \ |
34 |
| lynx -dump -force_html -nolist /proc/self/fd/0 >$file.txt |
35 |
} |
36 |
proc doc2txt file { exec antiword $file >$file.txt } |
37 |
|
38 |
# fix a part in file |
39 |
# - figure out real file type |
40 |
# - figure out an extension so that webserver will deliver with proper ctype |
41 |
# - in any case, rename or convert file to that extension |
42 |
# - for some "text" types, also create additional plain text version |
43 |
proc fixpart {file {ctype ""}} { |
44 |
# figure out file type |
45 |
set ftype "" |
46 |
# shouldn't harm to believe image |
47 |
switch -glob -- $ctype \ |
48 |
image/*gif { set ftype "GIF image" } \ |
49 |
image/*jpeg { set ftype "JPEG image" } \ |
50 |
image/*png { set ftype "PNG image" |
51 |
} |
52 |
if {"" == $ftype} { |
53 |
set ftype [exec file -b $file] |
54 |
} |
55 |
# figure out extension and fix from file type |
56 |
set ext txt |
57 |
set fix "" |
58 |
set nofix 0 |
59 |
switch -glob -- $ftype \ |
60 |
"*mail text*" { set fix [set ext mail]2txt } \ |
61 |
*HTML* { # maybe ASCII or ISO-8859 HTML document text, so check before ASCII |
62 |
set fix [set ext html]2txt } \ |
63 |
"*Non-ISO extended-ASCII*long lines" { # assume windows stuff |
64 |
set fix winl2txt } \ |
65 |
"*Non-ISO extended-ASCII*" { set fix win2txt } \ |
66 |
"ASCII*text*long lines" - "ISO-8859*text*long lines" { set fix long2txt } \ |
67 |
ASCII*text* - ISO-8859*text* {} \ |
68 |
PDF* { set fix [set ext pdf]2txt } \ |
69 |
"Rich Text Format*" { set fix [set ext rtf]2txt } \ |
70 |
"Microsoft Office Document*" { set fix [set ext doc]2txt } \ |
71 |
"*GIF image*" { set ext gif } \ |
72 |
"*JPEG image*" { set ext jpg } \ |
73 |
"*PC bitmap*" { set ext bmp } \ |
74 |
"*PNG image*" { set ext png } \ |
75 |
"*TIFF image*" { set ext tiff } \ |
76 |
Zip* { set ext zip } \ |
77 |
gzip* { set ext gz } \ |
78 |
*image* { set ext img } \ |
79 |
default { set ext bin } |
80 |
::puts stderr "file $file; c=\"$ctype\"; f=\"$ftype;\" e=$ext; fix=$fix" |
81 |
# if we don't have no fix, |
82 |
# or can not successfully apply a fix, we stick with the original |
83 |
if {"" == $fix || [catch {$fix $file}]} { |
84 |
if {"" != $fix} { |
85 |
::puts stderr "FIXPART $file:\t$::errorInfo" |
86 |
catch {file delete $file.txt} |
87 |
} |
88 |
file rename $file $file.$ext |
89 |
return $ext |
90 |
} |
91 |
# if the original is represented by a non-destructive txt fix, |
92 |
# delete it and keep only the fix |
93 |
if {"txt" == $ext} { |
94 |
file delete $file |
95 |
return txt |
96 |
} |
97 |
# we have a successfull non-equivalence fix -- keep both |
98 |
file rename $file $file.$ext |
99 |
list txt $ext |
100 |
} ;# fixpart |
101 |
|
102 |
|
103 |
# operate on array env |
104 |
# file => base filename |
105 |
# attc => saved attachments (list of extensions and original ctypes) |
106 |
# part => list of plaintext part tokens |
107 |
proc saveparts {envname msg} { |
108 |
upvar $envname env |
109 |
if [catch {getproperty $msg content} ctype] { |
110 |
set ctype "text/plain" |
111 |
} |
112 |
# multiparts recurse |
113 |
if ![catch {getproperty $msg parts} parts] { |
114 |
if {"multipart/alternative" == $ctype} { # select one |
115 |
set select [lindex $parts 0] |
116 |
foreach p $parts { |
117 |
if {![catch {getproperty $msg ctype} pc] && "text/plain" == $pc } { |
118 |
set select $p |
119 |
break |
120 |
} |
121 |
} |
122 |
set parts [list $select] |
123 |
} |
124 |
# TODO: also care for mp/apple-double |
125 |
foreach p $parts { |
126 |
saveparts env $p |
127 |
} |
128 |
return |
129 |
} |
130 |
# a leave is not plain or too long: save as $file.i, i=0,1.. |
131 |
set file $env(file).[expr [llength $env(attc)]/2] |
132 |
set fid [open $file w] |
133 |
fconfigure $fid -translation binary |
134 |
puts -nonewline $fid [getbody $msg] |
135 |
close $fid |
136 |
# check and fix the file, get primary and secondary extension |
137 |
foreach {ext sec} [fixpart $file $ctype] break |
138 |
# optionally create new part |
139 |
if {"txt" == $ext} { # create new text/plain part |
140 |
set fid [open $file.txt] |
141 |
fconfigure $fid -translation binary |
142 |
set body [read $fid 8000] |
143 |
close $fid |
144 |
set complete [expr 8000 >= [file size $file.txt]] |
145 |
set charset ISO-8859-1 |
146 |
if {"text/plain" == $ctype} { |
147 |
foreach {key val} [getproperty $msg params] { |
148 |
if {"charset" == $key} { set charset $val } |
149 |
} |
150 |
} |
151 |
lappend env(part) [initialize -canonical text/plain \ |
152 |
-param [list charset $charset] \ |
153 |
-encoding quoted-printable -string $body] |
154 |
if {$complete || "" != $sec} { |
155 |
file delete $file.txt |
156 |
} |
157 |
if {"" != $sec} { # use that as attachment |
158 |
set ext $sec |
159 |
} elseif {$complete} { # no attachment |
160 |
return |
161 |
} |
162 |
} |
163 |
lappend env(attc) $ext $ctype |
164 |
} ;# saveparts |
165 |
|
166 |
|
167 |
proc fixmsg {file dir link} { |
168 |
set msg [initialize -file $dir$file] |
169 |
array set env [list file $dir$file part "" attc ""] |
170 |
saveparts env $msg |
171 |
::puts stderr "$dir$file\t$env(attc)\t$env(part)" |
172 |
if {"" != $env(attc)} { # create referring part |
173 |
set i -1 |
174 |
if ![file isdirectory .attc] { file mkdir .attc } |
175 |
set body "" |
176 |
foreach {ext ctype} $env(attc) { |
177 |
set f $file.[incr i].$ext |
178 |
file rename $dir$f .attc |
179 |
append body "$link$f\t$ctype\n" |
180 |
} |
181 |
lappend env(part) [initialize -canonical text/plain \ |
182 |
-param {charset US-ASCII} -encoding 7bit -string $body] |
183 |
} |
184 |
if {1 == [llength $env(part)]} { |
185 |
set nmsg [lindex $env(part) 0] |
186 |
} else { |
187 |
set nmsg [initialize -canonical multipart/mixed -parts $env(part)] |
188 |
} |
189 |
foreach {key vallist} [getheader $msg] { |
190 |
foreach val $vallist { |
191 |
# ::puts stderr "$key $val" |
192 |
switch -- [string tolower $key] \ |
193 |
subject - date - from - to - cc - delivered-to - \ |
194 |
message-id - references - in-reply-to { |
195 |
setheader $nmsg $key $val -mode append |
196 |
} |
197 |
} |
198 |
} |
199 |
if ![file isdirectory .orig] { file mkdir .orig } |
200 |
file rename $dir$file .orig |
201 |
finalize $msg |
202 |
set fid [open $dir$file w] |
203 |
copymessage $nmsg $fid |
204 |
close $fid |
205 |
finalize $nmsg |
206 |
} ;# fixmsg |
207 |
|
208 |
|
209 |
proc main {argv} { |
210 |
foreach {file dir base link} $argv break |
211 |
if {"" != $base} { |
212 |
cd $base |
213 |
} |
214 |
if {"" != $dir} { |
215 |
set dir $dir/ |
216 |
} elseif {[file isdirectory tmp]} { |
217 |
set dir tmp/ |
218 |
} |
219 |
if {"-" == $file || "" == $file} { |
220 |
set file [clock format [clock seconds] -format %Y%m%d%H%M%S -gmt 1] |
221 |
set try 0 |
222 |
set base $file |
223 |
while {[catch {open $dir$file {WRONLY CREAT EXCL}} fid]} { |
224 |
if {99 == $try} { |
225 |
error "too many tries on $dir$file" |
226 |
} |
227 |
set file $base-[incr try] |
228 |
} |
229 |
while {![eof stdin]} { |
230 |
puts -nonewline $fid [read stdin 8192] |
231 |
} |
232 |
close $fid |
233 |
} |
234 |
set spam "" |
235 |
if [catch {fixmsg $file $dir $link} err] { |
236 |
# some errors have 99.99% SPAM probability |
237 |
switch -glob -- $err \ |
238 |
"termination string missing in multipart*" - \ |
239 |
"multiple Content-Type fields*" { |
240 |
set spam "FIXMSG $err" |
241 |
} |
242 |
::puts stderr "FIXMSG $file:\t$::errorInfo" |
243 |
} |
244 |
if ![file isdirectory .spam] { file mkdir .spam } |
245 |
if {"" == $spam} { |
246 |
catch {exec dbacl -c .cat/ok -c .cat/spam $dir$file} |
247 |
if {2 != [scan $::errorCode "CHILDSTATUS %d %d" pid code]} { |
248 |
::puts stderr "OOPS! could not scan '$::errorCode'" |
249 |
} elseif {2 == $code} { |
250 |
set spam "DBACL" |
251 |
} |
252 |
} |
253 |
if {"" != $spam} { |
254 |
::puts stderr "FIXSPAM\t$file\t$spam" |
255 |
file rename $dir$file .spam |
256 |
# collect some headers even if message parsing failed |
257 |
# for quick scan, ignore continuation lines |
258 |
set headers "$file\t$spam\n" |
259 |
set fid [open .spam/$file] |
260 |
while {[gets $fid line]} { |
261 |
if [regexp {^([\w\-]+):\s*(.*)} $line - key val] { |
262 |
switch -- [string tolower $key] \ |
263 |
from - to - subject - delivered-to { |
264 |
append headers $line "\n" |
265 |
} |
266 |
} |
267 |
} |
268 |
close $fid |
269 |
set fid [open .spam/.log {WRONLY APPEND CREAT}] |
270 |
puts $fid $headers ;# let puts append a newline |
271 |
close $fid |
272 |
return |
273 |
} |
274 |
if [file isdirectory new] { |
275 |
file rename $dir$file new |
276 |
} |
277 |
} |
278 |
|
279 |
main $argv |