/[webpac]/trunk/openisis/tcl/mf
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/openisis/tcl/mf

Parent Directory Parent Directory | Revision Log Revision Log


Revision 239 - (show annotations)
Mon Mar 8 17:49:13 2004 UTC (17 years, 4 months ago) by dpavlin
File size: 8251 byte(s)
including openisis 0.9.0 into webpac tree

1 #!/opt/isis/tcl/bin/tcl
2 # vim:syntax=tcl
3
4 # mail fix
5 # split a message into parts
6 # create a new message from selected headers and textified parts
7 # keep original non-text parts in files
8 # qualify cleaned message by dbacl
9
10 # in mime.tcl, BE SURE to remove the comment from the line
11 # error "termination string missing in $state(content)"
12 # cf. BUG 631314 "infinite loop on bad data"
13 #
14 # unterminated multipart is very frequent in SPAM,
15 # be VERY rare in non-automated mail
16 # when not erroring out, mime.tcl may hang
17 package require mime;
18 namespace import ::mime::*;
19
20 proc mail2txt file { exec sed -e {1,/^$/d} <$file >$file.txt }
21 proc html2txt file { exec lynx -dump -force_html -nolog $file >$file.txt }
22 proc winl2txt file { # stupid M$ "Cp1252"
23 exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file \
24 | fmt -s >$file.txt
25 }
26 proc win2txt file {
27 exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file >$file.txt
28 }
29 proc long2txt file { exec fmt -s <$file >$file.txt }
30 proc txt2txt file { file rename $file $file.txt }
31 proc pdf2txt file { exec pdftotext -raw $file - >$file.txt }
32 proc rtf2txt file {
33 exec unrtf --nopict --html $file 2>/dev/null \
34 | lynx -dump -force_html -nolist /proc/self/fd/0 >$file.txt
35 }
36 proc doc2txt file { exec antiword $file >$file.txt }
37
38 # fix a part in file
39 # - figure out real file type
40 # - figure out an extension so that webserver will deliver with proper ctype
41 # - in any case, rename or convert file to that extension
42 # - for some "text" types, also create additional plain text version
43 proc fixpart {file {ctype ""}} {
44 # figure out file type
45 set ftype ""
46 # shouldn't harm to believe image
47 switch -glob -- $ctype \
48 image/*gif { set ftype "GIF image" } \
49 image/*jpeg { set ftype "JPEG image" } \
50 image/*png { set ftype "PNG image"
51 }
52 if {"" == $ftype} {
53 set ftype [exec file -b $file]
54 }
55 # figure out extension and fix from file type
56 set ext txt
57 set fix ""
58 set nofix 0
59 switch -glob -- $ftype \
60 "*mail text*" { set fix [set ext mail]2txt } \
61 *HTML* { # maybe ASCII or ISO-8859 HTML document text, so check before ASCII
62 set fix [set ext html]2txt } \
63 "*Non-ISO extended-ASCII*long lines" { # assume windows stuff
64 set fix winl2txt } \
65 "*Non-ISO extended-ASCII*" { set fix win2txt } \
66 "ASCII*text*long lines" - "ISO-8859*text*long lines" { set fix long2txt } \
67 ASCII*text* - ISO-8859*text* {} \
68 PDF* { set fix [set ext pdf]2txt } \
69 "Rich Text Format*" { set fix [set ext rtf]2txt } \
70 "Microsoft Office Document*" { set fix [set ext doc]2txt } \
71 "*GIF image*" { set ext gif } \
72 "*JPEG image*" { set ext jpg } \
73 "*PC bitmap*" { set ext bmp } \
74 "*PNG image*" { set ext png } \
75 "*TIFF image*" { set ext tiff } \
76 Zip* { set ext zip } \
77 gzip* { set ext gz } \
78 *image* { set ext img } \
79 default { set ext bin }
80 ::puts stderr "file $file; c=\"$ctype\"; f=\"$ftype;\" e=$ext; fix=$fix"
81 # if we don't have no fix,
82 # or can not successfully apply a fix, we stick with the original
83 if {"" == $fix || [catch {$fix $file}]} {
84 if {"" != $fix} {
85 ::puts stderr "FIXPART $file:\t$::errorInfo"
86 catch {file delete $file.txt}
87 }
88 file rename $file $file.$ext
89 return $ext
90 }
91 # if the original is represented by a non-destructive txt fix,
92 # delete it and keep only the fix
93 if {"txt" == $ext} {
94 file delete $file
95 return txt
96 }
97 # we have a successfull non-equivalence fix -- keep both
98 file rename $file $file.$ext
99 list txt $ext
100 } ;# fixpart
101
102
103 # operate on array env
104 # file => base filename
105 # attc => saved attachments (list of extensions and original ctypes)
106 # part => list of plaintext part tokens
107 proc saveparts {envname msg} {
108 upvar $envname env
109 if [catch {getproperty $msg content} ctype] {
110 set ctype "text/plain"
111 }
112 # multiparts recurse
113 if ![catch {getproperty $msg parts} parts] {
114 if {"multipart/alternative" == $ctype} { # select one
115 set select [lindex $parts 0]
116 foreach p $parts {
117 if {![catch {getproperty $msg ctype} pc] && "text/plain" == $pc } {
118 set select $p
119 break
120 }
121 }
122 set parts [list $select]
123 }
124 # TODO: also care for mp/apple-double
125 foreach p $parts {
126 saveparts env $p
127 }
128 return
129 }
130 # a leave is not plain or too long: save as $file.i, i=0,1..
131 set file $env(file).[expr [llength $env(attc)]/2]
132 set fid [open $file w]
133 fconfigure $fid -translation binary
134 puts -nonewline $fid [getbody $msg]
135 close $fid
136 # check and fix the file, get primary and secondary extension
137 foreach {ext sec} [fixpart $file $ctype] break
138 # optionally create new part
139 if {"txt" == $ext} { # create new text/plain part
140 set fid [open $file.txt]
141 fconfigure $fid -translation binary
142 set body [read $fid 8000]
143 close $fid
144 set complete [expr 8000 >= [file size $file.txt]]
145 set charset ISO-8859-1
146 if {"text/plain" == $ctype} {
147 foreach {key val} [getproperty $msg params] {
148 if {"charset" == $key} { set charset $val }
149 }
150 }
151 lappend env(part) [initialize -canonical text/plain \
152 -param [list charset $charset] \
153 -encoding quoted-printable -string $body]
154 if {$complete || "" != $sec} {
155 file delete $file.txt
156 }
157 if {"" != $sec} { # use that as attachment
158 set ext $sec
159 } elseif {$complete} { # no attachment
160 return
161 }
162 }
163 lappend env(attc) $ext $ctype
164 } ;# saveparts
165
166
167 proc fixmsg {file dir link} {
168 set msg [initialize -file $dir$file]
169 array set env [list file $dir$file part "" attc ""]
170 saveparts env $msg
171 ::puts stderr "$dir$file\t$env(attc)\t$env(part)"
172 if {"" != $env(attc)} { # create referring part
173 set i -1
174 if ![file isdirectory .attc] { file mkdir .attc }
175 set body ""
176 foreach {ext ctype} $env(attc) {
177 set f $file.[incr i].$ext
178 file rename $dir$f .attc
179 append body "$link$f\t$ctype\n"
180 }
181 lappend env(part) [initialize -canonical text/plain \
182 -param {charset US-ASCII} -encoding 7bit -string $body]
183 }
184 if {1 == [llength $env(part)]} {
185 set nmsg [lindex $env(part) 0]
186 } else {
187 set nmsg [initialize -canonical multipart/mixed -parts $env(part)]
188 }
189 foreach {key vallist} [getheader $msg] {
190 foreach val $vallist {
191 # ::puts stderr "$key $val"
192 switch -- [string tolower $key] \
193 subject - date - from - to - cc - delivered-to - \
194 message-id - references - in-reply-to {
195 setheader $nmsg $key $val -mode append
196 }
197 }
198 }
199 if ![file isdirectory .orig] { file mkdir .orig }
200 file rename $dir$file .orig
201 finalize $msg
202 set fid [open $dir$file w]
203 copymessage $nmsg $fid
204 close $fid
205 finalize $nmsg
206 } ;# fixmsg
207
208
209 proc main {argv} {
210 foreach {file dir base link} $argv break
211 if {"" != $base} {
212 cd $base
213 }
214 if {"" != $dir} {
215 set dir $dir/
216 } elseif {[file isdirectory tmp]} {
217 set dir tmp/
218 }
219 if {"-" == $file || "" == $file} {
220 set file [clock format [clock seconds] -format %Y%m%d%H%M%S -gmt 1]
221 set try 0
222 set base $file
223 while {[catch {open $dir$file {WRONLY CREAT EXCL}} fid]} {
224 if {99 == $try} {
225 error "too many tries on $dir$file"
226 }
227 set file $base-[incr try]
228 }
229 while {![eof stdin]} {
230 puts -nonewline $fid [read stdin 8192]
231 }
232 close $fid
233 }
234 set spam ""
235 if [catch {fixmsg $file $dir $link} err] {
236 # some errors have 99.99% SPAM probability
237 switch -glob -- $err \
238 "termination string missing in multipart*" - \
239 "multiple Content-Type fields*" {
240 set spam "FIXMSG $err"
241 }
242 ::puts stderr "FIXMSG $file:\t$::errorInfo"
243 }
244 if ![file isdirectory .spam] { file mkdir .spam }
245 if {"" == $spam} {
246 catch {exec dbacl -c .cat/ok -c .cat/spam $dir$file}
247 if {2 != [scan $::errorCode "CHILDSTATUS %d %d" pid code]} {
248 ::puts stderr "OOPS! could not scan '$::errorCode'"
249 } elseif {2 == $code} {
250 set spam "DBACL"
251 }
252 }
253 if {"" != $spam} {
254 ::puts stderr "FIXSPAM\t$file\t$spam"
255 file rename $dir$file .spam
256 # collect some headers even if message parsing failed
257 # for quick scan, ignore continuation lines
258 set headers "$file\t$spam\n"
259 set fid [open .spam/$file]
260 while {[gets $fid line]} {
261 if [regexp {^([\w\-]+):\s*(.*)} $line - key val] {
262 switch -- [string tolower $key] \
263 from - to - subject - delivered-to {
264 append headers $line "\n"
265 }
266 }
267 }
268 close $fid
269 set fid [open .spam/.log {WRONLY APPEND CREAT}]
270 puts $fid $headers ;# let puts append a newline
271 close $fid
272 return
273 }
274 if [file isdirectory new] {
275 file rename $dir$file new
276 }
277 }
278
279 main $argv

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26