/[webpac]/trunk/openisis/tcl/mf
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/openisis/tcl/mf

Parent Directory Parent Directory | Revision Log Revision Log


Revision 237 - (hide annotations)
Mon Mar 8 17:43:12 2004 UTC (20 years ago) by dpavlin
Original Path: openisis/current/tcl/mf
File size: 8251 byte(s)
initial import of openisis 0.9.0 vendor drop

1 dpavlin 237 #!/opt/isis/tcl/bin/tcl
2     # vim:syntax=tcl
3    
4     # mail fix
5     # split a message into parts
6     # create a new message from selected headers and textified parts
7     # keep original non-text parts in files
8     # qualify cleaned message by dbacl
9    
10     # in mime.tcl, BE SURE to remove the comment from the line
11     # error "termination string missing in $state(content)"
12     # cf. BUG 631314 "infinite loop on bad data"
13     #
14     # unterminated multipart is very frequent in SPAM,
15     # be VERY rare in non-automated mail
16     # when not erroring out, mime.tcl may hang
17     package require mime;
18     namespace import ::mime::*;
19    
20     proc mail2txt file { exec sed -e {1,/^$/d} <$file >$file.txt }
21     proc html2txt file { exec lynx -dump -force_html -nolog $file >$file.txt }
22     proc winl2txt file { # stupid M$ "Cp1252"
23     exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file \
24     | fmt -s >$file.txt
25     }
26     proc win2txt file {
27     exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file >$file.txt
28     }
29     proc long2txt file { exec fmt -s <$file >$file.txt }
30     proc txt2txt file { file rename $file $file.txt }
31     proc pdf2txt file { exec pdftotext -raw $file - >$file.txt }
32     proc rtf2txt file {
33     exec unrtf --nopict --html $file 2>/dev/null \
34     | lynx -dump -force_html -nolist /proc/self/fd/0 >$file.txt
35     }
36     proc doc2txt file { exec antiword $file >$file.txt }
37    
38     # fix a part in file
39     # - figure out real file type
40     # - figure out an extension so that webserver will deliver with proper ctype
41     # - in any case, rename or convert file to that extension
42     # - for some "text" types, also create additional plain text version
43     proc fixpart {file {ctype ""}} {
44     # figure out file type
45     set ftype ""
46     # shouldn't harm to believe image
47     switch -glob -- $ctype \
48     image/*gif { set ftype "GIF image" } \
49     image/*jpeg { set ftype "JPEG image" } \
50     image/*png { set ftype "PNG image"
51     }
52     if {"" == $ftype} {
53     set ftype [exec file -b $file]
54     }
55     # figure out extension and fix from file type
56     set ext txt
57     set fix ""
58     set nofix 0
59     switch -glob -- $ftype \
60     "*mail text*" { set fix [set ext mail]2txt } \
61     *HTML* { # maybe ASCII or ISO-8859 HTML document text, so check before ASCII
62     set fix [set ext html]2txt } \
63     "*Non-ISO extended-ASCII*long lines" { # assume windows stuff
64     set fix winl2txt } \
65     "*Non-ISO extended-ASCII*" { set fix win2txt } \
66     "ASCII*text*long lines" - "ISO-8859*text*long lines" { set fix long2txt } \
67     ASCII*text* - ISO-8859*text* {} \
68     PDF* { set fix [set ext pdf]2txt } \
69     "Rich Text Format*" { set fix [set ext rtf]2txt } \
70     "Microsoft Office Document*" { set fix [set ext doc]2txt } \
71     "*GIF image*" { set ext gif } \
72     "*JPEG image*" { set ext jpg } \
73     "*PC bitmap*" { set ext bmp } \
74     "*PNG image*" { set ext png } \
75     "*TIFF image*" { set ext tiff } \
76     Zip* { set ext zip } \
77     gzip* { set ext gz } \
78     *image* { set ext img } \
79     default { set ext bin }
80     ::puts stderr "file $file; c=\"$ctype\"; f=\"$ftype;\" e=$ext; fix=$fix"
81     # if we don't have no fix,
82     # or can not successfully apply a fix, we stick with the original
83     if {"" == $fix || [catch {$fix $file}]} {
84     if {"" != $fix} {
85     ::puts stderr "FIXPART $file:\t$::errorInfo"
86     catch {file delete $file.txt}
87     }
88     file rename $file $file.$ext
89     return $ext
90     }
91     # if the original is represented by a non-destructive txt fix,
92     # delete it and keep only the fix
93     if {"txt" == $ext} {
94     file delete $file
95     return txt
96     }
97     # we have a successfull non-equivalence fix -- keep both
98     file rename $file $file.$ext
99     list txt $ext
100     } ;# fixpart
101    
102    
103     # operate on array env
104     # file => base filename
105     # attc => saved attachments (list of extensions and original ctypes)
106     # part => list of plaintext part tokens
107     proc saveparts {envname msg} {
108     upvar $envname env
109     if [catch {getproperty $msg content} ctype] {
110     set ctype "text/plain"
111     }
112     # multiparts recurse
113     if ![catch {getproperty $msg parts} parts] {
114     if {"multipart/alternative" == $ctype} { # select one
115     set select [lindex $parts 0]
116     foreach p $parts {
117     if {![catch {getproperty $msg ctype} pc] && "text/plain" == $pc } {
118     set select $p
119     break
120     }
121     }
122     set parts [list $select]
123     }
124     # TODO: also care for mp/apple-double
125     foreach p $parts {
126     saveparts env $p
127     }
128     return
129     }
130     # a leave is not plain or too long: save as $file.i, i=0,1..
131     set file $env(file).[expr [llength $env(attc)]/2]
132     set fid [open $file w]
133     fconfigure $fid -translation binary
134     puts -nonewline $fid [getbody $msg]
135     close $fid
136     # check and fix the file, get primary and secondary extension
137     foreach {ext sec} [fixpart $file $ctype] break
138     # optionally create new part
139     if {"txt" == $ext} { # create new text/plain part
140     set fid [open $file.txt]
141     fconfigure $fid -translation binary
142     set body [read $fid 8000]
143     close $fid
144     set complete [expr 8000 >= [file size $file.txt]]
145     set charset ISO-8859-1
146     if {"text/plain" == $ctype} {
147     foreach {key val} [getproperty $msg params] {
148     if {"charset" == $key} { set charset $val }
149     }
150     }
151     lappend env(part) [initialize -canonical text/plain \
152     -param [list charset $charset] \
153     -encoding quoted-printable -string $body]
154     if {$complete || "" != $sec} {
155     file delete $file.txt
156     }
157     if {"" != $sec} { # use that as attachment
158     set ext $sec
159     } elseif {$complete} { # no attachment
160     return
161     }
162     }
163     lappend env(attc) $ext $ctype
164     } ;# saveparts
165    
166    
167     proc fixmsg {file dir link} {
168     set msg [initialize -file $dir$file]
169     array set env [list file $dir$file part "" attc ""]
170     saveparts env $msg
171     ::puts stderr "$dir$file\t$env(attc)\t$env(part)"
172     if {"" != $env(attc)} { # create referring part
173     set i -1
174     if ![file isdirectory .attc] { file mkdir .attc }
175     set body ""
176     foreach {ext ctype} $env(attc) {
177     set f $file.[incr i].$ext
178     file rename $dir$f .attc
179     append body "$link$f\t$ctype\n"
180     }
181     lappend env(part) [initialize -canonical text/plain \
182     -param {charset US-ASCII} -encoding 7bit -string $body]
183     }
184     if {1 == [llength $env(part)]} {
185     set nmsg [lindex $env(part) 0]
186     } else {
187     set nmsg [initialize -canonical multipart/mixed -parts $env(part)]
188     }
189     foreach {key vallist} [getheader $msg] {
190     foreach val $vallist {
191     # ::puts stderr "$key $val"
192     switch -- [string tolower $key] \
193     subject - date - from - to - cc - delivered-to - \
194     message-id - references - in-reply-to {
195     setheader $nmsg $key $val -mode append
196     }
197     }
198     }
199     if ![file isdirectory .orig] { file mkdir .orig }
200     file rename $dir$file .orig
201     finalize $msg
202     set fid [open $dir$file w]
203     copymessage $nmsg $fid
204     close $fid
205     finalize $nmsg
206     } ;# fixmsg
207    
208    
209     proc main {argv} {
210     foreach {file dir base link} $argv break
211     if {"" != $base} {
212     cd $base
213     }
214     if {"" != $dir} {
215     set dir $dir/
216     } elseif {[file isdirectory tmp]} {
217     set dir tmp/
218     }
219     if {"-" == $file || "" == $file} {
220     set file [clock format [clock seconds] -format %Y%m%d%H%M%S -gmt 1]
221     set try 0
222     set base $file
223     while {[catch {open $dir$file {WRONLY CREAT EXCL}} fid]} {
224     if {99 == $try} {
225     error "too many tries on $dir$file"
226     }
227     set file $base-[incr try]
228     }
229     while {![eof stdin]} {
230     puts -nonewline $fid [read stdin 8192]
231     }
232     close $fid
233     }
234     set spam ""
235     if [catch {fixmsg $file $dir $link} err] {
236     # some errors have 99.99% SPAM probability
237     switch -glob -- $err \
238     "termination string missing in multipart*" - \
239     "multiple Content-Type fields*" {
240     set spam "FIXMSG $err"
241     }
242     ::puts stderr "FIXMSG $file:\t$::errorInfo"
243     }
244     if ![file isdirectory .spam] { file mkdir .spam }
245     if {"" == $spam} {
246     catch {exec dbacl -c .cat/ok -c .cat/spam $dir$file}
247     if {2 != [scan $::errorCode "CHILDSTATUS %d %d" pid code]} {
248     ::puts stderr "OOPS! could not scan '$::errorCode'"
249     } elseif {2 == $code} {
250     set spam "DBACL"
251     }
252     }
253     if {"" != $spam} {
254     ::puts stderr "FIXSPAM\t$file\t$spam"
255     file rename $dir$file .spam
256     # collect some headers even if message parsing failed
257     # for quick scan, ignore continuation lines
258     set headers "$file\t$spam\n"
259     set fid [open .spam/$file]
260     while {[gets $fid line]} {
261     if [regexp {^([\w\-]+):\s*(.*)} $line - key val] {
262     switch -- [string tolower $key] \
263     from - to - subject - delivered-to {
264     append headers $line "\n"
265     }
266     }
267     }
268     close $fid
269     set fid [open .spam/.log {WRONLY APPEND CREAT}]
270     puts $fid $headers ;# let puts append a newline
271     close $fid
272     return
273     }
274     if [file isdirectory new] {
275     file rename $dir$file new
276     }
277     }
278    
279     main $argv

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26