1 |
#!/usr/bin/perl |
2 |
|
3 |
# perl/encoding.pl </usr/lib/tcl8.3/encoding/cp850.enc |
4 |
# for x in /usr/lib/tcl8.3/encoding/*; do perl/encoding.pl <$x; done >encodings |
5 |
# |
6 |
# read a Tcl encoding file (see man Tcl_GetEncoding) |
7 |
# spit out a perl translator |
8 |
|
9 |
$enc = 'unknown'; |
10 |
$_ = <STDIN>; |
11 |
if ( /^#/ ) { |
12 |
($enc) = /: ([^,]+),/; |
13 |
$_ = <STDIN>; |
14 |
} |
15 |
/^S/ || die "not a single byte encoding: got '$_'"; |
16 |
$_ = <STDIN>; |
17 |
($fallback,$symbol,$pages) = split /\s+/; |
18 |
1 == $pages || die "not a single byte encoding: got $fallback,$symbol,$pages"; |
19 |
$fallback = hex $fallback; |
20 |
$fbchr = 31<$fallback && $fallback<127 |
21 |
? chr($fallback) : sprintf('\x%02x', $fallback); |
22 |
print STDERR "fallback character is $fallback ($fbchr)\n"; |
23 |
|
24 |
# read page |
25 |
$_ = <STDIN>; |
26 |
hex && die "base is not 00 but '$_'"; |
27 |
for $n (0..7) { # check for ASCII |
28 |
@v = unpack( 'n*', pack( 'H64', scalar(<STDIN>) ) ); |
29 |
15 == $#v || die "length is ".$#v; |
30 |
# print STDERR join(',',@v), "\n"; |
31 |
for (0..15) { |
32 |
$v[$_] == 16*$n + $_ || die "bad val $v[$_] at row $n pos $_"; |
33 |
} |
34 |
} |
35 |
$#rv = 127; |
36 |
for (0..127) { $rv[$_] = $fbchr; } |
37 |
for $n (8..15) { |
38 |
@v = unpack( 'n*', pack( 'H64', scalar(<STDIN>) ) ); |
39 |
15 == $#v || die "length is ".$#v; |
40 |
print STDERR join(',',@v), "\n"; |
41 |
for (0..15) { |
42 |
if ( 255 < $v[$_] ) { |
43 |
$tr .= '?'; |
44 |
} else { |
45 |
$tr .= sprintf('\x%02x',$v[$_]); |
46 |
$rv[$v[$_]-128] = sprintf('\x%02x',16*$n + $_) if 127 < $v[$_]; |
47 |
} |
48 |
} |
49 |
} |
50 |
print "# convert $enc to Latin1\n"; |
51 |
print 'y/\x80-\xff/',$tr,"/;\n"; |
52 |
$rv = join('',@rv); |
53 |
print "# convert Latin1 to $enc\n"; |
54 |
print 'y/\x80-\xff/',$rv,"/;\n"; |
55 |
|
56 |
# Latin1 roundtrip test |
57 |
$_ = $test = pack( 'C*', (160..255) ); |
58 |
eval 'y/\x80-\xff/'.$rv.'/'; |
59 |
print STDERR "Latin1 160..255 in encoding $enc\n"; |
60 |
for $n (0..5) { |
61 |
print STDERR 160+16*$n,': ',join(',',unpack('C*',substr($_,16*$n,16))), "\n"; |
62 |
} |
63 |
print STDERR 'test: ',$test,"\n"; |
64 |
eval 'y/\x80-\xff/'.$tr.'/'; |
65 |
print STDERR "trip: ",$_,"\n"; |
66 |
print STDERR "Latin1 160..255 after roundtrip through $enc\n"; |
67 |
for $n (0..5) { |
68 |
print STDERR 160+16*$n,': ',join(',',unpack('C*',substr($_,16*$n,16))), "\n"; |
69 |
} |