/*
	openisis - an open implementation of the CDS/ISIS database
	Version 0.8.x (patchlevel see file Version)
	Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

	see README for more information
EOH */
22 |
|
23 |
package org.openisis; |
24 |
|
25 |
import java.io.*; |
26 |
import java.util.Arrays; |
27 |
|
28 |
/** |
29 |
Charset tests and utils for openisis java binding. |
30 |
<p> |
31 |
$Id: Charset.java,v 1.3 2003/04/08 00:20:53 kripke Exp $ |
32 |
@version $Revision: 1.3 $ |
33 |
@author $Author: kripke $ |
34 |
*/ |
35 |
public class Charset { |
36 |
|
37 |
static final String[] encodings = { |
38 |
"ISO-8859-5", // cyrillic |
39 |
"ISO-8859-7", // greek |
40 |
"ISO-8859-9", // turkish |
41 |
}; |
42 |
|
43 |
// bytes 160(=-96) to 255(=-1) |
44 |
static final byte[] isohigh = new byte[96]; |
45 |
static { |
46 |
for ( int b=0; b<96; b++ ) |
47 |
isohigh[b] = (byte)(b-96); |
48 |
} |
49 |
|
50 |
static void utf8seq () |
51 |
throws IOException |
52 |
{ |
53 |
Writer w = new OutputStreamWriter( System.out, "UTF-8" ); |
54 |
for ( int i=0; i<encodings.length; i++ ) { |
55 |
String e = encodings[i]; |
56 |
w.write( e+"|"+(new String(isohigh,e))+"\n" ); |
57 |
} |
58 |
w.flush(); |
59 |
} |
60 |
|
61 |
|
62 |
static void chartest () |
63 |
throws IOException |
64 |
{ |
65 |
String[] args = new String[] { "-v", "error", "-encoding", "UTF-8", |
66 |
"-format", "aligned" }; |
67 |
Db db = Db.open( "db/charset/charset", args ); |
68 |
for ( int i=0; i<encodings.length; i++ ) { |
69 |
String e = encodings[i]; |
70 |
int[] n = db.search( e, 0 ); |
71 |
if ( null == n || 1 != n.length ) { |
72 |
System.err.println( "bad res "+n+" for encoding "+e ); |
73 |
continue; |
74 |
} |
75 |
Rec r = db.readRow( n[0] ); |
76 |
if ( null == r ) { |
77 |
System.err.println( "could not read mfn "+n[0]+" for encoding "+e ); |
78 |
continue; |
79 |
} |
80 |
if ( 2 > r.getLen() || null == r.getField(0) || null == r.getField(1) ) { |
81 |
System.err.println( "bad fields on mfn "+n[0]+" for encoding "+e ); |
82 |
continue; |
83 |
} |
84 |
Field f = r.getField(0); |
85 |
if ( 1 != f.tag || !e.equals( f.val ) ) { |
86 |
System.err.println( "bad field 0 "+f.tag+"="+f.val |
87 |
+" on mfn "+n[0]+" for encoding "+e ); |
88 |
continue; |
89 |
} |
90 |
f = r.getField(1); |
91 |
if ( 2 != f.tag ) { |
92 |
System.err.println( "bad field 1 "+f.tag+"="+f.val |
93 |
+" on mfn "+n[0]+" for encoding "+e ); |
94 |
continue; |
95 |
} |
96 |
if ( ! f.val.equals( new String(isohigh,e) ) ) { |
97 |
System.err.println( "UTF-8 mismatch on mfn "+n[0]+" for encoding "+e ); |
98 |
continue; |
99 |
} |
100 |
byte[] b = f.val.getBytes(e); |
101 |
if ( ! Arrays.equals( b, isohigh ) ) // ok for some encodings |
102 |
System.err.println( "note: no round-trip for encoding "+e ); |
103 |
} |
104 |
} |
105 |
|
106 |
|
107 |
public final String enc; |
108 |
|
109 |
public Charset ( String encoding ) { enc = encoding; } |
110 |
|
111 |
public OutputStreamWriter osw ( OutputStream s ) |
112 |
throws IOException |
113 |
{ |
114 |
return new OutputStreamWriter( s, enc ); |
115 |
} |
116 |
|
117 |
public void chartab ( OutputStreamWriter w ) |
118 |
throws IOException |
119 |
{ |
120 |
if ( null == w ) |
121 |
w = new OutputStreamWriter( System.out ); |
122 |
w.write( "... 0 1 2 3 4 5 6 7 8 9 A B C D E F\n" ); |
123 |
byte[] b = new byte[32]; |
124 |
for ( int l=0; l<8; l++ ) { |
125 |
for ( int j=0; j<16; j++ ) { |
126 |
b[2*j] = (byte)' '; |
127 |
b[2*j+1] = (byte)(l*16 + j - 128); |
128 |
} |
129 |
String s = new String( b, enc ); |
130 |
w.write( (128+l*16) + s + "\n" ); |
131 |
// char[] c = s.toCharArray[]; |
132 |
} |
133 |
w.flush(); |
134 |
} |
135 |
|
136 |
public void codetab ( OutputStreamWriter w ) |
137 |
throws IOException |
138 |
{ |
139 |
if ( null == w ) |
140 |
w = new OutputStreamWriter( System.out ); |
141 |
byte[] b = new byte[16]; |
142 |
for ( int l=0; l<8; l++ ) { |
143 |
for ( int j=0; j<16; j++ ) |
144 |
b[j] = (byte)(l*16 + j - 128); |
145 |
String s = new String( b, enc ); |
146 |
char[] c = s.toCharArray(); |
147 |
for ( int j=0; j<c.length; j++ ) |
148 |
w.write( " " + (int)c[j] + "," ); |
149 |
w.write( "\n" ); |
150 |
} |
151 |
w.flush(); |
152 |
} |
153 |
|
154 |
public void htmltab ( OutputStreamWriter w ) |
155 |
throws IOException |
156 |
{ |
157 |
if ( null == w ) |
158 |
w = osw( System.out ); |
159 |
w.write( "<html><head>\n" |
160 |
+ "<meta http-equiv=\"Content-Type\"" |
161 |
+ " content=\"text/html;charset="+enc+"\"/>\n" |
162 |
+ "</head><body><pre>\n" |
163 |
); |
164 |
byte[] b = new byte[1]; |
165 |
for ( b[0] = -96; b[0]<-1; b[0]++ ) { |
166 |
w.write( (256+(int)b[0]) + " = " |
167 |
+ new String( b, enc ) + "\n" ); |
168 |
} |
169 |
w.write( "</pre></body></html>" ); |
170 |
w.flush(); |
171 |
} |
172 |
|
173 |
public static void main ( String[] args ) |
174 |
throws IOException |
175 |
{ |
176 |
String enc = "Cp850"; |
177 |
char task = 's'; |
178 |
int a = 0; |
179 |
|
180 |
while ( a<args.length ) { |
181 |
String arg = args[a++]; |
182 |
if ( "-enc".equals( arg ) ) |
183 |
enc = args[a++]; |
184 |
else if ( "-utf8seq".equals( arg ) ) |
185 |
task = 'u'; |
186 |
else if ( "-tab".equals( arg ) ) |
187 |
task = 't'; |
188 |
else if ( "-htab".equals( arg ) ) |
189 |
task = 'h'; |
190 |
else if ( "-ctab".equals( arg ) ) |
191 |
task = 'c'; |
192 |
} |
193 |
|
194 |
switch ( task ) { |
195 |
case 'u': // -utf8seq |
196 |
utf8seq(); |
197 |
break; |
198 |
case 't': // -tab |
199 |
(new Charset(enc)).chartab(null); |
200 |
break; |
201 |
case 'h': // -htab |
202 |
(new Charset(enc)).htmltab(null); |
203 |
break; |
204 |
case 'c': // -ctab |
205 |
(new Charset(enc)).codetab(null); |
206 |
break; |
207 |
case 's': // -test |
208 |
chartest(); |
209 |
} |
210 |
} |
211 |
} // Charset |