/[webpac]/openisis/current/org/openisis/Charset.java
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/current/org/openisis/Charset.java

Parent Directory | Revision Log


Revision 237 - (show annotations)
Mon Mar 8 17:43:12 2004 UTC (20 years ago) by dpavlin
File size: 5471 byte(s)
initial import of openisis 0.9.0 vendor drop

1 /*
2 openisis - an open implementation of the CDS/ISIS database
3 Version 0.8.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 package org.openisis;
24
25 import java.io.*;
26 import java.util.Arrays;
27
28 /**
29 Charset tests and utils for openisis java binding.
30 <p>
31 $Id: Charset.java,v 1.3 2003/04/08 00:20:53 kripke Exp $
32 @version $Revision: 1.3 $
33 @author $Author: kripke $
34 */
35 public class Charset {
36
37 static final String[] encodings = {
38 "ISO-8859-5", // cyrillic
39 "ISO-8859-7", // greek
40 "ISO-8859-9", // turkish
41 };
42
43 // bytes 160(=-96) to 255(=-1)
44 static final byte[] isohigh = new byte[96];
45 static {
46 for ( int b=0; b<96; b++ )
47 isohigh[b] = (byte)(b-96);
48 }
49
50 static void utf8seq ()
51 throws IOException
52 {
53 Writer w = new OutputStreamWriter( System.out, "UTF-8" );
54 for ( int i=0; i<encodings.length; i++ ) {
55 String e = encodings[i];
56 w.write( e+"|"+(new String(isohigh,e))+"\n" );
57 }
58 w.flush();
59 }
60
61
62 static void chartest ()
63 throws IOException
64 {
65 String[] args = new String[] { "-v", "error", "-encoding", "UTF-8",
66 "-format", "aligned" };
67 Db db = Db.open( "db/charset/charset", args );
68 for ( int i=0; i<encodings.length; i++ ) {
69 String e = encodings[i];
70 int[] n = db.search( e, 0 );
71 if ( null == n || 1 != n.length ) {
72 System.err.println( "bad res "+n+" for encoding "+e );
73 continue;
74 }
75 Rec r = db.readRow( n[0] );
76 if ( null == r ) {
77 System.err.println( "could not read mfn "+n[0]+" for encoding "+e );
78 continue;
79 }
80 if ( 2 > r.getLen() || null == r.getField(0) || null == r.getField(1) ) {
81 System.err.println( "bad fields on mfn "+n[0]+" for encoding "+e );
82 continue;
83 }
84 Field f = r.getField(0);
85 if ( 1 != f.tag || !e.equals( f.val ) ) {
86 System.err.println( "bad field 0 "+f.tag+"="+f.val
87 +" on mfn "+n[0]+" for encoding "+e );
88 continue;
89 }
90 f = r.getField(1);
91 if ( 2 != f.tag ) {
92 System.err.println( "bad field 1 "+f.tag+"="+f.val
93 +" on mfn "+n[0]+" for encoding "+e );
94 continue;
95 }
96 if ( ! f.val.equals( new String(isohigh,e) ) ) {
97 System.err.println( "UTF-8 mismatch on mfn "+n[0]+" for encoding "+e );
98 continue;
99 }
100 byte[] b = f.val.getBytes(e);
101 if ( ! Arrays.equals( b, isohigh ) ) // ok for some encodings
102 System.err.println( "note: no round-trip for encoding "+e );
103 }
104 }
105
106
107 public final String enc;
108
109 public Charset ( String encoding ) { enc = encoding; }
110
111 public OutputStreamWriter osw ( OutputStream s )
112 throws IOException
113 {
114 return new OutputStreamWriter( s, enc );
115 }
116
117 public void chartab ( OutputStreamWriter w )
118 throws IOException
119 {
120 if ( null == w )
121 w = new OutputStreamWriter( System.out );
122 w.write( "... 0 1 2 3 4 5 6 7 8 9 A B C D E F\n" );
123 byte[] b = new byte[32];
124 for ( int l=0; l<8; l++ ) {
125 for ( int j=0; j<16; j++ ) {
126 b[2*j] = (byte)' ';
127 b[2*j+1] = (byte)(l*16 + j - 128);
128 }
129 String s = new String( b, enc );
130 w.write( (128+l*16) + s + "\n" );
131 // char[] c = s.toCharArray[];
132 }
133 w.flush();
134 }
135
136 public void codetab ( OutputStreamWriter w )
137 throws IOException
138 {
139 if ( null == w )
140 w = new OutputStreamWriter( System.out );
141 byte[] b = new byte[16];
142 for ( int l=0; l<8; l++ ) {
143 for ( int j=0; j<16; j++ )
144 b[j] = (byte)(l*16 + j - 128);
145 String s = new String( b, enc );
146 char[] c = s.toCharArray();
147 for ( int j=0; j<c.length; j++ )
148 w.write( " " + (int)c[j] + "," );
149 w.write( "\n" );
150 }
151 w.flush();
152 }
153
154 public void htmltab ( OutputStreamWriter w )
155 throws IOException
156 {
157 if ( null == w )
158 w = osw( System.out );
159 w.write( "<html><head>\n"
160 + "<meta http-equiv=\"Content-Type\""
161 + " content=\"text/html;charset="+enc+"\"/>\n"
162 + "</head><body><pre>\n"
163 );
164 byte[] b = new byte[1];
165 for ( b[0] = -96; b[0]<-1; b[0]++ ) {
166 w.write( (256+(int)b[0]) + " = "
167 + new String( b, enc ) + "\n" );
168 }
169 w.write( "</pre></body></html>" );
170 w.flush();
171 }
172
173 public static void main ( String[] args )
174 throws IOException
175 {
176 String enc = "Cp850";
177 char task = 's';
178 int a = 0;
179
180 while ( a<args.length ) {
181 String arg = args[a++];
182 if ( "-enc".equals( arg ) )
183 enc = args[a++];
184 else if ( "-utf8seq".equals( arg ) )
185 task = 'u';
186 else if ( "-tab".equals( arg ) )
187 task = 't';
188 else if ( "-htab".equals( arg ) )
189 task = 'h';
190 else if ( "-ctab".equals( arg ) )
191 task = 'c';
192 }
193
194 switch ( task ) {
195 case 'u': // -utf8seq
196 utf8seq();
197 break;
198 case 't': // -tab
199 (new Charset(enc)).chartab(null);
200 break;
201 case 'h': // -htab
202 (new Charset(enc)).htmltab(null);
203 break;
204 case 'c': // -ctab
205 (new Charset(enc)).codetab(null);
206 break;
207 case 's': // -test
208 chartest();
209 }
210 }
211 } // Charset

  ViewVC Help
Powered by ViewVC 1.1.26