/[webpac]/trunk2/openisis/org/openisis/Charset.java
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/openisis/org/openisis/Charset.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 337 - (hide annotations)
Thu Jun 10 19:22:40 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 5471 byte(s)
new trunk for webpac v2

1 dpavlin 237 /*
2     openisis - an open implementation of the CDS/ISIS database
3     Version 0.8.x (patchlevel see file Version)
4     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14     Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     package org.openisis;
24    
25     import java.io.*;
26     import java.util.Arrays;
27    
28     /**
29     Charset tests and utils for openisis java binding.
30     <p>
31     $Id: Charset.java,v 1.3 2003/04/08 00:20:53 kripke Exp $
32     @version $Revision: 1.3 $
33     @author $Author: kripke $
34     */
35     public class Charset {
36    
37     static final String[] encodings = {
38     "ISO-8859-5", // cyrillic
39     "ISO-8859-7", // greek
40     "ISO-8859-9", // turkish
41     };
42    
43     // bytes 160(=-96) to 255(=-1)
44     static final byte[] isohigh = new byte[96];
45     static {
46     for ( int b=0; b<96; b++ )
47     isohigh[b] = (byte)(b-96);
48     }
49    
50     static void utf8seq ()
51     throws IOException
52     {
53     Writer w = new OutputStreamWriter( System.out, "UTF-8" );
54     for ( int i=0; i<encodings.length; i++ ) {
55     String e = encodings[i];
56     w.write( e+"|"+(new String(isohigh,e))+"\n" );
57     }
58     w.flush();
59     }
60    
61    
62     static void chartest ()
63     throws IOException
64     {
65     String[] args = new String[] { "-v", "error", "-encoding", "UTF-8",
66     "-format", "aligned" };
67     Db db = Db.open( "db/charset/charset", args );
68     for ( int i=0; i<encodings.length; i++ ) {
69     String e = encodings[i];
70     int[] n = db.search( e, 0 );
71     if ( null == n || 1 != n.length ) {
72     System.err.println( "bad res "+n+" for encoding "+e );
73     continue;
74     }
75     Rec r = db.readRow( n[0] );
76     if ( null == r ) {
77     System.err.println( "could not read mfn "+n[0]+" for encoding "+e );
78     continue;
79     }
80     if ( 2 > r.getLen() || null == r.getField(0) || null == r.getField(1) ) {
81     System.err.println( "bad fields on mfn "+n[0]+" for encoding "+e );
82     continue;
83     }
84     Field f = r.getField(0);
85     if ( 1 != f.tag || !e.equals( f.val ) ) {
86     System.err.println( "bad field 0 "+f.tag+"="+f.val
87     +" on mfn "+n[0]+" for encoding "+e );
88     continue;
89     }
90     f = r.getField(1);
91     if ( 2 != f.tag ) {
92     System.err.println( "bad field 1 "+f.tag+"="+f.val
93     +" on mfn "+n[0]+" for encoding "+e );
94     continue;
95     }
96     if ( ! f.val.equals( new String(isohigh,e) ) ) {
97     System.err.println( "UTF-8 mismatch on mfn "+n[0]+" for encoding "+e );
98     continue;
99     }
100     byte[] b = f.val.getBytes(e);
101     if ( ! Arrays.equals( b, isohigh ) ) // ok for some encodings
102     System.err.println( "note: no round-trip for encoding "+e );
103     }
104     }
105    
106    
107     public final String enc;
108    
109     public Charset ( String encoding ) { enc = encoding; }
110    
111     public OutputStreamWriter osw ( OutputStream s )
112     throws IOException
113     {
114     return new OutputStreamWriter( s, enc );
115     }
116    
117     public void chartab ( OutputStreamWriter w )
118     throws IOException
119     {
120     if ( null == w )
121     w = new OutputStreamWriter( System.out );
122     w.write( "... 0 1 2 3 4 5 6 7 8 9 A B C D E F\n" );
123     byte[] b = new byte[32];
124     for ( int l=0; l<8; l++ ) {
125     for ( int j=0; j<16; j++ ) {
126     b[2*j] = (byte)' ';
127     b[2*j+1] = (byte)(l*16 + j - 128);
128     }
129     String s = new String( b, enc );
130     w.write( (128+l*16) + s + "\n" );
131     // char[] c = s.toCharArray[];
132     }
133     w.flush();
134     }
135    
136     public void codetab ( OutputStreamWriter w )
137     throws IOException
138     {
139     if ( null == w )
140     w = new OutputStreamWriter( System.out );
141     byte[] b = new byte[16];
142     for ( int l=0; l<8; l++ ) {
143     for ( int j=0; j<16; j++ )
144     b[j] = (byte)(l*16 + j - 128);
145     String s = new String( b, enc );
146     char[] c = s.toCharArray();
147     for ( int j=0; j<c.length; j++ )
148     w.write( " " + (int)c[j] + "," );
149     w.write( "\n" );
150     }
151     w.flush();
152     }
153    
154     public void htmltab ( OutputStreamWriter w )
155     throws IOException
156     {
157     if ( null == w )
158     w = osw( System.out );
159     w.write( "<html><head>\n"
160     + "<meta http-equiv=\"Content-Type\""
161     + " content=\"text/html;charset="+enc+"\"/>\n"
162     + "</head><body><pre>\n"
163     );
164     byte[] b = new byte[1];
165     for ( b[0] = -96; b[0]<-1; b[0]++ ) {
166     w.write( (256+(int)b[0]) + " = "
167     + new String( b, enc ) + "\n" );
168     }
169     w.write( "</pre></body></html>" );
170     w.flush();
171     }
172    
173     public static void main ( String[] args )
174     throws IOException
175     {
176     String enc = "Cp850";
177     char task = 's';
178     int a = 0;
179    
180     while ( a<args.length ) {
181     String arg = args[a++];
182     if ( "-enc".equals( arg ) )
183     enc = args[a++];
184     else if ( "-utf8seq".equals( arg ) )
185     task = 'u';
186     else if ( "-tab".equals( arg ) )
187     task = 't';
188     else if ( "-htab".equals( arg ) )
189     task = 'h';
190     else if ( "-ctab".equals( arg ) )
191     task = 'c';
192     }
193    
194     switch ( task ) {
195     case 'u': // -utf8seq
196     utf8seq();
197     break;
198     case 't': // -tab
199     (new Charset(enc)).chartab(null);
200     break;
201     case 'h': // -htab
202     (new Charset(enc)).htmltab(null);
203     break;
204     case 'c': // -ctab
205     (new Charset(enc)).codetab(null);
206     break;
207     case 's': // -test
208     chartest();
209     }
210     }
211     } // Charset

  ViewVC Help
Powered by ViewVC 1.1.26