/[webpac]/trunk/openisis/lcs.h
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/openisis/lcs.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 239 - (show annotations)
Mon Mar 8 17:49:13 2004 UTC (20 years ago) by dpavlin
File MIME type: text/plain
File size: 4109 byte(s)
including openisis 0.9.0 into webpac tree

1 /*
2 openisis - an open implementation of the CDS/ISIS database
3 Version 0.8.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22 #ifndef LCS_H
23
24 /*
25 $Id: lcs.h,v 1.3 2003/04/08 00:20:52 kripke Exp $
26 charset support
27 */
28
29 typedef enum { /* kinds of character encoding distinguished here */
30 LCS_SINGLE, /* some ASCII-compatible single-byte charset */
31 LCS_UTF8, /* Unicode in 8-bit transfer encoding (UTF-8) */
32 LCS_UCS2 /* the Unicode Basic Multilingual Plane (BMP, first 64K chars)
33 in 2-byte encoding, native (platform) byte order */
34 } lcs_type;
35
36 typedef enum { /* identifiers of the known single-byte charsets */
37 LCS_LATIN1, /* the ISO-8859-1 (Latin 1) charset */
38 LCS_CP850, /* MS-DOS Latin 1, known as the "OEM" charset */
39 LCS__SBCS /* number of single-byte charsets; must stay the last enumerator */
40 } lcs_single;
41
42
43 typedef enum { /* identifiers of the kinds of table kept for a charset */
44 LCS_CTYPE, /* character class flags, see the LCS_R .. LCS_N codes below */
45 LCS_UNICO, /* 128 Unicode values (presumably LcsTab.u -- TODO confirm) */
46 LCS_UCASE, /* each character's uppercase code */
47 LCS_INDEX, /* single-byte recoding for index ("strxfrm") */
48 LCS_XEDNI, /* inverse of LCS_INDEX */
49 LCS_RECOD, /* single-byte recoding to extern (typically Latin1) */
50 LCS_DOCER, /* inverse of LCS_RECOD */
51 LCS__TABS /* equals the number of table ids above; must stay last */
52 } lcs_tabid;
53
54
55 /* A conversion table for a single-byte charset.
56 c may hold an actab (non-zero for any character deemed "alpha"),
57 a uctab ("uppercase" replacement code, typically removing diacriticals),
58 a code-to-code conversion, or some other per-byte table.
59 u may hold the Unicode values for the upper half (128 entries) of an
60 ASCII-compatible single-byte charset.
61 */
62
63 typedef union {
64 unsigned char c[256]; /* one entry per byte value */
65 unsigned short u[128]; /* one entry per upper-half character */
66 } LcsTab;
67
68 enum { /* character class codes; the LCS_IS* macros test their bit patterns */
69 /* controls (0..31,127): neither of the 0xc0 bits is set */
70 LCS_R = 0x00, /* record separator (FF,GS) */
71 LCS_F = 0x10, /* field separator (NUL,CR,LF,RS) */
72 LCS_C = 0x28, /* other control */
73 /* other separators: 0x40 set, 0x80 clear */
74 LCS_S = 0x48, /* space (blank,nbsp) */
75 LCS_T = 0x68, /* other token separator (,:;=) */
76 /* other punctuation, assumed to be "word" characters (0x80 set) */
77 LCS_P = 0x80,
78 LCS_Y = 0xb0, /* symbol */
79 /* identifiers: both 0xc0 bits set */
80 LCS_I = 0xc0, /* C-ident (_) */
81 LCS_D = 0xd0, /* digits ('0'..'9') */
82 LCS_A = 0xe0, /* ASCII alpha */
83 LCS_L = 0xe4, /* ... among those, the formatting literals a,c,i,x */
84 LCS_N = 0xf0 /* other alpha ("national"/non-ASCII) */
85 };
86
87 #define LCS_ISCONTR( t ) (!(0xc0 & (t))) /* 1 for LCS_R, LCS_F, LCS_C */
88 #define LCS_ISSPACE( t ) (LCS_S >= (t)) /* 1 for the controls and LCS_S */
89 #define LCS_ISPRINT( t ) (0xc0 & (t)) /* non-zero (not necessarily 1) for LCS_S and above, i.e. when (LCS_S <= (t)) */
90 #define LCS_ISIDENT( t ) (0xc0 == (0xc0 & (t))) /* 1 for LCS_I and above */
91 #define LCS_ISALPHA( t ) (0xe0 == (0xe0 & (t))) /* 1 for LCS_A, LCS_L, LCS_N */
92
93 /* for record parsing */
94 #define LCS_ISSEP( t ) (!(0x80 & (t))) /* 1 for controls, LCS_S and LCS_T */
95 #define LCS_ISWORD( t ) (0x80 & (t)) /* non-zero (0x80, not 1) for LCS_P and above */
96 #define LCS_ISFR( t ) (!(0xe0 & (t))) /* field or record separator (LCS_F, LCS_R) */
97 #define LCS_ISCST( t ) (0x08 & (t)) /* other separator: non-zero (0x08) for LCS_C, LCS_S, LCS_T */
98
99
100 enum {
101 LCS_MKUNI = -1 /* pass as the "bits" argument to have lcs_mktab create a unicode table */
102 };
103
104 /** Create the table dst from a string containing
105 a free-style sequence of decimal numbers (any non-digit is ignored).
106 If bits is 0, the numbers are assigned sequentially to dst->c,
107 else if bits < 0 (see LCS_MKUNI), they are assigned to dst->u,
108 else for every number, bits are set in the corresponding array element;
109 else something strange may happen. NOTE(review): len is presumably the length of numbers, and the return value is not documented here -- confirm against the implementation.
110 */
111 extern int lcs_mktab ( LcsTab *dst, char *numbers, int len, int bits );
112
113 /** Create a single-byte conversion table in dst from the two unicode tables from and to.
114 If to is NULL, the trivial (Latin1) table is used. NOTE(review): table sizes and the return value are not documented here; presumably dst has 256 entries and from/to have 128 each (cf. LcsTab) -- confirm.
115 */
116 extern int lcs_mkrecod ( unsigned char *dst, unsigned short *from,
117 unsigned short *to );
118
119 extern unsigned char lcs_latin1_uc[256]; /* presumably the Latin 1 uppercase table (cf. LCS_UCASE) -- TODO confirm */
120 extern unsigned char lcs_latin1_ct[256]; /* presumably the Latin 1 character class table (cf. LCS_CTYPE) -- TODO confirm */
121
122 #define LCS_H
123 #endif /* LCS_H */

  ViewVC Help
Powered by ViewVC 1.1.26