/*
	openisis - an open implementation of the CDS/ISIS database
	Version 0.8.x (patchlevel see file Version)
	Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

	see README for more information
EOH */
#ifndef LCS_H

/*
	$Id: lcs.h,v 1.3 2003/04/08 00:20:52 kripke Exp $
	charset support
*/

/* kind of character encoding */
typedef enum {
	LCS_SINGLE, /* some ASCII-compliant single byte charset */
	LCS_UTF8,   /* unicode in 8bit transfer encoding */
	LCS_UCS2    /* the unicode base multilingual plane (BMP, first 64K chars)
	               in 2byte encoding, native (platform) byte order */
} lcs_type;

/* the known single byte charsets */
typedef enum {
	LCS_LATIN1, /* the ISO-8859-1 (Latin 1) charset */
	LCS_CP850,  /* MS-DOS Latin 1, known as the "OEM" charset */
	LCS__SBCS   /* # of single byte charsets (a count, not a charset itself) */
} lcs_single;


/* ids of the tables kept for a charset */
typedef enum {
	LCS_CTYPE, /* flags see below */
	LCS_UNICO, /* 128 unicodes */
	LCS_UCASE, /* each characters uppercase code */
	LCS_INDEX, /* single byte recoding for index ("strxfrm") */
	LCS_XEDNI, /* inverse */
	LCS_RECOD, /* single byte recoding to extern (typically Latin1) */
	LCS_DOCER, /* inverse */
	LCS__TABS  /* # of table ids above (a count, not a table id itself) */
} lcs_tabid;


/* some conversion table on a single byte charset.
	c (one entry per byte value) may hold
		an actab (!0 for any character deemed "alpha"),
		an uctab ("uppercase" replacement code, typically removing
		diacriticals), a code-to-code conversion or other.
	u (128 entries) may hold unicode values for the upper half of an
		ASCII compatible single byte charset.
	c and u share storage: a given table is used as one or the other.
*/

typedef union {
	unsigned char  c[256];
	unsigned short u[128];
} LcsTab;

/* character class codes.
	The values are laid out so that the LCS_IS* macros can classify a code
	by masking its upper bits:
	0x80 clear for separators, set for "word" characters,
	0xc0 clear for controls, fully set for identifier characters,
	0xe0 fully set for alphas.
*/
enum {
	/* controls (0..31,127) */
	LCS_R = 0x00, /* record separator (FF,GS), */
	LCS_F = 0x10, /* field separator (NUL,CR,LF,RS) */
	LCS_C = 0x28, /* other control */
	/* other separators */
	LCS_S = 0x48, /* space(blank,nbsp), */
	LCS_T = 0x68, /* other token separator (,:;=), */
	/* other punctuation assumed "word" characters */
	LCS_P = 0x80,
	LCS_Y = 0xb0, /* symbol */
	/* identifiers */
	LCS_I = 0xc0, /* C-ident (_). */
	LCS_D = 0xd0, /* digits ('0'..'9') */
	LCS_A = 0xe0, /* ASCII alpha */
	LCS_L = 0xe4, /* ... among those formatting literals a,c,i,x */
	LCS_N = 0xf0  /* other alpha ("national"/non-ASCII) */
};

/* ctype-like predicates on a character class code t (LCS_R .. LCS_N).
	Each macro evaluates t exactly once.
	LCS_ISPRINT yields the masked bits (non-zero, not necessarily 1);
	the others yield 0 or 1.
*/
/* both top bits clear: LCS_R, LCS_F, LCS_C */
#define LCS_ISCONTR( t ) (!(0xc0 & (t)))
/* anything up to and including LCS_S: the controls and space */
#define LCS_ISSPACE( t ) (LCS_S >= (t))
/* any of the top two bits set: LCS_S and above */
#define LCS_ISPRINT( t ) (0xc0 & (t)) /* (LCS_S <= (t)) */
/* both top bits set: LCS_I, LCS_D, LCS_A, LCS_L, LCS_N */
#define LCS_ISIDENT( t ) (0xc0 == (0xc0 & (t)))
/* all top three bits set: LCS_A, LCS_L, LCS_N */
#define LCS_ISALPHA( t ) (0xe0 == (0xe0 & (t)))

/* for record parsing.
	Predicates on a character class code t (LCS_R .. LCS_N); each macro
	evaluates t exactly once. LCS_ISWORD and LCS_ISCST yield the masked
	bit (non-zero, not necessarily 1); the others yield 0 or 1.
*/
/* top bit clear: LCS_R, LCS_F, LCS_C, LCS_S, LCS_T */
#define LCS_ISSEP( t ) (!(0x80 & (t)))
/* top bit set: LCS_P and above */
#define LCS_ISWORD( t ) (0x80 & (t))
/* top three bits clear: LCS_R, LCS_F */
#define LCS_ISFR( t ) (!(0xe0 & (t))) /* field or record separator */
/* bit 0x08 set: LCS_C, LCS_S, LCS_T */
#define LCS_ISCST( t ) (0x08 & (t)) /* other separator */


/* special values for the "bits" argument of lcs_mktab */
enum {
	LCS_MKUNI = -1 /* as "bits" value, have mktab create unicode table */
};

/** create the table dst from string containing |
105 |
a free style sequence of decimal numbers (ignoring any non-digit). |
106 |
if bits is 0, numbers are assigned sequentially to dst->c, |
107 |
else if bits < 0, they are assigned to dst->u, |
108 |
else for every number, bits are set in the corresponding array element |
109 |
else something strange may happen. |
110 |
*/ |
111 |
extern int lcs_mktab ( LcsTab *dst, char *numbers, int len, int bits ); |
112 |
|
113 |
/** create single byte conversion table from two unicode tables.
	dst receives the conversion table,
	from and to are the unicode tables of the source and target charset.
	if to is NULL, the trivial (Latin1) table is used.
	NOTE(review): the required sizes of dst, from and to and the meaning
	of the return value are not stated here -- confirm against the
	implementation.
*/
extern int lcs_mkrecod ( unsigned char *dst, unsigned short *from,
	unsigned short *to );

/* predefined byte-indexed tables for Latin 1, defined elsewhere.
	NOTE(review): going by the names, _uc is presumably the uppercase
	(LCS_UCASE) table and _ct the character class (LCS_CTYPE) table --
	confirm against the definitions.
*/
extern unsigned char lcs_latin1_uc[256];
extern unsigned char lcs_latin1_ct[256];

#define LCS_H
#endif /* LCS_H */