/*
openisis - an open implementation of the CDS/ISIS database
Version 0.8.x (patchlevel see file Version)
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
see README for more information
EOH */
#ifndef LCS_H
/*
$Id: lcs.h,v 1.3 2003/04/08 00:20:52 kripke Exp $
charset support
*/
/* Encoding families distinguished by the charset support. */
typedef enum {
	/* some ASCII-compliant single byte charset */
	LCS_SINGLE = 0,
	/* unicode in 8bit transfer encoding */
	LCS_UTF8 = 1,
	/* the unicode base multilingual plane (BMP, first 64K chars)
	   in 2byte encoding, native (platform) byte order */
	LCS_UCS2 = 2
} lcs_type;
/* The single byte charsets known by id. */
typedef enum {
	/* the ISO-8859-1 (Latin 1) charset */
	LCS_LATIN1 = 0,
	/* MS-DOS Latin 1, known as the "OEM" charset */
	LCS_CP850 = 1,
	/* # of single byte charsets; a count, not a charset id */
	LCS__SBCS = 2
} lcs_single;
/* Ids of the tables kept for a single byte charset. */
typedef enum {
	/* character type flags, see the LCS_* flag enum below */
	LCS_CTYPE = 0,
	/* 128 unicodes */
	LCS_UNICO = 1,
	/* each character's uppercase code */
	LCS_UCASE = 2,
	/* single byte recoding for index ("strxfrm") */
	LCS_INDEX = 3,
	/* inverse of LCS_INDEX */
	LCS_XEDNI = 4,
	/* single byte recoding to extern (typically Latin1) */
	LCS_RECOD = 5,
	/* inverse of LCS_RECOD */
	LCS_DOCER = 6,
	/* equals the number of table ids listed above */
	LCS__TABS = 7
} lcs_tabid;
/* A conversion table on a single byte charset.
Both members share the same storage; which one is meaningful depends on
the kind of table being stored.
c has one byte for each of the 256 possible byte values. It may hold
an actab (!0 for any character deemed "alpha"),
a uctab ("uppercase" replacement code, typically removing diacriticals),
a code-to-code conversion, or other per-character data.
u has 128 entries and may hold the unicode values for the upper half
of an ASCII compatible single byte charset.
*/
typedef union {
unsigned char c[256]; /* byte-valued table: flags or replacement codes */
unsigned short u[128]; /* unicode values for the upper half of the charset */
} LcsTab;
/* Character type flags (the values of a ctype table).
   The classes are laid out so that the high bits double as class tests,
   as used by the LCS_IS* macros:
     0xc0 clear -> control,        0x80 set -> "word" character,
     0xc0 set   -> identifier,     0xe0 set -> alpha,
     0x08 set   -> the "other separator" classes C, S and T. */
enum {
	/* --- controls (0..31,127) --- */
	LCS_R = 0x00,	/* record separator (FF,GS) */
	LCS_F = 0x10,	/* field separator (NUL,CR,LF,RS) */
	LCS_C = 0x28,	/* other control */

	/* --- other separators --- */
	LCS_S = 0x48,	/* space (blank,nbsp) */
	LCS_T = 0x68,	/* other token separator (,:;=) */

	/* --- other punctuation, assumed "word" characters --- */
	LCS_P = 0x80,	/* punctuation */
	LCS_Y = 0xb0,	/* symbol */

	/* --- identifiers --- */
	LCS_I = 0xc0,	/* C-ident (_) */
	LCS_D = 0xd0,	/* digits ('0'..'9') */
	LCS_A = 0xe0,	/* ASCII alpha */
	LCS_L = 0xe4,	/* ... among those, the formatting literals a,c,i,x */
	LCS_N = 0xf0	/* other alpha ("national"/non-ASCII) */
};
/* Class tests on a character type flag value t.
   Every macro evaluates t exactly once. */

/* control: neither of the two top bits is set */
#define LCS_ISCONTR(t) (0 == ((t) & 0xc0))
/* space: anything not above LCS_S, so the control classes count as well */
#define LCS_ISSPACE(t) ((t) <= LCS_S)
/* printable: yields the masked top bits (nonzero if any is set);
   for the flag values defined in this header this agrees with LCS_S <= t */
#define LCS_ISPRINT(t) ((t) & 0xc0)
/* identifier character: both top bits set */
#define LCS_ISIDENT(t) (((t) & 0xc0) == 0xc0)
/* alpha: all three top bits set */
#define LCS_ISALPHA(t) (((t) & 0xe0) == 0xe0)

/* for record parsing */
/* separator: top bit clear */
#define LCS_ISSEP(t) (0 == ((t) & 0x80))
/* word character: yields the masked top bit (0x80 or 0) */
#define LCS_ISWORD(t) ((t) & 0x80)
/* field or record separator: all three top bits clear */
#define LCS_ISFR(t) (0 == ((t) & 0xe0))
/* other separator: yields the masked 0x08 bit (0x08 or 0) */
#define LCS_ISCST(t) ((t) & 0x08)
/* Special "bits" argument for lcs_mktab. */
enum {
	/* passed as "bits" value, have mktab create a unicode table */
	LCS_MKUNI = -1
};
/** Create the table dst from a string containing a free style sequence
of decimal numbers; any non-digit is ignored.
How the numbers are stored depends on bits:
- bits == 0: the numbers are assigned sequentially to dst->c,
- bits < 0 (e.g. LCS_MKUNI): the numbers are assigned to dst->u,
- otherwise: for every number, bits are set in the corresponding
  array element (presumably dst->c[number] -- TODO confirm).
The original author's caveat still applies: beyond these cases
"something strange may happen".
@param dst     table to fill
@param numbers text holding the decimal numbers
@param len     presumably the length of numbers in bytes -- TODO confirm
@param bits    mode selector / flag bits as described above
@return        not documented in this header; check the implementation
*/
extern int lcs_mktab ( LcsTab *dst, char *numbers, int len, int bits );
/** Create a single byte conversion table from two unicode tables.
If to is NULL, the trivial (Latin1) table is used in its place.
@param dst  receives the single byte conversion table
            (presumably 256 entries, like LcsTab.c -- TODO confirm)
@param from unicode table to convert from
            (presumably 128 entries, like LcsTab.u -- TODO confirm)
@param to   unicode table to convert to, or NULL for Latin1
@return     not documented in this header; check the implementation
*/
extern int lcs_mkrecod ( unsigned char *dst, unsigned short *from,
unsigned short *to );
/* 256-entry byte tables for Latin1; only declared here, defined elsewhere.
   Judging by the names, _uc is presumably the uppercase table
   (cf. LCS_UCASE) and _ct the character type table (cf. LCS_CTYPE)
   -- TODO confirm against the definitions. */
extern unsigned char lcs_latin1_uc[256];
extern unsigned char lcs_latin1_ct[256];
#define LCS_H
#endif /* LCS_H */