/*
openisis - an open implementation of the CDS/ISIS database
Version 0.8.x (patchlevel see file Version)
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
see README for more information
EOH */
#ifndef LSTR_H
/*
$Id: lstr.h,v 1.15 2003/04/08 00:20:53 kripke Exp $
definition of ISIS internal structures for openisis lib implementation.
This header defines the common internal data structures and their members
and external structure and member definition types.
An internal data structure is the common in-memory representation of
ISIS data; it may be backed by different external (in-file) structures.
An in-memory data structure
is a contiguous int* chunk of memory, consisting of the sizes (one int),
an array of members (int) and some buffer (array of char).
The sizes of a data structure consist of the lengths
fix and rep of the fixed and repeated part, resp.,
and the number of occurrences of the repeated part.
It is used in both an abstract description of a data type
and each instance of a data structure.
If the number of occurrences of the repeated part is not fixed,
it is 0 in the abstract case.
The lengths are suitable as offsets into the lmbr array.
This structure is designed to fit within 32 bits.
The sizing is the only common meta-data description of a logical
data type. For readability of code, however, the member indexes
are enum-erated. When actually used, the type has to match the code's
assumptions anyway.
The numbers fix and rep are fixed for a given data structure.
Although the logical data type has more properties,
especially the type of each member (numeric, C-string or raw mem),
a representation of this data is needed only when converting
external data and thus is stored (somewhat redundantly)
with the external properties.
There are only two types of members:
numeric and memory (mostly some character data).
both are represented by a int, which, for memory,
denotes an offset (counted in BYTES, not ints) from the records start
(NOT offset into the data buffer, as with the isis mst record).
The member array consists of a fixed number <emp>fix</emp> of initial
members and a number <emp>occ</emp> of occurrences
of member arrays of fixed length <emp>rep</emp>.
While <emp>occ</emp> is usually fixed,
it varies for masterfile records.
Note that this layout somewhat resembles the isis record itself,
which supports a single level of nesting and repetition.
*/
/* id of a structure:
	the variant is stored from bit 16 upwards, the record set number in
	bits 8..15 and the number of the record type within its set in bits 0..7.
*/
/* construction
*/
#define LSTRID( set, no, vari ) \
	(((int)(vari))<<16 | (set)<<8 | (no) )
/* access
	The two byte-wide fields are extracted by masking instead of a cast to
	plain char: the signedness of char is implementation-defined, so a cast
	would yield negative numbers for 128..255 on some platforms only.
	Every expansion is wrapped in parentheses so it can be used safely
	inside any larger expression.
*/
#define LSTRNO( l ) ((int)((l) & 0xff)) /* no of record type in set */
#define LSTRSET( l ) ((int)(((l)>>8) & 0xff)) /* record set number */
#define LSTRVAR( l ) ((short)((l)>>16)) /* variant */
/* size of a structure:
	number of occurrences of the repeated part from bit 16 upwards,
	length of the repeated part in bits 8..15,
	length of the fixed part in bits 0..7.
*/
/* construction
*/
#define LSTRSIZE( fix, rep, occ ) \
	(((int)(occ))<<16 | (rep)<<8 | (fix))
/* access
	The byte-wide lengths are masked out rather than cast to plain char,
	because char may be signed: a cast would report lengths of 128..255
	as negative numbers on such platforms, which then also corrupts
	LSTRMEMS and LSTRLEN.
	Every expansion is wrapped in parentheses.
*/
#define LSTRFIX( l ) ((int)((l) & 0xff)) /* length of fixed part */
#define LSTRREP( l ) ((int)(((l)>>8) & 0xff)) /* length of repeated part */
#define LSTROCC( l ) ((short)((l)>>16)) /* occurrences of repeated part */
/* actual number of members: fixed part plus all repetitions */
#define LSTRMEMS( l ) (LSTRFIX(l) + LSTRREP(l)*LSTROCC(l))
/* in-memory size in bytes of the leading sizes int plus all members
	(w/o buffer).
*/
#define LSTRLEN( t ) ((int)sizeof(int)*(1 + LSTRMEMS(t)))
/**
The description of an external member's layout is encoded as a short "len"
and a short offset "off" into the data or repeated part, resp.
Offset and len are combined into one int.
If offsets are 0, they are computed based on the assumption
that members are laid out packed in the declared ordering.
For character data, len just gives the length of the member
in bytes (up to 2**15), with the highest bit cleared.
A length of 0 denotes character data of variable size.
Numerical data is marked with the highest bit set.
For numbers, the lowest two bits give the ld of the length in bytes
(i.e. 0=1,1=2,2=4,3=8 bytes).
The offset and length of the actually used bits within these bytes
are each encoded in six bits, which is enough for 0..63.
A length of 0 (as resulting from 6-bit overflow on '64') means no masking.
The 15th bit, if set, denotes high endian data.
Note that the only reason for stuffing all that into a short is,
that it is sufficient. The amount of space saved is minimal and
depending on the actual processor there may be a slight performance
penalty for the bit operations.
*/
/* split a combined member description:
	the low 16 bits are the "len" part, the next 16 bits the "off" part;
	both are delivered as short.
*/
#define LONG2LEN( v ) ((short)(v))
#define LONG2OFF( v ) ((short)((v) >> 16))
/* constants
*/
#define LMBR_FNUM 0x8000 /* flag numeric data */
#define LMBR_FHIE 0x4000 /* flag high endian data */
/* construction
	ld is the base 2 logarithm of the length in bytes.
	The argument is parenthesized so that any expression, e.g. a
	conditional one, may be passed; ld is deliberately not masked.
*/
#define LMBRNUM( ld ) (LMBR_FNUM | (ld))
/* standard numeric types: the world according to Java :) */
#define LMBRBYTE LMBRNUM(0)
#define LMBRSHORT LMBRNUM(1)
#define LMBRINT LMBRNUM(2)
#define LMBRLONG LMBRNUM(3)
/* bit mask construction for numerical data
	off and len are each confined to their six bit field, so that a len
	of 64 wraps to 0 (no masking) and an out of range value can not spill
	into the neighbouring field or the flag bits.
*/
#define LMBRBITS( off, len, ld ) \
	(LMBR_FNUM | ((off) & 0x3f)<<8 | ((len) & 0x3f)<<2 | (ld))
/* same for a 4 byte number (ld 2) */
#define LMBRBITS4( off, len ) \
	(LMBR_FNUM | ((off) & 0x3f)<<8 | ((len) & 0x3f)<<2 | 2)
/* if you have to specify the offset explicitly:
	len goes to the low 16 bits, off is shifted to the upper ones
*/
#define LSTRLOFF( len, off ) ((len) | (off)<<16)
/* access
	all of these take the "len" part of a member description
*/
/* non-zero iff the numeric flag is set */
#define LMBRISNUM( m ) ((m) & LMBR_FNUM)
/* the lowest two bits: ld of the length in bytes */
#define LMBRLD( m ) ((m) & 0x3)
/* bit access for numerical data */
/* non-zero iff any of the bit offset or bit length bits is set */
#define LMBRISBITS( m ) ((m) & 0x3ffc)
/* six bits starting at bit 2: length of the used bits */
#define LMBRBITLEN( m ) (((m) >> 2) & 0x003f)
/* six bits starting at bit 8: offset of the used bits */
#define LMBRBITOFF( m ) (((m) >> 8) & 0x003f)
/**
Description of an external structure layout.
This is usually fixed for a given (implementation of a) type;
for the isis mst record, however, the occurrences of the repeated
part and the total external and internal lengths vary for each record.
*/
/* indexes into the description of an external structure layout */
enum {
	LSTR_SIZE = 0, /* sizes */
	LSTR_XRLO = 1, /* repeated part byte length and offset <<16; auto */
	LSTR_XLEN = 2, /* ext. total length of data; auto */
	LSTR_ILEN = 3, /* int. total length of data; auto */
	LSTR_XMBR = 4  /* members */
};
/* three zeroes; presumably the initialisers for the three slots
	marked "auto" above -- confirm against the users of this macro
*/
#define LSTR_AUTOLENGTHS 0,0,0
/* totmbrs plus 4 (the value of LSTR_XMBR); presumably the number of ints
	of a description with totmbrs members -- confirm
*/
#define LSTR_LONGS( totmbrs ) ((totmbrs)+4)
/* technical variants of a record;
	LVAR_VARI comes last and thus equals the number of variants
*/
typedef enum {
	LVAR_PAC  = 0, /* packed little endian base variant */
	LVAR_ALI  = 1, /* aligned variant */
	LVAR_BIG  = 2, /* big endian aligned */
	LVAR_VARI = 3
} lstr_variant;
/* known record sets;
	LSET_SETS comes last and thus equals the number of sets
*/
enum {
	LSET_MST  = 0, /* isis 1 master file */
	LSET_INV  = 1, /* isis 1 inverted file */
	LSET_SETS = 2
};
/* what is kept for one record set */
typedef struct {
	/* an array of names per record type */
	const char ***name;
	/* an array of descs per variant, one slot for each lstr_variant */
	int **desc[LVAR_VARI];
} LstrSet;
/* data types of ISIS 1 master file;
	LSTR_MST comes last and thus equals the number of types
*/
typedef enum {
	LSTR_MFC = 0, /* MST head */
	LSTR_MFR = 1, /* MST record */
	LSTR_XRF = 2, /* XRF record */
	LSTR_MST = 3  /* count */
} lstr_mst;
/* MFC members; numbering starts at 1 */
enum {
	LMFC_CTLM = 1,
	LMFC_NMFN = 2, /* next available MFN */
	LMFC_NMFB = 3, /* next free block in MF counting from 1 */
	LMFC_NMFP = 4, /* next free pos in MF */
	LMFC_TYPE = 5, /* "0 for user's bases; 1 for messages" */
	LMFC_RCNT = 6, /* record count ? not used by CISIS */
	LMFC_MFX1 = 7,
	LMFC_MFX2 = 8, /* LAN lock */
	LMFC_MFX3 = 9, /* LAN lock */
	LMFC__FL  = 10 /* one past the last member */
};
/* XRF members; numbering starts at 1 */
enum {
	LXRF_XPOS = 1,
	LXRF_XREC = 2
};
/* data types of ISIS 1 inverted file;
	LSTR_INV comes last and thus equals the number of types
*/
typedef enum {
	LSTR_CNT = 0, /* CNT record */
	LSTR_N01 = 1, /* N01 record */
	LSTR_L01 = 2, /* L01 record */
	LSTR_N02 = 3, /* N02 record */
	LSTR_L02 = 4, /* L02 record */
	LSTR_IFP = 5, /* IFP record */
	LSTR_INV = 6  /* count */
} lstr_inv;
/* CNT members; numbering starts at 1 */
enum {
	LCNT_TYPE = 1,  /* 1 or 2: N0x */
	LCNT_ORDN = 2,  /* fix 5 */
	LCNT_ORDF = 3,  /* fix 5 */
	LCNT_N    = 4,  /* fix 15 */
	LCNT_K    = 5,  /* fix 5 */
	LCNT_LEV  = 6,  /* depth of index */
	LCNT_POSR = 7,  /* position of root */
	LCNT_NMAX = 8,
	LCNT_FMAX = 9,
	LCNT_ABNO = 10,
	LCNT__FL  = 11  /* one past the last member */
};
/* N0x members */
enum {
	/* fixed part, numbering starts at 1 */
	LN0X_POS  = 1, /* record number */
	LN0X_OCK  = 2, /* number of keys */
	LN0X_TYPE = 3, /* type 1 or 2 */
	LN0X__FL  = 4, /* offset of repeated part */
	/* repeated part, numbering restarts at 0 */
	LN0X_KEY  = 0, /* key chars */
	LN0X_REF  = 1, /* ref to next node (>0) or leaf (<0) */
	LN0X__RL  = 2  /* length of repeated part */
};
/* L0x members */
enum {
	/* fixed part, numbering starts at 1 */
	LL0X_POS  = 1, /* record number */
	LL0X_OCK  = 2, /* number of keys */
	LL0X_TYPE = 3, /* type 1 or 2 */
	/* NOTE(review): this member used to carry the same "type 1 or 2"
		comment as LL0X_TYPE, which looks like a copy and paste;
		its actual meaning is not visible in this header -- confirm
	*/
	LL0X_PS   = 4,
	LL0X__FL  = 5, /* offset of repeated part */
	/* repeated part, numbering restarts at 0 */
	LL0X_KEY  = 0, /* key chars */
	LL0X_INFB = 1, /* ifp block */
	LL0X_INFP = 2, /* ifp pos */
	LL0X__RL  = 3  /* length of repeated part */
};
/* IFP members */
enum {
	/* fixed part, numbering starts at 1 */
	LIFP_NXTB = 1, /* block of next segment */
	LIFP_NXTP = 2, /* pos of next segment */
	LIFP_TOTP = 3, /* total postings */
	LIFP_SEGP = 4, /* postings in this segment */
	/* NOTE(review): this member used to carry the same "postings in this
		segment" comment as LIFP_SEGP, which looks like a copy and paste;
		its actual meaning is not visible in this header -- confirm
	*/
	LIFP_SEGC = 5,
	LIFP__FL  = 6, /* offset of repeated part */
	/* repeated part, numbering restarts at 0 */
	LIFP_MFN  = 0, /* mfn */
	LIFP_ADD  = 1, /* additional qualifiers */
	LIFP__RL  = 2  /* length of repeated part */
};
/** the builtin record sets, one entry for each of the LSET_SETS sets.
*/
extern const LstrSet lstrlib[LSET_SETS];

/** auto-initialise a structure.
	@param str the structure to initialise;
		iff NULL, all builtins are initialised instead
	@return an int whose meaning is not visible in this header
*/
extern int lstr_auto(int *str);
#define LSTR_H
#endif /* LSTR_H */