1 |
dpavlin |
604 |
/* |
2 |
|
|
The Malete project - the Z39.2/Z39.50 database framework of OpenIsis. |
3 |
|
|
Version 0.9.x (patchlevel see file Version) |
4 |
|
|
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org |
5 |
|
|
|
6 |
|
|
This library is free software; you can redistribute it and/or |
7 |
|
|
modify it under the terms of the GNU Lesser General Public |
8 |
|
|
License as published by the Free Software Foundation; either |
9 |
|
|
version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
|
11 |
|
|
This library is distributed in the hope that it will be useful, |
12 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
14 |
|
|
See the GNU Lesser General Public License for more details. |
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU Lesser General Public |
17 |
|
|
License along with this library; if not, write to the Free Software |
18 |
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
|
|
20 |
|
|
see README for more information |
21 |
|
|
EOH */ |
22 |
|
|
#ifndef CORE_H |
23 |
|
|
|
24 |
|
|
#include <assert.h> |
25 |
|
|
#include <string.h> /* various str* and mem* */ |
26 |
|
|
|
27 |
|
|
/* |
28 |
|
|
$Id: core.h,v 1.43 2004/11/11 15:47:08 kripke Exp $ |
29 |
|
|
full interface of the Malete core |
30 |
|
|
*/ |
31 |
|
|
|
32 |
|
|
#define CORE_VERSION "0.9.9" |
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
/* ****************************************************************** */ |
37 |
|
|
/* */ |
38 |
|
|
/* COMPILER FEATURES */ |
39 |
|
|
/* */ |
40 |
|
|
/* ****************************************************************** */ |
41 |
|
|
|
42 |
|
|
/* refuse to build with anything but an ANSI/ISO C compiler */
#if !defined( __STDC__ ) /* defined by ISO C */
#	error "need ANSI/ISO C compiler"
#endif
45 |
|
|
/* "cpp -dM </dev/null" lists nonstandard machine and OS macros */ |
46 |
|
|
|
47 |
|
|
/* optimizing */

/* usage: OPT_INLINE type func () */
#if defined( __GNUC__ )
#	define OPT_INLINE __inline__
#else
#	define OPT_INLINE
#endif

/*
	called function pops args; makes most functions slightly faster.
	usage: type OPT_STDCALL func () on both declaration and definition
*/
#if defined( __GNUC__ ) && defined( __i386__ )
#	define OPT_STDCALL __attribute__((stdcall))
#else
#	define OPT_STDCALL
#endif

/*
	more aggressive: up to 3 args in registers, fails with -pg.
	usage: type OPT_REGPARM func () in the declaration is sufficient;
	also using it in the definition allows fallback to stdcall.
*/
#if defined( NDEBUG ) && defined( __GNUC__ ) && defined( __i386__ )
#	define OPT_REGPARM __attribute__((regparm(3)))
#else
#	define OPT_REGPARM OPT_STDCALL
#endif
68 |
|
|
|
69 |
|
|
/* CPU features */

/*
	CPU_BIG_ENDIAN is defined when the target stores the most significant
	byte first.
	Compilers providing __BYTE_ORDER__ (GCC, clang) tell us directly;
	the architecture list is only the fallback for compilers without it,
	since it misses every big endian target that is not named there.
*/
#if defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ )
#	if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#		define CPU_BIG_ENDIAN
#	endif
#elif defined( __sparc__ ) || defined( __ppc__ ) || defined( __BIG_ENDIAN__ )
#	define CPU_BIG_ENDIAN
#endif

/* set for sparc only (presumably: CPU faults on unaligned access) */
#if defined( __sparc__ )
#	define CPU_NEED_ALIGN
#endif
76 |
|
|
/*
	CPU_PAGE_SHIFT: max bits of PAGE_SIZE; min is always 12.
	grep -r 'fine PAGE_SHIFT' /usr/src/linux/include/asm-* : 12..16
*/
#if defined( __i386__ ) || defined( __ppc__ )
#	define CPU_PAGE_SHIFT 12 /* there are more not exceeding 12 bits */
#elif defined( __arm__ )
#	define CPU_PAGE_SHIFT 15
#elif defined( __ia64__ )
#	define CPU_PAGE_SHIFT 16
#else /* however 13 bits is not too much waste */
#	define CPU_PAGE_SHIFT 13 /* sparc, sparc64, alpha, m68k, cris */
#endif
87 |
|
|
|
88 |
|
|
/* 64 bit integer */

/*
	LOLO_BUILTIN names the compiler's 64 bit integer type,
	LOLO( v ) / LULU( v ) paste the signed / unsigned literal suffix onto v.
*/
#if defined( __GNUC__ )
#	define LOLO_BUILTIN long long
#	define LOLO( v ) v##LL
#	define LULU( v ) v##ULL
#elif defined( _MSC_VER ) || defined( __BORLANDC__ )
#	define LOLO_BUILTIN __int64
#	define LOLO( v ) v##i64 /* __int64 literals take the i64 / ui64 suffix */
#	define LULU( v ) v##ui64
#elif defined( __STDC_VERSION__ ) && __STDC_VERSION__ >= 199901L
#	define LOLO_BUILTIN long long /* standard since C99 */
#	define LOLO( v ) v##LL
#	define LULU( v ) v##ULL
#elif 0xFFFFFFFFL != ~0L /* 64 bit compiler ? */
	/* only reached by pre-C99 compilers, which evaluate #if using long;
	   C99 evaluates #if using intmax_t, where this test is always true */
#	define LOLO_BUILTIN long
#	define LOLO( v ) v##L
#	define LULU( v ) v##UL
#else
#	error "please use a compiler providing a 64 bit integer type. thanks."
#endif
typedef LOLO_BUILTIN lolo;
typedef unsigned LOLO_BUILTIN lulu;
106 |
|
|
|
107 |
|
|
|
108 |
|
|
|
109 |
|
|
/* ****************************************************************** */ |
110 |
|
|
/* */ |
111 |
|
|
/* BUILD FEATURES */ |
112 |
|
|
/* */ |
113 |
|
|
/* ****************************************************************** */ |
114 |
|
|
|
115 |
|
|
/*
	support the ENV_SHARED mode (BUILD_SHMODE).
	the request is dropped when WIN32 is defined.
*/
#if defined( WIN32 )
#	if defined( BUILD_SHMODE )
#		undef BUILD_SHMODE
#	endif
#endif
119 |
|
|
|
120 |
|
|
|
121 |
|
|
|
122 |
|
|
/* ****************************************************************** */ |
123 |
|
|
/* */ |
124 |
|
|
/* DATA STRUCTURES AND UTILITIES */ |
125 |
|
|
/* */ |
126 |
|
|
/* ****************************************************************** */ |
127 |
|
|
|
128 |
|
|
/* some characters, given by their code */
#define TAB 0x09 /* horizontal, that is */
#define LF  0x0a /* LineFeed a.k.a. newline - '\n' isn't really well defined */
#define VT  0x0b /* vertical, used as newline replacement */
#define CR  0x0d /* for windoze, telnet and the like */
133 |
|
|
|
134 |
|
|
/**
	parse an int; similar to atoi/strtol, but
	- the string need not be 0 terminated (unless l < 0)
	- cares for hex 0x, but not for octal 0.
*/
int a2i ( const char *p, int l );

/**
	similar to a2i, but
	- puts the result in *res
	- returns the number of parsed characters
*/
int a2il ( const char *p, int l, int *res );

/**
	similar to a2i, but returns dflt
	if less than l characters of p have been successfully parsed
*/
int a2id ( const char *p, int l, int dflt );

/**
	print int (NOT 0 terminated !).
	p must have 11 bytes space
	@return number of chars (up to 10 digits + minus sign)
*/
int i2a ( char *p, int i );
/* same signature for an unsigned value; not documented separately */
int u2a ( char *p, unsigned u );

/* print exactly n digits, do not add 0 byte */
void u2a0 ( char *p, unsigned u, unsigned n );
156 |
|
|
|
157 |
|
|
/* base 36 digits: '0'..'9', 'a'..'z' */
extern const char b36dig[36];
/* digit values: 0..35 for '0'..'9', 'a'..'z' and 'A'..'Z', 36 else */
extern const char b36val[256];

/* latin 1 tables, indexed by unsigned char */
extern const unsigned char lat1up[256]; /* latin 1 uppercase */
extern const unsigned char lat1ct[256]; /* latin 1 ctype */

enum { /* character type bits */
	CT_WHITE = 1 << 0, /* all <= ' ' */
	CT_DIGIT = 1 << 1, /* 0..9 */
	CT_ALPHA = 1 << 2, /* 'A'..'Z','a'..'z' */
	CT_IDENT = 1 << 3, /* digits,alphas,underscore */
	CT_SPECL = 1 << 4, /* all other ASCIIs */
	CT_LATIN = 1 << 5  /* ident + non ASCII Latin1 alphas */
};

enum { /* character type values (combinations of the bits above) */
	CT_W = CT_WHITE,
	CT_D = CT_LATIN | CT_IDENT | CT_DIGIT,
	CT_A = CT_LATIN | CT_IDENT | CT_ALPHA,
	CT_I = CT_LATIN | CT_IDENT,
	CT_S = CT_SPECL,
	CT_L = CT_LATIN,
	CT_N = 0 /* other C1 control or symbol */
};

/* c has exactly the type value CT_<type>, e.g. CT_IS(A, c) */
#define CT_IS(type, c) (lat1ct[(unsigned char)(c)] == CT_##type)
/* the type of c has the bit(s) of CT_<flg> set, e.g. CT_HAS(IDENT, c) */
#define CT_HAS(flg, c) (lat1ct[(unsigned char)(c)] & CT_##flg)
182 |
|
|
|
183 |
|
|
|
184 |
|
|
/**
	replace from by to in bytes
	(len is presumably the number of bytes to process)
*/
void mTr ( char *bytes, char from, char to, int len );
186 |
|
|
|
187 |
|
|
|
188 |
|
|
/* a field: a tag and a counted value */
typedef struct Fld Fld;
struct Fld {
	int       tag;
	unsigned  len; /* must use at most 31 bits, highest is temporarily abused */
	char     *val; /* not necessarily 0 terminated */
};
193 |
|
|
|
194 |
|
|
/*
	v (value) functions on a single field
*/

/* the field's value converted by a2i */
#define V2I( f ) a2i( (f)->val, (f)->len )

/*
	field has primary value key k of length l:
	the value starts with k and either ends there or continues with a TAB
*/
#define VKEY( f, k, l ) ( \
	!((f)->len < (l)) \
	&& 0 == memcmp( (f)->val, (k), (l) ) \
	&& ((l) == (f)->len || (f)->val[(l)] == TAB) \
)

/* dup a field as 0-terminated string */
#define VDUPZ( f ) ((char*)mDupz( (f)->val, (f)->len ))
207 |
|
|
|
208 |
|
|
/* |
209 |
|
|
snip tab-separated subfields from value, |
210 |
|
|
setting tag to the subfield identifier and len/val to the contents. |
211 |
|
|
if opt is not 0, it lists the options wanted (* for the primary). |
212 |
|
|
dst->val should be 0-initialized; |
213 |
|
|
if it's greater than src->val, search starts at dst->val+dst->len, |
214 |
|
|
@return 0 if not found, else 1+dst->len |
215 |
|
|
*/ |
216 |
|
|
extern int vGet ( Fld *dst, const Fld *src, const char *opt ); |
217 |
|
|
/* |
218 |
|
|
access to the primary value never fails, if initialized, so return len |
219 |
|
|
*/ |
220 |
|
|
#define VPRI( dst, src ) ((dst)->val = 0, vGet(dst, src, 0), (dst)->len) |
221 |
|
|
/*
	field value equals the 0-terminated string str.
	lengths are compared first, so str is never read beyond its
	terminator, even if the value contains 0 bytes.
	str is evaluated twice.
*/
#define VEQZ( f, str ) ( \
	strlen( (str) ) == (f)->len && !memcmp( (f)->val, (str), (f)->len ) \
)
222 |
|
|
|
223 |
|
|
/** |
224 |
|
|
undo the encoding of lBin (see below). |
225 |
|
|
	return #bytes in dst, which is <= src->len
226 |
|
|
extern int vDecod ( char *dst, const Fld *src ); |
227 |
|
|
*/ |
228 |
|
|
|
229 |
|
|
/* sign(a - b) */ |
230 |
|
|
extern int vCmp ( const Fld *a, const Fld *b ); |
231 |
|
|
/* a > b */ |
232 |
|
|
extern int vGt ( const Fld *a, const Fld *b ); |
233 |
|
|
|
234 |
|
|
/* comparision function type for rSort */ |
235 |
|
|
typedef int VGt ( const Fld *a, const Fld *b ); |
236 |
|
|
|
237 |
|
|
|
238 |
|
|
/* |
239 |
|
|
r (record) functions on an array of fields |
240 |
|
|
they expect the tag of the first field to be the negative number of fields. |
241 |
|
|
The value of the first field ("header") may contain various meta info. |
242 |
|
|
For lists representing database "records", |
243 |
|
|
the canonical format is [no[@pos]][<TAB>leader]. |
244 |
|
|
Leader can be used e.g. to hold a Z39.2 leader as used by MARC. |
245 |
|
|
Other (protocol) lists should start with some type identifier. |
246 |
|
|
|
247 |
|
|
it's a matter of taste whether you like and use this typedef. |
248 |
|
|
as it's not going to be changed but only here to clarify the intend, |
249 |
|
|
the core functions do not use it. |
250 |
|
|
*/ |
251 |
|
|
typedef const Fld *Rec; |
252 |
|
|
|
253 |
|
|
/* number of fields, kept as negative tag of the first field */
#define RLEN( r ) (0 - (r)->tag)
/* one past the last field */
#define REND( r ) ((r) - (r)->tag)
/* the last field */
#define RLAST( r ) ((r) - (r)->tag - 1)
256 |
|
|
|
257 |
|
|
extern unsigned rSiz ( const Fld *r ); |
258 |
|
|
|
259 |
|
|
/** |
260 |
|
|
get occurence of field. |
261 |
|
|
@param pos if given, the first occ starting at pos is searched |
262 |
|
|
and pos is set to one after the found position (or after end). |
263 |
|
|
may be used to loop all on an int var initialized to 0. |
264 |
|
|
@return the field or 0 |
265 |
|
|
*/ |
266 |
|
|
extern const Fld *rGet ( const Fld *r, int tag, int *pos ); |
267 |
|
|
|
268 |
|
|
/* |
269 |
|
|
get field with given tag and primary value or empty pv |
270 |
|
|
*/ |
271 |
|
|
extern const Fld *rKey ( const Fld *r, int tag, const char *key ); |
272 |
|
|
|
273 |
|
|
/* |
274 |
|
|
create a new const rec in a contigous mAlloced peace of mem |
275 |
|
|
if siz is 0, rSiz() is used |
276 |
|
|
*/ |
277 |
|
|
extern const Fld *rDup ( const Fld *src, unsigned siz ); |
278 |
|
|
|
279 |
|
|
|
280 |
|
|
/** flatten (serialize) record |
281 |
|
|
to tag\tval lines ended by a blank line. |
282 |
|
|
buf must be of size rSiz(rec) |
283 |
|
|
+ 13*RLEN(rec) for sign+10digits+tab+nl |
284 |
|
|
+ 1 for the blank line |
285 |
|
|
@return # of bytes written |
286 |
|
|
*/ |
287 |
|
|
extern int rSer ( char *buf, const Fld *rec ); |
288 |
|
|
|
289 |
|
|
/** sort fields |
290 |
|
|
WARNING: this cuts fields longer than 2GB !!! |
291 |
|
|
NOTE that rSort, unlike qsort, does NOT use a 3-way cmp function, |
292 |
|
|
but a boolean gt function |
293 |
|
|
*/ |
294 |
|
|
extern void rSort ( Fld *rec, VGt *gt ); |
295 |
|
|
extern void rSortTag ( Fld *rec ); |
296 |
|
|
extern void rSortVal ( Fld *rec ); /* == rSort(vGt) */ |
297 |
|
|
|
298 |
|
|
|
299 |
|
|
#define DEFBLKLEN 8000 /* default buffer block length */
#define DEFFIELDS 40 /* default number of fields */

/* chained buffer block */
typedef struct LBlk LBlk;
struct LBlk {
	LBlk     *nxt;
	unsigned  siz;
	char      byt[DEFBLKLEN]; /* actual len may vary */
};
307 |
|
|
|
308 |
|
|
|
309 |
|
|
/* |
310 |
|
|
A full-fledged, modifiable list. |
311 |
|
|
*/ |
312 |
|
|
typedef struct List { |
313 |
|
|
Fld *fld; /* fields list, initially fl0 */ |
314 |
|
|
unsigned fav; /* fields available at end of *fld buffer */ |
315 |
|
|
int siz; /* used secondary buffer size minus holes (add buf-blk.byt) */ |
316 |
|
|
char *buf; /* pointing into blk->byt */ |
317 |
|
|
char *end; /* of blk->byt */ |
318 |
|
|
LBlk *blk; /* buffer chain, initially bl0 */ |
319 |
|
|
char *bok; /* if == buf, buffers contain the serialization */ |
320 |
|
|
Fld fl0[DEFFIELDS]; |
321 |
|
|
LBlk bl0; |
322 |
|
|
} List; |
323 |
|
|
|
324 |
|
|
|
325 |
|
|
/* the record macros RLEN, REND and RLAST applied to the list's fields */
#define LLEN( l ) (0 - (l)->fld->tag)
#define LEND( l ) ((l)->fld - (l)->fld->tag)
#define LLAST( l ) ((l)->fld - (l)->fld->tag - 1)
328 |
|
|
|
329 |
|
|
|
330 |
|
|
/* |
331 |
|
|
initialize list and set header. |
332 |
|
|
if fmt is 0, no header is printed, and the list left empty. |
333 |
|
|
(i.e. the first field added will become the header). |
334 |
|
|
@return l |
335 |
|
|
*/ |
336 |
|
|
extern List *lInit ( List *l, const char *fmt, ... ); |
337 |
|
|
/* |
338 |
|
|
completely clear all fields and buffers. |
339 |
|
|
*/ |
340 |
|
|
extern List *lClr ( List *l ); |
341 |
|
|
/* |
342 |
|
|
clear all fields and buffers, but keep the header. |
343 |
|
|
*/ |
344 |
|
|
extern List *lReset ( List *l ); |
345 |
|
|
/* |
346 |
|
|
clear all fields and buffers, keep nothing |
347 |
|
|
*/ |
348 |
|
|
extern void OPT_STDCALL lFini ( List *l ); |
349 |
|
|
|
350 |
|
|
/* |
351 |
|
|
increase available space. |
352 |
|
|
If buffer has a flusher, call it. |
353 |
|
|
fields -1 means we want to append (keep last field contigous, do NOT flush). |
354 |
|
|
fields >0 means reserve at least mode fields. |
355 |
|
|
*/ |
356 |
|
|
extern int OPT_REGPARM lExtend ( List *l, unsigned need, int fields ); |
357 |
|
|
|
358 |
|
|
|
359 |
|
|
|
360 |
|
|
/* bytes available at buf, i.e. up to end */
#define LAVL( l ) ((unsigned)((l)->end - (l)->buf))
/* bytes used: the siz recorded so far plus those used in the current block */
#define LSIZ( l ) ((unsigned)((l)->siz + ((l)->buf - (l)->blk->byt)))
363 |
|
|
|
364 |
|
|
/*
	fragment used to start a new field with tag t.
	It declares __i (the new, negative field count) and __f (the new field),
	so it must open a block.
	If the list was empty, the field will become the header,
	ignoring the tag: the count is stored last and overwrites it.
*/
#define LDEFNEWF( l, t ) \
	int __i = (l)->fld->tag - 1; \
	Fld *__f = (l)->fld - (__i + 1); \
	assert(__i < 0); \
	__f->tag = (t); \
	(l)->fld->tag = __i;
375 |
|
|
|
376 |
|
|
/*
	add field tag t reserving n bytes space.
	the new field is empty (len 0) and starts at buf, which is not advanced.
	nothing is added if the space is missing and lExtend fails.
	n is parenthesized, so that expressions like (c ? a : b) compare correctly.
*/
#define LNEWF( l, t, n ) do { \
	if ( ((l)->fav && LAVL(l) >= (n)) || lExtend( l, (n), 1 ) ) { \
		LDEFNEWF( l, t ) \
		__f->val = (l)->buf; \
		__f->len = 0; \
		(l)->fav --; \
	} } while(0)

/*
	add field tag t using n bytes prefilled space at buf.
	must be preextended for one field;
	if no field is available, nothing is done.
*/
#define LPREF( l, t, n ) do { \
	if ( (l)->fav ) { \
		LDEFNEWF( l, t ) \
		__f->val = (l)->buf; \
		__f->len = (n); \
		(l)->fav --; \
		(l)->buf += (n); \
	} } while(0)
396 |
|
|
|
397 |
|
|
|
398 |
|
|
/*
	add field tag t with value v n bytes long.
	n is evaluated exactly once (so LADDS calls strlen only once);
	nothing is added if the space is missing and lExtend fails.
*/
#define LADD( l, t, v, n ) do { \
	unsigned __n = (n); \
	if ( ((l)->fav && LAVL(l) >= __n) || lExtend( l, __n, 1 ) ) { \
		LDEFNEWF( l, t ) \
		__f->val = (l)->buf; \
		__f->len = __n; \
		memcpy( __f->val, (v), __n ); \
		(l)->fav --; \
		(l)->buf += __n; \
	} } while(0)

/*
	append value v n bytes long to the last field.
	n is evaluated exactly once;
	nothing is appended if the space is missing and lExtend fails.
*/
#define LAPP( l, v, n ) do { \
	unsigned __n = (n); \
	if ( LAVL(l) >= __n || lExtend( l, __n, -1 ) ) { \
		Fld *__f = LLAST(l); \
		memcpy( (l)->buf, (v), __n ); \
		__f->len += __n; \
		(l)->buf += __n; \
	} } while(0)
415 |
|
|
|
416 |
|
|
/*
	add field tag t with int value i, printed by i2a.
	12 bytes are requested for the digits.
*/
#define LADDI( l, t, i ) do { \
	if ( ((l)->fav && LAVL(l) >= 12) || lExtend( l, 12, 1 ) ) { \
		LDEFNEWF( l, t ) \
		__f->val = (l)->buf; \
		__f->len = i2a( __f->val, i ); \
		(l)->buf += __f->len; \
		(l)->fav --; \
	} } while(0)
423 |
|
|
|
424 |
|
|
/* add field tag t with the 0-terminated string s as value */
#define LADDS( l, t, s ) LADD( l, t, (s), strlen( (s) ) )
/* append the 0-terminated string s */
#define LAPPS( l, s ) LAPP( l, (s), strlen( (s) ) )
/* add a copy of field f; f may be any pointer expression, e.g. &fld */
#define LADDF( l, f ) LADD( l, (f)->tag, (f)->val, (f)->len )
427 |
|
|
|
428 |
|
|
|
429 |
|
|
/** |
430 |
|
|
add a field with arbitrary binary data. |
431 |
|
|
The encoded field value will not contain newlines. |
432 |
|
|
The encoding maps a VT to two bytes VT 0, |
433 |
|
|
and a LF to VT 1, if the LF is followed by a 0 or 1, a single VT else. |
434 |
|
|
extern void lBin ( List *l, int tag, const char *bytes, int len ); |
435 |
|
|
*/ |
436 |
|
|
|
437 |
|
|
|
438 |
|
|
/* |
439 |
|
|
append canonical options fields. |
440 |
|
|
args starting with '-' are treated as options and appended as |
441 |
|
|
tab separated subfield w/o the '-'. |
442 |
|
|
If the first arg does not start with '-', it is used as primary value. |
443 |
|
|
Other non-options are appended as subfields indicated by '@'. |
444 |
|
|
In order to skip the program's name, from main() use with argc-1,argv+1. |
445 |
|
|
*/ |
446 |
|
|
extern int lArgv ( List *l, int tag, int argc, const char **argv ); |
447 |
|
|
/* |
448 |
|
|
same using 0-terminated list. |
449 |
|
|
extern int lArgs ( List *l, int tag, const char *arg, ... ); |
450 |
|
|
*/ |
451 |
|
|
/** |
452 |
|
|
A variation on this theme as used for env.var: |
453 |
|
|
an initial non-option as command and following options go to the header. |
454 |
|
|
every following non-option starts a new field. |
455 |
|
|
*/ |
456 |
|
|
extern List *lVar ( List *l, int argc, const char **argv ); |
457 |
|
|
|
458 |
|
|
/* if siz is 0, rSiz() is used */ |
459 |
|
|
extern int lCpy ( List *l, const Fld *src, unsigned siz ); |
460 |
|
|
#define LCPY( l, src ) lCpy( l, (src)->fld, LSIZ(src) ) |
461 |
|
|
|
462 |
|
|
/* create canned version of list as new record */ |
463 |
|
|
#define LCAN( l ) rDup( (l)->fld, LSIZ(l) ) |
464 |
|
|
|
465 |
|
|
|
466 |
|
|
/* |
467 |
|
|
Print a field to list. |
468 |
|
|
a fmt of 0 indicates varargs. |
469 |
|
|
*/ |
470 |
|
|
extern int lOut ( List *to, int tag, const char *fmt, ... ); |
471 |
|
|
|
472 |
|
|
|
473 |
|
|
/* |
474 |
|
|
set the first field with tag to val (of len, -1 for strlen) |
475 |
|
|
kill additional fields with tag |
476 |
|
|
if no field found, add one at end |
477 |
|
|
extern int lSet ( List *l, int tag, const char *val, unsigned len ); |
478 |
|
|
*/ |
479 |
|
|
|
480 |
|
|
|
481 |
|
|
enum { /* list parse state: state bits above a 24 bit length */
	LPS_SOR = 0, /* at start of record */
	LPS_SOL = 1 << 24, /* at start of line */
	LPS_TAG = 2 << 24, /* in tag (only digits seen in line) */
	LPS_VAL = 3 << 24, /* in val (some non-digit seen) */
	LPS_NEG = 4 << 24, /* found '-' at start of line */
	LPS_CR  = 8 << 24, /* kill carriage return character */
	LPS_LEN = (1 << 24) - 1 /* mask of the length bits */
};
490 |
|
|
/** |
491 |
|
|
add serialized "text" to rec |
492 |
|
|
lps is the buffer length (of up to 16MB) + state bits |
493 |
|
|
@return new state, if buffer was exhausted, |
494 |
|
|
or #remaining bytes (|LPS_SOR), if a blank line was seen |
495 |
|
|
*/ |
496 |
|
|
extern int lParse (List *l, const char *txt, int lps); |
497 |
|
|
|
498 |
|
|
|
499 |
|
|
/** |
500 |
|
|
record sink. |
501 |
|
|
The sink function may be called with eor 0 after adding one or several fields |
502 |
|
|
to optionally flush partial records. |
503 |
|
|
It must be called with eor when a record is complete. |
504 |
|
|
In that case it must prepare List to receive a new header. |
505 |
|
|
This typically is done by flushing and resetting list, |
506 |
|
|
however, it would be sufficient to prepare for a new embedded |
507 |
|
|
subrecord by recording the position of the next head. |
508 |
|
|
*/ |
509 |
|
|
typedef struct Sink Sink; |
510 |
|
|
typedef void sink (Sink *that, int eor); /* clean all or part of buffer(s) */ |
511 |
|
|
|
512 |
|
|
struct Sink { |
513 |
|
|
List lst; |
514 |
|
|
sink *snk; /* sinking function */ |
515 |
|
|
void *dst; /* destination */ |
516 |
|
|
int off; /* start of current record in list, negative after partial flush */ |
517 |
|
|
}; |
518 |
|
|
|
519 |
|
|
#define SINK(s) (s)->snk(s, 0) |
520 |
|
|
#define SEOR(s) (s)->snk(s, 1) |
521 |
|
|
#define SCPY(s, l) do { LCPY(&(s)->lst, l); SEOR(s); } while (0) |
522 |
|
|
#define SCPYR(s, r) do { lCpy(&(s)->lst, r, 0); SEOR(s); } while (0) |
523 |
|
|
|
524 |
|
|
/* ****************************************************************** */ |
525 |
|
|
/* */ |
526 |
|
|
/* ENVIRONMENT AND SYSTEM */ |
527 |
|
|
/* */ |
528 |
|
|
/* ****************************************************************** */ |
529 |
|
|
|
530 |
|
|
/* the file handle type and its "no file" value */
#if defined( WIN32 )
typedef void *file;
#	define FIL_NONE 0
#else
typedef int file;
#	define FIL_NONE -1
#endif
537 |
|
|
|
538 |
|
|
/* environment. |
539 |
|
|
*/ |
540 |
|
|
typedef struct Env { |
541 |
|
|
List *opt; /* options (header is packed command line) */ |
542 |
|
|
file in; |
543 |
|
|
Sink *out; /* a proper record sink expecting complete records */ |
544 |
|
|
Sink *err; /* a usually field buffered sink for eOut messages */ |
545 |
|
|
int log; /* level */ |
546 |
|
|
int flg; |
547 |
|
|
int wri; /* write mode none/excl/shared */ |
548 |
|
|
int sig; /* interrupted by signal */ |
549 |
|
|
unsigned psz; /* system page size = 1<<psh enforced min 4K */ |
550 |
|
|
int psh; /* page shift (bits) 12..CPU_PAGE_SHIFT */ |
551 |
|
|
unsigned rml; /* r memory map limit (max pages per pointer map) */ |
552 |
|
|
unsigned qml; /* q memory map limit (max pages per tree map) */ |
553 |
|
|
/* LBlk *blk[5]; for memory management */ |
554 |
|
|
} Env; |
555 |
|
|
|
556 |
|
|
enum { /* env flags */ |
557 |
|
|
ENV_BUFFER = 1, /* env.err is buffered */ |
558 |
|
|
ENV_MSYNC = 2 /* use msync */ |
559 |
|
|
}; |
560 |
|
|
enum { /* env writing mode */ |
561 |
|
|
ENV_RO, /* readonly - not writing */ |
562 |
|
|
ENV_EXCL /* exclusive access (the default) */ |
563 |
|
|
#ifdef BUILD_SHMODE |
564 |
|
|
, ENV_SHARED /* shared access */ |
565 |
|
|
#endif |
566 |
|
|
}; |
567 |
|
|
enum { /* signal code */ |
568 |
|
|
ENV_CANCEL = 1, /* abort current request */ |
569 |
|
|
ENV_TERM /* abort current request and exit */ |
570 |
|
|
}; |
571 |
|
|
|
572 |
|
|
extern Env env; /* main environment */ |
573 |
|
|
|
574 |
|
|
/**
	error and loglevel codes.
	Each LOG_ level is the most negative code of its group.
*/
enum { /* our very own errno */
	ERR_OK      = 0, /* 0 is no error, also read(2)'s EINTR, EAGAIN */
	ERR_NO      = -1, /* get details from errno */

	/* error level FATAL: we are wrong */
	ERR_IDIOT   = -2, /* caught programming error */
	ERR_TRASH   = -3, /* database internal consistency */
	LOG_FATAL   = ERR_TRASH, /* fatal internal errors: we can't go on */

	/* error levels SYERR,ERROR,IOERR: system or user was wrong */
	ERR_NOMEM   = -4, /* out of memory, also open(2)'s EMFILE, ENFILE */
	ERR_IO      = -5, /* real IO error, also write(2)'s ENOSPC, EPIPE */
	ERR_BUSY    = -6, /* object is busy */
	LOG_SYSERR  = ERR_BUSY, /* problem with system resources: bad file, no mem */
	ERR_BADF    = -7, /* bad file, also read(2)'s EINVAL, some of open(2) */
	ERR_FAULT   = -8, /* 0 pointer or bad sized buffer given */
	ERR_INVAL   = -9, /* general invalid parameters, any EINVAL errno */
	LOG_ERROR   = ERR_INVAL, /* unusable input, database or query */
	ERR_AGAIN   = -10, /* no data at intr or nonblock */
	LOG_IOERR   = ERR_AGAIN, /* problem on IO */
	LOG_WARN    = -11, /* bad input */

	/* logging levels: nothing was wrong */
	LOG_INFO    = -12, /* some major event like opening a db */
	LOG_VERBOSE = -13, /* any event like reading a record */
	LOG_DEBUG   = -14, /* lots of processing details (debug built only) */
	LOG_TRACE   = -15, /* database content (log_str) */
	LOG_ALL     = LOG_TRACE
};
603 |
|
|
|
604 |
|
|
|
605 |
|
|
/* the list macros applied to the list of env.out */
#define EADD(t, v, n)  LADD( &env.out->lst, t, v, n )
#define EAPP(v, n)     LAPP( &env.out->lst, v, n )
#define EADDS(t, s)    LADDS( &env.out->lst, t, s )
#define EAPPS(s)       LAPPS( &env.out->lst, s )
#define EADDI(t, i)    LADDI( &env.out->lst, t, i )
#define EADDF(f)       LADDF( &env.out->lst, f )
611 |
|
|
|
612 |
|
|
/*
	Append a field with abs(tag) to env.out for non-negative tags,
	else to env.err if tag >= env's loglevel.
	For system errors, additional info is appended.
	env.err's eof is called, if any.
	returns tag

	supports only a small subset of printf formats -- see the src
	has %b (bytes), which is like %.*s, but prints the string in hex
*/
int eOut ( int tag, const char *fmt, ... );
int eRr ( int tag, const char *fmt, ... );

/* LOG_DBG is eRr in debug builds and vanishes with NDEBUG */
#if !defined( NDEBUG )
#	define LOG_DBG eRr
#elif defined( __GNUC__ )
#	define LOG_DBG( args... )
#else
#	define LOG_DBG (void) /* compiler should dispose of the statement */
#endif
631 |
|
|
|
632 |
|
|
/** |
633 |
|
|
Init the env. |
634 |
|
|
The first field specifies general command and options. |
635 |
|
|
Following fields describe databases. |
636 |
|
|
If no sinks are provided, file sinks on stdout and stderr are used. |
637 |
|
|
*/ |
638 |
|
|
extern void cInit ( List *args, Sink *out, Sink *err ); |
639 |
|
|
/* typical usage from main() */ |
640 |
|
|
#define CINIT(argl) cInit(lVar(lInit(argl,0), argc-1, argv+1), 0, 0) |
641 |
|
|
|
642 |
|
|
|
643 |
|
|
/** |
644 |
|
|
memory management. |
645 |
|
|
All memory will be initialized, |
646 |
|
|
and all allocs but TryAlloc exit the process when out of memory. |
647 |
|
|
DO NOT MIX with other alloc/free routines. |
648 |
|
|
*/ |
649 |
|
|
extern void *mAlloc ( int size ); |
650 |
|
|
extern void *mDup ( const void *str, int size ); |
651 |
|
|
/* cp siz bytes, append a 0 byte */ |
652 |
|
|
extern char *mDupz ( const char *str, int size ); |
653 |
|
|
extern void *mTryAlloc ( int size ); |
654 |
|
|
extern void mFree ( void *mem ); |
655 |
|
|
extern Fld *mFldAlloc ( int nfields ); |
656 |
|
|
extern void mFldFree ( Fld *fld ); |
657 |
|
|
extern LBlk *mBlkAlloc ( int size ); |
658 |
|
|
extern void mBlkFree ( LBlk *blk ); |
659 |
|
|
extern List *mListAlloc ( const char *name ); /* lInitialized with name */ |
660 |
|
|
extern void mListFree ( List *l ); |
661 |
|
|
#define mFldAlloc(n) ((Fld*)mAlloc((n)*sizeof(Fld))) |
662 |
|
|
#define mFldFree mFree |
663 |
|
|
#define mBlkFree mFree |
664 |
|
|
#define MFREE(p) do { mFree(p); (p)=0; } while(0) |
665 |
|
|
|
666 |
|
|
|
667 |
|
|
/** |
668 |
|
|
set tm to current time, return difference in millis |
669 |
|
|
*/ |
670 |
|
|
extern int tUpd ( lolo *tm ); |
671 |
|
|
/** |
672 |
|
|
print generalized time yyyyMMddHHmmss + 0 byte to buffer |
673 |
|
|
if tm is 0, current time is used |
674 |
|
|
if *tm is 0, *tm is updated |
675 |
|
|
buffer must have 15 bytes |
676 |
|
|
return millis |
677 |
|
|
*/ |
678 |
|
|
extern int tGtf ( char *buf, lolo *tm ); |
679 |
|
|
/** |
680 |
|
|
grok the fine manual. |
681 |
|
|
like tGtf, but with additional 3 digits millis |
682 |
|
|
return buffer, which must have 18 bytes |
683 |
|
|
*/ |
684 |
|
|
extern char *tGtfm ( char *buf, lolo *tm ); |
685 |
|
|
/** |
686 |
|
|
nanosl |
687 |
|
|
*/ |
688 |
|
|
extern void tSleep ( lolo tm ); |
689 |
|
|
|
690 |
|
|
|
691 |
|
|
/* ************************************************************ |
692 |
|
|
disk files (block devices) |
693 |
|
|
*/ |
694 |
|
|
|
695 |
|
|
|
696 |
|
|
enum { /* flags for opening files */
	/* basic open flags */
	FIL_RD    = 0x001, /* shall be opened for input */
	FIL_WR    = 0x002, /* shall be opened for output */
	FIL_RDWR  = FIL_RD | FIL_WR, /* shall be opened for both */
	FIL_TRY   = 0x004, /* do not complain if open fails */
	/* write flags */
	FIL_CREAT = 0x010, /* shall be created */
	FIL_TRUNC = 0x020, /* shall be truncated */
	FIL_SYNC  = 0x040, /* syncing output */
	/* lock flags */
	FIL_TLOCK = 0x100, /* try locking (EX with WR) */
	FIL_BLOCK = 0x200, /* blocking lock (EX with WR) */
	FIL_FLOCK = FIL_TLOCK | FIL_BLOCK /* any locking is set */
};
711 |
|
|
|
712 |
|
|
/** open a new fid based on name and flags. |
713 |
|
|
TLOCK can be specified on any plattform, |
714 |
|
|
translating to a fcntl full file lock on *nix and a share mode on win |
715 |
|
|
@return 0 or some error code |
716 |
|
|
*/ |
717 |
|
|
extern int fOpen ( file *f, const char *name, int flags ); |
718 |
|
|
extern int fClose ( file *f ); |
719 |
|
|
extern int fSize ( file f ); |
720 |
|
|
extern unsigned fTime ( file f ); /* mtime sec */ |
721 |
|
|
|
722 |
|
|
|
723 |
|
|
/* |
724 |
|
|
Like the syscalls, this returns the number of bytes on success, 0 on eof. |
725 |
|
|
fPwrite repeats and does not return an error when interrupted. |
726 |
|
|
On error, a negative value is returned. |
727 |
|
|
*/ |
728 |
|
|
extern int fRead ( file *f, void *buf, unsigned len ); |
729 |
|
|
extern int fWrite ( file *f, const void *buf, unsigned len ); |
730 |
|
|
extern int fPread ( file *f, void *buf, unsigned len, unsigned off ); |
731 |
|
|
extern int fPwrite ( file *f, const void *buf, unsigned len, unsigned off ); |
732 |
|
|
extern int fSeek ( file *f, unsigned off ); |
733 |
|
|
extern int fTrunc ( file *f, unsigned length ); |
734 |
|
|
#if defined( BUILD_SHMODE )
/* remove a full file lock as set by fOpen */
void fUnlock ( file f );
/*
	lock byte n
	use TLOCK or BLOCK, possibly with WR, to lock, 0 to unlock.
*/
int fLock ( file f, unsigned n, int flg );
/* lock only if the env is in shared writing mode */
#	define FLOCK(f,n,flg) (ENV_SHARED==env.wri && fLock(f,n,flg))
#else
/* without shared mode, locking is a no-op yielding 0 */
#	define FLOCK(f,n,flg) 0
#endif
/* shared lock, exclusive lock and unlock of byte n */
#define FLOCKSH(f,n) FLOCK( f, n, FIL_BLOCK )
#define FLOCKEX(f,n) FLOCK( f, n, FIL_BLOCK|FIL_WR )
#define FLOCKUN(f,n) FLOCK( f, n, 0 )
749 |
|
|
|
750 |
|
|
/** slurp in a whole file at once.
	@param buf points to buffer of size sz.
		fSlurp will allocate one, if *buf is NULL
	@param sz maximum number of bytes to read
	@param name of file to slurp
	@param opt if != 0, do not complain on failure
	@return number of bytes read or negative on error
*/
extern int fSlurp ( char **buf, int sz, const char *name, int opt );
759 |
|
|
|
760 |
|
|
/* record-oriented sink to (file)dst. |
761 |
|
|
Expects the list to contain a proper record. |
762 |
|
|
If the header starts with a digit, a W<TAB> is prepended. |
763 |
|
|
An empty header is ommited. |
764 |
|
|
*/ |
765 |
|
|
extern void fSinkr (Sink *that, int eor); |
766 |
|
|
/* line-oriented sink (field values only) to (file)dst. |
767 |
|
|
leaves the header alone, ignores tags and blank values. |
768 |
|
|
yet prints a blank line on eor. |
769 |
|
|
*/ |
770 |
|
|
extern void fSinkl (Sink *that, int eor); |
771 |
|
|
|
772 |
|
|
/** a potentially mapped file */ |
773 |
|
|
typedef struct { |
774 |
|
|
file fil; |
775 |
|
|
int flg; |
776 |
|
|
char *map; |
777 |
|
|
unsigned npg; /* in pages of env.psz */ |
778 |
|
|
unsigned lim; /* max pages to map */ |
779 |
|
|
#ifdef WIN32 |
780 |
|
|
char *nam; /* for shared mapping */ |
781 |
|
|
void *hdl; /* "mapping object" */ |
782 |
|
|
#endif |
783 |
|
|
} FMap; |
784 |
|
|
|
785 |
|
|
/** |
786 |
|
|
open a file to be mapped. |
787 |
|
|
Like fOpen, but saves flags for later reference. |
788 |
|
|
*/ |
789 |
|
|
extern int fMOpen ( FMap *fm, const char *name, int flags ); |
790 |
|
|
extern int fMClose ( FMap *fm ); |
791 |
|
|
/** |
792 |
|
|
map, remap or unmap a memory mapping |
793 |
|
|
|
794 |
|
|
@param fm the filemap |
795 |
|
|
members fil and flg must be set as of fOpen. |
796 |
|
|
if fm was mapped, the existing mapping is unmapped (or remapped) |
797 |
|
|
@param npg number of pages to map; 0 for no new mapping |
798 |
|
|
@return mapped length; if <= 0, fm->map is set to 0, else to memory region |
799 |
|
|
*/ |
800 |
|
|
extern int fMap ( FMap *fm, unsigned npg ); |
801 |
|
|
/** sync a mapped page */ |
802 |
|
|
extern int fMSync ( FMap *fm, unsigned page ); |
803 |
|
|
|
804 |
|
|
|
805 |
|
|
/* |
806 |
|
|
file input buffer structure for fGets. |
807 |
|
|
suitable for both temp fix buffers and, with some care, for List buffers. |
808 |
|
|
*/ |
809 |
|
|
typedef struct { |
810 |
|
|
file f; /* file to read from */ |
811 |
|
|
unsigned n; /* current line number (1 based) */ |
812 |
|
|
unsigned o; /* offset of p from file start */ |
813 |
|
|
char *b; /* buffer base (const) */ |
814 |
|
|
unsigned s; /* buffer size (const) */ |
815 |
|
|
char *p; /* start of current line */ |
816 |
|
|
unsigned l; /* line len */ |
817 |
|
|
unsigned m; /* more bytes after p+l (including the LF) */ |
818 |
|
|
} FBuf; |
819 |
|
|
/*
	sloppy but convenient initializer macro.
	fb is an FBuf lvalue (any lvalue expression, e.g. *ptr, works since
	the arguments are parenthesized), fil the file to read from.
	buf must be a real array, not a pointer: the buffer size is
	taken with sizeof(buf).
	Starts at line number 1 with offset, line length and "more" count 0.
*/
#define FIL_BUF( fb, fil, buf ) do { \
	(fb).f = (fil); (fb).b = (fb).p = (buf); (fb).s = sizeof(buf); \
	(fb).n = 1; (fb).o = (fb).l = (fb).m = 0; \
} while (0)
/*
	even more convenient macro, must be end of data def:
	declares a 0x2000 byte array "buf" and an FBuf "fb" in the current
	scope and initializes fb to read from file f.
	The expansion already ends with a statement.
*/
#define FIL_DEFBUF( f ) \
	char buf[0x2000]; \
	FBuf fb; \
	FIL_BUF(fb, f, buf);
829 |
|
|
/* |
830 |
|
|
set p and l to next line, lines are terminated by LF. |
831 |
|
|
if l, advance p after current line. |
832 |
|
|
if m, skip next byte. |
833 |
|
|
if m, search for newline. |
834 |
|
|
if no newline found, mv p downto b and read more chars upto s. |
835 |
|
|
if l==s, buffer is exhausted (and m is 0). |
836 |
|
|
else if m, p[l] is a newline. |
837 |
|
|
else if FIL_NONE == f, |
838 |
|
|
no bytes could be read (an eof was seen or somebody set NBLOCK). |
839 |
|
|
return whether we got a line |
840 |
|
|
*/ |
841 |
|
|
extern int fGets ( FBuf *fb ); |
842 |
|
|
extern int fGetr ( List *l, FBuf *fb ); |
843 |
|
|
|
844 |
|
|
|
845 |
|
|
|
846 |
|
|
/* ****************************************************************** */ |
847 |
|
|
/* */ |
848 |
|
|
/* DATABASE */ |
849 |
|
|
/* */ |
850 |
|
|
/* ****************************************************************** */ |
851 |
|
|
|
852 |
|
|
enum { /* flags for record and query data/index file pairs (bit flags) */
	DX_OPEN  = 0x1, /* open */
	DX_WRITE = 0x2, /* open for writing */
	DX_ASYNC = 0x4, /* no synced write */
	DX_MODIF = 0x8  /* modified */
};
858 |
|
|
/** |
859 |
|
|
record data and index. |
860 |
|
|
While this is accessible standalone, a logical database table |
861 |
|
|
may consist of several Rdx, e.g. per every million records. |
862 |
|
|
*/ |
863 |
|
|
typedef struct Rdx { |
864 |
|
|
file mrd; |
865 |
|
|
FMap mrx; |
866 |
|
|
int flg; |
867 |
|
|
int mid; /* in records == maxid */ |
868 |
|
|
int rdl; /* length of data file in bytes */ |
869 |
|
|
int ptl; /* pointer bytes, by now always 8 */ |
870 |
|
|
int typ; /* type of pointer file */ |
871 |
|
|
} Rdx; |
872 |
|
|
|
873 |
|
|
/** initialise from an already open fd. */ |
874 |
|
|
extern int rInit ( Rdx *rdx ); |
875 |
|
|
/** flush and release any cache. */ |
876 |
|
|
extern void rFini ( Rdx *rdx ); |
877 |
|
|
|
878 |
|
|
/* |
879 |
|
|
read record rid. |
880 |
|
|
if mpos and the rec is found at mpos or higher, |
881 |
|
|
rRead will backtrack to earlier versions. |
882 |
|
|
*/ |
883 |
|
|
extern int rRead ( List *l, Rdx *rdx, int rid, unsigned mpos ); |
884 |
|
|
/* |
885 |
|
|
write a record |
886 |
|
|
use rid 0 to get new rid |
887 |
|
|
use opos -1 if you don't care about old pos |
888 |
|
|
to be transparent here, the rid@pos as found in standard recs |
889 |
|
|
is ignored and should be pre-skipped up to the leader. |
890 |
|
|
specify the record size if known, with 0, rSiz will be used |
891 |
|
|
@return the record id written (> 0) on success, <= 0 on error |
892 |
|
|
*/ |
893 |
|
|
extern int rWrite ( Rdx *rdx, const Fld *r, int rid, int opos, unsigned siz ); |
894 |
|
|
|
895 |
|
|
|
896 |
|
|
/** |
897 |
|
|
query data and index |
898 |
|
|
leaf blocks have a configurable size from 512 bytes to 8K |
899 |
|
|
fork (inner node) blocks have always pagesize, i.e. 4K up to 64K |
900 |
|
|
|
901 |
|
|
While this is accessible standalone, a logical database table |
902 |
|
|
may consist of several Qdx, each holding some range of keys. |
903 |
|
|
*/ |
904 |
|
|
typedef struct Cdx Cdx; |
905 |
|
|
|
906 |
|
|
typedef struct Qdx { /* actually it's a B-Link-Tree */ |
907 |
|
|
file mqd; /* the leaves file */ |
908 |
|
|
FMap mqx; /* the tree file */ |
909 |
|
|
const Cdx *cdx; /* the collation */ |
910 |
|
|
int flg; /* flags: writeable */ |
911 |
|
|
unsigned char typ; /* cfg: leaf block type */ |
912 |
|
|
unsigned char ksz; /* cfg: max key length, default 255 */ |
913 |
|
|
unsigned char ptr; /* cfg: inverted file pointer type or plain value size */ |
914 |
|
|
unsigned char let; /* cfg: pct free on load */ |
915 |
|
|
/* members set automatically: */ |
916 |
|
|
unsigned char vsz; /* value size, min 4, default 8 */ |
917 |
|
|
unsigned char uni; /* value unique length (see qSet) */ |
918 |
|
|
unsigned char ftp; /* fork block type */ |
919 |
|
|
unsigned char dpt; /* depth (level of root over bottom > 0) */ |
920 |
|
|
unsigned lsz; /* leaf block size computed from type */ |
921 |
|
|
unsigned lln; /* # leaf blocks in index */ |
922 |
|
|
unsigned fln; /* # fork blocks in index */ |
923 |
|
|
/* members considered internal: */ |
924 |
|
|
struct QLoad *qld; |
925 |
|
|
} Qdx; |
926 |
|
|
|
927 |
|
|
enum { /* btree block type, size, flg -- all packed into one byte */
	QDX_TYPMSK = 0xC0, /* highest 2 bits: basic type */
	QDX_LEAF   = 0x00, /* leaf block, portable */
	QDX_FORKLE = 0x40, /* fork block little endian */
	QDX_FORKBE = 0x80, /* fork block big endian */
	QDX_LEAFPV = 0xC0, /* leaf plain values (forks don't care) */
	/* next 2 bits 0x30 for future extensions */
	QDX_COMPRS = 0x08, /* flag compressed keys (not yet supported) */
	QDX_SIZMSK = 0x07, /* lowest 3 bits: blocksize */
	QDX_LEAF0K = 0x00, /* 1/2K blocks 0x0200 9+0 bits */
	QDX_LEAF1K = 0x01, /* 1K blocks 0x0400 */
	QDX_LEAF2K = 0x02, /* 2K blocks 0x0800 */
	QDX_LEAF4K = 0x03, /* 4K blocks 0x1000 */
	QDX_LEAF8K = 0x04  /* 8K blocks 0x2000, max for leaves */
};
enum {
	QDX_MAXVALPERLEAF = 0x800, /* max 8K / min 4 bytes vsz */
	QDX_LEAFSH = 9, /* leaf size shift+(0..4) ~ 512 bytes - 8K */
	QDX_FORKSH = 12 /* fork size shift+(0..4) ~ 4K - 64K */
};
947 |
|
|
|
948 |
|
|
/* a length-prefixed value of up to 255 bytes */
typedef struct {
	unsigned char len;
	unsigned char byt[255];
} Val;

/* a value together with a length-prefixed key of up to 255 bytes */
typedef struct {
	Val val;
	unsigned char len;
	unsigned char byt[255];
} Key;
958 |
|
|
|
959 |
|
|
|
960 |
|
|
typedef struct QLoop QLoop; |
961 |
|
|
/** |
962 |
|
|
callback for index loop. |
963 |
|
|
maybe called multiple times for same key, if it spans blocks. |
964 |
|
|
In this case, flag QSAME is set. |
965 |
|
|
loop stops if QCb returns != 0 |
966 |
|
|
*/ |
967 |
|
|
typedef int QCb ( QLoop *self ); |
968 |
|
|
|
969 |
|
|
struct QLoop { |
970 |
|
|
QCb *qcb; |
971 |
|
|
Qdx *qdx; |
972 |
|
|
int flg; |
973 |
|
|
Key key; |
974 |
|
|
Key to; |
975 |
|
|
/* set on callback: */ |
976 |
|
|
Key cur; |
977 |
|
|
unsigned nvals; |
978 |
|
|
const unsigned char *vals; |
979 |
|
|
}; |
980 |
|
|
|
981 |
|
|
enum { /* flags for QLoop.flg */
	QLOOP = 0, /* loop endless */
	/* stop based on QLoop.key */
	QEQ   = 1, /* loop while == key */
	QPF   = 2, /* loop on prefix key */
	/* stop based on QLoop.to */
	QUPTO = 4, /* loop while < to */
	QINCL = 5, /* loop while <= to */
	QSTOP = 7, /* mask for stop mode */
	QSKIP = 8, /* skip the from key */
	QSAME = 0x10 /* callback on same key */
};
993 |
|
|
|
994 |
|
|
|
995 |
|
|
/** initialise from an already open fd. */ |
996 |
|
|
extern int qInit ( Qdx *bt ); |
997 |
|
|
/** flush and release any cache. */ |
998 |
|
|
extern void qFini ( Qdx *bt ); |
999 |
|
|
|
1000 |
|
|
/* |
1001 |
|
|
load a sorted series of keys and hits into index. |
1002 |
|
|
call repeatedly, using a key with val.len 0 in last call |
1003 |
|
|
*/ |
1004 |
|
|
extern int qLoad ( Qdx *bt, Key *key ); |
1005 |
|
|
extern int qLoadf ( Qdx *bt, file *f ); |
1006 |
|
|
|
1007 |
|
|
/* |
1008 |
|
|
write the key-value pair to the index and return 0, unless: |
1009 |
|
|
- the value is zero on the 1st uni bytes |
1010 |
|
|
and there is already such a value (unique key, return 3) |
1011 |
|
|
- there is an all-zero value for the key (stopword, return 2) |
1012 |
|
|
- it is already there (full duplicate found, return 1) |
1013 |
|
|
With plain values, only full duplicates are checked. |
1014 |
|
|
Uni is usually the length of the initial segment of value, |
1015 |
|
|
which is the rid in fulltext mode, else the tag. |
1016 |
|
|
Where a unique key is found, the value is copied to key. |
1017 |
|
|
*/ |
1018 |
|
|
extern int qSet ( Qdx *bt, Key *key ); |
1019 |
|
|
enum { /* values 0..3, matching the return codes described for qSet */
	QST_OK,     /* 0: written */
	QST_FOUND,  /* 1: full duplicate found */
	QST_STPWRD, /* 2: stopword */
	QST_UNIKEY  /* 3: unique key */
};
1025 |
|
|
/* |
1026 |
|
|
delete a key-value pair. return 1 if found, else 0 |
1027 |
|
|
*/ |
1028 |
|
|
extern int qDel ( Qdx *bt, Key *key ); |
1029 |
|
|
|
1030 |
|
|
extern int qLoop ( QLoop *self ); |
1031 |
|
|
|
1032 |
|
|
|
1033 |
|
|
/*
	standard values structured as pointers
	denoting a record and position where key occurred.
	The value structure is up to 3 big endian unsigned numbers:
	0..2 bytes for tag,
	3+(0..3) bytes for rid (in fulltext mode: before the tag) and
	0..4 bytes for pos,
	totalling from 4 (vsz min.) to 12 bytes.
*/
typedef struct { /* where a key has a hit */
	unsigned short tag; /* while neg. tags are allowed, sorting is unsigned */
	unsigned short ext; /* extend row id to six bytes (used as db number) */
	unsigned rid; /* row id */
	unsigned pos; /* word pos, usually with field occ<<16 */
} Ptr;
1048 |
|
|
|
1049 |
|
|
enum { /* pointer type byte: lengths of the tag, rid and pos parts */
	QDX_TAGMSK = 0xC0, /* mask length of tag 0..2 */
	QDX_TAG1   = 0x40,
	QDX_TAG2   = 0x80,
	QDX_TAGSH  = 6,    /* ... shifted by 6 */
	QDX_RIDMSK = 0x30, /* mask length of rid 0..3 */
	QDX_RIDMIN = 3,    /* based on 3 */
	QDX_RIDSH  = 4,    /* ... shifted by 4 */
	QDX_RID3   = 0x00,
	QDX_RID4   = 0x10,
	QDX_RID5   = 0x20,
	QDX_RID6   = 0x30,
	QDX_FULTXT = 0x08, /* traditional fulltext ordering */
	QDX_POSMSK = 0x07, /* mask length pos info 0..4 */
	/* default settings based on 3 byte rid */
	QDX_ISIS   = QDX_RID3|QDX_TAG2|QDX_FULTXT|3,
		/* 0x8B, 8 byte fulltext 3+2+3 IFP format */
	QDX_STDDB  = QDX_TAG1|QDX_RID3
		/* 0x40, 4 byte 3 rid + 1 for 256 field tags */
};
1069 |
|
|
|
1070 |
|
|
/* decode a value to a pointer */ |
1071 |
|
|
extern void qRdVal ( Ptr *ptr, const unsigned char *val, unsigned char typ ); |
1072 |
|
|
/* create a value from hit. */ |
1073 |
|
|
extern void qMkVal ( Val *val, Ptr *ptr, unsigned char typ ); |
1074 |
|
|
/* decode a key to plaintext */ |
1075 |
|
|
extern int qRdKey ( Qdx *qdx, char *plain, int l, Key *key ); |
1076 |
|
|
/* create a key from plaintext, truncating to ksz. */ |
1077 |
|
|
extern void qMkKey ( Qdx *qdx, Key *key, char *b, int l ); |
1078 |
|
|
/* create a key and value from line. */ |
1079 |
|
|
extern int qMkKeyVal ( Qdx *qdx, Key *key, char *b, int l ); |
1080 |
|
|
|
1081 |
|
|
|
1082 |
|
|
typedef struct { |
1083 |
|
|
Qdx *qdx; |
1084 |
|
|
int del; |
1085 |
|
|
Key pfx; |
1086 |
|
|
Ptr ptr; |
1087 |
|
|
} QSet; |
1088 |
|
|
/* |
1089 |
|
|
set or del one keys |
1090 |
|
|
basically this behaves like qMkKey, qMkVal, qSet |
1091 |
|
|
ptr.pos is incremented |
1092 |
|
|
with pfx, pfx is prepended to the key (before qMkKey) |
1093 |
|
|
*/ |
1094 |
|
|
extern int qSetKeyVal (QSet *qst, char *val, int len); |
1095 |
|
|
/* |
1096 |
|
|
split the value into words and qSetKeyVal each |
1097 |
|
|
return the number of entries made |
1098 |
|
|
*/ |
1099 |
|
|
extern int qSetKeyVals (QSet *qst, char *val, int len); |
1100 |
|
|
|
1101 |
|
|
|
1102 |
|
|
/**
	collation
*/
#define CDX_MAXSEQ 15 /* max byte sequence length */
1106 |
|
|
|
1107 |
|
|
|
1108 |
|
|
extern const Cdx *cOpen ( const Fld *src ); |
1109 |
|
|
extern int cEnc ( const Cdx *cdx, Key *key, unsigned char *b, int l, int w ); |
1110 |
|
|
extern int cDec ( const Cdx *cdx, unsigned char *b, int l, Key *key ); |
1111 |
|
|
|
1112 |
|
|
|
1113 |
|
|
enum { /* see Metadata.txt; values are written in octal */
	MET_OPT = 001,
	MET_UNU = 002,
	MET_CTP = 003,
	MET_COL = 004,
	MET_VER = 005,
	MET_FLD = 006
};
1121 |
|
|
|
1122 |
|
|
/** |
1123 |
|
|
finally, a real database |
1124 |
|
|
*/ |
1125 |
|
|
typedef struct Db { |
1126 |
|
|
char *nam; |
1127 |
|
|
char *pat; |
1128 |
|
|
int flg; |
1129 |
|
|
int mnt; /* mount count */ |
1130 |
|
|
struct Db *nxt; /* linked list */ |
1131 |
|
|
const Fld *opt; /* inner meta data */ |
1132 |
|
|
Rdx rdx; |
1133 |
|
|
Qdx qdx; |
1134 |
|
|
} Db; |
1135 |
|
|
|
1136 |
|
|
/* open the database named dbname / close a database obtained from dOpen */
extern Db *dOpen (const char *dbname);
extern void dClose (Db *db);
1138 |
|
|
/* takes no arguments; (void) makes this a real prototype so that
	calls with arguments are rejected by the compiler */
extern void dCloseAll (void);
1139 |
|
|
|
1140 |
|
|
/**
	access to record and query data via the db.
	In future versions these might become real functions taking
	care of multifile databases.

	These are macros: db is always parenthesized, but note that
	dRead evaluates rid twice and dLoop evaluates ql twice, so do not
	pass expressions with side effects for those.
*/
/* rid != 0: read record rid; rid == 0: copy the db's meta data (opt) to l */
#define dRead(l,db,rid) ((rid)?rRead(l,&(db)->rdx,rid,0):lCpy(l,(db)->opt,0))
/* write record r with opos -1 (don't care about old pos) and siz 0 */
#define dWrite(db,r,rid) rWrite(&(db)->rdx,r,rid,-1,0)

#define dSet(db,key) qSet(&(db)->qdx,key)
#define dDel(db,key) qDel(&(db)->qdx,key)
/* point the loop at the db's index, then run it */
#define dLoop(db,ql) ((ql)->qdx=&(db)->qdx, qLoop(ql))
1151 |
|
|
|
1152 |
|
|
#define CORE_H |
1153 |
|
|
#endif /* CORE_H */ |