/[webpac]/openisis/0.9.9e/core/core.h
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/0.9.9e/core/core.h

Parent Directory | Revision Log


Revision 604 - (hide annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 36351 byte(s)
import of new openisis release, 0.9.9e

1 dpavlin 604 /*
2     The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3     Version 0.9.x (patchlevel see file Version)
4     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14     See the GNU Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22     #ifndef CORE_H
23    
24     #include <assert.h>
25     #include <string.h> /* various str* and mem* */
26    
27     /*
28     $Id: core.h,v 1.43 2004/11/11 15:47:08 kripke Exp $
29     full interface of the Malete core
30     */
31    
32     #define CORE_VERSION "0.9.9"
33    
34    
35    
36     /* ****************************************************************** */
37     /* */
38     /* COMPILER FEATURES */
39     /* */
40     /* ****************************************************************** */
41    
#if !defined( __STDC__ ) /* __STDC__ is defined by ISO C */
# error "need ANSI/ISO C compiler"
#endif
/* hint: "cpp -dM </dev/null" lists nonstandard machine and OS macros */

/*
	Optimization attributes; all of them expand to nothing
	where the compiler or CPU does not qualify.

	OPT_INLINE  (any GNU C)
		usage: OPT_INLINE type func ()
	OPT_STDCALL (GNU C on i386)
		the called function pops the args, which makes most functions
		slightly faster.
		usage: type OPT_STDCALL func () on both declaration and definition
	OPT_REGPARM (GNU C on i386 with NDEBUG, else same as OPT_STDCALL)
		more aggressive: up to 3 args in registers; fails with -pg.
		usage: type OPT_REGPARM func () in the declaration is sufficient;
		also using it in the definition allows fallback to stdcall
*/
#ifdef __GNUC__
# define OPT_INLINE __inline__
# ifdef __i386__
#  define OPT_STDCALL __attribute__((stdcall))
#  ifdef NDEBUG
#   define OPT_REGPARM __attribute__((regparm(3)))
#  else
#   define OPT_REGPARM OPT_STDCALL
#  endif
# else
#  define OPT_STDCALL
#  define OPT_REGPARM OPT_STDCALL
# endif
#else
# define OPT_INLINE
# define OPT_STDCALL
# define OPT_REGPARM OPT_STDCALL
#endif
68    
/* CPU features */

/*
	CPU_BIG_ENDIAN is defined where the byte order is big endian.
	Besides the explicitly listed CPUs, the compiler's own statement
	(__BYTE_ORDER__ as predefined by GCC and clang) is honoured,
	so that other big endian targets are recognized as well.
*/
#if defined( __sparc__ ) || defined( __ppc__ ) \
	|| ( defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ ) \
		&& __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
# define CPU_BIG_ENDIAN
#endif
/* CPU_NEED_ALIGN is defined for sparc only */
#if defined( __sparc__ )
# define CPU_NEED_ALIGN
#endif
/* CPU_PAGE_SHIFT: max bits of PAGE_SIZE; min is always 12 */
/* grep -r 'fine PAGE_SHIFT' /usr/src/linux/include/asm-* : 12..16 */
#if defined( __ia64__ )
# define CPU_PAGE_SHIFT 16
#elif defined( __arm__ )
# define CPU_PAGE_SHIFT 15
#elif defined( __i386__ ) || defined( __ppc__ )
# define CPU_PAGE_SHIFT 12 /* there are more not exceeding 12 bits */
#else /* however 13 bits is not too much waste */
# define CPU_PAGE_SHIFT 13 /* sparc, sparc64, alpha, m68k, cris */
#endif
87    
/*
	64 bit integer.
	LOLO_BUILTIN is the compiler's 64 bit type,
	LOLO( v ) and LULU( v ) build a signed / unsigned 64 bit constant.
*/
#ifdef __GNUC__
# define LOLO_BUILTIN long long
# define LOLO( v ) v##LL
# define LULU( v ) v##ULL
#elif defined( _MSC_VER ) || defined( __BORLANDC__ )
# define LOLO_BUILTIN __int64
# define LOLO( v ) v##i64 /* the vendor specific 64 bit suffixes */
# define LULU( v ) v##ui64
#elif defined( __STDC_VERSION__ ) && __STDC_VERSION__ >= 199901L
/* any C99 compiler has long long */
# define LOLO_BUILTIN long long
# define LOLO( v ) v##LL
# define LULU( v ) v##ULL
#elif 0xFFFFFFFFL != ~0L /* 64 bit compiler ? */
/*
	only meaningful for pre-C99 preprocessors, which compute in long;
	C99 ones compute in intmax_t and are handled by the branch above
*/
# define LOLO_BUILTIN long
# define LOLO( v ) v##L
# define LULU( v ) v##UL
#else
# error "please use a compiler providing a 64 bit integer type. thanks."
#endif
typedef LOLO_BUILTIN lolo;
typedef unsigned LOLO_BUILTIN lulu;
106    
107    
108    
109     /* ****************************************************************** */
110     /* */
111     /* BUILD FEATURES */
112     /* */
113     /* ****************************************************************** */
114    
/* BUILD_SHMODE requests support for the ENV_SHARED mode; never on WIN32 */
#ifdef WIN32
# ifdef BUILD_SHMODE
#  undef BUILD_SHMODE
# endif
#endif
119    
120    
121    
122     /* ****************************************************************** */
123     /* */
124     /* DATA STRUCTURES AND UTILITIES */
125     /* */
126     /* ****************************************************************** */
127    
/* a few control characters, given by their codes */
#define TAB 9   /* the horizontal tab */
#define LF  10  /* LineFeed a.k.a. newline - '\n' isn't really well defined */
#define VT  11  /* the vertical tab, used as newline replacement */
#define CR  13  /* carriage return, for windoze, telnet and the like */
133    
/**
	Convert text to int in the manner of atoi/strtol, except that
	- p need not be 0 terminated (unless l < 0)
	- a hex prefix 0x is honoured, whereas a leading 0 does not mean octal.
*/
extern int a2i (const char *p, int l);

/**
	Variant of a2i which stores the result in *res
	and returns the number of parsed characters.
*/
extern int a2il (const char *p, int l, int *res);

/**
	Variant of a2i which returns dflt
	if less than l characters of p have been parsed successfully.
*/
extern int a2id (const char *p, int l, int dflt);

/**
	Print int i to p; the output is NOT 0 terminated.
	p must have 11 bytes space.
	@return number of chars (up to 10 digits + minus sign)
*/
extern int i2a (char *p, int i);
/* NOTE(review): presumably the unsigned counterpart of i2a; not documented here */
extern int u2a (char *p, unsigned u);
/** print exactly n digits of u to p; no 0 byte is added */
extern void u2a0 (char *p, unsigned u, unsigned n);
156    
/* the base 36 digits '0'..'9', 'a'..'z' */
extern const char b36dig[36];
/* digit values: 0..35 for '0'..'9', 'a'..'z' and 'A'..'Z', 36 for all else */
extern const char b36val[256];

/* latin 1 tables */
extern const unsigned char lat1up[256]; /* uppercase */
extern const unsigned char lat1ct[256]; /* ctype, see the CT_ values below */

/* the single character type bits */
enum {
	CT_WHITE = 1 << 0, /* all <= ' ' */
	CT_DIGIT = 1 << 1, /* 0..9 */
	CT_ALPHA = 1 << 2, /* 'A'..'Z','a'..'z' */
	CT_IDENT = 1 << 3, /* digits,alphas,underscore */
	CT_SPECL = 1 << 4, /* all other ASCIIs */
	CT_LATIN = 1 << 5  /* ident + non ASCII Latin1 alphas */
};

/* the character type values, as combined from the bits */
enum {
	CT_N = 0, /* other C1 control or symbol */
	CT_W = CT_WHITE,
	CT_S = CT_SPECL,
	CT_L = CT_LATIN,
	CT_I = CT_LATIN | CT_IDENT,
	CT_D = CT_LATIN | CT_IDENT | CT_DIGIT,
	CT_A = CT_LATIN | CT_IDENT | CT_ALPHA
};

/* does c have exactly the type value CT_<type> ? e.g. CT_IS(D, c) */
#define CT_IS( type, c ) (lat1ct[(unsigned char)(c)] == CT_##type)
/* does c have any of the bits of CT_<flg> ? e.g. CT_HAS(IDENT, c) */
#define CT_HAS( flg, c ) (lat1ct[(unsigned char)(c)] & CT_##flg)


/** within the len bytes, replace each from by to */
extern void mTr (char *bytes, char from, char to, int len);
186    
187    
/* a field: a tag and a counted value */
typedef struct Fld {
	int tag;
	/* must use at most 31 bits, highest is temporarily abused */
	unsigned len;
	/* the len bytes of the value, not necessarily 0 terminated */
	char *val;
} Fld;
193    
/*
	V macros: value functions on a single field
*/

/* the int value of the field, as of a2i */
#define V2I( f ) a2i( (f)->val, (f)->len )

/*
	does the field have the primary value k of length l ?
	that is: the value starts with the l bytes of k,
	which are followed by either the end of value or a TAB
*/
#define VKEY( f, k, l ) ( \
	(l) <= (f)->len \
	&& 0 == memcmp( (f)->val, k, l ) \
	&& ( (l) == (f)->len || TAB == (f)->val[(l)] ) \
)

/* duplicate the value of a field as 0-terminated string */
#define VDUPZ( f ) ( (char*)mDupz( (f)->val, (f)->len ) )
207    
208     /*
209     snip tab-separated subfields from value,
210     setting tag to the subfield identifier and len/val to the contents.
211     if opt is not 0, it lists the options wanted (* for the primary).
212     dst->val should be 0-initialized;
213     if it's greater than src->val, search starts at dst->val+dst->len,
214     @return 0 if not found, else 1+dst->len
215     */
216     extern int vGet ( Fld *dst, const Fld *src, const char *opt );
217     /*
218     access to the primary value never fails, if initialized, so return len
219     */
220     #define VPRI( dst, src ) ((dst)->val = 0, vGet(dst, src, 0), (dst)->len)
/*
	Does the value of field f equal the 0-terminated string str ?
	The length is checked first, so that str is never read beyond its
	terminator, even if the value of f contains 0 bytes.
	str may be any expression; it is evaluated twice.
*/
#define VEQZ( f, str ) \
	( strlen(str) == (f)->len && !memcmp( (f)->val, (str), (f)->len ) )
222    
223     /**
224     undo the encoding of lBin (see below).
225     return #bytes int dst, which is <= src->len
226     extern int vDecod ( char *dst, const Fld *src );
227     */
228    
229     /* sign(a - b) */
230     extern int vCmp ( const Fld *a, const Fld *b );
231     /* a > b */
232     extern int vGt ( const Fld *a, const Fld *b );
233    
234     /* comparision function type for rSort */
235     typedef int VGt ( const Fld *a, const Fld *b );
236    
237    
238     /*
239     r (record) functions on an array of fields
240     they expect the tag of the first field to be the negative number of fields.
241     The value of the first field ("header") may contain various meta info.
242     For lists representing database "records",
243     the canonical format is [no[@pos]][<TAB>leader].
244     Leader can be used e.g. to hold a Z39.2 leader as used by MARC.
245     Other (protocol) lists should start with some type identifier.
246    
247     it's a matter of taste whether you like and use this typedef.
248     as it's not going to be changed but only here to clarify the intend,
249     the core functions do not use it.
250     */
251     typedef const Fld *Rec;
252    
253     #define RLEN( r ) (-(r)->tag)
254     #define REND( r ) ((r)-(r)->tag)
255     #define RLAST( r ) (REND(r)-1)
256    
257     extern unsigned rSiz ( const Fld *r );
258    
259     /**
260     get occurence of field.
261     @param pos if given, the first occ starting at pos is searched
262     and pos is set to one after the found position (or after end).
263     may be used to loop all on an int var initialized to 0.
264     @return the field or 0
265     */
266     extern const Fld *rGet ( const Fld *r, int tag, int *pos );
267    
268     /*
269     get field with given tag and primary value or empty pv
270     */
271     extern const Fld *rKey ( const Fld *r, int tag, const char *key );
272    
273     /*
274     create a new const rec in a contigous mAlloced peace of mem
275     if siz is 0, rSiz() is used
276     */
277     extern const Fld *rDup ( const Fld *src, unsigned siz );
278    
279    
280     /** flatten (serialize) record
281     to tag\tval lines ended by a blank line.
282     buf must be of size rSiz(rec)
283     + 13*RLEN(rec) for sign+10digits+tab+nl
284     + 1 for the blank line
285     @return # of bytes written
286     */
287     extern int rSer ( char *buf, const Fld *rec );
288    
289     /** sort fields
290     WARNING: this cuts fields longer than 2GB !!!
291     NOTE that rSort, unlike qsort, does NOT use a 3-way cmp function,
292     but a boolean gt function
293     */
294     extern void rSort ( Fld *rec, VGt *gt );
295     extern void rSortTag ( Fld *rec );
296     extern void rSortVal ( Fld *rec ); /* == rSort(vGt) */
297    
298    
299     #define DEFBLKLEN 8000 /* default buffer block length */
300     #define DEFFIELDS 40 /* default number of fields */
301    
302     typedef struct LBlk { /* chained buffer block */
303     struct LBlk *nxt;
304     unsigned siz;
305     char byt[DEFBLKLEN]; /* actual len may vary */
306     } LBlk;
307    
308    
309     /*
310     A full-fledged, modifiable list.
311     */
312     typedef struct List {
313     Fld *fld; /* fields list, initially fl0 */
314     unsigned fav; /* fields available at end of *fld buffer */
315     int siz; /* used secondary buffer size minus holes (add buf-blk.byt) */
316     char *buf; /* pointing into blk->byt */
317     char *end; /* of blk->byt */
318     LBlk *blk; /* buffer chain, initially bl0 */
319     char *bok; /* if == buf, buffers contain the serialization */
320     Fld fl0[DEFFIELDS];
321     LBlk bl0;
322     } List;
323    
324    
325     #define LLEN( l ) RLEN((l)->fld)
326     #define LEND( l ) REND((l)->fld)
327     #define LLAST( l ) RLAST((l)->fld)
328    
329    
330     /*
331     initialize list and set header.
332     if fmt is 0, no header is printed, and the list left empty.
333     (i.e. the first field added will become the header).
334     @return l
335     */
336     extern List *lInit ( List *l, const char *fmt, ... );
337     /*
338     completely clear all fields and buffers.
339     */
340     extern List *lClr ( List *l );
341     /*
342     clear all fields and buffers, but keep the header.
343     */
344     extern List *lReset ( List *l );
345     /*
346     clear all fields and buffers, keep nothing
347     */
348     extern void OPT_STDCALL lFini ( List *l );
349    
350     /*
351     increase available space.
352     If buffer has a flusher, call it.
353     fields -1 means we want to append (keep last field contigous, do NOT flush).
354     fields >0 means reserve at least mode fields.
355     */
356     extern int OPT_REGPARM lExtend ( List *l, unsigned need, int fields );
357    
358    
359    
360     /* bytes available at buf */
361     #define LAVL( l ) ((unsigned)((l)->end - (l)->buf))
362     #define LSIZ( l ) ((unsigned)((l)->buf - (l)->blk->byt + (l)->siz))
363    
/*
	Fragment used to start a new field with tag t at the end of list l.
	If the list was empty, the field will become the header,
	ignoring the tag (the slot gets the field count instead).
	It declares the locals __i (the new negative field count)
	and __f (the new field), and therefore must start a block.
	The names are kept, since users of the fragment may refer to them.
*/
#define LDEFNEWF( l, t ) \
	int __i = (l)->fld->tag; \
	Fld *__f = (l)->fld - __i--; \
	assert(0 > __i); \
	__f->tag = (t); \
	(l)->fld->tag = __i;

/*
	In all of the following macros, the length n may be any expression;
	it is evaluated exactly once (into the local __n).
	Nothing is done, if the needed space is not available
	and lExtend fails to provide it.
*/

/* add field tag t reserving n bytes space */
#define LNEWF( l, t, n ) do { \
	unsigned __n = (unsigned)(n); \
	if ( ((l)->fav && LAVL(l) >= __n) || lExtend( (l), __n, 1 ) ) { \
		LDEFNEWF( l, t ) \
		__f->val = (l)->buf; \
		__f->len = 0; \
		(l)->fav --; \
	} } while(0)

/* add field tag t using n bytes prefilled space
	must be preextended for one field
*/
#define LPREF( l, t, n ) do { \
	unsigned __n = (unsigned)(n); \
	if ( (l)->fav ) { \
		LDEFNEWF( l, t ) \
		__f->val = (l)->buf; \
		__f->len = __n; \
		(l)->fav --; \
		(l)->buf += __n; \
	} } while(0)


/* add field tag t with value v n bytes long */
#define LADD( l, t, v, n ) do { \
	unsigned __n = (unsigned)(n); \
	if ( ((l)->fav && LAVL(l) >= __n) || lExtend( (l), __n, 1 ) ) { \
		LDEFNEWF( l, t ) \
		memcpy( __f->val = (l)->buf, (v), __f->len = __n ); \
		(l)->fav --; \
		(l)->buf += __n; \
	} } while(0)

/* append value v n bytes long to the last field */
#define LAPP( l, v, n ) do { \
	unsigned __n = (unsigned)(n); \
	if ( LAVL(l) >= __n || lExtend( (l), __n, -1 ) ) { \
		Fld *__f = LLAST(l); \
		memcpy( (l)->buf, (v), __n ); \
		__f->len += __n; \
		(l)->buf += __n; \
	} } while(0)

/* add field tag t with int value i; 12 bytes are reserved for i2a */
#define LADDI( l, t, i ) do { \
	if ( ((l)->fav && LAVL(l) >= 12) || lExtend( (l), 12, 1 ) ) { \
		LDEFNEWF( l, t ) \
		(l)->buf += __f->len = i2a( __f->val = (l)->buf, (i) ); \
		(l)->fav --; \
	} } while(0)

/* add / append the 0-terminated string s (s is evaluated twice) */
#define LADDS( l, t, s ) LADD( l, t, s, strlen(s) )
#define LAPPS( l, s ) LAPP( l, s, strlen(s) )
/* add a copy of field f; f may be any pointer expression */
#define LADDF( l, f ) LADD( l, (f)->tag, (f)->val, (f)->len )
427    
428    
429     /**
430     add a field with arbitrary binary data.
431     The encoded field value will not contain newlines.
432     The encoding maps a VT to two bytes VT 0,
433     and a LF to VT 1, if the LF is followed by a 0 or 1, a single VT else.
434     extern void lBin ( List *l, int tag, const char *bytes, int len );
435     */
436    
437    
438     /*
439     append canonical options fields.
440     args starting with '-' are treated as options and appended as
441     tab separated subfield w/o the '-'.
442     If the first arg does not start with '-', it is used as primary value.
443     Other non-options are appended as subfields indicated by '@'.
444     In order to skip the program's name, from main() use with argc-1,argv+1.
445     */
446     extern int lArgv ( List *l, int tag, int argc, const char **argv );
447     /*
448     same using 0-terminated list.
449     extern int lArgs ( List *l, int tag, const char *arg, ... );
450     */
451     /**
452     A variation on this theme as used for env.var:
453     an initial non-option as command and following options go to the header.
454     every following non-option starts a new field.
455     */
456     extern List *lVar ( List *l, int argc, const char **argv );
457    
458     /* if siz is 0, rSiz() is used */
459     extern int lCpy ( List *l, const Fld *src, unsigned siz );
460     #define LCPY( l, src ) lCpy( l, (src)->fld, LSIZ(src) )
461    
462     /* create canned version of list as new record */
463     #define LCAN( l ) rDup( (l)->fld, LSIZ(l) )
464    
465    
466     /*
467     Print a field to list.
468     a fmt of 0 indicates varargs.
469     */
470     extern int lOut ( List *to, int tag, const char *fmt, ... );
471    
472    
473     /*
474     set the first field with tag to val (of len, -1 for strlen)
475     kill additional fields with tag
476     if no field found, add one at end
477     extern int lSet ( List *l, int tag, const char *val, unsigned len );
478     */
479    
480    
/* list parse state: the low 24 bits are a length, the state is above */
enum {
	LPS_SOR = 0,              /* at start of record */
	LPS_SOL = 1 << 24,        /* at start of line */
	LPS_TAG = 2 << 24,        /* in tag (only digits seen in line) */
	LPS_VAL = 3 << 24,        /* in val (some non-digit seen) */
	LPS_NEG = 4 << 24,        /* found '-' at start of line */
	LPS_CR  = 8 << 24,        /* kill carriage return character */
	LPS_LEN = (1 << 24) - 1   /* mask of the length bits */
};
490     /**
491     add serialized "text" to rec
492     lps is the buffer length (of up to 16MB) + state bits
493     @return new state, if buffer was exhausted,
494     or #remaining bytes (|LPS_SOR), if a blank line was seen
495     */
496     extern int lParse (List *l, const char *txt, int lps);
497    
498    
499     /**
500     record sink.
501     The sink function may be called with eor 0 after adding one or several fields
502     to optionally flush partial records.
503     It must be called with eor when a record is complete.
504     In that case it must prepare List to receive a new header.
505     This typically is done by flushing and resetting list,
506     however, it would be sufficient to prepare for a new embedded
507     subrecord by recording the position of the next head.
508     */
509     typedef struct Sink Sink;
510     typedef void sink (Sink *that, int eor); /* clean all or part of buffer(s) */
511    
512     struct Sink {
513     List lst;
514     sink *snk; /* sinking function */
515     void *dst; /* destination */
516     int off; /* start of current record in list, negative after partial flush */
517     };
518    
519     #define SINK(s) (s)->snk(s, 0)
520     #define SEOR(s) (s)->snk(s, 1)
521     #define SCPY(s, l) do { LCPY(&(s)->lst, l); SEOR(s); } while (0)
522     #define SCPYR(s, r) do { lCpy(&(s)->lst, r, 0); SEOR(s); } while (0)
523    
524     /* ****************************************************************** */
525     /* */
526     /* ENVIRONMENT AND SYSTEM */
527     /* */
528     /* ****************************************************************** */
529    
/*
	The system's file handle type and its "no file" value.
	The negative value is parenthesized, so that FIL_NONE
	is a single operand in any expression.
*/
#ifndef WIN32
typedef int file;
#define FIL_NONE (-1)
#else
typedef void *file;
#define FIL_NONE 0
#endif
537    
538     /* environment.
539     */
540     typedef struct Env {
541     List *opt; /* options (header is packed command line) */
542     file in;
543     Sink *out; /* a proper record sink expecting complete records */
544     Sink *err; /* a usually field buffered sink for eOut messages */
545     int log; /* level */
546     int flg;
547     int wri; /* write mode none/excl/shared */
548     int sig; /* interrupted by signal */
549     unsigned psz; /* system page size = 1<<psh enforced min 4K */
550     int psh; /* page shift (bits) 12..CPU_PAGE_SHIFT */
551     unsigned rml; /* r memory map limit (max pages per pointer map) */
552     unsigned qml; /* q memory map limit (max pages per tree map) */
553     /* LBlk *blk[5]; for memory management */
554     } Env;
555    
556     enum { /* env flags */
557     ENV_BUFFER = 1, /* env.err is buffered */
558     ENV_MSYNC = 2 /* use msync */
559     };
560     enum { /* env writing mode */
561     ENV_RO, /* readonly - not writing */
562     ENV_EXCL /* exclusive access (the default) */
563     #ifdef BUILD_SHMODE
564     , ENV_SHARED /* shared access */
565     #endif
566     };
567     enum { /* signal code */
568     ENV_CANCEL = 1, /* abort current request */
569     ENV_TERM /* abort current request and exit */
570     };
571    
572     extern Env env; /* main environment */
573    
/**
	Error and loglevel codes: our very own errno.
	The LOG_ levels are named after the last error code they comprise.
*/
enum {
	ERR_OK = 0,     /* 0 is no error, also read(2)'s EINTR, EAGAIN */
	ERR_NO = -1,    /* get details from errno */

	/* error level FATAL: we are wrong */
	ERR_IDIOT = -2, /* caught programming error */
	ERR_TRASH = -3, /* database internal consistency */

	/* error levels SYSERR,ERROR,IOERR: system or user was wrong */
	ERR_NOMEM = -4, /* out of memory, also open(2)'s EMFILE, ENFILE */
	ERR_IO = -5,    /* real IO error, also write(2)'s ENOSPC, EPIPE */
	ERR_BUSY = -6,  /* object is busy */
	ERR_BADF = -7,  /* bad file, also read(2)'s EINVAL, some of open(2) */
	ERR_FAULT = -8, /* 0 pointer or bad sized buffer given */
	ERR_INVAL = -9, /* general invalid parameters, any EINVAL errno */
	ERR_AGAIN = -10, /* no data at intr or nonblock */

	/* the error levels */
	LOG_FATAL = ERR_TRASH,  /* fatal internal errors: we can't go on */
	LOG_SYSERR = ERR_BUSY,  /* problem with system resources: bad file, no mem */
	LOG_ERROR = ERR_INVAL,  /* unusable input, database or query */
	LOG_IOERR = ERR_AGAIN,  /* problem on IO */
	LOG_WARN = -11,         /* bad input */

	/* logging levels: nothing was wrong */
	LOG_INFO = -12,    /* some major event like opening a db */
	LOG_VERBOSE = -13, /* any event like reading a record */
	LOG_DEBUG = -14,   /* lots of processing details (debug built only) */
	LOG_TRACE = -15,   /* database content (log_str) */
	LOG_ALL = LOG_TRACE
};
603    
604    
/* the L macros for adding and appending, applied to the list of env.out */
#define EADD( t, v, n ) LADD( &env.out->lst, t, v, n )
#define EAPP( v, n )    LAPP( &env.out->lst, v, n )
#define EADDS( t, s )   LADDS( &env.out->lst, t, s )
#define EAPPS( s )      LAPPS( &env.out->lst, s )
#define EADDI( t, i )   LADDI( &env.out->lst, t, i )
#define EADDF( f )      LADDF( &env.out->lst, f )

/*
	For non-negative tags, append a field with abs(tag) to env.out,
	else to env.err if tag >= env's loglevel.
	For system errors, additional info is appended.
	env.err's eof is called, if any.
	Returns tag.

	Only a small subset of printf formats is supported -- see the src.
	There is %b (bytes), which is like %.*s, but prints the string in hex.
*/
extern int eOut (int tag, const char *fmt, ...);
extern int eRr (int tag, const char *fmt, ...);

/* debug logging by eRr, which vanishes with NDEBUG */
#ifdef NDEBUG
# ifdef __GNUC__
#  define LOG_DBG( args... )
# else
#  define LOG_DBG (void) /* compiler should dispose statement off */
# endif
#else
# define LOG_DBG eRr
#endif
631    
632     /**
633     Init the env.
634     The first field specifies general command and options.
635     Following fields describe databases.
636     If no sinks are provided, file sinks on stdout and stderr are used.
637     */
638     extern void cInit ( List *args, Sink *out, Sink *err );
639     /* typical usage from main() */
640     #define CINIT(argl) cInit(lVar(lInit(argl,0), argc-1, argv+1), 0, 0)
641    
642    
643     /**
644     memory management.
645     All memory will be initialized,
646     and all allocs but TryAlloc exit the process when out of memory.
647     DO NOT MIX with other alloc/free routines.
648     */
649     extern void *mAlloc ( int size );
650     extern void *mDup ( const void *str, int size );
651     /* cp siz bytes, append a 0 byte */
652     extern char *mDupz ( const char *str, int size );
653     extern void *mTryAlloc ( int size );
654     extern void mFree ( void *mem );
655     extern Fld *mFldAlloc ( int nfields );
656     extern void mFldFree ( Fld *fld );
657     extern LBlk *mBlkAlloc ( int size );
658     extern void mBlkFree ( LBlk *blk );
659     extern List *mListAlloc ( const char *name ); /* lInitialized with name */
660     extern void mListFree ( List *l );
661     #define mFldAlloc(n) ((Fld*)mAlloc((n)*sizeof(Fld)))
662     #define mFldFree mFree
663     #define mBlkFree mFree
664     #define MFREE(p) do { mFree(p); (p)=0; } while(0)
665    
666    
667     /**
668     set tm to current time, return difference in millis
669     */
670     extern int tUpd ( lolo *tm );
671     /**
672     print generalized time yyyyMMddHHmmss + 0 byte to buffer
673     if tm is 0, current time is used
674     if *tm is 0, *tm is updated
675     buffer must have 15 bytes
676     return millis
677     */
678     extern int tGtf ( char *buf, lolo *tm );
679     /**
680     grok the fine manual.
681     like tGtf, but with additional 3 digits millis
682     return buffer, which must have 18 bytes
683     */
684     extern char *tGtfm ( char *buf, lolo *tm );
685     /**
686     nanosl
687     */
688     extern void tSleep ( lolo tm );
689    
690    
691     /* ************************************************************
692     disk files (block devices)
693     */
694    
695    
/* the flags for opening files */
enum {
	/* basic open flags */
	FIL_RD    = 0x001, /* shall be opened for input */
	FIL_WR    = 0x002, /* shall be opened for output */
	FIL_RDWR  = FIL_RD | FIL_WR, /* shall be opened for both */
	FIL_TRY   = 0x004, /* do not complain if open fails */
	/* write flags */
	FIL_CREAT = 0x010, /* shall be created */
	FIL_TRUNC = 0x020, /* shall be truncated */
	FIL_SYNC  = 0x040, /* syncing output */
	/* lock flags */
	FIL_TLOCK = 0x100, /* try locking (EX with WR) */
	FIL_BLOCK = 0x200, /* blocking lock (EX with WR) */
	FIL_FLOCK = FIL_TLOCK | FIL_BLOCK /* any locking is set */
};
711    
712     /** open a new fid based on name and flags.
713     TLOCK can be specified on any plattform,
714     translating to a fcntl full file lock on *nix and a share mode on win
715     @return 0 or some error code
716     */
717     extern int fOpen ( file *f, const char *name, int flags );
718     extern int fClose ( file *f );
719     extern int fSize ( file f );
720     extern unsigned fTime ( file f ); /* mtime sec */
721    
722    
723     /*
724     Like the syscalls, this returns the number of bytes on success, 0 on eof.
725     fPwrite repeats and does not return an error when interrupted.
726     On error, a negative value is returned.
727     */
728     extern int fRead ( file *f, void *buf, unsigned len );
729     extern int fWrite ( file *f, const void *buf, unsigned len );
730     extern int fPread ( file *f, void *buf, unsigned len, unsigned off );
731     extern int fPwrite ( file *f, const void *buf, unsigned len, unsigned off );
732     extern int fSeek ( file *f, unsigned off );
733     extern int fTrunc ( file *f, unsigned length );
734     #ifdef BUILD_SHMODE
735     /* remove a full file lock as set by fOpen */
736     extern void fUnlock ( file f );
737     /*
738     lock byte n
739     use TLOCK or BLOCK, possibly with WR, to lock, 0 to unlock.
740     */
741     extern int fLock ( file f, unsigned n, int flg );
742     #define FLOCK(f,n,flg) (ENV_SHARED==env.wri && fLock(f,n,flg))
743     #else
744     #define FLOCK(f,n,flg) 0
745     #endif
746     #define FLOCKSH(f,n) FLOCK(f,n,FIL_BLOCK)
747     #define FLOCKEX(f,n) FLOCK(f,n,FIL_BLOCK|FIL_WR)
748     #define FLOCKUN(f,n) FLOCK(f,n,0)
749    
750     /** slurp in a whole file at once.
751     @param buf points to buffer of size sz.
752     lio_slurp will allocate one, if *buf is NULL
753     @param sz maximum number of bytes to read
754     @param name of file to slurp
755     @param opt if != 0, do not complain on failure
756     @return number of bytes read or negative on error
757     */
758     extern int fSlurp ( char **buf, int sz, const char *name, int opt );
759    
760     /* record-oriented sink to (file)dst.
761     Expects the list to contain a proper record.
762     If the header starts with a digit, a W<TAB> is prepended.
763     An empty header is ommited.
764     */
765     extern void fSinkr (Sink *that, int eor);
766     /* line-oriented sink (field values only) to (file)dst.
767     leaves the header alone, ignores tags and blank values.
768     yet prints a blank line on eor.
769     */
770     extern void fSinkl (Sink *that, int eor);
771    
772     /** a potentially mapped file */
773     typedef struct {
774     file fil;
775     int flg;
776     char *map;
777     unsigned npg; /* in pages of env.psz */
778     unsigned lim; /* max pages to map */
779     #ifdef WIN32
780     char *nam; /* for shared mapping */
781     void *hdl; /* "mapping object" */
782     #endif
783     } FMap;
784    
785     /**
786     open a file to be mapped.
787     Like fOpen, but saves flags for later reference.
788     */
789     extern int fMOpen ( FMap *fm, const char *name, int flags );
790     extern int fMClose ( FMap *fm );
791     /**
792     map, remap or unmap a memory mapping
793    
794     @param fm the filemap
795     members fil and flg must be set as of fOpen.
796     if fm was mapped, the existing mapping is unmapped (or remapped)
797     @param npg number of pages to map; 0 for no new mapping
798     @return mapped length; if <= 0, fm->map is set to 0, else to memory region
799     */
800     extern int fMap ( FMap *fm, unsigned npg );
801     /** sync a mapped page */
802     extern int fMSync ( FMap *fm, unsigned page );
803    
804    
805     /*
806     file input buffer structure for fGets.
807     suitable for both temp fix buffers and, with some care, for List buffers.
808     */
809     typedef struct {
810     file f; /* file to read from */
811     unsigned n; /* current line number (1 based) */
812     unsigned o; /* offset of p from file start */
813     char *b; /* buffer base (const) */
814     unsigned s; /* buffer size (const) */
815     char *p; /* start of current line */
816     unsigned l; /* line len */
817     unsigned m; /* more bytes after p+l (including the LF) */
818     } FBuf;
/*
	Sloppy but convenient initializer macro:
	set up the FBuf fb to read from file fil into buf.
	fb may be any lvalue expression of type FBuf, like *ptr.
	buf must be an actual array, since sizeof is used for the size.
*/
#define FIL_BUF( fb, fil, buf ) do { \
	(fb).f = (fil); (fb).b = (fb).p = (buf); (fb).s = sizeof(buf); \
	(fb).n = 1; (fb).o = (fb).l = (fb).m = 0; \
} while (0)
/*
	Even more convenient macro, must be end of data def:
	defines a buffer buf of 0x2000 bytes and a FBuf fb
	initialized to read from file f into it.
*/
#define FIL_DEFBUF( f ) \
	char buf[0x2000]; \
	FBuf fb; \
	FIL_BUF(fb, f, buf);
829     /*
830     set p and l to the next line; lines are terminated by LF.
831     if l, advance p after the current line.
832     if m, skip the next byte.
833     if m, search for a newline.
834     if no newline is found, move p down to b and read more chars up to s.
835     if l==s, the buffer is exhausted (and m is 0).
836     else if m, p[l] is a newline.
837     else if FIL_NONE == f,
838     no bytes could be read (an eof was seen or somebody set NBLOCK).
839     return whether we got a line
840     */
841     extern int fGets ( FBuf *fb );
842     extern int fGetr ( List *l, FBuf *fb ); /* NOTE(review): undocumented here; presumably reads a record from fb into l -- confirm */
843    
844    
845    
846     /* ****************************************************************** */
847     /* */
848     /* DATABASE */
849     /* */
850     /* ****************************************************************** */
851    
852     enum { /* flags for record and query data/index file pairs; distinct bits, so they can be combined */
853     DX_OPEN = 0x1, /* open */
854     DX_WRITE = 0x2, /* open for writing */
855     DX_ASYNC = 0x4, /* no synced write */
856     DX_MODIF = 0x8 /* modified */
857     };
858     /**
859     record data and index.
860     While this is accessible standalone, a logical database table
861     may consist of several Rdx, e.g. one per million records.
862     */
863     typedef struct Rdx {
864     file mrd; /* NOTE(review): presumably the record data file -- confirm */
865     FMap mrx; /* NOTE(review): presumably the (mappable) pointer/index file -- confirm */
866     int flg; /* NOTE(review): presumably DX_* flags -- confirm */
867     int mid; /* in records == maxid */
868     int rdl; /* length of data file in bytes */
869     int ptl; /* pointer bytes, by now always 8 */
870     int typ; /* type of pointer file */
871     } Rdx;
872    
873     /** initialise rdx from an already open fd (return convention not documented here). */
874     extern int rInit ( Rdx *rdx );
875     /** flush and release any cache held for rdx. */
876     extern void rFini ( Rdx *rdx );
877    
878     /*
879     read record rid.
880     if mpos is given and the rec is found at mpos or higher,
881     rRead will backtrack to earlier versions.
882     */
883     extern int rRead ( List *l, Rdx *rdx, int rid, unsigned mpos );
884     /*
885     write a record.
886     use rid 0 to get a new rid.
887     use opos -1 if you don't care about the old pos.
888     to be transparent here, the rid@pos as found in standard recs
889     is ignored and should be pre-skipped up to the leader.
890     specify the record size if known; with 0, rSiz will be used.
891     @return the record id written (> 0) on success, <= 0 on error
892     */
893     extern int rWrite ( Rdx *rdx, const Fld *r, int rid, int opos, unsigned siz );
894    
895    
896     /**
897     query data and index.
898     leaf blocks have a configurable size from 512 bytes to 8K;
899     fork (inner node) blocks always have pagesize, i.e. 4K up to 64K.
900    
901     While this is accessible standalone, a logical database table
902     may consist of several Qdx, each holding some range of keys.
903     */
904     typedef struct Cdx Cdx; /* collation; only forward-declared at this point */
905    
906     typedef struct Qdx { /* actually it's a B-Link-Tree */
907     file mqd; /* the leaves file */
908     FMap mqx; /* the tree file */
909     const Cdx *cdx; /* the collation */
910     int flg; /* flags: writeable */
911     unsigned char typ; /* cfg: leaf block type */
912     unsigned char ksz; /* cfg: max key length, default 255 */
913     unsigned char ptr; /* cfg: inverted file pointer type or plain value size */
914     unsigned char let; /* cfg: pct free on load */
915     /* members set automatically: */
916     unsigned char vsz; /* value size, min 4, default 8 */
917     unsigned char uni; /* value unique length (see qSet) */
918     unsigned char ftp; /* fork block type */
919     unsigned char dpt; /* depth (level of root over bottom > 0) */
920     unsigned lsz; /* leaf block size computed from type */
921     unsigned lln; /* # leaf blocks in index */
922     unsigned fln; /* # fork blocks in index */
923     /* members considered internal: */
924     struct QLoad *qld; /* NOTE(review): presumably state of a running qLoad -- confirm */
925     } Qdx;
926    
927     enum { /* btree block type byte: basic type (2 bits), flags, block size (3 bits) */
928     QDX_TYPMSK = 0xC0, /* highest 2 bits: basic type */
929     QDX_LEAF = 0x00, /* leaf block, portable */
930     QDX_FORKLE = 0x40, /* fork block little endian */
931     QDX_FORKBE = 0x80, /* fork block big endian */
932     QDX_LEAFPV = 0xC0, /* leaf plain values (forks don't care) */
933     /* next 2 bits 0x30 for future extensions */
934     QDX_COMPRS = 0x08, /* flag compressed keys (not yet supported) */
935     QDX_SIZMSK = 0x07, /* lowest 3 bits: blocksize, as a shift on top of the base shift below */
936     QDX_LEAF0K = 0x00, /* 1/2K blocks 0x0200 9+0 bits */
937     QDX_LEAF1K = 0x01, /* 1K blocks 0x0400 */
938     QDX_LEAF2K = 0x02, /* 2K blocks 0x0800 */
939     QDX_LEAF4K = 0x03, /* 4K blocks 0x1000 */
940     QDX_LEAF8K = 0x04 /* 8K blocks 0x2000, max for leaves */
941     };
942     enum { /* size limits and base shifts for the block sizes above */
943     QDX_MAXVALPERLEAF = 0x800, /* max 8K / min 4 bytes vsz = 2048 values */
944     QDX_LEAFSH = 9, /* leaf size shift+(0..4) ~ 512 bytes - 8K */
945     QDX_FORKSH = 12 /* fork size shift+(0..4) ~ 4K - 64K */
946     };
947    
948     typedef struct { /* a value: length byte followed by up to 255 bytes */
949     unsigned char len; /* number of bytes used in byt */
950     unsigned char byt[255];
951     } Val;
952    
953     typedef struct { /* a key together with a value: the Val comes first, then the length-prefixed key bytes */
954     Val val;
955     unsigned char len; /* number of bytes used in byt */
956     unsigned char byt[255];
957     } Key;
958    
959    
960     typedef struct QLoop QLoop;
961     /**
962     callback for index loop.
963     may be called multiple times for the same key, if it spans blocks.
964     In this case, flag QSAME is set.
965     the loop stops if QCb returns != 0
966     */
967     typedef int QCb ( QLoop *self );
968    
969     struct QLoop { /* parameters and per-callback state of one qLoop run */
970     QCb *qcb; /* the callback */
971     Qdx *qdx; /* the index to loop over (the dLoop macro sets this from a Db) */
972     int flg; /* loop flags (QEQ, QPF, QUPTO, ..., see the flags enum) */
973     Key key; /* the from key; also the stop criterion for QEQ and QPF */
974     Key to; /* stop criterion for QUPTO and QINCL */
975     /* set on callback: */
976     Key cur;
977     unsigned nvals;
978     const unsigned char *vals; /* NOTE(review): presumably nvals values of the index's value size each -- confirm */
979     };
980    
981     enum { /* flags for QLoop.flg: a stop mode (low 3 bits) plus optional modifier bits */
982     QLOOP = 0, /* loop endless */
983     /* stop based on QLoop.key */
984     QEQ = 1, /* loop while == key */
985     QPF = 2, /* loop on prefix key */
986     /* stop based on QLoop.to */
987     QUPTO = 4, /* loop while < to */
988     QINCL = 5, /* loop while <= to (numerically QUPTO|QEQ) */
989     QSTOP = 7, /* mask for stop mode */
990     QSKIP = 8, /* skip the from key */
991     QSAME = 0x10 /* callback on same key */
992     };
993    
994    
995     /** initialise bt from an already open fd (return convention not documented here). */
996     extern int qInit ( Qdx *bt );
997     /** flush and release any cache held for bt. */
998     extern void qFini ( Qdx *bt );
999    
1000     /*
1001     load a sorted series of keys and hits into the index.
1002     call repeatedly, using a key with val.len 0 in the last call
1003     */
1004     extern int qLoad ( Qdx *bt, Key *key );
1005     extern int qLoadf ( Qdx *bt, file *f ); /* NOTE(review): undocumented here; presumably loads the sorted series from file f -- confirm */
1006    
1007     /*
1008     write the key-value pair to the index and return 0, unless:
1009     - the value is zero on the 1st uni bytes
1010     and there is already such a value (unique key, return 3)
1011     - there is an all-zero value for the key (stopword, return 2)
1012     - it is already there (full duplicate found, return 1)
1013     With plain values, only full duplicates are checked.
1014     Uni is usually the length of the initial segment of the value,
1015     which is the rid in fulltext mode, else the tag.
1016     Where a unique key is found, the value is copied to key.
1017     */
1018     extern int qSet ( Qdx *bt, Key *key );
1019     enum { /* values match the return codes listed for qSet above */
1020     QST_OK, /* 0 */
1021     QST_FOUND, /* 1: full duplicate found */
1022     QST_STPWRD, /* 2: stopword */
1023     QST_UNIKEY /* 3: unique key */
1024     };
1025     /*
1026     delete a key-value pair from the index. return 1 if found, else 0
1027     */
1028     extern int qDel ( Qdx *bt, Key *key );
1029    
1030     extern int qLoop ( QLoop *self ); /* run the index loop described by self; the callback self->qcb stops it by returning != 0 */
1031    
1032    
1033     /*
1034     standard values are structured as pointers
1035     denoting a record and the position where the key occurred.
1036     The value structure is up to 3 big endian unsigned numbers:
1037     0..2 bytes for tag,
1038     3+(0..3) bytes for rid (in fulltext mode: before the tag) and
1039     0..4 bytes for pos,
1040     totalling from 4 (vsz min.) to 12 bytes.
1041     */
1042     typedef struct { /* where a key has a hit (decoded form of a value) */
1043     unsigned short tag; /* while neg. tags are allowed, sorting is unsigned */
1044     unsigned short ext; /* extend row id to six bytes (used as db number) */
1045     unsigned rid; /* row id */
1046     unsigned pos; /* word pos, usually with field occ<<16 */
1047     } Ptr;
1048    
1049     enum { /* pointer type byte: lengths of the tag, rid and pos parts of a value */
1050     QDX_TAGMSK = 0xC0, /* mask length of tag 0..2 */
1051     QDX_TAG1 = 0x40, /* 1 << QDX_TAGSH */
1052     QDX_TAG2 = 0x80, /* 2 << QDX_TAGSH */
1053     QDX_TAGSH = 6, /* ... shifted by 6 */
1054     QDX_RIDMSK = 0x30, /* mask length of rid 0..3 */
1055     QDX_RIDMIN = 3, /* based on 3 */
1056     QDX_RIDSH = 4, /* ... shifted by 4 */
1057     QDX_RID3 = 0x00, /* (3 - QDX_RIDMIN) << QDX_RIDSH */
1058     QDX_RID4 = 0x10,
1059     QDX_RID5 = 0x20,
1060     QDX_RID6 = 0x30,
1061     QDX_FULTXT = 0x08, /* traditional fulltext ordering */
1062     QDX_POSMSK = 0x07, /* mask length pos info 0..4 */
1063     /* default settings based on 3 byte rid */
1064     QDX_ISIS = QDX_RID3|QDX_TAG2|QDX_FULTXT|3,
1065     /* 0x8B, 8 byte fulltext 3+2+3 IFP format */
1066     QDX_STDDB = QDX_TAG1|QDX_RID3
1067     /* 0x40, 4 byte 3 rid + 1 for 256 field tags */
1068     };
1069    
1070     /* decode a value to a pointer; typ is presumably the pointer type byte (QDX_TAG*, QDX_RID*, ...) -- confirm */
1071     extern void qRdVal ( Ptr *ptr, const unsigned char *val, unsigned char typ );
1072     /* create a value from a hit. */
1073     extern void qMkVal ( Val *val, Ptr *ptr, unsigned char typ );
1074     /* decode a key to plaintext */
1075     extern int qRdKey ( Qdx *qdx, char *plain, int l, Key *key );
1076     /* create a key from plaintext, truncating to ksz (the max key length of qdx). */
1077     extern void qMkKey ( Qdx *qdx, Key *key, char *b, int l );
1078     /* create a key and value from a line. */
1079     extern int qMkKeyVal ( Qdx *qdx, Key *key, char *b, int l );
1080    
1081    
1082     typedef struct { /* context for qSetKeyVal / qSetKeyVals */
1083     Qdx *qdx; /* the index to modify */
1084     int del; /* NOTE(review): presumably nonzero to delete instead of set -- confirm */
1085     Key pfx; /* optional prefix, prepended to each key */
1086     Ptr ptr; /* the hit to store; ptr.pos is incremented by qSetKeyVal */
1087     } QSet;
1088     /*
1089     set or delete one key.
1090     basically this behaves like qMkKey, qMkVal, qSet.
1091     ptr.pos is incremented.
1092     with pfx, pfx is prepended to the key (before qMkKey)
1093     */
1094     extern int qSetKeyVal (QSet *qst, char *val, int len);
1095     /*
1096     split the value into words and qSetKeyVal each.
1097     return the number of entries made
1098     */
1099     extern int qSetKeyVals (QSet *qst, char *val, int len);
1100    
1101    
1102     /**
1103     collation (Cdx): used by a Qdx via its cdx member
1104     */
1105     #define CDX_MAXSEQ 15 /* max byte sequence length */
1106    
1107    
1108     extern const Cdx *cOpen ( const Fld *src ); /* NOTE(review): undocumented here; presumably builds a collation from the definition in src -- confirm */
1109     extern int cEnc ( const Cdx *cdx, Key *key, unsigned char *b, int l, int w ); /* NOTE(review): presumably encodes the l bytes at b into key; meaning of w not visible here */
1110     extern int cDec ( const Cdx *cdx, unsigned char *b, int l, Key *key ); /* NOTE(review): presumably decodes key into the buffer b of size l -- confirm */
1111    
1112    
1113     enum { /* see Metadata.txt; written as octal literals, i.e. the values 1..6 */
1114     MET_OPT = 001,
1115     MET_UNU = 002,
1116     MET_CTP = 003,
1117     MET_COL = 004,
1118     MET_VER = 005,
1119     MET_FLD = 006
1120     };
1121    
1122     /**
1123     finally, a real database: one record file pair (Rdx) and one query index (Qdx)
1124     */
1125     typedef struct Db {
1126     char *nam; /* NOTE(review): presumably the database name as given to dOpen -- confirm */
1127     char *pat; /* NOTE(review): presumably the path -- confirm */
1128     int flg;
1129     int mnt; /* mount count */
1130     struct Db *nxt; /* linked list */
1131     const Fld *opt; /* inner meta data; this is what dRead delivers for rid 0 */
1132     Rdx rdx; /* record data and index */
1133     Qdx qdx; /* query data and index */
1134     } Db;
1135    
1136     extern Db *dOpen (const char *dbname);
1137     extern void dClose (Db *db);
1138     extern void dCloseAll ();
1139    
1140     /**
1141     access to record and query data via the db.
1142     In future versions these might become real functions taking
1143     care of multifile databases. Being macros, they may evaluate arguments more than once.
1144     */
1145     #define dRead(l,db,rid) ((rid)?rRead(l,&(db)->rdx,rid,0):lCpy(l,(db)->opt,0)) /* rid 0 copies the inner meta data (opt) instead of reading a record; rid is evaluated twice */
1146     #define dWrite(db,r,rid) rWrite(&(db)->rdx,r,rid,-1,0) /* opos -1: don't care about old pos; siz 0: rSiz will be used */
1147    
1148     #define dSet(db,key) qSet(&(db)->qdx,key) /* qSet on the db's query index */
1149     #define dDel(db,key) qDel(&(db)->qdx,key) /* qDel on the db's query index */
1150     #define dLoop(db,ql) ((ql)->qdx=&(db)->qdx, qLoop(ql)) /* points ql at the db's query index, then runs qLoop; ql is evaluated twice */
1151    
1152     #define CORE_H
1153     #endif /* CORE_H */

  ViewVC Help
Powered by ViewVC 1.1.26