/[webpac]/openisis/0.9.9e/core/qdx.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/0.9.9e/core/qdx.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (hide annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 72047 byte(s)
import of new openisis release, 0.9.9e

1 dpavlin 604 /*
2     The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3     Version 0.9.x (patchlevel see file Version)
4     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14     See the GNU Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     /*
24     $Id: qdx.c,v 1.29 2004/11/12 08:38:13 kripke Exp $
25     the btree.
26     */
27    
28     #include <stdlib.h>
29     #include <string.h>
30     #include <assert.h>
31    
32     #include "../core/core.h"
33    
34    
35    
36     /* ************************************************************
37     private types
38     */
39    
40    
41     /**
42     Dictionary layout for forks (inner nodes of tree):
43     */
44     typedef struct Dict { /* dictionary of block entries */
45     unsigned short pos; /* position of entry, native byte order */
46     unsigned char vln; /* number of values */
47     unsigned char kln; /* length of key */
48     } Dict;
49     /*
50     For leaves, the max block size is 8K,
51     which need 13 bit for pos and 11 bit for vln (max 8K/min vsz 4 values).
52     The 1st 3 bytes are:
53     1. lower 8 bit of pos
54     2. higher 5 bit of pos | higher 3 bit of vlen << 5
55     3. lower 8 bit of vln
56     */
57     #ifdef CPU_BIG_ENDIAN
58     #define LEAFPOS( d ) (*((unsigned char*)(d)) \
59     | (((unsigned char*)(d))[1]&0x1f)<<8)
60     #define LEAFVLN( d ) ((d)->vln \
61     | (((unsigned char*)(d))[1]&0xe0)<<3)
62     #define LEAFPOSADD( d, a ) do { \
63     unsigned char *__u = (unsigned char*)(d); \
64     unsigned short __p = (a) + __u[0] + (__u[1]<<8); \
65     __u[0] = (unsigned char)__p; \
66     __u[1] = (unsigned char)((__u[1]&0xe0)|((__p>>8)&0x1f)); \
67     } while (0)
68     #define LEAFVLNADD( d, a ) do { \
69     unsigned char *__u = (unsigned char*)(d); \
70     unsigned short __v = (a) + __u[2] + ((__u[1]&0xe0)<<3); \
71     __u[2] = (unsigned char)__v; \
72     __u[1] = (unsigned char)((__u[1]&0x1f)|((__v>>3)&0xe0)); \
73     } while (0)
74     #define LEAFSETPV( d, p, v ) do { \
75     unsigned char *__u = (unsigned char*)(d); \
76     __u[0] = (unsigned char)(p); \
77     __u[1] = (unsigned char)((((p)>>8)&0x1f)|(((v)>>3)&0xe0)); \
78     __u[2] = (unsigned char)(v); \
79     } while (0)
80     #else
81     #define LEAFPOS( d ) (0x1fff & (d)->pos)
82     #define LEAFVLN( d ) ((d)->vln | (~0x1fff & (d)->pos)>>5)
83     #define LEAFPOSADD( d, a ) do { \
84     (d)->pos = (~0x1fff & (d)->pos) | (0x1fff & ((d)->pos + (a))); \
85     } while (0)
86     #define LEAFVLNADD( d, a ) do { \
87     unsigned _vln = LEAFVLN( d ) + (a); \
88     (d)->pos = (0x1fff & (d)->pos) \
89     | (~0x1fff & ((_vln)<<5)); \
90     (d)->vln = (unsigned char)(_vln); \
91     } while (0)
92     #define LEAFSETPV( d, p, v ) do { \
93     (d)->pos = (0x1fff & (p)) | (~0x1fff & ((v)<<5)); \
94     (d)->vln = (unsigned char)(v); \
95     } while (0)
96     #endif
97    
98     #define LEAFSETPVK( d, p, v, k ) do { \
99     LEAFSETPV( d, p, v ); \
100     (d)->kln = (unsigned char)(k); \
101     } while (0)
102    
103    
104     /*
105     the first 8 bytes are actually redundant,
106     however are here for convenience and as a check
107     */
108     typedef struct Block {
109     unsigned num; /* number of this block, for check and convenience */
110     unsigned char typ; /* block type & endianess */
111     unsigned char ksz; /* max key length */
112     unsigned char ptr; /* ptr type; for forks+plain leaves: value size */
113     unsigned char lev; /* level over bottom, 0 for leaves */
114     unsigned nxt; /* right sibling, free list for root */
115     unsigned short ent; /* # entries */
116     unsigned short stk; /* offset of keyspace stack */
117     Dict dct[1];
118     } Block;
119     #define DICTOFF ((char *)&((Block*)0)->dct - (char*)((Block*)0))
120     #define GETENT(b, i) (((unsigned char *)(b)) \
121     + ((b)->lev ? (b)->dct[i].pos : LEAFPOS((b)->dct+(i))))
122    
123     /*
124     LAYOUT:
125     dct is an array of dictionary entries in increasing key order
126     stk is growing downwards from block end towards dict
127     position of entry i is b->stk <= LEAFPOS(b->dct+i) < dx->t/lsz
128     decreasing on i (i.e. stack space is in reverse ordering)
129    
130     at LEAFPOS(b->dct+i), there are b->dct[i].kln bytes key
131     for a leaf entry, the key is followed by
132     LEAFVLN(b->dct+i) (between 0..2047 incl.) values
133     of dx->vsz (between 8..23 incl) each
134     for a fork entry, the key is followed by
135     a single 4 bytes block number (vln = 0)
136     or vln pairs of qualifying values (vsz each) and block numbers
137     (without the key compression flag to be implemented, only vln=1 is supported)
138    
139     SIZES:
140     block size >= 512
141     block base overhead DICTOFF is 16 bytes
142     avail size >= 496
143     Dict entry is 4 bytes
144     max. used stack space per entry is:
145     - fork: kln+4+dx->vsz <= 255+4+255 = 514
146     - new leaf entry (one value): kln+dx->vsz <= 510
147     max total space for a new entry incl. 4 bytes dict is 518.
148     For leaves of minimal block size 512,
149     the sum of ksz+vsz must not exceed 492=512-16-4,
150     so that each block can hold at least one entry.
151     Forks are at least 4K and thus can hold min 7 entries.
152     Even in pathological cases with max kln and vsz and all forks
153     split to the worst, a level's fork factor does not drop below 4.
154     */
155    
156     enum {
157     QMAXLEAF = 0x2000, /* max leaf size -- 8K fixed !!! */
158     QMAXFORK = 1<<CPU_PAGE_SHIFT,
159     #if 0x2000 < 1<<CPU_PAGE_SHIFT
160     QMAXBLCK = 1<<CPU_PAGE_SHIFT,
161     #else
162     QMAXBLCK = 0x2000,
163     #endif
164     /* With 2Gig files there are at most 2^31/2^9=2^22 leaves,
165     needing 11 fork levels. */
166     QLEVELS = 12 /* max depth incl. leaves */
167     };
168     typedef union { Block b; unsigned char base[QMAXLEAF]; } Leaf;
169     typedef union { Block b; unsigned char base[QMAXFORK]; } Fork;
170     typedef union { Block b; unsigned char base[QMAXBLCK]; } Bloke; /* any dude */
171    
172    
173     enum {
174     QENTSIZE = 8, /* minimum cost of entry: 4 byte dict, 4 byte val */
175     /* the number of entries is limited by (QMAXFORK-16)/QENTSIZE < 8K */
176     QENTBITS = 13, /* bits for # enries */
177     QENTMASK = 0x1fff,
178     QENTMAX = QENTMASK,
179     QMATCH = 0x8000 /* entry matched flag in found entry numbers */
180     };
181    
182     #if 0
183     # define SORT_Q
184     /* glibc's qsort is up to 20% faster than our basic untuned WHS. However:
185     - dietlibc's is slower
186     - it still has poor worst case performance
187     - many implementations are known to be buggy
188     */
189     #endif
190     /* memory for internal sorting */
191     /*
192     loaded 5MB index in 5 seconds with as little as
193     #define SORTMEM 0x30000 = 192K (+96K Keys) internal
194     #define SORTMEM 0x400000 = 4MB (+2MB Keys) internal
195     */
196     #define SORTMEM 0x400000
197     #define SORTKEYS (SORTMEM/8) /* + another 50%: #entries (assume > 8 Bytes) */
198    
199     /* blocksize for files */
200     /*
201     #define MERGEBLK 0x20000 = 128K buffer, 48-way merge
202     #define MERGEBLK 0x3000 = 12K buffer, 512-way merge
203    
204     12K blocks perform quite well and allow for 512-way merge,
205     so we never have a second pass for 31-bit addressable files
206     */
207     #define MERGEBLK 0x3000
208     #define MERGECHK (SORTMEM/MERGEBLK) /* initial chunk size */
209    
210     /*
211     By organizing files in MERGEBLKs, so that entries don't span blocks,
212     we may be wasting up to one full entry (256 bytes) per block:
213     if we have only exactly the space for one such longest entry,
214     use one terminating 0 byte instead.
215     */
216     #define SORTUSE (MERGECHK*(MERGEBLK-256))
217    
218     typedef struct {
219     unsigned char *keyb[SORTKEYS];
220     unsigned char memb[SORTMEM];
221     } SortBuf;
222     typedef unsigned char MergeBlk[MERGEBLK];
223    
224     /* n-way merge
225     #define MERGEWAY (sizeof (SortBuf)/sizeof (MergeBlk))
226     */
227     #ifndef MERGEWAY /* define possible max */
228     # define MERGEWAY ((4*SORTKEYS+SORTMEM)/MERGEBLK) /* as pre-pro constant */
229     #endif
230     #if 0x80000000UL > (MERGEWAY*SORTMEM)
231     # define MERGEMULTIPASS
232     /* else (512-way merge on 4MB) we can do 2GB in one pass */
233     #endif
234    
235     typedef struct QLoad {
236     unsigned got; /* current for this key */
237     unsigned tot; /* total for this key */
238     unsigned let;
239     /* stats */
240     unsigned keys;
241     unsigned keyl;
242     unsigned maxl;
243     unsigned vals;
244     unsigned maxv;
245     unsigned span;
246     unsigned free;
247     /* load buffer */
248     Leaf cur;
249     Key key;
250     Val val[QENTMAX];
251     /* sort buffer */
252     file fil;
253     unsigned blks;
254     unsigned char **nkey;
255     unsigned char *nmem;
256     #if 0 /* hard to measure ... */
257     # define ALIGN4K __attribute__ ((aligned (0x1000)))
258     #else
259     # define ALIGN4K
260     #endif
261     union {
262     SortBuf s;
263     MergeBlk m[MERGEWAY]; /* mergebuf */
264     } buf ALIGN4K;
265     MergeBlk mout; /* only used temporary, but don't want to kill stack */
266     } QLoad;
267    
268    
269    
270     typedef struct { /* structure filled by findkey */
271     Qdx *dx; /* the tree */
272     Key *key; /* the key searched */
273     unsigned n[QLEVELS]; /* block numbers of path */
274     unsigned short e[QLEVELS]; /* entries in path with match flag */
275     } FindKey;
276    
277    
278     typedef struct {
279     unsigned key;
280     unsigned cmp;
281     unsigned fget;
282     unsigned lget;
283     } Stat;
284    
285     /* buffer to print a value to */
286     typedef char PrVal[64];
287    
288    
289     /* ************************************************************
290     private data
291     */
292    
293     static Stat stat;
294    
295    
296     /* ************************************************************
297     private functions
298     */
299    
300     /* bail out */
301     #define DONE( args ) do { ret = eRr args; goto done; } while (0)
302    
303     #ifdef CPU_NEED_ALIGN
304     static unsigned GETINT ( void *m ) /* bus error safe version of *(int*) */
305     {
306     unsigned l;
307     memcpy( &l, m, 4 );
308     return l;
309     }
310     static void SETINT ( void *m, unsigned i )
311     {
312     memcpy( m, &i, 4 );
313     }
314     #else
315     #define GETINT( m ) (*(unsigned*)(m))
316     #define SETINT( m, i ) (*(unsigned*)(m) = (i))
317     #endif
318    
319     void qMkVal ( Val *v, Ptr *p, unsigned char typ )
320     {
321     unsigned char *c = v->byt;
322     /* TODO: may macros to address the bytes in the ints would be faster */
323     if ( !(QDX_FULTXT & typ) ) /* leading tag */
324     switch ( QDX_TAGMSK&typ ) {
325     case QDX_TAG2: *c++ = (unsigned char) (p->tag >> 8);
326     case QDX_TAG1: *c++ = (unsigned char) p->tag;
327     }
328     switch ( QDX_RIDMSK&typ ) {
329     case QDX_RID6: *c++ = (unsigned char) (p->ext >> 8);
330     case QDX_RID5: *c++ = (unsigned char) p->ext;
331     case QDX_RID4: *c++ = (unsigned char) (p->rid >> 24);
332     case QDX_RID3:
333     *c++ = (unsigned char) (p->rid >> 16);
334     *c++ = (unsigned char) (p->rid >> 8);
335     *c++ = (unsigned char) p->rid;
336     }
337     if ( QDX_FULTXT & typ )
338     switch ( QDX_TAGMSK&typ ) {
339     case QDX_TAG2: *c++ = (unsigned char) (p->tag >> 8);
340     case QDX_TAG1: *c++ = (unsigned char) p->tag;
341     }
342     switch ( QDX_POSMSK&typ ) {
343     case 4: *c++ = (unsigned char) (p->pos >> 24);
344     case 3: *c++ = (unsigned char) (p->pos >> 16);
345     case 2: *c++ = (unsigned char) (p->pos >> 8);
346     case 1: *c++ = (unsigned char) p->pos;
347     }
348     v->len = c - v->byt;
349     } /* qMkVal */
350    
351    
352     int qRdKey ( Qdx *qdx, char *plain, int l, Key *key )
353     {
354     if (!qdx->cdx) {
355     if ( l > key->len ) l = key->len;
356     memcpy(plain, key->byt, l);
357     } else
358     l = cDec(qdx->cdx, (unsigned char*)plain, l, key);
359     return l;
360     } /* qRdKey */
361    
362    
363     void qMkKey ( Qdx *qdx, Key *key, char *b, int l )
364     {
365     if (!qdx->cdx) {
366     if ( l > qdx->ksz ) l = qdx->ksz;
367     memcpy(key->byt, b, key->len = l);
368     } else {
369     key->len = qdx->ksz;
370     cEnc(qdx->cdx, key, (unsigned char*)b, l, 0);
371     }
372     } /* qMkKey */
373    
374    
375     int qMkKeyVal ( Qdx *qdx, Key *key, char *b, int l )
376     {
377     Ptr p = {0,0,0,0};
378     char *t = memchr(b, '\t', l);
379     if ( !t )
380     return -1;
381     qMkKey(qdx, key, b, t - b);
382     if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) )
383     return -1;
384     p.rid = (unsigned) a2i( b, t-b );
385     if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) )
386     return -1;
387     p.tag = (unsigned short) a2i( b, t-b );
388     if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) )
389     return -1;
390     p.pos = (unsigned) a2i( b, t-b );
391     if ( 0 < (l -= t-b+1) )
392     p.pos = p.pos << 16 | (unsigned short) a2i( t+1, l );
393     eRr(LOG_TRACE, "%'%b %d %d 0x%4x",
394     key->len, key->byt, p.rid, p.tag, p.pos );
395     qMkVal( &key->val, &p, qdx->ptr );
396     return 0;
397     } /* qMkKeyVal */
398    
399    
400     void qRdVal ( Ptr *p, const unsigned char *c, unsigned char typ )
401     {
402     p->tag = p->ext = p->rid = p->pos = 0;
403     if ( !(QDX_FULTXT & typ) ) /* leading tag */
404     switch ( QDX_TAGMSK&typ ) {
405     case QDX_TAG2: p->tag = (unsigned short)*c++ << 8;
406     case QDX_TAG1: p->tag |= *c++;
407     }
408     switch ( QDX_RIDMSK&typ ) {
409     case QDX_RID6: p->ext = (unsigned short)*c++ << 8;
410     case QDX_RID5: p->ext |= *c++;
411     case QDX_RID4: p->rid = (unsigned int)*c++ << 24;
412     case QDX_RID3:
413     p->rid |= (unsigned int)*c++ << 16;
414     p->rid |= (unsigned int)*c++ << 8;
415     p->rid |= *c++;
416     }
417     if ( QDX_FULTXT & typ )
418     switch ( QDX_TAGMSK&typ ) {
419     case QDX_TAG2: p->tag = (unsigned short)*c++ << 8;
420     case QDX_TAG1: p->tag |= *c++;
421     }
422     switch ( QDX_POSMSK&typ ) {
423     case 4: p->pos = (unsigned int)*c++ << 24;
424     case 3: p->pos |= (unsigned int)*c++ << 16;
425     case 2: p->pos |= (unsigned short)*c++ << 8;
426     case 1: p->pos |= *c++;
427     }
428     } /* qRdVal */
429    
430    
431     static char *prval ( PrVal buf, unsigned char *c, unsigned typ )
432     {
433     char *d = buf;
434     Ptr p;
435     if ( ! typ ) {
436     buf[0] = '#';
437     buf[1] = 0;
438     return buf;
439     }
440     qRdVal( &p, c, typ );
441     if ( p.ext ) { d += u2a( d, p.ext ); *d++ = '.'; }
442     d += u2a( d, p.rid ); *d++ = '.';
443     d += u2a( d, p.tag ); *d++ = '.';
444     d += u2a( d, p.pos ); *d++ = '.';
445     *d = 0;
446     return buf;
447     } /* prval */
448    
449     #define PRVAL( buf, v ) prval( buf, v.byt, dx->ptr )
450    
451     #define QLOCK(n,l) (void)FLOCK(dx->mqd, n, l)
452     #define QLOCKSH(n) (void)FLOCKSH(dx->mqd, n)
453     #define QLOCKEX(n) (void)FLOCKEX(dx->mqd, n)
454     #define QLOCKUN(n) (void)FLOCKUN(dx->mqd, n)
455     #ifdef BUILD_SHMODE
456     enum {
457     LOCK_READ = FIL_BLOCK,
458     LOCK_WRITE = FIL_BLOCK|FIL_WR
459     };
460     #endif
461    
462    
463     /* ************************************************************
464     block I/O
465     */
466    
467    
468     static int chkblk ( Block *b, Qdx *dx )
469     {
470     int i;
471     unsigned short eod = DICTOFF + b->ent*sizeof(Dict), siz, pos, vln, key;
472     unsigned char *base = (unsigned char*)b;
473     unsigned len = b->lev ? dx->fln : dx->lln;
474     unsigned bsz = b->lev ? env.psz : dx->lsz;
475     unsigned end = bsz, tot = 0;
476     if ( b->num > len || (b->num && b->nxt > len) )
477     return eRr(ERR_TRASH, "bad num %d / nxt %d on lev %d",
478     b->num, b->nxt, b->lev );
479     if ( b->typ != (b->lev ? dx->ftp : dx->typ)
480     || b->ksz != dx->ksz || b->ptr != (b->lev?dx->vsz:dx->ptr)
481     )
482     return eRr(ERR_TRASH, "bad sig 0x%02x,%d,%d in block %d want 0x%02x,%d,%d",
483     b->typ, b->ksz, b->ptr, b->num,
484     b->lev ? dx->ftp : dx->typ, dx->ksz, (b->lev?dx->vsz:dx->ptr) );
485     if ( b->ent > bsz / QENTSIZE || b->stk > bsz || b->stk < eod )
486     return eRr(ERR_TRASH, "bad ent/stk %d,%d in block %d eod %d sz %d",
487     b->ent, b->stk, b->num, eod, bsz );
488     for ( i=0; i<b->ent; i++ ) {
489     Dict *d = &b->dct[i];
490     if ( b->lev ) {
491     pos = d->pos; vln = d->vln;
492     siz = vln ? vln*(dx->vsz+4) : 4;
493     } else {
494     pos = LEAFPOS(d); vln = LEAFVLN(d);
495     siz = vln*dx->vsz;
496     }
497     siz += key = d->kln;
498     if ( pos < b->stk || end != (unsigned)pos + siz )
499     return eRr(ERR_TRASH,
500     "bad pos/vln/key %d/%d/%d in entry %d of block %d"
501     " stack %d sz %d end %d expected %d",
502     pos, vln, key, i, b->num, b->stk, siz, pos+siz, end );
503     if ( b->lev ) {
504     unsigned lnk = GETINT( base + pos + key + vln*dx->vsz );
505     if ( lnk >= (1==b->lev ? dx->lln : dx->fln) )
506     return eRr(ERR_TRASH,
507     "bad link %u in entry %d of block %d",
508     lnk, i, b->num );
509     }
510     #ifndef NDEBUG
511     if ( i ) { /* not on first */
512     unsigned short op = b->lev ? d[-1].pos : LEAFPOS(d-1), okl = d[-1].kln;
513     int mc = memcmp( base+pos, base+op, key<okl ? key : okl ), j;
514     if ( 0 < mc || (!mc && 0 < (mc = (int)key - (int)okl)) )
515     goto ok;
516     j = 0;
517     if ( !mc && b->lev && vln ) { /* compare values */
518     unsigned ovln = d[-1].vln;
519     if ( ! ovln || 0 < (j = memcmp( base+pos+key, base+op+okl, dx->vsz )) )
520     goto ok;
521     }
522     return eRr(ERR_TRASH,
523     "bad key order entry %d of block %d lev %d"
524     " mc %d j %d key %d okl %d vln %d",
525     i, b->num, b->lev, mc, j, key, okl, vln
526     );
527     }
528     ok:
529     #endif
530     tot += siz;
531     end = pos;
532     } /* for i (entries) */
533     if ( tot > bsz - b->stk )
534     return eRr(ERR_TRASH,
535     "bad total entry size %d in block %d stack %d",
536     tot, b->num, b->stk );
537     #ifndef NDEBUG
538     if ( LOG_TRACE >= env.log ) {
539     eRr(LOG_TRACE,
540     "block %d lev %d sig 0x%02x,%d ent %d eod %d stk %d",
541     b->num, b->lev, b->typ, b->ksz, b->ent, eod, b->stk
542     );
543     for ( i=0; i<2; i++ ) {
544     int j = i ? b->ent-1 : 0;
545     Dict *d = &b->dct[j];
546     if ( i >= b->ent ) /* if b->ent is 0 or 1, log no or one key */
547     break;
548     if ( b->lev ) {
549     pos = d->pos; vln = d->vln;
550     } else {
551     pos = LEAFPOS(d); vln = LEAFVLN(d);
552     }
553     key = d->kln;
554     /* start of value */
555     if ( b->lev )
556     eRr(LOG_TRACE, "key %d '%b'/%d -> %d",
557     j, key, base+pos, vln, GETINT(base+pos+key+(vln?4:0)) );
558     else {
559     Ptr p;
560     qRdVal( &p, base+pos+key, dx->ptr );
561     eRr(LOG_TRACE, "key %d '%b'[%d] = %d[%d,%d]",
562     j, key, base+pos, vln, p.rid, p.tag, p.pos );
563     }
564     }
565     }
566     #endif
567     return 0;
568     } /* chkblk */
569    
570    
571     /*
572     init a new leaf in the local buffer b (usually a stack based Leaf).
573     increment dx's lln
574     */
575     static void newleaf ( Block *b, Qdx *dx )
576     {
577     memset(b, 0, dx->lsz);
578     b->num = dx->lln++;
579     b->typ = dx->typ;
580     b->ksz = dx->ksz;
581     b->ptr = dx->ptr;
582     b->lev = 0;
583     b->stk = dx->lsz;
584     } /* newleaf */
585    
586    
587     /*
588     init a new fork in the local buffer b (usually a stack based Fork).
589     increment dx's fln
590     TODO: for qSet, use variant checking for free list at root->nxt.
591     */
592     static Block *newfork ( Block *b, Qdx *dx, unsigned char lev )
593     {
594     memset(b, 0, env.psz);
595     b->num = dx->fln++;
596     b->typ = dx->ftp;
597     b->ksz = dx->ksz;
598     b->ptr = dx->vsz;
599     b->lev = lev;
600     b->stk = env.psz;
601     #if 16 == CPU_PAGE_SHIFT /* possible short overflow on ia64 */
602     if ( !b->stk ) b->stk--;
603     #endif
604     return b;
605     } /* newfork */
606    
607    
608     #ifdef CPU_BIG_ENDIAN
609     static unsigned rvi ( unsigned i ) {
610     unsigned r;
611     ((char*)&r)[0] = ((char*)&i)[3];
612     ((char*)&r)[1] = ((char*)&i)[2];
613     ((char*)&r)[2] = ((char*)&i)[1];
614     ((char*)&r)[3] = ((char*)&i)[0];
615     return r;
616     }
617     static unsigned short rvs ( unsigned short i ) {
618     unsigned short r;
619     ((char*)&r)[0] = ((char*)&i)[1];
620     ((char*)&r)[1] = ((char*)&i)[0];
621     return r;
622     }
623     #define SWI( i ) b->i = rvi( b->i )
624     #define SWS( s ) b->s = rvs( b->s )
625     static void swapleaf ( Block *b ) /* leaves are little endian */
626     {
627     SWI(num);
628     SWI(nxt);
629     SWS(ent);
630     SWS(stk);
631     }
632     #else
633     # define swapleaf(b)
634     #endif
635    
636     /*
637     writing a leaf will render the leaf scrambled on big endians
638     */
639     static int wrtleaf ( Block *b, Qdx *dx )
640     {
641     unsigned num = b->num;
642     int got;
643     if ( chkblk( b, dx ) ) /* don't write no shize */
644     return eRr(ERR_TRASH, "\tbefore writing dx block %d", b->num );
645     swapleaf(b); /* a leaf written is a leaf lost */
646     got = fPwrite(&dx->mqd, b, dx->lsz, num*dx->lsz );
647     eRr(LOG_TRACE, "writing leaf %u got %u", num, got );
648     if ( dx->lsz != (unsigned)got )
649     return eRr(LOG_SYSERR, "\twhen writing dx block %d", b->num );
650     return 0;
651     } /* wrtleaf */
652    
653    
654     /*
655     writing a fork may invalidate all mapped forks due to remap
656     */
657     static int wrtfork ( Block *b, Qdx *dx )
658     {
659     int got;
660     if ( chkblk( b, dx ) ) /* don't write no shize */
661     return eRr(ERR_TRASH, "\tbefore writing dx block %d", b->num );
662     if ( dx->mqx.npg ) { /* have mmap -- not during qload */
663     if ( b->num < dx->mqx.npg ) { /* should be in map */
664     if ( (char*)b != dx->mqx.map + (b->num<<env.psh) )
665     return eRr(ERR_TRASH, "fork %d should be mapped", b->num);
666     if ( ENV_MSYNC & env.flg ) fMSync(&dx->mqx, b->num);
667     return 0;
668     }
669     if ( b->num < dx->mqx.lim /* try to extend mapping */
670     && 0 < (got = fMap(&dx->mqx, b->num+1))
671     && b->num < (unsigned)got
672     ) {
673     eRr(LOG_VERBOSE, "remapped to %d at %8x", dx->mqx.npg, dx->mqx.map);
674     memcpy(dx->mqx.map + (b->num<<env.psh), b, env.psz);
675     fMSync(&dx->mqx, b->num);
676     return 0;
677     }
678     eRr(LOG_WARN, "remap to %d failed", b->num+1);
679     /* else write to file */
680     }
681     got = fPwrite(&dx->mqx.fil, b, env.psz, b->num*env.psz );
682     eRr(LOG_TRACE, "writing fork %u got %u", b->num, got );
683     if ( env.psz != (unsigned)got )
684     return eRr(LOG_SYSERR, "\twhen writing dx block %d", b->num );
685     return 0;
686     } /* wrtfork */
687    
688    
689    
690     static int chkread ( Block *b, Qdx *dx,
691     int got, unsigned bsz, unsigned num, unsigned char lev )
692     {
693     if ( 0 > got || bsz != (unsigned)got )
694     return eRr(LOG_SYSERR, "\twhen reading dx block %d", num );
695     if ( b->num != num )
696     return eRr(ERR_TRASH, "bad num %d in block %d", b->num, num );
697     if ( lev && b->lev != lev )
698     return eRr(ERR_TRASH, "bad lev %d in block %d want %d",
699     b->lev, b->num, lev );
700     return chkblk( b, dx );
701     } /* chkread */
702    
703    
704     /* read a leaf into the provided local buffer */
705     static Block *getleaf ( Block *b, Qdx *dx, unsigned num
706     #ifdef BUILD_SHMODE
707     , int lck
708     # define GETLEAF(leaf, dx, num, lck) getleaf(leaf, dx, num, lck)
709     #else
710     # define GETLEAF(leaf, dx, num, lck) getleaf(leaf, dx, num)
711     #endif
712     )
713     {
714     int got;
715     #ifdef BUILD_SHMODE
716     if ( lck ) QLOCK(num<<1, lck);
717     #endif
718     got = fPread(&dx->mqd, b, dx->lsz, num*dx->lsz);
719     stat.lget++;
720     #ifdef CPU_BIG_ENDIAN
721     if ( 0 < got && dx->lsz == (unsigned)got ) swapleaf(b);
722     #endif
723     if ( chkread(b, dx, got, dx->lsz, num, 0) )
724     b = 0;
725     #ifdef BUILD_SHMODE
726     if ( LOCK_READ == lck ) QLOCKUN(num<<1);
727     #endif
728     return b;
729     } /* getleaf */
730    
731    
732     /* pick a fork from cache or read into the provided local buffer */
733     static Block *getfork ( Block *b, Qdx *dx, unsigned num, unsigned char lev )
734     {
735     int got = env.psz;
736     stat.fget++;
737     if ( num < dx->mqx.npg )
738     b = (Block*)(dx->mqx.map + num*env.psz);
739     else
740     got = fPread(&dx->mqx.fil, b, env.psz, num*env.psz);
741     return chkread(b, dx, got, env.psz, num, lev) ? 0 : b;
742     } /* getfork */
743    
744    
745    
746     /** binsearch position for key in a leaf.
747     position is either the index of the matching entry (0..b->ent-1)
748     or the index (0..b->ent) where key should be inserted.
749     @return position for key (0..b->ent), |yfied with QMATCH, if found
750     */
751     static unsigned short keyleaf ( Block *b, Key *key )
752     {
753     unsigned char *base = (unsigned char*)b;
754     unsigned char *kbt = key->byt;
755     unsigned char kln = key->len;
756     unsigned short l = 0, r = b->ent;
757     Dict *d = b->dct;
758     /* binsearch invariant:
759     key is < all entries i >= r (we do know relation at r)
760     key is > all entries i < l (don't know relation at l)
761     */
762     while ( l < r ) { /* invariant: l <= keypos < r */
763     unsigned short i = (l + r) >> 1; /* l <= i < r */
764     unsigned short pos = LEAFPOS(&d[i]);
765     unsigned char cln = d[i].kln;
766     int cmp = memcmp( kbt, base+pos, kln<cln ? kln : cln );
767     stat.cmp++; /* dirty */
768     if ( ! cmp && !(cmp = (int)kln - cln) )
769     return QMATCH | i;
770     if ( cmp > 0 )
771     l = i+1;
772     else
773     r = i;
774     }
775     return l; /* == r */
776     } /* keyleaf */
777    
778     /* same for forks.
779     somewhat more complex since we might have duplicate keys
780     with different values.
781     Must not use the _POS/_VLN macros.
782     */
783     static unsigned short keyfork ( Block *b, Key *key, unsigned char vsz )
784     {
785     unsigned char *base = (unsigned char*)b;
786     unsigned char *kbt = key->byt;
787     unsigned char kln = key->len;
788     unsigned short l = 0, r = b->ent;
789     Dict *d = b->dct;
790     /* binsearch invariant:
791     key is < all entries i >= r (we do know relation at r)
792     key is > all entries i < l (don't know relation at l)
793     */
794     while ( l < r ) { /* invariant: l <= keypos < r */
795     unsigned short i = (l + r) >> 1; /* l <= i < r */
796     unsigned short pos = d[i].pos;
797     unsigned char cln = d[i].kln;
798     int cmp = memcmp( kbt, base+pos, kln<cln ? kln : cln );
799     stat.cmp++; /* dirty */
800     if ( ! cmp && !(cmp = (int)kln - cln) ) { /* key match -- cmp value */
801     if ( ! d[i].vln ) { /* common case */
802     if ( !key->val.len ) /* no value */
803     return QMATCH | i;
804     cmp = 1;
805     } else if ( ! key->val.len )
806     cmp = -1;
807     else if ( !(cmp = memcmp( key->val.byt, base+pos+cln,
808     key->val.len < vsz ? key->val.len : vsz))
809     && !(cmp = (int) key->val.len - vsz)
810     )
811     return QMATCH | i;
812     }
813     if ( cmp > 0 )
814     l = i+1;
815     else
816     r = i;
817     }
818     return l; /* == r */
819     } /* keyfork */
820    
821    
822     /** find position for (leaf) val.
823     search v in array b of n values of size sz
824     @return position for value (0..n), |yfied with QMATCH, if found
825     */
826     static unsigned short valpos ( unsigned char *b,
827     unsigned short n, unsigned char sz, unsigned char *v )
828     {
829     unsigned short l = 0, r = n;
830     /* binsearch invariant:
831     v is < all values i >= r (we do know relation at r)
832     v is > all values i < l (don't know relation at l)
833     */
834     while ( l < r ) { /* invariant: l <= valpos < r */
835     unsigned short i = (l + r) >> 1; /* l <= i < r */
836     int cmp = memcmp( v, b+i*sz, sz );
837     if ( ! cmp )
838     return QMATCH | i;
839     if ( cmp > 0 )
840     l = i+1;
841     else
842     r = i;
843     }
844     return l; /* == r */
845     } /* valpos */
846    
847    
848     #define TRACEKEY( l, f ) (void)(f)
849    
850    
851     /*
852     find and fetch key's leaf into the provided buffer.
853     */
854     static int findkey ( Block *leaf, FindKey *f
855     #ifdef BUILD_SHMODE
856     , int lck
857     # define FINDKEY(leaf, f, lck) findkey(leaf, f, lck)
858     #else
859     # define FINDKEY(leaf, f, lck) findkey(leaf, f)
860     #endif
861     )
862     {
863     #ifdef BUILD_SHMODE
864     Qdx *dx = f->dx; /* for the lock macros */
865     #endif
866     Fork fork[2]; /* buffer */
867     Leaf lbuf;
868     int i, ret=-1;
869     unsigned char *pkey = 0, pkln = 0, pvln = 0;
870     Block *b;
871    
872     QLOCKSH(1);
873     b = getfork(&fork[0].b, f->dx, 0, 0);
874     if ( !b )
875     DONE((ERR_TRASH, "no root"));
876     if ( b->lev >= QLEVELS )
877     DONE((ERR_TRASH, "tree is too deep: %u", b->lev));
878     stat.key++;
879     for ( i=QLEVELS; i--; ) /* paranoia */
880     f->n[i] = f->e[i] = 0;
881     for ( i=b->lev;; ) {
882     unsigned short e;
883     unsigned n;
884     Dict *d;
885     int tog = 0;
886    
887     f->n[i] = b->num;
888     e = i ? keyfork(b, f->key, f->dx->vsz) : keyleaf(b, f->key);
889     LOG_DBG( LOG_DEBUG, "keypos %4x", e );
890     /*
891     here goes the Lehmann/Yao race detection:
892     If we ran on right boundary, follow right link
893     */
894     while ( !(QMATCH & e) && b->ent == e && b->nxt && i < f->dx->dpt ) {
895     Block *s;
896     unsigned se;
897     /* peek next block, toggling buffers */
898     if ( i ) {
899     /* TODO: actually we should never see this in forks
900     as long as we lock the complete fork file */
901     s = getfork(&fork[(++tog)&1].b, f->dx, b->nxt, i);
902     se = keyfork(s, f->key, f->dx->vsz);
903     } else {
904     s = GETLEAF((++tog)&1?&lbuf.b:leaf, f->dx, b->nxt, lck);
905     se = keyleaf(s, f->key);
906     #ifdef BUILD_SHMODE
907     if ( LOCK_WRITE == lck ) QLOCKUN(se ? b->num : s->num);
908     #endif
909     }
910     if ( !se ) /* lower than first */
911     break;
912     LOG_DBG( LOG_DEBUG, "LYao %d -> %d lev %d for '%b' got 0x%4x",
913     b->num, s->num, i, f->key->len, f->key->byt, se );
914     /* trade b for s */
915     f->n[i] = (b = s)->num;
916     e = se;
917     }
918     f->e[i] = e;
919     if ( ! i ) {
920     if ( b != leaf )
921     memcpy(leaf, b, f->dx->lsz);
922     return 0; /* keep the lock */
923     }
924     if ( !(QMATCH & e) ) {
925     if ( e )
926     f->e[i] = --e;
927     else {
928     TRACEKEY( ERR_TRASH, f );
929     return eRr(ERR_IDIOT,
930     "ran on left bound of block %u ent %u",
931     b->num, b->ent );
932     }
933     }
934     d = b->dct + (QENTMASK & e);
935     /* direct access ok in fork */
936     pkey = ((unsigned char*)b)+d->pos;
937     pkln = d->kln;
938     pvln = d->vln ? f->dx->vsz : 0;
939     n = GETINT( pkey+pkln+pvln );
940     if ( --i )
941     b = getfork(&fork[0].b, f->dx, n, i);
942     else {
943     QLOCKUN(1);
944     b = GETLEAF(leaf, f->dx, n, lck); /* read 1st leaf */
945     /* continue only to do the LY */
946     }
947     }
948     done:
949     QLOCKUN(1);
950     return ret; /* go away */
951     } /* findkey */
952    
953    
954    
955    
956    
957     /* ************************************************************
958     public functions
959     */
960    
961     int qRemake ( Qdx *dx )
962     {
963     Key last;
964     Bloke child;
965     Fork fork;
966     unsigned char lev = 0; /* the current child level to be indexed */
967     unsigned left = 0; /* block on child level */
968     unsigned iblk = 0;
969     unsigned ikey = 0;
970     unsigned ikeyl = 0;
971     unsigned ipfx = 0;
972     unsigned ispn = 0;
973    
974     fTrunc(&dx->mqx.fil, 0);
975     fMap(&dx->mqx, 0);
976     dx->fln = 1; /* pretend we had a root */
977     /* build tree */
978     for (dx->fln = 1;;lev++) { /* levels */
979     unsigned len = 0; /* current is n'th block on level */
980     Block *b = 0;
981     unsigned char *base = 0; /* == b */
982     unsigned have = 0; /* room in current block b */
983     Dict *d = 0; /* of current block b */
984     unsigned next = left; /* child */
985    
986     left = dx->fln;
987     /* get child c and create child pointer in block b */
988     do {
989     unsigned char vln = 0; /* actual byte length: dx->vsz or 0 */
990     unsigned char *key;
991     unsigned char kln;
992     unsigned need;
993     Block *c = lev
994     ? getfork(&child.b, dx, next, lev)
995     : GETLEAF(&child.b, dx, next, 0);/* must have full ex lock */
996     if ( ! c )
997     return eRr(ERR_NOMEM, "no child block" );
998     next = c->nxt;
999     if ( ! c->ent ) /* OOPS empty node */
1000     continue;
1001     key = GETENT(c, 0);
1002     kln = c->dct[0].kln;
1003     if ( ! b ) /* the leftmost child on this level */
1004     kln = 0; /* always 0 key on left */
1005     else if ( lev ) /* the child is a fork -- have to carry vals ... */
1006     vln = c->dct[0].vln ? dx->vsz : 0; /* ... if the child starts with */
1007     else if ( last.len == kln /* compare key to last of prev. ... */
1008     && !memcmp( last.byt, key, kln ) /* ... which was saved in last */
1009     ) { /* key spans blocks */
1010     LOG_DBG( LOG_DEBUG, "found span key '%b' child %u",
1011     kln, key, c->num );
1012     vln = dx->vsz; /* qualify with value */
1013     } else {
1014     unsigned char x = 0;
1015     /* find prefix key */
1016     if ( kln > last.len )
1017     kln = last.len;
1018     while ( x < kln && key[x] == last.byt[x] )
1019     x++;
1020     if ( x < kln ) /* they differ at [x] */
1021     kln = x+1;
1022     else /* equal on the shorter length */
1023     kln++;
1024     if ( kln > c->dct[0].kln )
1025     return eRr(ERR_TRASH,
1026     "bad key '%b' on level %u matching previous of length %u",
1027     c->dct[0].kln, key, lev, last.len );
1028     LOG_DBG( LOG_DEBUG, "prefix on key '%b' child %u save %u",
1029     kln, key, c->num, c->dct[0].kln - kln );
1030     ipfx += c->dct[0].kln - kln;
1031     }
1032     if ( vln )
1033     ispn++;
1034     ikeyl += kln;
1035     need = sizeof(Dict) + kln + vln + 4;
1036     if ( need > have ) {
1037     if ( b ) {
1038     eRr(LOG_TRACE, "level %u block %u is full %u ent stack %u",
1039     lev, b->num, b->ent, b->stk );
1040     b->nxt = dx->fln; /* link */
1041     if ( wrtfork(b, dx) )
1042     goto nowrite;
1043     len++;
1044     }
1045     b = newfork(&fork.b, dx, lev+1);
1046     base = (unsigned char*)b;
1047     have = env.psz - DICTOFF; /* - ql->let; TODO: should we ??? */
1048     d = b->dct;
1049     len++;
1050     iblk++;
1051     }
1052     SETINT( base+(b->stk -= 4), c->num );
1053     d->kln = kln;
1054     d->vln = vln ? 1 : 0;
1055     if ( kln += vln ) /* key + consecutive qualifying value */
1056     memcpy( base+(b->stk -= kln), key, kln );
1057     d->pos = b->stk;
1058     d++;
1059     b->ent++;
1060     ikey++;
1061     have -= need;
1062     /* save the last dance for me */
1063     memcpy(last.byt, GETENT(c, c->ent-1), last.len = c->dct[c->ent-1].kln);
1064     } while (next);
1065    
1066     if ( !len )
1067     return eRr(ERR_TRASH, "level %u was empty", lev );
1068     /* close level */
1069     eRr(LOG_INFO, "closing level %u with %u blocks %u..%u",
1070     lev, len, left, b->num );
1071     if ( 1 == len )
1072     b->num = 0; /* make it the root */
1073     if ( wrtfork(b, dx) )
1074     goto nowrite;
1075     if ( 1 == len ) {
1076     dx->dpt = b->lev;
1077     break;
1078     }
1079     }
1080     #define PM(a,b) (unsigned)(a*LULU(1000)/b) /* per mille */
1081     if (ikey && ikeyl) /* divisors */
1082     eRr(LOG_INFO,
1083     "\tused %u fork blocks %u keys %u bytes avg %d\n"
1084     "\t%u #spans (%d/oo) %u pfx (- %d/oo)",
1085     iblk, ikey, ikeyl, ikeyl/ikey,
1086     ispn, PM(ispn*dx->vsz,ikeyl), ipfx, PM(ipfx,(ipfx+ikeyl)) );
1087     dx->fln--; /* counted one over */
1088     fMap(&dx->mqx, dx->fln);
1089    
1090     return 0;
1091     nowrite:
1092     dx->fln = 0;
1093     return eRr(LOG_SYSERR, "\twhen writing tree block" );
1094     } /* qRemake */
1095    
1096    
1097    
1098    
1099     static int load ( Qdx *dx, unsigned char *key )
1100     {
1101     int cmp = 0, keyl = 0, l;
1102     QLoad *ql = dx->qld;
1103     if (key) {
1104     keyl = *key - dx->vsz;
1105     l = keyl < ql->key.len ? keyl : ql->key.len; /* min */
1106     if ( !(cmp = memcmp(key+1, ql->key.byt, l)) )
1107     cmp = keyl - ql->key.len;
1108     if (0 > cmp)
1109     return eRr(LOG_ERROR, "qload key '%b' < last '%b'",
1110     keyl, key+1, ql->key.len, ql->key.byt );
1111     }
1112     /* flush ? */
1113     while ( !key /* end */
1114     || QENTMAX == ql->got /* full */
1115     || cmp /* next key */
1116     ) {
1117     Block *b = &ql->cur.b;
1118     unsigned have = b->stk - DICTOFF - b->ent*sizeof(Dict) - ql->let;
1119     unsigned need = sizeof(Dict) + ql->key.len;
1120     unsigned want = need + ql->got * dx->vsz;
1121     unsigned take = ql->got;
1122     if ( have < want ) { /* you can't always get ... */
1123     if ( have <= need )
1124     take = 0;
1125     else {
1126     take = (have - need) / dx->vsz;
1127     if ( 256 > take * dx->vsz )
1128     take = 0;
1129     }
1130     }
1131     LOG_DBG( LOG_DEBUG, "qload '%b' got %u need %u want %u have %u take %u",
1132     ql->key.len, ql->key.byt, ql->got, need, want, have, take );
1133     /* we have enough room in b for take entries */
1134     if ( take ) {
1135     unsigned char *base = (unsigned char*)b;
1136     unsigned char *s = base + b->stk;
1137     int i = take;
1138     Dict *d = &b->dct[ b->ent++ ];
1139     while ( i-- )
1140     memcpy( s -= dx->vsz, ql->val[i].byt, dx->vsz );
1141     memcpy( s -= ql->key.len, ql->key.byt, ql->key.len );
1142     b->stk = (unsigned short) (s - base);
1143     LEAFSETPV( d, b->stk, take );
1144     d->kln = ql->key.len;
1145     if ( ql->got > take )
1146     memcpy( ql->val, ql->val+take, sizeof(ql->val[0])*(ql->got - take) );
1147     ql->got -= take;
1148     }
1149     if ( ! ql->got )
1150     break;
1151     if ( take ) /* key spans blocks */
1152     ql->span++;
1153     ql->free += b->stk - DICTOFF - b->ent*sizeof(Dict); /* wasted */
1154     b->nxt = dx->lln; /* link */
1155     LOG_DBG( LOG_DEBUG, "qload: %u entries left, getting block %u",
1156     ql->got, dx->lln );
1157     if ( wrtleaf(b, dx) )
1158     return eRr(LOG_SYSERR, "\twhen writing qload block" );
1159     /* so couldn't flush all, thus we need another block */
1160     eRr(LOG_TRACE, "new qload leaf %d", dx->lln);
1161     newleaf(b = &ql->cur.b, dx);
1162     /* will continue on end or keychange */
1163     LOG_DBG( LOG_TRACE, "new block %u has %u ent %u stk, will keep %u",
1164     b->num, b->ent, b->stk, ql->let );
1165     }
1166     if (cmp || !key) /* done with key: stats */
1167     if (ql->maxv < ql->tot)
1168     ql->maxv = ql->tot;
1169     if (key) {
1170     if (cmp) { /* set new key */
1171     if (ql->got)
1172     return eRr(ERR_IDIOT, "qload not empty on new key");
1173     ql->tot = 0;
1174     memcpy(ql->key.byt, key+1, ql->key.len = keyl);
1175     ql->keys++;
1176     ql->keyl += keyl;
1177     if (ql->maxl < (unsigned)keyl)
1178     ql->maxl = keyl;
1179     }
1180     /* add new val */
1181     /* LOG_HEX( key->val.byt, dx->vsz ); */
1182     memcpy(ql->val[ ql->got++ ].byt, key+1+keyl, dx->vsz);
1183     ql->vals++;
1184     ql->tot++;
1185     } else {
1186     /* stats */
1187     unsigned size = dx->lln * dx->lsz;
1188     unsigned sval = ql->vals * dx->vsz;
1189     unsigned sove = dx->lln * DICTOFF + sizeof(Dict)*(ql->keys+ql->span);
1190    
1191     if (size && ql->keys) /* divisors */
1192     eRr(LOG_INFO,
1193     "end qload:\n"
1194     "\t#blks %u total len %u free %u (%d/oo) overhead %u (%d/oo)\n"
1195     "\t#keys %u total len %u (%d/oo) avg %d max %u\n"
1196     "\t#vals %u total len %u (%d/oo) avg %d max %u spans %u",
1197     dx->lln, size, ql->free, PM(ql->free,size), sove, PM(sove,size),
1198     ql->keys, ql->keyl, PM(ql->keyl,size), ql->keyl/ql->keys, ql->maxl,
1199     ql->vals, sval, PM(sval,size), ql->vals/ql->keys, ql->maxv, ql->span
1200     );
1201     wrtleaf(&ql->cur.b, dx);
1202     }
1203    
1204     return 0;
1205     } /* load */
1206    
1207    
1208     static int ncomp;
1209     #ifdef SORT_Q
1210     static int compar (const void *pa, const void *pb)
1211     {
1212     unsigned char *a = *(unsigned char **)pa;
1213     unsigned char *b = *(unsigned char **)pb;
1214     int l = *a, cmp;
1215     if (l > *b) l = *b;
1216     ncomp++;
1217     return (cmp = memcmp(a+1, b+1, l)) ? cmp : (int)*a - (int)*b;
1218     }
1219     #endif
1220     static OPT_INLINE int keyGt (unsigned char *a, unsigned char *b)
1221     {
1222     int l = *a, cmp;
1223     if (l > *b) l = *b;
1224     ncomp++;
1225     return 0 < (cmp = memcmp(a+1, b+1, l)) || (!cmp && *a > *b);
1226     }
1227    
1228    
1229     static int sort (Qdx *dx, unsigned char **key, int len)
1230     {
1231     unsigned char rev[SORTKEYS];
1232     unsigned char *tmp;
1233     int i, ret = 0;
1234     lolo tm;
1235    
1236     tUpd(&tm);
1237     if (2 > len)
1238     goto cp;
1239     #ifdef SORT_Q
1240     qsort(key, len, sizeof key[0], compar);
1241     #else
1242     # define SORT_WH
1243     /* basic weak-heap-sort (Dutton 93) just like rSort; c.f. uti.c
1244     ``On the Performance of WEAK-HEAPSORT´´
1245     http://www.informatik.uni-freiburg.de/~edelkamp/publications/Weakheap.pdf
1246     for tuned versions see
1247     http://www.jea.acm.org/2002/EdelkampHeapsort/
1248     and ``Pushing the Limits in Sequential Sorting´´
1249     http://www.informatik.uni-freiburg.de/~edelkamp/publications/Improvements.pdf
1250     */
1251     memset(rev, 0, len);
1252     /* weak-heap-forestify
1253     every node roots a weak heap AND is <= root
1254     (especially the leftist independent roots are <= root)
1255     */
1256     for ( i=len; --i; ) {
1257     int g = i; /* granma is 1st left i or root, if i is totally leftist ;) */
1258     while (!(1&g)) /* we're left child */
1259     g >>= 1; /* shift granma right :( */
1260     g >>= 1;
1261     if (!keyGt(key[i], key[g])) /* merge: swap if g < i */
1262     continue;
1263     tmp = key[i]; key[i] = key[g]; key[g] = tmp; /* swap */
1264     rev[i] = 1;
1265     }
1266    
1267     /* re-weak-heap-forestify */
1268     for ( i=len; --i>1; ) {
1269     int j=1, left;
1270     /* rev[i] = 0; i's REVBIT no longer needed */
1271     /* follow path of left childs (independent roots) */
1272     while (i > (left = (j<<1) + rev[j]))
1273     j = left;
1274     /* put max of them and i (new root) at i */
1275     do {
1276     if (!keyGt(key[j], key[i])) /* merge: swap if i < j */
1277     continue;
1278     tmp = key[i]; key[i] = key[j]; key[j] = tmp; /* swap */
1279     rev[j] ^= 1; /* toggle revbit */
1280     } while (j >>= 1);
1281     }
1282     /* rev[1] = 0; 2... have been cleared */
1283     key[len] = key[0]; /* initial root still untouched biggest */
1284     #endif
1285     #ifndef SORT_WH
1286     key--;
1287     #endif
1288     eRr(LOG_VERBOSE, "%d sorted %d keys with %d comparisions -- loading ...",
1289     tUpd(&tm), len, ncomp);
1290     cp:
1291     /* finally cp to dest */
1292     if (FIL_NONE == dx->qld->fil) { /* internal -- load */
1293     unsigned char **e = key+len+1;
1294     while (++key != e)
1295     if ((ret = load(dx, *key)))
1296     goto err;
1297     eRr(LOG_VERBOSE, "%d loaded", tUpd(&tm));
1298     } else { /* external -- write MERGECHK blks */
1299     unsigned char **e = key+len+1;
1300     unsigned char *out = dx->qld->mout, *end = out+MERGEBLK, *dst = out;
1301     unsigned blks = 0;
1302    
1303     while (++key != e) {
1304     unsigned l = 1+**key;
1305     if (end <= dst+l) {
1306     *dst = 0;
1307     if (0 > (ret = fWrite(&dx->qld->fil, out, MERGEBLK)))
1308     goto err;
1309     dst = out;
1310     if (MERGECHK == ++blks)
1311     return eRr(ERR_IDIOT, "CHUNK!");
1312     }
1313     memcpy(dst, *key, l);
1314     dst += l;
1315     }
1316     do {
1317     *dst = 0;
1318     fWrite(&dx->qld->fil, out, MERGEBLK);
1319     dst = out;
1320     } while (MERGECHK != ++blks);
1321     eRr(LOG_VERBOSE, "%d wrote %d blocks ...", tUpd(&tm), blks);
1322     dx->qld->blks += MERGECHK;
1323     ret = 0;
1324     }
1325     err:
1326     return ret;
1327     } /* sort */
1328    
1329    
1330     static int merge (Qdx *dx, QLoad *ql)
1331     {
1332     #ifndef MERGEMULTIPASS
1333     # define chunk MERGECHK
1334     #else
1335     unsigned char *const out = ql->mout, *const end = out+MERGEBLK;
1336     unsigned chunk = MERGECHK, pass = 1;
1337     file fil2 = FIL_NONE;
1338     #endif
1339     lolo tm;
1340     int ret = 0;
1341    
1342     eRr(LOG_VERBOSE, "%d-way merging %d blocks in %d chunks of %d ...",
1343     MERGEWAY, ql->blks, (ql->blks+MERGECHK-1)/MERGECHK, MERGECHK);
1344     tUpd(&tm);
1345     ncomp = 0;
1346     #ifndef MERGEMULTIPASS
1347     { /* single pass */
1348     #else
1349     if (ql->blks > MERGECHK*MERGEWAY) /* multipass */
1350     fOpen(&fil2, 0, 0);
1351     for (;; pass++) {
1352     #endif
1353     unsigned blkno = 0;
1354     #ifndef MERGEMULTIPASS
1355     # define inf &ql->fil
1356     { /* single run in single pass */
1357     #else
1358     unsigned nchunk = chunk*MERGEWAY; /* lastrun, if >= ql->blks */
1359     int lastpass = nchunk >= ql->blks, run = 1;
1360     file *inf, *outf;
1361    
1362     if (1 & pass) {
1363     inf = &ql->fil; outf = &fil2;
1364     } else {
1365     inf = &fil2; outf = &ql->fil;
1366     }
1367     if (!lastpass)
1368     fSeek(outf, 0);
1369     while (blkno < ql->blks) {
1370     /* merge MERGEWAY chunks of length chunk to n chunks of size nchunk */
1371     /* only one run in last pass */
1372     unsigned char *dst = out;
1373     unsigned blks = 0;
1374     #endif
1375     #ifdef MERGEDEBUG
1376     unsigned char lastkey[256];
1377     #endif
1378     struct {
1379     unsigned char *blk; /* merge blk */
1380     unsigned char *key; /* current key */
1381     unsigned nxt; /* next block no */
1382     unsigned end; /* end */
1383     } in[MERGEWAY], *nxt[MERGEWAY], *tmp;
1384     unsigned ways = 0, i;
1385    
1386     for (ways=0; ways<MERGEWAY; ways++) { /* prefill */
1387     if (blkno >= ql->blks)
1388     break;
1389     in[ways].key = in[ways].blk = ql->buf.m[ways];
1390     if (MERGEBLK != (ret =
1391     fPread(inf, in[ways].blk, MERGEBLK, blkno*MERGEBLK)))
1392     goto done;
1393     in[ways].nxt = blkno+1;
1394     if (ql->blks < (blkno += chunk)) blkno = ql->blks;
1395     in[ways].end = blkno;
1396     /* insert */
1397     for (i=0; i<ways && !keyGt(nxt[i]->key, in[ways].key); ) i++;
1398     if (i < ways) memmove(nxt+i+1, nxt+i, (ways-i)*sizeof nxt[0]);
1399     nxt[i] = in+ways;
1400     }
1401     #ifndef MERGEMULTIPASS
1402     eRr(LOG_VERBOSE, "%d prepared %d blocks %d ways", tUpd(&tm), blkno, ways);
1403     #else
1404     eRr(LOG_VERBOSE, "%d prepared %d/%d blocks %d ways run %d of pass %d%s",
1405     tUpd(&tm), blkno, ql->blks, ways, run, pass, lastpass?" (last)":"");
1406     #endif
1407     #ifdef MERGEDEBUG
1408     lastkey[0] = 0;
1409     for (i=0; i<ways; i++)
1410     eRr(LOG_VERBOSE, "%d %b", i, *nxt[i]->key, nxt[i]->key+1);
1411     #endif
1412     for (;;) { /* merge */
1413     int len = *nxt[0]->key + 1;
1414     /* stuff away 1st key */
1415     #ifdef MERGEDEBUG
1416     if (keyGt(lastkey, nxt[0]->key)) {
1417     ret = eRr(ERR_IDIOT, "'%b' > '%b' way %d",
1418     *lastkey, lastkey+1, *nxt[0]->key, nxt[0]->key+1, nxt[0]-in);
1419     goto done;
1420     }
1421     memcpy(lastkey, nxt[0]->key, len);
1422     #endif
1423     #ifdef MERGEMULTIPASS
1424     if (lastpass) {
1425     #endif
1426     if ((ret = load(dx, nxt[0]->key))) goto done;
1427     #ifdef MERGEMULTIPASS
1428     } else {
1429     if (len >= end - dst) { /* flush out block */
1430     # ifdef MERGEDEBUG
1431     eRr(LOG_VERBOSE, "out blk %d len %d %b", blks, dst-out, *out, out+1);
1432     # endif
1433     *dst = 0; /* terminate */
1434     if (MERGEBLK != (ret = fWrite(outf, dst = out, MERGEBLK)))
1435     goto done;
1436     blks++;
1437     }
1438     memcpy(dst, nxt[0]->key, len);
1439     dst += len;
1440     }
1441     #endif
1442     /* reload nxt[0] */
1443     nxt[0]->key += len;
1444     if (!*nxt[0]->key) { /* eob */
1445     if (nxt[0]->nxt != nxt[0]->end) {
1446     if (MERGEBLK != (ret = fPread(inf, nxt[0]->key = nxt[0]->blk,
1447     MERGEBLK, nxt[0]->nxt++ * MERGEBLK)))
1448     goto done;
1449     }
1450     if (!*nxt[0]->key) { /* done with this stream */
1451     if (!--ways)
1452     break;
1453     memmove(nxt, nxt+1, ways*sizeof nxt[0]);
1454     continue;
1455     }
1456     }
1457     #ifdef MERGEDEBUG
1458     if (keyGt(lastkey, nxt[0]->key)) {
1459     ret = eRr(ERR_IDIOT, "'%b' > '%b' in blk %d off %d way %d",
1460     *lastkey, lastkey+1, *nxt[0]->key, nxt[0]->key+1,
1461     nxt[0]->nxt-1, nxt[0]->key-nxt[0]->blk, nxt[0]-in);
1462     goto done;
1463     }
1464     #endif
1465     if (1 == ways) continue;
1466     /*
1467     this may look like pretty brute force.
1468     However, in practice we rarely see reinsertions at all
1469     and get by with about 1.1 comparisions per key.
1470     With sorted input we have even less than 1 per key,
1471     due to the continue (last exit oneway) above.
1472     */
1473     for (i=1; i<ways && keyGt(nxt[0]->key, nxt[i]->key); ) i++;
1474     /* nxt[0]->key is not gt nxt[i]->key: move to --i */
1475     if (--i) {
1476     tmp = nxt[0];
1477     memmove(nxt, nxt+1, i*sizeof nxt[0]);
1478     nxt[i] = tmp;
1479     }
1480     } /* merge */
1481     ret = 0;
1482     #ifndef MERGEMULTIPASS
1483     eRr(LOG_VERBOSE, "%d merged %d comparisions", tUpd(&tm), ncomp);
1484     } /* single run */
1485     #else
1486     eRr(LOG_VERBOSE, "%d merged %d comparisions %d/%d blocks pass %d",
1487     tUpd(&tm), ncomp, blks, nchunk, pass);
1488     if (lastpass)
1489     goto done;
1490     if (nchunk <= blks)
1491     eRr(ERR_IDIOT, "wrote %d >= %d blks pass %d", blks, nchunk, pass);
1492     do {
1493     *dst = 0;
1494     fWrite(outf, out, MERGEBLK);
1495     dst = out;
1496     } while (nchunk != ++blks);
1497     run++;
1498     } /* for new chunks */
1499     chunk = nchunk;
1500     #endif
1501     } /* pass */
1502     done:
1503     #ifdef MERGEMULTIPASS
1504     if (FIL_NONE != fil2) fClose(&fil2);
1505     #endif
1506     return ret;
1507     } /* merge */
1508    
1509    
1510     int qLoad ( Qdx *dx, Key *key )
1511     {
1512     QLoad *ql = dx->qld;
1513     int ret, l = key->len+dx->vsz;
1514    
1515     if ( !ql ) {
1516     if (! key->val.len) return 0; /* double close ? */
1517     ncomp = 0;
1518     if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg))
1519     return eRr(ERR_TRASH, "need excl write access to load index");
1520     ql = dx->qld = (QLoad*)mAlloc(sizeof (QLoad));
1521     if ( dx->let > 50 )
1522     dx->let = 50;
1523     ql->let = dx->let * dx->lsz / 100;
1524     fTrunc(&dx->mqd, 0);
1525     dx->lln = 0;
1526     newleaf(&ql->cur.b, dx);
1527     ql->fil = FIL_NONE;
1528     ql->nkey = ql->buf.s.keyb;
1529     ql->nmem = ql->buf.s.memb;
1530     eRr(LOG_INFO,
1531     "loading %d/%dK mem, %d keys, %dK blocks in %d-chunks, %d-way merge"
1532     #ifdef MERGEMULTIPASS
1533     " (multipass)"
1534     #endif
1535     , SORTUSE>>10, SORTMEM>>10, SORTKEYS, MERGEBLK>>10, MERGECHK, MERGEWAY
1536     );
1537     }
1538     if ( !key->val.len /* done */
1539     || SORTKEYS-2 <= ql->nkey-ql->buf.s.keyb /* reserve 1 for WHS */
1540     || SORTUSE <= ql->nmem-ql->buf.s.memb
1541     ) {
1542     if (key->val.len && FIL_NONE == ql->fil)
1543     fOpen(&ql->fil, 0, 0);
1544     if ((ret = sort(dx, ql->buf.s.keyb, ql->nkey - ql->buf.s.keyb))
1545     || !key->val.len
1546     )
1547     goto done;
1548     ql->nkey = ql->buf.s.keyb;
1549     ql->nmem = ql->buf.s.memb;
1550     }
1551     /* add key to sort buffer */
1552     if (key->val.len == dx->vsz && l < 256) {
1553     *ql->nkey++ = ql->nmem;
1554     *ql->nmem = (unsigned char)l;
1555     memcpy(++ ql->nmem, key->byt, key->len);
1556     memcpy(ql->nmem += key->len, key->val.byt, dx->vsz);
1557     ql->nmem += dx->vsz;
1558     return 0;
1559     }
1560     ret = eRr(ERR_INVAL, "bad klen %d vlen %d", key->len, key->val.len);
1561     done:
1562     if (FIL_NONE != ql->fil) {
1563     if (!ret) ret = merge(dx, ql);
1564     fClose(&ql->fil);
1565     }
1566     if (!ret) {
1567     load(dx, 0); /* close last leaf */
1568     qRemake(dx); /* build tree */
1569     }
1570     mFree(dx->qld);
1571     dx->qld = 0;
1572     return ret;
1573     } /* qLoad */
1574    
1575    
1576     int qLoadf ( Qdx *dx, file *f )
1577     {
1578     Key key;
1579     lolo tm;
1580    
1581     FIL_DEFBUF(*f);
1582     memset( &key, 0, sizeof(key) );
1583     key.val.len = dx->vsz;
1584     tUpd(&tm);
1585     while ( fGets(&fb) && fb.l ) {
1586     if ( qMkKeyVal(dx, &key, fb.p, fb.l) ) {
1587     eRr(LOG_VERBOSE, "bad key at line %d offset %d", fb.n, fb.o);
1588     continue;
1589     }
1590     if ( qLoad( dx, &key ) )
1591     break;
1592     if ( !(fb.n & 0x3ff) )
1593     eRr(LOG_VERBOSE, "%dK lines", fb.n >> 10 );
1594     }
1595     eRr(LOG_INFO, "%d did %d lines %d bytes (%s)", tUpd(&tm), fb.n, fb.o,
1596     FIL_NONE == fb.f ? "eof" : "blank" );
1597     *f = fb.f;
1598     key.val.len = 0; /* close it */
1599     return qLoad(dx, &key);
1600     } /* qLoadf */
1601    
1602    
1603     /* TODO: this function is an awful mess and needs complete rewrite, KR
1604     */
1605     int qSet ( Qdx *dx, Key *key )
1606     {
1607     Bloke blk[2];
1608     Key up;
1609     Block * b = &blk[0].b; /* mainblock */
1610     unsigned char * base = blk[0].base;
1611     int ret = 0;
1612     unsigned short e, i = 0;
1613     Block *s = 0; /* sibling */
1614     Dict *d;
1615     unsigned char *sbase, *src, *v, *stk;
1616     FindKey f;
1617     int hadkey, split = 0, insibling = 0;
1618     unsigned need;
1619     unsigned sibnum = 0;
1620     unsigned char lev = 0;
1621     PrVal pv;
1622     #ifdef BUILD_SHMODE /* locks we hold on block, sibling, tree */
1623     int lb=-1, ls=-1, lt=0;
1624     #endif
1625    
1626     f.dx = dx;
1627     f.key = key;
1628     if ( dx->ksz && key->len > dx->ksz )
1629     key->len = dx->ksz;
1630     if ( FINDKEY(b, &f, LOCK_WRITE) ) {
1631     b = 0;
1632     DONE((ERR_TRASH, "could not find key"));
1633     }
1634     #ifdef BUILD_SHMODE
1635     lb = b->num;
1636     #endif
1637     e = (QENTMASK & f.e[0]);
1638     d = b->dct + e;
1639     if ( ! (hadkey = QMATCH & f.e[0]) ) /* new key */
1640     need = key->len + sizeof(Dict) + dx->vsz;
1641     else {
1642     unsigned vln = LEAFVLN(d);
1643     v = base+LEAFPOS(d)+d->kln;
1644     if ( 1 == vln ) { /* check stopword -- value entirely zeroed */
1645     int l = dx->vsz;
1646     unsigned char *C = v;
1647     while ( l-- && !*C++ ) /* huh huh, he said "C++" */
1648     ;
1649     if ( -1 == l ) {
1650     LOG_DBG(LOG_VERBOSE, "key '%b' is a stopword", key->len, key->byt);
1651     return QST_STPWRD;
1652     }
1653     /* TODO: check QST_UNIKEY */
1654     }
1655     i = valpos( v, vln, dx->vsz, key->val.byt );
1656     if ( QMATCH & i ) {
1657     LOG_DBG( LOG_DEBUG,
1658     "adding key '%b'/%s: already there pos %u",
1659     key->len, key->byt, PRVAL( pv, key->val ), i & ~QMATCH );
1660     return QST_FOUND; /* value already there */
1661     }
1662     need = dx->vsz;
1663     }
1664     LOG_DBG( LOG_DEBUG,
1665     "adding key '%b'/%s in block %d had %d ent %d stk %d need %d",
1666     key->len, key->byt, PRVAL( pv, key->val ),
1667     b->num, hadkey, b->ent, b->stk, need );
1668    
1669     if ( need > b->stk - DICTOFF - b->ent*sizeof(Dict) ) {
1670     /* now that's bad: not enough space
1671     create new block s, move last entries there
1672     starting from the end, entries (including the new one) are collected,
1673     until their combined size reaches more than half a block.
1674     If the combined size then is more than 2/3 a block,
1675     and the last entry alone takes more than 1/3 a block, it is split.
1676     TODO: check existing sibling for free space
1677     */
1678     unsigned half = (dx->lsz - DICTOFF) >> 1;
1679     unsigned tot = 0;
1680     unsigned mv = b->ent;
1681    
1682     LOG_DBG( LOG_DEBUG, "splitting block %d need %d have %d (stk %d ent %d)",
1683     b->num, need, b->stk - DICTOFF - b->ent*sizeof(Dict), b->stk, b->ent );
1684     if ( need > (dx->lsz - DICTOFF) / 3 )
1685     DONE((ERR_IDIOT, "OOPS! excessive need for %d bytes", need));
1686     /* figure out split position */
1687     if ( !hadkey && b->ent == e ) {
1688     tot = need;
1689     insibling = !0;
1690     }
1691     d = b->dct+mv;
1692     while ( d--, mv-- ) {
1693     unsigned sz = sizeof(Dict)+d->kln+LEAFVLN(d)*dx->vsz;
1694     if ( half > (tot += sz) ) { /* sz fits */
1695     if ( e == mv && !hadkey ) {
1696     /*
1697     check size of new entry.
1698     In case of hadkey, we don't bother for the dx->vsz bytes.
1699     */
1700     if ( half < (tot + need) ) {
1701     if ( (half - tot) > need/2 ) { /* more balanced if we move */
1702     tot += need;
1703     insibling = !0;
1704     }
1705     break;
1706     }
1707     tot += need;
1708     insibling = !0;
1709     }
1710     if ( mv )
1711     continue; /* else croak on ! mv below */
1712     }
1713     /* exceeded half -- maybe we move entry mv anyway */
1714     if ( tot > (dx->lsz - DICTOFF)*2/3 ) {
1715     if ( tot-sz < (dx->lsz - DICTOFF)/3 )
1716     split = !0;
1717     else { /* undo -- don't move this entry */
1718     tot -= sz;
1719     d++; mv++;
1720     }
1721     } else if ( ! mv )
1722     DONE((ERR_IDIOT, "OOPS! didn't find end tot %d bytes", tot));
1723     break;
1724     } /* while mv-- */
1725     if ( mv < e || (mv == e && hadkey && !split) )
1726     insibling = !0;
1727     /* now mv is the first entry to move */
1728     /* create new leaf block dx->lln */
1729     #ifdef BUILD_SHMODE
1730     for (;;) {
1731     int size;
1732     QLOCKEX(dx->lln<<1);
1733     size = fSize( dx->mqd );
1734     if ( size == (int)(dx->lln * dx->lsz) ) {
1735     ls = dx->lln;
1736     break;
1737     }
1738     QLOCKUN(dx->lln<<1);
1739     if ( size % dx->lsz || size < (int)(dx->lln * dx->lsz) )
1740     DONE((ERR_TRASH, "bad mqd size 0x%08x", size));
1741     dx->lln = size / dx->lsz;
1742     }
1743     /* have lock on new leaf block dx->lln */
1744     #endif
1745     newleaf(s = &blk[1].b, dx);
1746     sbase = (unsigned char*)s; /* == blk[1].base */
1747     LOG_DBG( LOG_DEBUG,
1748     "split: move %d of %d to new blk %d e %d tot %d insibling %d split %d",
1749     mv, b->ent, s->num, e, tot, insibling, split );
1750     s->nxt = b->nxt;
1751     b->nxt = s->num;
1752     if ( split ) {
1753     /* now total includes full size of entry mv.
1754     if we included a new entry, it might even exceed lsz.
1755     calculate sizes for both blocks w/o mv's values.
1756     */
1757     unsigned esz = sizeof(Dict)+d->kln; /* entry's base cost */
1758     unsigned vln = LEAFVLN(d);
1759     unsigned keep, diff, mvval, kpval, mvsiz, pos = LEAFPOS(d);
1760     /*
1761     from currently used space, substract tot (including all
1762     values and possibly new key "need"), but add entry base cost.
1763     that should be the size remaining if we where moving
1764     all of the values but keep an entry and key.
1765     maybe slightly biased if need is dx->vsz (the hadkey case).
1766     */
1767     keep = dx->lsz - b->stk + b->ent*sizeof(Dict) + need - tot + esz;
1768     tot -= vln * dx->vsz; /* undo entries */
1769     diff = (keep > tot ? keep - tot : tot - keep) / dx->vsz;
1770     if ( diff > vln )
1771     DONE((ERR_IDIOT, "OOPS! got diff %d vln %d entry %d tot %d keep %d",
1772     diff, vln, mv, tot, keep));
1773     mvval = (vln - diff)/2;
1774     if ( keep > tot )
1775     mvval += diff;
1776     kpval = vln - mvval;
1777     if ( ! mvval || ! kpval ) {
1778     LOG_DBG( LOG_WARN, "bad mv/kpval %d/%d keep %d tot %d vln %d diff %d",
1779     mvval, kpval, keep, tot, vln, diff );
1780     if ( ! mvval )
1781     kpval = vln - (mvval = 1);
1782     else
1783     mvval = vln - (kpval = 1);
1784     }
1785     tot += mvsiz = mvval*dx->vsz;
1786     keep += kpval*dx->vsz;
1787     src = base+pos;
1788     LOG_DBG( LOG_DEBUG,
1789     "split: entry %d '%b' mv %d of %d values, i %d keep %d tot %d",
1790     mv, d->kln, src, mvval, vln, i, keep, tot );
1791     if ( keep > (dx->lsz-DICTOFF) || tot > (dx->lsz-DICTOFF) )
1792     DONE((ERR_IDIOT,
1793     "OOPS! got diff %d vln %d mvval %d entry %d tot %d keep %d",
1794     diff, vln, mvval, mv, tot, keep));
1795     /* SIGH -- do it */
1796     memcpy( sbase + (s->stk -= mvsiz),
1797     src+d->kln+kpval*dx->vsz, mvsiz );
1798     if ( e == mv && hadkey && i > kpval ) {
1799     insibling = !0;
1800     s->stk -= dx->vsz;
1801     i -= kpval;
1802     memmove( sbase+s->stk, sbase+s->stk+dx->vsz, i*dx->vsz );
1803     memcpy( sbase+s->stk+i*dx->vsz, key->val.byt, dx->vsz );
1804     mvval++;
1805     }
1806     memcpy( sbase + (s->stk -= d->kln), src, d->kln );
1807     LEAFSETPVK( s->dct, s->stk, mvval, d->kln );
1808     s->ent = 1;
1809     memmove( src + mvsiz, src, d->kln+kpval*dx->vsz );
1810     pos += mvsiz;
1811     LEAFSETPV( d, pos, kpval );
1812     d++; mv++;
1813     }
1814     /* now entries mv..b->ent are moved, mv > 0 */
1815     b->stk = LEAFPOS(b->dct+mv-1); /* pre-adjust stack */
1816     if ( mv < e ) { /* move entries mv..e-1 */
1817     /* used stack space is from pos of entry e-1 to end of entry mv
1818     (might differ from pos of mv-1 due to split)
1819     */
1820     unsigned from = LEAFPOS(b->dct+e-1), nent = e-mv,
1821     sz = LEAFPOS(b->dct+mv)+b->dct[mv].kln
1822     +LEAFVLN(b->dct+mv)*dx->vsz - from;
1823     int adj = (s->stk -= sz) - from; /* entry position adjustment */
1824     memcpy( sbase + s->stk, base+from, sz );
1825     memcpy( s->dct+s->ent, b->dct+mv, nent*sizeof(Dict) );
1826     d = s->dct + s->ent;
1827     s->ent += nent;
1828     for ( ; nent--; d++ )
1829     LEAFPOSADD( d, adj );
1830     }
1831     if ( insibling && mv <= e ) { /* now mv or insert e, unless done on split */
1832     d = b->dct + e;
1833     if ( hadkey ) { /* mv existing entry e >= mv, add new val */
1834     /* i is valpos */
1835     unsigned vln = LEAFVLN(d);
1836     src = base+LEAFPOS(d)+d->kln;
1837     if ( i < vln ) {
1838     unsigned sz = (vln - i)*dx->vsz;
1839     memcpy( sbase + (s->stk -= sz), src + i*dx->vsz, sz );
1840     }
1841     memcpy( sbase + (s->stk -= dx->vsz), key->val.byt, dx->vsz );
1842     if ( i ) {
1843     unsigned sz = i*dx->vsz;
1844     memcpy( sbase + (s->stk -= sz), src, sz );
1845     }
1846     memcpy( sbase + (s->stk -= d->kln), base+LEAFPOS(d), d->kln );
1847     LEAFSETPVK( s->dct + s->ent, s->stk, vln+1, d->kln );
1848     e++; /* e is moved */
1849     } else {
1850     memcpy( sbase + (s->stk -= dx->vsz), key->val.byt, dx->vsz );
1851     memcpy( sbase + (s->stk -= key->len), key->byt, key->len );
1852     LEAFSETPVK( s->dct + s->ent, s->stk, 1, key->len );
1853     }
1854     s->ent++;
1855     } /* mv or insert e */
1856     if ( e < b->ent && mv < b->ent ) { /* move entries max(e,mv) to end */
1857     unsigned fst = mv < e ? e : mv;
1858     /* used stack space is from pos of entry b->ent-1 (the original b->stk)
1859     to end of entry fst (might differ from pos of fst-1 due to split)
1860     */
1861     unsigned from = LEAFPOS(b->dct+b->ent-1), nent = b->ent-fst,
1862     sz = LEAFPOS(b->dct+fst)+b->dct[fst].kln
1863     +LEAFVLN(b->dct+fst)*dx->vsz - from;
1864     int adj = (s->stk -= sz) - from; /* entry position adjustment */
1865     LOG_DBG( LOG_DEBUG, "split: entries %d..%d sz %d from %d to %d",
1866     fst, b->ent-1, sz, from, s->stk );
1867     memcpy( sbase + s->stk, base+from, sz );
1868     memcpy( s->dct+s->ent, b->dct+fst, nent*sizeof(Dict) );
1869     d = s->dct + s->ent;
1870     s->ent += nent;
1871     for ( ; nent--; d++ ) {
1872     #if !defined(NDEBUG)
1873     unsigned p = LEAFPOS(d);
1874     #endif
1875     LEAFPOSADD( d, adj );
1876     #if !defined(NDEBUG)
1877     LOG_DBG( LOG_DEBUG, "new ent %d p/v/k %d/%d/%d o %d",
1878     d - s->dct, LEAFPOS(d), LEAFVLN(d), d->kln, p );
1879     #endif
1880     }
1881     }
1882     b->ent = mv; /* kill entries */
1883     LOG_DBG( LOG_DEBUG,
1884     "split: blks %d/%d with %d/%d entries, s starts '%b'",
1885     b->num, s->num, b->ent, s->ent, s->dct[0].kln, sbase+LEAFPOS(s->dct) );
1886     } /* new sibling */
1887    
1888     if ( ! insibling ) {
1889     /* now we do have enough space */
1890     d = b->dct + e;
1891     stk = base + b->stk; /* bottom of data stack */
1892     if ( hadkey ) { /* insert value at i */
1893     unsigned char *val = base + LEAFPOS(d)+d->kln+i*dx->vsz;
1894     need = dx->vsz;
1895     #if 0
1896     LOG_DBG( LOG_DEBUG,
1897     "in entry %d '%b' val %d: memmove( %p %p %d) val %p need %d",
1898     e, d->kln, base+LEAFPOS(d), i,
1899     stk-need, stk, (val-stk)+need, val, need );
1900     #endif
1901     memmove( stk - need, stk, (val - stk)+need );
1902     memcpy( val - need, key->val.byt, dx->vsz );
1903     LEAFVLNADD( d, 1 );
1904     } else { /* insert key, value and dict entry */
1905     /* take dict and data space from entry e */
1906     unsigned char *nu;
1907     need = key->len + dx->vsz;
1908     if ( e == b->ent )
1909     nu = base + b->stk - need;
1910     else {
1911     /* at end of next entry, i.e. end of block or beg of previous */
1912     nu = base + LEAFPOS(d) + d->kln + LEAFVLN(d)*dx->vsz;
1913     memmove( stk - need, stk, nu - stk );
1914     nu -= need;
1915     memmove( d+1, d, (b->ent - e)*sizeof(Dict) );
1916     }
1917     memcpy( nu, key->byt, key->len );
1918     memcpy( nu + key->len, key->val.byt, dx->vsz );
1919     LEAFSETPV( d, nu-base, 1 );
1920     d->kln = key->len;
1921     d++;
1922     e++;
1923     b->ent++;
1924     }
1925     /*
1926     need is set to the needed (i.e. additionally used) data space,
1927     e is the first dict entry to redirect need bytes lower
1928     */
1929     b->stk -= need;
1930     for ( ; e < b->ent; e++, d++ )
1931     LEAFPOSADD( d, -need );
1932     } /* ! insibling */
1933     if ( s ) { /* save upkey before writing, which may kill s */
1934     unsigned char *e0 = ((unsigned char*)s) + LEAFPOS(s->dct);
1935     memcpy( up.byt, e0, up.len = s->dct[0].kln );
1936     sibnum = s->num;
1937     if ( split )
1938     memcpy( up.val.byt, e0+up.len, up.val.len = dx->vsz );
1939     else
1940     up.val.len = 0;
1941     LOG_DBG( LOG_DEBUG, "split: had %d/%d key '%b' vln %d %s",
1942     b->num, s->num, up.len, up.byt, up.val.len, PRVAL( pv, up.val ) );
1943     wrtleaf(s, dx);
1944     }
1945     wrtleaf(b, dx);
1946    
1947     /*
1948     WHOA -- now all that was just to care for the leaf level!
1949     Now walk up inserting pointers in parent forks while we split.
1950     */
1951    
1952     #ifdef BUILD_SHMODE /* lock tree, unlock leaves */
1953     QLOCKUN(lb<<1);
1954     lb = -1;
1955     if ( sibnum ) {
1956     QLOCKEX(1); /* get tree lock */
1957     lt = 1;
1958     QLOCKUN(ls<<1);
1959     ls = -1;
1960     }
1961     #endif
1962     while ( sibnum ) { /* created sibling */
1963     unsigned char *e0;
1964     int tog = 0;
1965     Block *ins;
1966    
1967     /* get parent */
1968     lev++;
1969     b = getfork(&blk[0].b, dx, f.n[lev], lev);
1970     LOG_DBG( LOG_DEBUG,
1971     "split: got parent %d lev %d", b->num, b->lev );
1972     e = keyfork(b, &up, dx->vsz);
1973     /* again L/Y link hunt */
1974     while ( !(QMATCH & e) && b->ent == e && b->nxt && b->num ) {
1975     unsigned se;
1976     /* peek next block, toggling buffers */
1977     s = getfork(&blk[(++tog)&1].b, dx, b->nxt, lev);
1978     se = keyfork(s, &up, dx->vsz);
1979     if ( !se ) /* lower than first */
1980     break;
1981     LOG_DBG( LOG_DEBUG, "LYao %d -> %d lev %d for '%b' got 0x%4x",
1982     b->num, s->num, i, up.len, up.byt, se );
1983     /* trade b for s */
1984     b = s;
1985     e = se;
1986     }
1987     base = (unsigned char*)b;
1988     ins = b;
1989     s = 0;
1990     LOG_DBG( LOG_DEBUG, "split: got parent pos %4x", e );
1991     if ( QMATCH & e ) /* !??? */
1992     DONE((ERR_TRASH, "OOPS! found key in parent"));
1993     if ( ! e ) {
1994     TRACEKEY( ERR_TRASH, &f );
1995     DONE((ERR_TRASH, "OOPS! parent insert at 0 lev %d", lev));
1996     }
1997     need = sizeof(Dict) + up.len + 4 + up.val.len;
1998     if ( need > b->stk - DICTOFF - b->ent*sizeof(Dict) ) {
1999     unsigned half = ((env.psz - b->stk)+b->ent*sizeof(Dict)+need) / 2;
2000     unsigned mv = b->ent, tot = 0, nent, sz;
2001     int adj;
2002     if ( e == b->ent ) {
2003     tot = need;
2004     ins = 0;
2005     }
2006     for ( d = b->dct+mv; d--, mv--; ) {
2007     sz = sizeof(Dict) + d->kln + 4 + (d->vln ? dx->vsz : 0);
2008     if ( half > (tot += sz) ) {
2009     if ( e == mv ) {
2010     if ( half > tot+need || tot+need-half < half-tot ) {
2011     tot += need;
2012     ins = 0; /* new link goes to sibling */
2013     } else
2014     break;
2015     if ( half <= tot )
2016     break;
2017     }
2018     if ( mv )
2019     continue;
2020     }
2021     if ( tot - half > sz/2 ) { /* don't move this */
2022     tot -= sz;
2023     d++; mv++;
2024     } else if ( ! mv ) /* !??? */
2025     mv = 1;
2026     break;
2027     }
2028     /* split here a little bit simpler */
2029     s = newfork( b==&blk[0].b ? &blk[1].b : &blk[0].b, dx, lev );
2030     s->nxt = b->nxt;
2031     b->nxt = s->num;
2032     /* straight move entries mv..b->ent-1 to sibling */
2033     sbase = (unsigned char*)s;
2034     d = b->dct + mv;
2035     sz = b->dct[mv-1].pos - b->stk;
2036     nent = b->ent - mv;
2037     LOG_DBG( LOG_DEBUG,
2038     "split: parent lev %d split %d to new %d mv %d"
2039     " '%b'/%s nent %d sz %d",
2040     lev, b->num, s->num, mv, d->kln, base+d->pos,
2041     prval( pv, base+d->pos+d->kln, d->vln?dx->ptr:0 ), nent, sz );
2042     memcpy( sbase + (s->stk -= sz), base + b->stk, sz );
2043     adj = s->stk - b->stk;
2044     b->stk += sz;
2045     memcpy( s->dct, d, nent*sizeof(Dict) );
2046     b->ent = mv;
2047     s->ent = nent;
2048     for ( d = s->dct; nent--; d++ )
2049     d->pos += adj;
2050     if ( ! ins ) {
2051     ins = s;
2052     e -= mv;
2053     }
2054     if ( (ret = chkblk( s, dx )) )
2055     break;
2056     }
2057     /* now insert link in block ins at pos e */ {
2058     unsigned isz = up.len + 4 + up.val.len, nent = ins->ent - e;
2059     unsigned char *ibase = (unsigned char*)ins;
2060     unsigned end = e ? ins->dct[e-1].pos : env.psz;
2061     LOG_DBG( LOG_DEBUG,
2062     "split: ins parent %d lev %d pos %d key '%b' -> %d vln %d %s",
2063     ins->num, ins->lev, e, up.len, up.byt, sibnum, up.val.len,
2064     PRVAL( pv, up.val ) );
2065     memmove( ibase + ins->stk - isz, ibase + ins->stk, end - ins->stk );
2066     ins->stk -= isz;
2067     SETINT(ibase + (end -= 4), sibnum);
2068     if ( up.val.len )
2069     memcpy( ibase + (end -= up.val.len), up.val.byt, up.val.len );
2070     memcpy( ibase + (end -= up.len), up.byt, up.len );
2071     for ( d = ins->dct+ins->ent; nent--; d-- ) {
2072     *d = d[-1];
2073     d->pos -= isz;
2074     }
2075     d->pos = end;
2076     d->vln = up.val.len ? 1 : 0;
2077     d->kln = up.len;
2078     ins->ent++;
2079     if ( (ret = chkblk( ins, dx )) )
2080     break;
2081     }
2082     wrtfork(b, dx);
2083     if ( ! s ) /* not split - we're done */
2084     break;
2085     /* save sibling's key */
2086     sibnum = s->num;
2087     e0 = ((unsigned char*)s) + s->dct[0].pos;
2088     memcpy( up.byt, e0, up.len = s->dct[0].kln );
2089     if ( s->dct[0].vln )
2090     memcpy( up.val.byt, e0+up.len, up.val.len = dx->vsz );
2091     else
2092     up.val.len = 0;
2093     LOG_DBG( LOG_DEBUG, "split: had %d/%d on lev %d key '%b' vln %d %s",
2094     b->num, sibnum, s->lev, up.len, up.byt, up.val.len, PRVAL( pv, up.val ) );
2095     wrtfork(s, dx);
2096     if ( lev < dx->dpt )
2097     continue;
2098     LOG_DBG( LOG_DEBUG, "root split!" );
2099     if ( b != &blk[0].b && b != &blk[1].b ) /* was in map -- could be gone */
2100     b = getfork(&blk[0].b, dx, 0, lev); /* refetch */
2101     /* new block for the old root, use sibling buffer (if any) */
2102     s = newfork(b==&blk[0].b ? &blk[1].b : &blk[0].b, dx, lev);
2103     /* cp all but the num */
2104     memcpy( ((unsigned char*)s)+4, ((unsigned char*)b)+4, env.psz-4 );
2105     b->nxt = s->nxt; /* restore root's special nxt */
2106     s->nxt = 0;
2107     b->stk = env.psz; /* empty the root */
2108     /* add two childs old root and it's sibling */
2109     SETINT( base + (b->stk -= 4), s->num ); /* the old root */
2110     b->dct[0].pos = b->stk; b->dct[0].vln = 0; b->dct[0].kln = 0;
2111     SETINT( base + (b->stk -= 4), sibnum );
2112     e0 = ((unsigned char*)s) + s->dct[0].pos;
2113     if ( ! up.val.len )
2114     b->dct[1].vln = 0;
2115     else {
2116     memcpy( base + (b->stk -= dx->vsz), up.val.byt, dx->vsz );
2117     b->dct[1].vln = 1;
2118     }
2119     memcpy( base + (b->stk -= up.len), up.byt, b->dct[1].kln = up.len );
2120     b->dct[1].pos = b->stk;
2121     b->ent = 2;
2122     b->lev++; /* increase tree depth */
2123     dx->dpt = b->lev;
2124     ret = wrtfork(b, dx);
2125     break;
2126     }
2127     done:
2128     #ifdef BUILD_SHMODE
2129     if ( -1 != lb ) QLOCKUN(lb<<1);
2130     if ( -1 != ls ) QLOCKUN(ls<<1);
2131     if ( lt ) QLOCKUN(1);
2132     #endif
2133     if ( 0 <= ret && b->lev && FINDKEY(&blk[0].b, &f, LOCK_READ) )
2134     ret = eRr(ERR_TRASH, "could not find key after add" );
2135     return ret;
2136     } /* qSet */
2137    
2138    
2139     int qDel ( Qdx *dx, Key *key )
2140     {
2141     Leaf leaf;
2142     Block * const b = &leaf.b;
2143     unsigned char * const base = leaf.base;
2144     int ret = 0;
2145     unsigned short e, n, i, mv;
2146     Dict *d;
2147     unsigned char *v;
2148     FindKey f;
2149     PrVal pv;
2150    
2151     f.dx = dx;
2152     f.key = key;
2153     LOG_DBG( LOG_DEBUG, "deleting key '%b'/%s",
2154     key->len, key->byt, PRVAL( pv, key->val ) );
2155     if ( FINDKEY(b, &f, LOCK_WRITE) )
2156     return eRr(ERR_TRASH, "could not find key");
2157     if ( !(QMATCH & f.e[0]) ) /* key not found - ok */
2158     goto done;
2159     LOG_DBG( LOG_DEBUG, "key found" );
2160     e = (QENTMASK & f.e[0]);
2161     d = b->dct + e;
2162     v = base+LEAFPOS(d)+d->kln; /* start of values */
2163     n = LEAFVLN(d);
2164     if ( ! n )
2165     goto done;
2166     i = valpos( v, n, dx->vsz, key->val.byt );
2167     LOG_DBG( LOG_DEBUG, "val %sfound pos %u of %u",
2168     (QMATCH & i) ? "" : "not ", i & ~QMATCH, n );
2169     if ( !(QMATCH & i) ) /* val not found - ok */
2170     goto done;
2171     i &= ~QMATCH;
2172     LOG_DBG( LOG_DEBUG,
2173     "del blk %d e %d i/n %d/%d", b->num, e, i, n );
2174     if ( 1 < n ) {
2175     v += i*dx->vsz; /* end of stack area to move: start of this value */
2176     mv = dx->vsz; /* amount to move: one value size */
2177     LEAFVLNADD( d, -1 );
2178     } else { /* sole value -- rm whole entry */
2179     v = base+LEAFPOS(d); /* start of key */
2180     mv = d->kln+dx->vsz;
2181     for ( i=b->ent - e - 1; i--; d++ ) /* move the i entries after d down */
2182     *d = d[1];
2183     d = b->dct + e; /* reset */
2184     b->ent--;
2185     }
2186     memmove( base+b->stk+mv, base+b->stk, v-(base+b->stk) );
2187     b->stk += mv;
2188     for ( ; e++ < b->ent; d++ )
2189     LEAFPOSADD( d, mv );
2190     wrtleaf(b, dx);
2191     done:
2192     QLOCKUN(b->num<<1);
2193     return ret;
2194     } /* qDel */
2195    
2196    
2197     static int setKeyVal (QSet *qst, char *val, int len, int split)
2198     {
2199     Qdx *qdx = qst->qdx;
2200     char *e = val+len;
2201     Key key;
2202     int nope, done = 0;
2203    
2204     if (split && qst->pfx.len)
2205     split = 1+qst->pfx.len; /* don't split on 1st pfx.len bytes */
2206     do {
2207     char buf[512], *b;
2208     int l;
2209    
2210     if (!split || qdx->cdx)
2211     l = e-val;
2212     else {
2213     char *s;
2214     while (val<e && (CT_WHITE|CT_SPECL)&lat1ct[(unsigned char)*val]) val++;
2215     if (e == (s = val))
2216     break;
2217     while (s<e && !((CT_WHITE|CT_SPECL)&lat1ct[(unsigned char)*s])) s++;
2218     l = s-val;
2219     }
2220     b = val;
2221     if (qst->pfx.len) {
2222     memcpy(b = buf, qst->pfx.byt, qst->pfx.len);
2223     if (l > (int)sizeof buf-qst->pfx.len)
2224     l = sizeof buf-qst->pfx.len;
2225     memcpy(buf+qst->pfx.len, val, l);
2226     l += qst->pfx.len;
2227     }
2228     /* qMkKey(qst->qdx, &key, val, len): */
2229     if (!qdx->cdx)
2230     memcpy(key.byt, b, key.len = l<qdx->ksz ? l : qdx->ksz);
2231     else {
2232     key.len = qdx->ksz;
2233     l = cEnc(qdx->cdx, &key, (unsigned char*)b, l, split);
2234     if ( split && (!key.len /* wordless */
2235     || l <= qst->pfx.len) /* houston, we have a problem */
2236     )
2237     break;
2238     }
2239     qMkVal(&key.val, &qst->ptr, qdx->ptr);
2240     qst->ptr.pos++;
2241     eRr(LOG_VERBOSE, "%s '%.*s'->'%b' %u %u %u %u",
2242     qst->del ? "del" : "set", l, b, key.len, key.byt,
2243     qst->ptr.rid, qst->ptr.tag, qst->ptr.pos>>16, qst->ptr.pos&0xffff);
2244     nope = qst->del ? qDel(qdx, &key) : qSet(qdx, &key);
2245     if (!split)
2246     return nope;
2247     val += l - qst->pfx.len;
2248     if (!nope)
2249     done++;
2250     } while (val < e);
2251     return done;
2252     } /* qSetKeyVal */
2253    
2254     int qSetKeyVal (QSet *qst, char *val, int len)
2255     {
2256     return setKeyVal(qst, val, len, 0);
2257     }
2258    
2259     int qSetKeyVals (QSet *qst, char *val, int len)
2260     {
2261     return setKeyVal(qst, val, len, 1);
2262     } /* qSetKeyVals */
2263    
2264    
2265    
2266     int qLoop ( QLoop *self )
2267     {
2268     Leaf leaf;
2269     Qdx *dx = self->qdx;
2270     int ret = 0;
2271     unsigned i = 0;
2272     int stop = QSTOP & self->flg;
2273     int skip = QSKIP & self->flg;
2274     Block *b = &leaf.b;
2275     Key *cmp = &self->key;
2276    
2277     if ( ! cmp->len ) { /* start at 1st leaf */
2278     if ( b != GETLEAF(b, dx, 0, LOCK_READ) )
2279     DONE((ERR_TRASH, "could not read leaf 0"));
2280     self->cur.len = 1; /* avoid QSAME match */
2281     } else { /* locate key */
2282     FindKey f;
2283     f.dx = dx;
2284     f.key = cmp;
2285     if ( FINDKEY(b, &f, LOCK_READ) )
2286     DONE((ERR_TRASH, "could not find key"));
2287     i = (unsigned)(QENTMASK & f.e[0]);
2288     if ( !(QMATCH & f.e[0]) && QEQ == stop )
2289     goto done;
2290     self->cur.len = 0; /* avoid QSAME match */
2291     }
2292     if ( QUPTO <= stop )
2293     cmp = &self->to;
2294     for ( ;; i=0 ) { /* blocks */
2295     unsigned char *base = (unsigned char*)b;
2296     Dict *d;
2297     if ( ! b || b->lev )
2298     return ERR_TRASH;
2299     d = b->dct + i;
2300     for ( ; i<b->ent; i++, d++ ) {
2301     unsigned short pos = LEAFPOS( d );
2302     unsigned short vln = LEAFVLN( d );
2303     unsigned short kln = d->kln;
2304     unsigned char *v = base+pos+kln;
2305     Ptr p;
2306    
2307     if ( skip ) {
2308     if ( kln == self->key.len
2309     && !memcmp(self->key.byt, base+pos, kln)
2310     )
2311     continue;
2312     skip = 0;
2313     }
2314     if ( stop ) {
2315     int diff = memcmp( cmp->byt, base+pos,
2316     kln <= cmp->len ? kln : cmp->len );
2317     switch ( stop ) {
2318     case QPF:
2319     if ( diff || kln < cmp->len )
2320     goto moan;
2321     break;
2322     case QEQ:
2323     if ( diff || kln != cmp->len )
2324     goto done;
2325     break;
2326     case QUPTO:
2327     if ( 0 >= diff )
2328     goto done;
2329     break;
2330     case QINCL:
2331     if ( 0 > diff || (!diff && kln > cmp->len) )
2332     goto done;
2333     break;
2334     default: /* unused */
2335     moan:
2336     eRr(LOG_INFO,
2337     "loop ends on key '%b'(%d) %d %d",
2338     kln, base+pos, kln, diff, cmp->len );
2339     goto done;
2340     }
2341     } /* if stop */
2342    
2343     if ( !i && kln == self->cur.len && !memcmp(self->cur.byt, base+pos, kln) )
2344     self->flg |= QSAME;
2345     else {
2346     self->flg &= ~QSAME;
2347     memcpy( self->cur.byt, base+pos, self->cur.len = kln );
2348     }
2349     if ( self->qcb ) {
2350     self->vals = v;
2351     self->nvals = vln;
2352     if ( (ret = self->qcb(self)) )
2353     goto done;
2354     } else { /* internal check */
2355     Leaf ll;
2356     FindKey f;
2357     f.dx = dx;
2358     f.key = &self->cur;
2359     memcpy( self->cur.val.byt, v, self->cur.val.len = dx->vsz );
2360     if ( FINDKEY(&ll.b, &f, LOCK_READ) )
2361     DONE((ERR_TRASH, "could not find key"));
2362     if ( !(QMATCH & f.e[0])
2363     || b->num != ll.b.num
2364     || i != (unsigned)(QENTMASK & f.e[0])
2365     ) {
2366     eRr(ERR_TRASH,
2367     "OOPS! want %u.%u got %u.%u (match %4x)",
2368     b->num, i, ll.b.num, QENTMASK & f.e[0], f.e[0] );
2369     qRdVal( &p, v, dx->vsz );
2370     eRr(ERR_TRASH,
2371     "\twhen looking for '%b'/%u.%u.%u",
2372     self->cur.len, self->cur.byt, p.rid, p.tag, p.pos );
2373     if ( ll.b.ent <= (QENTMASK & f.e[0]) )
2374     eRr(ERR_TRASH, "\tI found nothing" );
2375     else {
2376     Dict *dd = ll.b.dct + (QENTMASK & f.e[0]);
2377     unsigned char *bb = ((unsigned char*)&ll.b)+LEAFPOS(dd);
2378     qRdVal( &p, bb+dd->kln, dx->vsz );
2379     eRr(ERR_TRASH,
2380     "\tI found '%b'/%u.%u.%u",
2381     dd->kln, bb, p.rid, p.tag, p.pos );
2382     }
2383     } else
2384     eRr(LOG_TRACE, "key '%b' ok", self->cur.len, self->cur.byt );
2385     } /* internal check */
2386     }
2387     if ( ! b->nxt || !GETLEAF(b, dx, b->nxt, LOCK_READ) )
2388     break;
2389     /* TODO:
2390     b now is the block that was our sibling when reading the old b.
2391     As long as we don't shift entries,
2392     there is no need to advance to next key that is actually greater !?
2393     */
2394     }
2395     done:
2396     if ( ! self->qcb )
2397     eRr(LOG_INFO,
2398     "checked %d keys in %u blks of %dK depth %u cmp/fget/lget %u/%u/%u",
2399     stat.key, dx->lln, dx->lsz>>10, dx->dpt, stat.cmp, stat.fget, stat.lget );
2400     return ret;
2401     } /* qLoop */
2402    
2403    
2404    
2405     int qInit ( Qdx *dx )
2406     {
2407     Bloke blk;
2408     int size;
2409    
2410     /* defaults */
2411     if ( !dx->ksz ) dx->ksz = 255;
2412    
2413     /* check leaves file */
2414     size = fSize( dx->mqd );
2415     if ( 0x3ff & size ) /* not on 1K boundary */
2416     return eRr(LOG_ERROR, "mqd has bad size %dK + %d",
2417     size >> 10, (int)size & 0x3ff );
2418     if ( size ) { /* read 1st leaf header */
2419     Block b;
2420     int got = fPread( &dx->mqd, (unsigned char*)&b, sizeof(b), 0 );
2421     if ( sizeof(b) != (unsigned)got )
2422     return eRr(LOG_SYSERR, "could not get leaf head" );
2423     if ( b.num )
2424     return eRr(ERR_TRASH, "leaf has num %d", b.num );
2425     if ( dx->typ && dx->typ != b.typ )
2426     eRr(LOG_WARN, "typ want 0x02%2x got 0x02%2x", dx->typ, b.typ );
2427     if ( QDX_COMPRS & dx->typ )
2428     eRr(LOG_WARN, "sorry this version does not support compressed keys");
2429     dx->typ = b.typ;
2430     dx->ksz = b.ksz;
2431     dx->ptr = b.ptr;
2432     }
2433     if ( QDX_LEAFPV & dx->typ )
2434     dx->vsz = dx->ptr;
2435     else
2436     dx->vsz = (3&(dx->ptr>>6)) + 3+(3&(dx->ptr>>4)) + (7&dx->ptr);
2437     if (!size && dx->ksz > 255 - dx->vsz)
2438     dx->ksz = 255 - dx->vsz;
2439     eRr(LOG_VERBOSE, "dx ptr %2x vsz %d=%d+%d+%d", dx->ptr, dx->vsz,
2440     (3&(dx->ptr>>6)), 3+(3&(dx->ptr>>4)), (7&dx->ptr));
2441     dx->lsz = 1<<QDX_LEAFSH<<(QDX_SIZMSK&dx->typ);
2442     if ( size % dx->lsz )
2443     return eRr(ERR_TRASH, "size 0x%08x %d blksize", size, dx->lsz );
2444     dx->lln = size / dx->lsz;
2445     if ( ! dx->lln
2446     && (!(DX_WRITE & dx->flg) || (newleaf(&blk.b, dx), wrtleaf(&blk.b, dx)))
2447     )
2448     return eRr(LOG_SYSERR, "could not init new leaf file" );
2449    
2450     #ifdef CPU_BIG_ENDIAN
2451     # define QDX_FORK (QDX_FORKBE | (env.psh-QDX_FORKSH))
2452     #else
2453     # define QDX_FORK (QDX_FORKLE | (env.psh-QDX_FORKSH))
2454     #endif
2455     size = fSize(dx->mqx.fil);
2456     if ( size ) {
2457     if ( size % env.psz ) /* not on env.psz boundary */
2458     return eRr(LOG_ERROR, "mqx has bad size %dK + %d",
2459     size >> 10, (int)size & 0x3ff );
2460     dx->fln = size >> env.psh;
2461     if ( (int)dx->fln != fMap(&dx->mqx, dx->fln) )
2462     return eRr(LOG_ERROR, "could not map mqx");
2463     dx->ftp = ((Block*)dx->mqx.map)->typ;
2464     if ( dx->ftp != QDX_FORK ) /* bad pagesize, endianess or flags */
2465     return eRr(LOG_ERROR, "root has type %2x != %2x",
2466     dx->ftp, QDX_FORK );
2467     dx->dpt = ((Block*)dx->mqx.map)->lev;
2468     }
2469     dx->fln = size / env.psz;
2470     if ( ! dx->fln ) {
2471     dx->ftp = QDX_FORK;
2472     eRr(LOG_INFO, "creating index type 0x%2x/0x%2x lsz %u psz %u ksz %u vsz %u",
2473     dx->typ, dx->ftp, dx->lsz, env.psz, dx->ksz, dx->vsz );
2474     if ( 1 < dx->lln ) {
2475     if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg))
2476     return eRr(ERR_TRASH, "need excl write access to rebuild tree");
2477     if ( qRemake(dx) )
2478     return eRr(LOG_ERROR, "could not rebuild tree");
2479     } else {
2480     /* init: link root to leaf */
2481     Block *b = newfork(&blk.b, dx, 1);
2482     SETINT( ((unsigned char*)b)+(b->stk -= 4), 0 );
2483     b->dct[0].pos = b->stk; /* 0, 0 */
2484     b->ent = 1;
2485     if ( wrtfork(b, dx) )
2486     return eRr(LOG_SYSERR, "\twhen writing root" );
2487     fMap(&dx->mqx, 1);
2488     dx->dpt = 1;
2489     }
2490     }
2491     eRr(LOG_INFO, "psz %d mapped %d", env.psz, dx->mqx.npg );
2492    
2493     return 0;
2494     } /* qInit */
2495    
2496    
2497     void qFini ( Qdx *dx )
2498     {
2499     fMap(&dx->mqx, 0);
2500     } /* qFini */

  ViewVC Help
Powered by ViewVC 1.1.26