/[webpac]/openisis/0.9.9e/core/qdx.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/0.9.9e/core/qdx.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (show annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 2 months ago) by dpavlin
File MIME type: text/plain
File size: 72047 byte(s)
import of new openisis release, 0.9.9e

1 /*
2 The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3 Version 0.9.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: qdx.c,v 1.29 2004/11/12 08:38:13 kripke Exp $
25 the btree.
26 */
27
28 #include <stdlib.h>
29 #include <string.h>
30 #include <assert.h>
31
32 #include "../core/core.h"
33
34
35
36 /* ************************************************************
37 private types
38 */
39
40
41 /**
42 Dictionary layout for forks (inner nodes of tree):
43 */
44 typedef struct Dict { /* dictionary of block entries */
45 unsigned short pos; /* position of entry, native byte order */
46 unsigned char vln; /* number of values */
47 unsigned char kln; /* length of key */
48 } Dict;
49 /*
50 For leaves, the max block size is 8K,
51 which need 13 bit for pos and 11 bit for vln (max 8K/min vsz 4 values).
52 The 1st 3 bytes are:
53 1. lower 8 bit of pos
54 2. higher 5 bit of pos | higher 3 bit of vlen << 5
55 3. lower 8 bit of vln
56 */
57 #ifdef CPU_BIG_ENDIAN
58 #define LEAFPOS( d ) (*((unsigned char*)(d)) \
59 | (((unsigned char*)(d))[1]&0x1f)<<8)
60 #define LEAFVLN( d ) ((d)->vln \
61 | (((unsigned char*)(d))[1]&0xe0)<<3)
62 #define LEAFPOSADD( d, a ) do { \
63 unsigned char *__u = (unsigned char*)(d); \
64 unsigned short __p = (a) + __u[0] + (__u[1]<<8); \
65 __u[0] = (unsigned char)__p; \
66 __u[1] = (unsigned char)((__u[1]&0xe0)|((__p>>8)&0x1f)); \
67 } while (0)
68 #define LEAFVLNADD( d, a ) do { \
69 unsigned char *__u = (unsigned char*)(d); \
70 unsigned short __v = (a) + __u[2] + ((__u[1]&0xe0)<<3); \
71 __u[2] = (unsigned char)__v; \
72 __u[1] = (unsigned char)((__u[1]&0x1f)|((__v>>3)&0xe0)); \
73 } while (0)
74 #define LEAFSETPV( d, p, v ) do { \
75 unsigned char *__u = (unsigned char*)(d); \
76 __u[0] = (unsigned char)(p); \
77 __u[1] = (unsigned char)((((p)>>8)&0x1f)|(((v)>>3)&0xe0)); \
78 __u[2] = (unsigned char)(v); \
79 } while (0)
80 #else
81 #define LEAFPOS( d ) (0x1fff & (d)->pos)
82 #define LEAFVLN( d ) ((d)->vln | (~0x1fff & (d)->pos)>>5)
83 #define LEAFPOSADD( d, a ) do { \
84 (d)->pos = (~0x1fff & (d)->pos) | (0x1fff & ((d)->pos + (a))); \
85 } while (0)
86 #define LEAFVLNADD( d, a ) do { \
87 unsigned _vln = LEAFVLN( d ) + (a); \
88 (d)->pos = (0x1fff & (d)->pos) \
89 | (~0x1fff & ((_vln)<<5)); \
90 (d)->vln = (unsigned char)(_vln); \
91 } while (0)
92 #define LEAFSETPV( d, p, v ) do { \
93 (d)->pos = (0x1fff & (p)) | (~0x1fff & ((v)<<5)); \
94 (d)->vln = (unsigned char)(v); \
95 } while (0)
96 #endif
97
98 #define LEAFSETPVK( d, p, v, k ) do { \
99 LEAFSETPV( d, p, v ); \
100 (d)->kln = (unsigned char)(k); \
101 } while (0)
102
103
104 /*
105 the first 8 bytes are actually redundant,
106 however are here for convenience and as a check
107 */
108 typedef struct Block {
109 unsigned num; /* number of this block, for check and convenience */
110 unsigned char typ; /* block type & endianess */
111 unsigned char ksz; /* max key length */
112 unsigned char ptr; /* ptr type; for forks+plain leaves: value size */
113 unsigned char lev; /* level over bottom, 0 for leaves */
114 unsigned nxt; /* right sibling, free list for root */
115 unsigned short ent; /* # entries */
116 unsigned short stk; /* offset of keyspace stack */
117 Dict dct[1];
118 } Block;
119 #define DICTOFF ((char *)&((Block*)0)->dct - (char*)((Block*)0))
120 #define GETENT(b, i) (((unsigned char *)(b)) \
121 + ((b)->lev ? (b)->dct[i].pos : LEAFPOS((b)->dct+(i))))
122
123 /*
124 LAYOUT:
125 dct is an array of dictionary entries in increasing key order
126 stk is growing downwards from block end towards dict
127 position of entry i is b->stk <= LEAFPOS(b->dct+i) < dx->t/lsz
128 decreasing on i (i.e. stack space is in reverse ordering)
129
130 at LEAFPOS(b->dct+i), there are b->dct[i].kln bytes key
131 for a leaf entry, the key is followed by
132 LEAFVLN(b->dct+i) (between 0..2047 incl.) values
133 of dx->vsz (between 8..23 incl) each
134 for a fork entry, the key is followed by
135 a single 4 bytes block number (vln = 0)
136 or vln pairs of qualifying values (vsz each) and block numbers
137 (without the key compression flag to be implemented, only vln=1 is supported)
138
139 SIZES:
140 block size >= 512
141 block base overhead DICTOFF is 16 bytes
142 avail size >= 496
143 Dict entry is 4 bytes
144 max. used stack space per entry is:
145 - fork: kln+4+dx->vsz <= 255+4+255 = 514
146 - new leaf entry (one value): kln+dx->vsz <= 510
147 max total space for a new entry incl. 4 bytes dict is 518.
148 For leaves of minimal block size 512,
149 the sum of ksz+vsz must not exceed 492=512-16-4,
150 so that each block can hold at least one entry.
151 Forks are at least 4K and thus can hold min 7 entries.
152 Even in pathological cases with max kln and vsz and all forks
153 split to the worst, a level's fork factor does not drop below 4.
154 */
155
156 enum {
157 QMAXLEAF = 0x2000, /* max leaf size -- 8K fixed !!! */
158 QMAXFORK = 1<<CPU_PAGE_SHIFT,
159 #if 0x2000 < 1<<CPU_PAGE_SHIFT
160 QMAXBLCK = 1<<CPU_PAGE_SHIFT,
161 #else
162 QMAXBLCK = 0x2000,
163 #endif
164 /* With 2Gig files there are at most 2^31/2^9=2^22 leaves,
165 needing 11 fork levels. */
166 QLEVELS = 12 /* max depth incl. leaves */
167 };
168 typedef union { Block b; unsigned char base[QMAXLEAF]; } Leaf;
169 typedef union { Block b; unsigned char base[QMAXFORK]; } Fork;
170 typedef union { Block b; unsigned char base[QMAXBLCK]; } Bloke; /* any dude */
171
172
173 enum {
174 QENTSIZE = 8, /* minimum cost of entry: 4 byte dict, 4 byte val */
175 /* the number of entries is limited by (QMAXFORK-16)/QENTSIZE < 8K */
176 QENTBITS = 13, /* bits for # enries */
177 QENTMASK = 0x1fff,
178 QENTMAX = QENTMASK,
179 QMATCH = 0x8000 /* entry matched flag in found entry numbers */
180 };
181
182 #if 0
183 # define SORT_Q
184 /* glibc's qsort is up to 20% faster than our basic untuned WHS. However:
185 - dietlibc's is slower
186 - it still has poor worst case performance
187 - many implementations are known to be buggy
188 */
189 #endif
190 /* memory for internal sorting */
191 /*
192 loaded 5MB index in 5 seconds with as little as
193 #define SORTMEM 0x30000 = 192K (+96K Keys) internal
194 #define SORTMEM 0x400000 = 4MB (+2MB Keys) internal
195 */
196 #define SORTMEM 0x400000
197 #define SORTKEYS (SORTMEM/8) /* + another 50%: #entries (assume > 8 Bytes) */
198
199 /* blocksize for files */
200 /*
201 #define MERGEBLK 0x20000 = 128K buffer, 48-way merge
202 #define MERGEBLK 0x3000 = 12K buffer, 512-way merge
203
204 12K blocks perform quite well and allow for 512-way merge,
205 so we never have a second pass for 31-bit addressable files
206 */
207 #define MERGEBLK 0x3000
208 #define MERGECHK (SORTMEM/MERGEBLK) /* initial chunk size */
209
210 /*
211 By organizing files in MERGEBLKs, so that entries don't span blocks,
212 we may be wasting up to one full entry (256 bytes) per block:
213 if we have only exactly the space for one such longest entry,
214 use one terminating 0 byte instead.
215 */
216 #define SORTUSE (MERGECHK*(MERGEBLK-256))
217
218 typedef struct {
219 unsigned char *keyb[SORTKEYS];
220 unsigned char memb[SORTMEM];
221 } SortBuf;
222 typedef unsigned char MergeBlk[MERGEBLK];
223
224 /* n-way merge
225 #define MERGEWAY (sizeof (SortBuf)/sizeof (MergeBlk))
226 */
227 #ifndef MERGEWAY /* define possible max */
228 # define MERGEWAY ((4*SORTKEYS+SORTMEM)/MERGEBLK) /* as pre-pro constant */
229 #endif
230 #if 0x80000000UL > (MERGEWAY*SORTMEM)
231 # define MERGEMULTIPASS
232 /* else (512-way merge on 4MB) we can do 2GB in one pass */
233 #endif
234
235 typedef struct QLoad {
236 unsigned got; /* current for this key */
237 unsigned tot; /* total for this key */
238 unsigned let;
239 /* stats */
240 unsigned keys;
241 unsigned keyl;
242 unsigned maxl;
243 unsigned vals;
244 unsigned maxv;
245 unsigned span;
246 unsigned free;
247 /* load buffer */
248 Leaf cur;
249 Key key;
250 Val val[QENTMAX];
251 /* sort buffer */
252 file fil;
253 unsigned blks;
254 unsigned char **nkey;
255 unsigned char *nmem;
256 #if 0 /* hard to measure ... */
257 # define ALIGN4K __attribute__ ((aligned (0x1000)))
258 #else
259 # define ALIGN4K
260 #endif
261 union {
262 SortBuf s;
263 MergeBlk m[MERGEWAY]; /* mergebuf */
264 } buf ALIGN4K;
265 MergeBlk mout; /* only used temporary, but don't want to kill stack */
266 } QLoad;
267
268
269
270 typedef struct { /* structure filled by findkey */
271 Qdx *dx; /* the tree */
272 Key *key; /* the key searched */
273 unsigned n[QLEVELS]; /* block numbers of path */
274 unsigned short e[QLEVELS]; /* entries in path with match flag */
275 } FindKey;
276
277
278 typedef struct {
279 unsigned key;
280 unsigned cmp;
281 unsigned fget;
282 unsigned lget;
283 } Stat;
284
285 /* buffer to print a value to */
286 typedef char PrVal[64];
287
288
289 /* ************************************************************
290 private data
291 */
292
293 static Stat stat;
294
295
296 /* ************************************************************
297 private functions
298 */
299
300 /* bail out */
301 #define DONE( args ) do { ret = eRr args; goto done; } while (0)
302
303 #ifdef CPU_NEED_ALIGN
304 static unsigned GETINT ( void *m ) /* bus error safe version of *(int*) */
305 {
306 unsigned l;
307 memcpy( &l, m, 4 );
308 return l;
309 }
310 static void SETINT ( void *m, unsigned i )
311 {
312 memcpy( m, &i, 4 );
313 }
314 #else
315 #define GETINT( m ) (*(unsigned*)(m))
316 #define SETINT( m, i ) (*(unsigned*)(m) = (i))
317 #endif
318
319 void qMkVal ( Val *v, Ptr *p, unsigned char typ )
320 {
321 unsigned char *c = v->byt;
322 /* TODO: may macros to address the bytes in the ints would be faster */
323 if ( !(QDX_FULTXT & typ) ) /* leading tag */
324 switch ( QDX_TAGMSK&typ ) {
325 case QDX_TAG2: *c++ = (unsigned char) (p->tag >> 8);
326 case QDX_TAG1: *c++ = (unsigned char) p->tag;
327 }
328 switch ( QDX_RIDMSK&typ ) {
329 case QDX_RID6: *c++ = (unsigned char) (p->ext >> 8);
330 case QDX_RID5: *c++ = (unsigned char) p->ext;
331 case QDX_RID4: *c++ = (unsigned char) (p->rid >> 24);
332 case QDX_RID3:
333 *c++ = (unsigned char) (p->rid >> 16);
334 *c++ = (unsigned char) (p->rid >> 8);
335 *c++ = (unsigned char) p->rid;
336 }
337 if ( QDX_FULTXT & typ )
338 switch ( QDX_TAGMSK&typ ) {
339 case QDX_TAG2: *c++ = (unsigned char) (p->tag >> 8);
340 case QDX_TAG1: *c++ = (unsigned char) p->tag;
341 }
342 switch ( QDX_POSMSK&typ ) {
343 case 4: *c++ = (unsigned char) (p->pos >> 24);
344 case 3: *c++ = (unsigned char) (p->pos >> 16);
345 case 2: *c++ = (unsigned char) (p->pos >> 8);
346 case 1: *c++ = (unsigned char) p->pos;
347 }
348 v->len = c - v->byt;
349 } /* qMkVal */
350
351
352 int qRdKey ( Qdx *qdx, char *plain, int l, Key *key )
353 {
354 if (!qdx->cdx) {
355 if ( l > key->len ) l = key->len;
356 memcpy(plain, key->byt, l);
357 } else
358 l = cDec(qdx->cdx, (unsigned char*)plain, l, key);
359 return l;
360 } /* qRdKey */
361
362
363 void qMkKey ( Qdx *qdx, Key *key, char *b, int l )
364 {
365 if (!qdx->cdx) {
366 if ( l > qdx->ksz ) l = qdx->ksz;
367 memcpy(key->byt, b, key->len = l);
368 } else {
369 key->len = qdx->ksz;
370 cEnc(qdx->cdx, key, (unsigned char*)b, l, 0);
371 }
372 } /* qMkKey */
373
374
375 int qMkKeyVal ( Qdx *qdx, Key *key, char *b, int l )
376 {
377 Ptr p = {0,0,0,0};
378 char *t = memchr(b, '\t', l);
379 if ( !t )
380 return -1;
381 qMkKey(qdx, key, b, t - b);
382 if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) )
383 return -1;
384 p.rid = (unsigned) a2i( b, t-b );
385 if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) )
386 return -1;
387 p.tag = (unsigned short) a2i( b, t-b );
388 if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) )
389 return -1;
390 p.pos = (unsigned) a2i( b, t-b );
391 if ( 0 < (l -= t-b+1) )
392 p.pos = p.pos << 16 | (unsigned short) a2i( t+1, l );
393 eRr(LOG_TRACE, "%'%b %d %d 0x%4x",
394 key->len, key->byt, p.rid, p.tag, p.pos );
395 qMkVal( &key->val, &p, qdx->ptr );
396 return 0;
397 } /* qMkKeyVal */
398
399
400 void qRdVal ( Ptr *p, const unsigned char *c, unsigned char typ )
401 {
402 p->tag = p->ext = p->rid = p->pos = 0;
403 if ( !(QDX_FULTXT & typ) ) /* leading tag */
404 switch ( QDX_TAGMSK&typ ) {
405 case QDX_TAG2: p->tag = (unsigned short)*c++ << 8;
406 case QDX_TAG1: p->tag |= *c++;
407 }
408 switch ( QDX_RIDMSK&typ ) {
409 case QDX_RID6: p->ext = (unsigned short)*c++ << 8;
410 case QDX_RID5: p->ext |= *c++;
411 case QDX_RID4: p->rid = (unsigned int)*c++ << 24;
412 case QDX_RID3:
413 p->rid |= (unsigned int)*c++ << 16;
414 p->rid |= (unsigned int)*c++ << 8;
415 p->rid |= *c++;
416 }
417 if ( QDX_FULTXT & typ )
418 switch ( QDX_TAGMSK&typ ) {
419 case QDX_TAG2: p->tag = (unsigned short)*c++ << 8;
420 case QDX_TAG1: p->tag |= *c++;
421 }
422 switch ( QDX_POSMSK&typ ) {
423 case 4: p->pos = (unsigned int)*c++ << 24;
424 case 3: p->pos |= (unsigned int)*c++ << 16;
425 case 2: p->pos |= (unsigned short)*c++ << 8;
426 case 1: p->pos |= *c++;
427 }
428 } /* qRdVal */
429
430
431 static char *prval ( PrVal buf, unsigned char *c, unsigned typ )
432 {
433 char *d = buf;
434 Ptr p;
435 if ( ! typ ) {
436 buf[0] = '#';
437 buf[1] = 0;
438 return buf;
439 }
440 qRdVal( &p, c, typ );
441 if ( p.ext ) { d += u2a( d, p.ext ); *d++ = '.'; }
442 d += u2a( d, p.rid ); *d++ = '.';
443 d += u2a( d, p.tag ); *d++ = '.';
444 d += u2a( d, p.pos ); *d++ = '.';
445 *d = 0;
446 return buf;
447 } /* prval */
448
449 #define PRVAL( buf, v ) prval( buf, v.byt, dx->ptr )
450
451 #define QLOCK(n,l) (void)FLOCK(dx->mqd, n, l)
452 #define QLOCKSH(n) (void)FLOCKSH(dx->mqd, n)
453 #define QLOCKEX(n) (void)FLOCKEX(dx->mqd, n)
454 #define QLOCKUN(n) (void)FLOCKUN(dx->mqd, n)
455 #ifdef BUILD_SHMODE
456 enum {
457 LOCK_READ = FIL_BLOCK,
458 LOCK_WRITE = FIL_BLOCK|FIL_WR
459 };
460 #endif
461
462
463 /* ************************************************************
464 block I/O
465 */
466
467
468 static int chkblk ( Block *b, Qdx *dx )
469 {
470 int i;
471 unsigned short eod = DICTOFF + b->ent*sizeof(Dict), siz, pos, vln, key;
472 unsigned char *base = (unsigned char*)b;
473 unsigned len = b->lev ? dx->fln : dx->lln;
474 unsigned bsz = b->lev ? env.psz : dx->lsz;
475 unsigned end = bsz, tot = 0;
476 if ( b->num > len || (b->num && b->nxt > len) )
477 return eRr(ERR_TRASH, "bad num %d / nxt %d on lev %d",
478 b->num, b->nxt, b->lev );
479 if ( b->typ != (b->lev ? dx->ftp : dx->typ)
480 || b->ksz != dx->ksz || b->ptr != (b->lev?dx->vsz:dx->ptr)
481 )
482 return eRr(ERR_TRASH, "bad sig 0x%02x,%d,%d in block %d want 0x%02x,%d,%d",
483 b->typ, b->ksz, b->ptr, b->num,
484 b->lev ? dx->ftp : dx->typ, dx->ksz, (b->lev?dx->vsz:dx->ptr) );
485 if ( b->ent > bsz / QENTSIZE || b->stk > bsz || b->stk < eod )
486 return eRr(ERR_TRASH, "bad ent/stk %d,%d in block %d eod %d sz %d",
487 b->ent, b->stk, b->num, eod, bsz );
488 for ( i=0; i<b->ent; i++ ) {
489 Dict *d = &b->dct[i];
490 if ( b->lev ) {
491 pos = d->pos; vln = d->vln;
492 siz = vln ? vln*(dx->vsz+4) : 4;
493 } else {
494 pos = LEAFPOS(d); vln = LEAFVLN(d);
495 siz = vln*dx->vsz;
496 }
497 siz += key = d->kln;
498 if ( pos < b->stk || end != (unsigned)pos + siz )
499 return eRr(ERR_TRASH,
500 "bad pos/vln/key %d/%d/%d in entry %d of block %d"
501 " stack %d sz %d end %d expected %d",
502 pos, vln, key, i, b->num, b->stk, siz, pos+siz, end );
503 if ( b->lev ) {
504 unsigned lnk = GETINT( base + pos + key + vln*dx->vsz );
505 if ( lnk >= (1==b->lev ? dx->lln : dx->fln) )
506 return eRr(ERR_TRASH,
507 "bad link %u in entry %d of block %d",
508 lnk, i, b->num );
509 }
510 #ifndef NDEBUG
511 if ( i ) { /* not on first */
512 unsigned short op = b->lev ? d[-1].pos : LEAFPOS(d-1), okl = d[-1].kln;
513 int mc = memcmp( base+pos, base+op, key<okl ? key : okl ), j;
514 if ( 0 < mc || (!mc && 0 < (mc = (int)key - (int)okl)) )
515 goto ok;
516 j = 0;
517 if ( !mc && b->lev && vln ) { /* compare values */
518 unsigned ovln = d[-1].vln;
519 if ( ! ovln || 0 < (j = memcmp( base+pos+key, base+op+okl, dx->vsz )) )
520 goto ok;
521 }
522 return eRr(ERR_TRASH,
523 "bad key order entry %d of block %d lev %d"
524 " mc %d j %d key %d okl %d vln %d",
525 i, b->num, b->lev, mc, j, key, okl, vln
526 );
527 }
528 ok:
529 #endif
530 tot += siz;
531 end = pos;
532 } /* for i (entries) */
533 if ( tot > bsz - b->stk )
534 return eRr(ERR_TRASH,
535 "bad total entry size %d in block %d stack %d",
536 tot, b->num, b->stk );
537 #ifndef NDEBUG
538 if ( LOG_TRACE >= env.log ) {
539 eRr(LOG_TRACE,
540 "block %d lev %d sig 0x%02x,%d ent %d eod %d stk %d",
541 b->num, b->lev, b->typ, b->ksz, b->ent, eod, b->stk
542 );
543 for ( i=0; i<2; i++ ) {
544 int j = i ? b->ent-1 : 0;
545 Dict *d = &b->dct[j];
546 if ( i >= b->ent ) /* if b->ent is 0 or 1, log no or one key */
547 break;
548 if ( b->lev ) {
549 pos = d->pos; vln = d->vln;
550 } else {
551 pos = LEAFPOS(d); vln = LEAFVLN(d);
552 }
553 key = d->kln;
554 /* start of value */
555 if ( b->lev )
556 eRr(LOG_TRACE, "key %d '%b'/%d -> %d",
557 j, key, base+pos, vln, GETINT(base+pos+key+(vln?4:0)) );
558 else {
559 Ptr p;
560 qRdVal( &p, base+pos+key, dx->ptr );
561 eRr(LOG_TRACE, "key %d '%b'[%d] = %d[%d,%d]",
562 j, key, base+pos, vln, p.rid, p.tag, p.pos );
563 }
564 }
565 }
566 #endif
567 return 0;
568 } /* chkblk */
569
570
571 /*
572 init a new leaf in the local buffer b (usually a stack based Leaf).
573 increment dx's lln
574 */
575 static void newleaf ( Block *b, Qdx *dx )
576 {
577 memset(b, 0, dx->lsz);
578 b->num = dx->lln++;
579 b->typ = dx->typ;
580 b->ksz = dx->ksz;
581 b->ptr = dx->ptr;
582 b->lev = 0;
583 b->stk = dx->lsz;
584 } /* newleaf */
585
586
587 /*
588 init a new fork in the local buffer b (usually a stack based Fork).
589 increment dx's fln
590 TODO: for qSet, use variant checking for free list at root->nxt.
591 */
592 static Block *newfork ( Block *b, Qdx *dx, unsigned char lev )
593 {
594 memset(b, 0, env.psz);
595 b->num = dx->fln++;
596 b->typ = dx->ftp;
597 b->ksz = dx->ksz;
598 b->ptr = dx->vsz;
599 b->lev = lev;
600 b->stk = env.psz;
601 #if 16 == CPU_PAGE_SHIFT /* possible short overflow on ia64 */
602 if ( !b->stk ) b->stk--;
603 #endif
604 return b;
605 } /* newfork */
606
607
608 #ifdef CPU_BIG_ENDIAN
609 static unsigned rvi ( unsigned i ) {
610 unsigned r;
611 ((char*)&r)[0] = ((char*)&i)[3];
612 ((char*)&r)[1] = ((char*)&i)[2];
613 ((char*)&r)[2] = ((char*)&i)[1];
614 ((char*)&r)[3] = ((char*)&i)[0];
615 return r;
616 }
617 static unsigned short rvs ( unsigned short i ) {
618 unsigned short r;
619 ((char*)&r)[0] = ((char*)&i)[1];
620 ((char*)&r)[1] = ((char*)&i)[0];
621 return r;
622 }
623 #define SWI( i ) b->i = rvi( b->i )
624 #define SWS( s ) b->s = rvs( b->s )
625 static void swapleaf ( Block *b ) /* leaves are little endian */
626 {
627 SWI(num);
628 SWI(nxt);
629 SWS(ent);
630 SWS(stk);
631 }
632 #else
633 # define swapleaf(b)
634 #endif
635
636 /*
637 writing a leaf will render the leaf scrambled on big endians
638 */
639 static int wrtleaf ( Block *b, Qdx *dx )
640 {
641 unsigned num = b->num;
642 int got;
643 if ( chkblk( b, dx ) ) /* don't write no shize */
644 return eRr(ERR_TRASH, "\tbefore writing dx block %d", b->num );
645 swapleaf(b); /* a leaf written is a leaf lost */
646 got = fPwrite(&dx->mqd, b, dx->lsz, num*dx->lsz );
647 eRr(LOG_TRACE, "writing leaf %u got %u", num, got );
648 if ( dx->lsz != (unsigned)got )
649 return eRr(LOG_SYSERR, "\twhen writing dx block %d", b->num );
650 return 0;
651 } /* wrtleaf */
652
653
654 /*
655 writing a fork may invalidate all mapped forks due to remap
656 */
657 static int wrtfork ( Block *b, Qdx *dx )
658 {
659 int got;
660 if ( chkblk( b, dx ) ) /* don't write no shize */
661 return eRr(ERR_TRASH, "\tbefore writing dx block %d", b->num );
662 if ( dx->mqx.npg ) { /* have mmap -- not during qload */
663 if ( b->num < dx->mqx.npg ) { /* should be in map */
664 if ( (char*)b != dx->mqx.map + (b->num<<env.psh) )
665 return eRr(ERR_TRASH, "fork %d should be mapped", b->num);
666 if ( ENV_MSYNC & env.flg ) fMSync(&dx->mqx, b->num);
667 return 0;
668 }
669 if ( b->num < dx->mqx.lim /* try to extend mapping */
670 && 0 < (got = fMap(&dx->mqx, b->num+1))
671 && b->num < (unsigned)got
672 ) {
673 eRr(LOG_VERBOSE, "remapped to %d at %8x", dx->mqx.npg, dx->mqx.map);
674 memcpy(dx->mqx.map + (b->num<<env.psh), b, env.psz);
675 fMSync(&dx->mqx, b->num);
676 return 0;
677 }
678 eRr(LOG_WARN, "remap to %d failed", b->num+1);
679 /* else write to file */
680 }
681 got = fPwrite(&dx->mqx.fil, b, env.psz, b->num*env.psz );
682 eRr(LOG_TRACE, "writing fork %u got %u", b->num, got );
683 if ( env.psz != (unsigned)got )
684 return eRr(LOG_SYSERR, "\twhen writing dx block %d", b->num );
685 return 0;
686 } /* wrtfork */
687
688
689
690 static int chkread ( Block *b, Qdx *dx,
691 int got, unsigned bsz, unsigned num, unsigned char lev )
692 {
693 if ( 0 > got || bsz != (unsigned)got )
694 return eRr(LOG_SYSERR, "\twhen reading dx block %d", num );
695 if ( b->num != num )
696 return eRr(ERR_TRASH, "bad num %d in block %d", b->num, num );
697 if ( lev && b->lev != lev )
698 return eRr(ERR_TRASH, "bad lev %d in block %d want %d",
699 b->lev, b->num, lev );
700 return chkblk( b, dx );
701 } /* chkread */
702
703
704 /* read a leaf into the provided local buffer */
705 static Block *getleaf ( Block *b, Qdx *dx, unsigned num
706 #ifdef BUILD_SHMODE
707 , int lck
708 # define GETLEAF(leaf, dx, num, lck) getleaf(leaf, dx, num, lck)
709 #else
710 # define GETLEAF(leaf, dx, num, lck) getleaf(leaf, dx, num)
711 #endif
712 )
713 {
714 int got;
715 #ifdef BUILD_SHMODE
716 if ( lck ) QLOCK(num<<1, lck);
717 #endif
718 got = fPread(&dx->mqd, b, dx->lsz, num*dx->lsz);
719 stat.lget++;
720 #ifdef CPU_BIG_ENDIAN
721 if ( 0 < got && dx->lsz == (unsigned)got ) swapleaf(b);
722 #endif
723 if ( chkread(b, dx, got, dx->lsz, num, 0) )
724 b = 0;
725 #ifdef BUILD_SHMODE
726 if ( LOCK_READ == lck ) QLOCKUN(num<<1);
727 #endif
728 return b;
729 } /* getleaf */
730
731
732 /* pick a fork from cache or read into the provided local buffer */
733 static Block *getfork ( Block *b, Qdx *dx, unsigned num, unsigned char lev )
734 {
735 int got = env.psz;
736 stat.fget++;
737 if ( num < dx->mqx.npg )
738 b = (Block*)(dx->mqx.map + num*env.psz);
739 else
740 got = fPread(&dx->mqx.fil, b, env.psz, num*env.psz);
741 return chkread(b, dx, got, env.psz, num, lev) ? 0 : b;
742 } /* getfork */
743
744
745
746 /** binsearch position for key in a leaf.
747 position is either the index of the matching entry (0..b->ent-1)
748 or the index (0..b->ent) where key should be inserted.
749 @return position for key (0..b->ent), |yfied with QMATCH, if found
750 */
751 static unsigned short keyleaf ( Block *b, Key *key )
752 {
753 unsigned char *base = (unsigned char*)b;
754 unsigned char *kbt = key->byt;
755 unsigned char kln = key->len;
756 unsigned short l = 0, r = b->ent;
757 Dict *d = b->dct;
758 /* binsearch invariant:
759 key is < all entries i >= r (we do know relation at r)
760 key is > all entries i < l (don't know relation at l)
761 */
762 while ( l < r ) { /* invariant: l <= keypos < r */
763 unsigned short i = (l + r) >> 1; /* l <= i < r */
764 unsigned short pos = LEAFPOS(&d[i]);
765 unsigned char cln = d[i].kln;
766 int cmp = memcmp( kbt, base+pos, kln<cln ? kln : cln );
767 stat.cmp++; /* dirty */
768 if ( ! cmp && !(cmp = (int)kln - cln) )
769 return QMATCH | i;
770 if ( cmp > 0 )
771 l = i+1;
772 else
773 r = i;
774 }
775 return l; /* == r */
776 } /* keyleaf */
777
778 /* same for forks.
779 somewhat more complex since we might have duplicate keys
780 with different values.
781 Must not use the _POS/_VLN macros.
782 */
783 static unsigned short keyfork ( Block *b, Key *key, unsigned char vsz )
784 {
785 unsigned char *base = (unsigned char*)b;
786 unsigned char *kbt = key->byt;
787 unsigned char kln = key->len;
788 unsigned short l = 0, r = b->ent;
789 Dict *d = b->dct;
790 /* binsearch invariant:
791 key is < all entries i >= r (we do know relation at r)
792 key is > all entries i < l (don't know relation at l)
793 */
794 while ( l < r ) { /* invariant: l <= keypos < r */
795 unsigned short i = (l + r) >> 1; /* l <= i < r */
796 unsigned short pos = d[i].pos;
797 unsigned char cln = d[i].kln;
798 int cmp = memcmp( kbt, base+pos, kln<cln ? kln : cln );
799 stat.cmp++; /* dirty */
800 if ( ! cmp && !(cmp = (int)kln - cln) ) { /* key match -- cmp value */
801 if ( ! d[i].vln ) { /* common case */
802 if ( !key->val.len ) /* no value */
803 return QMATCH | i;
804 cmp = 1;
805 } else if ( ! key->val.len )
806 cmp = -1;
807 else if ( !(cmp = memcmp( key->val.byt, base+pos+cln,
808 key->val.len < vsz ? key->val.len : vsz))
809 && !(cmp = (int) key->val.len - vsz)
810 )
811 return QMATCH | i;
812 }
813 if ( cmp > 0 )
814 l = i+1;
815 else
816 r = i;
817 }
818 return l; /* == r */
819 } /* keyfork */
820
821
822 /** find position for (leaf) val.
823 search v in array b of n values of size sz
824 @return position for value (0..n), |yfied with QMATCH, if found
825 */
826 static unsigned short valpos ( unsigned char *b,
827 unsigned short n, unsigned char sz, unsigned char *v )
828 {
829 unsigned short l = 0, r = n;
830 /* binsearch invariant:
831 v is < all values i >= r (we do know relation at r)
832 v is > all values i < l (don't know relation at l)
833 */
834 while ( l < r ) { /* invariant: l <= valpos < r */
835 unsigned short i = (l + r) >> 1; /* l <= i < r */
836 int cmp = memcmp( v, b+i*sz, sz );
837 if ( ! cmp )
838 return QMATCH | i;
839 if ( cmp > 0 )
840 l = i+1;
841 else
842 r = i;
843 }
844 return l; /* == r */
845 } /* valpos */
846
847
848 #define TRACEKEY( l, f ) (void)(f)
849
850
851 /*
852 find and fetch key's leaf into the provided buffer.
853 */
854 static int findkey ( Block *leaf, FindKey *f
855 #ifdef BUILD_SHMODE
856 , int lck
857 # define FINDKEY(leaf, f, lck) findkey(leaf, f, lck)
858 #else
859 # define FINDKEY(leaf, f, lck) findkey(leaf, f)
860 #endif
861 )
862 {
863 #ifdef BUILD_SHMODE
864 Qdx *dx = f->dx; /* for the lock macros */
865 #endif
866 Fork fork[2]; /* buffer */
867 Leaf lbuf;
868 int i, ret=-1;
869 unsigned char *pkey = 0, pkln = 0, pvln = 0;
870 Block *b;
871
872 QLOCKSH(1);
873 b = getfork(&fork[0].b, f->dx, 0, 0);
874 if ( !b )
875 DONE((ERR_TRASH, "no root"));
876 if ( b->lev >= QLEVELS )
877 DONE((ERR_TRASH, "tree is too deep: %u", b->lev));
878 stat.key++;
879 for ( i=QLEVELS; i--; ) /* paranoia */
880 f->n[i] = f->e[i] = 0;
881 for ( i=b->lev;; ) {
882 unsigned short e;
883 unsigned n;
884 Dict *d;
885 int tog = 0;
886
887 f->n[i] = b->num;
888 e = i ? keyfork(b, f->key, f->dx->vsz) : keyleaf(b, f->key);
889 LOG_DBG( LOG_DEBUG, "keypos %4x", e );
890 /*
891 here goes the Lehmann/Yao race detection:
892 If we ran on right boundary, follow right link
893 */
894 while ( !(QMATCH & e) && b->ent == e && b->nxt && i < f->dx->dpt ) {
895 Block *s;
896 unsigned se;
897 /* peek next block, toggling buffers */
898 if ( i ) {
899 /* TODO: actually we should never see this in forks
900 as long as we lock the complete fork file */
901 s = getfork(&fork[(++tog)&1].b, f->dx, b->nxt, i);
902 se = keyfork(s, f->key, f->dx->vsz);
903 } else {
904 s = GETLEAF((++tog)&1?&lbuf.b:leaf, f->dx, b->nxt, lck);
905 se = keyleaf(s, f->key);
906 #ifdef BUILD_SHMODE
907 if ( LOCK_WRITE == lck ) QLOCKUN(se ? b->num : s->num);
908 #endif
909 }
910 if ( !se ) /* lower than first */
911 break;
912 LOG_DBG( LOG_DEBUG, "LYao %d -> %d lev %d for '%b' got 0x%4x",
913 b->num, s->num, i, f->key->len, f->key->byt, se );
914 /* trade b for s */
915 f->n[i] = (b = s)->num;
916 e = se;
917 }
918 f->e[i] = e;
919 if ( ! i ) {
920 if ( b != leaf )
921 memcpy(leaf, b, f->dx->lsz);
922 return 0; /* keep the lock */
923 }
924 if ( !(QMATCH & e) ) {
925 if ( e )
926 f->e[i] = --e;
927 else {
928 TRACEKEY( ERR_TRASH, f );
929 return eRr(ERR_IDIOT,
930 "ran on left bound of block %u ent %u",
931 b->num, b->ent );
932 }
933 }
934 d = b->dct + (QENTMASK & e);
935 /* direct access ok in fork */
936 pkey = ((unsigned char*)b)+d->pos;
937 pkln = d->kln;
938 pvln = d->vln ? f->dx->vsz : 0;
939 n = GETINT( pkey+pkln+pvln );
940 if ( --i )
941 b = getfork(&fork[0].b, f->dx, n, i);
942 else {
943 QLOCKUN(1);
944 b = GETLEAF(leaf, f->dx, n, lck); /* read 1st leaf */
945 /* continue only to do the LY */
946 }
947 }
948 done:
949 QLOCKUN(1);
950 return ret; /* go away */
951 } /* findkey */
952
953
954
955
956
957 /* ************************************************************
958 public functions
959 */
960
961 int qRemake ( Qdx *dx )
962 {
963 Key last;
964 Bloke child;
965 Fork fork;
966 unsigned char lev = 0; /* the current child level to be indexed */
967 unsigned left = 0; /* block on child level */
968 unsigned iblk = 0;
969 unsigned ikey = 0;
970 unsigned ikeyl = 0;
971 unsigned ipfx = 0;
972 unsigned ispn = 0;
973
974 fTrunc(&dx->mqx.fil, 0);
975 fMap(&dx->mqx, 0);
976 dx->fln = 1; /* pretend we had a root */
977 /* build tree */
978 for (dx->fln = 1;;lev++) { /* levels */
979 unsigned len = 0; /* current is n'th block on level */
980 Block *b = 0;
981 unsigned char *base = 0; /* == b */
982 unsigned have = 0; /* room in current block b */
983 Dict *d = 0; /* of current block b */
984 unsigned next = left; /* child */
985
986 left = dx->fln;
987 /* get child c and create child pointer in block b */
988 do {
989 unsigned char vln = 0; /* actual byte length: dx->vsz or 0 */
990 unsigned char *key;
991 unsigned char kln;
992 unsigned need;
993 Block *c = lev
994 ? getfork(&child.b, dx, next, lev)
995 : GETLEAF(&child.b, dx, next, 0);/* must have full ex lock */
996 if ( ! c )
997 return eRr(ERR_NOMEM, "no child block" );
998 next = c->nxt;
999 if ( ! c->ent ) /* OOPS empty node */
1000 continue;
1001 key = GETENT(c, 0);
1002 kln = c->dct[0].kln;
1003 if ( ! b ) /* the leftmost child on this level */
1004 kln = 0; /* always 0 key on left */
1005 else if ( lev ) /* the child is a fork -- have to carry vals ... */
1006 vln = c->dct[0].vln ? dx->vsz : 0; /* ... if the child starts with */
1007 else if ( last.len == kln /* compare key to last of prev. ... */
1008 && !memcmp( last.byt, key, kln ) /* ... which was saved in last */
1009 ) { /* key spans blocks */
1010 LOG_DBG( LOG_DEBUG, "found span key '%b' child %u",
1011 kln, key, c->num );
1012 vln = dx->vsz; /* qualify with value */
1013 } else {
1014 unsigned char x = 0;
1015 /* find prefix key */
1016 if ( kln > last.len )
1017 kln = last.len;
1018 while ( x < kln && key[x] == last.byt[x] )
1019 x++;
1020 if ( x < kln ) /* they differ at [x] */
1021 kln = x+1;
1022 else /* equal on the shorter length */
1023 kln++;
1024 if ( kln > c->dct[0].kln )
1025 return eRr(ERR_TRASH,
1026 "bad key '%b' on level %u matching previous of length %u",
1027 c->dct[0].kln, key, lev, last.len );
1028 LOG_DBG( LOG_DEBUG, "prefix on key '%b' child %u save %u",
1029 kln, key, c->num, c->dct[0].kln - kln );
1030 ipfx += c->dct[0].kln - kln;
1031 }
1032 if ( vln )
1033 ispn++;
1034 ikeyl += kln;
1035 need = sizeof(Dict) + kln + vln + 4;
1036 if ( need > have ) {
1037 if ( b ) {
1038 eRr(LOG_TRACE, "level %u block %u is full %u ent stack %u",
1039 lev, b->num, b->ent, b->stk );
1040 b->nxt = dx->fln; /* link */
1041 if ( wrtfork(b, dx) )
1042 goto nowrite;
1043 len++;
1044 }
1045 b = newfork(&fork.b, dx, lev+1);
1046 base = (unsigned char*)b;
1047 have = env.psz - DICTOFF; /* - ql->let; TODO: should we ??? */
1048 d = b->dct;
1049 len++;
1050 iblk++;
1051 }
1052 SETINT( base+(b->stk -= 4), c->num );
1053 d->kln = kln;
1054 d->vln = vln ? 1 : 0;
1055 if ( kln += vln ) /* key + consecutive qualifying value */
1056 memcpy( base+(b->stk -= kln), key, kln );
1057 d->pos = b->stk;
1058 d++;
1059 b->ent++;
1060 ikey++;
1061 have -= need;
1062 /* save the last dance for me */
1063 memcpy(last.byt, GETENT(c, c->ent-1), last.len = c->dct[c->ent-1].kln);
1064 } while (next);
1065
1066 if ( !len )
1067 return eRr(ERR_TRASH, "level %u was empty", lev );
1068 /* close level */
1069 eRr(LOG_INFO, "closing level %u with %u blocks %u..%u",
1070 lev, len, left, b->num );
1071 if ( 1 == len )
1072 b->num = 0; /* make it the root */
1073 if ( wrtfork(b, dx) )
1074 goto nowrite;
1075 if ( 1 == len ) {
1076 dx->dpt = b->lev;
1077 break;
1078 }
1079 }
1080 #define PM(a,b) (unsigned)(a*LULU(1000)/b) /* per mille */
1081 if (ikey && ikeyl) /* divisors */
1082 eRr(LOG_INFO,
1083 "\tused %u fork blocks %u keys %u bytes avg %d\n"
1084 "\t%u #spans (%d/oo) %u pfx (- %d/oo)",
1085 iblk, ikey, ikeyl, ikeyl/ikey,
1086 ispn, PM(ispn*dx->vsz,ikeyl), ipfx, PM(ipfx,(ipfx+ikeyl)) );
1087 dx->fln--; /* counted one over */
1088 fMap(&dx->mqx, dx->fln);
1089
1090 return 0;
1091 nowrite:
1092 dx->fln = 0;
1093 return eRr(LOG_SYSERR, "\twhen writing tree block" );
1094 } /* qRemake */
1095
1096
1097
1098
1099 static int load ( Qdx *dx, unsigned char *key )
1100 {
1101 int cmp = 0, keyl = 0, l;
1102 QLoad *ql = dx->qld;
1103 if (key) {
1104 keyl = *key - dx->vsz;
1105 l = keyl < ql->key.len ? keyl : ql->key.len; /* min */
1106 if ( !(cmp = memcmp(key+1, ql->key.byt, l)) )
1107 cmp = keyl - ql->key.len;
1108 if (0 > cmp)
1109 return eRr(LOG_ERROR, "qload key '%b' < last '%b'",
1110 keyl, key+1, ql->key.len, ql->key.byt );
1111 }
1112 /* flush ? */
1113 while ( !key /* end */
1114 || QENTMAX == ql->got /* full */
1115 || cmp /* next key */
1116 ) {
1117 Block *b = &ql->cur.b;
1118 unsigned have = b->stk - DICTOFF - b->ent*sizeof(Dict) - ql->let;
1119 unsigned need = sizeof(Dict) + ql->key.len;
1120 unsigned want = need + ql->got * dx->vsz;
1121 unsigned take = ql->got;
1122 if ( have < want ) { /* you can't always get ... */
1123 if ( have <= need )
1124 take = 0;
1125 else {
1126 take = (have - need) / dx->vsz;
1127 if ( 256 > take * dx->vsz )
1128 take = 0;
1129 }
1130 }
1131 LOG_DBG( LOG_DEBUG, "qload '%b' got %u need %u want %u have %u take %u",
1132 ql->key.len, ql->key.byt, ql->got, need, want, have, take );
1133 /* we have enough room in b for take entries */
1134 if ( take ) {
1135 unsigned char *base = (unsigned char*)b;
1136 unsigned char *s = base + b->stk;
1137 int i = take;
1138 Dict *d = &b->dct[ b->ent++ ];
1139 while ( i-- )
1140 memcpy( s -= dx->vsz, ql->val[i].byt, dx->vsz );
1141 memcpy( s -= ql->key.len, ql->key.byt, ql->key.len );
1142 b->stk = (unsigned short) (s - base);
1143 LEAFSETPV( d, b->stk, take );
1144 d->kln = ql->key.len;
1145 if ( ql->got > take )
1146 memcpy( ql->val, ql->val+take, sizeof(ql->val[0])*(ql->got - take) );
1147 ql->got -= take;
1148 }
1149 if ( ! ql->got )
1150 break;
1151 if ( take ) /* key spans blocks */
1152 ql->span++;
1153 ql->free += b->stk - DICTOFF - b->ent*sizeof(Dict); /* wasted */
1154 b->nxt = dx->lln; /* link */
1155 LOG_DBG( LOG_DEBUG, "qload: %u entries left, getting block %u",
1156 ql->got, dx->lln );
1157 if ( wrtleaf(b, dx) )
1158 return eRr(LOG_SYSERR, "\twhen writing qload block" );
1159 /* so couldn't flush all, thus we need another block */
1160 eRr(LOG_TRACE, "new qload leaf %d", dx->lln);
1161 newleaf(b = &ql->cur.b, dx);
1162 /* will continue on end or keychange */
1163 LOG_DBG( LOG_TRACE, "new block %u has %u ent %u stk, will keep %u",
1164 b->num, b->ent, b->stk, ql->let );
1165 }
1166 if (cmp || !key) /* done with key: stats */
1167 if (ql->maxv < ql->tot)
1168 ql->maxv = ql->tot;
1169 if (key) {
1170 if (cmp) { /* set new key */
1171 if (ql->got)
1172 return eRr(ERR_IDIOT, "qload not empty on new key");
1173 ql->tot = 0;
1174 memcpy(ql->key.byt, key+1, ql->key.len = keyl);
1175 ql->keys++;
1176 ql->keyl += keyl;
1177 if (ql->maxl < (unsigned)keyl)
1178 ql->maxl = keyl;
1179 }
1180 /* add new val */
1181 /* LOG_HEX( key->val.byt, dx->vsz ); */
1182 memcpy(ql->val[ ql->got++ ].byt, key+1+keyl, dx->vsz);
1183 ql->vals++;
1184 ql->tot++;
1185 } else {
1186 /* stats */
1187 unsigned size = dx->lln * dx->lsz;
1188 unsigned sval = ql->vals * dx->vsz;
1189 unsigned sove = dx->lln * DICTOFF + sizeof(Dict)*(ql->keys+ql->span);
1190
1191 if (size && ql->keys) /* divisors */
1192 eRr(LOG_INFO,
1193 "end qload:\n"
1194 "\t#blks %u total len %u free %u (%d/oo) overhead %u (%d/oo)\n"
1195 "\t#keys %u total len %u (%d/oo) avg %d max %u\n"
1196 "\t#vals %u total len %u (%d/oo) avg %d max %u spans %u",
1197 dx->lln, size, ql->free, PM(ql->free,size), sove, PM(sove,size),
1198 ql->keys, ql->keyl, PM(ql->keyl,size), ql->keyl/ql->keys, ql->maxl,
1199 ql->vals, sval, PM(sval,size), ql->vals/ql->keys, ql->maxv, ql->span
1200 );
1201 wrtleaf(&ql->cur.b, dx);
1202 }
1203
1204 return 0;
1205 } /* load */
1206
1207
1208 static int ncomp;
1209 #ifdef SORT_Q
1210 static int compar (const void *pa, const void *pb)
1211 {
1212 unsigned char *a = *(unsigned char **)pa;
1213 unsigned char *b = *(unsigned char **)pb;
1214 int l = *a, cmp;
1215 if (l > *b) l = *b;
1216 ncomp++;
1217 return (cmp = memcmp(a+1, b+1, l)) ? cmp : (int)*a - (int)*b;
1218 }
1219 #endif
1220 static OPT_INLINE int keyGt (unsigned char *a, unsigned char *b)
1221 {
1222 int l = *a, cmp;
1223 if (l > *b) l = *b;
1224 ncomp++;
1225 return 0 < (cmp = memcmp(a+1, b+1, l)) || (!cmp && *a > *b);
1226 }
1227
1228
1229 static int sort (Qdx *dx, unsigned char **key, int len)
1230 {
1231 unsigned char rev[SORTKEYS];
1232 unsigned char *tmp;
1233 int i, ret = 0;
1234 lolo tm;
1235
1236 tUpd(&tm);
1237 if (2 > len)
1238 goto cp;
1239 #ifdef SORT_Q
1240 qsort(key, len, sizeof key[0], compar);
1241 #else
1242 # define SORT_WH
1243 /* basic weak-heap-sort (Dutton 93) just like rSort; c.f. uti.c
1244 ``On the Performance of WEAK-HEAPSORT´´
1245 http://www.informatik.uni-freiburg.de/~edelkamp/publications/Weakheap.pdf
1246 for tuned versions see
1247 http://www.jea.acm.org/2002/EdelkampHeapsort/
1248 and ``Pushing the Limits in Sequential Sorting´´
1249 http://www.informatik.uni-freiburg.de/~edelkamp/publications/Improvements.pdf
1250 */
1251 memset(rev, 0, len);
1252 /* weak-heap-forestify
1253 every node roots a weak heap AND is <= root
1254 (especially the leftist independent roots are <= root)
1255 */
1256 for ( i=len; --i; ) {
1257 int g = i; /* granma is 1st left i or root, if i is totally leftist ;) */
1258 while (!(1&g)) /* we're left child */
1259 g >>= 1; /* shift granma right :( */
1260 g >>= 1;
1261 if (!keyGt(key[i], key[g])) /* merge: swap if g < i */
1262 continue;
1263 tmp = key[i]; key[i] = key[g]; key[g] = tmp; /* swap */
1264 rev[i] = 1;
1265 }
1266
1267 /* re-weak-heap-forestify */
1268 for ( i=len; --i>1; ) {
1269 int j=1, left;
1270 /* rev[i] = 0; i's REVBIT no longer needed */
1271 /* follow path of left childs (independent roots) */
1272 while (i > (left = (j<<1) + rev[j]))
1273 j = left;
1274 /* put max of them and i (new root) at i */
1275 do {
1276 if (!keyGt(key[j], key[i])) /* merge: swap if i < j */
1277 continue;
1278 tmp = key[i]; key[i] = key[j]; key[j] = tmp; /* swap */
1279 rev[j] ^= 1; /* toggle revbit */
1280 } while (j >>= 1);
1281 }
1282 /* rev[1] = 0; 2... have been cleared */
1283 key[len] = key[0]; /* initial root still untouched biggest */
1284 #endif
1285 #ifndef SORT_WH
1286 key--;
1287 #endif
1288 eRr(LOG_VERBOSE, "%d sorted %d keys with %d comparisions -- loading ...",
1289 tUpd(&tm), len, ncomp);
1290 cp:
1291 /* finally cp to dest */
1292 if (FIL_NONE == dx->qld->fil) { /* internal -- load */
1293 unsigned char **e = key+len+1;
1294 while (++key != e)
1295 if ((ret = load(dx, *key)))
1296 goto err;
1297 eRr(LOG_VERBOSE, "%d loaded", tUpd(&tm));
1298 } else { /* external -- write MERGECHK blks */
1299 unsigned char **e = key+len+1;
1300 unsigned char *out = dx->qld->mout, *end = out+MERGEBLK, *dst = out;
1301 unsigned blks = 0;
1302
1303 while (++key != e) {
1304 unsigned l = 1+**key;
1305 if (end <= dst+l) {
1306 *dst = 0;
1307 if (0 > (ret = fWrite(&dx->qld->fil, out, MERGEBLK)))
1308 goto err;
1309 dst = out;
1310 if (MERGECHK == ++blks)
1311 return eRr(ERR_IDIOT, "CHUNK!");
1312 }
1313 memcpy(dst, *key, l);
1314 dst += l;
1315 }
1316 do {
1317 *dst = 0;
1318 fWrite(&dx->qld->fil, out, MERGEBLK);
1319 dst = out;
1320 } while (MERGECHK != ++blks);
1321 eRr(LOG_VERBOSE, "%d wrote %d blocks ...", tUpd(&tm), blks);
1322 dx->qld->blks += MERGECHK;
1323 ret = 0;
1324 }
1325 err:
1326 return ret;
1327 } /* sort */
1328
1329
1330 static int merge (Qdx *dx, QLoad *ql)
1331 {
1332 #ifndef MERGEMULTIPASS
1333 # define chunk MERGECHK
1334 #else
1335 unsigned char *const out = ql->mout, *const end = out+MERGEBLK;
1336 unsigned chunk = MERGECHK, pass = 1;
1337 file fil2 = FIL_NONE;
1338 #endif
1339 lolo tm;
1340 int ret = 0;
1341
1342 eRr(LOG_VERBOSE, "%d-way merging %d blocks in %d chunks of %d ...",
1343 MERGEWAY, ql->blks, (ql->blks+MERGECHK-1)/MERGECHK, MERGECHK);
1344 tUpd(&tm);
1345 ncomp = 0;
1346 #ifndef MERGEMULTIPASS
1347 { /* single pass */
1348 #else
1349 if (ql->blks > MERGECHK*MERGEWAY) /* multipass */
1350 fOpen(&fil2, 0, 0);
1351 for (;; pass++) {
1352 #endif
1353 unsigned blkno = 0;
1354 #ifndef MERGEMULTIPASS
1355 # define inf &ql->fil
1356 { /* single run in single pass */
1357 #else
1358 unsigned nchunk = chunk*MERGEWAY; /* lastrun, if >= ql->blks */
1359 int lastpass = nchunk >= ql->blks, run = 1;
1360 file *inf, *outf;
1361
1362 if (1 & pass) {
1363 inf = &ql->fil; outf = &fil2;
1364 } else {
1365 inf = &fil2; outf = &ql->fil;
1366 }
1367 if (!lastpass)
1368 fSeek(outf, 0);
1369 while (blkno < ql->blks) {
1370 /* merge MERGEWAY chunks of length chunk to n chunks of size nchunk */
1371 /* only one run in last pass */
1372 unsigned char *dst = out;
1373 unsigned blks = 0;
1374 #endif
1375 #ifdef MERGEDEBUG
1376 unsigned char lastkey[256];
1377 #endif
1378 struct {
1379 unsigned char *blk; /* merge blk */
1380 unsigned char *key; /* current key */
1381 unsigned nxt; /* next block no */
1382 unsigned end; /* end */
1383 } in[MERGEWAY], *nxt[MERGEWAY], *tmp;
1384 unsigned ways = 0, i;
1385
1386 for (ways=0; ways<MERGEWAY; ways++) { /* prefill */
1387 if (blkno >= ql->blks)
1388 break;
1389 in[ways].key = in[ways].blk = ql->buf.m[ways];
1390 if (MERGEBLK != (ret =
1391 fPread(inf, in[ways].blk, MERGEBLK, blkno*MERGEBLK)))
1392 goto done;
1393 in[ways].nxt = blkno+1;
1394 if (ql->blks < (blkno += chunk)) blkno = ql->blks;
1395 in[ways].end = blkno;
1396 /* insert */
1397 for (i=0; i<ways && !keyGt(nxt[i]->key, in[ways].key); ) i++;
1398 if (i < ways) memmove(nxt+i+1, nxt+i, (ways-i)*sizeof nxt[0]);
1399 nxt[i] = in+ways;
1400 }
1401 #ifndef MERGEMULTIPASS
1402 eRr(LOG_VERBOSE, "%d prepared %d blocks %d ways", tUpd(&tm), blkno, ways);
1403 #else
1404 eRr(LOG_VERBOSE, "%d prepared %d/%d blocks %d ways run %d of pass %d%s",
1405 tUpd(&tm), blkno, ql->blks, ways, run, pass, lastpass?" (last)":"");
1406 #endif
1407 #ifdef MERGEDEBUG
1408 lastkey[0] = 0;
1409 for (i=0; i<ways; i++)
1410 eRr(LOG_VERBOSE, "%d %b", i, *nxt[i]->key, nxt[i]->key+1);
1411 #endif
1412 for (;;) { /* merge */
1413 int len = *nxt[0]->key + 1;
1414 /* stuff away 1st key */
1415 #ifdef MERGEDEBUG
1416 if (keyGt(lastkey, nxt[0]->key)) {
1417 ret = eRr(ERR_IDIOT, "'%b' > '%b' way %d",
1418 *lastkey, lastkey+1, *nxt[0]->key, nxt[0]->key+1, nxt[0]-in);
1419 goto done;
1420 }
1421 memcpy(lastkey, nxt[0]->key, len);
1422 #endif
1423 #ifdef MERGEMULTIPASS
1424 if (lastpass) {
1425 #endif
1426 if ((ret = load(dx, nxt[0]->key))) goto done;
1427 #ifdef MERGEMULTIPASS
1428 } else {
1429 if (len >= end - dst) { /* flush out block */
1430 # ifdef MERGEDEBUG
1431 eRr(LOG_VERBOSE, "out blk %d len %d %b", blks, dst-out, *out, out+1);
1432 # endif
1433 *dst = 0; /* terminate */
1434 if (MERGEBLK != (ret = fWrite(outf, dst = out, MERGEBLK)))
1435 goto done;
1436 blks++;
1437 }
1438 memcpy(dst, nxt[0]->key, len);
1439 dst += len;
1440 }
1441 #endif
1442 /* reload nxt[0] */
1443 nxt[0]->key += len;
1444 if (!*nxt[0]->key) { /* eob */
1445 if (nxt[0]->nxt != nxt[0]->end) {
1446 if (MERGEBLK != (ret = fPread(inf, nxt[0]->key = nxt[0]->blk,
1447 MERGEBLK, nxt[0]->nxt++ * MERGEBLK)))
1448 goto done;
1449 }
1450 if (!*nxt[0]->key) { /* done with this stream */
1451 if (!--ways)
1452 break;
1453 memmove(nxt, nxt+1, ways*sizeof nxt[0]);
1454 continue;
1455 }
1456 }
1457 #ifdef MERGEDEBUG
1458 if (keyGt(lastkey, nxt[0]->key)) {
1459 ret = eRr(ERR_IDIOT, "'%b' > '%b' in blk %d off %d way %d",
1460 *lastkey, lastkey+1, *nxt[0]->key, nxt[0]->key+1,
1461 nxt[0]->nxt-1, nxt[0]->key-nxt[0]->blk, nxt[0]-in);
1462 goto done;
1463 }
1464 #endif
1465 if (1 == ways) continue;
1466 /*
1467 this may look like pretty brute force.
1468 However, in practice we rarely see reinsertions at all
1469 and get by with about 1.1 comparisions per key.
1470 With sorted input we have even less than 1 per key,
1471 due to the continue (last exit oneway) above.
1472 */
1473 for (i=1; i<ways && keyGt(nxt[0]->key, nxt[i]->key); ) i++;
1474 /* nxt[0]->key is not gt nxt[i]->key: move to --i */
1475 if (--i) {
1476 tmp = nxt[0];
1477 memmove(nxt, nxt+1, i*sizeof nxt[0]);
1478 nxt[i] = tmp;
1479 }
1480 } /* merge */
1481 ret = 0;
1482 #ifndef MERGEMULTIPASS
1483 eRr(LOG_VERBOSE, "%d merged %d comparisions", tUpd(&tm), ncomp);
1484 } /* single run */
1485 #else
1486 eRr(LOG_VERBOSE, "%d merged %d comparisions %d/%d blocks pass %d",
1487 tUpd(&tm), ncomp, blks, nchunk, pass);
1488 if (lastpass)
1489 goto done;
1490 if (nchunk <= blks)
1491 eRr(ERR_IDIOT, "wrote %d >= %d blks pass %d", blks, nchunk, pass);
1492 do {
1493 *dst = 0;
1494 fWrite(outf, out, MERGEBLK);
1495 dst = out;
1496 } while (nchunk != ++blks);
1497 run++;
1498 } /* for new chunks */
1499 chunk = nchunk;
1500 #endif
1501 } /* pass */
1502 done:
1503 #ifdef MERGEMULTIPASS
1504 if (FIL_NONE != fil2) fClose(&fil2);
1505 #endif
1506 return ret;
1507 } /* merge */
1508
1509
1510 int qLoad ( Qdx *dx, Key *key )
1511 {
1512 QLoad *ql = dx->qld;
1513 int ret, l = key->len+dx->vsz;
1514
1515 if ( !ql ) {
1516 if (! key->val.len) return 0; /* double close ? */
1517 ncomp = 0;
1518 if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg))
1519 return eRr(ERR_TRASH, "need excl write access to load index");
1520 ql = dx->qld = (QLoad*)mAlloc(sizeof (QLoad));
1521 if ( dx->let > 50 )
1522 dx->let = 50;
1523 ql->let = dx->let * dx->lsz / 100;
1524 fTrunc(&dx->mqd, 0);
1525 dx->lln = 0;
1526 newleaf(&ql->cur.b, dx);
1527 ql->fil = FIL_NONE;
1528 ql->nkey = ql->buf.s.keyb;
1529 ql->nmem = ql->buf.s.memb;
1530 eRr(LOG_INFO,
1531 "loading %d/%dK mem, %d keys, %dK blocks in %d-chunks, %d-way merge"
1532 #ifdef MERGEMULTIPASS
1533 " (multipass)"
1534 #endif
1535 , SORTUSE>>10, SORTMEM>>10, SORTKEYS, MERGEBLK>>10, MERGECHK, MERGEWAY
1536 );
1537 }
1538 if ( !key->val.len /* done */
1539 || SORTKEYS-2 <= ql->nkey-ql->buf.s.keyb /* reserve 1 for WHS */
1540 || SORTUSE <= ql->nmem-ql->buf.s.memb
1541 ) {
1542 if (key->val.len && FIL_NONE == ql->fil)
1543 fOpen(&ql->fil, 0, 0);
1544 if ((ret = sort(dx, ql->buf.s.keyb, ql->nkey - ql->buf.s.keyb))
1545 || !key->val.len
1546 )
1547 goto done;
1548 ql->nkey = ql->buf.s.keyb;
1549 ql->nmem = ql->buf.s.memb;
1550 }
1551 /* add key to sort buffer */
1552 if (key->val.len == dx->vsz && l < 256) {
1553 *ql->nkey++ = ql->nmem;
1554 *ql->nmem = (unsigned char)l;
1555 memcpy(++ ql->nmem, key->byt, key->len);
1556 memcpy(ql->nmem += key->len, key->val.byt, dx->vsz);
1557 ql->nmem += dx->vsz;
1558 return 0;
1559 }
1560 ret = eRr(ERR_INVAL, "bad klen %d vlen %d", key->len, key->val.len);
1561 done:
1562 if (FIL_NONE != ql->fil) {
1563 if (!ret) ret = merge(dx, ql);
1564 fClose(&ql->fil);
1565 }
1566 if (!ret) {
1567 load(dx, 0); /* close last leaf */
1568 qRemake(dx); /* build tree */
1569 }
1570 mFree(dx->qld);
1571 dx->qld = 0;
1572 return ret;
1573 } /* qLoad */
1574
1575
1576 int qLoadf ( Qdx *dx, file *f )
1577 {
1578 Key key;
1579 lolo tm;
1580
1581 FIL_DEFBUF(*f);
1582 memset( &key, 0, sizeof(key) );
1583 key.val.len = dx->vsz;
1584 tUpd(&tm);
1585 while ( fGets(&fb) && fb.l ) {
1586 if ( qMkKeyVal(dx, &key, fb.p, fb.l) ) {
1587 eRr(LOG_VERBOSE, "bad key at line %d offset %d", fb.n, fb.o);
1588 continue;
1589 }
1590 if ( qLoad( dx, &key ) )
1591 break;
1592 if ( !(fb.n & 0x3ff) )
1593 eRr(LOG_VERBOSE, "%dK lines", fb.n >> 10 );
1594 }
1595 eRr(LOG_INFO, "%d did %d lines %d bytes (%s)", tUpd(&tm), fb.n, fb.o,
1596 FIL_NONE == fb.f ? "eof" : "blank" );
1597 *f = fb.f;
1598 key.val.len = 0; /* close it */
1599 return qLoad(dx, &key);
1600 } /* qLoadf */
1601
1602
1603 /* TODO: this function is an awful mess and needs complete rewrite, KR
1604 */
1605 int qSet ( Qdx *dx, Key *key )
1606 {
1607 Bloke blk[2];
1608 Key up;
1609 Block * b = &blk[0].b; /* mainblock */
1610 unsigned char * base = blk[0].base;
1611 int ret = 0;
1612 unsigned short e, i = 0;
1613 Block *s = 0; /* sibling */
1614 Dict *d;
1615 unsigned char *sbase, *src, *v, *stk;
1616 FindKey f;
1617 int hadkey, split = 0, insibling = 0;
1618 unsigned need;
1619 unsigned sibnum = 0;
1620 unsigned char lev = 0;
1621 PrVal pv;
1622 #ifdef BUILD_SHMODE /* locks we hold on block, sibling, tree */
1623 int lb=-1, ls=-1, lt=0;
1624 #endif
1625
1626 f.dx = dx;
1627 f.key = key;
1628 if ( dx->ksz && key->len > dx->ksz )
1629 key->len = dx->ksz;
1630 if ( FINDKEY(b, &f, LOCK_WRITE) ) {
1631 b = 0;
1632 DONE((ERR_TRASH, "could not find key"));
1633 }
1634 #ifdef BUILD_SHMODE
1635 lb = b->num;
1636 #endif
1637 e = (QENTMASK & f.e[0]);
1638 d = b->dct + e;
1639 if ( ! (hadkey = QMATCH & f.e[0]) ) /* new key */
1640 need = key->len + sizeof(Dict) + dx->vsz;
1641 else {
1642 unsigned vln = LEAFVLN(d);
1643 v = base+LEAFPOS(d)+d->kln;
1644 if ( 1 == vln ) { /* check stopword -- value entirely zeroed */
1645 int l = dx->vsz;
1646 unsigned char *C = v;
1647 while ( l-- && !*C++ ) /* huh huh, he said "C++" */
1648 ;
1649 if ( -1 == l ) {
1650 LOG_DBG(LOG_VERBOSE, "key '%b' is a stopword", key->len, key->byt);
1651 return QST_STPWRD;
1652 }
1653 /* TODO: check QST_UNIKEY */
1654 }
1655 i = valpos( v, vln, dx->vsz, key->val.byt );
1656 if ( QMATCH & i ) {
1657 LOG_DBG( LOG_DEBUG,
1658 "adding key '%b'/%s: already there pos %u",
1659 key->len, key->byt, PRVAL( pv, key->val ), i & ~QMATCH );
1660 return QST_FOUND; /* value already there */
1661 }
1662 need = dx->vsz;
1663 }
1664 LOG_DBG( LOG_DEBUG,
1665 "adding key '%b'/%s in block %d had %d ent %d stk %d need %d",
1666 key->len, key->byt, PRVAL( pv, key->val ),
1667 b->num, hadkey, b->ent, b->stk, need );
1668
1669 if ( need > b->stk - DICTOFF - b->ent*sizeof(Dict) ) {
1670 /* now that's bad: not enough space
1671 create new block s, move last entries there
1672 starting from the end, entries (including the new one) are collected,
1673 until their combined size reaches more than half a block.
1674 If the combined size then is more than 2/3 a block,
1675 and the last entry alone takes more than 1/3 a block, it is split.
1676 TODO: check existing sibling for free space
1677 */
1678 unsigned half = (dx->lsz - DICTOFF) >> 1;
1679 unsigned tot = 0;
1680 unsigned mv = b->ent;
1681
1682 LOG_DBG( LOG_DEBUG, "splitting block %d need %d have %d (stk %d ent %d)",
1683 b->num, need, b->stk - DICTOFF - b->ent*sizeof(Dict), b->stk, b->ent );
1684 if ( need > (dx->lsz - DICTOFF) / 3 )
1685 DONE((ERR_IDIOT, "OOPS! excessive need for %d bytes", need));
1686 /* figure out split position */
1687 if ( !hadkey && b->ent == e ) {
1688 tot = need;
1689 insibling = !0;
1690 }
1691 d = b->dct+mv;
1692 while ( d--, mv-- ) {
1693 unsigned sz = sizeof(Dict)+d->kln+LEAFVLN(d)*dx->vsz;
1694 if ( half > (tot += sz) ) { /* sz fits */
1695 if ( e == mv && !hadkey ) {
1696 /*
1697 check size of new entry.
1698 In case of hadkey, we don't bother for the dx->vsz bytes.
1699 */
1700 if ( half < (tot + need) ) {
1701 if ( (half - tot) > need/2 ) { /* more balanced if we move */
1702 tot += need;
1703 insibling = !0;
1704 }
1705 break;
1706 }
1707 tot += need;
1708 insibling = !0;
1709 }
1710 if ( mv )
1711 continue; /* else croak on ! mv below */
1712 }
1713 /* exceeded half -- maybe we move entry mv anyway */
1714 if ( tot > (dx->lsz - DICTOFF)*2/3 ) {
1715 if ( tot-sz < (dx->lsz - DICTOFF)/3 )
1716 split = !0;
1717 else { /* undo -- don't move this entry */
1718 tot -= sz;
1719 d++; mv++;
1720 }
1721 } else if ( ! mv )
1722 DONE((ERR_IDIOT, "OOPS! didn't find end tot %d bytes", tot));
1723 break;
1724 } /* while mv-- */
1725 if ( mv < e || (mv == e && hadkey && !split) )
1726 insibling = !0;
1727 /* now mv is the first entry to move */
1728 /* create new leaf block dx->lln */
1729 #ifdef BUILD_SHMODE
1730 for (;;) {
1731 int size;
1732 QLOCKEX(dx->lln<<1);
1733 size = fSize( dx->mqd );
1734 if ( size == (int)(dx->lln * dx->lsz) ) {
1735 ls = dx->lln;
1736 break;
1737 }
1738 QLOCKUN(dx->lln<<1);
1739 if ( size % dx->lsz || size < (int)(dx->lln * dx->lsz) )
1740 DONE((ERR_TRASH, "bad mqd size 0x%08x", size));
1741 dx->lln = size / dx->lsz;
1742 }
1743 /* have lock on new leaf block dx->lln */
1744 #endif
1745 newleaf(s = &blk[1].b, dx);
1746 sbase = (unsigned char*)s; /* == blk[1].base */
1747 LOG_DBG( LOG_DEBUG,
1748 "split: move %d of %d to new blk %d e %d tot %d insibling %d split %d",
1749 mv, b->ent, s->num, e, tot, insibling, split );
1750 s->nxt = b->nxt;
1751 b->nxt = s->num;
1752 if ( split ) {
1753 /* now total includes full size of entry mv.
1754 if we included a new entry, it might even exceed lsz.
1755 calculate sizes for both blocks w/o mv's values.
1756 */
1757 unsigned esz = sizeof(Dict)+d->kln; /* entry's base cost */
1758 unsigned vln = LEAFVLN(d);
1759 unsigned keep, diff, mvval, kpval, mvsiz, pos = LEAFPOS(d);
1760 /*
1761 from currently used space, substract tot (including all
1762 values and possibly new key "need"), but add entry base cost.
1763 that should be the size remaining if we where moving
1764 all of the values but keep an entry and key.
1765 maybe slightly biased if need is dx->vsz (the hadkey case).
1766 */
1767 keep = dx->lsz - b->stk + b->ent*sizeof(Dict) + need - tot + esz;
1768 tot -= vln * dx->vsz; /* undo entries */
1769 diff = (keep > tot ? keep - tot : tot - keep) / dx->vsz;
1770 if ( diff > vln )
1771 DONE((ERR_IDIOT, "OOPS! got diff %d vln %d entry %d tot %d keep %d",
1772 diff, vln, mv, tot, keep));
1773 mvval = (vln - diff)/2;
1774 if ( keep > tot )
1775 mvval += diff;
1776 kpval = vln - mvval;
1777 if ( ! mvval || ! kpval ) {
1778 LOG_DBG( LOG_WARN, "bad mv/kpval %d/%d keep %d tot %d vln %d diff %d",
1779 mvval, kpval, keep, tot, vln, diff );
1780 if ( ! mvval )
1781 kpval = vln - (mvval = 1);
1782 else
1783 mvval = vln - (kpval = 1);
1784 }
1785 tot += mvsiz = mvval*dx->vsz;
1786 keep += kpval*dx->vsz;
1787 src = base+pos;
1788 LOG_DBG( LOG_DEBUG,
1789 "split: entry %d '%b' mv %d of %d values, i %d keep %d tot %d",
1790 mv, d->kln, src, mvval, vln, i, keep, tot );
1791 if ( keep > (dx->lsz-DICTOFF) || tot > (dx->lsz-DICTOFF) )
1792 DONE((ERR_IDIOT,
1793 "OOPS! got diff %d vln %d mvval %d entry %d tot %d keep %d",
1794 diff, vln, mvval, mv, tot, keep));
1795 /* SIGH -- do it */
1796 memcpy( sbase + (s->stk -= mvsiz),
1797 src+d->kln+kpval*dx->vsz, mvsiz );
1798 if ( e == mv && hadkey && i > kpval ) {
1799 insibling = !0;
1800 s->stk -= dx->vsz;
1801 i -= kpval;
1802 memmove( sbase+s->stk, sbase+s->stk+dx->vsz, i*dx->vsz );
1803 memcpy( sbase+s->stk+i*dx->vsz, key->val.byt, dx->vsz );
1804 mvval++;
1805 }
1806 memcpy( sbase + (s->stk -= d->kln), src, d->kln );
1807 LEAFSETPVK( s->dct, s->stk, mvval, d->kln );
1808 s->ent = 1;
1809 memmove( src + mvsiz, src, d->kln+kpval*dx->vsz );
1810 pos += mvsiz;
1811 LEAFSETPV( d, pos, kpval );
1812 d++; mv++;
1813 }
1814 /* now entries mv..b->ent are moved, mv > 0 */
1815 b->stk = LEAFPOS(b->dct+mv-1); /* pre-adjust stack */
1816 if ( mv < e ) { /* move entries mv..e-1 */
1817 /* used stack space is from pos of entry e-1 to end of entry mv
1818 (might differ from pos of mv-1 due to split)
1819 */
1820 unsigned from = LEAFPOS(b->dct+e-1), nent = e-mv,
1821 sz = LEAFPOS(b->dct+mv)+b->dct[mv].kln
1822 +LEAFVLN(b->dct+mv)*dx->vsz - from;
1823 int adj = (s->stk -= sz) - from; /* entry position adjustment */
1824 memcpy( sbase + s->stk, base+from, sz );
1825 memcpy( s->dct+s->ent, b->dct+mv, nent*sizeof(Dict) );
1826 d = s->dct + s->ent;
1827 s->ent += nent;
1828 for ( ; nent--; d++ )
1829 LEAFPOSADD( d, adj );
1830 }
1831 if ( insibling && mv <= e ) { /* now mv or insert e, unless done on split */
1832 d = b->dct + e;
1833 if ( hadkey ) { /* mv existing entry e >= mv, add new val */
1834 /* i is valpos */
1835 unsigned vln = LEAFVLN(d);
1836 src = base+LEAFPOS(d)+d->kln;
1837 if ( i < vln ) {
1838 unsigned sz = (vln - i)*dx->vsz;
1839 memcpy( sbase + (s->stk -= sz), src + i*dx->vsz, sz );
1840 }
1841 memcpy( sbase + (s->stk -= dx->vsz), key->val.byt, dx->vsz );
1842 if ( i ) {
1843 unsigned sz = i*dx->vsz;
1844 memcpy( sbase + (s->stk -= sz), src, sz );
1845 }
1846 memcpy( sbase + (s->stk -= d->kln), base+LEAFPOS(d), d->kln );
1847 LEAFSETPVK( s->dct + s->ent, s->stk, vln+1, d->kln );
1848 e++; /* e is moved */
1849 } else {
1850 memcpy( sbase + (s->stk -= dx->vsz), key->val.byt, dx->vsz );
1851 memcpy( sbase + (s->stk -= key->len), key->byt, key->len );
1852 LEAFSETPVK( s->dct + s->ent, s->stk, 1, key->len );
1853 }
1854 s->ent++;
1855 } /* mv or insert e */
1856 if ( e < b->ent && mv < b->ent ) { /* move entries max(e,mv) to end */
1857 unsigned fst = mv < e ? e : mv;
1858 /* used stack space is from pos of entry b->ent-1 (the original b->stk)
1859 to end of entry fst (might differ from pos of fst-1 due to split)
1860 */
1861 unsigned from = LEAFPOS(b->dct+b->ent-1), nent = b->ent-fst,
1862 sz = LEAFPOS(b->dct+fst)+b->dct[fst].kln
1863 +LEAFVLN(b->dct+fst)*dx->vsz - from;
1864 int adj = (s->stk -= sz) - from; /* entry position adjustment */
1865 LOG_DBG( LOG_DEBUG, "split: entries %d..%d sz %d from %d to %d",
1866 fst, b->ent-1, sz, from, s->stk );
1867 memcpy( sbase + s->stk, base+from, sz );
1868 memcpy( s->dct+s->ent, b->dct+fst, nent*sizeof(Dict) );
1869 d = s->dct + s->ent;
1870 s->ent += nent;
1871 for ( ; nent--; d++ ) {
1872 #if !defined(NDEBUG)
1873 unsigned p = LEAFPOS(d);
1874 #endif
1875 LEAFPOSADD( d, adj );
1876 #if !defined(NDEBUG)
1877 LOG_DBG( LOG_DEBUG, "new ent %d p/v/k %d/%d/%d o %d",
1878 d - s->dct, LEAFPOS(d), LEAFVLN(d), d->kln, p );
1879 #endif
1880 }
1881 }
1882 b->ent = mv; /* kill entries */
1883 LOG_DBG( LOG_DEBUG,
1884 "split: blks %d/%d with %d/%d entries, s starts '%b'",
1885 b->num, s->num, b->ent, s->ent, s->dct[0].kln, sbase+LEAFPOS(s->dct) );
1886 } /* new sibling */
1887
1888 if ( ! insibling ) {
1889 /* now we do have enough space */
1890 d = b->dct + e;
1891 stk = base + b->stk; /* bottom of data stack */
1892 if ( hadkey ) { /* insert value at i */
1893 unsigned char *val = base + LEAFPOS(d)+d->kln+i*dx->vsz;
1894 need = dx->vsz;
1895 #if 0
1896 LOG_DBG( LOG_DEBUG,
1897 "in entry %d '%b' val %d: memmove( %p %p %d) val %p need %d",
1898 e, d->kln, base+LEAFPOS(d), i,
1899 stk-need, stk, (val-stk)+need, val, need );
1900 #endif
1901 memmove( stk - need, stk, (val - stk)+need );
1902 memcpy( val - need, key->val.byt, dx->vsz );
1903 LEAFVLNADD( d, 1 );
1904 } else { /* insert key, value and dict entry */
1905 /* take dict and data space from entry e */
1906 unsigned char *nu;
1907 need = key->len + dx->vsz;
1908 if ( e == b->ent )
1909 nu = base + b->stk - need;
1910 else {
1911 /* at end of next entry, i.e. end of block or beg of previous */
1912 nu = base + LEAFPOS(d) + d->kln + LEAFVLN(d)*dx->vsz;
1913 memmove( stk - need, stk, nu - stk );
1914 nu -= need;
1915 memmove( d+1, d, (b->ent - e)*sizeof(Dict) );
1916 }
1917 memcpy( nu, key->byt, key->len );
1918 memcpy( nu + key->len, key->val.byt, dx->vsz );
1919 LEAFSETPV( d, nu-base, 1 );
1920 d->kln = key->len;
1921 d++;
1922 e++;
1923 b->ent++;
1924 }
1925 /*
1926 need is set to the needed (i.e. additionally used) data space,
1927 e is the first dict entry to redirect need bytes lower
1928 */
1929 b->stk -= need;
1930 for ( ; e < b->ent; e++, d++ )
1931 LEAFPOSADD( d, -need );
1932 } /* ! insibling */
1933 if ( s ) { /* save upkey before writing, which may kill s */
1934 unsigned char *e0 = ((unsigned char*)s) + LEAFPOS(s->dct);
1935 memcpy( up.byt, e0, up.len = s->dct[0].kln );
1936 sibnum = s->num;
1937 if ( split )
1938 memcpy( up.val.byt, e0+up.len, up.val.len = dx->vsz );
1939 else
1940 up.val.len = 0;
1941 LOG_DBG( LOG_DEBUG, "split: had %d/%d key '%b' vln %d %s",
1942 b->num, s->num, up.len, up.byt, up.val.len, PRVAL( pv, up.val ) );
1943 wrtleaf(s, dx);
1944 }
1945 wrtleaf(b, dx);
1946
1947 /*
1948 WHOA -- now all that was just to care for the leaf level!
1949 Now walk up inserting pointers in parent forks while we split.
1950 */
1951
1952 #ifdef BUILD_SHMODE /* lock tree, unlock leaves */
1953 QLOCKUN(lb<<1);
1954 lb = -1;
1955 if ( sibnum ) {
1956 QLOCKEX(1); /* get tree lock */
1957 lt = 1;
1958 QLOCKUN(ls<<1);
1959 ls = -1;
1960 }
1961 #endif
1962 while ( sibnum ) { /* created sibling */
1963 unsigned char *e0;
1964 int tog = 0;
1965 Block *ins;
1966
1967 /* get parent */
1968 lev++;
1969 b = getfork(&blk[0].b, dx, f.n[lev], lev);
1970 LOG_DBG( LOG_DEBUG,
1971 "split: got parent %d lev %d", b->num, b->lev );
1972 e = keyfork(b, &up, dx->vsz);
1973 /* again L/Y link hunt */
1974 while ( !(QMATCH & e) && b->ent == e && b->nxt && b->num ) {
1975 unsigned se;
1976 /* peek next block, toggling buffers */
1977 s = getfork(&blk[(++tog)&1].b, dx, b->nxt, lev);
1978 se = keyfork(s, &up, dx->vsz);
1979 if ( !se ) /* lower than first */
1980 break;
1981 LOG_DBG( LOG_DEBUG, "LYao %d -> %d lev %d for '%b' got 0x%4x",
1982 b->num, s->num, i, up.len, up.byt, se );
1983 /* trade b for s */
1984 b = s;
1985 e = se;
1986 }
1987 base = (unsigned char*)b;
1988 ins = b;
1989 s = 0;
1990 LOG_DBG( LOG_DEBUG, "split: got parent pos %4x", e );
1991 if ( QMATCH & e ) /* !??? */
1992 DONE((ERR_TRASH, "OOPS! found key in parent"));
1993 if ( ! e ) {
1994 TRACEKEY( ERR_TRASH, &f );
1995 DONE((ERR_TRASH, "OOPS! parent insert at 0 lev %d", lev));
1996 }
1997 need = sizeof(Dict) + up.len + 4 + up.val.len;
1998 if ( need > b->stk - DICTOFF - b->ent*sizeof(Dict) ) {
1999 unsigned half = ((env.psz - b->stk)+b->ent*sizeof(Dict)+need) / 2;
2000 unsigned mv = b->ent, tot = 0, nent, sz;
2001 int adj;
2002 if ( e == b->ent ) {
2003 tot = need;
2004 ins = 0;
2005 }
2006 for ( d = b->dct+mv; d--, mv--; ) {
2007 sz = sizeof(Dict) + d->kln + 4 + (d->vln ? dx->vsz : 0);
2008 if ( half > (tot += sz) ) {
2009 if ( e == mv ) {
2010 if ( half > tot+need || tot+need-half < half-tot ) {
2011 tot += need;
2012 ins = 0; /* new link goes to sibling */
2013 } else
2014 break;
2015 if ( half <= tot )
2016 break;
2017 }
2018 if ( mv )
2019 continue;
2020 }
2021 if ( tot - half > sz/2 ) { /* don't move this */
2022 tot -= sz;
2023 d++; mv++;
2024 } else if ( ! mv ) /* !??? */
2025 mv = 1;
2026 break;
2027 }
2028 /* split here a little bit simpler */
2029 s = newfork( b==&blk[0].b ? &blk[1].b : &blk[0].b, dx, lev );
2030 s->nxt = b->nxt;
2031 b->nxt = s->num;
2032 /* straight move entries mv..b->ent-1 to sibling */
2033 sbase = (unsigned char*)s;
2034 d = b->dct + mv;
2035 sz = b->dct[mv-1].pos - b->stk;
2036 nent = b->ent - mv;
2037 LOG_DBG( LOG_DEBUG,
2038 "split: parent lev %d split %d to new %d mv %d"
2039 " '%b'/%s nent %d sz %d",
2040 lev, b->num, s->num, mv, d->kln, base+d->pos,
2041 prval( pv, base+d->pos+d->kln, d->vln?dx->ptr:0 ), nent, sz );
2042 memcpy( sbase + (s->stk -= sz), base + b->stk, sz );
2043 adj = s->stk - b->stk;
2044 b->stk += sz;
2045 memcpy( s->dct, d, nent*sizeof(Dict) );
2046 b->ent = mv;
2047 s->ent = nent;
2048 for ( d = s->dct; nent--; d++ )
2049 d->pos += adj;
2050 if ( ! ins ) {
2051 ins = s;
2052 e -= mv;
2053 }
2054 if ( (ret = chkblk( s, dx )) )
2055 break;
2056 }
2057 /* now insert link in block ins at pos e */ {
2058 unsigned isz = up.len + 4 + up.val.len, nent = ins->ent - e;
2059 unsigned char *ibase = (unsigned char*)ins;
2060 unsigned end = e ? ins->dct[e-1].pos : env.psz;
2061 LOG_DBG( LOG_DEBUG,
2062 "split: ins parent %d lev %d pos %d key '%b' -> %d vln %d %s",
2063 ins->num, ins->lev, e, up.len, up.byt, sibnum, up.val.len,
2064 PRVAL( pv, up.val ) );
2065 memmove( ibase + ins->stk - isz, ibase + ins->stk, end - ins->stk );
2066 ins->stk -= isz;
2067 SETINT(ibase + (end -= 4), sibnum);
2068 if ( up.val.len )
2069 memcpy( ibase + (end -= up.val.len), up.val.byt, up.val.len );
2070 memcpy( ibase + (end -= up.len), up.byt, up.len );
2071 for ( d = ins->dct+ins->ent; nent--; d-- ) {
2072 *d = d[-1];
2073 d->pos -= isz;
2074 }
2075 d->pos = end;
2076 d->vln = up.val.len ? 1 : 0;
2077 d->kln = up.len;
2078 ins->ent++;
2079 if ( (ret = chkblk( ins, dx )) )
2080 break;
2081 }
2082 wrtfork(b, dx);
2083 if ( ! s ) /* not split - we're done */
2084 break;
2085 /* save sibling's key */
2086 sibnum = s->num;
2087 e0 = ((unsigned char*)s) + s->dct[0].pos;
2088 memcpy( up.byt, e0, up.len = s->dct[0].kln );
2089 if ( s->dct[0].vln )
2090 memcpy( up.val.byt, e0+up.len, up.val.len = dx->vsz );
2091 else
2092 up.val.len = 0;
2093 LOG_DBG( LOG_DEBUG, "split: had %d/%d on lev %d key '%b' vln %d %s",
2094 b->num, sibnum, s->lev, up.len, up.byt, up.val.len, PRVAL( pv, up.val ) );
2095 wrtfork(s, dx);
2096 if ( lev < dx->dpt )
2097 continue;
2098 LOG_DBG( LOG_DEBUG, "root split!" );
2099 if ( b != &blk[0].b && b != &blk[1].b ) /* was in map -- could be gone */
2100 b = getfork(&blk[0].b, dx, 0, lev); /* refetch */
2101 /* new block for the old root, use sibling buffer (if any) */
2102 s = newfork(b==&blk[0].b ? &blk[1].b : &blk[0].b, dx, lev);
2103 /* cp all but the num */
2104 memcpy( ((unsigned char*)s)+4, ((unsigned char*)b)+4, env.psz-4 );
2105 b->nxt = s->nxt; /* restore root's special nxt */
2106 s->nxt = 0;
2107 b->stk = env.psz; /* empty the root */
2108 /* add two childs old root and it's sibling */
2109 SETINT( base + (b->stk -= 4), s->num ); /* the old root */
2110 b->dct[0].pos = b->stk; b->dct[0].vln = 0; b->dct[0].kln = 0;
2111 SETINT( base + (b->stk -= 4), sibnum );
2112 e0 = ((unsigned char*)s) + s->dct[0].pos;
2113 if ( ! up.val.len )
2114 b->dct[1].vln = 0;
2115 else {
2116 memcpy( base + (b->stk -= dx->vsz), up.val.byt, dx->vsz );
2117 b->dct[1].vln = 1;
2118 }
2119 memcpy( base + (b->stk -= up.len), up.byt, b->dct[1].kln = up.len );
2120 b->dct[1].pos = b->stk;
2121 b->ent = 2;
2122 b->lev++; /* increase tree depth */
2123 dx->dpt = b->lev;
2124 ret = wrtfork(b, dx);
2125 break;
2126 }
2127 done:
2128 #ifdef BUILD_SHMODE
2129 if ( -1 != lb ) QLOCKUN(lb<<1);
2130 if ( -1 != ls ) QLOCKUN(ls<<1);
2131 if ( lt ) QLOCKUN(1);
2132 #endif
2133 if ( 0 <= ret && b->lev && FINDKEY(&blk[0].b, &f, LOCK_READ) )
2134 ret = eRr(ERR_TRASH, "could not find key after add" );
2135 return ret;
2136 } /* qSet */
2137
2138
2139 int qDel ( Qdx *dx, Key *key )
2140 {
2141 Leaf leaf;
2142 Block * const b = &leaf.b;
2143 unsigned char * const base = leaf.base;
2144 int ret = 0;
2145 unsigned short e, n, i, mv;
2146 Dict *d;
2147 unsigned char *v;
2148 FindKey f;
2149 PrVal pv;
2150
2151 f.dx = dx;
2152 f.key = key;
2153 LOG_DBG( LOG_DEBUG, "deleting key '%b'/%s",
2154 key->len, key->byt, PRVAL( pv, key->val ) );
2155 if ( FINDKEY(b, &f, LOCK_WRITE) )
2156 return eRr(ERR_TRASH, "could not find key");
2157 if ( !(QMATCH & f.e[0]) ) /* key not found - ok */
2158 goto done;
2159 LOG_DBG( LOG_DEBUG, "key found" );
2160 e = (QENTMASK & f.e[0]);
2161 d = b->dct + e;
2162 v = base+LEAFPOS(d)+d->kln; /* start of values */
2163 n = LEAFVLN(d);
2164 if ( ! n )
2165 goto done;
2166 i = valpos( v, n, dx->vsz, key->val.byt );
2167 LOG_DBG( LOG_DEBUG, "val %sfound pos %u of %u",
2168 (QMATCH & i) ? "" : "not ", i & ~QMATCH, n );
2169 if ( !(QMATCH & i) ) /* val not found - ok */
2170 goto done;
2171 i &= ~QMATCH;
2172 LOG_DBG( LOG_DEBUG,
2173 "del blk %d e %d i/n %d/%d", b->num, e, i, n );
2174 if ( 1 < n ) {
2175 v += i*dx->vsz; /* end of stack area to move: start of this value */
2176 mv = dx->vsz; /* amount to move: one value size */
2177 LEAFVLNADD( d, -1 );
2178 } else { /* sole value -- rm whole entry */
2179 v = base+LEAFPOS(d); /* start of key */
2180 mv = d->kln+dx->vsz;
2181 for ( i=b->ent - e - 1; i--; d++ ) /* move the i entries after d down */
2182 *d = d[1];
2183 d = b->dct + e; /* reset */
2184 b->ent--;
2185 }
2186 memmove( base+b->stk+mv, base+b->stk, v-(base+b->stk) );
2187 b->stk += mv;
2188 for ( ; e++ < b->ent; d++ )
2189 LEAFPOSADD( d, mv );
2190 wrtleaf(b, dx);
2191 done:
2192 QLOCKUN(b->num<<1);
2193 return ret;
2194 } /* qDel */
2195
2196
2197 static int setKeyVal (QSet *qst, char *val, int len, int split)
2198 {
2199 Qdx *qdx = qst->qdx;
2200 char *e = val+len;
2201 Key key;
2202 int nope, done = 0;
2203
2204 if (split && qst->pfx.len)
2205 split = 1+qst->pfx.len; /* don't split on 1st pfx.len bytes */
2206 do {
2207 char buf[512], *b;
2208 int l;
2209
2210 if (!split || qdx->cdx)
2211 l = e-val;
2212 else {
2213 char *s;
2214 while (val<e && (CT_WHITE|CT_SPECL)&lat1ct[(unsigned char)*val]) val++;
2215 if (e == (s = val))
2216 break;
2217 while (s<e && !((CT_WHITE|CT_SPECL)&lat1ct[(unsigned char)*s])) s++;
2218 l = s-val;
2219 }
2220 b = val;
2221 if (qst->pfx.len) {
2222 memcpy(b = buf, qst->pfx.byt, qst->pfx.len);
2223 if (l > (int)sizeof buf-qst->pfx.len)
2224 l = sizeof buf-qst->pfx.len;
2225 memcpy(buf+qst->pfx.len, val, l);
2226 l += qst->pfx.len;
2227 }
2228 /* qMkKey(qst->qdx, &key, val, len): */
2229 if (!qdx->cdx)
2230 memcpy(key.byt, b, key.len = l<qdx->ksz ? l : qdx->ksz);
2231 else {
2232 key.len = qdx->ksz;
2233 l = cEnc(qdx->cdx, &key, (unsigned char*)b, l, split);
2234 if ( split && (!key.len /* wordless */
2235 || l <= qst->pfx.len) /* houston, we have a problem */
2236 )
2237 break;
2238 }
2239 qMkVal(&key.val, &qst->ptr, qdx->ptr);
2240 qst->ptr.pos++;
2241 eRr(LOG_VERBOSE, "%s '%.*s'->'%b' %u %u %u %u",
2242 qst->del ? "del" : "set", l, b, key.len, key.byt,
2243 qst->ptr.rid, qst->ptr.tag, qst->ptr.pos>>16, qst->ptr.pos&0xffff);
2244 nope = qst->del ? qDel(qdx, &key) : qSet(qdx, &key);
2245 if (!split)
2246 return nope;
2247 val += l - qst->pfx.len;
2248 if (!nope)
2249 done++;
2250 } while (val < e);
2251 return done;
2252 } /* qSetKeyVal */
2253
2254 int qSetKeyVal (QSet *qst, char *val, int len)
2255 {
2256 return setKeyVal(qst, val, len, 0);
2257 }
2258
2259 int qSetKeyVals (QSet *qst, char *val, int len)
2260 {
2261 return setKeyVal(qst, val, len, 1);
2262 } /* qSetKeyVals */
2263
2264
2265
2266 int qLoop ( QLoop *self )
2267 {
2268 Leaf leaf;
2269 Qdx *dx = self->qdx;
2270 int ret = 0;
2271 unsigned i = 0;
2272 int stop = QSTOP & self->flg;
2273 int skip = QSKIP & self->flg;
2274 Block *b = &leaf.b;
2275 Key *cmp = &self->key;
2276
2277 if ( ! cmp->len ) { /* start at 1st leaf */
2278 if ( b != GETLEAF(b, dx, 0, LOCK_READ) )
2279 DONE((ERR_TRASH, "could not read leaf 0"));
2280 self->cur.len = 1; /* avoid QSAME match */
2281 } else { /* locate key */
2282 FindKey f;
2283 f.dx = dx;
2284 f.key = cmp;
2285 if ( FINDKEY(b, &f, LOCK_READ) )
2286 DONE((ERR_TRASH, "could not find key"));
2287 i = (unsigned)(QENTMASK & f.e[0]);
2288 if ( !(QMATCH & f.e[0]) && QEQ == stop )
2289 goto done;
2290 self->cur.len = 0; /* avoid QSAME match */
2291 }
2292 if ( QUPTO <= stop )
2293 cmp = &self->to;
2294 for ( ;; i=0 ) { /* blocks */
2295 unsigned char *base = (unsigned char*)b;
2296 Dict *d;
2297 if ( ! b || b->lev )
2298 return ERR_TRASH;
2299 d = b->dct + i;
2300 for ( ; i<b->ent; i++, d++ ) {
2301 unsigned short pos = LEAFPOS( d );
2302 unsigned short vln = LEAFVLN( d );
2303 unsigned short kln = d->kln;
2304 unsigned char *v = base+pos+kln;
2305 Ptr p;
2306
2307 if ( skip ) {
2308 if ( kln == self->key.len
2309 && !memcmp(self->key.byt, base+pos, kln)
2310 )
2311 continue;
2312 skip = 0;
2313 }
2314 if ( stop ) {
2315 int diff = memcmp( cmp->byt, base+pos,
2316 kln <= cmp->len ? kln : cmp->len );
2317 switch ( stop ) {
2318 case QPF:
2319 if ( diff || kln < cmp->len )
2320 goto moan;
2321 break;
2322 case QEQ:
2323 if ( diff || kln != cmp->len )
2324 goto done;
2325 break;
2326 case QUPTO:
2327 if ( 0 >= diff )
2328 goto done;
2329 break;
2330 case QINCL:
2331 if ( 0 > diff || (!diff && kln > cmp->len) )
2332 goto done;
2333 break;
2334 default: /* unused */
2335 moan:
2336 eRr(LOG_INFO,
2337 "loop ends on key '%b'(%d) %d %d",
2338 kln, base+pos, kln, diff, cmp->len );
2339 goto done;
2340 }
2341 } /* if stop */
2342
2343 if ( !i && kln == self->cur.len && !memcmp(self->cur.byt, base+pos, kln) )
2344 self->flg |= QSAME;
2345 else {
2346 self->flg &= ~QSAME;
2347 memcpy( self->cur.byt, base+pos, self->cur.len = kln );
2348 }
2349 if ( self->qcb ) {
2350 self->vals = v;
2351 self->nvals = vln;
2352 if ( (ret = self->qcb(self)) )
2353 goto done;
2354 } else { /* internal check */
2355 Leaf ll;
2356 FindKey f;
2357 f.dx = dx;
2358 f.key = &self->cur;
2359 memcpy( self->cur.val.byt, v, self->cur.val.len = dx->vsz );
2360 if ( FINDKEY(&ll.b, &f, LOCK_READ) )
2361 DONE((ERR_TRASH, "could not find key"));
2362 if ( !(QMATCH & f.e[0])
2363 || b->num != ll.b.num
2364 || i != (unsigned)(QENTMASK & f.e[0])
2365 ) {
2366 eRr(ERR_TRASH,
2367 "OOPS! want %u.%u got %u.%u (match %4x)",
2368 b->num, i, ll.b.num, QENTMASK & f.e[0], f.e[0] );
2369 qRdVal( &p, v, dx->vsz );
2370 eRr(ERR_TRASH,
2371 "\twhen looking for '%b'/%u.%u.%u",
2372 self->cur.len, self->cur.byt, p.rid, p.tag, p.pos );
2373 if ( ll.b.ent <= (QENTMASK & f.e[0]) )
2374 eRr(ERR_TRASH, "\tI found nothing" );
2375 else {
2376 Dict *dd = ll.b.dct + (QENTMASK & f.e[0]);
2377 unsigned char *bb = ((unsigned char*)&ll.b)+LEAFPOS(dd);
2378 qRdVal( &p, bb+dd->kln, dx->vsz );
2379 eRr(ERR_TRASH,
2380 "\tI found '%b'/%u.%u.%u",
2381 dd->kln, bb, p.rid, p.tag, p.pos );
2382 }
2383 } else
2384 eRr(LOG_TRACE, "key '%b' ok", self->cur.len, self->cur.byt );
2385 } /* internal check */
2386 }
2387 if ( ! b->nxt || !GETLEAF(b, dx, b->nxt, LOCK_READ) )
2388 break;
2389 /* TODO:
2390 b now is the block that was our sibling when reading the old b.
2391 As long as we don't shift entries,
2392 there is no need to advance to next key that is actually greater !?
2393 */
2394 }
2395 done:
2396 if ( ! self->qcb )
2397 eRr(LOG_INFO,
2398 "checked %d keys in %u blks of %dK depth %u cmp/fget/lget %u/%u/%u",
2399 stat.key, dx->lln, dx->lsz>>10, dx->dpt, stat.cmp, stat.fget, stat.lget );
2400 return ret;
2401 } /* qLoop */
2402
2403
2404
2405 int qInit ( Qdx *dx )
2406 {
2407 Bloke blk;
2408 int size;
2409
2410 /* defaults */
2411 if ( !dx->ksz ) dx->ksz = 255;
2412
2413 /* check leaves file */
2414 size = fSize( dx->mqd );
2415 if ( 0x3ff & size ) /* not on 1K boundary */
2416 return eRr(LOG_ERROR, "mqd has bad size %dK + %d",
2417 size >> 10, (int)size & 0x3ff );
2418 if ( size ) { /* read 1st leaf header */
2419 Block b;
2420 int got = fPread( &dx->mqd, (unsigned char*)&b, sizeof(b), 0 );
2421 if ( sizeof(b) != (unsigned)got )
2422 return eRr(LOG_SYSERR, "could not get leaf head" );
2423 if ( b.num )
2424 return eRr(ERR_TRASH, "leaf has num %d", b.num );
2425 if ( dx->typ && dx->typ != b.typ )
2426 eRr(LOG_WARN, "typ want 0x02%2x got 0x02%2x", dx->typ, b.typ );
2427 if ( QDX_COMPRS & dx->typ )
2428 eRr(LOG_WARN, "sorry this version does not support compressed keys");
2429 dx->typ = b.typ;
2430 dx->ksz = b.ksz;
2431 dx->ptr = b.ptr;
2432 }
2433 if ( QDX_LEAFPV & dx->typ )
2434 dx->vsz = dx->ptr;
2435 else
2436 dx->vsz = (3&(dx->ptr>>6)) + 3+(3&(dx->ptr>>4)) + (7&dx->ptr);
2437 if (!size && dx->ksz > 255 - dx->vsz)
2438 dx->ksz = 255 - dx->vsz;
2439 eRr(LOG_VERBOSE, "dx ptr %2x vsz %d=%d+%d+%d", dx->ptr, dx->vsz,
2440 (3&(dx->ptr>>6)), 3+(3&(dx->ptr>>4)), (7&dx->ptr));
2441 dx->lsz = 1<<QDX_LEAFSH<<(QDX_SIZMSK&dx->typ);
2442 if ( size % dx->lsz )
2443 return eRr(ERR_TRASH, "size 0x%08x %d blksize", size, dx->lsz );
2444 dx->lln = size / dx->lsz;
2445 if ( ! dx->lln
2446 && (!(DX_WRITE & dx->flg) || (newleaf(&blk.b, dx), wrtleaf(&blk.b, dx)))
2447 )
2448 return eRr(LOG_SYSERR, "could not init new leaf file" );
2449
2450 #ifdef CPU_BIG_ENDIAN
2451 # define QDX_FORK (QDX_FORKBE | (env.psh-QDX_FORKSH))
2452 #else
2453 # define QDX_FORK (QDX_FORKLE | (env.psh-QDX_FORKSH))
2454 #endif
2455 size = fSize(dx->mqx.fil);
2456 if ( size ) {
2457 if ( size % env.psz ) /* not on env.psz boundary */
2458 return eRr(LOG_ERROR, "mqx has bad size %dK + %d",
2459 size >> 10, (int)size & 0x3ff );
2460 dx->fln = size >> env.psh;
2461 if ( (int)dx->fln != fMap(&dx->mqx, dx->fln) )
2462 return eRr(LOG_ERROR, "could not map mqx");
2463 dx->ftp = ((Block*)dx->mqx.map)->typ;
2464 if ( dx->ftp != QDX_FORK ) /* bad pagesize, endianess or flags */
2465 return eRr(LOG_ERROR, "root has type %2x != %2x",
2466 dx->ftp, QDX_FORK );
2467 dx->dpt = ((Block*)dx->mqx.map)->lev;
2468 }
2469 dx->fln = size / env.psz;
2470 if ( ! dx->fln ) {
2471 dx->ftp = QDX_FORK;
2472 eRr(LOG_INFO, "creating index type 0x%2x/0x%2x lsz %u psz %u ksz %u vsz %u",
2473 dx->typ, dx->ftp, dx->lsz, env.psz, dx->ksz, dx->vsz );
2474 if ( 1 < dx->lln ) {
2475 if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg))
2476 return eRr(ERR_TRASH, "need excl write access to rebuild tree");
2477 if ( qRemake(dx) )
2478 return eRr(LOG_ERROR, "could not rebuild tree");
2479 } else {
2480 /* init: link root to leaf */
2481 Block *b = newfork(&blk.b, dx, 1);
2482 SETINT( ((unsigned char*)b)+(b->stk -= 4), 0 );
2483 b->dct[0].pos = b->stk; /* 0, 0 */
2484 b->ent = 1;
2485 if ( wrtfork(b, dx) )
2486 return eRr(LOG_SYSERR, "\twhen writing root" );
2487 fMap(&dx->mqx, 1);
2488 dx->dpt = 1;
2489 }
2490 }
2491 eRr(LOG_INFO, "psz %d mapped %d", env.psz, dx->mqx.npg );
2492
2493 return 0;
2494 } /* qInit */
2495
2496
2497 void qFini ( Qdx *dx )
2498 {
2499 fMap(&dx->mqx, 0);
2500 } /* qFini */

  ViewVC Help
Powered by ViewVC 1.1.26