1 |
/* |
2 |
The Malete project - the Z39.2/Z39.50 database framework of OpenIsis. |
3 |
Version 0.9.x (patchlevel see file Version) |
4 |
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org |
5 |
|
6 |
This library is free software; you can redistribute it and/or |
7 |
modify it under the terms of the GNU Lesser General Public |
8 |
License as published by the Free Software Foundation; either |
9 |
version 2.1 of the License, or (at your option) any later version. |
10 |
|
11 |
This library is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
14 |
See the GNU Lesser General Public License for more details. |
15 |
|
16 |
You should have received a copy of the GNU Lesser General Public |
17 |
License along with this library; if not, write to the Free Software |
18 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
20 |
see README for more information |
21 |
EOH */ |
22 |
|
23 |
/* |
24 |
$Id: qdx.c,v 1.29 2004/11/12 08:38:13 kripke Exp $ |
25 |
the btree. |
26 |
*/ |
27 |
|
28 |
#include <stdlib.h> |
29 |
#include <string.h> |
30 |
#include <assert.h> |
31 |
|
32 |
#include "../core/core.h" |
33 |
|
34 |
|
35 |
|
36 |
/* ************************************************************ |
37 |
private types |
38 |
*/ |
39 |
|
40 |
|
41 |
/** |
42 |
Dictionary layout for forks (inner nodes of tree): |
43 |
*/ |
44 |
typedef struct Dict { /* dictionary of block entries */ |
45 |
unsigned short pos; /* position of entry, native byte order */ |
46 |
unsigned char vln; /* number of values */ |
47 |
unsigned char kln; /* length of key */ |
48 |
} Dict; |
49 |
/* |
50 |
For leaves, the max block size is 8K, |
51 |
which need 13 bit for pos and 11 bit for vln (max 8K/min vsz 4 values). |
52 |
The 1st 3 bytes are: |
53 |
1. lower 8 bit of pos |
54 |
2. higher 5 bit of pos | higher 3 bit of vlen << 5 |
55 |
3. lower 8 bit of vln |
56 |
*/ |
57 |
#ifdef CPU_BIG_ENDIAN |
58 |
#define LEAFPOS( d ) (*((unsigned char*)(d)) \ |
59 |
| (((unsigned char*)(d))[1]&0x1f)<<8) |
60 |
#define LEAFVLN( d ) ((d)->vln \ |
61 |
| (((unsigned char*)(d))[1]&0xe0)<<3) |
62 |
#define LEAFPOSADD( d, a ) do { \ |
63 |
unsigned char *__u = (unsigned char*)(d); \ |
64 |
unsigned short __p = (a) + __u[0] + (__u[1]<<8); \ |
65 |
__u[0] = (unsigned char)__p; \ |
66 |
__u[1] = (unsigned char)((__u[1]&0xe0)|((__p>>8)&0x1f)); \ |
67 |
} while (0) |
68 |
#define LEAFVLNADD( d, a ) do { \ |
69 |
unsigned char *__u = (unsigned char*)(d); \ |
70 |
unsigned short __v = (a) + __u[2] + ((__u[1]&0xe0)<<3); \ |
71 |
__u[2] = (unsigned char)__v; \ |
72 |
__u[1] = (unsigned char)((__u[1]&0x1f)|((__v>>3)&0xe0)); \ |
73 |
} while (0) |
74 |
#define LEAFSETPV( d, p, v ) do { \ |
75 |
unsigned char *__u = (unsigned char*)(d); \ |
76 |
__u[0] = (unsigned char)(p); \ |
77 |
__u[1] = (unsigned char)((((p)>>8)&0x1f)|(((v)>>3)&0xe0)); \ |
78 |
__u[2] = (unsigned char)(v); \ |
79 |
} while (0) |
80 |
#else |
81 |
#define LEAFPOS( d ) (0x1fff & (d)->pos) |
82 |
#define LEAFVLN( d ) ((d)->vln | (~0x1fff & (d)->pos)>>5) |
83 |
#define LEAFPOSADD( d, a ) do { \ |
84 |
(d)->pos = (~0x1fff & (d)->pos) | (0x1fff & ((d)->pos + (a))); \ |
85 |
} while (0) |
86 |
#define LEAFVLNADD( d, a ) do { \ |
87 |
unsigned _vln = LEAFVLN( d ) + (a); \ |
88 |
(d)->pos = (0x1fff & (d)->pos) \ |
89 |
| (~0x1fff & ((_vln)<<5)); \ |
90 |
(d)->vln = (unsigned char)(_vln); \ |
91 |
} while (0) |
92 |
#define LEAFSETPV( d, p, v ) do { \ |
93 |
(d)->pos = (0x1fff & (p)) | (~0x1fff & ((v)<<5)); \ |
94 |
(d)->vln = (unsigned char)(v); \ |
95 |
} while (0) |
96 |
#endif |
97 |
|
98 |
#define LEAFSETPVK( d, p, v, k ) do { \ |
99 |
LEAFSETPV( d, p, v ); \ |
100 |
(d)->kln = (unsigned char)(k); \ |
101 |
} while (0) |
102 |
|
103 |
|
104 |
/* |
105 |
the first 8 bytes are actually redundant, |
106 |
however are here for convenience and as a check |
107 |
*/ |
108 |
typedef struct Block { |
109 |
unsigned num; /* number of this block, for check and convenience */ |
110 |
unsigned char typ; /* block type & endianess */ |
111 |
unsigned char ksz; /* max key length */ |
112 |
unsigned char ptr; /* ptr type; for forks+plain leaves: value size */ |
113 |
unsigned char lev; /* level over bottom, 0 for leaves */ |
114 |
unsigned nxt; /* right sibling, free list for root */ |
115 |
unsigned short ent; /* # entries */ |
116 |
unsigned short stk; /* offset of keyspace stack */ |
117 |
Dict dct[1]; |
118 |
} Block; |
119 |
#define DICTOFF ((char *)&((Block*)0)->dct - (char*)((Block*)0)) |
120 |
#define GETENT(b, i) (((unsigned char *)(b)) \ |
121 |
+ ((b)->lev ? (b)->dct[i].pos : LEAFPOS((b)->dct+(i)))) |
122 |
|
123 |
/* |
124 |
LAYOUT: |
125 |
dct is an array of dictionary entries in increasing key order |
126 |
stk is growing downwards from block end towards dict |
127 |
position of entry i is b->stk <= LEAFPOS(b->dct+i) < dx->t/lsz |
128 |
decreasing on i (i.e. stack space is in reverse ordering) |
129 |
|
130 |
at LEAFPOS(b->dct+i), there are b->dct[i].kln bytes key |
131 |
for a leaf entry, the key is followed by |
132 |
LEAFVLN(b->dct+i) (between 0..2047 incl.) values |
133 |
of dx->vsz (between 8..23 incl) each |
134 |
for a fork entry, the key is followed by |
135 |
a single 4 bytes block number (vln = 0) |
136 |
or vln pairs of qualifying values (vsz each) and block numbers |
137 |
(without the key compression flag to be implemented, only vln=1 is supported) |
138 |
|
139 |
SIZES: |
140 |
block size >= 512 |
141 |
block base overhead DICTOFF is 16 bytes |
142 |
avail size >= 496 |
143 |
Dict entry is 4 bytes |
144 |
max. used stack space per entry is: |
145 |
- fork: kln+4+dx->vsz <= 255+4+255 = 514 |
146 |
- new leaf entry (one value): kln+dx->vsz <= 510 |
147 |
max total space for a new entry incl. 4 bytes dict is 518. |
148 |
For leaves of minimal block size 512, |
149 |
the sum of ksz+vsz must not exceed 492=512-16-4, |
150 |
so that each block can hold at least one entry. |
151 |
Forks are at least 4K and thus can hold min 7 entries. |
152 |
Even in pathological cases with max kln and vsz and all forks |
153 |
split to the worst, a level's fork factor does not drop below 4. |
154 |
*/ |
155 |
|
156 |
enum { |
157 |
QMAXLEAF = 0x2000, /* max leaf size -- 8K fixed !!! */ |
158 |
QMAXFORK = 1<<CPU_PAGE_SHIFT, |
159 |
#if 0x2000 < 1<<CPU_PAGE_SHIFT |
160 |
QMAXBLCK = 1<<CPU_PAGE_SHIFT, |
161 |
#else |
162 |
QMAXBLCK = 0x2000, |
163 |
#endif |
164 |
/* With 2Gig files there are at most 2^31/2^9=2^22 leaves, |
165 |
needing 11 fork levels. */ |
166 |
QLEVELS = 12 /* max depth incl. leaves */ |
167 |
}; |
168 |
typedef union { Block b; unsigned char base[QMAXLEAF]; } Leaf; |
169 |
typedef union { Block b; unsigned char base[QMAXFORK]; } Fork; |
170 |
typedef union { Block b; unsigned char base[QMAXBLCK]; } Bloke; /* any dude */ |
171 |
|
172 |
|
173 |
enum { |
174 |
QENTSIZE = 8, /* minimum cost of entry: 4 byte dict, 4 byte val */ |
175 |
/* the number of entries is limited by (QMAXFORK-16)/QENTSIZE < 8K */ |
176 |
QENTBITS = 13, /* bits for # enries */ |
177 |
QENTMASK = 0x1fff, |
178 |
QENTMAX = QENTMASK, |
179 |
QMATCH = 0x8000 /* entry matched flag in found entry numbers */ |
180 |
}; |
181 |
|
182 |
#if 0 |
183 |
# define SORT_Q |
184 |
/* glibc's qsort is up to 20% faster than our basic untuned WHS. However: |
185 |
- dietlibc's is slower |
186 |
- it still has poor worst case performance |
187 |
- many implementations are known to be buggy |
188 |
*/ |
189 |
#endif |
190 |
/* memory for internal sorting */ |
191 |
/* |
192 |
loaded 5MB index in 5 seconds with as little as |
193 |
#define SORTMEM 0x30000 = 192K (+96K Keys) internal |
194 |
#define SORTMEM 0x400000 = 4MB (+2MB Keys) internal |
195 |
*/ |
196 |
#define SORTMEM 0x400000 |
197 |
#define SORTKEYS (SORTMEM/8) /* + another 50%: #entries (assume > 8 Bytes) */ |
198 |
|
199 |
/* blocksize for files */ |
200 |
/* |
201 |
#define MERGEBLK 0x20000 = 128K buffer, 48-way merge |
202 |
#define MERGEBLK 0x3000 = 12K buffer, 512-way merge |
203 |
|
204 |
12K blocks perform quite well and allow for 512-way merge, |
205 |
so we never have a second pass for 31-bit addressable files |
206 |
*/ |
207 |
#define MERGEBLK 0x3000 |
208 |
#define MERGECHK (SORTMEM/MERGEBLK) /* initial chunk size */ |
209 |
|
210 |
/* |
211 |
By organizing files in MERGEBLKs, so that entries don't span blocks, |
212 |
we may be wasting up to one full entry (256 bytes) per block: |
213 |
if we have only exactly the space for one such longest entry, |
214 |
use one terminating 0 byte instead. |
215 |
*/ |
216 |
#define SORTUSE (MERGECHK*(MERGEBLK-256)) |
217 |
|
218 |
typedef struct { |
219 |
unsigned char *keyb[SORTKEYS]; |
220 |
unsigned char memb[SORTMEM]; |
221 |
} SortBuf; |
222 |
typedef unsigned char MergeBlk[MERGEBLK]; |
223 |
|
224 |
/* n-way merge |
225 |
#define MERGEWAY (sizeof (SortBuf)/sizeof (MergeBlk)) |
226 |
*/ |
227 |
#ifndef MERGEWAY /* define possible max */ |
228 |
# define MERGEWAY ((4*SORTKEYS+SORTMEM)/MERGEBLK) /* as pre-pro constant */ |
229 |
#endif |
230 |
#if 0x80000000UL > (MERGEWAY*SORTMEM) |
231 |
# define MERGEMULTIPASS |
232 |
/* else (512-way merge on 4MB) we can do 2GB in one pass */ |
233 |
#endif |
234 |
|
235 |
typedef struct QLoad { |
236 |
unsigned got; /* current for this key */ |
237 |
unsigned tot; /* total for this key */ |
238 |
unsigned let; |
239 |
/* stats */ |
240 |
unsigned keys; |
241 |
unsigned keyl; |
242 |
unsigned maxl; |
243 |
unsigned vals; |
244 |
unsigned maxv; |
245 |
unsigned span; |
246 |
unsigned free; |
247 |
/* load buffer */ |
248 |
Leaf cur; |
249 |
Key key; |
250 |
Val val[QENTMAX]; |
251 |
/* sort buffer */ |
252 |
file fil; |
253 |
unsigned blks; |
254 |
unsigned char **nkey; |
255 |
unsigned char *nmem; |
256 |
#if 0 /* hard to measure ... */ |
257 |
# define ALIGN4K __attribute__ ((aligned (0x1000))) |
258 |
#else |
259 |
# define ALIGN4K |
260 |
#endif |
261 |
union { |
262 |
SortBuf s; |
263 |
MergeBlk m[MERGEWAY]; /* mergebuf */ |
264 |
} buf ALIGN4K; |
265 |
MergeBlk mout; /* only used temporary, but don't want to kill stack */ |
266 |
} QLoad; |
267 |
|
268 |
|
269 |
|
270 |
typedef struct { /* structure filled by findkey */ |
271 |
Qdx *dx; /* the tree */ |
272 |
Key *key; /* the key searched */ |
273 |
unsigned n[QLEVELS]; /* block numbers of path */ |
274 |
unsigned short e[QLEVELS]; /* entries in path with match flag */ |
275 |
} FindKey; |
276 |
|
277 |
|
278 |
typedef struct { |
279 |
unsigned key; |
280 |
unsigned cmp; |
281 |
unsigned fget; |
282 |
unsigned lget; |
283 |
} Stat; |
284 |
|
285 |
/* buffer to print a value to */ |
286 |
typedef char PrVal[64]; |
287 |
|
288 |
|
289 |
/* ************************************************************ |
290 |
private data |
291 |
*/ |
292 |
|
293 |
static Stat stat; |
294 |
|
295 |
|
296 |
/* ************************************************************ |
297 |
private functions |
298 |
*/ |
299 |
|
300 |
/* bail out */ |
301 |
#define DONE( args ) do { ret = eRr args; goto done; } while (0) |
302 |
|
303 |
#ifdef CPU_NEED_ALIGN |
304 |
static unsigned GETINT ( void *m ) /* bus error safe version of *(int*) */ |
305 |
{ |
306 |
unsigned l; |
307 |
memcpy( &l, m, 4 ); |
308 |
return l; |
309 |
} |
310 |
static void SETINT ( void *m, unsigned i ) |
311 |
{ |
312 |
memcpy( m, &i, 4 ); |
313 |
} |
314 |
#else |
315 |
#define GETINT( m ) (*(unsigned*)(m)) |
316 |
#define SETINT( m, i ) (*(unsigned*)(m) = (i)) |
317 |
#endif |
318 |
|
319 |
void qMkVal ( Val *v, Ptr *p, unsigned char typ ) |
320 |
{ |
321 |
unsigned char *c = v->byt; |
322 |
/* TODO: may macros to address the bytes in the ints would be faster */ |
323 |
if ( !(QDX_FULTXT & typ) ) /* leading tag */ |
324 |
switch ( QDX_TAGMSK&typ ) { |
325 |
case QDX_TAG2: *c++ = (unsigned char) (p->tag >> 8); |
326 |
case QDX_TAG1: *c++ = (unsigned char) p->tag; |
327 |
} |
328 |
switch ( QDX_RIDMSK&typ ) { |
329 |
case QDX_RID6: *c++ = (unsigned char) (p->ext >> 8); |
330 |
case QDX_RID5: *c++ = (unsigned char) p->ext; |
331 |
case QDX_RID4: *c++ = (unsigned char) (p->rid >> 24); |
332 |
case QDX_RID3: |
333 |
*c++ = (unsigned char) (p->rid >> 16); |
334 |
*c++ = (unsigned char) (p->rid >> 8); |
335 |
*c++ = (unsigned char) p->rid; |
336 |
} |
337 |
if ( QDX_FULTXT & typ ) |
338 |
switch ( QDX_TAGMSK&typ ) { |
339 |
case QDX_TAG2: *c++ = (unsigned char) (p->tag >> 8); |
340 |
case QDX_TAG1: *c++ = (unsigned char) p->tag; |
341 |
} |
342 |
switch ( QDX_POSMSK&typ ) { |
343 |
case 4: *c++ = (unsigned char) (p->pos >> 24); |
344 |
case 3: *c++ = (unsigned char) (p->pos >> 16); |
345 |
case 2: *c++ = (unsigned char) (p->pos >> 8); |
346 |
case 1: *c++ = (unsigned char) p->pos; |
347 |
} |
348 |
v->len = c - v->byt; |
349 |
} /* qMkVal */ |
350 |
|
351 |
|
352 |
int qRdKey ( Qdx *qdx, char *plain, int l, Key *key ) |
353 |
{ |
354 |
if (!qdx->cdx) { |
355 |
if ( l > key->len ) l = key->len; |
356 |
memcpy(plain, key->byt, l); |
357 |
} else |
358 |
l = cDec(qdx->cdx, (unsigned char*)plain, l, key); |
359 |
return l; |
360 |
} /* qRdKey */ |
361 |
|
362 |
|
363 |
void qMkKey ( Qdx *qdx, Key *key, char *b, int l ) |
364 |
{ |
365 |
if (!qdx->cdx) { |
366 |
if ( l > qdx->ksz ) l = qdx->ksz; |
367 |
memcpy(key->byt, b, key->len = l); |
368 |
} else { |
369 |
key->len = qdx->ksz; |
370 |
cEnc(qdx->cdx, key, (unsigned char*)b, l, 0); |
371 |
} |
372 |
} /* qMkKey */ |
373 |
|
374 |
|
375 |
int qMkKeyVal ( Qdx *qdx, Key *key, char *b, int l ) |
376 |
{ |
377 |
Ptr p = {0,0,0,0}; |
378 |
char *t = memchr(b, '\t', l); |
379 |
if ( !t ) |
380 |
return -1; |
381 |
qMkKey(qdx, key, b, t - b); |
382 |
if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) ) |
383 |
return -1; |
384 |
p.rid = (unsigned) a2i( b, t-b ); |
385 |
if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) ) |
386 |
return -1; |
387 |
p.tag = (unsigned short) a2i( b, t-b ); |
388 |
if ( 0 >= (l -= t-b+1) || !(t = memchr( b=t+1, '\t', l )) ) |
389 |
return -1; |
390 |
p.pos = (unsigned) a2i( b, t-b ); |
391 |
if ( 0 < (l -= t-b+1) ) |
392 |
p.pos = p.pos << 16 | (unsigned short) a2i( t+1, l ); |
393 |
eRr(LOG_TRACE, "%'%b %d %d 0x%4x", |
394 |
key->len, key->byt, p.rid, p.tag, p.pos ); |
395 |
qMkVal( &key->val, &p, qdx->ptr ); |
396 |
return 0; |
397 |
} /* qMkKeyVal */ |
398 |
|
399 |
|
400 |
void qRdVal ( Ptr *p, const unsigned char *c, unsigned char typ ) |
401 |
{ |
402 |
p->tag = p->ext = p->rid = p->pos = 0; |
403 |
if ( !(QDX_FULTXT & typ) ) /* leading tag */ |
404 |
switch ( QDX_TAGMSK&typ ) { |
405 |
case QDX_TAG2: p->tag = (unsigned short)*c++ << 8; |
406 |
case QDX_TAG1: p->tag |= *c++; |
407 |
} |
408 |
switch ( QDX_RIDMSK&typ ) { |
409 |
case QDX_RID6: p->ext = (unsigned short)*c++ << 8; |
410 |
case QDX_RID5: p->ext |= *c++; |
411 |
case QDX_RID4: p->rid = (unsigned int)*c++ << 24; |
412 |
case QDX_RID3: |
413 |
p->rid |= (unsigned int)*c++ << 16; |
414 |
p->rid |= (unsigned int)*c++ << 8; |
415 |
p->rid |= *c++; |
416 |
} |
417 |
if ( QDX_FULTXT & typ ) |
418 |
switch ( QDX_TAGMSK&typ ) { |
419 |
case QDX_TAG2: p->tag = (unsigned short)*c++ << 8; |
420 |
case QDX_TAG1: p->tag |= *c++; |
421 |
} |
422 |
switch ( QDX_POSMSK&typ ) { |
423 |
case 4: p->pos = (unsigned int)*c++ << 24; |
424 |
case 3: p->pos |= (unsigned int)*c++ << 16; |
425 |
case 2: p->pos |= (unsigned short)*c++ << 8; |
426 |
case 1: p->pos |= *c++; |
427 |
} |
428 |
} /* qRdVal */ |
429 |
|
430 |
|
431 |
static char *prval ( PrVal buf, unsigned char *c, unsigned typ ) |
432 |
{ |
433 |
char *d = buf; |
434 |
Ptr p; |
435 |
if ( ! typ ) { |
436 |
buf[0] = '#'; |
437 |
buf[1] = 0; |
438 |
return buf; |
439 |
} |
440 |
qRdVal( &p, c, typ ); |
441 |
if ( p.ext ) { d += u2a( d, p.ext ); *d++ = '.'; } |
442 |
d += u2a( d, p.rid ); *d++ = '.'; |
443 |
d += u2a( d, p.tag ); *d++ = '.'; |
444 |
d += u2a( d, p.pos ); *d++ = '.'; |
445 |
*d = 0; |
446 |
return buf; |
447 |
} /* prval */ |
448 |
|
449 |
#define PRVAL( buf, v ) prval( buf, v.byt, dx->ptr ) |
450 |
|
451 |
#define QLOCK(n,l) (void)FLOCK(dx->mqd, n, l) |
452 |
#define QLOCKSH(n) (void)FLOCKSH(dx->mqd, n) |
453 |
#define QLOCKEX(n) (void)FLOCKEX(dx->mqd, n) |
454 |
#define QLOCKUN(n) (void)FLOCKUN(dx->mqd, n) |
455 |
#ifdef BUILD_SHMODE |
456 |
enum { |
457 |
LOCK_READ = FIL_BLOCK, |
458 |
LOCK_WRITE = FIL_BLOCK|FIL_WR |
459 |
}; |
460 |
#endif |
461 |
|
462 |
|
463 |
/* ************************************************************ |
464 |
block I/O |
465 |
*/ |
466 |
|
467 |
|
468 |
static int chkblk ( Block *b, Qdx *dx ) |
469 |
{ |
470 |
int i; |
471 |
unsigned short eod = DICTOFF + b->ent*sizeof(Dict), siz, pos, vln, key; |
472 |
unsigned char *base = (unsigned char*)b; |
473 |
unsigned len = b->lev ? dx->fln : dx->lln; |
474 |
unsigned bsz = b->lev ? env.psz : dx->lsz; |
475 |
unsigned end = bsz, tot = 0; |
476 |
if ( b->num > len || (b->num && b->nxt > len) ) |
477 |
return eRr(ERR_TRASH, "bad num %d / nxt %d on lev %d", |
478 |
b->num, b->nxt, b->lev ); |
479 |
if ( b->typ != (b->lev ? dx->ftp : dx->typ) |
480 |
|| b->ksz != dx->ksz || b->ptr != (b->lev?dx->vsz:dx->ptr) |
481 |
) |
482 |
return eRr(ERR_TRASH, "bad sig 0x%02x,%d,%d in block %d want 0x%02x,%d,%d", |
483 |
b->typ, b->ksz, b->ptr, b->num, |
484 |
b->lev ? dx->ftp : dx->typ, dx->ksz, (b->lev?dx->vsz:dx->ptr) ); |
485 |
if ( b->ent > bsz / QENTSIZE || b->stk > bsz || b->stk < eod ) |
486 |
return eRr(ERR_TRASH, "bad ent/stk %d,%d in block %d eod %d sz %d", |
487 |
b->ent, b->stk, b->num, eod, bsz ); |
488 |
for ( i=0; i<b->ent; i++ ) { |
489 |
Dict *d = &b->dct[i]; |
490 |
if ( b->lev ) { |
491 |
pos = d->pos; vln = d->vln; |
492 |
siz = vln ? vln*(dx->vsz+4) : 4; |
493 |
} else { |
494 |
pos = LEAFPOS(d); vln = LEAFVLN(d); |
495 |
siz = vln*dx->vsz; |
496 |
} |
497 |
siz += key = d->kln; |
498 |
if ( pos < b->stk || end != (unsigned)pos + siz ) |
499 |
return eRr(ERR_TRASH, |
500 |
"bad pos/vln/key %d/%d/%d in entry %d of block %d" |
501 |
" stack %d sz %d end %d expected %d", |
502 |
pos, vln, key, i, b->num, b->stk, siz, pos+siz, end ); |
503 |
if ( b->lev ) { |
504 |
unsigned lnk = GETINT( base + pos + key + vln*dx->vsz ); |
505 |
if ( lnk >= (1==b->lev ? dx->lln : dx->fln) ) |
506 |
return eRr(ERR_TRASH, |
507 |
"bad link %u in entry %d of block %d", |
508 |
lnk, i, b->num ); |
509 |
} |
510 |
#ifndef NDEBUG |
511 |
if ( i ) { /* not on first */ |
512 |
unsigned short op = b->lev ? d[-1].pos : LEAFPOS(d-1), okl = d[-1].kln; |
513 |
int mc = memcmp( base+pos, base+op, key<okl ? key : okl ), j; |
514 |
if ( 0 < mc || (!mc && 0 < (mc = (int)key - (int)okl)) ) |
515 |
goto ok; |
516 |
j = 0; |
517 |
if ( !mc && b->lev && vln ) { /* compare values */ |
518 |
unsigned ovln = d[-1].vln; |
519 |
if ( ! ovln || 0 < (j = memcmp( base+pos+key, base+op+okl, dx->vsz )) ) |
520 |
goto ok; |
521 |
} |
522 |
return eRr(ERR_TRASH, |
523 |
"bad key order entry %d of block %d lev %d" |
524 |
" mc %d j %d key %d okl %d vln %d", |
525 |
i, b->num, b->lev, mc, j, key, okl, vln |
526 |
); |
527 |
} |
528 |
ok: |
529 |
#endif |
530 |
tot += siz; |
531 |
end = pos; |
532 |
} /* for i (entries) */ |
533 |
if ( tot > bsz - b->stk ) |
534 |
return eRr(ERR_TRASH, |
535 |
"bad total entry size %d in block %d stack %d", |
536 |
tot, b->num, b->stk ); |
537 |
#ifndef NDEBUG |
538 |
if ( LOG_TRACE >= env.log ) { |
539 |
eRr(LOG_TRACE, |
540 |
"block %d lev %d sig 0x%02x,%d ent %d eod %d stk %d", |
541 |
b->num, b->lev, b->typ, b->ksz, b->ent, eod, b->stk |
542 |
); |
543 |
for ( i=0; i<2; i++ ) { |
544 |
int j = i ? b->ent-1 : 0; |
545 |
Dict *d = &b->dct[j]; |
546 |
if ( i >= b->ent ) /* if b->ent is 0 or 1, log no or one key */ |
547 |
break; |
548 |
if ( b->lev ) { |
549 |
pos = d->pos; vln = d->vln; |
550 |
} else { |
551 |
pos = LEAFPOS(d); vln = LEAFVLN(d); |
552 |
} |
553 |
key = d->kln; |
554 |
/* start of value */ |
555 |
if ( b->lev ) |
556 |
eRr(LOG_TRACE, "key %d '%b'/%d -> %d", |
557 |
j, key, base+pos, vln, GETINT(base+pos+key+(vln?4:0)) ); |
558 |
else { |
559 |
Ptr p; |
560 |
qRdVal( &p, base+pos+key, dx->ptr ); |
561 |
eRr(LOG_TRACE, "key %d '%b'[%d] = %d[%d,%d]", |
562 |
j, key, base+pos, vln, p.rid, p.tag, p.pos ); |
563 |
} |
564 |
} |
565 |
} |
566 |
#endif |
567 |
return 0; |
568 |
} /* chkblk */ |
569 |
|
570 |
|
571 |
/* |
572 |
init a new leaf in the local buffer b (usually a stack based Leaf). |
573 |
increment dx's lln |
574 |
*/ |
575 |
static void newleaf ( Block *b, Qdx *dx ) |
576 |
{ |
577 |
memset(b, 0, dx->lsz); |
578 |
b->num = dx->lln++; |
579 |
b->typ = dx->typ; |
580 |
b->ksz = dx->ksz; |
581 |
b->ptr = dx->ptr; |
582 |
b->lev = 0; |
583 |
b->stk = dx->lsz; |
584 |
} /* newleaf */ |
585 |
|
586 |
|
587 |
/* |
588 |
init a new fork in the local buffer b (usually a stack based Fork). |
589 |
increment dx's fln |
590 |
TODO: for qSet, use variant checking for free list at root->nxt. |
591 |
*/ |
592 |
static Block *newfork ( Block *b, Qdx *dx, unsigned char lev ) |
593 |
{ |
594 |
memset(b, 0, env.psz); |
595 |
b->num = dx->fln++; |
596 |
b->typ = dx->ftp; |
597 |
b->ksz = dx->ksz; |
598 |
b->ptr = dx->vsz; |
599 |
b->lev = lev; |
600 |
b->stk = env.psz; |
601 |
#if 16 == CPU_PAGE_SHIFT /* possible short overflow on ia64 */ |
602 |
if ( !b->stk ) b->stk--; |
603 |
#endif |
604 |
return b; |
605 |
} /* newfork */ |
606 |
|
607 |
|
608 |
#ifdef CPU_BIG_ENDIAN |
609 |
static unsigned rvi ( unsigned i ) { |
610 |
unsigned r; |
611 |
((char*)&r)[0] = ((char*)&i)[3]; |
612 |
((char*)&r)[1] = ((char*)&i)[2]; |
613 |
((char*)&r)[2] = ((char*)&i)[1]; |
614 |
((char*)&r)[3] = ((char*)&i)[0]; |
615 |
return r; |
616 |
} |
617 |
static unsigned short rvs ( unsigned short i ) { |
618 |
unsigned short r; |
619 |
((char*)&r)[0] = ((char*)&i)[1]; |
620 |
((char*)&r)[1] = ((char*)&i)[0]; |
621 |
return r; |
622 |
} |
623 |
#define SWI( i ) b->i = rvi( b->i ) |
624 |
#define SWS( s ) b->s = rvs( b->s ) |
625 |
static void swapleaf ( Block *b ) /* leaves are little endian */ |
626 |
{ |
627 |
SWI(num); |
628 |
SWI(nxt); |
629 |
SWS(ent); |
630 |
SWS(stk); |
631 |
} |
632 |
#else |
633 |
# define swapleaf(b) |
634 |
#endif |
635 |
|
636 |
/* |
637 |
writing a leaf will render the leaf scrambled on big endians |
638 |
*/ |
639 |
static int wrtleaf ( Block *b, Qdx *dx ) |
640 |
{ |
641 |
unsigned num = b->num; |
642 |
int got; |
643 |
if ( chkblk( b, dx ) ) /* don't write no shize */ |
644 |
return eRr(ERR_TRASH, "\tbefore writing dx block %d", b->num ); |
645 |
swapleaf(b); /* a leaf written is a leaf lost */ |
646 |
got = fPwrite(&dx->mqd, b, dx->lsz, num*dx->lsz ); |
647 |
eRr(LOG_TRACE, "writing leaf %u got %u", num, got ); |
648 |
if ( dx->lsz != (unsigned)got ) |
649 |
return eRr(LOG_SYSERR, "\twhen writing dx block %d", b->num ); |
650 |
return 0; |
651 |
} /* wrtleaf */ |
652 |
|
653 |
|
654 |
/* |
655 |
writing a fork may invalidate all mapped forks due to remap |
656 |
*/ |
657 |
static int wrtfork ( Block *b, Qdx *dx ) |
658 |
{ |
659 |
int got; |
660 |
if ( chkblk( b, dx ) ) /* don't write no shize */ |
661 |
return eRr(ERR_TRASH, "\tbefore writing dx block %d", b->num ); |
662 |
if ( dx->mqx.npg ) { /* have mmap -- not during qload */ |
663 |
if ( b->num < dx->mqx.npg ) { /* should be in map */ |
664 |
if ( (char*)b != dx->mqx.map + (b->num<<env.psh) ) |
665 |
return eRr(ERR_TRASH, "fork %d should be mapped", b->num); |
666 |
if ( ENV_MSYNC & env.flg ) fMSync(&dx->mqx, b->num); |
667 |
return 0; |
668 |
} |
669 |
if ( b->num < dx->mqx.lim /* try to extend mapping */ |
670 |
&& 0 < (got = fMap(&dx->mqx, b->num+1)) |
671 |
&& b->num < (unsigned)got |
672 |
) { |
673 |
eRr(LOG_VERBOSE, "remapped to %d at %8x", dx->mqx.npg, dx->mqx.map); |
674 |
memcpy(dx->mqx.map + (b->num<<env.psh), b, env.psz); |
675 |
fMSync(&dx->mqx, b->num); |
676 |
return 0; |
677 |
} |
678 |
eRr(LOG_WARN, "remap to %d failed", b->num+1); |
679 |
/* else write to file */ |
680 |
} |
681 |
got = fPwrite(&dx->mqx.fil, b, env.psz, b->num*env.psz ); |
682 |
eRr(LOG_TRACE, "writing fork %u got %u", b->num, got ); |
683 |
if ( env.psz != (unsigned)got ) |
684 |
return eRr(LOG_SYSERR, "\twhen writing dx block %d", b->num ); |
685 |
return 0; |
686 |
} /* wrtfork */ |
687 |
|
688 |
|
689 |
|
690 |
static int chkread ( Block *b, Qdx *dx, |
691 |
int got, unsigned bsz, unsigned num, unsigned char lev ) |
692 |
{ |
693 |
if ( 0 > got || bsz != (unsigned)got ) |
694 |
return eRr(LOG_SYSERR, "\twhen reading dx block %d", num ); |
695 |
if ( b->num != num ) |
696 |
return eRr(ERR_TRASH, "bad num %d in block %d", b->num, num ); |
697 |
if ( lev && b->lev != lev ) |
698 |
return eRr(ERR_TRASH, "bad lev %d in block %d want %d", |
699 |
b->lev, b->num, lev ); |
700 |
return chkblk( b, dx ); |
701 |
} /* chkread */ |
702 |
|
703 |
|
704 |
/* read a leaf into the provided local buffer */ |
705 |
static Block *getleaf ( Block *b, Qdx *dx, unsigned num |
706 |
#ifdef BUILD_SHMODE |
707 |
, int lck |
708 |
# define GETLEAF(leaf, dx, num, lck) getleaf(leaf, dx, num, lck) |
709 |
#else |
710 |
# define GETLEAF(leaf, dx, num, lck) getleaf(leaf, dx, num) |
711 |
#endif |
712 |
) |
713 |
{ |
714 |
int got; |
715 |
#ifdef BUILD_SHMODE |
716 |
if ( lck ) QLOCK(num<<1, lck); |
717 |
#endif |
718 |
got = fPread(&dx->mqd, b, dx->lsz, num*dx->lsz); |
719 |
stat.lget++; |
720 |
#ifdef CPU_BIG_ENDIAN |
721 |
if ( 0 < got && dx->lsz == (unsigned)got ) swapleaf(b); |
722 |
#endif |
723 |
if ( chkread(b, dx, got, dx->lsz, num, 0) ) |
724 |
b = 0; |
725 |
#ifdef BUILD_SHMODE |
726 |
if ( LOCK_READ == lck ) QLOCKUN(num<<1); |
727 |
#endif |
728 |
return b; |
729 |
} /* getleaf */ |
730 |
|
731 |
|
732 |
/* pick a fork from cache or read into the provided local buffer */ |
733 |
static Block *getfork ( Block *b, Qdx *dx, unsigned num, unsigned char lev ) |
734 |
{ |
735 |
int got = env.psz; |
736 |
stat.fget++; |
737 |
if ( num < dx->mqx.npg ) |
738 |
b = (Block*)(dx->mqx.map + num*env.psz); |
739 |
else |
740 |
got = fPread(&dx->mqx.fil, b, env.psz, num*env.psz); |
741 |
return chkread(b, dx, got, env.psz, num, lev) ? 0 : b; |
742 |
} /* getfork */ |
743 |
|
744 |
|
745 |
|
746 |
/** binsearch position for key in a leaf. |
747 |
position is either the index of the matching entry (0..b->ent-1) |
748 |
or the index (0..b->ent) where key should be inserted. |
749 |
@return position for key (0..b->ent), |yfied with QMATCH, if found |
750 |
*/ |
751 |
static unsigned short keyleaf ( Block *b, Key *key ) |
752 |
{ |
753 |
unsigned char *base = (unsigned char*)b; |
754 |
unsigned char *kbt = key->byt; |
755 |
unsigned char kln = key->len; |
756 |
unsigned short l = 0, r = b->ent; |
757 |
Dict *d = b->dct; |
758 |
/* binsearch invariant: |
759 |
key is < all entries i >= r (we do know relation at r) |
760 |
key is > all entries i < l (don't know relation at l) |
761 |
*/ |
762 |
while ( l < r ) { /* invariant: l <= keypos < r */ |
763 |
unsigned short i = (l + r) >> 1; /* l <= i < r */ |
764 |
unsigned short pos = LEAFPOS(&d[i]); |
765 |
unsigned char cln = d[i].kln; |
766 |
int cmp = memcmp( kbt, base+pos, kln<cln ? kln : cln ); |
767 |
stat.cmp++; /* dirty */ |
768 |
if ( ! cmp && !(cmp = (int)kln - cln) ) |
769 |
return QMATCH | i; |
770 |
if ( cmp > 0 ) |
771 |
l = i+1; |
772 |
else |
773 |
r = i; |
774 |
} |
775 |
return l; /* == r */ |
776 |
} /* keyleaf */ |
777 |
|
778 |
/* same for forks. |
779 |
somewhat more complex since we might have duplicate keys |
780 |
with different values. |
781 |
Must not use the _POS/_VLN macros. |
782 |
*/ |
783 |
static unsigned short keyfork ( Block *b, Key *key, unsigned char vsz ) |
784 |
{ |
785 |
unsigned char *base = (unsigned char*)b; |
786 |
unsigned char *kbt = key->byt; |
787 |
unsigned char kln = key->len; |
788 |
unsigned short l = 0, r = b->ent; |
789 |
Dict *d = b->dct; |
790 |
/* binsearch invariant: |
791 |
key is < all entries i >= r (we do know relation at r) |
792 |
key is > all entries i < l (don't know relation at l) |
793 |
*/ |
794 |
while ( l < r ) { /* invariant: l <= keypos < r */ |
795 |
unsigned short i = (l + r) >> 1; /* l <= i < r */ |
796 |
unsigned short pos = d[i].pos; |
797 |
unsigned char cln = d[i].kln; |
798 |
int cmp = memcmp( kbt, base+pos, kln<cln ? kln : cln ); |
799 |
stat.cmp++; /* dirty */ |
800 |
if ( ! cmp && !(cmp = (int)kln - cln) ) { /* key match -- cmp value */ |
801 |
if ( ! d[i].vln ) { /* common case */ |
802 |
if ( !key->val.len ) /* no value */ |
803 |
return QMATCH | i; |
804 |
cmp = 1; |
805 |
} else if ( ! key->val.len ) |
806 |
cmp = -1; |
807 |
else if ( !(cmp = memcmp( key->val.byt, base+pos+cln, |
808 |
key->val.len < vsz ? key->val.len : vsz)) |
809 |
&& !(cmp = (int) key->val.len - vsz) |
810 |
) |
811 |
return QMATCH | i; |
812 |
} |
813 |
if ( cmp > 0 ) |
814 |
l = i+1; |
815 |
else |
816 |
r = i; |
817 |
} |
818 |
return l; /* == r */ |
819 |
} /* keyfork */ |
820 |
|
821 |
|
822 |
/** find position for (leaf) val. |
823 |
search v in array b of n values of size sz |
824 |
@return position for value (0..n), |yfied with QMATCH, if found |
825 |
*/ |
826 |
static unsigned short valpos ( unsigned char *b, |
827 |
unsigned short n, unsigned char sz, unsigned char *v ) |
828 |
{ |
829 |
unsigned short l = 0, r = n; |
830 |
/* binsearch invariant: |
831 |
v is < all values i >= r (we do know relation at r) |
832 |
v is > all values i < l (don't know relation at l) |
833 |
*/ |
834 |
while ( l < r ) { /* invariant: l <= valpos < r */ |
835 |
unsigned short i = (l + r) >> 1; /* l <= i < r */ |
836 |
int cmp = memcmp( v, b+i*sz, sz ); |
837 |
if ( ! cmp ) |
838 |
return QMATCH | i; |
839 |
if ( cmp > 0 ) |
840 |
l = i+1; |
841 |
else |
842 |
r = i; |
843 |
} |
844 |
return l; /* == r */ |
845 |
} /* valpos */ |
846 |
|
847 |
|
848 |
#define TRACEKEY( l, f ) (void)(f) |
849 |
|
850 |
|
851 |
/* |
852 |
find and fetch key's leaf into the provided buffer. |
853 |
*/ |
854 |
static int findkey ( Block *leaf, FindKey *f |
855 |
#ifdef BUILD_SHMODE |
856 |
, int lck |
857 |
# define FINDKEY(leaf, f, lck) findkey(leaf, f, lck) |
858 |
#else |
859 |
# define FINDKEY(leaf, f, lck) findkey(leaf, f) |
860 |
#endif |
861 |
) |
862 |
{ |
863 |
#ifdef BUILD_SHMODE |
864 |
Qdx *dx = f->dx; /* for the lock macros */ |
865 |
#endif |
866 |
Fork fork[2]; /* buffer */ |
867 |
Leaf lbuf; |
868 |
int i, ret=-1; |
869 |
unsigned char *pkey = 0, pkln = 0, pvln = 0; |
870 |
Block *b; |
871 |
|
872 |
QLOCKSH(1); |
873 |
b = getfork(&fork[0].b, f->dx, 0, 0); |
874 |
if ( !b ) |
875 |
DONE((ERR_TRASH, "no root")); |
876 |
if ( b->lev >= QLEVELS ) |
877 |
DONE((ERR_TRASH, "tree is too deep: %u", b->lev)); |
878 |
stat.key++; |
879 |
for ( i=QLEVELS; i--; ) /* paranoia */ |
880 |
f->n[i] = f->e[i] = 0; |
881 |
for ( i=b->lev;; ) { |
882 |
unsigned short e; |
883 |
unsigned n; |
884 |
Dict *d; |
885 |
int tog = 0; |
886 |
|
887 |
f->n[i] = b->num; |
888 |
e = i ? keyfork(b, f->key, f->dx->vsz) : keyleaf(b, f->key); |
889 |
LOG_DBG( LOG_DEBUG, "keypos %4x", e ); |
890 |
/* |
891 |
here goes the Lehmann/Yao race detection: |
892 |
If we ran on right boundary, follow right link |
893 |
*/ |
894 |
while ( !(QMATCH & e) && b->ent == e && b->nxt && i < f->dx->dpt ) { |
895 |
Block *s; |
896 |
unsigned se; |
897 |
/* peek next block, toggling buffers */ |
898 |
if ( i ) { |
899 |
/* TODO: actually we should never see this in forks |
900 |
as long as we lock the complete fork file */ |
901 |
s = getfork(&fork[(++tog)&1].b, f->dx, b->nxt, i); |
902 |
se = keyfork(s, f->key, f->dx->vsz); |
903 |
} else { |
904 |
s = GETLEAF((++tog)&1?&lbuf.b:leaf, f->dx, b->nxt, lck); |
905 |
se = keyleaf(s, f->key); |
906 |
#ifdef BUILD_SHMODE |
907 |
if ( LOCK_WRITE == lck ) QLOCKUN(se ? b->num : s->num); |
908 |
#endif |
909 |
} |
910 |
if ( !se ) /* lower than first */ |
911 |
break; |
912 |
LOG_DBG( LOG_DEBUG, "LYao %d -> %d lev %d for '%b' got 0x%4x", |
913 |
b->num, s->num, i, f->key->len, f->key->byt, se ); |
914 |
/* trade b for s */ |
915 |
f->n[i] = (b = s)->num; |
916 |
e = se; |
917 |
} |
918 |
f->e[i] = e; |
919 |
if ( ! i ) { |
920 |
if ( b != leaf ) |
921 |
memcpy(leaf, b, f->dx->lsz); |
922 |
return 0; /* keep the lock */ |
923 |
} |
924 |
if ( !(QMATCH & e) ) { |
925 |
if ( e ) |
926 |
f->e[i] = --e; |
927 |
else { |
928 |
TRACEKEY( ERR_TRASH, f ); |
929 |
return eRr(ERR_IDIOT, |
930 |
"ran on left bound of block %u ent %u", |
931 |
b->num, b->ent ); |
932 |
} |
933 |
} |
934 |
d = b->dct + (QENTMASK & e); |
935 |
/* direct access ok in fork */ |
936 |
pkey = ((unsigned char*)b)+d->pos; |
937 |
pkln = d->kln; |
938 |
pvln = d->vln ? f->dx->vsz : 0; |
939 |
n = GETINT( pkey+pkln+pvln ); |
940 |
if ( --i ) |
941 |
b = getfork(&fork[0].b, f->dx, n, i); |
942 |
else { |
943 |
QLOCKUN(1); |
944 |
b = GETLEAF(leaf, f->dx, n, lck); /* read 1st leaf */ |
945 |
/* continue only to do the LY */ |
946 |
} |
947 |
} |
948 |
done: |
949 |
QLOCKUN(1); |
950 |
return ret; /* go away */ |
951 |
} /* findkey */ |
952 |
|
953 |
|
954 |
|
955 |
|
956 |
|
957 |
/* ************************************************************ |
958 |
public functions |
959 |
*/ |
960 |
|
961 |
int qRemake ( Qdx *dx ) |
962 |
{ |
963 |
Key last; |
964 |
Bloke child; |
965 |
Fork fork; |
966 |
unsigned char lev = 0; /* the current child level to be indexed */ |
967 |
unsigned left = 0; /* block on child level */ |
968 |
unsigned iblk = 0; |
969 |
unsigned ikey = 0; |
970 |
unsigned ikeyl = 0; |
971 |
unsigned ipfx = 0; |
972 |
unsigned ispn = 0; |
973 |
|
974 |
fTrunc(&dx->mqx.fil, 0); |
975 |
fMap(&dx->mqx, 0); |
976 |
dx->fln = 1; /* pretend we had a root */ |
977 |
/* build tree */ |
978 |
for (dx->fln = 1;;lev++) { /* levels */ |
979 |
unsigned len = 0; /* current is n'th block on level */ |
980 |
Block *b = 0; |
981 |
unsigned char *base = 0; /* == b */ |
982 |
unsigned have = 0; /* room in current block b */ |
983 |
Dict *d = 0; /* of current block b */ |
984 |
unsigned next = left; /* child */ |
985 |
|
986 |
left = dx->fln; |
987 |
/* get child c and create child pointer in block b */ |
988 |
do { |
989 |
unsigned char vln = 0; /* actual byte length: dx->vsz or 0 */ |
990 |
unsigned char *key; |
991 |
unsigned char kln; |
992 |
unsigned need; |
993 |
Block *c = lev |
994 |
? getfork(&child.b, dx, next, lev) |
995 |
: GETLEAF(&child.b, dx, next, 0);/* must have full ex lock */ |
996 |
if ( ! c ) |
997 |
return eRr(ERR_NOMEM, "no child block" ); |
998 |
next = c->nxt; |
999 |
if ( ! c->ent ) /* OOPS empty node */ |
1000 |
continue; |
1001 |
key = GETENT(c, 0); |
1002 |
kln = c->dct[0].kln; |
1003 |
if ( ! b ) /* the leftmost child on this level */ |
1004 |
kln = 0; /* always 0 key on left */ |
1005 |
else if ( lev ) /* the child is a fork -- have to carry vals ... */ |
1006 |
vln = c->dct[0].vln ? dx->vsz : 0; /* ... if the child starts with */ |
1007 |
else if ( last.len == kln /* compare key to last of prev. ... */ |
1008 |
&& !memcmp( last.byt, key, kln ) /* ... which was saved in last */ |
1009 |
) { /* key spans blocks */ |
1010 |
LOG_DBG( LOG_DEBUG, "found span key '%b' child %u", |
1011 |
kln, key, c->num ); |
1012 |
vln = dx->vsz; /* qualify with value */ |
1013 |
} else { |
1014 |
unsigned char x = 0; |
1015 |
/* find prefix key */ |
1016 |
if ( kln > last.len ) |
1017 |
kln = last.len; |
1018 |
while ( x < kln && key[x] == last.byt[x] ) |
1019 |
x++; |
1020 |
if ( x < kln ) /* they differ at [x] */ |
1021 |
kln = x+1; |
1022 |
else /* equal on the shorter length */ |
1023 |
kln++; |
1024 |
if ( kln > c->dct[0].kln ) |
1025 |
return eRr(ERR_TRASH, |
1026 |
"bad key '%b' on level %u matching previous of length %u", |
1027 |
c->dct[0].kln, key, lev, last.len ); |
1028 |
LOG_DBG( LOG_DEBUG, "prefix on key '%b' child %u save %u", |
1029 |
kln, key, c->num, c->dct[0].kln - kln ); |
1030 |
ipfx += c->dct[0].kln - kln; |
1031 |
} |
1032 |
if ( vln ) |
1033 |
ispn++; |
1034 |
ikeyl += kln; |
1035 |
need = sizeof(Dict) + kln + vln + 4; |
1036 |
if ( need > have ) { |
1037 |
if ( b ) { |
1038 |
eRr(LOG_TRACE, "level %u block %u is full %u ent stack %u", |
1039 |
lev, b->num, b->ent, b->stk ); |
1040 |
b->nxt = dx->fln; /* link */ |
1041 |
if ( wrtfork(b, dx) ) |
1042 |
goto nowrite; |
1043 |
len++; |
1044 |
} |
1045 |
b = newfork(&fork.b, dx, lev+1); |
1046 |
base = (unsigned char*)b; |
1047 |
have = env.psz - DICTOFF; /* - ql->let; TODO: should we ??? */ |
1048 |
d = b->dct; |
1049 |
len++; |
1050 |
iblk++; |
1051 |
} |
1052 |
SETINT( base+(b->stk -= 4), c->num ); |
1053 |
d->kln = kln; |
1054 |
d->vln = vln ? 1 : 0; |
1055 |
if ( kln += vln ) /* key + consecutive qualifying value */ |
1056 |
memcpy( base+(b->stk -= kln), key, kln ); |
1057 |
d->pos = b->stk; |
1058 |
d++; |
1059 |
b->ent++; |
1060 |
ikey++; |
1061 |
have -= need; |
1062 |
/* save the last dance for me */ |
1063 |
memcpy(last.byt, GETENT(c, c->ent-1), last.len = c->dct[c->ent-1].kln); |
1064 |
} while (next); |
1065 |
|
1066 |
if ( !len ) |
1067 |
return eRr(ERR_TRASH, "level %u was empty", lev ); |
1068 |
/* close level */ |
1069 |
eRr(LOG_INFO, "closing level %u with %u blocks %u..%u", |
1070 |
lev, len, left, b->num ); |
1071 |
if ( 1 == len ) |
1072 |
b->num = 0; /* make it the root */ |
1073 |
if ( wrtfork(b, dx) ) |
1074 |
goto nowrite; |
1075 |
if ( 1 == len ) { |
1076 |
dx->dpt = b->lev; |
1077 |
break; |
1078 |
} |
1079 |
} |
1080 |
#define PM(a,b) (unsigned)(a*LULU(1000)/b) /* per mille */ |
1081 |
if (ikey && ikeyl) /* divisors */ |
1082 |
eRr(LOG_INFO, |
1083 |
"\tused %u fork blocks %u keys %u bytes avg %d\n" |
1084 |
"\t%u #spans (%d/oo) %u pfx (- %d/oo)", |
1085 |
iblk, ikey, ikeyl, ikeyl/ikey, |
1086 |
ispn, PM(ispn*dx->vsz,ikeyl), ipfx, PM(ipfx,(ipfx+ikeyl)) ); |
1087 |
dx->fln--; /* counted one over */ |
1088 |
fMap(&dx->mqx, dx->fln); |
1089 |
|
1090 |
return 0; |
1091 |
nowrite: |
1092 |
dx->fln = 0; |
1093 |
return eRr(LOG_SYSERR, "\twhen writing tree block" ); |
1094 |
} /* qRemake */ |
1095 |
|
1096 |
|
1097 |
|
1098 |
|
1099 |
static int load ( Qdx *dx, unsigned char *key ) |
1100 |
{ |
1101 |
int cmp = 0, keyl = 0, l; |
1102 |
QLoad *ql = dx->qld; |
1103 |
if (key) { |
1104 |
keyl = *key - dx->vsz; |
1105 |
l = keyl < ql->key.len ? keyl : ql->key.len; /* min */ |
1106 |
if ( !(cmp = memcmp(key+1, ql->key.byt, l)) ) |
1107 |
cmp = keyl - ql->key.len; |
1108 |
if (0 > cmp) |
1109 |
return eRr(LOG_ERROR, "qload key '%b' < last '%b'", |
1110 |
keyl, key+1, ql->key.len, ql->key.byt ); |
1111 |
} |
1112 |
/* flush ? */ |
1113 |
while ( !key /* end */ |
1114 |
|| QENTMAX == ql->got /* full */ |
1115 |
|| cmp /* next key */ |
1116 |
) { |
1117 |
Block *b = &ql->cur.b; |
1118 |
unsigned have = b->stk - DICTOFF - b->ent*sizeof(Dict) - ql->let; |
1119 |
unsigned need = sizeof(Dict) + ql->key.len; |
1120 |
unsigned want = need + ql->got * dx->vsz; |
1121 |
unsigned take = ql->got; |
1122 |
if ( have < want ) { /* you can't always get ... */ |
1123 |
if ( have <= need ) |
1124 |
take = 0; |
1125 |
else { |
1126 |
take = (have - need) / dx->vsz; |
1127 |
if ( 256 > take * dx->vsz ) |
1128 |
take = 0; |
1129 |
} |
1130 |
} |
1131 |
LOG_DBG( LOG_DEBUG, "qload '%b' got %u need %u want %u have %u take %u", |
1132 |
ql->key.len, ql->key.byt, ql->got, need, want, have, take ); |
1133 |
/* we have enough room in b for take entries */ |
1134 |
if ( take ) { |
1135 |
unsigned char *base = (unsigned char*)b; |
1136 |
unsigned char *s = base + b->stk; |
1137 |
int i = take; |
1138 |
Dict *d = &b->dct[ b->ent++ ]; |
1139 |
while ( i-- ) |
1140 |
memcpy( s -= dx->vsz, ql->val[i].byt, dx->vsz ); |
1141 |
memcpy( s -= ql->key.len, ql->key.byt, ql->key.len ); |
1142 |
b->stk = (unsigned short) (s - base); |
1143 |
LEAFSETPV( d, b->stk, take ); |
1144 |
d->kln = ql->key.len; |
1145 |
if ( ql->got > take ) |
1146 |
memcpy( ql->val, ql->val+take, sizeof(ql->val[0])*(ql->got - take) ); |
1147 |
ql->got -= take; |
1148 |
} |
1149 |
if ( ! ql->got ) |
1150 |
break; |
1151 |
if ( take ) /* key spans blocks */ |
1152 |
ql->span++; |
1153 |
ql->free += b->stk - DICTOFF - b->ent*sizeof(Dict); /* wasted */ |
1154 |
b->nxt = dx->lln; /* link */ |
1155 |
LOG_DBG( LOG_DEBUG, "qload: %u entries left, getting block %u", |
1156 |
ql->got, dx->lln ); |
1157 |
if ( wrtleaf(b, dx) ) |
1158 |
return eRr(LOG_SYSERR, "\twhen writing qload block" ); |
1159 |
/* so couldn't flush all, thus we need another block */ |
1160 |
eRr(LOG_TRACE, "new qload leaf %d", dx->lln); |
1161 |
newleaf(b = &ql->cur.b, dx); |
1162 |
/* will continue on end or keychange */ |
1163 |
LOG_DBG( LOG_TRACE, "new block %u has %u ent %u stk, will keep %u", |
1164 |
b->num, b->ent, b->stk, ql->let ); |
1165 |
} |
1166 |
if (cmp || !key) /* done with key: stats */ |
1167 |
if (ql->maxv < ql->tot) |
1168 |
ql->maxv = ql->tot; |
1169 |
if (key) { |
1170 |
if (cmp) { /* set new key */ |
1171 |
if (ql->got) |
1172 |
return eRr(ERR_IDIOT, "qload not empty on new key"); |
1173 |
ql->tot = 0; |
1174 |
memcpy(ql->key.byt, key+1, ql->key.len = keyl); |
1175 |
ql->keys++; |
1176 |
ql->keyl += keyl; |
1177 |
if (ql->maxl < (unsigned)keyl) |
1178 |
ql->maxl = keyl; |
1179 |
} |
1180 |
/* add new val */ |
1181 |
/* LOG_HEX( key->val.byt, dx->vsz ); */ |
1182 |
memcpy(ql->val[ ql->got++ ].byt, key+1+keyl, dx->vsz); |
1183 |
ql->vals++; |
1184 |
ql->tot++; |
1185 |
} else { |
1186 |
/* stats */ |
1187 |
unsigned size = dx->lln * dx->lsz; |
1188 |
unsigned sval = ql->vals * dx->vsz; |
1189 |
unsigned sove = dx->lln * DICTOFF + sizeof(Dict)*(ql->keys+ql->span); |
1190 |
|
1191 |
if (size && ql->keys) /* divisors */ |
1192 |
eRr(LOG_INFO, |
1193 |
"end qload:\n" |
1194 |
"\t#blks %u total len %u free %u (%d/oo) overhead %u (%d/oo)\n" |
1195 |
"\t#keys %u total len %u (%d/oo) avg %d max %u\n" |
1196 |
"\t#vals %u total len %u (%d/oo) avg %d max %u spans %u", |
1197 |
dx->lln, size, ql->free, PM(ql->free,size), sove, PM(sove,size), |
1198 |
ql->keys, ql->keyl, PM(ql->keyl,size), ql->keyl/ql->keys, ql->maxl, |
1199 |
ql->vals, sval, PM(sval,size), ql->vals/ql->keys, ql->maxv, ql->span |
1200 |
); |
1201 |
wrtleaf(&ql->cur.b, dx); |
1202 |
} |
1203 |
|
1204 |
return 0; |
1205 |
} /* load */ |
1206 |
|
1207 |
|
1208 |
static int ncomp; |
1209 |
#ifdef SORT_Q |
1210 |
static int compar (const void *pa, const void *pb) |
1211 |
{ |
1212 |
unsigned char *a = *(unsigned char **)pa; |
1213 |
unsigned char *b = *(unsigned char **)pb; |
1214 |
int l = *a, cmp; |
1215 |
if (l > *b) l = *b; |
1216 |
ncomp++; |
1217 |
return (cmp = memcmp(a+1, b+1, l)) ? cmp : (int)*a - (int)*b; |
1218 |
} |
1219 |
#endif |
1220 |
static OPT_INLINE int keyGt (unsigned char *a, unsigned char *b) |
1221 |
{ |
1222 |
int l = *a, cmp; |
1223 |
if (l > *b) l = *b; |
1224 |
ncomp++; |
1225 |
return 0 < (cmp = memcmp(a+1, b+1, l)) || (!cmp && *a > *b); |
1226 |
} |
1227 |
|
1228 |
|
1229 |
static int sort (Qdx *dx, unsigned char **key, int len) |
1230 |
{ |
1231 |
unsigned char rev[SORTKEYS]; |
1232 |
unsigned char *tmp; |
1233 |
int i, ret = 0; |
1234 |
lolo tm; |
1235 |
|
1236 |
tUpd(&tm); |
1237 |
if (2 > len) |
1238 |
goto cp; |
1239 |
#ifdef SORT_Q |
1240 |
qsort(key, len, sizeof key[0], compar); |
1241 |
#else |
1242 |
# define SORT_WH |
1243 |
/* basic weak-heap-sort (Dutton 93) just like rSort; c.f. uti.c |
1244 |
``On the Performance of WEAK-HEAPSORT´´ |
1245 |
http://www.informatik.uni-freiburg.de/~edelkamp/publications/Weakheap.pdf |
1246 |
for tuned versions see |
1247 |
http://www.jea.acm.org/2002/EdelkampHeapsort/ |
1248 |
and ``Pushing the Limits in Sequential Sorting´´ |
1249 |
http://www.informatik.uni-freiburg.de/~edelkamp/publications/Improvements.pdf |
1250 |
*/ |
1251 |
memset(rev, 0, len); |
1252 |
/* weak-heap-forestify |
1253 |
every node roots a weak heap AND is <= root |
1254 |
(especially the leftist independent roots are <= root) |
1255 |
*/ |
1256 |
for ( i=len; --i; ) { |
1257 |
int g = i; /* granma is 1st left i or root, if i is totally leftist ;) */ |
1258 |
while (!(1&g)) /* we're left child */ |
1259 |
g >>= 1; /* shift granma right :( */ |
1260 |
g >>= 1; |
1261 |
if (!keyGt(key[i], key[g])) /* merge: swap if g < i */ |
1262 |
continue; |
1263 |
tmp = key[i]; key[i] = key[g]; key[g] = tmp; /* swap */ |
1264 |
rev[i] = 1; |
1265 |
} |
1266 |
|
1267 |
/* re-weak-heap-forestify */ |
1268 |
for ( i=len; --i>1; ) { |
1269 |
int j=1, left; |
1270 |
/* rev[i] = 0; i's REVBIT no longer needed */ |
1271 |
/* follow path of left childs (independent roots) */ |
1272 |
while (i > (left = (j<<1) + rev[j])) |
1273 |
j = left; |
1274 |
/* put max of them and i (new root) at i */ |
1275 |
do { |
1276 |
if (!keyGt(key[j], key[i])) /* merge: swap if i < j */ |
1277 |
continue; |
1278 |
tmp = key[i]; key[i] = key[j]; key[j] = tmp; /* swap */ |
1279 |
rev[j] ^= 1; /* toggle revbit */ |
1280 |
} while (j >>= 1); |
1281 |
} |
1282 |
/* rev[1] = 0; 2... have been cleared */ |
1283 |
key[len] = key[0]; /* initial root still untouched biggest */ |
1284 |
#endif |
1285 |
#ifndef SORT_WH |
1286 |
key--; |
1287 |
#endif |
1288 |
eRr(LOG_VERBOSE, "%d sorted %d keys with %d comparisions -- loading ...", |
1289 |
tUpd(&tm), len, ncomp); |
1290 |
cp: |
1291 |
/* finally cp to dest */ |
1292 |
if (FIL_NONE == dx->qld->fil) { /* internal -- load */ |
1293 |
unsigned char **e = key+len+1; |
1294 |
while (++key != e) |
1295 |
if ((ret = load(dx, *key))) |
1296 |
goto err; |
1297 |
eRr(LOG_VERBOSE, "%d loaded", tUpd(&tm)); |
1298 |
} else { /* external -- write MERGECHK blks */ |
1299 |
unsigned char **e = key+len+1; |
1300 |
unsigned char *out = dx->qld->mout, *end = out+MERGEBLK, *dst = out; |
1301 |
unsigned blks = 0; |
1302 |
|
1303 |
while (++key != e) { |
1304 |
unsigned l = 1+**key; |
1305 |
if (end <= dst+l) { |
1306 |
*dst = 0; |
1307 |
if (0 > (ret = fWrite(&dx->qld->fil, out, MERGEBLK))) |
1308 |
goto err; |
1309 |
dst = out; |
1310 |
if (MERGECHK == ++blks) |
1311 |
return eRr(ERR_IDIOT, "CHUNK!"); |
1312 |
} |
1313 |
memcpy(dst, *key, l); |
1314 |
dst += l; |
1315 |
} |
1316 |
do { |
1317 |
*dst = 0; |
1318 |
fWrite(&dx->qld->fil, out, MERGEBLK); |
1319 |
dst = out; |
1320 |
} while (MERGECHK != ++blks); |
1321 |
eRr(LOG_VERBOSE, "%d wrote %d blocks ...", tUpd(&tm), blks); |
1322 |
dx->qld->blks += MERGECHK; |
1323 |
ret = 0; |
1324 |
} |
1325 |
err: |
1326 |
return ret; |
1327 |
} /* sort */ |
1328 |
|
1329 |
|
1330 |
static int merge (Qdx *dx, QLoad *ql) |
1331 |
{ |
1332 |
#ifndef MERGEMULTIPASS |
1333 |
# define chunk MERGECHK |
1334 |
#else |
1335 |
unsigned char *const out = ql->mout, *const end = out+MERGEBLK; |
1336 |
unsigned chunk = MERGECHK, pass = 1; |
1337 |
file fil2 = FIL_NONE; |
1338 |
#endif |
1339 |
lolo tm; |
1340 |
int ret = 0; |
1341 |
|
1342 |
eRr(LOG_VERBOSE, "%d-way merging %d blocks in %d chunks of %d ...", |
1343 |
MERGEWAY, ql->blks, (ql->blks+MERGECHK-1)/MERGECHK, MERGECHK); |
1344 |
tUpd(&tm); |
1345 |
ncomp = 0; |
1346 |
#ifndef MERGEMULTIPASS |
1347 |
{ /* single pass */ |
1348 |
#else |
1349 |
if (ql->blks > MERGECHK*MERGEWAY) /* multipass */ |
1350 |
fOpen(&fil2, 0, 0); |
1351 |
for (;; pass++) { |
1352 |
#endif |
1353 |
unsigned blkno = 0; |
1354 |
#ifndef MERGEMULTIPASS |
1355 |
# define inf &ql->fil |
1356 |
{ /* single run in single pass */ |
1357 |
#else |
1358 |
unsigned nchunk = chunk*MERGEWAY; /* lastrun, if >= ql->blks */ |
1359 |
int lastpass = nchunk >= ql->blks, run = 1; |
1360 |
file *inf, *outf; |
1361 |
|
1362 |
if (1 & pass) { |
1363 |
inf = &ql->fil; outf = &fil2; |
1364 |
} else { |
1365 |
inf = &fil2; outf = &ql->fil; |
1366 |
} |
1367 |
if (!lastpass) |
1368 |
fSeek(outf, 0); |
1369 |
while (blkno < ql->blks) { |
1370 |
/* merge MERGEWAY chunks of length chunk to n chunks of size nchunk */ |
1371 |
/* only one run in last pass */ |
1372 |
unsigned char *dst = out; |
1373 |
unsigned blks = 0; |
1374 |
#endif |
1375 |
#ifdef MERGEDEBUG |
1376 |
unsigned char lastkey[256]; |
1377 |
#endif |
1378 |
struct { |
1379 |
unsigned char *blk; /* merge blk */ |
1380 |
unsigned char *key; /* current key */ |
1381 |
unsigned nxt; /* next block no */ |
1382 |
unsigned end; /* end */ |
1383 |
} in[MERGEWAY], *nxt[MERGEWAY], *tmp; |
1384 |
unsigned ways = 0, i; |
1385 |
|
1386 |
for (ways=0; ways<MERGEWAY; ways++) { /* prefill */ |
1387 |
if (blkno >= ql->blks) |
1388 |
break; |
1389 |
in[ways].key = in[ways].blk = ql->buf.m[ways]; |
1390 |
if (MERGEBLK != (ret = |
1391 |
fPread(inf, in[ways].blk, MERGEBLK, blkno*MERGEBLK))) |
1392 |
goto done; |
1393 |
in[ways].nxt = blkno+1; |
1394 |
if (ql->blks < (blkno += chunk)) blkno = ql->blks; |
1395 |
in[ways].end = blkno; |
1396 |
/* insert */ |
1397 |
for (i=0; i<ways && !keyGt(nxt[i]->key, in[ways].key); ) i++; |
1398 |
if (i < ways) memmove(nxt+i+1, nxt+i, (ways-i)*sizeof nxt[0]); |
1399 |
nxt[i] = in+ways; |
1400 |
} |
1401 |
#ifndef MERGEMULTIPASS |
1402 |
eRr(LOG_VERBOSE, "%d prepared %d blocks %d ways", tUpd(&tm), blkno, ways); |
1403 |
#else |
1404 |
eRr(LOG_VERBOSE, "%d prepared %d/%d blocks %d ways run %d of pass %d%s", |
1405 |
tUpd(&tm), blkno, ql->blks, ways, run, pass, lastpass?" (last)":""); |
1406 |
#endif |
1407 |
#ifdef MERGEDEBUG |
1408 |
lastkey[0] = 0; |
1409 |
for (i=0; i<ways; i++) |
1410 |
eRr(LOG_VERBOSE, "%d %b", i, *nxt[i]->key, nxt[i]->key+1); |
1411 |
#endif |
1412 |
for (;;) { /* merge */ |
1413 |
int len = *nxt[0]->key + 1; |
1414 |
/* stuff away 1st key */ |
1415 |
#ifdef MERGEDEBUG |
1416 |
if (keyGt(lastkey, nxt[0]->key)) { |
1417 |
ret = eRr(ERR_IDIOT, "'%b' > '%b' way %d", |
1418 |
*lastkey, lastkey+1, *nxt[0]->key, nxt[0]->key+1, nxt[0]-in); |
1419 |
goto done; |
1420 |
} |
1421 |
memcpy(lastkey, nxt[0]->key, len); |
1422 |
#endif |
1423 |
#ifdef MERGEMULTIPASS |
1424 |
if (lastpass) { |
1425 |
#endif |
1426 |
if ((ret = load(dx, nxt[0]->key))) goto done; |
1427 |
#ifdef MERGEMULTIPASS |
1428 |
} else { |
1429 |
if (len >= end - dst) { /* flush out block */ |
1430 |
# ifdef MERGEDEBUG |
1431 |
eRr(LOG_VERBOSE, "out blk %d len %d %b", blks, dst-out, *out, out+1); |
1432 |
# endif |
1433 |
*dst = 0; /* terminate */ |
1434 |
if (MERGEBLK != (ret = fWrite(outf, dst = out, MERGEBLK))) |
1435 |
goto done; |
1436 |
blks++; |
1437 |
} |
1438 |
memcpy(dst, nxt[0]->key, len); |
1439 |
dst += len; |
1440 |
} |
1441 |
#endif |
1442 |
/* reload nxt[0] */ |
1443 |
nxt[0]->key += len; |
1444 |
if (!*nxt[0]->key) { /* eob */ |
1445 |
if (nxt[0]->nxt != nxt[0]->end) { |
1446 |
if (MERGEBLK != (ret = fPread(inf, nxt[0]->key = nxt[0]->blk, |
1447 |
MERGEBLK, nxt[0]->nxt++ * MERGEBLK))) |
1448 |
goto done; |
1449 |
} |
1450 |
if (!*nxt[0]->key) { /* done with this stream */ |
1451 |
if (!--ways) |
1452 |
break; |
1453 |
memmove(nxt, nxt+1, ways*sizeof nxt[0]); |
1454 |
continue; |
1455 |
} |
1456 |
} |
1457 |
#ifdef MERGEDEBUG |
1458 |
if (keyGt(lastkey, nxt[0]->key)) { |
1459 |
ret = eRr(ERR_IDIOT, "'%b' > '%b' in blk %d off %d way %d", |
1460 |
*lastkey, lastkey+1, *nxt[0]->key, nxt[0]->key+1, |
1461 |
nxt[0]->nxt-1, nxt[0]->key-nxt[0]->blk, nxt[0]-in); |
1462 |
goto done; |
1463 |
} |
1464 |
#endif |
1465 |
if (1 == ways) continue; |
1466 |
/* |
1467 |
this may look like pretty brute force. |
1468 |
However, in practice we rarely see reinsertions at all |
1469 |
and get by with about 1.1 comparisions per key. |
1470 |
With sorted input we have even less than 1 per key, |
1471 |
due to the continue (last exit oneway) above. |
1472 |
*/ |
1473 |
for (i=1; i<ways && keyGt(nxt[0]->key, nxt[i]->key); ) i++; |
1474 |
/* nxt[0]->key is not gt nxt[i]->key: move to --i */ |
1475 |
if (--i) { |
1476 |
tmp = nxt[0]; |
1477 |
memmove(nxt, nxt+1, i*sizeof nxt[0]); |
1478 |
nxt[i] = tmp; |
1479 |
} |
1480 |
} /* merge */ |
1481 |
ret = 0; |
1482 |
#ifndef MERGEMULTIPASS |
1483 |
eRr(LOG_VERBOSE, "%d merged %d comparisions", tUpd(&tm), ncomp); |
1484 |
} /* single run */ |
1485 |
#else |
1486 |
eRr(LOG_VERBOSE, "%d merged %d comparisions %d/%d blocks pass %d", |
1487 |
tUpd(&tm), ncomp, blks, nchunk, pass); |
1488 |
if (lastpass) |
1489 |
goto done; |
1490 |
if (nchunk <= blks) |
1491 |
eRr(ERR_IDIOT, "wrote %d >= %d blks pass %d", blks, nchunk, pass); |
1492 |
do { |
1493 |
*dst = 0; |
1494 |
fWrite(outf, out, MERGEBLK); |
1495 |
dst = out; |
1496 |
} while (nchunk != ++blks); |
1497 |
run++; |
1498 |
} /* for new chunks */ |
1499 |
chunk = nchunk; |
1500 |
#endif |
1501 |
} /* pass */ |
1502 |
done: |
1503 |
#ifdef MERGEMULTIPASS |
1504 |
if (FIL_NONE != fil2) fClose(&fil2); |
1505 |
#endif |
1506 |
return ret; |
1507 |
} /* merge */ |
1508 |
|
1509 |
|
1510 |
int qLoad ( Qdx *dx, Key *key ) |
1511 |
{ |
1512 |
QLoad *ql = dx->qld; |
1513 |
int ret, l = key->len+dx->vsz; |
1514 |
|
1515 |
if ( !ql ) { |
1516 |
if (! key->val.len) return 0; /* double close ? */ |
1517 |
ncomp = 0; |
1518 |
if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg)) |
1519 |
return eRr(ERR_TRASH, "need excl write access to load index"); |
1520 |
ql = dx->qld = (QLoad*)mAlloc(sizeof (QLoad)); |
1521 |
if ( dx->let > 50 ) |
1522 |
dx->let = 50; |
1523 |
ql->let = dx->let * dx->lsz / 100; |
1524 |
fTrunc(&dx->mqd, 0); |
1525 |
dx->lln = 0; |
1526 |
newleaf(&ql->cur.b, dx); |
1527 |
ql->fil = FIL_NONE; |
1528 |
ql->nkey = ql->buf.s.keyb; |
1529 |
ql->nmem = ql->buf.s.memb; |
1530 |
eRr(LOG_INFO, |
1531 |
"loading %d/%dK mem, %d keys, %dK blocks in %d-chunks, %d-way merge" |
1532 |
#ifdef MERGEMULTIPASS |
1533 |
" (multipass)" |
1534 |
#endif |
1535 |
, SORTUSE>>10, SORTMEM>>10, SORTKEYS, MERGEBLK>>10, MERGECHK, MERGEWAY |
1536 |
); |
1537 |
} |
1538 |
if ( !key->val.len /* done */ |
1539 |
|| SORTKEYS-2 <= ql->nkey-ql->buf.s.keyb /* reserve 1 for WHS */ |
1540 |
|| SORTUSE <= ql->nmem-ql->buf.s.memb |
1541 |
) { |
1542 |
if (key->val.len && FIL_NONE == ql->fil) |
1543 |
fOpen(&ql->fil, 0, 0); |
1544 |
if ((ret = sort(dx, ql->buf.s.keyb, ql->nkey - ql->buf.s.keyb)) |
1545 |
|| !key->val.len |
1546 |
) |
1547 |
goto done; |
1548 |
ql->nkey = ql->buf.s.keyb; |
1549 |
ql->nmem = ql->buf.s.memb; |
1550 |
} |
1551 |
/* add key to sort buffer */ |
1552 |
if (key->val.len == dx->vsz && l < 256) { |
1553 |
*ql->nkey++ = ql->nmem; |
1554 |
*ql->nmem = (unsigned char)l; |
1555 |
memcpy(++ ql->nmem, key->byt, key->len); |
1556 |
memcpy(ql->nmem += key->len, key->val.byt, dx->vsz); |
1557 |
ql->nmem += dx->vsz; |
1558 |
return 0; |
1559 |
} |
1560 |
ret = eRr(ERR_INVAL, "bad klen %d vlen %d", key->len, key->val.len); |
1561 |
done: |
1562 |
if (FIL_NONE != ql->fil) { |
1563 |
if (!ret) ret = merge(dx, ql); |
1564 |
fClose(&ql->fil); |
1565 |
} |
1566 |
if (!ret) { |
1567 |
load(dx, 0); /* close last leaf */ |
1568 |
qRemake(dx); /* build tree */ |
1569 |
} |
1570 |
mFree(dx->qld); |
1571 |
dx->qld = 0; |
1572 |
return ret; |
1573 |
} /* qLoad */ |
1574 |
|
1575 |
|
1576 |
int qLoadf ( Qdx *dx, file *f ) |
1577 |
{ |
1578 |
Key key; |
1579 |
lolo tm; |
1580 |
|
1581 |
FIL_DEFBUF(*f); |
1582 |
memset( &key, 0, sizeof(key) ); |
1583 |
key.val.len = dx->vsz; |
1584 |
tUpd(&tm); |
1585 |
while ( fGets(&fb) && fb.l ) { |
1586 |
if ( qMkKeyVal(dx, &key, fb.p, fb.l) ) { |
1587 |
eRr(LOG_VERBOSE, "bad key at line %d offset %d", fb.n, fb.o); |
1588 |
continue; |
1589 |
} |
1590 |
if ( qLoad( dx, &key ) ) |
1591 |
break; |
1592 |
if ( !(fb.n & 0x3ff) ) |
1593 |
eRr(LOG_VERBOSE, "%dK lines", fb.n >> 10 ); |
1594 |
} |
1595 |
eRr(LOG_INFO, "%d did %d lines %d bytes (%s)", tUpd(&tm), fb.n, fb.o, |
1596 |
FIL_NONE == fb.f ? "eof" : "blank" ); |
1597 |
*f = fb.f; |
1598 |
key.val.len = 0; /* close it */ |
1599 |
return qLoad(dx, &key); |
1600 |
} /* qLoadf */ |
1601 |
|
1602 |
|
1603 |
/* TODO: this function is an awful mess and needs complete rewrite, KR |
1604 |
*/ |
1605 |
int qSet ( Qdx *dx, Key *key ) |
1606 |
{ |
1607 |
Bloke blk[2]; |
1608 |
Key up; |
1609 |
Block * b = &blk[0].b; /* mainblock */ |
1610 |
unsigned char * base = blk[0].base; |
1611 |
int ret = 0; |
1612 |
unsigned short e, i = 0; |
1613 |
Block *s = 0; /* sibling */ |
1614 |
Dict *d; |
1615 |
unsigned char *sbase, *src, *v, *stk; |
1616 |
FindKey f; |
1617 |
int hadkey, split = 0, insibling = 0; |
1618 |
unsigned need; |
1619 |
unsigned sibnum = 0; |
1620 |
unsigned char lev = 0; |
1621 |
PrVal pv; |
1622 |
#ifdef BUILD_SHMODE /* locks we hold on block, sibling, tree */ |
1623 |
int lb=-1, ls=-1, lt=0; |
1624 |
#endif |
1625 |
|
1626 |
f.dx = dx; |
1627 |
f.key = key; |
1628 |
if ( dx->ksz && key->len > dx->ksz ) |
1629 |
key->len = dx->ksz; |
1630 |
if ( FINDKEY(b, &f, LOCK_WRITE) ) { |
1631 |
b = 0; |
1632 |
DONE((ERR_TRASH, "could not find key")); |
1633 |
} |
1634 |
#ifdef BUILD_SHMODE |
1635 |
lb = b->num; |
1636 |
#endif |
1637 |
e = (QENTMASK & f.e[0]); |
1638 |
d = b->dct + e; |
1639 |
if ( ! (hadkey = QMATCH & f.e[0]) ) /* new key */ |
1640 |
need = key->len + sizeof(Dict) + dx->vsz; |
1641 |
else { |
1642 |
unsigned vln = LEAFVLN(d); |
1643 |
v = base+LEAFPOS(d)+d->kln; |
1644 |
if ( 1 == vln ) { /* check stopword -- value entirely zeroed */ |
1645 |
int l = dx->vsz; |
1646 |
unsigned char *C = v; |
1647 |
while ( l-- && !*C++ ) /* huh huh, he said "C++" */ |
1648 |
; |
1649 |
if ( -1 == l ) { |
1650 |
LOG_DBG(LOG_VERBOSE, "key '%b' is a stopword", key->len, key->byt); |
1651 |
return QST_STPWRD; |
1652 |
} |
1653 |
/* TODO: check QST_UNIKEY */ |
1654 |
} |
1655 |
i = valpos( v, vln, dx->vsz, key->val.byt ); |
1656 |
if ( QMATCH & i ) { |
1657 |
LOG_DBG( LOG_DEBUG, |
1658 |
"adding key '%b'/%s: already there pos %u", |
1659 |
key->len, key->byt, PRVAL( pv, key->val ), i & ~QMATCH ); |
1660 |
return QST_FOUND; /* value already there */ |
1661 |
} |
1662 |
need = dx->vsz; |
1663 |
} |
1664 |
LOG_DBG( LOG_DEBUG, |
1665 |
"adding key '%b'/%s in block %d had %d ent %d stk %d need %d", |
1666 |
key->len, key->byt, PRVAL( pv, key->val ), |
1667 |
b->num, hadkey, b->ent, b->stk, need ); |
1668 |
|
1669 |
if ( need > b->stk - DICTOFF - b->ent*sizeof(Dict) ) { |
1670 |
/* now that's bad: not enough space |
1671 |
create new block s, move last entries there |
1672 |
starting from the end, entries (including the new one) are collected, |
1673 |
until their combined size reaches more than half a block. |
1674 |
If the combined size then is more than 2/3 a block, |
1675 |
and the last entry alone takes more than 1/3 a block, it is split. |
1676 |
TODO: check existing sibling for free space |
1677 |
*/ |
1678 |
unsigned half = (dx->lsz - DICTOFF) >> 1; |
1679 |
unsigned tot = 0; |
1680 |
unsigned mv = b->ent; |
1681 |
|
1682 |
LOG_DBG( LOG_DEBUG, "splitting block %d need %d have %d (stk %d ent %d)", |
1683 |
b->num, need, b->stk - DICTOFF - b->ent*sizeof(Dict), b->stk, b->ent ); |
1684 |
if ( need > (dx->lsz - DICTOFF) / 3 ) |
1685 |
DONE((ERR_IDIOT, "OOPS! excessive need for %d bytes", need)); |
1686 |
/* figure out split position */ |
1687 |
if ( !hadkey && b->ent == e ) { |
1688 |
tot = need; |
1689 |
insibling = !0; |
1690 |
} |
1691 |
d = b->dct+mv; |
1692 |
while ( d--, mv-- ) { |
1693 |
unsigned sz = sizeof(Dict)+d->kln+LEAFVLN(d)*dx->vsz; |
1694 |
if ( half > (tot += sz) ) { /* sz fits */ |
1695 |
if ( e == mv && !hadkey ) { |
1696 |
/* |
1697 |
check size of new entry. |
1698 |
In case of hadkey, we don't bother for the dx->vsz bytes. |
1699 |
*/ |
1700 |
if ( half < (tot + need) ) { |
1701 |
if ( (half - tot) > need/2 ) { /* more balanced if we move */ |
1702 |
tot += need; |
1703 |
insibling = !0; |
1704 |
} |
1705 |
break; |
1706 |
} |
1707 |
tot += need; |
1708 |
insibling = !0; |
1709 |
} |
1710 |
if ( mv ) |
1711 |
continue; /* else croak on ! mv below */ |
1712 |
} |
1713 |
/* exceeded half -- maybe we move entry mv anyway */ |
1714 |
if ( tot > (dx->lsz - DICTOFF)*2/3 ) { |
1715 |
if ( tot-sz < (dx->lsz - DICTOFF)/3 ) |
1716 |
split = !0; |
1717 |
else { /* undo -- don't move this entry */ |
1718 |
tot -= sz; |
1719 |
d++; mv++; |
1720 |
} |
1721 |
} else if ( ! mv ) |
1722 |
DONE((ERR_IDIOT, "OOPS! didn't find end tot %d bytes", tot)); |
1723 |
break; |
1724 |
} /* while mv-- */ |
1725 |
if ( mv < e || (mv == e && hadkey && !split) ) |
1726 |
insibling = !0; |
1727 |
/* now mv is the first entry to move */ |
1728 |
/* create new leaf block dx->lln */ |
1729 |
#ifdef BUILD_SHMODE |
1730 |
for (;;) { |
1731 |
int size; |
1732 |
QLOCKEX(dx->lln<<1); |
1733 |
size = fSize( dx->mqd ); |
1734 |
if ( size == (int)(dx->lln * dx->lsz) ) { |
1735 |
ls = dx->lln; |
1736 |
break; |
1737 |
} |
1738 |
QLOCKUN(dx->lln<<1); |
1739 |
if ( size % dx->lsz || size < (int)(dx->lln * dx->lsz) ) |
1740 |
DONE((ERR_TRASH, "bad mqd size 0x%08x", size)); |
1741 |
dx->lln = size / dx->lsz; |
1742 |
} |
1743 |
/* have lock on new leaf block dx->lln */ |
1744 |
#endif |
1745 |
newleaf(s = &blk[1].b, dx); |
1746 |
sbase = (unsigned char*)s; /* == blk[1].base */ |
1747 |
LOG_DBG( LOG_DEBUG, |
1748 |
"split: move %d of %d to new blk %d e %d tot %d insibling %d split %d", |
1749 |
mv, b->ent, s->num, e, tot, insibling, split ); |
1750 |
s->nxt = b->nxt; |
1751 |
b->nxt = s->num; |
1752 |
if ( split ) { |
1753 |
/* now total includes full size of entry mv. |
1754 |
if we included a new entry, it might even exceed lsz. |
1755 |
calculate sizes for both blocks w/o mv's values. |
1756 |
*/ |
1757 |
unsigned esz = sizeof(Dict)+d->kln; /* entry's base cost */ |
1758 |
unsigned vln = LEAFVLN(d); |
1759 |
unsigned keep, diff, mvval, kpval, mvsiz, pos = LEAFPOS(d); |
1760 |
/* |
1761 |
from currently used space, substract tot (including all |
1762 |
values and possibly new key "need"), but add entry base cost. |
1763 |
that should be the size remaining if we where moving |
1764 |
all of the values but keep an entry and key. |
1765 |
maybe slightly biased if need is dx->vsz (the hadkey case). |
1766 |
*/ |
1767 |
keep = dx->lsz - b->stk + b->ent*sizeof(Dict) + need - tot + esz; |
1768 |
tot -= vln * dx->vsz; /* undo entries */ |
1769 |
diff = (keep > tot ? keep - tot : tot - keep) / dx->vsz; |
1770 |
if ( diff > vln ) |
1771 |
DONE((ERR_IDIOT, "OOPS! got diff %d vln %d entry %d tot %d keep %d", |
1772 |
diff, vln, mv, tot, keep)); |
1773 |
mvval = (vln - diff)/2; |
1774 |
if ( keep > tot ) |
1775 |
mvval += diff; |
1776 |
kpval = vln - mvval; |
1777 |
if ( ! mvval || ! kpval ) { |
1778 |
LOG_DBG( LOG_WARN, "bad mv/kpval %d/%d keep %d tot %d vln %d diff %d", |
1779 |
mvval, kpval, keep, tot, vln, diff ); |
1780 |
if ( ! mvval ) |
1781 |
kpval = vln - (mvval = 1); |
1782 |
else |
1783 |
mvval = vln - (kpval = 1); |
1784 |
} |
1785 |
tot += mvsiz = mvval*dx->vsz; |
1786 |
keep += kpval*dx->vsz; |
1787 |
src = base+pos; |
1788 |
LOG_DBG( LOG_DEBUG, |
1789 |
"split: entry %d '%b' mv %d of %d values, i %d keep %d tot %d", |
1790 |
mv, d->kln, src, mvval, vln, i, keep, tot ); |
1791 |
if ( keep > (dx->lsz-DICTOFF) || tot > (dx->lsz-DICTOFF) ) |
1792 |
DONE((ERR_IDIOT, |
1793 |
"OOPS! got diff %d vln %d mvval %d entry %d tot %d keep %d", |
1794 |
diff, vln, mvval, mv, tot, keep)); |
1795 |
/* SIGH -- do it */ |
1796 |
memcpy( sbase + (s->stk -= mvsiz), |
1797 |
src+d->kln+kpval*dx->vsz, mvsiz ); |
1798 |
if ( e == mv && hadkey && i > kpval ) { |
1799 |
insibling = !0; |
1800 |
s->stk -= dx->vsz; |
1801 |
i -= kpval; |
1802 |
memmove( sbase+s->stk, sbase+s->stk+dx->vsz, i*dx->vsz ); |
1803 |
memcpy( sbase+s->stk+i*dx->vsz, key->val.byt, dx->vsz ); |
1804 |
mvval++; |
1805 |
} |
1806 |
memcpy( sbase + (s->stk -= d->kln), src, d->kln ); |
1807 |
LEAFSETPVK( s->dct, s->stk, mvval, d->kln ); |
1808 |
s->ent = 1; |
1809 |
memmove( src + mvsiz, src, d->kln+kpval*dx->vsz ); |
1810 |
pos += mvsiz; |
1811 |
LEAFSETPV( d, pos, kpval ); |
1812 |
d++; mv++; |
1813 |
} |
1814 |
/* now entries mv..b->ent are moved, mv > 0 */ |
1815 |
b->stk = LEAFPOS(b->dct+mv-1); /* pre-adjust stack */ |
1816 |
if ( mv < e ) { /* move entries mv..e-1 */ |
1817 |
/* used stack space is from pos of entry e-1 to end of entry mv |
1818 |
(might differ from pos of mv-1 due to split) |
1819 |
*/ |
1820 |
unsigned from = LEAFPOS(b->dct+e-1), nent = e-mv, |
1821 |
sz = LEAFPOS(b->dct+mv)+b->dct[mv].kln |
1822 |
+LEAFVLN(b->dct+mv)*dx->vsz - from; |
1823 |
int adj = (s->stk -= sz) - from; /* entry position adjustment */ |
1824 |
memcpy( sbase + s->stk, base+from, sz ); |
1825 |
memcpy( s->dct+s->ent, b->dct+mv, nent*sizeof(Dict) ); |
1826 |
d = s->dct + s->ent; |
1827 |
s->ent += nent; |
1828 |
for ( ; nent--; d++ ) |
1829 |
LEAFPOSADD( d, adj ); |
1830 |
} |
1831 |
if ( insibling && mv <= e ) { /* now mv or insert e, unless done on split */ |
1832 |
d = b->dct + e; |
1833 |
if ( hadkey ) { /* mv existing entry e >= mv, add new val */ |
1834 |
/* i is valpos */ |
1835 |
unsigned vln = LEAFVLN(d); |
1836 |
src = base+LEAFPOS(d)+d->kln; |
1837 |
if ( i < vln ) { |
1838 |
unsigned sz = (vln - i)*dx->vsz; |
1839 |
memcpy( sbase + (s->stk -= sz), src + i*dx->vsz, sz ); |
1840 |
} |
1841 |
memcpy( sbase + (s->stk -= dx->vsz), key->val.byt, dx->vsz ); |
1842 |
if ( i ) { |
1843 |
unsigned sz = i*dx->vsz; |
1844 |
memcpy( sbase + (s->stk -= sz), src, sz ); |
1845 |
} |
1846 |
memcpy( sbase + (s->stk -= d->kln), base+LEAFPOS(d), d->kln ); |
1847 |
LEAFSETPVK( s->dct + s->ent, s->stk, vln+1, d->kln ); |
1848 |
e++; /* e is moved */ |
1849 |
} else { |
1850 |
memcpy( sbase + (s->stk -= dx->vsz), key->val.byt, dx->vsz ); |
1851 |
memcpy( sbase + (s->stk -= key->len), key->byt, key->len ); |
1852 |
LEAFSETPVK( s->dct + s->ent, s->stk, 1, key->len ); |
1853 |
} |
1854 |
s->ent++; |
1855 |
} /* mv or insert e */ |
1856 |
if ( e < b->ent && mv < b->ent ) { /* move entries max(e,mv) to end */ |
1857 |
unsigned fst = mv < e ? e : mv; |
1858 |
/* used stack space is from pos of entry b->ent-1 (the original b->stk) |
1859 |
to end of entry fst (might differ from pos of fst-1 due to split) |
1860 |
*/ |
1861 |
unsigned from = LEAFPOS(b->dct+b->ent-1), nent = b->ent-fst, |
1862 |
sz = LEAFPOS(b->dct+fst)+b->dct[fst].kln |
1863 |
+LEAFVLN(b->dct+fst)*dx->vsz - from; |
1864 |
int adj = (s->stk -= sz) - from; /* entry position adjustment */ |
1865 |
LOG_DBG( LOG_DEBUG, "split: entries %d..%d sz %d from %d to %d", |
1866 |
fst, b->ent-1, sz, from, s->stk ); |
1867 |
memcpy( sbase + s->stk, base+from, sz ); |
1868 |
memcpy( s->dct+s->ent, b->dct+fst, nent*sizeof(Dict) ); |
1869 |
d = s->dct + s->ent; |
1870 |
s->ent += nent; |
1871 |
for ( ; nent--; d++ ) { |
1872 |
#if !defined(NDEBUG) |
1873 |
unsigned p = LEAFPOS(d); |
1874 |
#endif |
1875 |
LEAFPOSADD( d, adj ); |
1876 |
#if !defined(NDEBUG) |
1877 |
LOG_DBG( LOG_DEBUG, "new ent %d p/v/k %d/%d/%d o %d", |
1878 |
d - s->dct, LEAFPOS(d), LEAFVLN(d), d->kln, p ); |
1879 |
#endif |
1880 |
} |
1881 |
} |
1882 |
b->ent = mv; /* kill entries */ |
1883 |
LOG_DBG( LOG_DEBUG, |
1884 |
"split: blks %d/%d with %d/%d entries, s starts '%b'", |
1885 |
b->num, s->num, b->ent, s->ent, s->dct[0].kln, sbase+LEAFPOS(s->dct) ); |
1886 |
} /* new sibling */ |
1887 |
|
1888 |
if ( ! insibling ) { |
1889 |
/* now we do have enough space */ |
1890 |
d = b->dct + e; |
1891 |
stk = base + b->stk; /* bottom of data stack */ |
1892 |
if ( hadkey ) { /* insert value at i */ |
1893 |
unsigned char *val = base + LEAFPOS(d)+d->kln+i*dx->vsz; |
1894 |
need = dx->vsz; |
1895 |
#if 0 |
1896 |
LOG_DBG( LOG_DEBUG, |
1897 |
"in entry %d '%b' val %d: memmove( %p %p %d) val %p need %d", |
1898 |
e, d->kln, base+LEAFPOS(d), i, |
1899 |
stk-need, stk, (val-stk)+need, val, need ); |
1900 |
#endif |
1901 |
memmove( stk - need, stk, (val - stk)+need ); |
1902 |
memcpy( val - need, key->val.byt, dx->vsz ); |
1903 |
LEAFVLNADD( d, 1 ); |
1904 |
} else { /* insert key, value and dict entry */ |
1905 |
/* take dict and data space from entry e */ |
1906 |
unsigned char *nu; |
1907 |
need = key->len + dx->vsz; |
1908 |
if ( e == b->ent ) |
1909 |
nu = base + b->stk - need; |
1910 |
else { |
1911 |
/* at end of next entry, i.e. end of block or beg of previous */ |
1912 |
nu = base + LEAFPOS(d) + d->kln + LEAFVLN(d)*dx->vsz; |
1913 |
memmove( stk - need, stk, nu - stk ); |
1914 |
nu -= need; |
1915 |
memmove( d+1, d, (b->ent - e)*sizeof(Dict) ); |
1916 |
} |
1917 |
memcpy( nu, key->byt, key->len ); |
1918 |
memcpy( nu + key->len, key->val.byt, dx->vsz ); |
1919 |
LEAFSETPV( d, nu-base, 1 ); |
1920 |
d->kln = key->len; |
1921 |
d++; |
1922 |
e++; |
1923 |
b->ent++; |
1924 |
} |
1925 |
/* |
1926 |
need is set to the needed (i.e. additionally used) data space, |
1927 |
e is the first dict entry to redirect need bytes lower |
1928 |
*/ |
1929 |
b->stk -= need; |
1930 |
for ( ; e < b->ent; e++, d++ ) |
1931 |
LEAFPOSADD( d, -need ); |
1932 |
} /* ! insibling */ |
1933 |
if ( s ) { /* save upkey before writing, which may kill s */ |
1934 |
unsigned char *e0 = ((unsigned char*)s) + LEAFPOS(s->dct); |
1935 |
memcpy( up.byt, e0, up.len = s->dct[0].kln ); |
1936 |
sibnum = s->num; |
1937 |
if ( split ) |
1938 |
memcpy( up.val.byt, e0+up.len, up.val.len = dx->vsz ); |
1939 |
else |
1940 |
up.val.len = 0; |
1941 |
LOG_DBG( LOG_DEBUG, "split: had %d/%d key '%b' vln %d %s", |
1942 |
b->num, s->num, up.len, up.byt, up.val.len, PRVAL( pv, up.val ) ); |
1943 |
wrtleaf(s, dx); |
1944 |
} |
1945 |
wrtleaf(b, dx); |
1946 |
|
1947 |
/* |
1948 |
WHOA -- now all that was just to care for the leaf level! |
1949 |
Now walk up inserting pointers in parent forks while we split. |
1950 |
*/ |
1951 |
|
1952 |
#ifdef BUILD_SHMODE /* lock tree, unlock leaves */ |
1953 |
QLOCKUN(lb<<1); |
1954 |
lb = -1; |
1955 |
if ( sibnum ) { |
1956 |
QLOCKEX(1); /* get tree lock */ |
1957 |
lt = 1; |
1958 |
QLOCKUN(ls<<1); |
1959 |
ls = -1; |
1960 |
} |
1961 |
#endif |
1962 |
while ( sibnum ) { /* created sibling */ |
1963 |
unsigned char *e0; |
1964 |
int tog = 0; |
1965 |
Block *ins; |
1966 |
|
1967 |
/* get parent */ |
1968 |
lev++; |
1969 |
b = getfork(&blk[0].b, dx, f.n[lev], lev); |
1970 |
LOG_DBG( LOG_DEBUG, |
1971 |
"split: got parent %d lev %d", b->num, b->lev ); |
1972 |
e = keyfork(b, &up, dx->vsz); |
1973 |
/* again L/Y link hunt */ |
1974 |
while ( !(QMATCH & e) && b->ent == e && b->nxt && b->num ) { |
1975 |
unsigned se; |
1976 |
/* peek next block, toggling buffers */ |
1977 |
s = getfork(&blk[(++tog)&1].b, dx, b->nxt, lev); |
1978 |
se = keyfork(s, &up, dx->vsz); |
1979 |
if ( !se ) /* lower than first */ |
1980 |
break; |
1981 |
LOG_DBG( LOG_DEBUG, "LYao %d -> %d lev %d for '%b' got 0x%4x", |
1982 |
b->num, s->num, i, up.len, up.byt, se ); |
1983 |
/* trade b for s */ |
1984 |
b = s; |
1985 |
e = se; |
1986 |
} |
1987 |
base = (unsigned char*)b; |
1988 |
ins = b; |
1989 |
s = 0; |
1990 |
LOG_DBG( LOG_DEBUG, "split: got parent pos %4x", e ); |
1991 |
if ( QMATCH & e ) /* !??? */ |
1992 |
DONE((ERR_TRASH, "OOPS! found key in parent")); |
1993 |
if ( ! e ) { |
1994 |
TRACEKEY( ERR_TRASH, &f ); |
1995 |
DONE((ERR_TRASH, "OOPS! parent insert at 0 lev %d", lev)); |
1996 |
} |
1997 |
need = sizeof(Dict) + up.len + 4 + up.val.len; |
1998 |
if ( need > b->stk - DICTOFF - b->ent*sizeof(Dict) ) { |
1999 |
unsigned half = ((env.psz - b->stk)+b->ent*sizeof(Dict)+need) / 2; |
2000 |
unsigned mv = b->ent, tot = 0, nent, sz; |
2001 |
int adj; |
2002 |
if ( e == b->ent ) { |
2003 |
tot = need; |
2004 |
ins = 0; |
2005 |
} |
2006 |
for ( d = b->dct+mv; d--, mv--; ) { |
2007 |
sz = sizeof(Dict) + d->kln + 4 + (d->vln ? dx->vsz : 0); |
2008 |
if ( half > (tot += sz) ) { |
2009 |
if ( e == mv ) { |
2010 |
if ( half > tot+need || tot+need-half < half-tot ) { |
2011 |
tot += need; |
2012 |
ins = 0; /* new link goes to sibling */ |
2013 |
} else |
2014 |
break; |
2015 |
if ( half <= tot ) |
2016 |
break; |
2017 |
} |
2018 |
if ( mv ) |
2019 |
continue; |
2020 |
} |
2021 |
if ( tot - half > sz/2 ) { /* don't move this */ |
2022 |
tot -= sz; |
2023 |
d++; mv++; |
2024 |
} else if ( ! mv ) /* !??? */ |
2025 |
mv = 1; |
2026 |
break; |
2027 |
} |
2028 |
/* split here a little bit simpler */ |
2029 |
s = newfork( b==&blk[0].b ? &blk[1].b : &blk[0].b, dx, lev ); |
2030 |
s->nxt = b->nxt; |
2031 |
b->nxt = s->num; |
2032 |
/* straight move entries mv..b->ent-1 to sibling */ |
2033 |
sbase = (unsigned char*)s; |
2034 |
d = b->dct + mv; |
2035 |
sz = b->dct[mv-1].pos - b->stk; |
2036 |
nent = b->ent - mv; |
2037 |
LOG_DBG( LOG_DEBUG, |
2038 |
"split: parent lev %d split %d to new %d mv %d" |
2039 |
" '%b'/%s nent %d sz %d", |
2040 |
lev, b->num, s->num, mv, d->kln, base+d->pos, |
2041 |
prval( pv, base+d->pos+d->kln, d->vln?dx->ptr:0 ), nent, sz ); |
2042 |
memcpy( sbase + (s->stk -= sz), base + b->stk, sz ); |
2043 |
adj = s->stk - b->stk; |
2044 |
b->stk += sz; |
2045 |
memcpy( s->dct, d, nent*sizeof(Dict) ); |
2046 |
b->ent = mv; |
2047 |
s->ent = nent; |
2048 |
for ( d = s->dct; nent--; d++ ) |
2049 |
d->pos += adj; |
2050 |
if ( ! ins ) { |
2051 |
ins = s; |
2052 |
e -= mv; |
2053 |
} |
2054 |
if ( (ret = chkblk( s, dx )) ) |
2055 |
break; |
2056 |
} |
2057 |
/* now insert link in block ins at pos e */ { |
2058 |
unsigned isz = up.len + 4 + up.val.len, nent = ins->ent - e; |
2059 |
unsigned char *ibase = (unsigned char*)ins; |
2060 |
unsigned end = e ? ins->dct[e-1].pos : env.psz; |
2061 |
LOG_DBG( LOG_DEBUG, |
2062 |
"split: ins parent %d lev %d pos %d key '%b' -> %d vln %d %s", |
2063 |
ins->num, ins->lev, e, up.len, up.byt, sibnum, up.val.len, |
2064 |
PRVAL( pv, up.val ) ); |
2065 |
memmove( ibase + ins->stk - isz, ibase + ins->stk, end - ins->stk ); |
2066 |
ins->stk -= isz; |
2067 |
SETINT(ibase + (end -= 4), sibnum); |
2068 |
if ( up.val.len ) |
2069 |
memcpy( ibase + (end -= up.val.len), up.val.byt, up.val.len ); |
2070 |
memcpy( ibase + (end -= up.len), up.byt, up.len ); |
2071 |
for ( d = ins->dct+ins->ent; nent--; d-- ) { |
2072 |
*d = d[-1]; |
2073 |
d->pos -= isz; |
2074 |
} |
2075 |
d->pos = end; |
2076 |
d->vln = up.val.len ? 1 : 0; |
2077 |
d->kln = up.len; |
2078 |
ins->ent++; |
2079 |
if ( (ret = chkblk( ins, dx )) ) |
2080 |
break; |
2081 |
} |
2082 |
wrtfork(b, dx); |
2083 |
if ( ! s ) /* not split - we're done */ |
2084 |
break; |
2085 |
/* save sibling's key */ |
2086 |
sibnum = s->num; |
2087 |
e0 = ((unsigned char*)s) + s->dct[0].pos; |
2088 |
memcpy( up.byt, e0, up.len = s->dct[0].kln ); |
2089 |
if ( s->dct[0].vln ) |
2090 |
memcpy( up.val.byt, e0+up.len, up.val.len = dx->vsz ); |
2091 |
else |
2092 |
up.val.len = 0; |
2093 |
LOG_DBG( LOG_DEBUG, "split: had %d/%d on lev %d key '%b' vln %d %s", |
2094 |
b->num, sibnum, s->lev, up.len, up.byt, up.val.len, PRVAL( pv, up.val ) ); |
2095 |
wrtfork(s, dx); |
2096 |
if ( lev < dx->dpt ) |
2097 |
continue; |
2098 |
LOG_DBG( LOG_DEBUG, "root split!" ); |
2099 |
if ( b != &blk[0].b && b != &blk[1].b ) /* was in map -- could be gone */ |
2100 |
b = getfork(&blk[0].b, dx, 0, lev); /* refetch */ |
2101 |
/* new block for the old root, use sibling buffer (if any) */ |
2102 |
s = newfork(b==&blk[0].b ? &blk[1].b : &blk[0].b, dx, lev); |
2103 |
/* cp all but the num */ |
2104 |
memcpy( ((unsigned char*)s)+4, ((unsigned char*)b)+4, env.psz-4 ); |
2105 |
b->nxt = s->nxt; /* restore root's special nxt */ |
2106 |
s->nxt = 0; |
2107 |
b->stk = env.psz; /* empty the root */ |
2108 |
/* add two childs old root and it's sibling */ |
2109 |
SETINT( base + (b->stk -= 4), s->num ); /* the old root */ |
2110 |
b->dct[0].pos = b->stk; b->dct[0].vln = 0; b->dct[0].kln = 0; |
2111 |
SETINT( base + (b->stk -= 4), sibnum ); |
2112 |
e0 = ((unsigned char*)s) + s->dct[0].pos; |
2113 |
if ( ! up.val.len ) |
2114 |
b->dct[1].vln = 0; |
2115 |
else { |
2116 |
memcpy( base + (b->stk -= dx->vsz), up.val.byt, dx->vsz ); |
2117 |
b->dct[1].vln = 1; |
2118 |
} |
2119 |
memcpy( base + (b->stk -= up.len), up.byt, b->dct[1].kln = up.len ); |
2120 |
b->dct[1].pos = b->stk; |
2121 |
b->ent = 2; |
2122 |
b->lev++; /* increase tree depth */ |
2123 |
dx->dpt = b->lev; |
2124 |
ret = wrtfork(b, dx); |
2125 |
break; |
2126 |
} |
2127 |
done: |
2128 |
#ifdef BUILD_SHMODE |
2129 |
if ( -1 != lb ) QLOCKUN(lb<<1); |
2130 |
if ( -1 != ls ) QLOCKUN(ls<<1); |
2131 |
if ( lt ) QLOCKUN(1); |
2132 |
#endif |
2133 |
if ( 0 <= ret && b->lev && FINDKEY(&blk[0].b, &f, LOCK_READ) ) |
2134 |
ret = eRr(ERR_TRASH, "could not find key after add" ); |
2135 |
return ret; |
2136 |
} /* qSet */ |
2137 |
|
2138 |
|
2139 |
int qDel ( Qdx *dx, Key *key ) |
2140 |
{ |
2141 |
Leaf leaf; |
2142 |
Block * const b = &leaf.b; |
2143 |
unsigned char * const base = leaf.base; |
2144 |
int ret = 0; |
2145 |
unsigned short e, n, i, mv; |
2146 |
Dict *d; |
2147 |
unsigned char *v; |
2148 |
FindKey f; |
2149 |
PrVal pv; |
2150 |
|
2151 |
f.dx = dx; |
2152 |
f.key = key; |
2153 |
LOG_DBG( LOG_DEBUG, "deleting key '%b'/%s", |
2154 |
key->len, key->byt, PRVAL( pv, key->val ) ); |
2155 |
if ( FINDKEY(b, &f, LOCK_WRITE) ) |
2156 |
return eRr(ERR_TRASH, "could not find key"); |
2157 |
if ( !(QMATCH & f.e[0]) ) /* key not found - ok */ |
2158 |
goto done; |
2159 |
LOG_DBG( LOG_DEBUG, "key found" ); |
2160 |
e = (QENTMASK & f.e[0]); |
2161 |
d = b->dct + e; |
2162 |
v = base+LEAFPOS(d)+d->kln; /* start of values */ |
2163 |
n = LEAFVLN(d); |
2164 |
if ( ! n ) |
2165 |
goto done; |
2166 |
i = valpos( v, n, dx->vsz, key->val.byt ); |
2167 |
LOG_DBG( LOG_DEBUG, "val %sfound pos %u of %u", |
2168 |
(QMATCH & i) ? "" : "not ", i & ~QMATCH, n ); |
2169 |
if ( !(QMATCH & i) ) /* val not found - ok */ |
2170 |
goto done; |
2171 |
i &= ~QMATCH; |
2172 |
LOG_DBG( LOG_DEBUG, |
2173 |
"del blk %d e %d i/n %d/%d", b->num, e, i, n ); |
2174 |
if ( 1 < n ) { |
2175 |
v += i*dx->vsz; /* end of stack area to move: start of this value */ |
2176 |
mv = dx->vsz; /* amount to move: one value size */ |
2177 |
LEAFVLNADD( d, -1 ); |
2178 |
} else { /* sole value -- rm whole entry */ |
2179 |
v = base+LEAFPOS(d); /* start of key */ |
2180 |
mv = d->kln+dx->vsz; |
2181 |
for ( i=b->ent - e - 1; i--; d++ ) /* move the i entries after d down */ |
2182 |
*d = d[1]; |
2183 |
d = b->dct + e; /* reset */ |
2184 |
b->ent--; |
2185 |
} |
2186 |
memmove( base+b->stk+mv, base+b->stk, v-(base+b->stk) ); |
2187 |
b->stk += mv; |
2188 |
for ( ; e++ < b->ent; d++ ) |
2189 |
LEAFPOSADD( d, mv ); |
2190 |
wrtleaf(b, dx); |
2191 |
done: |
2192 |
QLOCKUN(b->num<<1); |
2193 |
return ret; |
2194 |
} /* qDel */ |
2195 |
|
2196 |
|
2197 |
static int setKeyVal (QSet *qst, char *val, int len, int split) |
2198 |
{ |
2199 |
Qdx *qdx = qst->qdx; |
2200 |
char *e = val+len; |
2201 |
Key key; |
2202 |
int nope, done = 0; |
2203 |
|
2204 |
if (split && qst->pfx.len) |
2205 |
split = 1+qst->pfx.len; /* don't split on 1st pfx.len bytes */ |
2206 |
do { |
2207 |
char buf[512], *b; |
2208 |
int l; |
2209 |
|
2210 |
if (!split || qdx->cdx) |
2211 |
l = e-val; |
2212 |
else { |
2213 |
char *s; |
2214 |
while (val<e && (CT_WHITE|CT_SPECL)&lat1ct[(unsigned char)*val]) val++; |
2215 |
if (e == (s = val)) |
2216 |
break; |
2217 |
while (s<e && !((CT_WHITE|CT_SPECL)&lat1ct[(unsigned char)*s])) s++; |
2218 |
l = s-val; |
2219 |
} |
2220 |
b = val; |
2221 |
if (qst->pfx.len) { |
2222 |
memcpy(b = buf, qst->pfx.byt, qst->pfx.len); |
2223 |
if (l > (int)sizeof buf-qst->pfx.len) |
2224 |
l = sizeof buf-qst->pfx.len; |
2225 |
memcpy(buf+qst->pfx.len, val, l); |
2226 |
l += qst->pfx.len; |
2227 |
} |
2228 |
/* qMkKey(qst->qdx, &key, val, len): */ |
2229 |
if (!qdx->cdx) |
2230 |
memcpy(key.byt, b, key.len = l<qdx->ksz ? l : qdx->ksz); |
2231 |
else { |
2232 |
key.len = qdx->ksz; |
2233 |
l = cEnc(qdx->cdx, &key, (unsigned char*)b, l, split); |
2234 |
if ( split && (!key.len /* wordless */ |
2235 |
|| l <= qst->pfx.len) /* houston, we have a problem */ |
2236 |
) |
2237 |
break; |
2238 |
} |
2239 |
qMkVal(&key.val, &qst->ptr, qdx->ptr); |
2240 |
qst->ptr.pos++; |
2241 |
eRr(LOG_VERBOSE, "%s '%.*s'->'%b' %u %u %u %u", |
2242 |
qst->del ? "del" : "set", l, b, key.len, key.byt, |
2243 |
qst->ptr.rid, qst->ptr.tag, qst->ptr.pos>>16, qst->ptr.pos&0xffff); |
2244 |
nope = qst->del ? qDel(qdx, &key) : qSet(qdx, &key); |
2245 |
if (!split) |
2246 |
return nope; |
2247 |
val += l - qst->pfx.len; |
2248 |
if (!nope) |
2249 |
done++; |
2250 |
} while (val < e); |
2251 |
return done; |
2252 |
} /* qSetKeyVal */ |
2253 |
|
2254 |
int qSetKeyVal (QSet *qst, char *val, int len) |
2255 |
{ |
2256 |
return setKeyVal(qst, val, len, 0); |
2257 |
} |
2258 |
|
2259 |
int qSetKeyVals (QSet *qst, char *val, int len) |
2260 |
{ |
2261 |
return setKeyVal(qst, val, len, 1); |
2262 |
} /* qSetKeyVals */ |
2263 |
|
2264 |
|
2265 |
|
2266 |
int qLoop ( QLoop *self ) |
2267 |
{ |
2268 |
Leaf leaf; |
2269 |
Qdx *dx = self->qdx; |
2270 |
int ret = 0; |
2271 |
unsigned i = 0; |
2272 |
int stop = QSTOP & self->flg; |
2273 |
int skip = QSKIP & self->flg; |
2274 |
Block *b = &leaf.b; |
2275 |
Key *cmp = &self->key; |
2276 |
|
2277 |
if ( ! cmp->len ) { /* start at 1st leaf */ |
2278 |
if ( b != GETLEAF(b, dx, 0, LOCK_READ) ) |
2279 |
DONE((ERR_TRASH, "could not read leaf 0")); |
2280 |
self->cur.len = 1; /* avoid QSAME match */ |
2281 |
} else { /* locate key */ |
2282 |
FindKey f; |
2283 |
f.dx = dx; |
2284 |
f.key = cmp; |
2285 |
if ( FINDKEY(b, &f, LOCK_READ) ) |
2286 |
DONE((ERR_TRASH, "could not find key")); |
2287 |
i = (unsigned)(QENTMASK & f.e[0]); |
2288 |
if ( !(QMATCH & f.e[0]) && QEQ == stop ) |
2289 |
goto done; |
2290 |
self->cur.len = 0; /* avoid QSAME match */ |
2291 |
} |
2292 |
if ( QUPTO <= stop ) |
2293 |
cmp = &self->to; |
2294 |
for ( ;; i=0 ) { /* blocks */ |
2295 |
unsigned char *base = (unsigned char*)b; |
2296 |
Dict *d; |
2297 |
if ( ! b || b->lev ) |
2298 |
return ERR_TRASH; |
2299 |
d = b->dct + i; |
2300 |
for ( ; i<b->ent; i++, d++ ) { |
2301 |
unsigned short pos = LEAFPOS( d ); |
2302 |
unsigned short vln = LEAFVLN( d ); |
2303 |
unsigned short kln = d->kln; |
2304 |
unsigned char *v = base+pos+kln; |
2305 |
Ptr p; |
2306 |
|
2307 |
if ( skip ) { |
2308 |
if ( kln == self->key.len |
2309 |
&& !memcmp(self->key.byt, base+pos, kln) |
2310 |
) |
2311 |
continue; |
2312 |
skip = 0; |
2313 |
} |
2314 |
if ( stop ) { |
2315 |
int diff = memcmp( cmp->byt, base+pos, |
2316 |
kln <= cmp->len ? kln : cmp->len ); |
2317 |
switch ( stop ) { |
2318 |
case QPF: |
2319 |
if ( diff || kln < cmp->len ) |
2320 |
goto moan; |
2321 |
break; |
2322 |
case QEQ: |
2323 |
if ( diff || kln != cmp->len ) |
2324 |
goto done; |
2325 |
break; |
2326 |
case QUPTO: |
2327 |
if ( 0 >= diff ) |
2328 |
goto done; |
2329 |
break; |
2330 |
case QINCL: |
2331 |
if ( 0 > diff || (!diff && kln > cmp->len) ) |
2332 |
goto done; |
2333 |
break; |
2334 |
default: /* unused */ |
2335 |
moan: |
2336 |
eRr(LOG_INFO, |
2337 |
"loop ends on key '%b'(%d) %d %d", |
2338 |
kln, base+pos, kln, diff, cmp->len ); |
2339 |
goto done; |
2340 |
} |
2341 |
} /* if stop */ |
2342 |
|
2343 |
if ( !i && kln == self->cur.len && !memcmp(self->cur.byt, base+pos, kln) ) |
2344 |
self->flg |= QSAME; |
2345 |
else { |
2346 |
self->flg &= ~QSAME; |
2347 |
memcpy( self->cur.byt, base+pos, self->cur.len = kln ); |
2348 |
} |
2349 |
if ( self->qcb ) { |
2350 |
self->vals = v; |
2351 |
self->nvals = vln; |
2352 |
if ( (ret = self->qcb(self)) ) |
2353 |
goto done; |
2354 |
} else { /* internal check */ |
2355 |
Leaf ll; |
2356 |
FindKey f; |
2357 |
f.dx = dx; |
2358 |
f.key = &self->cur; |
2359 |
memcpy( self->cur.val.byt, v, self->cur.val.len = dx->vsz ); |
2360 |
if ( FINDKEY(&ll.b, &f, LOCK_READ) ) |
2361 |
DONE((ERR_TRASH, "could not find key")); |
2362 |
if ( !(QMATCH & f.e[0]) |
2363 |
|| b->num != ll.b.num |
2364 |
|| i != (unsigned)(QENTMASK & f.e[0]) |
2365 |
) { |
2366 |
eRr(ERR_TRASH, |
2367 |
"OOPS! want %u.%u got %u.%u (match %4x)", |
2368 |
b->num, i, ll.b.num, QENTMASK & f.e[0], f.e[0] ); |
2369 |
qRdVal( &p, v, dx->vsz ); |
2370 |
eRr(ERR_TRASH, |
2371 |
"\twhen looking for '%b'/%u.%u.%u", |
2372 |
self->cur.len, self->cur.byt, p.rid, p.tag, p.pos ); |
2373 |
if ( ll.b.ent <= (QENTMASK & f.e[0]) ) |
2374 |
eRr(ERR_TRASH, "\tI found nothing" ); |
2375 |
else { |
2376 |
Dict *dd = ll.b.dct + (QENTMASK & f.e[0]); |
2377 |
unsigned char *bb = ((unsigned char*)&ll.b)+LEAFPOS(dd); |
2378 |
qRdVal( &p, bb+dd->kln, dx->vsz ); |
2379 |
eRr(ERR_TRASH, |
2380 |
"\tI found '%b'/%u.%u.%u", |
2381 |
dd->kln, bb, p.rid, p.tag, p.pos ); |
2382 |
} |
2383 |
} else |
2384 |
eRr(LOG_TRACE, "key '%b' ok", self->cur.len, self->cur.byt ); |
2385 |
} /* internal check */ |
2386 |
} |
2387 |
if ( ! b->nxt || !GETLEAF(b, dx, b->nxt, LOCK_READ) ) |
2388 |
break; |
2389 |
/* TODO: |
2390 |
b now is the block that was our sibling when reading the old b. |
2391 |
As long as we don't shift entries, |
2392 |
there is no need to advance to next key that is actually greater !? |
2393 |
*/ |
2394 |
} |
2395 |
done: |
2396 |
if ( ! self->qcb ) |
2397 |
eRr(LOG_INFO, |
2398 |
"checked %d keys in %u blks of %dK depth %u cmp/fget/lget %u/%u/%u", |
2399 |
stat.key, dx->lln, dx->lsz>>10, dx->dpt, stat.cmp, stat.fget, stat.lget ); |
2400 |
return ret; |
2401 |
} /* qLoop */ |
2402 |
|
2403 |
|
2404 |
|
2405 |
int qInit ( Qdx *dx ) |
2406 |
{ |
2407 |
Bloke blk; |
2408 |
int size; |
2409 |
|
2410 |
/* defaults */ |
2411 |
if ( !dx->ksz ) dx->ksz = 255; |
2412 |
|
2413 |
/* check leaves file */ |
2414 |
size = fSize( dx->mqd ); |
2415 |
if ( 0x3ff & size ) /* not on 1K boundary */ |
2416 |
return eRr(LOG_ERROR, "mqd has bad size %dK + %d", |
2417 |
size >> 10, (int)size & 0x3ff ); |
2418 |
if ( size ) { /* read 1st leaf header */ |
2419 |
Block b; |
2420 |
int got = fPread( &dx->mqd, (unsigned char*)&b, sizeof(b), 0 ); |
2421 |
if ( sizeof(b) != (unsigned)got ) |
2422 |
return eRr(LOG_SYSERR, "could not get leaf head" ); |
2423 |
if ( b.num ) |
2424 |
return eRr(ERR_TRASH, "leaf has num %d", b.num ); |
2425 |
if ( dx->typ && dx->typ != b.typ ) |
2426 |
eRr(LOG_WARN, "typ want 0x02%2x got 0x02%2x", dx->typ, b.typ ); |
2427 |
if ( QDX_COMPRS & dx->typ ) |
2428 |
eRr(LOG_WARN, "sorry this version does not support compressed keys"); |
2429 |
dx->typ = b.typ; |
2430 |
dx->ksz = b.ksz; |
2431 |
dx->ptr = b.ptr; |
2432 |
} |
2433 |
if ( QDX_LEAFPV & dx->typ ) |
2434 |
dx->vsz = dx->ptr; |
2435 |
else |
2436 |
dx->vsz = (3&(dx->ptr>>6)) + 3+(3&(dx->ptr>>4)) + (7&dx->ptr); |
2437 |
if (!size && dx->ksz > 255 - dx->vsz) |
2438 |
dx->ksz = 255 - dx->vsz; |
2439 |
eRr(LOG_VERBOSE, "dx ptr %2x vsz %d=%d+%d+%d", dx->ptr, dx->vsz, |
2440 |
(3&(dx->ptr>>6)), 3+(3&(dx->ptr>>4)), (7&dx->ptr)); |
2441 |
dx->lsz = 1<<QDX_LEAFSH<<(QDX_SIZMSK&dx->typ); |
2442 |
if ( size % dx->lsz ) |
2443 |
return eRr(ERR_TRASH, "size 0x%08x %d blksize", size, dx->lsz ); |
2444 |
dx->lln = size / dx->lsz; |
2445 |
if ( ! dx->lln |
2446 |
&& (!(DX_WRITE & dx->flg) || (newleaf(&blk.b, dx), wrtleaf(&blk.b, dx))) |
2447 |
) |
2448 |
return eRr(LOG_SYSERR, "could not init new leaf file" ); |
2449 |
|
2450 |
#ifdef CPU_BIG_ENDIAN |
2451 |
# define QDX_FORK (QDX_FORKBE | (env.psh-QDX_FORKSH)) |
2452 |
#else |
2453 |
# define QDX_FORK (QDX_FORKLE | (env.psh-QDX_FORKSH)) |
2454 |
#endif |
2455 |
size = fSize(dx->mqx.fil); |
2456 |
if ( size ) { |
2457 |
if ( size % env.psz ) /* not on env.psz boundary */ |
2458 |
return eRr(LOG_ERROR, "mqx has bad size %dK + %d", |
2459 |
size >> 10, (int)size & 0x3ff ); |
2460 |
dx->fln = size >> env.psh; |
2461 |
if ( (int)dx->fln != fMap(&dx->mqx, dx->fln) ) |
2462 |
return eRr(LOG_ERROR, "could not map mqx"); |
2463 |
dx->ftp = ((Block*)dx->mqx.map)->typ; |
2464 |
if ( dx->ftp != QDX_FORK ) /* bad pagesize, endianess or flags */ |
2465 |
return eRr(LOG_ERROR, "root has type %2x != %2x", |
2466 |
dx->ftp, QDX_FORK ); |
2467 |
dx->dpt = ((Block*)dx->mqx.map)->lev; |
2468 |
} |
2469 |
dx->fln = size / env.psz; |
2470 |
if ( ! dx->fln ) { |
2471 |
dx->ftp = QDX_FORK; |
2472 |
eRr(LOG_INFO, "creating index type 0x%2x/0x%2x lsz %u psz %u ksz %u vsz %u", |
2473 |
dx->typ, dx->ftp, dx->lsz, env.psz, dx->ksz, dx->vsz ); |
2474 |
if ( 1 < dx->lln ) { |
2475 |
if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg)) |
2476 |
return eRr(ERR_TRASH, "need excl write access to rebuild tree"); |
2477 |
if ( qRemake(dx) ) |
2478 |
return eRr(LOG_ERROR, "could not rebuild tree"); |
2479 |
} else { |
2480 |
/* init: link root to leaf */ |
2481 |
Block *b = newfork(&blk.b, dx, 1); |
2482 |
SETINT( ((unsigned char*)b)+(b->stk -= 4), 0 ); |
2483 |
b->dct[0].pos = b->stk; /* 0, 0 */ |
2484 |
b->ent = 1; |
2485 |
if ( wrtfork(b, dx) ) |
2486 |
return eRr(LOG_SYSERR, "\twhen writing root" ); |
2487 |
fMap(&dx->mqx, 1); |
2488 |
dx->dpt = 1; |
2489 |
} |
2490 |
} |
2491 |
eRr(LOG_INFO, "psz %d mapped %d", env.psz, dx->mqx.npg ); |
2492 |
|
2493 |
return 0; |
2494 |
} /* qInit */ |
2495 |
|
2496 |
|
2497 |
void qFini ( Qdx *dx ) |
2498 |
{ |
2499 |
fMap(&dx->mqx, 0); |
2500 |
} /* qFini */ |