1 |
/* |
2 |
The Malete project - the Z39.2/Z39.50 database framework of OpenIsis. |
3 |
Version 0.9.x (patchlevel see file Version) |
4 |
Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org |
5 |
|
6 |
This library is free software; you can redistribute it and/or |
7 |
modify it under the terms of the GNU Lesser General Public |
8 |
License as published by the Free Software Foundation; either |
9 |
version 2.1 of the License, or (at your option) any later version. |
10 |
|
11 |
This library is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
14 |
See the GNU Lesser General Public License for more details. |
15 |
|
16 |
You should have received a copy of the GNU Lesser General Public |
17 |
License along with this library; if not, write to the Free Software |
18 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
20 |
see README for more information |
21 |
EOH */ |
22 |
|
23 |
/* |
24 |
$Id: cds.c,v 1.3 2004/07/25 10:19:29 kripke Exp $ |
25 |
malete CDS/ISIS conversion |
26 |
*/ |
27 |
|
28 |
#include <stddef.h>

#include "../tool/tool.h"
29 |
|
30 |
/*
	Record layout selection.
	BUILD_CDSUNIX selects the 4-byte aligned ("UNIX") struct layout,
	otherwise the packed (Intel "DOS"+WinIsis) layout is used and PACKED
	must expand to the compiler's packing attribute.
	Big endian CPUs (SPARC, M68K, PPC) have no packed version and thus
	always build the aligned layout.
*/
#if defined(CPU_BIG_ENDIAN)
#	define BUILD_CDSUNIX
#endif

#if defined(BUILD_CDSUNIX)
#	define PACKED
#elif defined(__GNUC__)
#	define PACKED __attribute__((packed))
#else
#	error "please define how to pack"
#endif
43 |
|
44 |
/*
	Indexes of the files making up a CDS/ISIS database.
	Do not change the order of the node/leaf entries: the index export
	addresses them as CDS_N01+f2 / CDS_L01+f2 with f2 being 0 or 2.
*/
enum {
	CDS_MST,   /* master file */
	CDS_XRF,   /* cross reference */
	CDS_CNT,   /* index control */
	CDS_IFP,   /* index record pointers (postings) */
	CDS_N01,   /* nodes with 10 byte keys */
	CDS_L01,   /* leaves with 10 byte keys */
	CDS_N02,   /* nodes with 30 byte keys */
	CDS_L02,   /* leaves with 30 byte keys */
	CDS_FILES  /* number of files */
};
52 |
/* file name extensions, in the order of the CDS_* file indexes */
static const char *cds_ext[] = {
	"mst",
	"xrf",
	"cnt",
	"ifp",
	"n01",
	"l01",
	"n02",
	"l02"
};
55 |
/* one file handle per CDS_* file index */
typedef file cds_files[CDS_FILES];
56 |
|
57 |
|
58 |
typedef struct { /* master file control header */ |
59 |
int ctlm; /* should be 0 */ |
60 |
int nmfn; /* next avail MFN */ |
61 |
int nmfb; /* next free block, 1st = 1 */ |
62 |
short nmfp; /* next free pos (in block) */ |
63 |
short type; /* 0 (1 is for "messages" mst) */ |
64 |
int rcnt; /* record count; nmfn-1 - #deleted */ |
65 |
int mfx1; /* unused */ |
66 |
int mfx2; /* unused; "LAN lock" */ |
67 |
int mfx3; /* unused; "LAN lock" */ |
68 |
} Mfc; |
69 |
|
70 |
|
71 |
/* field dictionary entry in a master file record */
typedef struct {
	unsigned short tag;  /* field tag */
	unsigned short pos;  /* offset of the field value, relative to Mfr.base */
	unsigned short len;  /* length of the field value in bytes */
} Dct;
76 |
|
77 |
typedef struct { /* master file record */ |
78 |
int mfn; |
79 |
short mfrl; |
80 |
int bwb PACKED; |
81 |
short bwp; |
82 |
short base; |
83 |
short nvf; |
84 |
short stat; |
85 |
Dct dict[1]; |
86 |
} Mfr; |
87 |
/* it is common belief, that the first 4 fields |
88 |
(12 bytes packed, 14 aligned) are to be in one 512-byte block |
89 |
*/ |
90 |
enum { |
91 |
MFR_BASEOFF = ((char*)&((Mfr*)0)->base) - (char*)0, |
92 |
MFR_DICTOFF = ((char*)&((Mfr*)0)->dict) - (char*)0 |
93 |
}; |
94 |
|
95 |
/*
	cross reference block.
	Each xrec entry holds the master file block number in the bits
	above bit 11 and the position in that block in the lower bits.
*/
typedef struct {
	int xpos;       /* block number, negated on the last block */
	int xrec[127];  /* one entry per record */
} Xrf;
99 |
|
100 |
/* index control record, one for each of the two key lengths */
typedef struct {
	short type;  /* 1 or 2 of N0x */
	short ordn;  /* 5 */
	short ordf;  /* 5 */
	short n;     /* 15 */
	short k;     /* 5 */
	short lev;   /* depth: N0x levels under root (root only: 0) */
	int   posr;  /* root pos */
	int   nmax;  /* N0x blocks */
	int   fmax;  /* L0x blocks */
	short abno;  /* 0 (!) if "abnormal" (root only) */
} Cnt;
112 |
/*
	Number of bytes of a Cnt as stored in the file.
	While the Cnt layout is independent of alignment,
	sizeof(Cnt) is not, so the size is spelled out per build.
*/
#if defined(BUILD_CDSUNIX)
#	define CNT_SIZE 28
#else
#	define CNT_SIZE 26
#endif
117 |
|
118 |
/*
	"posting" pointer to record.
	The bytes are unsigned so that combining them with shifts and ors
	yields the big endian value; with plain (possibly signed) char,
	any byte >= 0x80 would sign extend and corrupt the result.
*/
typedef struct {
	unsigned char b[8]; /* 3+2+1+2 byte rid+tag+occ+pos big endian */
} Pst;
121 |
|
122 |
/*
	inverted file postings header.
	The header is followed by the postings (Pst), which possibly span
	several Ifb blocks.
*/
typedef struct {
	int nxtb;  /* continuation: block ... */
	int nxtp;  /* ... and position of the next segment */
	int totp;  /* total postings */
	int segp;  /* actually in this segment <= totp */
	int segc;  /* segment capacity >= segp */
} Ifp;
130 |
|
131 |
/* inverted file block */
typedef struct {
	int ifpblk;       /* number of this block, 1st = 1 */
	int ifprec[127];  /* postings headers (Ifp) and postings (Pst) */
} Ifb;
135 |
|
136 |
#define NSTRUCT(n) struct { \ |
137 |
int pos; \ |
138 |
short ock; \ |
139 |
short it; \ |
140 |
struct { \ |
141 |
char key[n]; \ |
142 |
int ref PACKED; \ |
143 |
} idx[10]; \ |
144 |
} |
145 |
typedef NSTRUCT(10) N01; |
146 |
typedef NSTRUCT(30) N02; |
147 |
|
148 |
#define LSTRUCT(n) struct { \ |
149 |
int pos; \ |
150 |
short ock; \ |
151 |
short it; \ |
152 |
int ps; \ |
153 |
struct { \ |
154 |
char key[n]; \ |
155 |
int infb PACKED; \ |
156 |
int infp PACKED; \ |
157 |
} idx[10]; \ |
158 |
} |
159 |
typedef LSTRUCT(10) L01; |
160 |
typedef LSTRUCT(30) L02; |
161 |
|
162 |
/*
	Latin1 values of the Cp850 upper half (codes 128..255),
	one row per 16 codes. '?' marks characters without Latin1 equivalent.
*/
static const unsigned char cp850[] = {
	/* 0x80 */ 199, 252, 233, 226, 228, 224, 229, 231,
	           234, 235, 232, 239, 238, 236, 196, 197,
	/* 0x90 */ 201, 230, 198, 244, 246, 242, 251, 249,
	           255, 214, 220, 248, 163, 216, 215, '?',
	/* 0xA0 */ 225, 237, 243, 250, 241, 209, 170, 186,
	           191, 174, 172, 189, 188, 161, 171, 187,
	/* 0xB0 */ '?', '?', '?', '?', '?', 193, 194, 192,
	           169, '?', '?', '?', '?', 162, 165, '?',
	/* 0xC0 */ '?', '?', '?', '?', '?', '?', 227, 195,
	           '?', '?', '?', '?', '?', '?', '?', 164,
	/* 0xD0 */ 240, 208, 202, 203, 200, '?', 205, 206,
	           207, '?', '?', '?', '?', 166, 204, '?',
	/* 0xE0 */ 211, 223, 212, 210, 245, 213, 181, 254,
	           222, 218, 219, 217, 253, 221, 175, 180,
	/* 0xF0 */ 173, 177, '?', 190, 182, 167, 247, 184,
	           176, 168, 183, 185, 179, 178, '?', 160
};
172 |
|
173 |
typedef struct { |
174 |
Fld base; /* points into env->opt */ |
175 |
int notab; /* do not convert ^ <-> tabs */ |
176 |
int novt; /* do not convert newline <-> vtabs */ |
177 |
int no850; /* do not convert Cp850 <-> ISO Latin1 */ |
178 |
int write; /* write cds */ |
179 |
} CdsOpt; |
180 |
|
181 |
|
182 |
/*
	Collect the CDS options from args.
	-B sets the basename of the CDS db; if it is missing or empty,
	the pattern of the malete db is used instead.
	-N takes one of otab, ovt, o850, turning off the respective conversion.
	Returns 0 on success, else the result of eRr on a bad -N option.
*/
static int cdsOpt (CdsOpt *opt, Fld *args, Db *db)
{
	Fld cur;

	memset(opt, 0, sizeof(*opt));
	cur.val = 0;
	while ( vGet(&cur, args, "BN") ) {
		if ( 'B' == cur.tag ) {
			opt->base = cur;
		} else if ( 'N' == cur.tag ) {
			if ( VEQZ(&cur, "otab") )
				opt->notab = 1;
			else if ( VEQZ(&cur, "ovt") )
				opt->novt = 1;
			else if ( VEQZ(&cur, "o850") )
				opt->no850 = 1;
			else
				return eRr(ERR_INVAL, "bad cds opt '-N%.*s'", cur.len, cur.val);
		}
	}
	if ( !opt->base.len ) { /* default to the malete db */
		opt->base.val = db->pat;
		opt->base.len = strlen(opt->base.val);
	}
	return 0;
} /* cdsOpt */
205 |
|
206 |
|
207 |
int cdsuse () |
208 |
{ |
209 |
eRr(ERR_INVAL, "[cdsimp|cdsexp]:\n" |
210 |
"-Bbase: path/basename of cds db (default = malete db)\n" |
211 |
"-Notab: do not convert ^ <-> tab\n" |
212 |
"-Novt: do not convert newline <-> vtab\n" |
213 |
"-No850: do not convert Cp850 <-> ISO Latin1" |
214 |
); |
215 |
return ERR_INVAL; |
216 |
} /* cdsuse */ |
217 |
|
218 |
|
219 |
/*
	Open all files of the CDS db named by opt->base, each as
	base.ext with ext from cds_ext.
	With opt->write set, files are created and truncated for read/write,
	else they are opened for reading.
	All handles are preset to FIL_NONE before the first open.
	Returns 0 on success, else the result of eRr on the first file
	that could not be opened.
	NOTE(review): path is not released here and on failure the files
	opened so far are left open -- confirm how mAlloc memory is to be
	freed and whether callers may cdsClose after a failed cdsOpen.
*/
static int cdsOpen (cds_files cds, CdsOpt *opt)
{
	int mode, baselen, n;
	char *path;

	if ( opt->write )
		mode = FIL_RDWR|FIL_CREAT|FIL_TRUNC;
	else
		mode = FIL_RD;
	baselen = opt->base.len;
	path = mAlloc(baselen+5); /* base + '.' + 3 chars ext + 0 byte */
	memcpy(path, opt->base.val, baselen);
	path[baselen] = '.';
	baselen++;
	n = 0;
	while ( n < CDS_FILES )
		cds[n++] = FIL_NONE;
	for ( n = 0; n < CDS_FILES; n++ ) {
		memcpy(path+baselen, cds_ext[n], 4); /* including the 0 byte */
		if ( fOpen( cds+n, path, mode ) )
			return eRr(LOG_ERROR, "no access to CDS file '%s'", path);
	}
	return 0;
} /* cdsOpen */
235 |
|
236 |
|
237 |
/* close every file of the CDS db */
static void cdsClose (cds_files cds)
{
	file *f = cds;
	file *end = cds + CDS_FILES;

	while ( f < end )
		fClose(f++);
} /* cdsClose */
243 |
|
244 |
|
245 |
/*
	Import a CDS/ISIS database into db.

	args are the options as understood by cdsOpt.
	First, all records are read from the master file as located by the
	cross reference file, recoded according to the options and written
	to db by their MFN. Deleted and non existing records are skipped.
	Then both leaf files are merged in key order and the postings of
	each key are loaded into the index of db.

	All structures read from the files are checked before they are used
	as counts or offsets, since the files are external data.

	Returns 0 on success, the result of cdsuse on bad options,
	else the result of eRr describing the problem.
*/
int cdsimp (Db *db, Fld *args)
{
	CdsOpt opt;
	cds_files cds;
	int ret = 0, m, i;
	Mfc mfc;
	List rec;
	char recode[256];
	L01 l01;
	L02 l02;
	Ifb ifbs[2];
	int n1, n2;
	Key key;

	if ( cdsOpt(&opt, args, db) )
		return cdsuse();
	if ( cdsOpen(cds, &opt) )
		return eRr(ERR_INVAL, "bad CDS base");
#define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
	if ( sizeof(mfc) != fPread(cds+CDS_MST, &mfc, sizeof(mfc), 0) )
		CLEANUP((ERR_TRASH, "no mfc"));
	eRr(LOG_TRACE,
		"MFC ctlm %d nmfn %d nmfb %d nmfp %d type %d"
		" rcnt %d x1 %d x2 %d x3 %d",
		mfc.ctlm, mfc.nmfn, mfc.nmfb, mfc.nmfp, mfc.type,
		mfc.rcnt, mfc.mfx1, mfc.mfx2, mfc.mfx3);
	eRr(LOG_INFO, "importing %d recs", mfc.nmfn-1);
	lInit(&rec, "");
	/* prepare recode table */
	for (i=0; i<128; i++) recode[i] = (char)i;
	if ( !opt.notab ) recode['^'] = TAB;
	if ( !opt.novt ) recode[LF] = VT;
	if ( opt.no850 )
		for (i=128; i<256; i++) recode[i] = (char)i;
	else
		for (i=128; i<256; i++) recode[i] = (char)cp850[i-128];
	/* loop m := ceil(numrecs/127) xrf blocks */
	m = (mfc.nmfn+125)/127;
	for (i=1; i<=m; i++) {
		Xrf xrf;
		int f = 1+127*(i-1), t = f+126, mfn;
		int got = fRead(cds+CDS_XRF, &xrf, sizeof(xrf));
		if ( (int)sizeof(xrf) != got )
			CLEANUP((ERR_TRASH, "reading xrf block %d", i));
		/* the last block has its number negated */
		if ( xrf.xpos != (i==m ? -i : i) )
			CLEANUP((ERR_TRASH, "bad pos %d in xrf block %d", xrf.xpos, i));
		if ( t >= mfc.nmfn )
			t = mfc.nmfn-1;
		for (mfn=f; mfn<=t; mfn++) {
			int pos = xrf.xrec[mfn-f];
			int mfb = (pos >> 11);
			int dictend, nf;
			union {
				Mfr mfr;
				char buf[0x7fff];
			} da;
			Dct *d = da.mfr.dict;
			char *b, *e;

			if ( 0 > mfb ) {
				eRr(LOG_TRACE, "rec %d was deleted", mfn);
				continue;
			}
			if ( !mfb ) {
				/* blocks count from 1, so there is no record to read;
					going on would shift a negative block number */
				eRr(LOG_TRACE, "rec %d does not exist", mfn);
				continue;
			}
			/* ignore bits 9, 10 signaling pending index update */
			pos &= 0x1ff;
			pos |= (mfb-1)<<9;
			/* read the leader to learn about the record length */
			got = fPread(cds+CDS_MST, da.buf, sizeof(Mfr), pos);
			if ( got < MFR_DICTOFF )
				CLEANUP((ERR_TRASH, "rec %d too short %d", mfn, got));
			if ( da.mfr.mfrl < 0 ) {
				eRr(LOG_WARN, "rec %d pos %d locked", mfn, pos);
				da.mfr.mfrl = -da.mfr.mfrl;
			}
			dictend = MFR_DICTOFF + da.mfr.nvf*(int)sizeof(Dct);
			if ( da.mfr.nvf < 0 || da.mfr.mfrl < dictend )
				CLEANUP((ERR_TRASH, "rec %d pos %d bad mfrl %d nvf %d",
					mfn, pos, da.mfr.mfrl, da.mfr.nvf));
			/* now read the whole record, which overwrites the leader */
			got = fPread(cds+CDS_MST, da.buf, da.mfr.mfrl, pos);
			if ( da.mfr.mfrl != got ) {
				if ( got == -da.mfr.mfrl ) /* locked: negative again */
					da.mfr.mfrl = -da.mfr.mfrl;
				else
					CLEANUP((ERR_TRASH, "rec %d pos %d got %d bytes want %d",
						mfn, pos, got, da.mfr.mfrl));
			}
			/* field values must start behind the dictionary
				and within the record */
			if ( da.mfr.base < dictend || da.mfr.base > da.mfr.mfrl )
				CLEANUP((ERR_TRASH, "rec %d pos %d bad base %d",
					mfn, pos, da.mfr.base));
			b = da.buf+da.mfr.base;
			e = da.buf+da.mfr.mfrl;
			lReset(&rec);
			for ( nf = da.mfr.nvf; nf--; d++ ) {
				char *p = b+d->pos, *q = p+d->len;
				if ( p < b || q > e )
					CLEANUP((ERR_TRASH, "bad pos %d len %d field %d",
						d->pos, d->len, (int)(d-da.mfr.dict)));
				for ( ; p<q; p++ ) *p = recode[(unsigned char)*p];
				LADD(&rec, d->tag, b+d->pos, d->len);
			}
			dWrite(db, rec.fld, mfn);
		}
	}
	/* mst done */
	/* we assume first leaf has pos 1; maybe we should use N0x to search */
	l01.ps = l02.ps = 1;
	l01.ock = l02.ock = n1 = n2 = 0;
	ifbs[0].ifpblk = ifbs[1].ifpblk = 0;
	for (;;) { /* loop leaves */
		Fld use;
		int infb, infp, got, more = 0;
		Ifp ifp;
		Ifb *ifb;

		/* fetch the next leaf where the current one is used up */
		if ( n1 == l01.ock && (l01.pos = l01.ps) ) {
			if ((int)sizeof(l01) != (got
				= fPread(cds+CDS_L01,&l01,sizeof(l01),(l01.ps-1)*sizeof(l01)))
			)
				CLEANUP((ERR_TRASH, "l01 %d got %d", l01.ps, got));
			eRr(LOG_TRACE, "l01 pos %d ock %d nxt %d", l01.pos, l01.ock, l01.ps);
			if ( l01.ock < 0 || l01.ock > 10 ) /* idx has 10 entries */
				CLEANUP((ERR_TRASH, "l01 %d bad ock %d", l01.pos, l01.ock));
			n1 = 0;
			if ( !l01.ock )
				continue;
		}
		if ( n2 == l02.ock && (l02.pos = l02.ps) ) {
			if ((int)sizeof(l02) != (got
				= fPread(cds+CDS_L02,&l02,sizeof(l02),(l02.ps-1)*sizeof(l02)))
			)
				CLEANUP((ERR_TRASH, "l02 %d got %d", l02.ps, got));
			eRr(LOG_TRACE, "l02 pos %d ock %d nxt %d", l02.pos, l02.ock, l02.ps);
			if ( l02.ock < 0 || l02.ock > 10 ) /* idx has 10 entries */
				CLEANUP((ERR_TRASH, "l02 %d bad ock %d", l02.pos, l02.ock));
			n2 = 0;
			if ( !l02.ock )
				continue;
		}
		if ( !l01.pos && !l02.pos )
			break;
		/* pick the lesser key; use.tag is 1 for a l02, 0 for a l01 key */
		if ( n1 == l01.ock )
			use.tag = 1;
		else if ( n2 == l02.ock )
			use.tag = 0;
		else if ( 0 > memcmp(l02.idx[n2].key, l01.idx[n1].key, 10) )
			use.tag = 1;
		else
			use.tag = 0;
		if ( use.tag ) {
			infb = l02.idx[n2].infb;
			infp = l02.idx[n2].infp;
			use.val = l02.idx[n2++].key;
			use.len = 30;
		} else {
			infb = l01.idx[n1].infb;
			infp = l01.idx[n1].infp;
			use.val = l01.idx[n1++].key;
			use.len = 10;
		}
		ifb = ifbs+use.tag;
		/* kill trailing blanks */
		while ( use.len && ' ' == use.val[use.len-1] ) use.len--;
		eRr(LOG_TRACE, "'%.*s'", use.len, use.val);
		key.len = use.len;
		memcpy(key.byt, use.val, use.len);
		key.val.len = db->qdx.vsz;
		while (infb) {
			Pst *pst;
			int n;
			/* load the block unless it is the one we already have */
			if ( ifb->ifpblk != infb
				&& (512 != fPread(cds+CDS_IFP,ifb,sizeof(Ifb),(infb-1)*512)
					|| ifb->ifpblk != infb)
			)
				CLEANUP((ERR_TRASH, "ifp %d",infb));
			if ( more ) { /* continue segment at the start of the block */
				pst = (Pst*)ifb->ifprec;
				if ( (n = 127/2) > more )
					n = more;
				more -= n;
				eRr(LOG_TRACE, "+%d @%d", n, infb);
			} else { /* new segment: header followed by postings */
				if ( infp < 0 || infp > 127-5 )
					CLEANUP((ERR_TRASH, "bad infp %d for infb %d", infp, infb));
				ifp = *(Ifp*)(ifb->ifprec + infp);
				if ( ifp.segp < 0 ) /* would be used as loop count */
					CLEANUP((ERR_TRASH, "bad segp %d at ifp %d,%d",
						ifp.segp, infb, infp));
				pst = (Pst*)(ifb->ifprec + infp + 5);
				if ( (n = (127-5-infp)/2) > ifp.segp )
					n = ifp.segp;
				more = ifp.segp - n;
				eRr(LOG_TRACE, "ifp @%d,%d: %d %d %d %d %d", infb, infp,
					ifp.nxtb, ifp.nxtp, ifp.totp, ifp.segp, ifp.segc);
			}
			if ( more )
				infb++;
			else {
				infb = ifp.nxtb;
				infp = ifp.nxtp;
			}
			for ( ; n--; pst++ ) {
				if ( QDX_ISIS == db->qdx.ptr )
					memcpy(key.val.byt, pst->b, 8);
				else {
					/* decode as unsigned bytes to avoid sign extension */
					const unsigned char *u = (const unsigned char*)pst->b;
					Ptr ptr;
					ptr.ext = 0;
					ptr.rid = u[0]<<16 | u[1]<<8 | u[2];
					ptr.tag = u[3]<<8 | u[4];
					ptr.pos = u[5]<<16 | u[6]<<8 | u[7];
					qMkVal(&key.val, &ptr, db->qdx.ptr);
				}
				qLoad(&db->qdx, &key);
			}
		} /* while infb */
	}
	/* a final key without value ends the loading */
	key.val.len = 0;
	qLoad(&db->qdx, &key);
cleanup:
#undef CLEANUP
	cdsClose(cds);
	return ret;
} /* cdsimp */
453 |
|
454 |
|
455 |
/* export index */ |
456 |
typedef struct { |
457 |
QLoop qlp; |
458 |
file *cds; |
459 |
int totp; /* in current segment */ |
460 |
int infb; /* of current ifp */ |
461 |
int infp; /* of current ifp */ |
462 |
Ifb ifb; /* current block; flushed on demand */ |
463 |
int rec; /* next in ifb.ifprec 0..126 */ |
464 |
L01 l01; /* new keys are immediatly inserted here ... */ |
465 |
L02 l02; /* ... structs flushed on demand */ |
466 |
N01 n01; |
467 |
N02 n02; |
468 |
} CdsLoop; |
469 |
|
470 |
|
471 |
/*
	Write the Ifp header of the current segment and reset the
	postings count.
	If the block holding the header is still the current one, the header
	is set in place, else it is written directly to the file.

	hmmmm ... From the manual:
		When the inverted file is loaded sequentially (for example
		after a complete generation of the inverted file with ISISINV),
		each list consists of one or more adjacent segments.
		If IFPTOT <= 32768, then: IFPNXTB/IFPNXTP = 0/0 and
		IFPTOT = IFPSEGP = IFPSEGC.
	This seems to imply that numbers really should be used as
	signed shorts!?
*/
static void closeifp (CdsLoop *self)
{
	Ifp flushed;
	int inblk = self->infb == self->ifb.ifpblk;
	Ifp *hdr = inblk ? (Ifp*)(self->ifb.ifprec+self->infp) : &flushed;

	/* continuation is used only for partial updates */
	hdr->nxtb = 0;
	hdr->nxtp = 0;
	/* the counts differ only after a split */
	hdr->totp = self->totp;
	hdr->segp = self->totp;
	hdr->segc = self->totp;
	if ( !inblk ) /* Ifp not in current blk; ifprec starts 4 bytes in */
		fPwrite(self->cds+CDS_IFP, hdr, sizeof(Ifp),
			((self->infb-1)<<9) + 4*(self->infp+1));
	self->totp = 0;
} /* closeifp */
494 |
|
495 |
/*
	Write the current inverted file block to its 512 byte slot
	and start over with the next block number at record 0.
*/
static void closeifb (CdsLoop *self)
{
	Ifb *blk = &self->ifb;

	eRr(LOG_TRACE, "w ifb %d", blk->ifpblk);
	fPwrite(self->cds+CDS_IFP, blk, sizeof(Ifb), (blk->ifpblk-1)<<9);
	blk->ifpblk += 1;
	self->rec = 0;
} /* closeifb */
502 |
|
503 |
|
504 |
/*
	Index export callback, called with a key and a bunch of its values.

	Unless the QSAME flag says this is the same key as before,
	a new segment is started: the header of the previous segment is
	written, a leaf entry for the key is added (blank padded; cut to
	30 bytes) and 5 ints are reserved for the Ifp header.
	The first entry of each leaf also gets an entry in the node of the
	lowest level, referring to the leaf by its negated block number.
	Full leaves, nodes and inverted file blocks are written on demand.
	Then the values are appended as postings.

	The reference ints are stored using memcpy, since in the packed
	layout they are not aligned within the entry.

	Always returns 0.
*/
static int cdsCb (CdsLoop *self)
{
	Pst *pst;
	unsigned i, vsz = self->qlp.qdx->vsz;
	const unsigned char *v = self->qlp.vals;

	if ( !(QSAME & self->qlp.flg) ) { /* add new key -- new segment */
		L01 *l0x; /* might really be a L02 */
		int l, n, oref; /* key length, offset of ref int(s) in idx */
		int f2, sl;
		char *key;

		if ( self->totp )
			closeifp(self);
		if ( self->rec > 127-5 ) /* no room for an Ifp header */
			closeifb(self);
		if ( (l = self->qlp.cur.len) <= 10 ) { /* l01 entry */
			l0x = &self->l01;
			f2 = 0;
			n = 10;
			sl = sizeof(L01);
		} else {
			l0x = (L01*)&self->l02;
			f2 = 2;
			if ( l > (n = 30) )
				l = 30;
			sl = sizeof(L02);
		}
#ifdef BUILD_CDSUNIX
		oref = n+2;
#else
		oref = n;
#endif
		if ( 10 == l0x->ock ) { /* need new block */
			l0x->ps = l0x->pos+1;
			eRr(LOG_TRACE, "w l0 n %d pos %d", n, l0x->pos);
			fPwrite(self->cds+CDS_L01+f2, l0x, sl, (l0x->pos-1)*sl);
			l0x->pos++;
			l0x->ock = 0;
		}
		key = l0x->idx[0].key + l0x->ock*(oref+8); /* sizeof idx */
		memset(key, ' ', n);
		memcpy(key, self->qlp.cur.byt, l);
		self->infb = self->ifb.ifpblk;
		self->infp = self->rec;
		memcpy(key+oref, &self->infb, sizeof(int));
		memcpy(key+oref+sizeof(int), &self->infp, sizeof(int));
		self->rec += 5; /* reserve space for Ifp header */
		if ( !l0x->ock++ ) { /* also write node entry */
			N01 *n0x = 10==n ? &self->n01 : (N01*)&self->n02;
			int sn = 10==n ? sizeof(N01) : sizeof(N02);
			int leaf = -l0x->pos;
			char *nkey;
			if ( 10 == n0x->ock ) {
				eRr(LOG_TRACE, "w n0 n %d pos %d", n, n0x->pos);
				fPwrite(self->cds+CDS_N01+f2, n0x, sn, (n0x->pos-1)*sn);
				n0x->pos++;
				n0x->ock = 0;
			}
			nkey = n0x->idx[0].key + n0x->ock++*(oref+4);
			if ( 2 == n0x->pos && 1 == n0x->ock )
				memset(nkey, ' ', n); /* 1st key - start of level */
			else
				memcpy(nkey, key, n);
			memcpy(nkey+oref, &leaf, sizeof(int));
		}
	}
	pst = (Pst*)(self->ifb.ifprec + self->rec);
	eRr(LOG_TRACE, "+ %d ptr @ %d.%d", self->qlp.nvals,
		self->ifb.ifpblk, self->rec);
	for ( i=self->qlp.nvals; i--; self->rec+=2, pst++, v += vsz) {
		if ( self->rec > 125 ) { /* a posting takes 2 of the 127 ints */
			closeifb(self);
			pst = (Pst*)self->ifb.ifprec;
		}
		if ( QDX_ISIS == self->qlp.qdx->ptr )
			memcpy(pst->b, v, 8);
		else {
			Ptr ptr;
			qRdVal(&ptr, v, self->qlp.qdx->ptr);
			pst->b[0] = ptr.rid >> 16;
			pst->b[1] = ptr.rid >> 8;
			pst->b[2] = ptr.rid;
			pst->b[3] = ptr.tag >> 8;
			pst->b[4] = ptr.tag;
			pst->b[5] = ptr.pos >> 16;
			pst->b[6] = ptr.pos >> 8;
			pst->b[7] = ptr.pos;
		}
	}
	self->totp += self->qlp.nvals;
	return 0;
} /* cdsCb */
595 |
|
596 |
|
597 |
/*
	Export db as a CDS/ISIS database.

	args are the options as understood by cdsOpt.
	All files of the CDS db are created or truncated.
	First, each record is recoded according to the options and written
	to the master file, with its position noted in the cross reference
	file; records that can not be read are marked as deleted.
	Then the index is looped with cdsCb writing postings, leaves and the
	lowest level of nodes, and finally the upper node levels and the
	index control records are built.

	Returns 0 on success, the result of cdsuse on bad options,
	else the result of eRr if the files could not be created.
*/
int cdsexp (Db *db, Fld *args)
{
	CdsOpt opt;
	cds_files cds;
	int ret = 0, pos, i, j;
	Mfc mfc;
	List rec;
	char recode[256];
	Xrf xrf;
	CdsLoop clp;
	Cnt cnt[2];

	if ( cdsOpt(&opt, args, db) )
		return cdsuse();
	opt.write = 1;
	if ( cdsOpen(cds, &opt) )
		return eRr(ERR_INVAL, "error creating CDS base");
	eRr(LOG_INFO, "exporting %d recs", db->rdx.mid);
	lInit(&rec, "");
	/* prepare recode table */
	for (i=0; i<128; i++) recode[i] = (char)i;
	if ( !opt.notab ) recode[TAB] = '^';
	if ( !opt.novt ) recode[VT] = LF;
	if ( opt.no850 )
		for (i=128; i<256; i++) recode[i] = (char)i;
	else {
		for (i=128; i<256; i++) recode[i] = '?';
		for (i=160; i<256; i++) /* reverse cp850 */
			for (j=0; j<128; j++) if ( i == cp850[j] ) { recode[i] = j+128; break; }
	}
	xrf.xpos = 1;
	mfc.rcnt = 0; /* record count; nmfn-1 - #deleted */
	pos = 64; /* as of folklore, 1st rec should start here */
	for (i=1; i<=db->rdx.mid; i++) { /* loop records */
		int xref;
		if ( 0 >= dRead(lClr(&rec), db, i) ) { /* no rec */
			eRr(LOG_TRACE, "rec %d was deleted", i);
			xref = -(1 << 11); /* block -1, pos 0 */
		} else {
			union {
				Mfr mfr;
				char buf[0x7fff];
			} da;
			char *b, *p, *e = da.buf+sizeof(da.buf);
			Dct *d = da.mfr.dict, *de;
			Fld *f, *fe = LEND(&rec);
			unsigned avl = 0x7fff - MFR_DICTOFF;

			xref = pos & 511; /* pos in block */
			/* the manual explicitly demands 0-498 and forbids 500 */
			if ( xref > 498 /*512 - MFR_BASEOFF = 500 packed */ ) {
				pos += 512; /* advance to next block */
				pos &= ~511;
				xref = 0;
			}
			xref |= ((pos >> 9)+1) << 11;
			da.mfr.mfn = i;
			da.mfr.bwb = 0;
			da.mfr.bwp = 0;
			da.mfr.base = 0;
			da.mfr.nvf = 0;
			da.mfr.stat = 0;
			/* count the fields fitting in, starting after the 1st Fld */
			for ( f=rec.fld; ++f<fe; ) {
				if ( avl < sizeof(Dct) )
					break;
				da.mfr.nvf++;
				avl -= sizeof(Dct);
				if ( avl >= f->len )
					avl -= f->len;
				else
					avl = 0; /* this field will be cut */
			}
			p = b = da.buf + (da.mfr.base = MFR_DICTOFF + da.mfr.nvf*sizeof(Dct));
			de = d + da.mfr.nvf;
			for ( f=rec.fld; d<de; d++ ) {
				unsigned char *v = (unsigned char*)(++f)->val, *ve;
				d->tag = f->tag;
				d->pos = p-b;
				d->len = e-p < (int)f->len ? e-p : f->len;
				for ( ve = v+d->len; v < ve; ) *p++ = recode[*v++];
			}
			da.mfr.mfrl = p - da.buf;
			eRr(LOG_VERBOSE, "write rec %d len %d pos %d", i, da.mfr.mfrl, pos);
			fPwrite(cds+CDS_MST, da.buf, da.mfr.mfrl, pos);
			if ( 1 & (pos += da.mfr.mfrl) ) /* round to even */
				pos++;
			mfc.rcnt++;
		}
		xrf.xrec[j = (i-1)%127] = xref;
		if ( i == db->rdx.mid ) { /* last */
			while ( ++j < 127 ) xrf.xrec[j] = 0;
			xrf.xpos = -xrf.xpos;
		} else if ( 126 != j )
			continue;
		eRr(LOG_VERBOSE, "%d: write Xrf %d", i, xrf.xpos);
		fWrite(cds+CDS_XRF, &xrf, sizeof(Xrf));
		xrf.xpos++;
	}
	mfc.ctlm = 0; /* should be 0 */
	mfc.nmfn = i; /* next avail MFN */
	/* blocks count from 1, just like in the xref above */
	mfc.nmfb = (pos >> 9)+1; /* next free block, 1st = 1 */
	mfc.nmfp = pos & 511; /* next free pos (in block) */
	mfc.type = 0; /* 0 (1 is for "messages" mst) */
	mfc.mfx1 = 0; /* unused */
	mfc.mfx2 = 0; /* unused; "LAN lock" */
	mfc.mfx3 = 0; /* unused; "LAN lock" */
	fPwrite(cds+CDS_MST, &mfc, sizeof(mfc), 0);
	/* mst done */
	memset(&clp, 0, sizeof(clp));
	clp.qlp.qcb = (QCb*)cdsCb;
	clp.cds = cds;
	clp.ifb.ifpblk = 1;
	clp.l01.pos = clp.l02.pos = 1;
	clp.n01.pos = clp.n02.pos = 2; /* start at 2; 1 will hold the root */
	clp.l01.it = clp.n01.it = 1;
	clp.l02.it = clp.n02.it = 2;
	clp.qlp.qdx = &db->qdx;
	qLoop(&clp.qlp);
	eRr(LOG_VERBOSE, "loop got pos ifp %d l01 %d l02 %d n01 %d n02 %d",
		clp.ifb.ifpblk, clp.l01.pos, clp.l02.pos, clp.n01.pos, clp.n02.pos);
	/* flush what is pending */
	if ( clp.totp )
		closeifp(&clp);
	if ( clp.rec )
		closeifb(&clp);
	clp.l01.ps = clp.l02.ps = 0; /* no next */
	fPwrite(cds+CDS_L01, &clp.l01, sizeof(L01), (clp.l01.pos-1)*sizeof(L01));
	fPwrite(cds+CDS_L02, &clp.l02, sizeof(L02), (clp.l02.pos-1)*sizeof(L02));
	memset(cnt, 0, sizeof(cnt)); /* do not write random padding bytes */
	for ( i=2; i--; ) {
		cnt[i].type = i+1;
		cnt[i].ordn = 5;
		cnt[i].ordf = 5;
		cnt[i].n = 15;
		cnt[i].k = 5;
		cnt[i].lev = 0;
		cnt[i].posr = 1;
		cnt[i].nmax = 1;
		cnt[i].abno = 1;
	}
	cnt[0].fmax = clp.l01.pos;
	cnt[1].fmax = clp.l02.pos;
	if ( 2 == clp.n01.pos ) { /* the poor one and only node */
		/* since we did not open a 2nd node, this one was not written */
		clp.n01.pos = 1; /* make it the root */
		cnt[0].abno = 0; /* really the field means "normal", not "abnormal" */
	}
	fPwrite(cds+CDS_N01, &clp.n01, sizeof(N01), (clp.n01.pos-1)*sizeof(N01));
	if ( 2 == clp.n02.pos ) {
		clp.n02.pos = 1;
		cnt[1].abno = 0;
	}
	fPwrite(cds+CDS_N02, &clp.n02, sizeof(N02), (clp.n02.pos-1)*sizeof(N02));
	/* build the trees */
	if ( cnt[0].abno ) {
		int f = 2, t = clp.n01.pos; /* from - to on level */
		cnt[0].nmax = t; /* blocks 2..t are written, the root will be 1 */
		for (;;) {
			N01 c, p; /* child, parent */
			int start = t+1;
			cnt[0].lev++;
			memset(&p, 0, sizeof(p)); /* do not write random bytes */
			/* the t-f+1 children fit in the root, if not more than 10 */
			p.pos = (t-f) < 10 ? 1 : start;
			p.ock = 1; /* blank key ref to f */
			p.it = 1;
			memset(p.idx[0].key, ' ', sizeof(p.idx[0].key));
			p.idx[0].ref = f;
			for ( i=f+1; i<=t; i++ ) {
				fPread(cds+CDS_N01, &c, sizeof(c), (i-1)*sizeof(c));
				eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
					cnt[0].lev-1, c.pos,
					(int)sizeof(c.idx[0].key), c.idx[0].key, c.idx[0].ref,
					c.ock, (int)sizeof(c.idx[0].key),
					c.idx[c.ock-1].key, c.idx[c.ock-1].ref);
				if ( 10 == p.ock ) {
					eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - '%.*s'->%d",
						cnt[0].lev, p.pos,
						(int)sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
						(int)sizeof(p.idx[0].key), p.idx[9].key, p.idx[9].ref);
					fPwrite(cds+CDS_N01, &p, sizeof(p), (p.pos-1)*sizeof(p));
					p.pos++;
					p.ock = 0;
				}
				memcpy(p.idx[p.ock].key, c.idx[0].key, sizeof(c.idx[0].key));
				p.idx[p.ock++].ref = i;
			}
			eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
				cnt[0].lev, p.pos,
				(int)sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
				p.ock, (int)sizeof(p.idx[0].key),
				p.idx[p.ock-1].key, p.idx[p.ock-1].ref);
			fPwrite(cds+CDS_N01, &p, sizeof(p), (p.pos-1)*sizeof(p));
			if ( 1 == p.pos ) /* wrote the root */
				break;
			f = start;
			cnt[0].nmax = t = p.pos;
		}
	}
	if ( cnt[1].abno ) {
		int f = 2, t = clp.n02.pos; /* from - to on level */
		cnt[1].nmax = t; /* blocks 2..t are written, the root will be 1 */
		for (;;) {
			N02 c, p; /* child, parent */
			int start = t+1;
			cnt[1].lev++;
			memset(&p, 0, sizeof(p)); /* do not write random bytes */
			/* the t-f+1 children fit in the root, if not more than 10 */
			p.pos = (t-f) < 10 ? 1 : start;
			p.ock = 1; /* blank key ref to f */
			p.it = 2;
			memset(p.idx[0].key, ' ', sizeof(p.idx[0].key));
			p.idx[0].ref = f;
			for ( i=f+1; i<=t; i++ ) {
				fPread(cds+CDS_N02, &c, sizeof(c), (i-1)*sizeof(c));
				eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
					cnt[1].lev-1, c.pos,
					(int)sizeof(c.idx[0].key), c.idx[0].key, c.idx[0].ref,
					c.ock, (int)sizeof(c.idx[0].key),
					c.idx[c.ock-1].key, c.idx[c.ock-1].ref);
				if ( 10 == p.ock ) {
					eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - '%.*s'->%d",
						cnt[1].lev, p.pos,
						(int)sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
						(int)sizeof(p.idx[0].key), p.idx[9].key, p.idx[9].ref);
					fPwrite(cds+CDS_N02, &p, sizeof(p), (p.pos-1)*sizeof(p));
					p.pos++;
					p.ock = 0;
				}
				memcpy(p.idx[p.ock].key, c.idx[0].key, sizeof(c.idx[0].key));
				p.idx[p.ock++].ref = i;
			}
			eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
				cnt[1].lev, p.pos,
				(int)sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
				p.ock, (int)sizeof(p.idx[0].key),
				p.idx[p.ock-1].key, p.idx[p.ock-1].ref);
			fPwrite(cds+CDS_N02, &p, sizeof(p), (p.pos-1)*sizeof(p));
			if ( 1 == p.pos ) /* wrote the root */
				break;
			f = start;
			cnt[1].nmax = t = p.pos;
		}
	}
	fWrite(cds+CDS_CNT,cnt,CNT_SIZE);
	fWrite(cds+CDS_CNT,cnt+1,CNT_SIZE);
	cdsClose(cds);
	return ret;
} /* cdsexp */