/[webpac]/openisis/0.9.9e/tool/cds.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/0.9.9e/tool/cds.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (show annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 23847 byte(s)
import of new openisis release, 0.9.9e

1 /*
2 The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3 Version 0.9.x (patchlevel see file Version)
4 Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: cds.c,v 1.3 2004/07/25 10:19:29 kripke Exp $
25 malete CDS/ISIS conversion
26 */
27
28 #include "../tool/tool.h"
29
/*
    Layout selection for the on-disk structs below.
    BUILD_CDSUNIX: ints stay 4-byte aligned ("UNIX" files), PACKED is empty.
    otherwise:     ints are packed (Intel "DOS" + WinIsis files), which
                   needs a compiler specific attribute behind PACKED.
*/
#ifdef CPU_BIG_ENDIAN
#   define BUILD_CDSUNIX /* no packed version on SPARCs, M68K or PPCs */
#endif

#if defined(BUILD_CDSUNIX)
#   define PACKED
#elif defined(__GNUC__)
#   define PACKED __attribute__((packed))
#else
#   error "please define how to pack"
#endif
43
/*
    Indexes of the files making up one CDS/ISIS database.
    cds_ext[] below lists the matching file name extensions in the same order.
*/
enum {
    CDS_MST,    /* master file */
    CDS_XRF,    /* cross reference */
    CDS_CNT,    /* index control */
    CDS_IFP,    /* index record pointers (postings) */
    /*
        do not change the order of the next four: the index writer
        selects the "02" files by adding 2 to the "01" constants
    */
    CDS_N01,    /* nodes with 10 byte keys */
    CDS_L01,    /* leaves with 10 byte keys */
    CDS_N02,    /* nodes with 30 byte keys */
    CDS_L02,    /* leaves with 30 byte keys */
    CDS_FILES   /* number of files */
};

static const char *cds_ext[] = {
    "mst", "xrf",
    "cnt", "ifp",
    "n01", "l01",
    "n02", "l02"
};
55 typedef file cds_files[CDS_FILES];
56
57
/*
    Master file control header, stored at offset 0 of the mst file.
*/
typedef struct {
    int   ctlm;     /* should be 0 */
    int   nmfn;     /* next avail MFN */
    int   nmfb;     /* next free block, 1st = 1 */
    short nmfp;     /* next free pos (in block) */
    short type;     /* 0 (1 is for "messages" mst) */
    int   rcnt;     /* record count; nmfn-1 - #deleted */
    int   mfx1;     /* unused */
    int   mfx2;     /* unused; "LAN lock" */
    int   mfx3;     /* unused; "LAN lock" */
} Mfc;
69
70
/*
    One entry of the field dictionary of a master file record.
*/
typedef struct {
    unsigned short tag;     /* field tag */
    unsigned short pos;     /* offset of the field value from the record's base */
    unsigned short len;     /* length of the field value in bytes */
} Dct;
76
77 typedef struct { /* master file record */
78 int mfn;
79 short mfrl;
80 int bwb PACKED;
81 short bwp;
82 short base;
83 short nvf;
84 short stat;
85 Dct dict[1];
86 } Mfr;
87 /* it is common belief, that the first 4 fields
88 (12 bytes packed, 14 aligned) are to be in one 512-byte block
89 */
90 enum {
91 MFR_BASEOFF = ((char*)&((Mfr*)0)->base) - (char*)0,
92 MFR_DICTOFF = ((char*)&((Mfr*)0)->dict) - (char*)0
93 };
94
/*
    Cross reference block: 512 bytes locating 127 consecutive records.
*/
typedef struct {
    int xpos;           /* block number from 1; negated on the last block */
    int xrec[127];      /* per record: mst block << 11 | pos in block; <0 if deleted */
} Xrf;
99
/*
    Index control record; the cnt file holds one for each of the
    two trees (10 byte keys and 30 byte keys).
*/
typedef struct {
    short type;     /* 1 or 2 of N0x */
    short ordn;     /* 5 */
    short ordf;     /* 5 */
    short n;        /* 15 */
    short k;        /* 5 */
    short lev;      /* depth: N0x levels under root (root only: 0) */
    int   posr;     /* root pos */
    int   nmax;     /* N0x blocks */
    int   fmax;     /* L0x blocks */
    short abno;     /* 0 (!) if "abnormal" (root only) */
} Cnt;
/* while Cnt layout is independent of alignment, sizeof(Cnt) is not */
#ifdef BUILD_CDSUNIX
#   define CNT_SIZE 28
#else
#   define CNT_SIZE 26
#endif
117
/*
    "Posting": pointer from an index key to a place in a record.
*/
typedef struct {
    char b[8];      /* 3+2+1+2 byte rid+tag+occ+pos big endian */
} Pst;
121
/*
    Header of one segment of postings in the inverted file.
    The postings (Pst) follow the header and may span several Ifb blocks.
*/
typedef struct {
    int nxtb;       /* continuation: block of next segment */
    int nxtp;       /* continuation: pos of next segment */
    int totp;       /* total postings */
    int segp;       /* actually in this segment <= totp */
    int segc;       /* segment capacity >= segp */
} Ifp;
130
/*
    Inverted file block: 512 bytes of the ifp file.
*/
typedef struct {
    int ifpblk;         /* number of this block, from 1 */
    int ifprec[127];    /* Ifp headers and postings, addressed in int units */
} Ifb;
135
136 #define NSTRUCT(n) struct { \
137 int pos; \
138 short ock; \
139 short it; \
140 struct { \
141 char key[n]; \
142 int ref PACKED; \
143 } idx[10]; \
144 }
145 typedef NSTRUCT(10) N01;
146 typedef NSTRUCT(30) N02;
147
148 #define LSTRUCT(n) struct { \
149 int pos; \
150 short ock; \
151 short it; \
152 int ps; \
153 struct { \
154 char key[n]; \
155 int infb PACKED; \
156 int infp PACKED; \
157 } idx[10]; \
158 }
159 typedef LSTRUCT(10) L01;
160 typedef LSTRUCT(30) L02;
161
/*
    Latin1 values of the Cp850 upper half: entry i is the ISO Latin1 code
    for Cp850 character 128+i, '?' where no counterpart is given.
*/
static const unsigned char cp850[] = {
    /* 0x80 */ 199,252,233,226,228,224,229,231,234,235,232,239,238,236,196,197,
    /* 0x90 */ 201,230,198,244,246,242,251,249,255,214,220,248,163,216,215,'?',
    /* 0xa0 */ 225,237,243,250,241,209,170,186,191,174,172,189,188,161,171,187,
    /* 0xb0 */ '?','?','?','?','?',193,194,192,169,'?','?','?','?',162,165,'?',
    /* 0xc0 */ '?','?','?','?','?','?',227,195,'?','?','?','?','?','?','?',164,
    /* 0xd0 */ 240,208,202,203,200,'?',205,206,207,'?','?','?','?',166,204,'?',
    /* 0xe0 */ 211,223,212,210,245,213,181,254,222,218,219,217,253,221,175,180,
    /* 0xf0 */ 173,177,'?',190,182,167,247,184,176,168,183,185,179,178,'?',160
};
172
173 typedef struct {
174 Fld base; /* points into env->opt */
175 int notab; /* do not convert ^ <-> tabs */
176 int novt; /* do not convert newline <-> vtabs */
177 int no850; /* do not convert Cp850 <-> ISO Latin1 */
178 int write; /* write cds */
179 } CdsOpt;
180
181
182 static int cdsOpt (CdsOpt *opt, Fld *args, Db *db)
183 {
184 Fld o;
185 memset(opt, 0, sizeof(*opt));
186 for ( o.val = 0; vGet(&o, args, "BN"); )
187 switch (o.tag) {
188 case 'B':
189 opt->base = o;
190 continue;
191 case 'N':
192 if (VEQZ(&o, "otab"))
193 opt->notab = 1;
194 else if (VEQZ(&o, "ovt"))
195 opt->novt = 1;
196 else if (VEQZ(&o, "o850"))
197 opt->no850 = 1;
198 else
199 return eRr(ERR_INVAL, "bad cds opt '-N%.*s'", o.len, o.val);
200 }
201 if ( !opt->base.len )
202 opt->base.len = strlen(opt->base.val = db->pat);
203 return 0;
204 } /* cdsOpt */
205
206
207 int cdsuse ()
208 {
209 eRr(ERR_INVAL, "[cdsimp|cdsexp]:\n"
210 "-Bbase: path/basename of cds db (default = malete db)\n"
211 "-Notab: do not convert ^ <-> tab\n"
212 "-Novt: do not convert newline <-> vtab\n"
213 "-No850: do not convert Cp850 <-> ISO Latin1"
214 );
215 return ERR_INVAL;
216 } /* cdsuse */
217
218
219 static int cdsOpen (cds_files cds, CdsOpt *opt)
220 {
221 int filmode = opt->write ? FIL_RDWR|FIL_CREAT|FIL_TRUNC : FIL_RD;
222 int l = opt->base.len, i;
223 char *path = mAlloc(l+5);
224 memcpy(path, opt->base.val, l);
225 path[l++] = '.';
226 for (i=0; i<CDS_FILES; i++)
227 cds[i] = FIL_NONE;
228 for (i=0; i<CDS_FILES; i++) {
229 memcpy(path+l, cds_ext[i], 4);
230 if ( fOpen( cds+i, path, filmode ) )
231 return eRr(LOG_ERROR, "no access to CDS file '%s'", path);
232 }
233 return 0;
234 } /* cdsOpen */
235
236
237 static void cdsClose (cds_files cds)
238 {
239 int i;
240 for (i=0; i<CDS_FILES; i++)
241 fClose(cds+i);
242 } /* cdsClose */
243
244
245 int cdsimp (Db *db, Fld *args)
246 {
247 CdsOpt opt;
248 cds_files cds;
249 int ret = 0, m, i;
250 Mfc mfc;
251 List rec;
252 char recode[256];
253 L01 l01;
254 L02 l02;
255 Ifb ifbs[2];
256 int n1, n2;
257 Key key;
258
259 if ( cdsOpt(&opt, args, db) )
260 return cdsuse();
261 if ( cdsOpen(cds, &opt) )
262 return eRr(ERR_INVAL, "bad CDS base");
263 #define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
264 if ( sizeof(mfc) != fPread(cds+CDS_MST, &mfc, sizeof(mfc), 0) )
265 CLEANUP((ERR_TRASH, "no mfc"));
266 eRr(LOG_TRACE,
267 "MFC ctlm %d nmfn %d nmfb %d nmfp %d type %d"
268 " rcnt %d x1 %d x2 %d x3 %d",
269 mfc.ctlm, mfc.nmfn, mfc.nmfb, mfc.nmfp, mfc.type,
270 mfc.rcnt, mfc.mfx1, mfc.mfx2, mfc.mfx3);
271 eRr(LOG_INFO, "importing %d recs", mfc.nmfn-1);
272 lInit(&rec, "");
273 /* prepare recode table */
274 for (i=0; i<128; i++) recode[i] = (char)i;
275 if ( !opt.notab ) recode['^'] = TAB;
276 if ( !opt.novt ) recode[LF] = VT;
277 if ( opt.no850 )
278 for (i=128; i<256; i++) recode[i] = (char)i;
279 else
280 for (i=128; i<256; i++) recode[i] = (char)cp850[i-128];
281 /* loop m := ceil(numrecs/127) xrf blocks */
282 m = (mfc.nmfn+125)/127;
283 for (i=1; i<=m; i++) {
284 Xrf xrf;
285 int f = 1+127*(i-1), t = f+126, mfn;
286 int got = fRead(cds+CDS_XRF, &xrf, sizeof(xrf));
287 if ( (int)sizeof(xrf) != got )
288 CLEANUP((ERR_TRASH, "reading xrf block %d", i));
289 if ( xrf.xpos != (i==m ? -i : i) )
290 CLEANUP((ERR_TRASH, "bad pos %d in xrf block %d", xrf.xpos, i));
291 if ( t >= mfc.nmfn )
292 t = mfc.nmfn-1;
293 for (mfn=f; mfn<=t; mfn++) {
294 int pos = xrf.xrec[mfn-f];
295 int mfb = (pos >> 11);
296 union {
297 Mfr mfr;
298 char buf[0x7fff];
299 } da;
300 Dct *d = da.mfr.dict;
301 char *b, *e;
302
303 if ( 0 > mfb ) {
304 eRr(LOG_TRACE, "rec %d was deleted", mfn);
305 continue;
306 }
307 /* ignore bits 9, 10 signaling pending index update */
308 pos &= 0x1ff;
309 pos |= (mfb-1)<<9;
310 got = fPread(cds+CDS_MST, da.buf, sizeof(Mfr), pos);
311 if ( got < MFR_DICTOFF )
312 CLEANUP((ERR_TRASH, "rec %d too short %d", mfn, got));
313 if ( da.mfr.mfrl < 0 ) {
314 eRr(LOG_WARN, "rec %d pos %d locked", mfn, pos);
315 da.mfr.mfrl = -da.mfr.mfrl;
316 }
317 if ( da.mfr.mfrl < (int)(MFR_DICTOFF+da.mfr.nvf*sizeof(Dct)) )
318 CLEANUP((ERR_TRASH, "rec %d pos %d bad mfrl %d nvf %d",
319 mfn, pos, da.mfr.mfrl, da.mfr.nvf));
320 got = fPread(cds+CDS_MST, da.buf, da.mfr.mfrl, pos);
321 if ( da.mfr.mfrl != got ) {
322 if ( got == -da.mfr.mfrl )
323 da.mfr.mfrl = -da.mfr.mfrl;
324 else
325 CLEANUP((ERR_TRASH, "rec %d pos %d got %d bytes want %d",
326 mfn, pos, got, da.mfr.mfrl));
327 }
328 b = da.buf+da.mfr.base;
329 e = da.buf+da.mfr.mfrl;
330 lReset(&rec);
331 for ( ; da.mfr.nvf--; d++ ) {
332 char *p = b+d->pos, *q = p+d->len;
333 if ( p < b || q > e )
334 CLEANUP((ERR_TRASH, "bad pos %d len %d field %d",
335 d->pos, d->len, d-da.mfr.dict));
336 for ( ; p<q; p++ ) *p = recode[(unsigned char)*p];
337 LADD(&rec, d->tag, b+d->pos, d->len);
338 }
339 dWrite(db, rec.fld, mfn);
340 }
341 }
342 /* mst done */
343 /* we assume first leaf has pos 1; maybe we should use N0x to search */
344 l01.ps = l02.ps = 1;
345 l01.ock = l02.ock = n1 = n2 = 0;
346 ifbs[0].ifpblk = ifbs[1].ifpblk = 0;
347 for (;;) { /* loop leaves */
348 Fld use;
349 int infb, infp, got, more = 0;
350 Ifp ifp;
351 Ifb *ifb;
352
353 if ( n1 == l01.ock && (l01.pos = l01.ps) ) {
354 if ((int)sizeof(l01) != (got
355 = fPread(cds+CDS_L01,&l01,sizeof(l01),(l01.ps-1)*sizeof(l01)))
356 )
357 CLEANUP((ERR_TRASH, "l01 %d got %d", l01.ps, got));
358 eRr(LOG_TRACE, "l01 pos %d ock %d nxt %d", l01.pos, l01.ock, l01.ps);
359 n1 = 0;
360 if ( !l01.ock )
361 continue;
362 }
363 if ( n2 == l02.ock && (l02.pos = l02.ps) ) {
364 if ((int)sizeof(l02) != (got
365 = fPread(cds+CDS_L02,&l02,sizeof(l02),(l02.ps-1)*sizeof(l02)))
366 )
367 CLEANUP((ERR_TRASH, "l02 %d got %d", l02.ps, got));
368 eRr(LOG_TRACE, "l02 pos %d ock %d nxt %d", l02.pos, l02.ock, l02.ps);
369 n2 = 0;
370 if ( !l02.ock )
371 continue;
372 }
373 if ( !l01.pos && !l02.pos )
374 break;
375 if ( n1 == l01.ock )
376 use.tag = 1;
377 else if ( n2 == l02.ock )
378 use.tag = 0;
379 else if ( 0 > memcmp(l02.idx[n2].key, l01.idx[n1].key, 10) )
380 use.tag = 1;
381 else
382 use.tag = 0;
383 if ( use.tag ) {
384 infb = l02.idx[n2].infb;
385 infp = l02.idx[n2].infp;
386 use.val = l02.idx[n2++].key;
387 use.len = 30;
388 } else {
389 infb = l01.idx[n1].infb;
390 infp = l01.idx[n1].infp;
391 use.val = l01.idx[n1++].key;
392 use.len = 10;
393 }
394 ifb = ifbs+use.tag;
395 /* kill trailing blanks */
396 while ( use.len && ' ' == use.val[use.len-1] ) use.len--;
397 eRr(LOG_TRACE, "'%.*s'", use.len, use.val);
398 memcpy(key.byt, use.val, key.len = use.len);
399 key.val.len = db->qdx.vsz;
400 while (infb) {
401 Pst *pst;
402 int n;
403 if ( ifb->ifpblk != infb
404 && (512 != fPread(cds+CDS_IFP,ifb,sizeof(Ifb),(infb-1)*512)
405 || ifb->ifpblk != infb)
406 )
407 CLEANUP((ERR_TRASH, "ifp %d",infb));
408 if ( more ) {
409 pst = (Pst*)ifb->ifprec;
410 if ( (n = 127/2) > more )
411 n = more;
412 more -= n;
413 eRr(LOG_TRACE, "+%d @%d", n, infb);
414 } else {
415 if ( infp > 127-5 )
416 CLEANUP((ERR_TRASH, "bad infp %d for infb %d", infp, infb));
417 ifp = *(Ifp*)(ifb->ifprec + infp);
418 pst = (Pst*)(ifb->ifprec + infp + 5);
419 if ( (n = (127-5-infp)/2) > ifp.segp )
420 n = ifp.segp;
421 more = ifp.segp - n;
422 eRr(LOG_TRACE, "ifp @%d,%d: %d %d %d %d %d", infb, infp,
423 ifp.nxtb, ifp.nxtp, ifp.totp, ifp.segp, ifp.segc);
424 }
425 if ( more )
426 infb++;
427 else {
428 infb = ifp.nxtb;
429 infp = ifp.nxtp;
430 }
431 for ( ; n--; pst++ ) {
432 if ( QDX_ISIS == db->qdx.ptr )
433 memcpy(key.val.byt, pst->b, 8);
434 else {
435 Ptr ptr;
436 ptr.ext = 0;
437 ptr.rid = pst->b[0]<<16 | pst->b[1]<<8 | pst->b[2];
438 ptr.tag = pst->b[3]<<8 | pst->b[4];
439 ptr.pos = pst->b[5]<<16 | pst->b[6]<<8 | pst->b[7];
440 qMkVal(&key.val, &ptr, db->qdx.ptr);
441 }
442 qLoad(&db->qdx, &key);
443 }
444 } /* while infb */
445 }
446 key.val.len = 0;
447 qLoad(&db->qdx, &key);
448 cleanup:
449 #undef CLEANUP
450 cdsClose(cds);
451 return ret;
452 } /* cdsimp */
453
454
455 /* export index */
456 typedef struct {
457 QLoop qlp;
458 file *cds;
459 int totp; /* in current segment */
460 int infb; /* of current ifp */
461 int infp; /* of current ifp */
462 Ifb ifb; /* current block; flushed on demand */
463 int rec; /* next in ifb.ifprec 0..126 */
464 L01 l01; /* new keys are immediatly inserted here ... */
465 L02 l02; /* ... structs flushed on demand */
466 N01 n01;
467 N02 n02;
468 } CdsLoop;
469
470
471 static void closeifp (CdsLoop *self) /* write Ifp header */
472 {
473 Ifp buf, *ifp;
474 if ( self->infb != self->ifb.ifpblk ) /* Ifp not in current blk */
475 ifp = &buf;
476 else
477 ifp = (Ifp*)(self->ifb.ifprec+self->infp);
478 ifp->nxtb = ifp->nxtp = 0; /* used only for partial updates */
479 ifp->totp = ifp->segp = ifp->segc = self->totp; /* diff only after split */
480 if ( self->infb != self->ifb.ifpblk )
481 fPwrite(self->cds+CDS_IFP, ifp, sizeof(Ifp),
482 ((self->infb-1)<<9) + 4*(self->infp+1));
483 self->totp = 0;
484 /*
485 hmmmm ... From the manual:
486 Cuando se carga secuencialmente el archivo
487 invertido (por ejemplo despues de una generacion completa del archivo
488 invertido con ISISINV), cada lista esta formada por uno o mas segmentos
489 adyacentes. Si IFPTOT <= 32768, entonces: IFPNXTB/IFPNXTP = 0/0 y IFPTOT =
490 IFPSEGP = IFPSEGC.
491 This seems to imply that numbers really should be used as signed shorts!?
492 */
493 } /* closeifp */
494
495 static void closeifb (CdsLoop *self)
496 {
497 eRr(LOG_TRACE, "w ifb %d", self->ifb.ifpblk);
498 fPwrite(self->cds+CDS_IFP,&self->ifb,sizeof(Ifb),(self->ifb.ifpblk-1)<<9);
499 self->ifb.ifpblk++;
500 self->rec = 0;
501 } /* closeifb */
502
503
504 static int cdsCb (CdsLoop *self)
505 {
506 Pst *pst;
507 unsigned i, vsz = self->qlp.qdx->vsz;
508 const unsigned char *v = self->qlp.vals;
509
510 if ( !(QSAME & self->qlp.flg) ) { /* add new key -- new segment */
511 L01 *l0x; /* might really be a L02 */
512 int l, n, oref; /* key length, offset of ref int(s) in idx */
513 int f2, sl, *ref;
514 char *key;
515
516 if ( self->totp )
517 closeifp(self);
518 if ( self->rec > 127-5 )
519 closeifb(self);
520 if ( (l = self->qlp.cur.len) <= 10 ) { /* l01 entry */
521 l0x = &self->l01;
522 f2 = 0;
523 n = 10;
524 sl = sizeof(L01);
525 } else {
526 l0x = (L01*)&self->l02;
527 f2 = 2;
528 if ( l > (n = 30) )
529 l = 30;
530 sl = sizeof(L02);
531 }
532 #ifdef BUILD_CDSUNIX
533 oref = n+2;
534 #else
535 oref = n;
536 #endif
537 if ( 10 == l0x->ock ) { /* need new block */
538 l0x->ps = l0x->pos+1;
539 eRr(LOG_TRACE, "w l0 n %d pos %d", n, l0x->pos);
540 fPwrite(self->cds+CDS_L01+f2, l0x, sl, (l0x->pos-1)*sl);
541 l0x->pos++;
542 l0x->ock = 0;
543 }
544 key = l0x->idx[0].key + l0x->ock*(oref+8); /* sizeof idx */
545 memset(key, ' ', n);
546 memcpy(key, self->qlp.cur.byt, l);
547 ref = (int*)(key+oref);
548 ref[0] = self->infb = self->ifb.ifpblk;
549 ref[1] = self->infp = self->rec;
550 self->rec += 5; /* reserve space for Ifp header */
551 if ( !l0x->ock++ ) { /* also write node entry */
552 N01 *n0x = 10==n ? &self->n01 : (N01*)&self->n02;
553 int sn = 10==n ? sizeof(N01) : sizeof(N02);
554 char *nkey;
555 if ( 10 == n0x->ock ) {
556 eRr(LOG_TRACE, "w n0 n %d pos %d", n, n0x->pos);
557 fPwrite(self->cds+CDS_N01+f2, n0x, sn, (n0x->pos-1)*sn);
558 n0x->pos++;
559 n0x->ock = 0;
560 }
561 nkey = n0x->idx[0].key + n0x->ock++*(oref+4);
562 if ( 2 == n0x->pos && 1 == n0x->ock )
563 memset(nkey, ' ', n); /* 1st key - start of level */
564 else
565 memcpy(nkey, key, n);
566 *(int*)(nkey+oref) = -l0x->pos;
567 }
568 }
569 pst = (Pst*)(self->ifb.ifprec + self->rec);
570 eRr(LOG_TRACE, "+ %d ptr @ %d.%d", self->qlp.nvals,
571 self->ifb.ifpblk, self->rec);
572 for ( i=self->qlp.nvals; i--; self->rec+=2, pst++, v += vsz) {
573 if ( self->rec > 125 ) {
574 closeifb(self);
575 pst = (Pst*)self->ifb.ifprec;
576 }
577 if ( QDX_ISIS == self->qlp.qdx->ptr )
578 memcpy(pst->b, v, 8);
579 else {
580 Ptr ptr;
581 qRdVal(&ptr, v, self->qlp.qdx->ptr);
582 pst->b[0] = ptr.rid >> 16;
583 pst->b[1] = ptr.rid >> 8;
584 pst->b[2] = ptr.rid;
585 pst->b[3] = ptr.tag >> 8;
586 pst->b[4] = ptr.tag;
587 pst->b[5] = ptr.pos >> 16;
588 pst->b[6] = ptr.pos >> 8;
589 pst->b[7] = ptr.pos;
590 }
591 }
592 self->totp += self->qlp.nvals;
593 return 0;
594 } /* cdsCb */
595
596
597 int cdsexp (Db *db, Fld *args)
598 {
599 CdsOpt opt;
600 cds_files cds;
601 int ret = 0, pos, i, j;
602 Mfc mfc;
603 List rec;
604 char recode[256];
605 Xrf xrf;
606 CdsLoop clp;
607 Cnt cnt[2];
608
609 if ( cdsOpt(&opt, args, db) )
610 return cdsuse();
611 opt.write = 1;
612 if ( cdsOpen(cds, &opt) )
613 return eRr(ERR_INVAL, "error creating CDS base");
614 eRr(LOG_INFO, "exporting %d recs", db->rdx.mid);
615 lInit(&rec, "");
616 /* prepare recode table */
617 for (i=0; i<128; i++) recode[i] = (char)i;
618 if ( !opt.notab ) recode[TAB] = '^';
619 if ( !opt.novt ) recode[VT] = LF;
620 if ( opt.no850 )
621 for (i=128; i<256; i++) recode[i] = (char)i;
622 else {
623 for (i=128; i<256; i++) recode[i] = '?';
624 for (i=160; i<256; i++) /* reverse cp850 */
625 for (j=0; j<128; j++) if ( i == cp850[j] ) { recode[i] = j+128; break; }
626 }
627 xrf.xpos = 1;
628 mfc.rcnt = 0; /* record count; nmfn-1 - #deleted */
629 pos = 64; /* as of folklore, 1st rec should start here */
630 for (i=1; i<=db->rdx.mid; i++) { /* loop records */
631 int xref;
632 if ( 0 >= dRead(lClr(&rec), db, i) ) { /* no rec */
633 eRr(LOG_TRACE, "rec %d was deleted", i);
634 xref = -1 << 11;
635 } else {
636 union {
637 Mfr mfr;
638 char buf[0x7fff];
639 } da;
640 char *b, *p, *e = da.buf+sizeof(da.buf);
641 Dct *d = da.mfr.dict, *de;
642 Fld *f, *fe = LEND(&rec);
643 unsigned avl = 0x7fff - MFR_DICTOFF;
644
645 xref = pos & 511; /* pos in block */
646 /* the manual explicitly demands 0-498 and forbids 500 */
647 if ( xref > 498 /*512 - MFR_BASEOFF = 500 packed */ ) {
648 pos += 512; /* advance to next block */
649 pos &= ~511;
650 xref = 0;
651 }
652 xref |= ((pos >> 9)+1) << 11;
653 da.mfr.mfn = i;
654 da.mfr.bwb = 0;
655 da.mfr.bwp = 0;
656 da.mfr.base = 0;
657 da.mfr.nvf = 0;
658 da.mfr.stat = 0;
659 for ( f=rec.fld; ++f<fe; ) {
660 if ( avl < (int)sizeof(Dct) )
661 break;
662 da.mfr.nvf++;
663 avl -= sizeof(Dct);
664 if ( avl >= f->len )
665 avl -= f->len;
666 else
667 avl = 0;
668 }
669 p = b = da.buf + (da.mfr.base = MFR_DICTOFF + da.mfr.nvf*sizeof(Dct));
670 de = d + da.mfr.nvf;
671 for ( f=rec.fld; d<de; d++ ) {
672 unsigned char *v = (unsigned char*)(++f)->val, *ve;
673 d->tag = f->tag;
674 d->pos = p-b;
675 d->len = e-p < (int)f->len ? e-p : f->len;
676 for ( ve = v+d->len; v < ve; ) *p++ = recode[*v++];
677 }
678 da.mfr.mfrl = p - da.buf;
679 eRr(LOG_VERBOSE, "write rec %d len %d pos %d", i, da.mfr.mfrl, pos);
680 fPwrite(cds+CDS_MST, da.buf, da.mfr.mfrl, pos);
681 if ( 1 & (pos += da.mfr.mfrl) ) /* round to even */
682 pos++;
683 mfc.rcnt++;
684 }
685 xrf.xrec[j = (i-1)%127] = xref;
686 if ( i == db->rdx.mid ) { /* last */
687 while ( ++j < 127 ) xrf.xrec[j] = 0;
688 xrf.xpos = -xrf.xpos;
689 } else if ( 126 != j )
690 continue;
691 eRr(LOG_VERBOSE, "%d: write Xrf %d", i, xrf.xpos);
692 fWrite(cds+CDS_XRF, &xrf, sizeof(Xrf));
693 xrf.xpos++;
694 }
695 mfc.ctlm = 0; /* should be 0 */
696 mfc.nmfn = i; /* next avail MFN */
697 mfc.nmfb = pos >> 9; /* next free block, 1st = 1 */
698 mfc.nmfp = pos & 511; /* next free pos (in block) */
699 mfc.type = 0; /* 0 (1 is for "messages" mst) */
700 mfc.mfx1 = 0; /* unused */
701 mfc.mfx2 = 0; /* unused; "LAN lock" */
702 mfc.mfx3 = 0; /* unused; "LAN lock" */
703 fPwrite(cds+CDS_MST, &mfc, sizeof(mfc), 0);
704 /* mst done */
705 memset(&clp, 0, sizeof(clp));
706 clp.qlp.qcb = (QCb*)cdsCb;
707 clp.cds = cds;
708 clp.ifb.ifpblk = 1;
709 clp.l01.pos = clp.l02.pos = 1;
710 clp.n01.pos = clp.n02.pos = 2; /* start at 2; 1 will hold the root */
711 clp.l01.it = clp.n01.it = 1;
712 clp.l02.it = clp.n02.it = 2;
713 clp.qlp.qdx = &db->qdx;
714 qLoop(&clp.qlp);
715 eRr(LOG_VERBOSE, "loop got pos ifp %d l01 %d l02 %d n01 %d n02 %d",
716 clp.ifb.ifpblk, clp.l01.pos, clp.l02.pos, clp.n01.pos, clp.n02.pos);
717 if ( clp.totp )
718 closeifp(&clp);
719 if ( clp.rec )
720 closeifb(&clp);
721 clp.l01.ps = clp.l02.ps = 0; /* no next */
722 fPwrite(cds+CDS_L01, &clp.l01, sizeof(L01), (clp.l01.pos-1)*sizeof(L01));
723 fPwrite(cds+CDS_L02, &clp.l02, sizeof(L02), (clp.l02.pos-1)*sizeof(L02));
724 for ( i=2; i--; ) {
725 cnt[i].type = i+1;
726 cnt[i].ordn = 5;
727 cnt[i].ordf = 5;
728 cnt[i].n = 15;
729 cnt[i].k = 5;
730 cnt[i].lev = 0;
731 cnt[i].posr = 1;
732 cnt[i].nmax = 1;
733 cnt[i].abno = 1;
734 }
735 cnt[0].fmax = clp.l01.pos;
736 cnt[1].fmax = clp.l02.pos;
737 if ( 2 == clp.n01.pos ) { /* the poor one and only node */
738 /* since we did not open a 2nd node, this one was not written */
739 clp.n01.pos = 1; /* make it the root */
740 cnt[0].abno = 0; /* really the field means "normal", not "abnormal" */
741 }
742 fPwrite(cds+CDS_N01, &clp.n01, sizeof(N01), (clp.n01.pos-1)*sizeof(N01));
743 if ( 2 == clp.n02.pos ) {
744 clp.n02.pos = 1;
745 cnt[1].abno = 0;
746 }
747 fPwrite(cds+CDS_N02, &clp.n02, sizeof(N02), (clp.n02.pos-1)*sizeof(N02));
748 /* build the trees */
749 if ( cnt[0].abno ) {
750 int f = 2, t = clp.n01.pos; /* from - to on level */
751 for (;;) {
752 N01 c, p; /* child, parent */
753 int start = t+1;
754 cnt[0].lev++;
755 p.pos = 10 >= (t-f) ? 1 : start;
756 p.ock = 1; /* blank key ref to f */
757 p.it = 1;
758 memset(p.idx[0].key, ' ', sizeof(p.idx[0].key));
759 p.idx[0].ref = f;
760 for ( i=f+1; i<=t; i++ ) {
761 fPread(cds+CDS_N01, &c, sizeof(c), (i-1)*sizeof(c));
762 eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
763 cnt[0].lev-1, c.pos, sizeof(c.idx[0].key), c.idx[0].key, c.idx[0].ref,
764 c.ock, sizeof(c.idx[0].key), c.idx[c.ock-1].key, c.idx[c.ock-1].ref);
765 if ( 10 == p.ock ) {
766 eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - '%.*s'->%d",
767 cnt[0].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
768 sizeof(p.idx[0].key), p.idx[9].key, p.idx[9].ref);
769 fPwrite(cds+CDS_N01, &p, sizeof(p), (p.pos-1)*sizeof(p));
770 p.pos++;
771 p.ock = 0;
772 }
773 memcpy(p.idx[p.ock].key, c.idx[0].key, sizeof(c.idx[0].key));
774 p.idx[p.ock++].ref = i;
775 }
776 eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
777 cnt[0].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
778 p.ock, sizeof(p.idx[0].key), p.idx[p.ock-1].key, p.idx[p.ock-1].ref);
779 fPwrite(cds+CDS_N01, &p, sizeof(p), (p.pos-1)*sizeof(p));
780 if ( 1 == p.pos )
781 break;
782 f = start;
783 cnt[0].nmax = t = p.pos;
784 }
785 }
786 if ( cnt[1].abno ) {
787 int f = 2, t = clp.n02.pos; /* from - to on level */
788 for (;;) {
789 N02 c, p; /* child, parent */
790 int start = t+1;
791 cnt[1].lev++;
792 p.pos = 10 >= (t-f) ? 1 : start;
793 p.ock = 1; /* blank key ref to f */
794 p.it = 2;
795 memset(p.idx[0].key, ' ', sizeof(p.idx[0].key));
796 p.idx[0].ref = f;
797 for ( i=f+1; i<=t; i++ ) {
798 fPread(cds+CDS_N02, &c, sizeof(c), (i-1)*sizeof(c));
799 eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
800 cnt[1].lev-1, c.pos, sizeof(c.idx[0].key), c.idx[0].key, c.idx[0].ref,
801 c.ock, sizeof(c.idx[0].key), c.idx[c.ock-1].key, c.idx[c.ock-1].ref);
802 if ( 10 == p.ock ) {
803 eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - '%.*s'->%d",
804 cnt[1].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
805 sizeof(p.idx[0].key), p.idx[9].key, p.idx[9].ref);
806 fPwrite(cds+CDS_N02, &p, sizeof(p), (p.pos-1)*sizeof(p));
807 p.pos++;
808 p.ock = 0;
809 }
810 memcpy(p.idx[p.ock].key, c.idx[0].key, sizeof(c.idx[0].key));
811 p.idx[p.ock++].ref = i;
812 }
813 eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
814 cnt[0].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
815 p.ock, sizeof(p.idx[0].key), p.idx[p.ock-1].key, p.idx[p.ock-1].ref);
816 fPwrite(cds+CDS_N02, &p, sizeof(p), (p.pos-1)*sizeof(p));
817 if ( 1 == p.pos )
818 break;
819 f = start;
820 cnt[1].nmax = t = p.pos;
821 }
822 }
823 fWrite(cds+CDS_CNT,cnt,CNT_SIZE);
824 fWrite(cds+CDS_CNT,cnt+1,CNT_SIZE);
825 cdsClose(cds);
826 return ret;
827 } /* cdsexp */

  ViewVC Help
Powered by ViewVC 1.1.26