/[webpac]/openisis/0.9.9e/tool/cds.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/0.9.9e/tool/cds.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (hide annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 23847 byte(s)
import of new openisis release, 0.9.9e

1 dpavlin 604 /*
2     The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3     Version 0.9.x (patchlevel see file Version)
4     Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14     See the GNU Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     /*
24     $Id: cds.c,v 1.3 2004/07/25 10:19:29 kripke Exp $
25     malete CDS/ISIS conversion
26     */
27    
#include <stddef.h>

#include "../tool/tool.h"
29    
/*
	The on-disk structs come in two flavours: ints aligned on 4 bytes
	("UNIX") or packed (Intel "DOS" + WinIsis).
	PACKED is appended to the int members that must not be padded;
	it expands to nothing when the aligned flavour is built.
*/
#if defined(CPU_BIG_ENDIAN)
# define BUILD_CDSUNIX /* no packed version on SPARCs, M68K or PPCs */
#endif
#if defined(BUILD_CDSUNIX)
# define PACKED
#elif defined(__GNUC__)
# define PACKED __attribute__((packed))
#else
# error "please define how to pack"
#endif
43    
/* slots of a cds_files array, one per file making up a CDS/ISIS base */
enum {
	CDS_MST, /* master file */
	CDS_XRF, /* cross ref */
	CDS_CNT, /* index control */
	CDS_IFP, /* index record pointers */
	/* do not change this order: see f2 */
	CDS_N01, /* 10 byte nodes ... */
	CDS_L01, /* ... and leaves */
	CDS_N02, /* 30 byte nodes ... */
	CDS_L02, /* ... and leaves */
	CDS_FILES /* number of slots */
};
52     static const char *cds_ext[] = {
53     "mst", "xrf", "cnt", "ifp", "n01", "l01", "n02", "l02"
54     };
55     typedef file cds_files[CDS_FILES];
56    
57    
/* master file control header, read from / written to offset 0 of the mst */
typedef struct {
	int   ctlm; /* should be 0 */
	int   nmfn; /* next avail MFN */
	int   nmfb; /* next free block, 1st = 1 */
	short nmfp; /* next free pos (in block) */
	short type; /* 0 (1 is for "messages" mst) */
	int   rcnt; /* record count; nmfn-1 - #deleted */
	int   mfx1; /* unused */
	int   mfx2; /* unused; "LAN lock" */
	int   mfx3; /* unused; "LAN lock" */
} Mfc;
69    
70    
/* one entry of the field dictionary inside a master file record */
typedef struct {
	unsigned short tag; /* field tag */
	unsigned short pos; /* offset of the field data, relative to Mfr.base */
	unsigned short len; /* length of the field data in bytes */
} Dct;
76    
77     typedef struct { /* master file record */
78     int mfn;
79     short mfrl;
80     int bwb PACKED;
81     short bwp;
82     short base;
83     short nvf;
84     short stat;
85     Dct dict[1];
86     } Mfr;
87     /* it is common belief, that the first 4 fields
88     (12 bytes packed, 14 aligned) are to be in one 512-byte block
89     */
90     enum {
91     MFR_BASEOFF = ((char*)&((Mfr*)0)->base) - (char*)0,
92     MFR_DICTOFF = ((char*)&((Mfr*)0)->dict) - (char*)0
93     };
94    
/* cross ref block: a block number followed by 127 record pointers */
typedef struct {
	int xpos; /* block number, negated on the last block */
	int xrec[127]; /* block number << 11 | flags and pos in block */
} Xrf;
99    
/* index control record, one per key size (N01/L01 and N02/L02) */
typedef struct {
	short type; /* 1 or 2 of N0x */
	short ordn; /* 5 */
	short ordf; /* 5 */
	short n;    /* 15 */
	short k;    /* 5 */
	short lev;  /* depth: N0x levels under root (root only: 0) */
	int   posr; /* root pos */
	int   nmax; /* N0x blocks */
	int   fmax; /* L0x blocks */
	short abno; /* 0 (!) if "abnormal" (root only) */
} Cnt;
/* while Cnt layout is independent of alignment, sizeof(Cnt) is not */
#if defined(BUILD_CDSUNIX)
# define CNT_SIZE 28
#else
# define CNT_SIZE 26
#endif
117    
/* "posting": pointer from an index key to a record */
typedef struct {
	/* 3+2+1+2 byte rid+tag+occ+pos big endian */
	char b[8];
} Pst;
121    
/* inverted file postings header, 5 ints preceding the postings of a key */
typedef struct {
	int nxtb; /* continuation */
	int nxtp;
	int totp; /* total postings */
	int segp; /* actually in this segment <= totp */
	int segc; /* segment capacity >= segc */
	/* Pst pst[...]; possibly spanning several Ifb blocks */
} Ifp;
130    
/* inverted file block: a block number followed by 127 ints of payload */
typedef struct {
	int ifpblk; /* number of this block */
	int ifprec[127]; /* Ifp headers and Pst postings */
} Ifb;
135    
136     #define NSTRUCT(n) struct { \
137     int pos; \
138     short ock; \
139     short it; \
140     struct { \
141     char key[n]; \
142     int ref PACKED; \
143     } idx[10]; \
144     }
145     typedef NSTRUCT(10) N01;
146     typedef NSTRUCT(30) N02;
147    
148     #define LSTRUCT(n) struct { \
149     int pos; \
150     short ock; \
151     short it; \
152     int ps; \
153     struct { \
154     char key[n]; \
155     int infb PACKED; \
156     int infp PACKED; \
157     } idx[10]; \
158     }
159     typedef LSTRUCT(10) L01;
160     typedef LSTRUCT(30) L02;
161    
/*
	Latin1 values of the Cp850 upper half: entry i is the Latin1 code
	for Cp850 byte 128+i, '?' where no mapping is given.
*/
static const unsigned char cp850[] = {
	/* 0x80 */ 199,252,233,226,228,224,229,231,234,235,232,239,238,236,196,197,
	/* 0x90 */ 201,230,198,244,246,242,251,249,255,214,220,248,163,216,215,'?',
	/* 0xa0 */ 225,237,243,250,241,209,170,186,191,174,172,189,188,161,171,187,
	/* 0xb0 */ '?','?','?','?','?',193,194,192,169,'?','?','?','?',162,165,'?',
	/* 0xc0 */ '?','?','?','?','?','?',227,195,'?','?','?','?','?','?','?',164,
	/* 0xd0 */ 240,208,202,203,200,'?',205,206,207,'?','?','?','?',166,204,'?',
	/* 0xe0 */ 211,223,212,210,245,213,181,254,222,218,219,217,253,221,175,180,
	/* 0xf0 */ 173,177,'?',190,182,167,247,184,176,168,183,185,179,178,'?',160
};
172    
173     typedef struct {
174     Fld base; /* points into env->opt */
175     int notab; /* do not convert ^ <-> tabs */
176     int novt; /* do not convert newline <-> vtabs */
177     int no850; /* do not convert Cp850 <-> ISO Latin1 */
178     int write; /* write cds */
179     } CdsOpt;
180    
181    
182     static int cdsOpt (CdsOpt *opt, Fld *args, Db *db)
183     {
184     Fld o;
185     memset(opt, 0, sizeof(*opt));
186     for ( o.val = 0; vGet(&o, args, "BN"); )
187     switch (o.tag) {
188     case 'B':
189     opt->base = o;
190     continue;
191     case 'N':
192     if (VEQZ(&o, "otab"))
193     opt->notab = 1;
194     else if (VEQZ(&o, "ovt"))
195     opt->novt = 1;
196     else if (VEQZ(&o, "o850"))
197     opt->no850 = 1;
198     else
199     return eRr(ERR_INVAL, "bad cds opt '-N%.*s'", o.len, o.val);
200     }
201     if ( !opt->base.len )
202     opt->base.len = strlen(opt->base.val = db->pat);
203     return 0;
204     } /* cdsOpt */
205    
206    
207     int cdsuse ()
208     {
209     eRr(ERR_INVAL, "[cdsimp|cdsexp]:\n"
210     "-Bbase: path/basename of cds db (default = malete db)\n"
211     "-Notab: do not convert ^ <-> tab\n"
212     "-Novt: do not convert newline <-> vtab\n"
213     "-No850: do not convert Cp850 <-> ISO Latin1"
214     );
215     return ERR_INVAL;
216     } /* cdsuse */
217    
218    
219     static int cdsOpen (cds_files cds, CdsOpt *opt)
220     {
221     int filmode = opt->write ? FIL_RDWR|FIL_CREAT|FIL_TRUNC : FIL_RD;
222     int l = opt->base.len, i;
223     char *path = mAlloc(l+5);
224     memcpy(path, opt->base.val, l);
225     path[l++] = '.';
226     for (i=0; i<CDS_FILES; i++)
227     cds[i] = FIL_NONE;
228     for (i=0; i<CDS_FILES; i++) {
229     memcpy(path+l, cds_ext[i], 4);
230     if ( fOpen( cds+i, path, filmode ) )
231     return eRr(LOG_ERROR, "no access to CDS file '%s'", path);
232     }
233     return 0;
234     } /* cdsOpen */
235    
236    
237     static void cdsClose (cds_files cds)
238     {
239     int i;
240     for (i=0; i<CDS_FILES; i++)
241     fClose(cds+i);
242     } /* cdsClose */
243    
244    
245     int cdsimp (Db *db, Fld *args)
246     {
247     CdsOpt opt;
248     cds_files cds;
249     int ret = 0, m, i;
250     Mfc mfc;
251     List rec;
252     char recode[256];
253     L01 l01;
254     L02 l02;
255     Ifb ifbs[2];
256     int n1, n2;
257     Key key;
258    
259     if ( cdsOpt(&opt, args, db) )
260     return cdsuse();
261     if ( cdsOpen(cds, &opt) )
262     return eRr(ERR_INVAL, "bad CDS base");
263     #define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
264     if ( sizeof(mfc) != fPread(cds+CDS_MST, &mfc, sizeof(mfc), 0) )
265     CLEANUP((ERR_TRASH, "no mfc"));
266     eRr(LOG_TRACE,
267     "MFC ctlm %d nmfn %d nmfb %d nmfp %d type %d"
268     " rcnt %d x1 %d x2 %d x3 %d",
269     mfc.ctlm, mfc.nmfn, mfc.nmfb, mfc.nmfp, mfc.type,
270     mfc.rcnt, mfc.mfx1, mfc.mfx2, mfc.mfx3);
271     eRr(LOG_INFO, "importing %d recs", mfc.nmfn-1);
272     lInit(&rec, "");
273     /* prepare recode table */
274     for (i=0; i<128; i++) recode[i] = (char)i;
275     if ( !opt.notab ) recode['^'] = TAB;
276     if ( !opt.novt ) recode[LF] = VT;
277     if ( opt.no850 )
278     for (i=128; i<256; i++) recode[i] = (char)i;
279     else
280     for (i=128; i<256; i++) recode[i] = (char)cp850[i-128];
281     /* loop m := ceil(numrecs/127) xrf blocks */
282     m = (mfc.nmfn+125)/127;
283     for (i=1; i<=m; i++) {
284     Xrf xrf;
285     int f = 1+127*(i-1), t = f+126, mfn;
286     int got = fRead(cds+CDS_XRF, &xrf, sizeof(xrf));
287     if ( (int)sizeof(xrf) != got )
288     CLEANUP((ERR_TRASH, "reading xrf block %d", i));
289     if ( xrf.xpos != (i==m ? -i : i) )
290     CLEANUP((ERR_TRASH, "bad pos %d in xrf block %d", xrf.xpos, i));
291     if ( t >= mfc.nmfn )
292     t = mfc.nmfn-1;
293     for (mfn=f; mfn<=t; mfn++) {
294     int pos = xrf.xrec[mfn-f];
295     int mfb = (pos >> 11);
296     union {
297     Mfr mfr;
298     char buf[0x7fff];
299     } da;
300     Dct *d = da.mfr.dict;
301     char *b, *e;
302    
303     if ( 0 > mfb ) {
304     eRr(LOG_TRACE, "rec %d was deleted", mfn);
305     continue;
306     }
307     /* ignore bits 9, 10 signaling pending index update */
308     pos &= 0x1ff;
309     pos |= (mfb-1)<<9;
310     got = fPread(cds+CDS_MST, da.buf, sizeof(Mfr), pos);
311     if ( got < MFR_DICTOFF )
312     CLEANUP((ERR_TRASH, "rec %d too short %d", mfn, got));
313     if ( da.mfr.mfrl < 0 ) {
314     eRr(LOG_WARN, "rec %d pos %d locked", mfn, pos);
315     da.mfr.mfrl = -da.mfr.mfrl;
316     }
317     if ( da.mfr.mfrl < (int)(MFR_DICTOFF+da.mfr.nvf*sizeof(Dct)) )
318     CLEANUP((ERR_TRASH, "rec %d pos %d bad mfrl %d nvf %d",
319     mfn, pos, da.mfr.mfrl, da.mfr.nvf));
320     got = fPread(cds+CDS_MST, da.buf, da.mfr.mfrl, pos);
321     if ( da.mfr.mfrl != got ) {
322     if ( got == -da.mfr.mfrl )
323     da.mfr.mfrl = -da.mfr.mfrl;
324     else
325     CLEANUP((ERR_TRASH, "rec %d pos %d got %d bytes want %d",
326     mfn, pos, got, da.mfr.mfrl));
327     }
328     b = da.buf+da.mfr.base;
329     e = da.buf+da.mfr.mfrl;
330     lReset(&rec);
331     for ( ; da.mfr.nvf--; d++ ) {
332     char *p = b+d->pos, *q = p+d->len;
333     if ( p < b || q > e )
334     CLEANUP((ERR_TRASH, "bad pos %d len %d field %d",
335     d->pos, d->len, d-da.mfr.dict));
336     for ( ; p<q; p++ ) *p = recode[(unsigned char)*p];
337     LADD(&rec, d->tag, b+d->pos, d->len);
338     }
339     dWrite(db, rec.fld, mfn);
340     }
341     }
342     /* mst done */
343     /* we assume first leaf has pos 1; maybe we should use N0x to search */
344     l01.ps = l02.ps = 1;
345     l01.ock = l02.ock = n1 = n2 = 0;
346     ifbs[0].ifpblk = ifbs[1].ifpblk = 0;
347     for (;;) { /* loop leaves */
348     Fld use;
349     int infb, infp, got, more = 0;
350     Ifp ifp;
351     Ifb *ifb;
352    
353     if ( n1 == l01.ock && (l01.pos = l01.ps) ) {
354     if ((int)sizeof(l01) != (got
355     = fPread(cds+CDS_L01,&l01,sizeof(l01),(l01.ps-1)*sizeof(l01)))
356     )
357     CLEANUP((ERR_TRASH, "l01 %d got %d", l01.ps, got));
358     eRr(LOG_TRACE, "l01 pos %d ock %d nxt %d", l01.pos, l01.ock, l01.ps);
359     n1 = 0;
360     if ( !l01.ock )
361     continue;
362     }
363     if ( n2 == l02.ock && (l02.pos = l02.ps) ) {
364     if ((int)sizeof(l02) != (got
365     = fPread(cds+CDS_L02,&l02,sizeof(l02),(l02.ps-1)*sizeof(l02)))
366     )
367     CLEANUP((ERR_TRASH, "l02 %d got %d", l02.ps, got));
368     eRr(LOG_TRACE, "l02 pos %d ock %d nxt %d", l02.pos, l02.ock, l02.ps);
369     n2 = 0;
370     if ( !l02.ock )
371     continue;
372     }
373     if ( !l01.pos && !l02.pos )
374     break;
375     if ( n1 == l01.ock )
376     use.tag = 1;
377     else if ( n2 == l02.ock )
378     use.tag = 0;
379     else if ( 0 > memcmp(l02.idx[n2].key, l01.idx[n1].key, 10) )
380     use.tag = 1;
381     else
382     use.tag = 0;
383     if ( use.tag ) {
384     infb = l02.idx[n2].infb;
385     infp = l02.idx[n2].infp;
386     use.val = l02.idx[n2++].key;
387     use.len = 30;
388     } else {
389     infb = l01.idx[n1].infb;
390     infp = l01.idx[n1].infp;
391     use.val = l01.idx[n1++].key;
392     use.len = 10;
393     }
394     ifb = ifbs+use.tag;
395     /* kill trailing blanks */
396     while ( use.len && ' ' == use.val[use.len-1] ) use.len--;
397     eRr(LOG_TRACE, "'%.*s'", use.len, use.val);
398     memcpy(key.byt, use.val, key.len = use.len);
399     key.val.len = db->qdx.vsz;
400     while (infb) {
401     Pst *pst;
402     int n;
403     if ( ifb->ifpblk != infb
404     && (512 != fPread(cds+CDS_IFP,ifb,sizeof(Ifb),(infb-1)*512)
405     || ifb->ifpblk != infb)
406     )
407     CLEANUP((ERR_TRASH, "ifp %d",infb));
408     if ( more ) {
409     pst = (Pst*)ifb->ifprec;
410     if ( (n = 127/2) > more )
411     n = more;
412     more -= n;
413     eRr(LOG_TRACE, "+%d @%d", n, infb);
414     } else {
415     if ( infp > 127-5 )
416     CLEANUP((ERR_TRASH, "bad infp %d for infb %d", infp, infb));
417     ifp = *(Ifp*)(ifb->ifprec + infp);
418     pst = (Pst*)(ifb->ifprec + infp + 5);
419     if ( (n = (127-5-infp)/2) > ifp.segp )
420     n = ifp.segp;
421     more = ifp.segp - n;
422     eRr(LOG_TRACE, "ifp @%d,%d: %d %d %d %d %d", infb, infp,
423     ifp.nxtb, ifp.nxtp, ifp.totp, ifp.segp, ifp.segc);
424     }
425     if ( more )
426     infb++;
427     else {
428     infb = ifp.nxtb;
429     infp = ifp.nxtp;
430     }
431     for ( ; n--; pst++ ) {
432     if ( QDX_ISIS == db->qdx.ptr )
433     memcpy(key.val.byt, pst->b, 8);
434     else {
435     Ptr ptr;
436     ptr.ext = 0;
437     ptr.rid = pst->b[0]<<16 | pst->b[1]<<8 | pst->b[2];
438     ptr.tag = pst->b[3]<<8 | pst->b[4];
439     ptr.pos = pst->b[5]<<16 | pst->b[6]<<8 | pst->b[7];
440     qMkVal(&key.val, &ptr, db->qdx.ptr);
441     }
442     qLoad(&db->qdx, &key);
443     }
444     } /* while infb */
445     }
446     key.val.len = 0;
447     qLoad(&db->qdx, &key);
448     cleanup:
449     #undef CLEANUP
450     cdsClose(cds);
451     return ret;
452     } /* cdsimp */
453    
454    
455     /* export index */
456     typedef struct {
457     QLoop qlp;
458     file *cds;
459     int totp; /* in current segment */
460     int infb; /* of current ifp */
461     int infp; /* of current ifp */
462     Ifb ifb; /* current block; flushed on demand */
463     int rec; /* next in ifb.ifprec 0..126 */
464     L01 l01; /* new keys are immediatly inserted here ... */
465     L02 l02; /* ... structs flushed on demand */
466     N01 n01;
467     N02 n02;
468     } CdsLoop;
469    
470    
471     static void closeifp (CdsLoop *self) /* write Ifp header */
472     {
473     Ifp buf, *ifp;
474     if ( self->infb != self->ifb.ifpblk ) /* Ifp not in current blk */
475     ifp = &buf;
476     else
477     ifp = (Ifp*)(self->ifb.ifprec+self->infp);
478     ifp->nxtb = ifp->nxtp = 0; /* used only for partial updates */
479     ifp->totp = ifp->segp = ifp->segc = self->totp; /* diff only after split */
480     if ( self->infb != self->ifb.ifpblk )
481     fPwrite(self->cds+CDS_IFP, ifp, sizeof(Ifp),
482     ((self->infb-1)<<9) + 4*(self->infp+1));
483     self->totp = 0;
484     /*
485     hmmmm ... From the manual:
486     Cuando se carga secuencialmente el archivo
487     invertido (por ejemplo despues de una generacion completa del archivo
488     invertido con ISISINV), cada lista esta formada por uno o mas segmentos
489     adyacentes. Si IFPTOT <= 32768, entonces: IFPNXTB/IFPNXTP = 0/0 y IFPTOT =
490     IFPSEGP = IFPSEGC.
491     This seems to imply that numbers really should be used as signed shorts!?
492     */
493     } /* closeifp */
494    
495     static void closeifb (CdsLoop *self)
496     {
497     eRr(LOG_TRACE, "w ifb %d", self->ifb.ifpblk);
498     fPwrite(self->cds+CDS_IFP,&self->ifb,sizeof(Ifb),(self->ifb.ifpblk-1)<<9);
499     self->ifb.ifpblk++;
500     self->rec = 0;
501     } /* closeifb */
502    
503    
504     static int cdsCb (CdsLoop *self)
505     {
506     Pst *pst;
507     unsigned i, vsz = self->qlp.qdx->vsz;
508     const unsigned char *v = self->qlp.vals;
509    
510     if ( !(QSAME & self->qlp.flg) ) { /* add new key -- new segment */
511     L01 *l0x; /* might really be a L02 */
512     int l, n, oref; /* key length, offset of ref int(s) in idx */
513     int f2, sl, *ref;
514     char *key;
515    
516     if ( self->totp )
517     closeifp(self);
518     if ( self->rec > 127-5 )
519     closeifb(self);
520     if ( (l = self->qlp.cur.len) <= 10 ) { /* l01 entry */
521     l0x = &self->l01;
522     f2 = 0;
523     n = 10;
524     sl = sizeof(L01);
525     } else {
526     l0x = (L01*)&self->l02;
527     f2 = 2;
528     if ( l > (n = 30) )
529     l = 30;
530     sl = sizeof(L02);
531     }
532     #ifdef BUILD_CDSUNIX
533     oref = n+2;
534     #else
535     oref = n;
536     #endif
537     if ( 10 == l0x->ock ) { /* need new block */
538     l0x->ps = l0x->pos+1;
539     eRr(LOG_TRACE, "w l0 n %d pos %d", n, l0x->pos);
540     fPwrite(self->cds+CDS_L01+f2, l0x, sl, (l0x->pos-1)*sl);
541     l0x->pos++;
542     l0x->ock = 0;
543     }
544     key = l0x->idx[0].key + l0x->ock*(oref+8); /* sizeof idx */
545     memset(key, ' ', n);
546     memcpy(key, self->qlp.cur.byt, l);
547     ref = (int*)(key+oref);
548     ref[0] = self->infb = self->ifb.ifpblk;
549     ref[1] = self->infp = self->rec;
550     self->rec += 5; /* reserve space for Ifp header */
551     if ( !l0x->ock++ ) { /* also write node entry */
552     N01 *n0x = 10==n ? &self->n01 : (N01*)&self->n02;
553     int sn = 10==n ? sizeof(N01) : sizeof(N02);
554     char *nkey;
555     if ( 10 == n0x->ock ) {
556     eRr(LOG_TRACE, "w n0 n %d pos %d", n, n0x->pos);
557     fPwrite(self->cds+CDS_N01+f2, n0x, sn, (n0x->pos-1)*sn);
558     n0x->pos++;
559     n0x->ock = 0;
560     }
561     nkey = n0x->idx[0].key + n0x->ock++*(oref+4);
562     if ( 2 == n0x->pos && 1 == n0x->ock )
563     memset(nkey, ' ', n); /* 1st key - start of level */
564     else
565     memcpy(nkey, key, n);
566     *(int*)(nkey+oref) = -l0x->pos;
567     }
568     }
569     pst = (Pst*)(self->ifb.ifprec + self->rec);
570     eRr(LOG_TRACE, "+ %d ptr @ %d.%d", self->qlp.nvals,
571     self->ifb.ifpblk, self->rec);
572     for ( i=self->qlp.nvals; i--; self->rec+=2, pst++, v += vsz) {
573     if ( self->rec > 125 ) {
574     closeifb(self);
575     pst = (Pst*)self->ifb.ifprec;
576     }
577     if ( QDX_ISIS == self->qlp.qdx->ptr )
578     memcpy(pst->b, v, 8);
579     else {
580     Ptr ptr;
581     qRdVal(&ptr, v, self->qlp.qdx->ptr);
582     pst->b[0] = ptr.rid >> 16;
583     pst->b[1] = ptr.rid >> 8;
584     pst->b[2] = ptr.rid;
585     pst->b[3] = ptr.tag >> 8;
586     pst->b[4] = ptr.tag;
587     pst->b[5] = ptr.pos >> 16;
588     pst->b[6] = ptr.pos >> 8;
589     pst->b[7] = ptr.pos;
590     }
591     }
592     self->totp += self->qlp.nvals;
593     return 0;
594     } /* cdsCb */
595    
596    
597     int cdsexp (Db *db, Fld *args)
598     {
599     CdsOpt opt;
600     cds_files cds;
601     int ret = 0, pos, i, j;
602     Mfc mfc;
603     List rec;
604     char recode[256];
605     Xrf xrf;
606     CdsLoop clp;
607     Cnt cnt[2];
608    
609     if ( cdsOpt(&opt, args, db) )
610     return cdsuse();
611     opt.write = 1;
612     if ( cdsOpen(cds, &opt) )
613     return eRr(ERR_INVAL, "error creating CDS base");
614     eRr(LOG_INFO, "exporting %d recs", db->rdx.mid);
615     lInit(&rec, "");
616     /* prepare recode table */
617     for (i=0; i<128; i++) recode[i] = (char)i;
618     if ( !opt.notab ) recode[TAB] = '^';
619     if ( !opt.novt ) recode[VT] = LF;
620     if ( opt.no850 )
621     for (i=128; i<256; i++) recode[i] = (char)i;
622     else {
623     for (i=128; i<256; i++) recode[i] = '?';
624     for (i=160; i<256; i++) /* reverse cp850 */
625     for (j=0; j<128; j++) if ( i == cp850[j] ) { recode[i] = j+128; break; }
626     }
627     xrf.xpos = 1;
628     mfc.rcnt = 0; /* record count; nmfn-1 - #deleted */
629     pos = 64; /* as of folklore, 1st rec should start here */
630     for (i=1; i<=db->rdx.mid; i++) { /* loop records */
631     int xref;
632     if ( 0 >= dRead(lClr(&rec), db, i) ) { /* no rec */
633     eRr(LOG_TRACE, "rec %d was deleted", i);
634     xref = -1 << 11;
635     } else {
636     union {
637     Mfr mfr;
638     char buf[0x7fff];
639     } da;
640     char *b, *p, *e = da.buf+sizeof(da.buf);
641     Dct *d = da.mfr.dict, *de;
642     Fld *f, *fe = LEND(&rec);
643     unsigned avl = 0x7fff - MFR_DICTOFF;
644    
645     xref = pos & 511; /* pos in block */
646     /* the manual explicitly demands 0-498 and forbids 500 */
647     if ( xref > 498 /*512 - MFR_BASEOFF = 500 packed */ ) {
648     pos += 512; /* advance to next block */
649     pos &= ~511;
650     xref = 0;
651     }
652     xref |= ((pos >> 9)+1) << 11;
653     da.mfr.mfn = i;
654     da.mfr.bwb = 0;
655     da.mfr.bwp = 0;
656     da.mfr.base = 0;
657     da.mfr.nvf = 0;
658     da.mfr.stat = 0;
659     for ( f=rec.fld; ++f<fe; ) {
660     if ( avl < (int)sizeof(Dct) )
661     break;
662     da.mfr.nvf++;
663     avl -= sizeof(Dct);
664     if ( avl >= f->len )
665     avl -= f->len;
666     else
667     avl = 0;
668     }
669     p = b = da.buf + (da.mfr.base = MFR_DICTOFF + da.mfr.nvf*sizeof(Dct));
670     de = d + da.mfr.nvf;
671     for ( f=rec.fld; d<de; d++ ) {
672     unsigned char *v = (unsigned char*)(++f)->val, *ve;
673     d->tag = f->tag;
674     d->pos = p-b;
675     d->len = e-p < (int)f->len ? e-p : f->len;
676     for ( ve = v+d->len; v < ve; ) *p++ = recode[*v++];
677     }
678     da.mfr.mfrl = p - da.buf;
679     eRr(LOG_VERBOSE, "write rec %d len %d pos %d", i, da.mfr.mfrl, pos);
680     fPwrite(cds+CDS_MST, da.buf, da.mfr.mfrl, pos);
681     if ( 1 & (pos += da.mfr.mfrl) ) /* round to even */
682     pos++;
683     mfc.rcnt++;
684     }
685     xrf.xrec[j = (i-1)%127] = xref;
686     if ( i == db->rdx.mid ) { /* last */
687     while ( ++j < 127 ) xrf.xrec[j] = 0;
688     xrf.xpos = -xrf.xpos;
689     } else if ( 126 != j )
690     continue;
691     eRr(LOG_VERBOSE, "%d: write Xrf %d", i, xrf.xpos);
692     fWrite(cds+CDS_XRF, &xrf, sizeof(Xrf));
693     xrf.xpos++;
694     }
695     mfc.ctlm = 0; /* should be 0 */
696     mfc.nmfn = i; /* next avail MFN */
697     mfc.nmfb = pos >> 9; /* next free block, 1st = 1 */
698     mfc.nmfp = pos & 511; /* next free pos (in block) */
699     mfc.type = 0; /* 0 (1 is for "messages" mst) */
700     mfc.mfx1 = 0; /* unused */
701     mfc.mfx2 = 0; /* unused; "LAN lock" */
702     mfc.mfx3 = 0; /* unused; "LAN lock" */
703     fPwrite(cds+CDS_MST, &mfc, sizeof(mfc), 0);
704     /* mst done */
705     memset(&clp, 0, sizeof(clp));
706     clp.qlp.qcb = (QCb*)cdsCb;
707     clp.cds = cds;
708     clp.ifb.ifpblk = 1;
709     clp.l01.pos = clp.l02.pos = 1;
710     clp.n01.pos = clp.n02.pos = 2; /* start at 2; 1 will hold the root */
711     clp.l01.it = clp.n01.it = 1;
712     clp.l02.it = clp.n02.it = 2;
713     clp.qlp.qdx = &db->qdx;
714     qLoop(&clp.qlp);
715     eRr(LOG_VERBOSE, "loop got pos ifp %d l01 %d l02 %d n01 %d n02 %d",
716     clp.ifb.ifpblk, clp.l01.pos, clp.l02.pos, clp.n01.pos, clp.n02.pos);
717     if ( clp.totp )
718     closeifp(&clp);
719     if ( clp.rec )
720     closeifb(&clp);
721     clp.l01.ps = clp.l02.ps = 0; /* no next */
722     fPwrite(cds+CDS_L01, &clp.l01, sizeof(L01), (clp.l01.pos-1)*sizeof(L01));
723     fPwrite(cds+CDS_L02, &clp.l02, sizeof(L02), (clp.l02.pos-1)*sizeof(L02));
724     for ( i=2; i--; ) {
725     cnt[i].type = i+1;
726     cnt[i].ordn = 5;
727     cnt[i].ordf = 5;
728     cnt[i].n = 15;
729     cnt[i].k = 5;
730     cnt[i].lev = 0;
731     cnt[i].posr = 1;
732     cnt[i].nmax = 1;
733     cnt[i].abno = 1;
734     }
735     cnt[0].fmax = clp.l01.pos;
736     cnt[1].fmax = clp.l02.pos;
737     if ( 2 == clp.n01.pos ) { /* the poor one and only node */
738     /* since we did not open a 2nd node, this one was not written */
739     clp.n01.pos = 1; /* make it the root */
740     cnt[0].abno = 0; /* really the field means "normal", not "abnormal" */
741     }
742     fPwrite(cds+CDS_N01, &clp.n01, sizeof(N01), (clp.n01.pos-1)*sizeof(N01));
743     if ( 2 == clp.n02.pos ) {
744     clp.n02.pos = 1;
745     cnt[1].abno = 0;
746     }
747     fPwrite(cds+CDS_N02, &clp.n02, sizeof(N02), (clp.n02.pos-1)*sizeof(N02));
748     /* build the trees */
749     if ( cnt[0].abno ) {
750     int f = 2, t = clp.n01.pos; /* from - to on level */
751     for (;;) {
752     N01 c, p; /* child, parent */
753     int start = t+1;
754     cnt[0].lev++;
755     p.pos = 10 >= (t-f) ? 1 : start;
756     p.ock = 1; /* blank key ref to f */
757     p.it = 1;
758     memset(p.idx[0].key, ' ', sizeof(p.idx[0].key));
759     p.idx[0].ref = f;
760     for ( i=f+1; i<=t; i++ ) {
761     fPread(cds+CDS_N01, &c, sizeof(c), (i-1)*sizeof(c));
762     eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
763     cnt[0].lev-1, c.pos, sizeof(c.idx[0].key), c.idx[0].key, c.idx[0].ref,
764     c.ock, sizeof(c.idx[0].key), c.idx[c.ock-1].key, c.idx[c.ock-1].ref);
765     if ( 10 == p.ock ) {
766     eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - '%.*s'->%d",
767     cnt[0].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
768     sizeof(p.idx[0].key), p.idx[9].key, p.idx[9].ref);
769     fPwrite(cds+CDS_N01, &p, sizeof(p), (p.pos-1)*sizeof(p));
770     p.pos++;
771     p.ock = 0;
772     }
773     memcpy(p.idx[p.ock].key, c.idx[0].key, sizeof(c.idx[0].key));
774     p.idx[p.ock++].ref = i;
775     }
776     eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
777     cnt[0].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
778     p.ock, sizeof(p.idx[0].key), p.idx[p.ock-1].key, p.idx[p.ock-1].ref);
779     fPwrite(cds+CDS_N01, &p, sizeof(p), (p.pos-1)*sizeof(p));
780     if ( 1 == p.pos )
781     break;
782     f = start;
783     cnt[0].nmax = t = p.pos;
784     }
785     }
786     if ( cnt[1].abno ) {
787     int f = 2, t = clp.n02.pos; /* from - to on level */
788     for (;;) {
789     N02 c, p; /* child, parent */
790     int start = t+1;
791     cnt[1].lev++;
792     p.pos = 10 >= (t-f) ? 1 : start;
793     p.ock = 1; /* blank key ref to f */
794     p.it = 2;
795     memset(p.idx[0].key, ' ', sizeof(p.idx[0].key));
796     p.idx[0].ref = f;
797     for ( i=f+1; i<=t; i++ ) {
798     fPread(cds+CDS_N02, &c, sizeof(c), (i-1)*sizeof(c));
799     eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
800     cnt[1].lev-1, c.pos, sizeof(c.idx[0].key), c.idx[0].key, c.idx[0].ref,
801     c.ock, sizeof(c.idx[0].key), c.idx[c.ock-1].key, c.idx[c.ock-1].ref);
802     if ( 10 == p.ock ) {
803     eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - '%.*s'->%d",
804     cnt[1].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
805     sizeof(p.idx[0].key), p.idx[9].key, p.idx[9].ref);
806     fPwrite(cds+CDS_N02, &p, sizeof(p), (p.pos-1)*sizeof(p));
807     p.pos++;
808     p.ock = 0;
809     }
810     memcpy(p.idx[p.ock].key, c.idx[0].key, sizeof(c.idx[0].key));
811     p.idx[p.ock++].ref = i;
812     }
813     eRr(LOG_TRACE, "lev %d blk %d '%.*s'->%d - %d:'%.*s'->%d",
814     cnt[0].lev, p.pos, sizeof(p.idx[0].key), p.idx[0].key, p.idx[0].ref,
815     p.ock, sizeof(p.idx[0].key), p.idx[p.ock-1].key, p.idx[p.ock-1].ref);
816     fPwrite(cds+CDS_N02, &p, sizeof(p), (p.pos-1)*sizeof(p));
817     if ( 1 == p.pos )
818     break;
819     f = start;
820     cnt[1].nmax = t = p.pos;
821     }
822     }
823     fWrite(cds+CDS_CNT,cnt,CNT_SIZE);
824     fWrite(cds+CDS_CNT,cnt+1,CNT_SIZE);
825     cdsClose(cds);
826     return ret;
827     } /* cdsexp */

  ViewVC Help
Powered by ViewVC 1.1.26