/[webpac]/openisis/0.9.9e/core/rdx.c
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/0.9.9e/core/rdx.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (hide annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 15809 byte(s)
import of new openisis release, 0.9.9e

1 dpavlin 604 /*
2     The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3     Version 0.9.x (patchlevel see file Version)
4     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14     See the GNU Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     /*
24     $Id: rdx.c,v 1.15 2004/11/12 08:37:56 kripke Exp $
25     implementation of general db access functions.
26     */
27    
28    
29     #include "core/core.h"
30    
31    
32    
/*
	Type code of a record index (rx) file, packed into a single value:
	(posbytes-4) from bit 4 upwards, (lenbytes-3) in bits 2..3,
	fldbytes in bits 0..1.
	posbytes in [4..8], lenbytes in [3..6], fldbytes in [0..2]
	Every argument is parenthesized, so arbitrary expressions
	(e.g. a conditional) expand with the intended precedence.
*/
#define RX_TYPE(posbytes,lenbytes,fldbytes) \
	((((posbytes)-4)<<4) | (((lenbytes)-3)<<2) | (fldbytes))
enum { /* by now, only the two 8 byte types are implemented */
	RX_431 = RX_TYPE(4,3,1), /* 4 byte pos, 3 byte len, 1 byte field count */
	RX_440 = RX_TYPE(4,4,0)  /* 4 byte pos, 4 byte len, no field count */
};
43    
44     /* ************************************************************
45     private types
46     */
47    
/*
	Scratch buffer for one index pointer that is built or read outside the
	mapping. r offers 16 bytes, the maximum pointer length named in the
	mkxrf comment (8+4+4).
	NOTE(review): bar is never accessed in this file; presumably it only
	forces a strict alignment on r -- confirm against the definition of lolo.
*/
48     typedef union {
49     lolo bar;
50     char r[16];
51     } Xrf;
52    
53     /** header of first record index block */
/*
	This struct is transferred as raw bytes: rInit reads the first 8 bytes
	of the index file straight into it, and mid is also written on its own
	as 4 bytes at file offset 4. Do not reorder or resize the members.
*/
54     typedef struct Rxh {
55     char mag[3]; /* magic MRX or mrx */
/* typ: the RX_* type code, stored as byte 3 of the file (see rEmake) */
56     char typ;
57     unsigned mid; /* max record id == total number of used pointers */
58     /* for type of more than 8 byte: upper mid */
59     } Rxh;
60    
61    
62     /* ************************************************************
63     data
64     */
65    
/*
	The magic we need, depending on our endianness: the first three bytes
	of the record index file. Upper case is used when CPU_BIG_ENDIAN is
	defined, lower case otherwise, so an index written with the other byte
	order fails the magic check in rInit.
	The table is only ever read (memcpy / memcmp), hence const.
*/
static const char MAG_MRX[] =
#ifdef CPU_BIG_ENDIAN
	"MRX"
#else
	"mrx"
#endif
;
74    
75    
76     /* ************************************************************
77     private functions
78     */
/*
	Shorthands that apply FLOCKSH / FLOCKEX / FLOCKUN to the master file
	dx->mrd and discard the result. They expand a bare `dx`, so a variable
	of that name must be in scope wherever they are used.
	n is handed through unchanged; in this file it is a record id, or 0 in
	rWrite.
	NOTE(review): judging by the names these are shared lock, exclusive lock
	and unlock -- confirm against the FLOCK* definitions.
*/
79     #define RLOCKSH(n) (void)FLOCKSH(dx->mrd, n)
80     #define RLOCKEX(n) (void)FLOCKEX(dx->mrd, n)
81     #define RLOCKUN(n) (void)FLOCKUN(dx->mrd, n)
82    
83     /*
84     create a pointer from the least significant bytes of pos, len, fld
85     buf must have dx->ptl bytes (up to 16 = 8+4+4)
86     and the most strict alignment (i.e. 4 or 8) possible for dx->ptl
87     returns buf
88     supports only RX_431 and RX_440
89     */
90     static char *mkxrf ( char *buf, Rdx *dx,
91     unsigned pos, unsigned len, unsigned fld )
92     {
93     ((unsigned*)buf)[0] = pos;
94     if ( RX_431 == dx->typ ) {
95     if ( ~0xff & fld ) fld = 0; /* too big */
96     #ifdef CPU_BIG_ENDIAN /* the first = high order 3 bytes are len */
97     len = len<<8 | fld;
98     #else /* the first = low order 3 bytes are len */
99     len = (0xffffff&len) | fld<<24;
100     #endif
101     }
102     ((unsigned*)buf)[1] = len;
103     return buf;
104     }
105    
106    
107     /*
108     read pointer, return len
109     if 0x0f00 & dx->typ, fld must not be 0, else *fld is untouched
110     supports only RX_431 and RX_440
111     */
112     static unsigned
113     rdxrf ( unsigned *pos, unsigned *fld, Rdx *dx, char *buf )
114     {
115     unsigned len = ((unsigned*)buf)[1];
116     *pos = *(unsigned*)buf;
117     if ( RX_431 == dx->typ ) {
118     *fld = ((unsigned char *)buf)[7];
119     #ifdef CPU_BIG_ENDIAN
120     len >>= 8;
121     #else
122     len &= 0xffffff;
123     #endif
124     }
125     return len;
126     }
127    
128    
129     static void setXrf ( Rdx *dx, unsigned rid,
130     unsigned pos, unsigned len, unsigned fld )
131     {
132     Xrf xrf;
133     unsigned off = rid*dx->ptl;
134     eRr(LOG_INFO,"setXrf %d %d %d %d",rid,pos,len,fld);
135     RLOCKEX(rid);
136     if ( dx->mrx.npg ) {
137     if ( (off+dx->ptl) > (dx->mrx.npg<<env.psh) ) { /* remap ? */
138     unsigned npg = 1 + ((off+dx->ptl-1)>>env.psh); /* num pages we need */
139     if ( npg > dx->mrx.lim ) { /* at limit, drop the mapping completely */
140     eRr(LOG_WARN, "dropping mrx mapping at limit %u", dx->mrx.lim);
141     npg = dx->mrx.lim = 0;
142     }
143     fMap(&dx->mrx, npg);
144     }
145     if ( (off+dx->ptl) <= (dx->mrx.npg<<env.psh) ) {
146     mkxrf(dx->mrx.map + off, dx, pos, len, fld);
147     if ( ENV_MSYNC & env.flg ) fMSync(&dx->mrx, off>>env.psh);
148     goto done;
149     }
150     }
151     fPwrite(&dx->mrx.fil, mkxrf(xrf.r, dx, pos, len, fld), dx->ptl, off);
152     done:
153     if ( rid > (unsigned)dx->mid ) {
154     if ( dx->mrx.npg )
155     ((Rxh*)dx->mrx.map)->mid = rid;
156     else
157     fPwrite(&dx->mrx.fil, &rid, 4, 4);
158     dx->mid = rid;
159     }
160     RLOCKUN(rid);
161     } /* setXrf */
162    
163    
164     static unsigned getXrf ( unsigned *pos, unsigned *fld, Rdx *dx, int rid )
165     {
166     unsigned ret;
167     Xrf xrf;
168     unsigned off = (unsigned)rid*dx->ptl;
169     RLOCKSH(rid);
170     if ( (off+dx->ptl) <= (dx->mrx.npg<<env.psh) )
171     ret = rdxrf(pos, fld, dx, dx->mrx.map + off);
172     else {
173     #ifdef BUILD_SHMODE
174     if ( ENV_SHARED==env.wri ) { /* might have been extended */
175     unsigned pgl;
176     /* TODO: maybe we should guard this by the record lock ? */
177     if ( dx->mrx.npg )
178     dx->mid = ((Rxh*)dx->mrx.map)->mid;
179     else
180     fPread(&dx->mrx.fil, &dx->mid, 4, 4);
181     if ( rid > dx->mid ) {
182     ret = 0;
183     goto out;
184     }
185     pgl = fSize(dx->mrx.fil) >> env.psh;
186     if ( pgl != dx->mrx.npg )
187     fMap(&dx->mrx, pgl);
188     if ( (off+dx->ptl) <= (dx->mrx.npg<<env.psh) ) {
189     ret = rdxrf(pos, fld, dx, dx->mrx.map + off);
190     goto out;
191     }
192     }
193     #endif
194     ret = rid > dx->mid || dx->ptl != fPread(&dx->mrx.fil, xrf.r, dx->ptl, off)
195     ? 0 : rdxrf(pos, fld, dx, xrf.r);
196     }
197     #ifdef BUILD_SHMODE
198     out:
199     RLOCKUN(rid);
200     #endif
201     return ret;
202     } /* getXrf */
203    
204    
/*
	Rebuild the record index file (dx->mrx.fil) by scanning the master file
	(dx->mrd) from its start.

	The index is truncated and slot 0 is rewritten as header (magic, type,
	remaining bytes zero). The master file is then read in chunks of 8192
	bytes. A record ends at an empty line (LF directly followed by LF);
	every other line start counts as one field. A line starting with 'W'
	whose third character is a digit is taken as an op line carrying an
	explicit record id; records without one are numbered sequentially.
	For every record found, a pointer is written to slot rid of the index.
	Finally the max record id is stored at file offset 4 and the index is
	resized to the page boundary computed from env.psz / env.psh.

	returns 0, or the result of eRr if the index could not be truncated.
	The results of the fPwrite calls and of the final fTrunc are not checked.
*/
205     static int rEmake ( Rdx *dx )
206     {
207     char buf[65536+1]; /* need 64K buf for copying DO NOT SHRINK !!! */
208     int sz;
209     char *p, *q;
210     Xrf xrf;
211     unsigned base = 0; /* of current block */
212     unsigned pos = 0; /* of last record */
213     /* record parsing starts at the first char of rid 1, so fld is initially 1 */
214     unsigned fld = 1; /* of last record */
215     unsigned nrid = 1; /* next rid = maxrid+1 */
216     unsigned xrid = 0; /* explicitly given */
217     char op = 0;
218     int more; /* buf not empty flag */
219     char *last; /* of current block */
220    
221     /* TODO: base on FBuf? */
222     if ( fTrunc(&dx->mrx.fil, 0) )
223     return eRr( LOG_ERROR, "could not trunc mrx" );
224     /* write signature */
225     memcpy( xrf.r, MAG_MRX, 3 );
226     xrf.r[3] = dx->typ;
227     memset( xrf.r+4, 0, dx->ptl-4 );
228     fPwrite( &dx->mrx.fil, xrf.r, dx->ptl, 0 );
229     /* loop the masterfile */
230     fSeek( &dx->mrd, 0 );
/* last points at the final valid byte of buf; p scans up to, not including, last */
231     last = (p = buf) + fRead( &dx->mrd, buf, 8192 ) - 1;
232     more = last > buf; /* one byte is no byte ;) */
233     for (;;) { /* records */
234     unsigned len, rid;
235     for (;;) { /* lines and stuff to end of record */
236     if ( p < last ) { /* have one lookahead */
237     if ( LF != *p++ )
238     continue; /* the tight loop ... or use memchr ? */
239     if ( LF != *p ) { /* now p <= last */
240     fld++; /* count field, unless we really recognize a opline */
241     if ( 'W' != *p )
242     continue;
/* candidate op line: look for its terminating LF within at most 127 bytes */
243     sz = last - p; /* avail after p */
244     if ( sz > 127 ) /* longer -> no opline */
245     sz = 127;
246     if ( ! sz || ! (q = memchr(p+1, LF, sz)) ) {
247     if ( sz >= 127 || ! more )
248     continue; /* too long or undelimited last */
249     p--; /* back to \n, so we come here again */
250     goto gimmemore;
251     }
/* reject unless the third character is a digit, i.e. the start of the rid */
252     if ( q < p+3 || p[2] < '0' || '9' < p[2] )
253     continue;
254     /* TODO:
255     take a closer look at whether the whole line makes sense
256     */
257     if ( op ) { /* yeah, two metas in sequence! weird stuff! */
258     p--; /* step back to newline */
259     pos = base+(p-buf); /* fake pos as if we had no line at all */
260     break; /* go handle the PREVIOUS opline */
261     }
262     op = *p;
263     xrid = a2i( p+2, q-p-2 );
264     continue;
265     }
/* LF followed by LF: end of record */
266     break;
267     }
268     gimmemore:
269     LOG_DBG( LOG_DEBUG, "MORE %d at pos %d base %d p +%d last +%d",
270     more, pos, base, p-buf, last-buf );
271     if ( !more )
272     goto schicht; /* german: done */
/* move the unscanned tail to the front of buf and append the next chunk */
273     base += p - buf; /* shift out bytes before p */
274     len = last-p; /* bytes to keep after p; < 128 */
275     if ( len ) /* we're probing for more lookahead */
276     memmove( buf, p, 1+last-p );
277     else /* typically */
278     *buf = *p; /* but save the last dance */
279     p = buf;
280     last = buf + len;
281     /* reload */
282     if ( 0 < (sz = fRead( &dx->mrd, buf+1+len, 8192 )) ) {
283     last += sz;
284     continue;
285     }
286     more = 0; /* but yet, finish this up */
287     /* since *buf = *last was the files last character,
288     we'd expect a newline
289     */
290     if ( last == p )
291     p = buf+(LF==*buf ? 1 : 2); /* pretend buf started \n */
292     if ( ! len )
293     break;
294     /* else try again opline */
295     } /* lines and stuff */
296     /* now p is on a delimiting blank lines \n -- or such ... */
297     len = base + (p-buf) - pos; /* >= 0 */
298     rid = xrid ? xrid : nrid;
299     eRr( LOG_INFO, "mrx %c %d(%d/%d) pos %d len %d",
300     op?op:'>', rid, xrid, nrid, pos, len );
301     if ( base + (p-buf) < pos ) /* FOO !!! */
302     len = 0;
/* a pointer is written for non-empty records and for any op line, even with len 0 */
303     if ( rid && (len || op) )
304     fPwrite( &dx->mrx.fil,
305     mkxrf( xrf.r, dx, pos, len, fld), dx->ptl, rid*dx->ptl );
306     else if (!more)
307     break;
308     pos = base + (p-buf) + 1; /* next starts after p */
309     if (!op)
310     nrid++; /* continue after this */
311     else if (nrid <= xrid)
312     nrid = xrid+1;
/* reset per-record state; note that fld restarts at 0 here, not at 1 */
313     xrid = fld = op = 0;
314     }
315     schicht: ;
/* record the max record id and resize the index to the page boundary */
316     dx->mid = nrid-1;
317     fPwrite(&dx->mrx.fil, &dx->mid, 4, 4);
318     fTrunc(&dx->mrx.fil,
319     ((1+dx->mid)*dx->ptl+env.psz-1)>>env.psh<<env.psh);
320     return 0;
321     } /* rEmake */
322    
323    
324    
325     /* ************************************************************
326     public functions
327     */
328    
329     /**
330     get text
331     the original text is read contigously at base.
332     */
/*
	Read record rid into list l, one Fld per line.

	l     target list; the record header becomes field number LLEN(l), the
	      data fields follow it. Record text is read into l->buf+22, which
	      leaves room to print "rid@pos" in front of it.
	dx    index and master file to read from.
	rid   record id, looked up with getXrf.
	mpos  if non-zero, only a version stored at a position below mpos is
	      accepted: while the version at hand is at or beyond mpos, the
	      "@pos" back reference in its 'W' header line is followed to the
	      previous version.

	returns the number of fields stored including the header field,
	0 if rid has no index entry or no version below mpos exists,
	the non-positive result of fPread if reading fails,
	or the result of eRr(ERR_TRASH, ...) on inconsistent data.
	On success l->buf is advanced to the end of the bytes read.
*/
333     int rRead ( List *l, Rdx *dx, int rid, unsigned mpos )
334     {
335     unsigned pos, len, n=0;
336     int head; /* offset of header */
337     Fld *f;
338     char *rd, *p, *q, *e;
339     int got, first = 1, cont = 1;
340    
341     len = getXrf( &pos, &n, dx, rid ); /* this maybe locked */
342     head = LLEN(l); /* not necessarily 0 */
343     LOG_DBG( LOG_TRACE, "dRead %d pos %d len %d fld %d at head %d",
344     rid, pos, len, n, head );
345     if ( !len )
346     return 0;
347     /*
348     reading the current version is easy, since we know the byte length
349     and usually also number of fields.
350     When backtracking to older versions, we have to figure out everything
351     from the position alone.
352     */
/* first is 1 only for the pass that uses the indexed pos/len;
   cont is cleared once a later pass hits a short read */
353     for (; cont; first=0) { /* extend or backtrack */
354     if ( !n ) {
355     n = len / 36; /* assume one (costing 12 bytes) per 36 bytes data */
356     if ( n < 8 ) /* small record is likely to have some short fields */
357     n = 8;
358     }
359     /* allow 22 bytes to print a nice rid@pos in header,
360     plus one for a sentinel LF */
361     if ( l->fav < n || LAVL(l) < len+23 )
362     lExtend(l, len+23, n); /* the call alone is about 2% ! */
363     if ( 0 >= (got = fPread(&dx->mrd, rd = l->buf+22, len, pos)) )
364     return got;
365     if ( len != (unsigned)got ) {
/* a short read is an error for the indexed length, but expected when probing */
366     if ( first )
367     return eRr(ERR_TRASH, "wanted %d bytes got %d at %d", len, got, pos);
368     len = got;
369     cont = 0;
370     }
371     e = (p = rd) + len;
372     /* check header for rid@pos, mv p to leader */
373     if ( 2 < len && 'W'==*p && TAB==p[1] ) { /* else plain record */
374     int val;
375     p += 2;
376     if ( (got = a2il(p,e-p,&val)) ) {
377     if ( val != rid )
378     return eRr(ERR_TRASH, "wanted rid %d got %d at %d",
379     rid, val, pos);
380     p += got;
381     if ( p < e && '@'==*p ) {
382     p++;
383     if ( (got = a2il(p,e-p,&val)) ) {
384     if ( mpos && pos >= mpos ) {
/* this version is too new: go back to the previous position, which must be lower */
385     if ( val < 0 || (unsigned)val >= pos )
386     return eRr(ERR_TRASH, "bad hpos %d at %d", val, pos);
387     pos = (unsigned)val;
388     if ( first )
389     len += len>>1; /* give some safety margin */
390     continue;
391     }
392     p += got;
393     }
394     }
395     }
396     }
/* still at or beyond mpos and no back reference to follow */
397     if ( mpos && pos >= mpos )
398     return 0;
399     if ( !first ) { /* check for complete record */
/* the length was a guess: look for the empty line that ends the record */
400     for ( q=p; q<e && (q = memchr(q,LF,e-q)); )
401     if ( ++q<e && LF==*q )
402     goto ok;
403     len += 32 + (len>>1); /* try using more space */
404     continue;
405     ok:
406     len = q+1 - rd;
407     }
408     LOG_DBG( LOG_TRACE, "'%.*s'", len, rd );
409     /* go for it */
410     f = l->fld + head;
/* print "rid@pos" at the start of l->buf, then move it directly in front of the data */
411     f->len = u2a(l->buf, rid); /* print header */
412     l->buf[f->len++] = '@';
413     f->len += u2a(l->buf+f->len, pos); /* now f->len <= 21 */
414     if ( p == rd ) { /* got no header -- rec data starts at rd */
415     if ( l->buf < (f->val = rd - f->len - 1) )
416     memmove(f->val, l->buf, f->len); /* just to keep it contiguous */
417     f->val[f->len] = LF;
418     } else { /* TAB-leader-LF at p */
419     memmove(f->val = p - f->len, l->buf, f->len);
420     q = memchr(p,LF,e-p);
421     if ( !q ) return eRr(ERR_TRASH, "missing LF");
422     f->len = q - f->val;
423     }
424     p = f->val + f->len + 1;
425     f->tag = -1;
426     if ( head ) l->fld->tag--; /* count in total length */
427     l->fav--;
428     f++;
429     *e = LF; /* sentinel for memchr */
/* one field per line; an empty line (LF found right at p) ends the record */
430     for ( n=0; p<e && p!=(q = memchr(p, LF, e-p+1)); p = q+1 ) {
431     int min, t=0, d;
432     if ( !l->fav ) {
433     lExtend( l, 0, 16 );
/* NOTE(review): f is refetched after lExtend, presumably because the field array may move -- confirm */
434     f = LEND(l);
435     }
436     /* p += a2il( p, q-p, &f->tag ); */
/* parse the optionally negative decimal tag, then skip one TAB */
437     if ( (min = ('-' == *p)) )
438     p++;
439     for ( ; 10>(d = b36val[(unsigned char)*p]); p++ )
440     t = 10*t + d;
441     f->tag = min ? -t : t;
442     if ( TAB == *p )
443     p++;
444     f->len = q - (f->val = p);
445     l->fld->tag--;
446     l->fav--;
447     f++;
448     n++;
449     }
450     if ( head )
451     l->fld[head].tag = -1-n;
452     l->buf = e;
453     return n+1;
454     }
455     return 0;
456     } /* rRead */
457    
458    
459     int rWrite ( Rdx *dx, const Fld *r, int rid, int opos, unsigned siz )
460     {
461     char buf[128 + 65536];
462     LBlk *blk;
463     unsigned pos = 0, rlen;
464     char *p, *b;
465     int intro = 1, ret;
466    
467     if ( !(dx->flg & DX_WRITE) )
468     return eRr(ERR_BADF, "dx is not writable");
469     rlen = RLEN(r);
470     if ( rid && dx->mid >= rid ) {
471     unsigned fld;
472     getXrf( &pos, &fld, dx, rid );
473     if ( 0<=opos && pos != (unsigned)opos )
474     return eRr(ERR_AGAIN, "pos %d != opos %d", pos, opos);
475     }
476     RLOCKEX(0);
477     if ( ! rid || rid == dx->mid+1 ) {
478     rid = dx->mid+1; /* assign new rid; mid updated in setXrf */
479     if ( !rlen || !r->len ) /* no header */
480     intro = 0;
481     }
482     if ( !siz )
483     siz = rSiz( r );
484     siz += 13*rlen + 128; /* field, header, trailing LF */
485     if ( sizeof(buf) >= siz ) {
486     blk = 0;
487     b = buf;
488     } else {
489     blk = mBlkAlloc( siz ); /* hmm ... maybe would be better ... */
490     b = blk->byt; /* ... to serialize and write several 64K chunks */
491     }
492     p = b;
493     if ( intro ) {
494     *p++ = 'W';
495     *p++ = TAB;
496     p += u2a( p, rid );
497     if ( pos ) {
498     *p++ = '@';
499     p += u2a( p, pos );
500     }
501     if ( rlen && r->len ) {
502     *p++ = TAB;
503     memcpy(p, r->val, r->len);
504     p += r->len;
505     }
506     *p++ = LF;
507     }
508     p += rSer( p, r );
509     dx->flg |= DX_MODIF;
510     pos = dx->rdl;
511     dx->rdl += p-b;
512     ret = fPwrite(&dx->mrd, b, p-b, pos);
513     if ( ret == p - b ) {
514     setXrf(dx, (unsigned)rid, pos, p-b, rlen);
515     ret = rid;
516     } else if (0 <= ret) /* should not happen */
517     ret = ERR_IDIOT;
518     if ( blk )
519     mBlkFree( blk );
520     RLOCKUN(0);
521     return ret;
522     } /* rWrite */
523    
524    
525     int rInit ( Rdx *dx )
526     {
527     int remake = 1;
528    
529     dx->rdl = fSize( dx->mrd );
530    
531     { /* reasons why we should remake */
532     Rxh head;
533     int mxlen = fSize( dx->mrx.fil );
534    
535     if ( 8 != fRead(&dx->mrx.fil,&head,8) )
536     eRr(LOG_WARN, "mrx too short");
537     else if ( memcmp(MAG_MRX,head.mag,3) ) /* FOO! */
538     eRr(LOG_WARN, "mrx has black magic '%.*s'", 3, head.mag);
539     else if ( 0x80 & head.typ )
540     eRr(LOG_WARN, "mrx has bad type 0x%04x", head.typ);
541     else if ( (dx->typ && dx->typ != head.typ) ) /* other type configured */
542     eRr(LOG_WARN, "mrx type 0x%02x != cfg 0x%02x", head.typ, dx->typ);
543     else if ( mxlen < (int)(1+head.mid)*dx->ptl )
544     eRr(LOG_WARN, "mrx too short for len %d", head.mid);
545     else if ( mxlen % env.psz )
546     eRr(LOG_WARN, "mrx len mismatch pagesize %d", env.psz);
547     else if ( (DX_WRITE&dx->mrx.flg) && fTime(dx->mrx.fil) < fTime(dx->mrd) )
548     eRr(LOG_WARN, "mrx older than data"); /* ignore with RO */
549     else {
550     dx->typ = head.typ; /* but further checked below */
551     dx->mid = head.mid;
552     remake = 0;
553     }
554     }
555    
556     switch (dx->typ) {
557     default:
558     eRr(LOG_WARN, "mrx type 0x%02x unsupported", dx->typ);
559     case 0:
560     dx->typ = RX_431;
561     remake = 1;
562     case RX_431:
563     case RX_440:
564     break;
565     }
566     dx->ptl = 8;
567    
568     if ( remake ) {
569     if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg))
570     return eRr(ERR_TRASH, "need excl write access to rebuild .mrx");
571     rEmake(dx);
572     }
573    
574     fMap(&dx->mrx, fSize(dx->mrx.fil)>>env.psh);
575     eRr(LOG_INFO, "mapped %d*%d = %d", 1+dx->mid, dx->ptl, dx->mrx.npg);
576     return 0;
577     } /* rInit */
578    
579    
580     void rFini ( Rdx *dx )
581     {
582     if ( DX_MODIF & dx->flg ) {
583     if ( dx->mrx.npg ) {
584     ((Rxh*)dx->mrx.map)->mid = dx->mid;
585     fMap(&dx->mrx, 0);
586     } else
587     fPwrite(&dx->mrx.fil, &dx->mid, 4, 4);
588     }
589     } /* rFini */

  ViewVC Help
Powered by ViewVC 1.1.26