/[webpac]/openisis/0.9.9e/core/rdx.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/0.9.9e/core/rdx.c

Parent Directory | Revision Log


Revision 604 - (show annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 15809 byte(s)
import of new openisis release, 0.9.9e

1 /*
2 The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3 Version 0.9.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: rdx.c,v 1.15 2004/11/12 08:37:56 kripke Exp $
25 implementation of general db access functions.
26 */
27
28
29 #include "core/core.h"
30
31
32
/*
	Type code of a record index (rx) pointer layout.
	posbytes in [4..8], lenbytes in [3..6], fldbytes in [0..2];
	the code is (posbytes-4)<<4 | (lenbytes-3)<<2 | fldbytes.
	Every argument and every sub-expression is parenthesized, so that
	arbitrary expressions (e.g. a conditional) may be passed as arguments.
*/
#define RX_TYPE(posbytes,lenbytes,fldbytes) \
	((((posbytes)-4)<<4) | (((lenbytes)-3)<<2) | (fldbytes))
enum { /* by now, only the two 8 byte types are implemented */
	RX_431 = RX_TYPE(4,3,1),
	RX_440 = RX_TYPE(4,4,0)
};
43
44 /* ************************************************************
45 private types
46 */
47
/*
Buffer for one record index pointer.
r is the byte array handed to mkxrf / rdxrf and to fPread / fPwrite;
only the first dx->ptl bytes of it are used.
NOTE(review): bar is never accessed in this file; presumably it is only
there to give r the alignment of lolo -- confirm against the definition
of lolo.
*/
48 typedef union {
49 lolo bar;
50 char r[16];
51 } Xrf;
52
53 /**
header of the first record index block.
rInit reads it from the first 8 bytes of the index file; while the index
is mapped, the start of the mapping is accessed through this struct to
read or update mid.
*/
54 typedef struct Rxh {
55 char mag[3]; /* magic MRX or mrx, compared against MAG_MRX by rInit */
56 char typ; /* pointer layout type as found in the file, see RX_TYPE */
57 unsigned mid; /* max record id == total number of used pointers */
58 /* for type of more than 8 byte: upper mid */
59 } Rxh;
60
61
62 /* ************************************************************
63 data
64 */
65
/*
	The magic we need depending on our endianness:
	"MRX" when CPU_BIG_ENDIAN is defined, "mrx" otherwise.
	The string is only ever read (its first 3 bytes are copied into and
	compared with the index header), hence it is const.
*/
static const char MAG_MRX[] =
#ifdef CPU_BIG_ENDIAN
	"MRX"
#else
	"mrx"
#endif
;
74
75
76 /* ************************************************************
77 private functions
78 */
/*
Locking helpers: each applies one of FLOCKSH / FLOCKEX / FLOCKUN to the
master file dx->mrd with argument n and discards the result.
The expansion refers to a variable named dx, so these macros can only be
used inside functions that have an Rdx *dx in scope.
NOTE(review): by their names these presumably take a shared lock, take an
exclusive lock and release a lock; n is a record id at most call sites
and 0 in rWrite -- confirm the exact meaning against the FLOCK*
definitions.
*/
79 #define RLOCKSH(n) (void)FLOCKSH(dx->mrd, n)
80 #define RLOCKEX(n) (void)FLOCKEX(dx->mrd, n)
81 #define RLOCKUN(n) (void)FLOCKUN(dx->mrd, n)
82
83 /*
84 create a pointer from the least significant bytes of pos, len, fld
85 buf must have dx->ptl bytes (up to 16 = 8+4+4)
86 and the most strict alignment (i.e. 4 or 8) possible for dx->ptl
87 returns buf
88 supports only RX_431 and RX_440
89 */
90 static char *mkxrf ( char *buf, Rdx *dx,
91 unsigned pos, unsigned len, unsigned fld )
92 {
93 ((unsigned*)buf)[0] = pos;
94 if ( RX_431 == dx->typ ) {
95 if ( ~0xff & fld ) fld = 0; /* too big */
96 #ifdef CPU_BIG_ENDIAN /* the first = high order 3 bytes are len */
97 len = len<<8 | fld;
98 #else /* the first = low order 3 bytes are len */
99 len = (0xffffff&len) | fld<<24;
100 #endif
101 }
102 ((unsigned*)buf)[1] = len;
103 return buf;
104 }
105
106
107 /*
108 read pointer, return len
109 if 0x0f00 & dx->typ, fld must not be 0, else *fld is untouched
110 supports only RX_431 and RX_440
111 */
112 static unsigned
113 rdxrf ( unsigned *pos, unsigned *fld, Rdx *dx, char *buf )
114 {
115 unsigned len = ((unsigned*)buf)[1];
116 *pos = *(unsigned*)buf;
117 if ( RX_431 == dx->typ ) {
118 *fld = ((unsigned char *)buf)[7];
119 #ifdef CPU_BIG_ENDIAN
120 len >>= 8;
121 #else
122 len &= 0xffffff;
123 #endif
124 }
125 return len;
126 }
127
128
129 static void setXrf ( Rdx *dx, unsigned rid,
130 unsigned pos, unsigned len, unsigned fld )
131 {
132 Xrf xrf;
133 unsigned off = rid*dx->ptl;
134 eRr(LOG_INFO,"setXrf %d %d %d %d",rid,pos,len,fld);
135 RLOCKEX(rid);
136 if ( dx->mrx.npg ) {
137 if ( (off+dx->ptl) > (dx->mrx.npg<<env.psh) ) { /* remap ? */
138 unsigned npg = 1 + ((off+dx->ptl-1)>>env.psh); /* num pages we need */
139 if ( npg > dx->mrx.lim ) { /* at limit, drop the mapping completely */
140 eRr(LOG_WARN, "dropping mrx mapping at limit %u", dx->mrx.lim);
141 npg = dx->mrx.lim = 0;
142 }
143 fMap(&dx->mrx, npg);
144 }
145 if ( (off+dx->ptl) <= (dx->mrx.npg<<env.psh) ) {
146 mkxrf(dx->mrx.map + off, dx, pos, len, fld);
147 if ( ENV_MSYNC & env.flg ) fMSync(&dx->mrx, off>>env.psh);
148 goto done;
149 }
150 }
151 fPwrite(&dx->mrx.fil, mkxrf(xrf.r, dx, pos, len, fld), dx->ptl, off);
152 done:
153 if ( rid > (unsigned)dx->mid ) {
154 if ( dx->mrx.npg )
155 ((Rxh*)dx->mrx.map)->mid = rid;
156 else
157 fPwrite(&dx->mrx.fil, &rid, 4, 4);
158 dx->mid = rid;
159 }
160 RLOCKUN(rid);
161 } /* setXrf */
162
163
164 static unsigned getXrf ( unsigned *pos, unsigned *fld, Rdx *dx, int rid )
165 {
166 unsigned ret;
167 Xrf xrf;
168 unsigned off = (unsigned)rid*dx->ptl;
169 RLOCKSH(rid);
170 if ( (off+dx->ptl) <= (dx->mrx.npg<<env.psh) )
171 ret = rdxrf(pos, fld, dx, dx->mrx.map + off);
172 else {
173 #ifdef BUILD_SHMODE
174 if ( ENV_SHARED==env.wri ) { /* might have been extended */
175 unsigned pgl;
176 /* TODO: maybe we should guard this by the record lock ? */
177 if ( dx->mrx.npg )
178 dx->mid = ((Rxh*)dx->mrx.map)->mid;
179 else
180 fPread(&dx->mrx.fil, &dx->mid, 4, 4);
181 if ( rid > dx->mid ) {
182 ret = 0;
183 goto out;
184 }
185 pgl = fSize(dx->mrx.fil) >> env.psh;
186 if ( pgl != dx->mrx.npg )
187 fMap(&dx->mrx, pgl);
188 if ( (off+dx->ptl) <= (dx->mrx.npg<<env.psh) ) {
189 ret = rdxrf(pos, fld, dx, dx->mrx.map + off);
190 goto out;
191 }
192 }
193 #endif
194 ret = rid > dx->mid || dx->ptl != fPread(&dx->mrx.fil, xrf.r, dx->ptl, off)
195 ? 0 : rdxrf(pos, fld, dx, xrf.r);
196 }
197 #ifdef BUILD_SHMODE
198 out:
199 RLOCKUN(rid);
200 #endif
201 return ret;
202 } /* getXrf */
203
204
205 static int rEmake ( Rdx *dx )
206 {
207 char buf[65536+1]; /* need 64K buf for copying DO NOT SHRINK !!! */
208 int sz;
209 char *p, *q;
210 Xrf xrf;
211 unsigned base = 0; /* of current block */
212 unsigned pos = 0; /* of last record */
213 /* record parsing starts at the first char of rid 1, so fld is initially 1 */
214 unsigned fld = 1; /* of last record */
215 unsigned nrid = 1; /* next rid = maxrid+1 */
216 unsigned xrid = 0; /* explicitly given */
217 char op = 0;
218 int more; /* buf not empty flag */
219 char *last; /* of current block */
220
221 /* TODO: base on FBuf? */
222 if ( fTrunc(&dx->mrx.fil, 0) )
223 return eRr( LOG_ERROR, "could not trunc mrx" );
224 /* write signature */
225 memcpy( xrf.r, MAG_MRX, 3 );
226 xrf.r[3] = dx->typ;
227 memset( xrf.r+4, 0, dx->ptl-4 );
228 fPwrite( &dx->mrx.fil, xrf.r, dx->ptl, 0 );
229 /* loop the masterfile */
230 fSeek( &dx->mrd, 0 );
231 last = (p = buf) + fRead( &dx->mrd, buf, 8192 ) - 1;
232 more = last > buf; /* one byte is no byte ;) */
233 for (;;) { /* records */
234 unsigned len, rid;
235 for (;;) { /* lines and stuff to end of record */
236 if ( p < last ) { /* have one lookahead */
237 if ( LF != *p++ )
238 continue; /* the tight loop ... or use memchr ? */
239 if ( LF != *p ) { /* now p <= last */
240 fld++; /* count field, unless we really recognize a opline */
241 if ( 'W' != *p )
242 continue;
243 sz = last - p; /* avail after p */
244 if ( sz > 127 ) /* longer -> no opline */
245 sz = 127;
246 if ( ! sz || ! (q = memchr(p+1, LF, sz)) ) {
247 if ( sz >= 127 || ! more )
248 continue; /* too long or undelimited last */
249 p--; /* back to \n, so we come here again */
250 goto gimmemore;
251 }
252 if ( q < p+3 || p[2] < '0' || '9' < p[2] )
253 continue;
254 /* TODO:
255 take a closer look at whether the whole line makes sense
256 */
257 if ( op ) { /* yeah, two metas in sequence! weird stuff! */
258 p--; /* step back to newline */
259 pos = base+(p-buf); /* fake pos as if we had no line at all */
260 break; /* go handle the PREVIOUS opline */
261 }
262 op = *p;
263 xrid = a2i( p+2, q-p-2 );
264 continue;
265 }
266 break;
267 }
268 gimmemore:
269 LOG_DBG( LOG_DEBUG, "MORE %d at pos %d base %d p +%d last +%d",
270 more, pos, base, p-buf, last-buf );
271 if ( !more )
272 goto schicht; /* german: done */
273 base += p - buf; /* shift out bytes before p */
274 len = last-p; /* bytes to keep after p; < 128 */
275 if ( len ) /* we're probing for more lookahead */
276 memmove( buf, p, 1+last-p );
277 else /* typically */
278 *buf = *p; /* but save the last dance */
279 p = buf;
280 last = buf + len;
281 /* reload */
282 if ( 0 < (sz = fRead( &dx->mrd, buf+1+len, 8192 )) ) {
283 last += sz;
284 continue;
285 }
286 more = 0; /* but yet, finish this up */
287 /* since *buf = *last was the files last character,
288 we'd expect a newline
289 */
290 if ( last == p )
291 p = buf+(LF==*buf ? 1 : 2); /* pretend buf started \n */
292 if ( ! len )
293 break;
294 /* else try again opline */
295 } /* lines and stuff */
296 /* now p is on a delimiting blank lines \n -- or such ... */
297 len = base + (p-buf) - pos; /* >= 0 */
298 rid = xrid ? xrid : nrid;
299 eRr( LOG_INFO, "mrx %c %d(%d/%d) pos %d len %d",
300 op?op:'>', rid, xrid, nrid, pos, len );
301 if ( base + (p-buf) < pos ) /* FOO !!! */
302 len = 0;
303 if ( rid && (len || op) )
304 fPwrite( &dx->mrx.fil,
305 mkxrf( xrf.r, dx, pos, len, fld), dx->ptl, rid*dx->ptl );
306 else if (!more)
307 break;
308 pos = base + (p-buf) + 1; /* next starts after p */
309 if (!op)
310 nrid++; /* continue after this */
311 else if (nrid <= xrid)
312 nrid = xrid+1;
313 xrid = fld = op = 0;
314 }
315 schicht: ;
316 dx->mid = nrid-1;
317 fPwrite(&dx->mrx.fil, &dx->mid, 4, 4);
318 fTrunc(&dx->mrx.fil,
319 ((1+dx->mid)*dx->ptl+env.psz-1)>>env.psh<<env.psh);
320 return 0;
321 } /* rEmake */
322
323
324
325 /* ************************************************************
326 public functions
327 */
328
329 /**
get text of record rid and split it into fields appended to list l.

The index pointer of rid is looked up with getXrf and the record text is
read contiguously from the master file dx->mrd into the buffer of l.
The first field added is a header whose value starts with "rid@pos" and
whose tag is set to -1, or to -1-n when l already held fields; every
following line becomes one field with a decimal tag.

mpos: if non-zero, a version stored at a position >= mpos is not
accepted; instead the @pos reference in the record's header line is
followed back to an older version. If there is no such reference, 0 is
returned.

returns the number of fields added including the header (n+1),
0 if there is no record (no length in the index, no acceptable version,
or no complete record found), the result of fPread if it is <= 0,
or the result of eRr(ERR_TRASH, ...) on inconsistent data.
*/
333 int rRead ( List *l, Rdx *dx, int rid, unsigned mpos )
334 {
335 unsigned pos, len, n=0;
336 int head; /* offset of header */
337 Fld *f;
338 char *rd, *p, *q, *e;
339 int got, first = 1, cont = 1;
340
341 len = getXrf( &pos, &n, dx, rid ); /* this maybe locked */
342 head = LLEN(l); /* not necessarily 0 */
343 LOG_DBG( LOG_TRACE, "dRead %d pos %d len %d fld %d at head %d",
344 rid, pos, len, n, head );
345 if ( !len )
346 return 0;
347 /*
348 reading the current version is easy, since we know the byte length
349 and usually also number of fields.
350 When backtracking to older versions, we have to figure out everything
351 from the position alone.
352 */
/* each pass reads len bytes at pos; first is set only during the first pass,
cont is cleared once a read came back short */
353 for (; cont; first=0) { /* extend or backtrack */
354 if ( !n ) {
355 n = len / 36; /* assume one (costing 12 bytes) per 36 bytes data */
356 if ( n < 8 ) /* small record is likely to have some short fields */
357 n = 8;
358 }
359 /* allow 22 bytes to print a nice rid@pos in header,
360 plus one for a sentinel LF */
361 if ( l->fav < n || LAVL(l) < len+23 )
362 lExtend(l, len+23, n); /* the call alone is about 2% ! */
363 if ( 0 >= (got = fPread(&dx->mrd, rd = l->buf+22, len, pos)) )
364 return got;
/* a short read is an error for the indexed length of the first pass;
on later passes the length is a guess, so take what we got and do not loop again */
365 if ( len != (unsigned)got ) {
366 if ( first )
367 return eRr(ERR_TRASH, "wanted %d bytes got %d at %d", len, got, pos);
368 len = got;
369 cont = 0;
370 }
371 e = (p = rd) + len;
372 /* check header for rid@pos, mv p to leader */
373 if ( 2 < len && 'W'==*p && TAB==p[1] ) { /* else plain record */
374 int val;
375 p += 2;
376 if ( (got = a2il(p,e-p,&val)) ) {
377 if ( val != rid )
378 return eRr(ERR_TRASH, "wanted rid %d got %d at %d",
379 rid, val, pos);
380 p += got;
381 if ( p < e && '@'==*p ) {
382 p++;
383 if ( (got = a2il(p,e-p,&val)) ) {
/* this version is at or beyond mpos: follow the reference to the
previous version, which must be at a lower position */
384 if ( mpos && pos >= mpos ) {
385 if ( val < 0 || (unsigned)val >= pos )
386 return eRr(ERR_TRASH, "bad hpos %d at %d", val, pos);
387 pos = (unsigned)val;
388 if ( first )
389 len += len>>1; /* give some safety margin */
390 continue;
391 }
392 p += got;
393 }
394 }
395 }
396 }
/* still at or beyond mpos and nothing to follow back */
397 if ( mpos && pos >= mpos )
398 return 0;
399 if ( !first ) { /* check for complete record */
/* the length was guessed: look for the blank line (LF LF) ending the record */
400 for ( q=p; q<e && (q = memchr(q,LF,e-q)); )
401 if ( ++q<e && LF==*q )
402 goto ok;
403 len += 32 + (len>>1); /* try using more space */
404 continue;
405 ok:
406 len = q+1 - rd;
407 }
408 LOG_DBG( LOG_TRACE, "'%.*s'", len, rd );
409 /* go for it */
410 f = l->fld + head;
411 f->len = u2a(l->buf, rid); /* print header */
412 l->buf[f->len++] = '@';
413 f->len += u2a(l->buf+f->len, pos); /* now f->len <= 21 */
/* move the printed rid@pos so that it directly precedes the record data */
414 if ( p == rd ) { /* got no header -- rec data starts at rd */
415 if ( l->buf < (f->val = rd - f->len - 1) )
416 memmove(f->val, l->buf, f->len); /* just to keep it contiguous */
417 f->val[f->len] = LF;
418 } else { /* TAB-leader-LF at p */
419 memmove(f->val = p - f->len, l->buf, f->len);
420 q = memchr(p,LF,e-p);
421 if ( !q ) return eRr(ERR_TRASH, "missing LF");
422 f->len = q - f->val;
423 }
424 p = f->val + f->len + 1;
425 f->tag = -1;
426 if ( head ) l->fld->tag--; /* count in total length */
427 l->fav--;
428 f++;
429 *e = LF; /* sentinel for memchr */
/* one field per line: optional '-', decimal tag, optional TAB, value up to
the LF; the loop ends at an empty line or at e */
430 for ( n=0; p<e && p!=(q = memchr(p, LF, e-p+1)); p = q+1 ) {
431 int min, t=0, d;
432 if ( !l->fav ) {
433 lExtend( l, 0, 16 );
434 f = LEND(l);
435 }
436 /* p += a2il( p, q-p, &f->tag ); */
437 if ( (min = ('-' == *p)) )
438 p++;
439 for ( ; 10>(d = b36val[(unsigned char)*p]); p++ )
440 t = 10*t + d;
441 f->tag = min ? -t : t;
442 if ( TAB == *p )
443 p++;
444 f->len = q - (f->val = p);
445 l->fld->tag--;
446 l->fav--;
447 f++;
448 n++;
449 }
450 if ( head )
451 l->fld[head].tag = -1-n;
/* the list buffer now starts behind the bytes that were read */
452 l->buf = e;
453 return n+1;
454 }
455 return 0;
456 } /* rRead */
457
458
459 int rWrite ( Rdx *dx, const Fld *r, int rid, int opos, unsigned siz )
460 {
461 char buf[128 + 65536];
462 LBlk *blk;
463 unsigned pos = 0, rlen;
464 char *p, *b;
465 int intro = 1, ret;
466
467 if ( !(dx->flg & DX_WRITE) )
468 return eRr(ERR_BADF, "dx is not writable");
469 rlen = RLEN(r);
470 if ( rid && dx->mid >= rid ) {
471 unsigned fld;
472 getXrf( &pos, &fld, dx, rid );
473 if ( 0<=opos && pos != (unsigned)opos )
474 return eRr(ERR_AGAIN, "pos %d != opos %d", pos, opos);
475 }
476 RLOCKEX(0);
477 if ( ! rid || rid == dx->mid+1 ) {
478 rid = dx->mid+1; /* assign new rid; mid updated in setXrf */
479 if ( !rlen || !r->len ) /* no header */
480 intro = 0;
481 }
482 if ( !siz )
483 siz = rSiz( r );
484 siz += 13*rlen + 128; /* field, header, trailing LF */
485 if ( sizeof(buf) >= siz ) {
486 blk = 0;
487 b = buf;
488 } else {
489 blk = mBlkAlloc( siz ); /* hmm ... maybe would be better ... */
490 b = blk->byt; /* ... to serialize and write several 64K chunks */
491 }
492 p = b;
493 if ( intro ) {
494 *p++ = 'W';
495 *p++ = TAB;
496 p += u2a( p, rid );
497 if ( pos ) {
498 *p++ = '@';
499 p += u2a( p, pos );
500 }
501 if ( rlen && r->len ) {
502 *p++ = TAB;
503 memcpy(p, r->val, r->len);
504 p += r->len;
505 }
506 *p++ = LF;
507 }
508 p += rSer( p, r );
509 dx->flg |= DX_MODIF;
510 pos = dx->rdl;
511 dx->rdl += p-b;
512 ret = fPwrite(&dx->mrd, b, p-b, pos);
513 if ( ret == p - b ) {
514 setXrf(dx, (unsigned)rid, pos, p-b, rlen);
515 ret = rid;
516 } else if (0 <= ret) /* should not happen */
517 ret = ERR_IDIOT;
518 if ( blk )
519 mBlkFree( blk );
520 RLOCKUN(0);
521 return ret;
522 } /* rWrite */
523
524
525 int rInit ( Rdx *dx )
526 {
527 int remake = 1;
528
529 dx->rdl = fSize( dx->mrd );
530
531 { /* reasons why we should remake */
532 Rxh head;
533 int mxlen = fSize( dx->mrx.fil );
534
535 if ( 8 != fRead(&dx->mrx.fil,&head,8) )
536 eRr(LOG_WARN, "mrx too short");
537 else if ( memcmp(MAG_MRX,head.mag,3) ) /* FOO! */
538 eRr(LOG_WARN, "mrx has black magic '%.*s'", 3, head.mag);
539 else if ( 0x80 & head.typ )
540 eRr(LOG_WARN, "mrx has bad type 0x%04x", head.typ);
541 else if ( (dx->typ && dx->typ != head.typ) ) /* other type configured */
542 eRr(LOG_WARN, "mrx type 0x%02x != cfg 0x%02x", head.typ, dx->typ);
543 else if ( mxlen < (int)(1+head.mid)*dx->ptl )
544 eRr(LOG_WARN, "mrx too short for len %d", head.mid);
545 else if ( mxlen % env.psz )
546 eRr(LOG_WARN, "mrx len mismatch pagesize %d", env.psz);
547 else if ( (DX_WRITE&dx->mrx.flg) && fTime(dx->mrx.fil) < fTime(dx->mrd) )
548 eRr(LOG_WARN, "mrx older than data"); /* ignore with RO */
549 else {
550 dx->typ = head.typ; /* but further checked below */
551 dx->mid = head.mid;
552 remake = 0;
553 }
554 }
555
556 switch (dx->typ) {
557 default:
558 eRr(LOG_WARN, "mrx type 0x%02x unsupported", dx->typ);
559 case 0:
560 dx->typ = RX_431;
561 remake = 1;
562 case RX_431:
563 case RX_440:
564 break;
565 }
566 dx->ptl = 8;
567
568 if ( remake ) {
569 if (ENV_EXCL!=env.wri || !(DX_WRITE & dx->flg))
570 return eRr(ERR_TRASH, "need excl write access to rebuild .mrx");
571 rEmake(dx);
572 }
573
574 fMap(&dx->mrx, fSize(dx->mrx.fil)>>env.psh);
575 eRr(LOG_INFO, "mapped %d*%d = %d", 1+dx->mid, dx->ptl, dx->mrx.npg);
576 return 0;
577 } /* rInit */
578
579
580 void rFini ( Rdx *dx )
581 {
582 if ( DX_MODIF & dx->flg ) {
583 if ( dx->mrx.npg ) {
584 ((Rxh*)dx->mrx.map)->mid = dx->mid;
585 fMap(&dx->mrx, 0);
586 } else
587 fPwrite(&dx->mrx.fil, &dx->mid, 4, 4);
588 }
589 } /* rFini */

  ViewVC Help
Powered by ViewVC 1.1.26