Line # Revision Author
1 237 dpavlin /*
2 openisis - an open implementation of the CDS/ISIS database
3 Version 0.8.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: ldb.c,v 1.95 2003/06/10 11:00:34 kripke Exp $
25 implementation of general db access functions.
26 */
27
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h> /* PATH_MAX */
31 #include <errno.h>
32
33
34 /* special */
35 #if defined( __GNUC__ ) && defined ( alloca )
36 #include <alloca.h>
37 #endif
38
/*
	Byte order handling.
	The cpu type macros predefined by the compiler are used to detect big endian
	hosts. Besides the historic names (sparc, __ppc__), also accept __sparc__
	(the only spelling available in strict ISO modes) and the generic
	__BYTE_ORDER__ macro of gcc/clang.
*/
#if defined( sparc ) || defined( __sparc__ ) || defined( __ppc__ ) \
	|| ( defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ ) \
		&& __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
# define LDB_BIG_ENDIAN
/* TODO: figure out fastest "htonl" on those boxes that usually don't swap */
/** reverse the byte order of an int (written for a 4 byte int). */
static int rvi ( int i )
{
	int r;
	((char*)&r)[0] = ((char*)&i)[3];
	((char*)&r)[1] = ((char*)&i)[2];
	((char*)&r)[2] = ((char*)&i)[1];
	((char*)&r)[3] = ((char*)&i)[0];
	return r;
}
/** reverse the byte order of a short (written for a 2 byte short). */
static short rvs ( short i )
{
	short r;
	((char*)&r)[0] = ((char*)&i)[1];
	((char*)&r)[1] = ((char*)&i)[0];
	return r;
}
/* swap an int / short lvalue in place */
#define SWI( i ) ((i) = rvi( i ))
#define SWS( s ) ((s) = rvs( s ))
#else
/* host byte order needs no swapping: the helpers are the identity.
	The expansion is parenthesized so that an expression argument
	keeps its grouping, e.g. rvi( a+b )*2.
*/
# define rvi(i) (i)
# define rvs(s) (s)
#define SWI( i )
#define SWS( s )
#endif
/* sparc traps on unaligned access; the plain "sparc" macro is missing in
	strict ISO compiler modes, so test the reserved spellings as well.
*/
#if defined( sparc ) || defined( __sparc__ ) || defined( __sparc )
# define LDB_NEEDALIGN
#endif
68 #if defined( LDB_NEEDALIGN )
/** fetch 4 bytes from a possibly unaligned address m into an unsigned. */
static unsigned GETINT ( const void *m )
{
	unsigned value;
	const unsigned char *from = (const unsigned char *)m;
	unsigned char *to = (unsigned char *)&value;
	int k;

	for ( k = 0; k < 4; k++ )
		to[k] = from[k];
	return value;
}
/** fetch 2 bytes from a possibly unaligned address m into an unsigned short. */
static unsigned short GETSHORT ( const void *m )
{
	unsigned short value;
	const unsigned char *from = (const unsigned char *)m;
	unsigned char *to = (unsigned char *)&value;

	to[0] = from[0];
	to[1] = from[1];
	return value;
}
81 #else
82 #define GETINT( m ) (*(unsigned*)(m))
83 #define GETSHORT( m ) (*(unsigned short*)(m))
84 #endif
85
86 #include "lstr.h"
87 #include "lio.h"
88 #include "lbt.h"
89 #include "lcs.h"
90 #include "ldb.h"
91 #include "lfdt.h"
92 #include "luti.h"
93
/* IsAbsPath(p): non-zero if p is a non-null path starting at a root.
	Everywhere but WIN32 that means a leading '/'.
	On WIN32 a leading '/' or '\' counts, as does any first character
	followed by ':' and then '/' or '\'.
*/
#ifndef WIN32
# define IsAbsPath(p) ((p) && '/' == *(p))
#else
# define IsAbsPath(p) \
	( (p) && *(p) && ( '/' == *(p) || '\\' == *(p) \
		|| ( ':' == (p)[1] && ( '/' == (p)[2] || '\\' == (p)[2] ) ) ) )
#endif
105
106
/* control characters by number - '\n' isn't really well defined */
#define LF  0x0a /* LineFeed a.k.a. newline */
#define TAB 0x09 /* horizontal tab */
#define VT  0x0b /* vertical tab, used as newline replacement */
110
111 /* ************************************************************
112 private types
113 */
114
/** extension of the master file proper. */
static const char EXT_MST_MST[] = ".mst";
/** extension of the master file xref. */
static const char EXT_MST_XRF[] = ".xrf";

/** index of each master file; MST_FILES is their number. */
typedef enum {
	MST_MST = 0,
	MST_XRF = 1,
	MST_FILES = 2
} mst_file;

/** master file extensions, in mst_file order. */
static const char * const EXT_MST[MST_FILES] = {
	EXT_MST_MST,
	EXT_MST_XRF
};
130
/* extensions of the inverted file set */
static const char EXT_INV_N01[] = ".n01"; /* short term nodes */
static const char EXT_INV_L01[] = ".l01"; /* short term leaves */
static const char EXT_INV_N02[] = ".n02"; /* long term nodes */
static const char EXT_INV_L02[] = ".l02"; /* long term leaves */
static const char EXT_INV_CNT[] = ".cnt"; /* control records */
static const char EXT_INV_IFP[] = ".ifp"; /* postings */

/** index of each inverted file; INV_FILES is their number. */
typedef enum {
	INV_N01 = 0,
	INV_L01 = 1,
	INV_N02 = 2,
	INV_L02 = 3,
	INV_CNT = 4,
	INV_IFP = 5,
	INV_FILES = 6
} inv_file;

/** inverted file extensions, in inv_file order. */
static const char * const EXT_INV[INV_FILES] = {
	EXT_INV_N01,
	EXT_INV_L01,
	EXT_INV_N02,
	EXT_INV_L02,
	EXT_INV_CNT,
	EXT_INV_IFP
};
162
/** extension of the lbt B-Link-Tree.
	It's named oxi because that reads nicer than oix for OpenIsis indeX.
	However, see http://www.oxicenter.com.br/
*/
static const char EXT_LBT_OXI[] = ".oxi";
/** the lbt file set consists of this single file. */
static const char * const EXT_LBT[] = { EXT_LBT_OXI };
171
172
/** extensions of the plaintext master file.
	Note that EXT_TXT_OPT is not part of the EXT_TXT file set.
*/
static const char EXT_TXT_TXT[] = ".txt";
static const char EXT_TXT_PTR[] = ".ptr";
static const char EXT_TXT_OPT[] = ".opt";

/** index of each plaintext file; TXT_FILES is their number. */
typedef enum {
	TXT_TXT = 0,
	TXT_PTR = 1,
	TXT_FILES = 2
} txt_file;

/** plaintext file extensions, in txt_file order. */
static const char * const EXT_TXT[TXT_FILES] = {
	EXT_TXT_TXT,
	EXT_TXT_PTR
};

/** magic of the ptr file. */
static const char ISIX[] = "ISIX";
189
190
/** extension of the supporting file holding the alpha character table. */
static const char EXT_SUP_ACT[] = ".act";
/** extension of the supporting file holding the uppercase table. */
static const char EXT_SUP_UCT[] = ".uct";

/** index of each supporting file; SUP_FILES is their number. */
typedef enum {
	SUP_ACT = 0,
	SUP_UCT = 1,
	SUP_FILES = 2
} sup_file;

/** supporting file extensions, in sup_file order. */
static const char * const EXT_SUP[SUP_FILES] = {
	EXT_SUP_ACT,
	EXT_SUP_UCT
};
206
207
208 typedef int lblk[128];
209
210
211 typedef struct {
212 Db head;
213 int flags;
214 const char *path;
215 int mst[MST_FILES]; /* master file */
216 int inv[INV_FILES]; /* primary inverted file */
217 int mfc[LMFC__FL]; /* master file control record */
218 unsigned mflen; /* master file length */
219 int xrf[129]; /* last used xrf block : THREAD THREAT */
220 int xrlen; /* length of xrf (in blocks) */
221 unsigned short ptr; /* type of pointer file (new style xrf) */
222 unsigned short ptrl; /* pointer bytes, 512 for old xrf */
223 char *mmap; /* memory map of xrf/ptr */
224 int mmlen; /* length of map (in ptrl) */
225 int cnt[LDB_INDEXES][LCNT__FL]; /* two cnt records */
226 short tlen[LDB_INDEXES]; /* max term length for each index */
227 LcsTab ctab[LCS__TABS];
228 Idx oxi;
229 } LDb;
230
231
232 typedef union {
233 lll bar;
234 char r[16];
235 } Ptr;
236
237
/* db flags: state bits in the upper half ... */
#define DB_OPEN     (0x01 << 16)
#define DB_INVOPEN  (0x02 << 16)
#define DB_LBTOPEN  (0x04 << 16)
#define DB_TXTOPEN  (0x08 << 16)
#define DB_WRITABLE (0x10 << 16)
#define DB_MODIFIED (0x20 << 16)

/* ... and mode bits in the low byte */
#define DB_TXTMODE 0x20
#define DB_MMAP    0x10
#define DB_VARI    0x0f /* mask for variant */


/* get xstr for record rec in set, selected by the variant bits of db */
#define DB_XSTR( db, set, rec ) \
	lstrlib[ set ].desc[ DB_VARI & (db)->flags ][ rec ]
/* get record names for record rec in set (db is not used) */
#define DB_RNAM( db, set, rec ) \
	lstrlib[ set ].name[ rec ]
257
258
259 /** packed little endian masterfile control structure.
260 */
261 typedef struct Mfc {
262 int ctlm;
263 int nmfn;
264 int nmfb;
265 short nmfp;
266 short type;
267 int rcnt;
268 int mfx1;
269 int mfx2;
270 int mfx3;
271 } Mfc;
272
273
274 /** packed little endian masterfile record.
275 */
276 typedef struct Dict {
277 short tag;
278 short pos;
279 short len;
280 } Dict;
281
282 /** packed little endian masterfile record.
283 */
284 typedef struct Mfr {
285 int mfn;
286 short recl; /* a.k.a. mfrl */
287 short bwbl; /* low part of int */
288 short bwbh; /* high part of int */
289 short bwp;
290 /* it is believed, that this first five fields up to here (12 bytes packed)
291 are to be in one 512-byte block; the manual mentiones even 14 bytes ... ???
292 */
293 short base;
294 short nvf;
295 short stat;
296 Dict dict[1];
297 } Mfr;
298
299
300
301 /* ************************************************************
302 private data
303 */
304
305 static LDb defdbspace[32];
306 /* array of open dbs. should expand dynamically. */
307 static LDb *dbs = defdbspace;
308 static int dbs_len = sizeof(defdbspace)/sizeof(defdbspace[0]);
309
310 static int init;
311
312
313 /* ************************************************************
314 private functions
315 */
316 static LDb *getDb ( int id )
317 {
318 if ( 0 <= id && id < dbs_len && dbs[id].flags ) {
319 return &dbs[id];
320 }
321 log_msg( LOG_ERROR, "attempt to access bad db id %d", id );
322 return 0;
323 } /* getDb */
324
325
326 /* ************************************************************
327 start of io section
328 */
329 enum {
330 /* additional flags in the LIO_FD range */
331 OPEN_TRY = 1, /* try writable, open readonly else */
332 OPEN_UC = 2, /* use uppercase ext */
333 /* commonly used combinations */
334 /* 1) open as is, do not complain about any failure, do not create */
335 OPEN_ASIS = LIO_SEEK|LIO_RDWR|OPEN_TRY,
336 /* 2) open readonly, do not complain about any failure, do not create */
337 OPEN_RDIF = LIO_SEEK|LIO_RD|LIO_TRY,
338 /* 3) open readonly, complain about any failure */
339 OPEN_RD = LIO_SEEK|LIO_RD,
340 /* 4) open or create writable, complain on failure */
341 OPEN_NEW = LIO_SEEK|LIO_RDWR|LIO_CREAT,
342 OPEN_BLANK = LIO_SEEK|LIO_RDWR|LIO_CREAT|LIO_TRUNC
343 };
344
345 /* figure out wether to use uppercase extension on path.
346 if last path component (everything after the last / and \)
347 does contain an uppercase ascii and does not contain a lowercase ascii,
348 return OPEN_UC, else 0.
349 */
350 static int autocase ( const char *path )
351 {
352 int ret = 0;
353 const char *e = path + strlen( path );
354 while ( e-- > path )
355 if ( 'A'<=*e && *e<= 'Z' )
356 ret = OPEN_UC;
357 else if ( 'a'<=*e && *e<= 'z' )
358 return 0;
359 else if ( '/'==*e || '\\' == *e )
360 break;
361 return ret;
362 }
363
364 /* set extension. fname MUST already end with .xxx.
365 if how has OPEN_UC set, use uppercase extension
366 */
367 static char *setext ( char *fname, const char *ext, int how )
368 {
369 int l = strlen( fname ) - 4;
370 memcpy( fname+l, ext, 4 );
371 if ( OPEN_UC & how ) {
372 char *p = fname+l;
373 for ( ;*p; p++ ) /* use uppercase extensions */
374 if ( 'a' <= *p && *p <= 'z' )
375 *p -= 'a'-'A';
376 }
377 return fname;
378 }
379
380 /**
381 try to open all files according to how.
382 ldb is only interested in seekable readable true files.
383 @return
384 1 if all files could be opened writable
385 0 if all files could be opened readonly,
386 and that was requested by a RD mode or try write
387 something negative else
388 */
389 static int openfiles ( int *fid, char *path,
390 const char * const *ext, int nfiles, int how )
391 {
392 int i;
393 int wr = LIO_WR&how ? 1 : 0, mode = LIO_WANT & how;
394
395 for ( i=0; i<nfiles; i++ ) {
396 setext( path, ext[i], how );
397 fid[i] = lio_open( path, mode & LIO_WANT );
398 log_msg( LOG_INFO, "opening file '%s' %c 0x%x",
399 path, wr ? 'w' : 'r', fid[i] );
400 if ( 0 < fid[i] ) { /* ok */
401 mode &= ~LIO_FLOCK; /* lock only leading file */
402 continue;
403 }
404 fid[i] = 0;
405 while ( i-- ) /* close others */
406 lio_close( &fid[i], LIO_INOUT );
407 if ( OPEN_TRY & how )
408 return openfiles( fid, path, ext, nfiles,
409 (how & ~(OPEN_TRY|LIO_WR)) | LIO_TRY );
410 return LIO_TRY&how ? -ERR_BADF /* silent */
411 : log_msg( LOG_SYSERR, "could not open file '%s' for %sing",
412 path, wr ? "writ" : "read" );
413 }
414 return wr; /* good */
415 } /* openfiles */
416
417
418 static int closefiles ( int *fid, int nfiles )
419 {
420 int ret = 0, i;
421 for ( i=0; i<nfiles; i++ )
422 if ( 0 < fid[i] && LIO_INOUT & fid[i] )
423 lio_close( &fid[i], LIO_INOUT );
424 return ret;
425 } /* closefiles */
426
427
428 static int readblk ( void *dst, int siz, int fid, int where )
429 {
430 int got;
431 got = lio_pread( &fid, dst, siz, where );
432 if ( 0 > got )
433 return got;
434 #ifndef NDEBUG
435 if ( LOG_DO( LOG_ALL ) )
436 LOG_HEX( dst, got );
437 #endif
438 if ( siz == (int)got )
439 return 0;
440 log_msg( LOG_WARN, "got %u bytes wanted %d at %d in 0x%x",
441 got, siz, where, fid );
442 return 1+(int)got;
443 } /* readblk */
444
445
446 /* ************************************************************
447 end of io section
448 */
449
450 static int *nrec ( int *xstr )
451 {
452 int *dst = (int*)mAlloc( xstr[LSTR_ILEN] );
453 if ( dst )
454 *dst = *xstr;
455 return dst;
456 } /* nrec */
457
458
459
/* fixed size byte blocks; struct assignment copies 8, 4 or 2 bytes at once */
typedef struct b8 {
	char x[8];
} b8;
typedef struct b4 {
	char x[4];
} b4;
typedef struct b2 {
	char x[2];
} b2;
463
464
465 static int convert ( int *dst, char *src, int *xstr )
466 {
467 static int pow2[] = { 1, 2, 4, 8 };
468 int occ = -1;
469 int *xmbrs = xstr+LSTR_XMBR;
470 int nmbrs = LSTRFIX(*xstr);
471 int *mbr;
472 char *buf, *part = src, *srcend = src + xstr[LSTR_XLEN];
473
474 if ( ! dst )
475 return log_msg( ERR_NOMEM, "out of memory (no dst) in convert" );
476 if ( LSTRLEN(*xstr) > xstr[LSTR_ILEN] )
477 return log_msg( ERR_INVAL, "bad ilen %d need %d",
478 xstr[LSTR_ILEN], (int)LSTRLEN(*xstr) );
479 if ( *dst != *xstr )
480 return log_msg( ERR_INVAL, "bad *dst 0x%08x need 0x%08x", *dst, *xstr );
481 /* clean and re-init */
482 memset( dst, 0, xstr[LSTR_ILEN] );
483 *dst = *xstr;
484 mbr = dst+1;
485 buf = ((char*)dst) + LSTRLEN(*dst);
486
487 /* cvt the fixed part (occ==-1) and each occurrence of repeated part. */
488 for ( ;/* occ < LSTROCC(*dst) */; ) { /* cvt one part */
489 int i;
490 for ( i=0; i<nmbrs; i++, mbr++ ) { /* assign one xmbr */
491 int xmbr = xmbrs[i];
492 char *s = part + LONG2OFF(xmbr);
493 int sbytes = srcend - s;
494 int len,j;
495 union {
496 char buf[8];
497 b8 x8;
498 b4 x4;
499 b2 x2;
500 short s;
501 int i;
502 lll ll;
503 } num;
504 if ( LMBRISNUM( xmbr ) ) {
505 /* numeric data */
506 #ifdef LDB_BIG_ENDIAN
507 # define NEEDSWAP(xmbr) ( ! ( LMBR_FHIE & (xmbr) ) )
508 #else
509 # define NEEDSWAP(xmbr) (LMBR_FHIE & (xmbr))
510 #endif
511 int ld = LMBRLD(xmbr);
512 #ifndef LDB_NEEDALIGN
513 if ( ! NEEDSWAP( xmbr ) ) {
514 /* much faster this way ... */
515 /* TODO: len and bit checks; actually not needed yet ... */
516 switch ( ld ) {
517 case 3: *mbr = *(lll*)s; break;
518 case 2: *mbr = *(int*)s; break;
519 case 1: *mbr = *(short*)s; break;
520 case 0: *mbr = *s; break;
521 }
522 continue;
523 }
524 #endif
525 len = pow2[ LMBRLD(xmbr) ];
526 if ( len > sbytes )
527 return log_msg( ERR_INVAL,
528 "srcbuf too short %d have %d need %d occ %d mbr %d",
529 xstr[LSTR_XLEN], sbytes, len, occ, i );
530 if ( !NEEDSWAP( xmbr ) )
531 /* for ( j = len; j--; ) num.buf[j] = s[j]; */
532 switch ( ld ) {
533 case 3: num.x8 = *(b8*)s; break;
534 case 2: num.x4 = *(b4*)s; break;
535 case 1: num.x2 = *(b2*)s; break;
536 case 0: num.buf[0] = *s; break;
537 }
538 else /* swap bytes */
539 for ( j = len; j--; )
540 num.buf[j] = s[len - 1 - j];
541 switch ( len ) {
542 case 8: *mbr = num.ll; break;
543 /* TODO: defines for 16 and 64 bit compilers */
544 case 4: *mbr = num.i; break;
545 case 2: *mbr = num.s; break;
546 case 1: *mbr = num.buf[0]; break;
547 }
548 if ( LMBRISBITS( xmbr ) ) { /* apply bit shift and mask */
549 *mbr >>= LMBRBITOFF( xmbr );
550 *mbr &= ~(-1L << LMBRBITLEN( xmbr ));
551 }
552 continue;
553 }
554 /* else raw data -- that's easy :) */
555 {
556 int offset = buf - (char*)dst;
557 int need;
558 len = LONG2LEN(xmbr);
559 need = offset + len;
560 if ( need > xstr[LSTR_ILEN] )
561 return log_msg( ERR_INVAL,
562 "bad buflen %d need %d+%d in occ %d mbr %d",
563 xstr[LSTR_ILEN], offset, len, occ, i );
564 if ( len > sbytes )
565 return log_msg( ERR_INVAL,
566 "srcbuf too short %d have %d need %d occ %d mbr %d",
567 xstr[LSTR_XLEN], sbytes, len, occ, i );
568 memcpy( buf, s, len );
569 *mbr = buf - (char*)dst;
570 buf += len;
571 }
572 } /* for mbrs */
573
574 if ( ++occ >= LSTROCC(*dst) )
575 break;
576 if ( occ )
577 part += (short)xstr[LSTR_XRLO]; /* adv. rep. part len */
578 else { /* was the fixed part, setup for repeated */
579 nmbrs = LSTRREP(*xstr);
580 part += (short)(xstr[LSTR_XRLO]>>16); /* adv. rep. part off */
581 xmbrs += i;
582 }
583 }
584 return 0;
585 } /* convert */
586
587
588 static int readrec ( int *dst, int fid, int where, int *xstr )
589 {
590 char *buf = (char *)
591 #ifdef alloca
592 alloca( xstr[LSTR_XLEN] )
593 #else
594 mAlloc( xstr[LSTR_XLEN] )
595 #endif
596 ;
597 int got = 0;
598 int ret = 0;
599
600 if ( ! buf )
601 return -ERR_NOMEM;
602 if ( 0 > where )
603 where = -where * xstr[LSTR_XLEN];
604 got = readblk( buf, xstr[LSTR_XLEN], fid, where );
605 ret = got ? got : convert( dst, buf, xstr );
606 #ifndef alloca
607 mFree( buf );
608 #endif
609