/[webpac]/trunk/openisis/ldb.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/openisis/ldb.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 239 - (show annotations)
Mon Mar 8 17:49:13 2004 UTC (20 years ago) by dpavlin
File MIME type: text/plain
File size: 80336 byte(s)
including openisis 0.9.0 into webpac tree

1 /*
2 openisis - an open implementation of the CDS/ISIS database
3 Version 0.8.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: ldb.c,v 1.95 2003/06/10 11:00:34 kripke Exp $
25 implementation of general db access functions.
26 */
27
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h> /* PATH_MAX */
31 #include <errno.h>
32
33
34 /* special */
35 #if defined( __GNUC__ ) && defined ( alloca )
36 #include <alloca.h>
37 #endif
38
39 /* gcc defines always a cpu type - this we use for byteorder checking */
40 #if defined( sparc ) || defined( __ppc__ )
41 # define LDB_BIG_ENDIAN
42 /* TODO: figure out fastest "htonl" on those boxes that usually don't swap */
/* return i with its four bytes in reversed order */
static int rvi ( int i )
{
	const char *from = (const char*)&i;
	int swapped;
	char *to = (char*)&swapped;
	int n;

	for ( n = 0; n < 4; n++ )
		to[n] = from[3 - n];
	return swapped;
}
/* return i with its two bytes exchanged */
static short rvs ( short i )
{
	const char *from = (const char*)&i;
	short swapped;
	char *to = (char*)&swapped;

	to[1] = from[0];
	to[0] = from[1];
	return swapped;
}
57 #define SWI( i ) i = rvi( i )
58 #define SWS( s ) s = rvs( s )
59 #else
60 # define rvi(i) i
61 # define rvs(s) s
62 #define SWI( i )
63 #define SWS( s )
64 #endif
65 #if defined( sparc )
66 # define LDB_NEEDALIGN
67 #endif
68 #if defined( LDB_NEEDALIGN )
/* fetch 4 bytes from a possibly unaligned address m as an unsigned */
static unsigned GETINT ( const void *m )
{
	const unsigned char *from = (const unsigned char*)m;
	unsigned val;
	unsigned char *to = (unsigned char*)&val;
	int n;

	for ( n = 0; n < 4; n++ )
		to[n] = from[n];
	return val;
}
/* fetch 2 bytes from a possibly unaligned address m as an unsigned short */
static unsigned short GETSHORT ( const void *m )
{
	const unsigned char *from = (const unsigned char*)m;
	unsigned short val;
	unsigned char *to = (unsigned char*)&val;

	to[0] = from[0];
	to[1] = from[1];
	return val;
}
81 #else
82 #define GETINT( m ) (*(unsigned*)(m))
83 #define GETSHORT( m ) (*(unsigned short*)(m))
84 #endif
85
86 #include "lstr.h"
87 #include "lio.h"
88 #include "lbt.h"
89 #include "lcs.h"
90 #include "ldb.h"
91 #include "lfdt.h"
92 #include "luti.h"
93
94 #ifdef WIN32
95 #define IsAbsPath(p) \
96 ((p) && *(p) && ( \
97 '/' == *(p) || '\\' == *(p) || ( \
98 ':' == (p)[1] && ( \
99 '/' == (p)[2] || '\\' == (p)[2] \
100 ))))
101 #else
102 #define IsAbsPath(p) \
103 ((p) && '/' == *(p))
104 #endif
105
106
107 #define LF 10 /* LineFeed a.k.a. newline - '\n' isn't really well defined */
108 #define TAB 9 /* horizontal, that is */
109 #define VT 11 /* vertical, used as newline replacement */
110
111 /* ************************************************************
112 private types
113 */
114
/** extension of master file proper. */
static const char EXT_MST_MST[] = ".mst";
/** extension of master file xref. */
static const char EXT_MST_XRF[] = ".xrf";

/** index of each master file part; MST_FILES is the number of parts. */
typedef enum {
	MST_MST,
	MST_XRF,
	MST_FILES
} mst_file;

/** master file extensions, indexed by mst_file. */
static const char * const EXT_MST[MST_FILES] = {
	EXT_MST_MST,
	EXT_MST_XRF
};
130
/** extension of inverted file short term nodes. */
static const char EXT_INV_N01[] = ".n01";
/** extension of inverted file short term leaves. */
static const char EXT_INV_L01[] = ".l01";
/** extension of inverted file long term nodes. */
static const char EXT_INV_N02[] = ".n02";
/** extension of inverted file long term leaves. */
static const char EXT_INV_L02[] = ".l02";
/** extension of inverted file control records. */
static const char EXT_INV_CNT[] = ".cnt";
/** extension of inverted file postings. */
static const char EXT_INV_IFP[] = ".ifp";

/** index of each inverted file part; INV_FILES is the number of parts. */
typedef enum {
	INV_N01,
	INV_L01,
	INV_N02,
	INV_L02,
	INV_CNT,
	INV_IFP,
	INV_FILES
} inv_file;

/** inverted file extensions, indexed by inv_file. */
static const char * const EXT_INV[INV_FILES] = {
	EXT_INV_N01,
	EXT_INV_L01,
	EXT_INV_N02,
	EXT_INV_L02,
	EXT_INV_CNT,
	EXT_INV_IFP
};
162
/** extension of lbt B-Link-Tree.
	It's named oxi because that is nicer than oix for OpenIsis indeX.
	however, see http://www.oxicenter.com.br/
*/
static const char EXT_LBT_OXI[] = ".oxi";
/** list of lbt extensions (a single entry). */
static const char * const EXT_LBT[] = {
	EXT_LBT_OXI
};
171
172
/** plaintext master file
*/
static const char EXT_TXT_TXT[] = ".txt";
static const char EXT_TXT_PTR[] = ".ptr";
/* NOTE(review): .opt is not part of EXT_TXT below; its use is outside this view */
static const char EXT_TXT_OPT[] = ".opt";
/** index of each plaintext db file; TXT_FILES is the number of files. */
typedef enum {
	TXT_TXT,
	TXT_PTR,
	TXT_FILES
} txt_file;
/** plaintext db extensions, listed in txt_file order. */
static const char * const EXT_TXT[] = {
	EXT_TXT_TXT,
	EXT_TXT_PTR
};

static const char ISIX[] = "ISIX"; /* ptr magic */
189
190
/** extension of supporting file alpha character table. */
static const char EXT_SUP_ACT[] = ".act";
/** extension of supporting file uppercase table. */
static const char EXT_SUP_UCT[] = ".uct";

/** index of each supporting file; SUP_FILES is the number of files. */
typedef enum {
	SUP_ACT,
	SUP_UCT,
	SUP_FILES
} sup_file;

/** supporting file extensions, indexed by sup_file. */
static const char * const EXT_SUP[SUP_FILES] = {
	EXT_SUP_ACT,
	EXT_SUP_UCT
};
206
207
208 typedef int lblk[128];
209
210
/** state of one database slot: open file ids, cached control records
	and the xrf / pointer file map.
*/
typedef struct {
	Db head; /* head.dbid is stamped into the records getMfr returns */
	int flags; /* DB_* bits and DB_VARI variant; 0 marks an unused slot (see getDb) */
	const char *path;
	int mst[MST_FILES]; /* master file */
	int inv[INV_FILES]; /* primary inverted file */
	int mfc[LMFC__FL]; /* master file control record */
	unsigned mflen; /* master file length */
	int xrf[129]; /* last used xrf block : THREAD THREAT */
	int xrlen; /* length of xrf (in blocks) */
	unsigned short ptr; /* type of pointer file (new style xrf) */
	unsigned short ptrl; /* pointer bytes, 512 for old xrf */
	char *mmap; /* memory map of xrf/ptr */
	int mmlen; /* length of map (in ptrl) */
	int cnt[LDB_INDEXES][LCNT__FL]; /* two cnt records */
	short tlen[LDB_INDEXES]; /* max term length for each index */
	LcsTab ctab[LCS__TABS];
	Idx oxi;
} LDb;
230
231
/** scratch buffer for one pointer file entry (up to 16 bytes),
	used by setPtr and getPtr.
	NOTE(review): the lll member presumably only forces alignment - confirm.
*/
typedef union {
	lll bar;
	char r[16];
} Ptr;
236
237
238 /* db flags */
239 #define DB_OPEN 0x010000
240 #define DB_INVOPEN 0x020000
241 #define DB_LBTOPEN 0x040000
242 #define DB_TXTOPEN 0x080000
243 #define DB_WRITABLE 0x100000
244 #define DB_MODIFIED 0x200000
245
246 #define DB_TXTMODE 0x20
247 #define DB_MMAP 0x10
248 #define DB_VARI 0xf /* mask for variant */
249
250
251 /* get xstr for record rec in set */
252 #define DB_XSTR( db, set, rec ) \
253 lstrlib[ set ].desc[ DB_VARI & (db)->flags ][ rec ]
254 /* get record names for record rec in set */
255 #define DB_RNAM( db, set, rec ) \
256 lstrlib[ set ].name[ rec ]
257
258
/** packed little endian masterfile control structure.
	NOTE(review): nmfn / nmfb / nmfp presumably mirror the LMFC_NMFN /
	LMFC_NMFB / LMFC_NMFP entries (next mfn, next block, next position
	in block) - confirm against the format documentation.
*/
typedef struct Mfc {
	int ctlm;
	int nmfn;
	int nmfb;
	short nmfp;
	short type;
	int rcnt;
	int mfx1;
	int mfx2;
	int mfx3;
} Mfc;
272
273
/** packed little endian directory entry of a masterfile record:
	one entry per field (see Mfr.dict).
*/
typedef struct Dict {
	short tag; /* field tag */
	short pos; /* position of the field value, relative to the record's base */
	short len; /* length of the field value in bytes */
} Dict;
281
/** packed little endian masterfile record.
	The fixed leader is followed by nvf directory entries and then,
	starting at offset base, the field values.
*/
typedef struct Mfr {
	int mfn;
	short recl; /* a.k.a. mfrl */
	short bwbl; /* low part of int */
	short bwbh; /* high part of int */
	short bwp;
	/* it is believed that these first five fields up to here (12 bytes packed)
	are to be in one 512-byte block; the manual mentions even 14 bytes ... ???
	*/
	short base; /* offset of the field values; putMfr requires 18 + 6*nvf */
	short nvf; /* number of directory entries */
	short stat;
	Dict dict[1]; /* first of nvf entries */
} Mfr;
298
299
300
301 /* ************************************************************
302 private data
303 */
304
305 static LDb defdbspace[32];
306 /* array of open dbs. should expand dynamically. */
307 static LDb *dbs = defdbspace;
308 static int dbs_len = sizeof(defdbspace)/sizeof(defdbspace[0]);
309
310 static int init;
311
312
313 /* ************************************************************
314 private functions
315 */
316 static LDb *getDb ( int id )
317 {
318 if ( 0 <= id && id < dbs_len && dbs[id].flags ) {
319 return &dbs[id];
320 }
321 log_msg( LOG_ERROR, "attempt to access bad db id %d", id );
322 return 0;
323 } /* getDb */
324
325
326 /* ************************************************************
327 start of io section
328 */
/* open modes for openfiles: two private flags plus the combinations
	of LIO_* and private flags used by this file.
*/
enum {
	/* additional flags in the LIO_FD range */
	OPEN_TRY = 1, /* try writable, open readonly else */
	OPEN_UC = 2, /* use uppercase ext */
	/* commonly used combinations */
	/* 1) open as is, do not complain about any failure, do not create */
	OPEN_ASIS = LIO_SEEK|LIO_RDWR|OPEN_TRY,
	/* 2) open readonly, do not complain about any failure, do not create */
	OPEN_RDIF = LIO_SEEK|LIO_RD|LIO_TRY,
	/* 3) open readonly, complain about any failure */
	OPEN_RD = LIO_SEEK|LIO_RD,
	/* 4) open or create writable, complain on failure */
	OPEN_NEW = LIO_SEEK|LIO_RDWR|LIO_CREAT,
	/* 5) like 4), but also truncate */
	OPEN_BLANK = LIO_SEEK|LIO_RDWR|LIO_CREAT|LIO_TRUNC
};
344
345 /* figure out wether to use uppercase extension on path.
346 if last path component (everything after the last / and \)
347 does contain an uppercase ascii and does not contain a lowercase ascii,
348 return OPEN_UC, else 0.
349 */
350 static int autocase ( const char *path )
351 {
352 int ret = 0;
353 const char *e = path + strlen( path );
354 while ( e-- > path )
355 if ( 'A'<=*e && *e<= 'Z' )
356 ret = OPEN_UC;
357 else if ( 'a'<=*e && *e<= 'z' )
358 return 0;
359 else if ( '/'==*e || '\\' == *e )
360 break;
361 return ret;
362 }
363
364 /* set extension. fname MUST already end with .xxx.
365 if how has OPEN_UC set, use uppercase extension
366 */
367 static char *setext ( char *fname, const char *ext, int how )
368 {
369 int l = strlen( fname ) - 4;
370 memcpy( fname+l, ext, 4 );
371 if ( OPEN_UC & how ) {
372 char *p = fname+l;
373 for ( ;*p; p++ ) /* use uppercase extensions */
374 if ( 'a' <= *p && *p <= 'z' )
375 *p -= 'a'-'A';
376 }
377 return fname;
378 }
379
380 /**
381 try to open all files according to how.
382 ldb is only interested in seekable readable true files.
383 @return
384 1 if all files could be opened writable
385 0 if all files could be opened readonly,
386 and that was requested by a RD mode or try write
387 something negative else
388 */
389 static int openfiles ( int *fid, char *path,
390 const char * const *ext, int nfiles, int how )
391 {
392 int i;
393 int wr = LIO_WR&how ? 1 : 0, mode = LIO_WANT & how;
394
395 for ( i=0; i<nfiles; i++ ) {
396 setext( path, ext[i], how );
397 fid[i] = lio_open( path, mode & LIO_WANT );
398 log_msg( LOG_INFO, "opening file '%s' %c 0x%x",
399 path, wr ? 'w' : 'r', fid[i] );
400 if ( 0 < fid[i] ) { /* ok */
401 mode &= ~LIO_FLOCK; /* lock only leading file */
402 continue;
403 }
404 fid[i] = 0;
405 while ( i-- ) /* close others */
406 lio_close( &fid[i], LIO_INOUT );
407 if ( OPEN_TRY & how )
408 return openfiles( fid, path, ext, nfiles,
409 (how & ~(OPEN_TRY|LIO_WR)) | LIO_TRY );
410 return LIO_TRY&how ? -ERR_BADF /* silent */
411 : log_msg( LOG_SYSERR, "could not open file '%s' for %sing",
412 path, wr ? "writ" : "read" );
413 }
414 return wr; /* good */
415 } /* openfiles */
416
417
418 static int closefiles ( int *fid, int nfiles )
419 {
420 int ret = 0, i;
421 for ( i=0; i<nfiles; i++ )
422 if ( 0 < fid[i] && LIO_INOUT & fid[i] )
423 lio_close( &fid[i], LIO_INOUT );
424 return ret;
425 } /* closefiles */
426
427
428 static int readblk ( void *dst, int siz, int fid, int where )
429 {
430 int got;
431 got = lio_pread( &fid, dst, siz, where );
432 if ( 0 > got )
433 return got;
434 #ifndef NDEBUG
435 if ( LOG_DO( LOG_ALL ) )
436 LOG_HEX( dst, got );
437 #endif
438 if ( siz == (int)got )
439 return 0;
440 log_msg( LOG_WARN, "got %u bytes wanted %d at %d in 0x%x",
441 got, siz, where, fid );
442 return 1+(int)got;
443 } /* readblk */
444
445
446 /* ************************************************************
447 end of io section
448 */
449
450 static int *nrec ( int *xstr )
451 {
452 int *dst = (int*)mAlloc( xstr[LSTR_ILEN] );
453 if ( dst )
454 *dst = *xstr;
455 return dst;
456 } /* nrec */
457
458
459
460 typedef struct b8 { char x[8]; } b8;
461 typedef struct b4 { char x[4]; } b4;
462 typedef struct b2 { char x[2]; } b2;
463
464
465 static int convert ( int *dst, char *src, int *xstr )
466 {
467 static int pow2[] = { 1, 2, 4, 8 };
468 int occ = -1;
469 int *xmbrs = xstr+LSTR_XMBR;
470 int nmbrs = LSTRFIX(*xstr);
471 int *mbr;
472 char *buf, *part = src, *srcend = src + xstr[LSTR_XLEN];
473
474 if ( ! dst )
475 return log_msg( ERR_NOMEM, "out of memory (no dst) in convert" );
476 if ( LSTRLEN(*xstr) > xstr[LSTR_ILEN] )
477 return log_msg( ERR_INVAL, "bad ilen %d need %d",
478 xstr[LSTR_ILEN], (int)LSTRLEN(*xstr) );
479 if ( *dst != *xstr )
480 return log_msg( ERR_INVAL, "bad *dst 0x%08x need 0x%08x", *dst, *xstr );
481 /* clean and re-init */
482 memset( dst, 0, xstr[LSTR_ILEN] );
483 *dst = *xstr;
484 mbr = dst+1;
485 buf = ((char*)dst) + LSTRLEN(*dst);
486
487 /* cvt the fixed part (occ==-1) and each occurrence of repeated part. */
488 for ( ;/* occ < LSTROCC(*dst) */; ) { /* cvt one part */
489 int i;
490 for ( i=0; i<nmbrs; i++, mbr++ ) { /* assign one xmbr */
491 int xmbr = xmbrs[i];
492 char *s = part + LONG2OFF(xmbr);
493 int sbytes = srcend - s;
494 int len,j;
495 union {
496 char buf[8];
497 b8 x8;
498 b4 x4;
499 b2 x2;
500 short s;
501 int i;
502 lll ll;
503 } num;
504 if ( LMBRISNUM( xmbr ) ) {
505 /* numeric data */
506 #ifdef LDB_BIG_ENDIAN
507 # define NEEDSWAP(xmbr) ( ! ( LMBR_FHIE & (xmbr) ) )
508 #else
509 # define NEEDSWAP(xmbr) (LMBR_FHIE & (xmbr))
510 #endif
511 int ld = LMBRLD(xmbr);
512 #ifndef LDB_NEEDALIGN
513 if ( ! NEEDSWAP( xmbr ) ) {
514 /* much faster this way ... */
515 /* TODO: len and bit checks; actually not needed yet ... */
516 switch ( ld ) {
517 case 3: *mbr = *(lll*)s; break;
518 case 2: *mbr = *(int*)s; break;
519 case 1: *mbr = *(short*)s; break;
520 case 0: *mbr = *s; break;
521 }
522 continue;
523 }
524 #endif
525 len = pow2[ LMBRLD(xmbr) ];
526 if ( len > sbytes )
527 return log_msg( ERR_INVAL,
528 "srcbuf too short %d have %d need %d occ %d mbr %d",
529 xstr[LSTR_XLEN], sbytes, len, occ, i );
530 if ( !NEEDSWAP( xmbr ) )
531 /* for ( j = len; j--; ) num.buf[j] = s[j]; */
532 switch ( ld ) {
533 case 3: num.x8 = *(b8*)s; break;
534 case 2: num.x4 = *(b4*)s; break;
535 case 1: num.x2 = *(b2*)s; break;
536 case 0: num.buf[0] = *s; break;
537 }
538 else /* swap bytes */
539 for ( j = len; j--; )
540 num.buf[j] = s[len - 1 - j];
541 switch ( len ) {
542 case 8: *mbr = num.ll; break;
543 /* TODO: defines for 16 and 64 bit compilers */
544 case 4: *mbr = num.i; break;
545 case 2: *mbr = num.s; break;
546 case 1: *mbr = num.buf[0]; break;
547 }
548 if ( LMBRISBITS( xmbr ) ) { /* apply bit shift and mask */
549 *mbr >>= LMBRBITOFF( xmbr );
550 *mbr &= ~(-1L << LMBRBITLEN( xmbr ));
551 }
552 continue;
553 }
554 /* else raw data -- that's easy :) */
555 {
556 int offset = buf - (char*)dst;
557 int need;
558 len = LONG2LEN(xmbr);
559 need = offset + len;
560 if ( need > xstr[LSTR_ILEN] )
561 return log_msg( ERR_INVAL,
562 "bad buflen %d need %d+%d in occ %d mbr %d",
563 xstr[LSTR_ILEN], offset, len, occ, i );
564 if ( len > sbytes )
565 return log_msg( ERR_INVAL,
566 "srcbuf too short %d have %d need %d occ %d mbr %d",
567 xstr[LSTR_XLEN], sbytes, len, occ, i );
568 memcpy( buf, s, len );
569 *mbr = buf - (char*)dst;
570 buf += len;
571 }
572 } /* for mbrs */
573
574 if ( ++occ >= LSTROCC(*dst) )
575 break;
576 if ( occ )
577 part += (short)xstr[LSTR_XRLO]; /* adv. rep. part len */
578 else { /* was the fixed part, setup for repeated */
579 nmbrs = LSTRREP(*xstr);
580 part += (short)(xstr[LSTR_XRLO]>>16); /* adv. rep. part off */
581 xmbrs += i;
582 }
583 }
584 return 0;
585 } /* convert */
586
587
588 static int readrec ( int *dst, int fid, int where, int *xstr )
589 {
590 char *buf = (char *)
591 #ifdef alloca
592 alloca( xstr[LSTR_XLEN] )
593 #else
594 mAlloc( xstr[LSTR_XLEN] )
595 #endif
596 ;
597 int got = 0;
598 int ret = 0;
599
600 if ( ! buf )
601 return -ERR_NOMEM;
602 if ( 0 > where )
603 where = -where * xstr[LSTR_XLEN];
604 got = readblk( buf, xstr[LSTR_XLEN], fid, where );
605 ret = got ? got : convert( dst, buf, xstr );
606 #ifndef alloca
607 mFree( buf );
608 #endif
609
610 return ret;
611 } /* readrec */
612
613
614 /* read and log */
615 static int readlog ( int *dst, int fid, int where, LDb *db, int set, int rec )
616 {
617 int ret = readrec( dst, fid, where, DB_XSTR( db, set, rec ) );
618 if ( !ret && LOG_DO( LOG_TRACE ) )
619 LOG_STR( dst, lstrlib[ set ].name[ rec ] );
620 return ret;
621 } /* readlog */
622
623
624
625 /* ************************************************************
626 access functions for the record types
627 */
628
629 static int getOff ( LDb *db, int rowid, int xr )
630 {
631 int rowix = rowid - 1; /* mfns count from 1 */
632 int xrf_block = rowix / 127;
633 int blkix = xrf_block + 1; /* ... so do xrf blocks */
634 int off;
635 if ( xrf_block < db->mmlen ) {
636 if ( xr )
637 ((lblk*)db->mmap)[xrf_block][1+(int)(rowix % 127)] = rvi( xr );
638 else
639 xr = rvi( ((lblk*)db->mmap)[xrf_block][1+(int)(rowix % 127)] );
640 } else {
641 int err = 0;
642 /* if ( LIO_LOCK() ) return -3; */
643 if ( xr ) { /* write */
644 if ( blkix <= db->xrlen ) {
645 SWI( xr );
646 if ( 4 != lio_pwrite( &db->mst[MST_XRF], &xr, 4,
647 xrf_block*512 + 4*(1 + (rowix % 127))) )
648 return 0;
649 } else { /* extent */
650 lblk extend;
651 memset( extend, 0, sizeof(extend) );
652 while ( db->xrlen < blkix ) { /* extend */
653 extend[0] = db->xrlen + 1; /* set blk id */
654 if ( blkix == extend[0] )
655 extend[1+(int)(rowix % 127)] = rvi( xr );
656 SWI( extend[0] );
657 if ( 512 != lio_pwrite( &db->mst[MST_XRF], extend, 512,
658 db->xrlen*512) )
659 return 0;
660 db->xrlen++;
661 }
662 }
663 if ( blkix == db->xrf[LXRF_XPOS] )
664 db->xrf[ LXRF_XREC + (int)(rowix % 127) ] = xr;
665 /* and go on read it back, just to check ... */
666 }
667 if ( blkix != db->xrf[LXRF_XPOS] ) {
668 int ret;
669 LOG_DBG( LOG_VERBOSE, "fetching xrf block %d had %d",
670 blkix, ! db->xrf ? -1 : db->xrf[LXRF_XPOS] );
671 ret = readlog( db->xrf, db->mst[MST_XRF],
672 -xrf_block, db, LSET_MST, LSTR_XRF );
673 if ( ret ) {
674 log_msg( LOG_ERROR, "\twhen fetching xrf block %d", blkix );
675 err = -1;
676 } else if ( blkix == -db->xrf[LXRF_XPOS] ) {
677 LOG_DBG( LOG_DEBUG, "hmmm ... negative" );
678 db->xrf[LXRF_XPOS] = blkix;
679 } else if ( blkix != db->xrf[LXRF_XPOS] ) {
680 log_msg( LOG_WARN, "bad xrf %d wanted %d",
681 db->xrf[LXRF_XPOS], blkix );
682 err = -2;
683 }
684 }
685 xr = db->xrf[ LXRF_XREC + (int)(rowix % 127) ];
686 /* LIO_RELE(); */
687 if ( err )
688 return err;
689 }
690 /*
691 21 bits (<<11) signed for the (512 byte) block ("xrmfb")
692 1 for the first block (offset 0)
693 0 means, never had such a record
694 -1 and xrmfp=0: record removed from MST
695 (there is no record at pos 0 in 1st block,
696 since there resides the MST header)
697 other negative value -x or pos!=0:
698 record logically deleted, was at +x
699 1 bit (1<<10): this record is new and not yet inverted
700 1 bit (1<<9): this record is changed and not yet re-inverted
701 9 bits for the block-relative position ("xrmfp")
702 */
703 off = (((xr & 0xfffff800) >> 2) - 0x200) | (0x1ff & xr);
704 if ( 0 < (xr & ~0x600) ) {
705 LOG_DBG( LOG_DEBUG,
706 "offset for rowid %d is %d (blk %d pos %d) flg 0x%08x at %d[%d]",
707 rowid, off, (xr>>11)&0xfffff, xr&0x1ff, xr&0x80000600, blkix, rowix%127 );
708 return off;
709 }
710 /* deleted */
711 log_msg( LOG_INFO,
712 "offset for rowid %d is %d (blk %d pos %d) flg 0x%08x at %d[%d]",
713 rowid, off, (xr>>11)&0xfffff, xr&0x1ff, xr&0x80000600, blkix, rowix%127 );
714 return 0;
715 } /* getOff */
716
717
/* read the master file record at byte offset off and return it
	as a newly allocated internal record (int array), or 0.
	The record leader is read first to learn the record length, then the
	whole record is read, converted, checked, and the field values are
	copied behind the directory of the internal record.
	If nxtoff is given, *nxtoff is set to the offset following this
	record (rounded up to even and to the next block where needed),
	or to -1 at the end of the db or on error.
	0 is returned for a deleted record (negative length), for a record
	with non-zero status, and on any read, allocation or consistency error.
	On success the first int of the record is set to db->head.dbid.
*/
static int* getMfr ( LDb *db, int off, int *nxtoff )
{
	/* private, modifiable copy of the MFR structure description */
	struct mfcxstr {
		int xstr[LSTR_LONGS(7+3)];
	} my = *(struct mfcxstr *)DB_XSTR( db, LSET_MST, LSTR_MFR );
	int head[1+7+3];
	int len = 0, base;
	int *rec = 0;
	char *buf = 0;
#ifdef alloca
	int notalloca = 0;
#endif

	LOG_DBG( LOG_VERBOSE, "getting MFR at off %d", off );
	if ( 498 < off % 512 )
		log_msg( LOG_WARN, "blk pos > 498 in offset 0x%08x", off );
	/* convert (called by readrec) insists on *dst == *xstr */
	*head = *my.xstr;
	if ( readrec( head, db->mst[MST_MST], off, my.xstr ) ) {
		log_msg( LOG_ERROR, "\twhen reading MFR head at %d", off );
		return 0;
	}
	/* log_str( LOG_VERBOSE, head, lstrlib[LSET_MST].name[LSTR_MFR] ); */
	len = head[LMFR_RECL];
	LOG_DBG( LOG_VERBOSE, "got MFR %d reclen %d", head[LMFR_MFN], len );
	if ( nxtoff ) {
		*nxtoff = off + (0 < len ? len : -len); /* an oddity */
		if ( 1 & *nxtoff ) /* an oddity */
			(*nxtoff)++; /* round up to even */
		if ( 498 < *nxtoff % 512 ) {
			/* advance to the start of the next 512 byte block */
			*nxtoff += 512;
			*nxtoff &= ~0x1ff;
		}
		if ( *nxtoff > db->mfc[LMFC_NMFB]*512 + db->mfc[LMFC_NMFP] ) {
			LOG_DBG( LOG_VERBOSE, "at end of db: %d > %d*512+%hd",
				*nxtoff, db->mfc[LMFC_NMFB], db->mfc[LMFC_NMFP] );
			*nxtoff = -1;
		}
	}
	if ( len < 0 ) {
		log_msg( LOG_INFO, "found deleted rec len %hd at offset %d", len, off );
		return 0;
	}

	/* check external base length */
	base = LONG2OFF(my.xstr[LSTR_XRLO])
		+ head[LMFR_NVF]*LONG2LEN(my.xstr[LSTR_XRLO]);
	if ( 0 > head[LMFR_BASE] || 0 > head[LMFR_NVF]
		|| 0x8fff < head[LMFR_NVF]
		|| len < head[LMFR_BASE] || len < base
		|| (head[LMFR_NVF] && head[LMFR_BASE] < base)
	) {
		log_msg( LOG_ERROR,
			"bad len %d base %d nvf %d need base %d at offset %d",
			len, head[LMFR_BASE], head[LMFR_NVF], base, off );
		/* check alignment problem:
			redo the same test with nvf/stat in place of base/nvf;
			if that one passes, the leader is probably shifted */
		base = LONG2OFF(my.xstr[LSTR_XRLO])
			+ head[LMFR_STAT]*LONG2LEN(my.xstr[LSTR_XRLO]);
		if ( 0 > head[LMFR_NVF] || 0 > head[LMFR_STAT]
			|| 0x8fff < head[LMFR_STAT]
			|| len < head[LMFR_NVF] || len < base
			|| (head[LMFR_STAT] && head[LMFR_NVF] < base)
		)
			;
		else
			log_msg( LOG_ERROR, "probably alignment problem, try -format aligned" );
		goto cleanup;
	}

	/* set up external structure for this rec */
	my.xstr[LSTR_SIZE] |= head[LMFR_NVF] << 16; /* occ of rep. part */
	my.xstr[LSTR_XLEN] = len;
	/* internal base length */
	base = LSTRLEN( *my.xstr );
	/* internal len adjusted for the slightly longer base */
	my.xstr[LSTR_ILEN] = len + base - head[LMFR_BASE]; /* the buffer */

	rec = nrec( my.xstr );
	if ( ! rec ) {
		log_msg( LOG_SYSERR, "could not alloc MFR of len %hd", my.xstr[LSTR_ILEN] );
		goto cleanup;
	}
	/* buffer for the external record: alloca where available,
		falling back to mAlloc; notalloca records which one was used */
	buf = (char *)
#ifdef alloca
		alloca( len );
	/* first try faster alloca, but stack may be too limited for large records */
	notalloca = ! buf;
	if ( notalloca )
		buf =
#endif
		mAlloc( len );

	if ( ! buf ) {
		log_msg( LOG_SYSERR, "could not alloc MFR of len %hd", len );
		goto cleanup;
	}
	if ( readblk( buf, len, db->mst[MST_MST], off ) ) {
		log_msg( LOG_ERROR, "\twhen reading MFR" );
		goto cleanup;
	}
	/* on little endian hosts the packed variant is copied directly
		through struct Mfr; everything else goes through convert */
#ifndef LDB_BIG_ENDIAN
	if ( LVAR_PAC != (DB_VARI & db->flags) ) {
#endif
		if ( convert( rec, buf, my.xstr ) ) {
			log_msg( LOG_ERROR, "\twhen converting MFR" );
			goto cleanup;
		}
#ifndef LDB_BIG_ENDIAN
	} else { /* 10% faster */
		Mfr *mfr = (Mfr*)buf;
		short *s = &mfr->dict->tag;
		int *f = rec + LMFR__FL;
		int *e = f + 3*head[LMFR_NVF];
		rec[LMFR_MFN] = mfr->mfn;
		rec[LMFR_RECL] = mfr->recl;
		/*
		rec[LMFR_BWB] = mfr->bwbh<<16 | mfr->bwbl;
		rec[LMFR_BWP] = mfr->bwp;
		*/
		rec[LMFR_BASE] = mfr->base;
		rec[LMFR_NVF] = mfr->nvf;
		rec[LMFR_STAT] = mfr->stat;
		/* widen each directory entry (tag, pos, len) from short to int */
		while ( f < e ) {
			*f++ = *s++;
			*f++ = *s++;
			*f++ = *s++;
		}
	}
#endif

	if ( rec[LMFR_STAT] ) {
		/* record is dropped, but *nxtoff stays valid */
		log_msg( LOG_WARN, "found status %hd", rec[LMFR_STAT] );
		goto cleanok;
	}

	/* do a consistency check */
	if ( rec[LMFR_NVF] < 0 || rec[LMFR_BASE] < 0 ) {
		log_msg( LOG_ERROR, "found neg. field nvf %hd base %hd",
			rec[LMFR_NVF], rec[LMFR_BASE] );
		goto cleanup;
	}

	/* now care for the field values */
	{
		char *valsrc = buf+rec[LMFR_BASE];
		char *recsta = ((char*)rec);
		char *valdst = recsta + base;
		int xbufl = rec[LMFR_RECL] - rec[LMFR_BASE];
		int sumlens = 0;
		int i;
		for ( i=0; i < rec[LMFR_NVF]; i++ ) {
			int *d = &rec[LMFR__FL + i*LMFR__RL];
			if ( d[LMFR_POS] < 0 || d[LMFR_LEN] < 0 ) {
				log_msg( LOG_ERROR,
					"bad field %d at off %d: negativ pos %hd or len %hd",
					i, off, d[LMFR_POS], d[LMFR_LEN] );
				goto cleanup;
			}
			if ( d[LMFR_POS] + d[LMFR_LEN] > xbufl ) {
				log_msg( LOG_ERROR,
					"bad field %d at off %d: pos %hd + len %hd > buf %d",
					i, off, d[LMFR_POS], d[LMFR_LEN], xbufl );
				goto cleanup;
			}
			sumlens += d[LMFR_LEN];
			if ( sumlens > xbufl ) {
				log_msg( LOG_ERROR,
					"bad fields at off %d: sum of lengths %d > buf %d",
					off, sumlens, xbufl );
				goto cleanup;
			}
			/* copy the value and make pos relative to the record start */
			memcpy( valdst, valsrc+d[LMFR_POS], d[LMFR_LEN] );
			d[LMFR_POS] = valdst - recsta;
			valdst += d[LMFR_LEN];
		}
	} /* consistency check */
	rec[LMFR_BWB] = /* "used" bytes */
		rec[LMFR_RECL] = my.xstr[LSTR_ILEN];
	rec[LMFR_BWP] = rec[LMFR_NVF]; /* avail fields = used fields */
	rec[LMFR_BASE] = base;

	if ( LOG_TRACE <= log_lev )
		LOG_STR( rec, lstrlib[LSET_MST].name[LSTR_MFR] );
	goto done;

cleanup: /* error: also invalidate the next offset */
	if ( nxtoff )
		*nxtoff = -1;
cleanok: /* no record to return */
	if ( rec ) {
		mFree( rec );
		rec = 0;
	}
done: /* free buf unless it came from alloca */
	if ( buf
#ifdef alloca
		&& notalloca
#endif
	)
		mFree( buf );
	if ( rec )
		*rec = db->head.dbid;
	return rec;
} /* getMfr */
921
922
923 /** write the record.
924 If it doesn't yet have a mfn, assign one.
925 NOTE: on a BIG_ENDIAN, anything but the mfn and recl will be frobbed
926 after this call
927 */
928 static int putMfr ( LDb *db, Mfr *mfr )
929 {
930 int oldpos, newpos;
931 int ret;
932
933 if ( !(db->flags & DB_WRITABLE) ) {
934 log_msg( LOG_ERROR, "db is not writable" );
935 return -1;
936 }
937 /* minimalist sanity check */
938 if ( mfr->nvf < 0
939 || mfr->base != 18 + 6*mfr->nvf
940 || mfr->base > mfr->recl
941 ) {
942 log_msg( LOG_ERROR, "bad nvf/base/recl %d/%d/%d ",
943 mfr->nvf, mfr->base, mfr->recl );
944 return -2;
945 }
946 db->flags |= DB_MODIFIED;
947 if ( mfr->mfn ) {
948 int block;
949 if ( db->mfc[LMFC_NMFN] > mfr->mfn )
950 oldpos = getOff( db, mfr->mfn, 0 );
951 else {
952 db->mfc[LMFC_NMFN] = mfr->mfn+1;
953 oldpos = 0;
954 }
955 mfr->bwp = 511 & oldpos;
956 block = 1 + (oldpos >> 9); /* blockno counting from 1 */
957 mfr->bwbl = 0xffff & block;
958 mfr->bwbh = block >> 16;
959 if ( db->mfc[LMFC_NMFN] <= mfr->mfn )
960 db->mfc[LMFC_NMFN] = mfr->mfn+1;
961 } else {
962 mfr->mfn = db->mfc[LMFC_NMFN]++; /* assign new mfn */
963 oldpos = 0;
964 mfr->bwbl = mfr->bwbh = mfr->bwp = 0;
965 }
966 mfr->stat = 0;
967 newpos = db->mflen;
968 if ( 498 < (newpos & 511) ) /* round up to next block boundary */
969 newpos = ~511 & (newpos + 14);
970 if ( 1 & newpos )
971 newpos++;
972 db->mflen = newpos + mfr->recl;
973 #ifdef LDB_BIG_ENDIAN
974 { /* swap swap swap */
975 Dict *d = mfr->dict;
976 short nvf = mfr->nvf;
977 SWI( mfr->mfn ); SWS( mfr->recl ); SWS( mfr->bwbl ); SWS( mfr->bwbh );
978 SWS( mfr->bwp ); SWS( mfr->base ); SWS( mfr->nvf ); SWS( mfr->stat );
979 for ( ; nvf--; d++ ) {
980 SWS( d->tag );
981 SWS( d->pos );
982 SWS( d->len );
983 }
984 }
985 #endif
986 ret = lio_pwrite( &db->mst[MST_MST], (char*)mfr, rvs(mfr->recl), newpos );
987 #ifdef LDB_BIG_ENDIAN
988 /* restore mnf, recl */
989 SWI( mfr->mfn );
990 SWS( mfr->recl );
991 #endif
992 if ( ret != mfr->recl )
993 return log_msg( ERR_TRASH, "could not write Mfr %d bytes got %d",
994 mfr->recl, ret );
995 getOff( db, mfr->mfn, (1 << (oldpos ? 9 : 10))
996 | (((newpos & 0xfffffe00) + 0x200) << 2) | (0x1ff & newpos) );
997
998 return 0;
999 } /* putMfr */
1000
1001
1002 static int putRec ( LDb *db, Rec *rec )
1003 {
1004 int ret = 0, i;
1005 Mfr *mfr = 0;
1006 int buflen = 0;
1007 int reclen = 0;
1008 int contig = 1;
1009 #ifdef alloca
1010 int notalloca = 0;
1011 #endif
1012 const char *rbase = ((char *)rec) + rec->base;
1013 Dict *d;
1014 Field *f = rec->field;
1015 /* TODO: if not rec->len, delete ? */
1016 for ( i = rec->len; i--; f++ ) {
1017 if ( ! f->len )
1018 continue;
1019 if ( ! f->val )
1020 LOG_OTO( cleanup, ( ERR_FAULT, "bad rec NULL val" ) );
1021 contig = contig && (f->val == rbase + buflen);
1022 buflen += f->len;
1023 }
1024 reclen = 18 + 6*rec->len + buflen;
1025 if ( 1 & reclen )
1026 reclen++;
1027 mfr = (Mfr*)
1028 #ifdef alloca
1029 alloca( reclen );
1030 notalloca = ! mfr;
1031 if ( notalloca )
1032 mfr = (Mfr*)
1033 #endif
1034 mAlloc( reclen );
1035 if ( ! mfr )
1036 LOG_OTO( cleanup,
1037 ( ERR_NOMEM, "could not alloc MFR of len %hd", reclen ) );
1038 mfr->mfn = rec->rowid;
1039 mfr->recl = reclen;
1040 mfr->bwbl = mfr->bwbh = mfr->bwp = 0;
1041 mfr->base = 18 + 6*rec->len;
1042 mfr->nvf = rec->len;
1043 mfr->stat = 0;
1044 d = mfr->dict;
1045 f = rec->field;
1046 buflen = 0;
1047 for ( i = rec->len; i--; d++, f++ ) {
1048 d->tag = f->tag;
1049 d->pos = buflen;
1050 buflen += (d->len = f->len);
1051 }
1052 if ( contig )
1053 memcpy( ((char*)mfr)+mfr->base, rbase, buflen );
1054 else {
1055 char *mbase = ((char*)mfr)+mfr->base;
1056 d = mfr->dict;
1057 f = rec->field;
1058 for ( i = rec->len; i--; d++, f++ )
1059 if ( d->len )
1060 memcpy( mbase + d->pos, f->val, d->len );
1061 }
1062 ret = putMfr( db, mfr );
1063 if ( !ret && !rec->rowid )
1064 rec->rowid = mfr->mfn;
1065
1066 cleanup:
1067 if ( mfr
1068 #ifdef alloca
1069 && notalloca
1070 #endif
1071 )
1072 mFree( mfr );
1073 return ret;
1074 } /* putRec */
1075
1076
1077
1078 /* ************************************************************
1079 access functions for plaintext db
1080 */
1081
1082 /*
1083 create a pointer from the least significant bytes of pos, len, fld
1084 buf must have db->ptrl bytes (up to 16 = 8+4+4)
1085 and the most strict alignment (i.e. 4 or 8) possible for db->ptrl
1086 returns buf
1087 */
1088 static char *mkptr ( char *buf, LDb *db,
1089 unsigned pos, unsigned len, unsigned fld )
1090 {
1091 switch ( db->ptr ) {
1092 case 0x0134: /* '4' */
1093 ((unsigned*)buf)[0] = pos;
1094 if ( ~0xff & fld ) fld = 0;
1095 #ifdef LDB_BIG_ENDIAN /* the first = high order 3 bytes are len */
1096 ((unsigned*)buf)[1] = (0xff&fld) | len<<8;
1097 #else /* the first = low order 3 bytes are len */
1098 ((unsigned*)buf)[1] = (0xffffff&len) | fld<<24;
1099 #endif
1100 return buf;
1101 case 0x0044: /* 'D' */
1102 ((unsigned*)buf)[0] = pos;
1103 ((unsigned*)buf)[1] = len;
1104 return buf;
1105 case 0x0035: /* '5' */
1106 #ifdef LDB_BIG_ENDIAN /* the first = high order 5 bytes are pos */
1107 *(lll*)buf = (0xffffff&len) | ((lll)pos)<<24;
1108 #else /* the first = low order 5 bytes are pos */
1109 *(lll*)buf = pos | ((lll)len)<<40;
1110 #endif
1111 return buf;
1112 }
1113 /* TODO */
1114 (void)fld;
1115 assert( 0 );
1116 return 0;
1117 }
1118
1119
1120 /*
1121 read pointer, return len
1122 if 0x0f00 & db->ptr, fld must not be 0, else *fld is untouched
1123 */
1124 static unsigned rdptr ( unsigned *pos, unsigned *fld, LDb *db, char *buf )
1125 {
1126 switch ( db->ptr ) {
1127 case 0x0134:
1128 *pos = *(unsigned*)buf;
1129 *fld = ((unsigned char *)buf)[7];
1130 #ifdef LDB_BIG_ENDIAN
1131 return ((unsigned*)buf)[1] >> 8;
1132 #else
1133 return 0xffffff & ((unsigned*)buf)[1];
1134 #endif
1135 case 0x0044:
1136 *pos = *(unsigned*)buf;
1137 return ((unsigned*)buf)[1];
1138 case 0x0035:
1139 #ifdef LDB_BIG_ENDIAN
1140 /* *pos = (*(unsigned*)(buf+1)); would bus error on sparc */
1141 *pos = (unsigned) (*(lll*)buf >> 24);
1142 return 0xffffff & (unsigned)*(lll*)buf; /* last 3 bytes */
1143 #else /* guess there is no little endian that needs alignment ? */
1144 *pos = *(unsigned*)buf; /* use low order = first 4 of first 5 bytes */
1145 return 0xffffff & (*(unsigned*)(buf+5));
1146 #endif
1147 }
1148 /* TODO */
1149 (void)fld;
1150 assert( 0 );
1151 return 0;
1152 }
1153
1154
1155 static void setPtr ( LDb *db, int mfn,
1156 unsigned pos, unsigned len, unsigned fld )
1157 {
1158 Ptr pt;
1159 if ( mfn < db->mmlen ) {
1160 mkptr( db->mmap + mfn*db->ptrl, db, pos, len, fld );
1161 return;
1162 }
1163 lio_pwrite( &db->mst[MST_XRF],
1164 mkptr( pt.r, db, pos, len, fld), db->ptrl, mfn*db->ptrl );
1165 } /* setPtr */
1166
1167
1168 static unsigned getPtr ( unsigned *pos, unsigned *fld, LDb *db, int mfn )
1169 {
1170 Ptr pt;
1171 return mfn < db->mmlen
1172 ? rdptr( pos, fld, db, db->mmap + mfn*db->ptrl )
1173 : db->ptrl == lio_pread( &db->mst[MST_XRF],
1174 pt.r, db->ptrl, mfn*db->ptrl )
1175 ? rdptr( pos, fld, db, pt.r )
1176 : 0;
1177 } /* getPtr */
1178
1179
1180 #if 0
/* disabled stub: excluded from compilation by the surrounding #if 0 */
1181 static int putPlain ( LDb *db, Rec *rec )
1182 {
1183 return 0;
1184 } /* putPlain */
1185 #endif
1186
1187 /**
1188 get text
1189 the original text is read contiguously at base.
1190 the record is then cooked as requested:
1191 0 well done: do full fixup,
1192 apply conversions and create fields.
1193 1 english: do not create fields (rec->fields is 0), apply no conversions,
1194 but set rec->len to actual number of fields (counting if necessary).
1195 2 raw:
1196 set len only if it's known from the pointer

db   database whose pointer and text files are read
mfn  record number; looked up via getPtr and stored as rowid
raw  cooking level as listed above
returns a record obtained from mAlloc, or 0 if the pointer yields
length 0 or the text could not be read completely
1197 */
1198 static Rec *dText ( LDb *db, int mfn, int raw )
1199 {
1200 unsigned base, sz, pos, len, fld; /* #fields actually used */
1201 unsigned n = 0; /* known #fields */
1202 Rec *r, *x;
1203 Field *f, *fe;
1204 char *p, *q, *e;
1205
/* n stays 0 unless getPtr stores a field count into it */
1206 len = getPtr( &pos, &n, db, mfn );
1207 LOG_DBG( LOG_TRACE, "dText %d pos %d len %d fld %d", mfn, pos, len, n );
1208 if ( !len )
1209 return 0;
/* choose the number of field slots to allocate in front of the text */
1210 if ( raw )
1211 fld = 0;
1212 else if ( !(fld = n) ) {
1213 fld = len / 36; /* assume one (costing 12 bytes) per 36 bytes data */
1214 if ( fld < 8 ) /* small record is likely to have some short fields */
1215 fld = 8;
1216 }
1217 base = BASESZ(fld);
1218 sz = base + len;
1219 r = (Rec*)mAlloc( sz );
1220 p = ((char*)r) + base;
1221 if ( (int)len != lio_pread( &db->mst[MST_MST], p, len, pos ) ) {
1222 mFree( r );
1223 return 0;
1224 }
1225 LOG_DBG( LOG_TRACE, "'%.*s'", len, p );
1226 r->dbid = db->head.dbid;
1227 r->rowid = mfn;
1228 r->used = r->bytes = sz;
1229 r->fields = fld;
1230 r->base = base;
1231 r->len = n;
/* raw modes are done here, unless mode 1 still has to count the fields */
1232 if ( raw && (n || 1 != raw) )
1233 return r;
1234 e = p + len;
1235 f = r->field; /* next field to assign */
1236 fe = f + fld; /* end of assignable fields */
1237 /*
1238 loop through buffer lines to count a/o assign
1239 count them in n
1240 while f < fe, also fix and assign them
1241 */
1242 for ( n=0;;) { /* possibly 2 passes needed */
1243 for ( ;p < e; p = q+1 ) {
1244 if ( !(q = memchr( p, LF, e-p )) )
1245 q = e; /* > p */
/* a line starts a new field unless it begins with TAB (the first line always does) */
1246 if ( TAB != *p || !n ) {
1247 if ( f < fe ) {
1248 p += a2il( p, q-p, &f->tag );
1249 if ( p < q && TAB == *p )
1250 p++;
1251 f->len = q - (f->val = p);
1252 f++; /* f == r->field+n, as long as we don't hit fe */
1253 }
1254 n++;
1255 continue;
1256 }
1257 /* continuation line */
1258 if ( f != r->field+n )
1259 continue;
1260 /* we ARE assigning & didn't lose sync at fe */
1261 /* append to previous */ {
1262 char *dest = (char*)f[-1].val + f[-1].len;
1263 int dist = p - dest, l = q-p;
/* the leading TAB becomes a LF inside the joined value */
1264 *p = LF;
1265 memmove( dest, p, l );
1266 memset( q-dist, ' ', dist ); /* cleanup */
1267 f[-1].len += l;
1268 }
1269 }
1270 /* now n != 0, since initially p < e, since len != 0 */
1271 if ( r->len && r->len != (int)n ) {
1272 log_msg( LOG_WARN, "rec %d len %d != ptr %d", mfn, n, r->len );
1273 break;
1274 }
1275 if ( raw || (int)n <= r->fields ) /* all counted/assigned */
1276 break;
1277 /* extend the record to n fields */
1278 log_msg( LOG_INFO, "extending rec %d %d -> %d fields", mfn, fld, n );
1279 fld = n;
1280 base = BASESZ(fld);
1281 sz = base + len;
1282 x = (Rec*)mAlloc( sz );
1283 x->dbid = db->head.dbid;
1284 x->rowid = mfn;
1285 x->used = x->bytes = sz;
1286 x->fields = fld;
1287 x->base = base;
1288 x->len = n;
1289 p = ((char*)x) + base;
1290 e = p + len;
1291 q = ((char*)r) + r->base;
/* copy text and the fields assigned so far, then rebase their val pointers */
1292 memcpy( p, q, len );
1293 memcpy( x->field, r->field, r->fields*sizeof(Field) );
1294 for ( f=x->field, n=r->fields; n--; )
1295 (f++)->val += p-q;
1296 n = r->fields;
1297 mFree( r );
1298 r = x;
/* resume the scan behind the last field that was already assigned */
1299 f = r->field + n;
1300 fe = r->field + fld;
1301 p = (char*)f[-1].val + f[-1].len;
1302 /* seek behind the LF that delimited the last field */
1303 while (LF != *p++)
1304 ;
1305 }
1306 r->len = n;
/* cooked text mode: replace every VT inside the field values by LF */
1307 if ( !raw && (DB_TXTMODE & db->flags) )
1308 for ( f = r->field, fe = r->field + r->fields; f < fe; f++ )
1309 for ( p = (char*)f->val, e = p+f->len; (p = memchr(p,VT,e-p)); )
1310 *p++ = LF;
1311 return r;
1312 } /* dText */
1313
1314
1315 static int pText ( LDb *db, Rec *r, const char *mark )
1316 {
1317 char buf[128 + 65536];
1318 unsigned pos = 0, len = 0, fld, off;
1319 char *p, *b;
1320 int ret;
1321
1322 if ( !(db->flags & DB_WRITABLE) ) {
1323 log_msg( LOG_ERROR, "db is not writable" );
1324 return -1;
1325 }
1326 if ( ! r->rowid )
1327 r->rowid = db->mfc[LMFC_NMFN]++; /* assign new mfn */
1328 else if ( db->mfc[LMFC_NMFN] <= r->rowid )
1329 db->mfc[LMFC_NMFN] = r->rowid + 1;
1330 else {
1331 fld = 0;
1332 len = getPtr( &pos, &fld, db, r->rowid );
1333 }
1334 p = b = 32768 >= r->used ? buf : mAlloc(128+2*r->used);
1335 *p++ = 'W';
1336 *p++ = TAB;
1337 p += u2a( p, r->rowid );
1338 *p++ = TAB;
1339 if ( pos ) {
1340 p += u2a( p, pos );
1341 *p++ = '.';
1342 p += u2a( p, len );
1343 if ( fld ) {
1344 *p++ = '.';
1345 p += u2a( p, fld );
1346 }
1347 }
1348 *p++ = TAB;
1349 if ( mark ) {
1350 int l = strlen(mark);
1351 if ( l > 31 ) {
1352 log_msg( LOG_WARN, "mark '%.48s'%s has length %d",
1353 mark, l<48 ? "" : "...", l );
1354 l = 31;
1355 }
1356 memcpy( p, mark, l );
1357 p += l;
1358 } else {
1359 timeGtfm( p, 0 );
1360 p += 17;
1361 }
1362 *p++ = LF;
1363 off = p - b;
1364 p += len = rSerB( p, r );
1365 if ( len > 1 ) /* don't count 2 trailing LFs */
1366 len -= 2;
1367 db->flags |= DB_MODIFIED;
1368 pos = db->mflen;
1369 db->mflen += p - b;
1370 ret = lio_pwrite( &db->mst[MST_MST], b, p - b, pos );
1371 if ( ret == p - b ) {
1372 setPtr( db, r->rowid, pos+off, len, r->len );
1373 ret = 0;
1374 }
1375 if ( buf != b )
1376 mFree( b );
1377 return ret;
1378 } /* pText */
1379
1380
1381 /* ************************************************************
1382 utilities
1383 */
1384
/*
	look up key in the inverted file and collect terms and/or postings.
	key   search term; 0 is treated as "$". Without post, a trailing '$'
	      selects prefix matching; with post, LDB_PFX in post->mode does.
	post  if not 0, postings of matching terms are merged into post->p
	      (OR mode) or used to mark entries of post->p (LDB_AND mode)
	rec   if not 0, every matching term is stored as a field of rec,
	      values being packed from the end of the record downwards.
	      if rec->len is set on entry, the last field is taken as locator
	      for the starting position and rec->len is reset to 0
	lp    if not 0, lp->cb is called for every posting;
	      a nonzero return value stops the search
	returns rec->len if rec is given, else 0;
	on errors the value returned by log_msg
*/
1385 static int search ( LDb *db, const char *key, LdbPost *post,
1386 Rec *rec, DXLoop *lp )
1387 {
1388 int i, j, prefix, idx, ret, ock;
1389 int pos;
1390 int *leaf, *entry;
1391 char *term;
1392 int *xstr;
1393 struct { /* terms cursor */
1394 char key[LDB_MAX_KEYLEN+1]; /* key or key prefix */
1395 short klen; /* key length to compare */
1396 char imin; /* minimum index to search */
1397 char imax; /* maximum index to search */
1398 int leaf[LDB_INDEXES][LDB_TERMBUF]; /* one leaf buffer per index */
1399 short lpos[LDB_INDEXES]; /* next position in leaf, -1 if done */
1400 } crs;
1401 short klen; /* length for initial locate */
1402 int block[128]; /* buffer to read one block */
1403 int blockpos = 0;
1404 LdbP *p = 0;
1405
1406 if ( ! key )
1407 key = "$";
1408 /* prepare cursor struct */
1409 memset( &crs, 0, sizeof(crs) ); /* tabula rasa */
1410 crs.klen = strlen( key );
1411 /* check for prefix match */
1412 if ( post )
1413 prefix = LDB_PFX & post->mode;
1414 else if ( (prefix = crs.klen && '$' == key[crs.klen - 1]) )
1415 crs.klen--;
1416 /* check out minimum index to search */
1417 for ( crs.imin=0; crs.klen > db->tlen[(int)crs.imin]; )
1418 if ( LDB_INDEXES == ++(crs.imin) )
1419 return log_msg( ERR_INVAL, "bad keylen %d key '%.64s'", crs.klen, key );
1420 /* prepare key */
/* blank padded, mapped through the LCS_UCASE table of the db */
1421 memset( crs.key, ' ', sizeof(crs.key)-1 );
1422 {
1423 unsigned char *uc = (unsigned char*)crs.key;
1424 unsigned char *uk = (unsigned char*)key;
1425 for ( i=crs.klen; i--; )
1426 uc[i] = db->ctab[LCS_UCASE].c[ uk[i] ];
1427 }
1428 if ( prefix )
1429 crs.imax = LDB_INDEXES-1;
1430 else {
/* exact match: only one index, compare the full (padded) term length */
1431 crs.imax = crs.imin;
1432 crs.klen = db->tlen[(int)crs.imin];
1433 }
1434 log_msg( LOG_INFO, "search for '%.*s'%c", crs.klen, crs.key, prefix?'$':' ' );
1435 key = crs.key;
1436 klen = crs.klen;
1437 if ( rec && rec->len ) {
1438 /* use last key from record to locate starting position */
1439 key = rec->field[rec->len-1].val;
1440 klen = rec->field[rec->len-1].len;
1441 rec->len = 0;
1442 }
1443
1444 for ( i=crs.imin; i<=crs.imax; i++ ) { /* find leaf positions */
1445 int nFile = INV_N01 + 2*i; /* node file index */
1446 int nStr = LSTR_N01 + 2*i; /* node struct index */
1447 int *nstr = DB_XSTR( db, LSET_INV, nStr );
1448 int lvl;
1449 short cmplen = klen <= db->tlen[i] ? klen : db->tlen[i];
1450 pos = db->cnt[i][LCNT_POSR]; /* pos of root record */
1451 j = 0;
1452 for ( lvl = 0; 0<pos; lvl++ ) { /* traverse node levels */
1453 int node[102];
1454 LOG_DBG( LOG_DEBUG, "node %d at %d lvl %d", pos, j, lvl );
1455 assert( (int)sizeof(node) >= nstr[LSTR_ILEN] );
1456 *node = *nstr;
1457 ret = readlog( node, db->inv[nFile], 1-pos, db, LSET_INV, nStr );
1458 if ( pos != node[LN0X_POS] /* wrong address */
1459 || i+1 != node[LN0X_TYPE] /* wrong type */
1460 || node[LN0X_OCK] < 1 /* no keys */
1461 || 2*db->cnt[i][LCNT_ORDN] < node[LN0X_OCK] /* too many keys */
1462 )
1463 return log_msg( ERR_TRASH, "bad node pos %d type %d keys %d",
1464 node[LN0X_POS], node[LN0X_TYPE], node[LN0X_OCK]
1465 );
1466 ock = node[LN0X_OCK];
1467 for ( j=1;
1468 j<ock && 0 < (ret = memcmp( key,
1469 ((char*)node)+node[j*LN0X__RL+LN0X__FL+LN0X_KEY], cmplen ));
1470 j++ )
1471 ;
1472 /* now j is at end or on next index not less */
1473 if ( j==ock /* end */
1474 || ret /* index is greater than key */
1475 || prefix /* backtrack even on exact match */
1476 )
1477 j--; /* step into last ock with lower key */
1478 pos = node[LN0X__FL + j*LN0X__RL + LN0X_REF];
1479 } /* for lvl */
1480 /* got some negative ref to leaf; set leaf pos */
1481 crs.leaf[i][LL0X_PS] = -pos;
1482 /*
1483 since the lpos and LL0X_OCK are both 0 by the memset above,
1484 we will initially load the leaves
1485 */
1486 } /* for indexes */
1487 /* done preparing cursor */
1488
1489 if ( post ) /* prepare for postings */
1490 p = post->p;
1491 xstr = DB_XSTR( db, LSET_INV, LSTR_IFP );
1492
1493 for (;;) { /* loop terms in prefix mode */
1494 /* vars for postings: */
1495 int infb, infp; /* block and pos where to read postings */
1496 int added; /* postings added or marked per term */
1497 int blkno; /* postings block number */
1498 int remain = 0; /* postings to fetch from next block of segment */
1499 int ifp[LIFP__FL]; /* postings header */
1500
1501 idx = -1; /* index to use */
1502 /* compare index terms, load leaves if needed */
1503 for ( i = crs.imin; i <= crs.imax; i++ ) {
1504 short cmplen = klen <= db->tlen[i] ? klen : db->tlen[i];
1505 leaf = crs.leaf[i];
1506 if ( leaf[LL0X_OCK] <= crs.lpos[i] ) { /* load */
1507 int lFile = INV_L01 + 2*i; /* leaf file index */
1508 int lStr = LSTR_L01 + 2*i; /* leaf struct index */
1509 int *lstr = DB_XSTR( db, LSET_INV, lStr );
1510
1511 crs.lpos[i] = -1;
1512 reread:
/* LL0X_PS of the buffer holds the position of the leaf to load; 0 ends this index */
1513 if ( ! (pos = leaf[LL0X_PS]) )
1514 continue;
1515 LOG_DBG( LOG_DEBUG, "leaf %d", pos );
1516 assert( (int)sizeof(crs.leaf[i]) >= lstr[LSTR_ILEN] );
1517 *leaf = *lstr;
1518 ret = readlog( leaf, db->inv[lFile], 1-pos, db, LSET_INV, lStr );
1519 if ( pos != leaf[LL0X_POS] /* wrong address */
1520 || i+1 != leaf[LL0X_TYPE] /* wrong type */
1521 || leaf[LL0X_OCK] < 1 /* no keys */
1522 || 2*db->cnt[i][LCNT_ORDN] < leaf[LL0X_OCK] /* too many keys */
1523 )
1524 return log_msg( ERR_TRASH, "bad leaf pos %d type %d keys %d",
1525 leaf[LL0X_POS], leaf[LL0X_TYPE], leaf[LL0X_OCK] );
1526 ock = leaf[LL0X_OCK];
1527 /* advance to first term which is not too small
1528 (should be needed only for first leaf of an index)
1529 */
1530 for ( j=0;
1531 j<ock && (0 < (ret = memcmp( key,
1532 ((char*)leaf)+leaf[LL0X__FL + j*LL0X__RL + LL0X_KEY], cmplen ))
1533 || (!ret && key!=crs.key) ); /* skip exact while locating */
1534 j++ )
1535 ;
1536 if ( ock == j )
1537 goto reread; /* start over w/ next leaf of same index */
1538 if ( 0 <= ret
1539 || (key!=crs.key && !memcmp( crs.key,
1540 ((char*)leaf)+leaf[LL0X__FL + j*LL0X__RL + LL0X_KEY], crs.klen ))
1541 )
1542 crs.lpos[i] = j;
1543 /* else let -1 */
1544 } /* if reload */
1545 if ( 0 > crs.lpos[i] )
1546 continue;
1547 if ( 0 > idx ) {
1548 idx = i;
1549 continue;
1550 }
1551 /* compare this index next term to that of index idx */
1552 /* assume that index w/ lower number has shorter keys */
1553 ret = memcmp(
1554 ((char*)leaf)+leaf[LL0X__FL + crs.lpos[i]*LL0X__RL + LL0X_KEY],
1555 ((char*)crs.leaf[idx])+
1556 crs.leaf[idx][LL0X__FL + crs.lpos[idx]*LL0X__RL + LL0X_KEY],
1557 db->tlen[idx] );
1558 if ( 0 > ret )
1559 idx = i;
1560 }
1561 if ( 0 > idx )
1562 goto done;
1563 j = crs.lpos[idx];
1564 leaf = crs.leaf[idx];
1565 entry = leaf + LL0X__FL + j*LL0X__RL;
1566 term = ((char*)leaf) + entry[LL0X_KEY];
/* stop as soon as the next term no longer matches the key (prefix) */
1567 if ( memcmp( crs.key, term, crs.klen ) )
1568 goto done;
1569 crs.lpos[idx]++;
1570
1571 if ( rec ) { /* record the term */
1572 /* field to assign */
1573 Field *f = rec->field + rec->len;
1574 short tlen = db->tlen[idx];
1575 /* end of available buffer */
1576 char *b = rec->len
1577 ? (char*)f[-1].val /* before previously assigned field */
1578 : ((char*)rec + rec->bytes); /* end of record */
/* strip the blank padding of the term */
1579 while ( tlen && ' ' == term[tlen-1] )
1580 tlen--;
1581 b -= tlen;
1582 if ( b < (char*)(f+1) ) /* no space left on device */
1583 goto done;
1584 /* probably we're nuking the locator now: */
1585 memcpy( b, term, tlen );
1586 f->tag = 0;
1587 f->val = b;
1588 f->len = tlen;
1589 rec->len++;
1590 /* reset key from locator to prefix */
1591 key = crs.key;
1592 klen = crs.klen;
1593 }
1594
/* terms only: nobody wants the postings */
1595 if ( ! post && ! lp )
1596 continue;
1597 /* collect postings */
1598 infb = entry[LL0X_INFB];
1599 infp = entry[LL0X_INFP];
1600 /* the IFP file is organized in blocks of 128 longs.
1601 1st int is block number followed by 127 data.
1602 postings are organized in chained segments so that each segment
1603 fits within one such block. a segment has five longs header,
1604 giving number of postings and pointer to next segment.
1605 */
1606 added = 0;
1607 for ( blkno=0; infb; blkno++ ) { /* segments */
1608 LdbP merge[127/2]; /* buffer to collect new postings */
1609 int *base; /* start of data */
1610 int *b; /* start of postings */
1611 int n; /* max postings in this seg's 1st block */
1612 int xlen; /* external length to read */
1613 int f = post ? post->fil - 1 : 0; /* highest pos to consider in given postings */
1614 int m = 0; /* fill merge buffer */
1615 int k; /* loop segment */
1616
1617 if ( infp > 127-2-5 ) {
1618 return log_msg( ERR_TRASH, "found bad IFP pos %d blk %d for %.*s",
1619 infp, blkno, klen, key );
1620 }
1621 if ( remain ) { /* consecutive block of same segment */
1622 n = remain;
1623 if ( n > 127/2 )
1624 n = 127/2;
1625 xlen = 8*n;
1626 } else {
1627 n = (127 - 5 - infp)/2;
1628 xlen = 20 + 8*n;
1629 }
1630 pos = (infb - 1) * 512 + (infp + 1) * 4;
/* reuse the block buffer if the wanted range lies within what was read last */
1631 if ( blockpos
1632 && !((pos-blockpos) >> 9) /* 0 <= (pos-blockpos) < 512 */
1633 && pos+xlen <= blockpos+ 1 + (0x1ff & ~blockpos)
1634 )
1635 base = block + (pos - blockpos)/sizeof(int);
1636 else {
/* read from pos up to the end of its 512 byte block */
1637 int blklen = 1 + (0x1ff & ~pos);
1638 assert( xlen <= blklen );
1639 assert( blklen <= (int)sizeof(block) );
1640 assert( 0 == (0x1ff & (pos + blklen)) );
1641 ret = readblk( block, blklen, db->inv[INV_IFP], pos );
1642 if ( ret )
1643 return log_msg( ERR_IO, "\twhen reading IFP" );
1644 blockpos = pos;
1645 base = block;
1646 }
1647 if ( remain ) { /* no header to convert */
1648 remain -= n;
1649 b = base; /* no header */
1650 } else {
1651 assert( (int)sizeof(ifp) >= xstr[LSTR_ILEN] );
1652 *ifp = *xstr;
1653 ret = convert( ifp, (char *)base, xstr );
1654 if ( ret )
1655 return log_msg( ERR_TRASH, "\twhen converting IFP header" );
1656 if ( n > ifp[LIFP_SEGP] )
1657 n = ifp[LIFP_SEGP];
1658 remain = ifp[LIFP_SEGP] - n;
1659 b = base+5; /* after header */
1660 }
1661 LOG_DBG( LOG_VERBOSE,
1662 "key %d.%d '%.*s' blk %d post %d/%d r %d xlen %d at b/p %d.%d=%d",
1663 leaf[LL0X_PS], j, db->tlen[idx], term, blkno,
1664 n, ifp[LIFP_TOTP], remain, xlen, infb, infp, pos );
1665 if ( LOG_DO( LOG_TRACE ) )
1666 LOG_STR( ifp, lstrlib[ LSET_INV ].name[ LSTR_IFP ] );
1667 assert( (size_t)n <= sizeof(merge)/sizeof(merge[0]) );
1668 if ( lp ) {
1669 Key kbf;
1670 Hit hit;
1671 unsigned char tlen = (unsigned char) db->tlen[idx];
1672 while ( tlen && ' ' == term[tlen-1] )
1673 tlen--;
1674 memcpy( kbf.byt, term, kbf.len = tlen );
1675 for ( k=0; k<n; k++ ) { /* callback needs 'em sorted */
1676 int ppos;
1677 unsigned char *c = (unsigned char *)&b[k*2];
1678 LdbP e; /* the entry */
1679 #ifdef LDB_BIG_ENDIAN
1680 memcpy(e.bytes,c,8);
1681 #else
1682 e.bytes[0] = c[7]; e.bytes[1] = c[6];
1683 e.bytes[2] = c[5]; e.bytes[3] = c[4];
1684 e.bytes[4] = c[3]; e.bytes[5] = c[2];
1685 e.bytes[6] = c[1]; e.bytes[7] = c[0];
1686 #endif
1687 ppos = LDBP_POS( &e );
1688 hit.mfn = (unsigned)LDBP_ROW( &e );
1689 hit.tag = (unsigned short)LDBP_TAG( &e );
1690 hit.occ = (unsigned short)(ppos >> 16);
1691 hit.pos = (unsigned short)ppos;
1692 if ( lp->cb( lp->me, &kbf, &hit ) )
1693 goto done;
1694 }
1695 }
1696 if ( post ) for ( k=n; k--; ) {
1697 /* loop backwards (for the fun of it) postings in segment */
1698 int prow, ptag, ppos;
1699 unsigned char *c = (unsigned char *)&b[k*2];
1700 LdbP e; /* the entry */
1701 LdbP samerow; /* highest possible entry w/ same row as e */
1702 #ifdef LDB_BIG_ENDIAN
1703 /* the 8 bytes of a posting are BIG ENDIAN ! */
1704 memcpy(e.bytes,c,8);
1705 #else
1706 e.bytes[0] = c[7]; e.bytes[1] = c[6];
1707 e.bytes[2] = c[5]; e.bytes[3] = c[4];
1708 e.bytes[4] = c[3]; e.bytes[5] = c[2];
1709 e.bytes[6] = c[1]; e.bytes[7] = c[0];
1710 #endif
1711 prow = LDBP_ROW( &e );
1712 ptag = LDBP_TAG( &e );
1713 ppos = LDBP_POS( &e );
1714 LOG_DBG( LOG_VERBOSE, "post %d.%hd pos %06x key '%.*s'",
1715 prow, ptag, ppos, db->tlen[idx], term );
1716 if ( 0 >= ptag /* bad tag */
1717 || !prow || prow >= db->mfc[LMFC_NMFN] /* bad mfn */
1718 )
1719 continue;
/* apply the caller's cut and tag filters */
1720 if ( ! post
1721 || (post->cut && prow >= post->cut)
1722 || (post->tag && post->tag != ptag)
1723 )
1724 continue;
1725 if ( prow < post->skp ) /* quickly bail out on skip condition */
1726 break;
1727 LDBP_SETROWTOP( &samerow, &e ); /* for mfn comparison */
1728 /* sweep down to postings for the same row as e ... */
1729 while ( f >= 0 && LDBP_GT( p+f, &samerow ) )
1730 f--;
1731 if ( LDB_AND & post->mode ) {
1732 int l;
1733 /* loop postings for same row, mark all (that are near enough) */
1734 LDBP_SETROWBOT( &samerow, &e ); /* for mfn comparison */
1735 /* NOTE: postings for row are GT than bottom even if marked */
1736 for ( l = f; l>=0 && LDBP_GT( p+l, &samerow ); l-- ) {
1737 if ( post->near ) {
1738 int dist;
1739 if ( ptag != LDBP_TAG( p+l ) ) continue;
1740 if ( LDB_NEAR_G != post->near ) {
1741 dist = LDBP_POS( p+l ) - LDBP_POS( &e );
1742 if ( dist < 0 ) dist = -dist;
1743 if ( 0 < post->near
1744 ? post->near < dist
1745 : -post->near != dist /* exact $$$$ */
1746 ) continue;
1747 }
1748 }
1749 LDBP_SETMARK( p+l );
1750 added++;
1751 }
1752 } else { /* OR mode */
1753 int add;
1754 if ( ! post->near ) /* add if row not found: ignore details */
1755 add = 0 > f || prow > LDBP_ROW( p+f );
1756 else { /* add if no exact match */
1757 int l;
1758 /* NOTE: we don't use mark bit in OR mode, do we ? */
1759 for ( l = f; l>=0 && LDBP_GT( p+l, &e ); l-- )
1760 ;
1761 add = 0 > l || LDBP_GT( &e, p+l );
1762 }
1763 if ( add )
1764 merge[ m++ ] = e;
1765 }
1766 } /* for postings in segment */
1767 if ( m ) { /* merge in the merge buffer */
1768 LdbP *mm = merge;
1769 added += m;
/* fill from the top index down; entries that do not fit into post->len only lower the cut */
1770 for ( k = post->fil += m; m && k--; ) {
1771 LdbP src;
1772 if ( k < m || LDBP_GT( mm, &p[k-m] ) ) {
1773 src = *mm++;
1774 m--;
1775 LOG_DBG( LOG_DEBUG, "merging %d at %d", LDBP_ROW(&src), k );
1776 } else
1777 src = p[k-m];
1778 if ( k < post->len )
1779 p[k] = src;
1780 else { /* set cut */
1781 int row = LDBP_ROW( &src );
1782 if ( row < post->cut || !post->cut )
1783 post->cut = row;
1784 }
1785 }
1786 if ( post->fil > post->len )
1787 post->fil = post->len;
1788 if ( post->cut ) /* postings for cut row are unreliable */
1789 while ( post->fil && post->cut <= LDBP_ROW(p+post->fil-1) )
1790 post->fil--;
1791 }
1792 if ( remain ) { /* advance to start of next block */
1793 infb++;
1794 infp = 0;
1795 } else {
/* segment done: follow the chain from the header */
1796 infb = ifp[LIFP_NXTB];
1797 infp = ifp[LIFP_NXTP];
1798 }
1799 } /* for segments */
1800 LOG_DBG( LOG_VERBOSE, "added %d postings for key '%.*s'",
1801 added, db->tlen[idx], term );
1802 } /* for terms in prefix/postings mode */
1803 done:
/* AND mode: keep only the entries whose mark state equals mark, with the mark cleared */
1804 if ( post /* fixup */
1805 && LDB_AND & post->mode && !(LDB_KEEPMARKS & post->mode)
1806 ) {
1807 int mark = LDB_NOT & post->mode ? 0 : 0x8000;
1808 j=0;
1809 for ( i=0; i<post->fil; i++ )
1810 if ( mark == LDBP_MARK(p+i) ) {
1811 LDBP_CLRMARK(p+i);
1812 p[j++] = p[i];
1813 }
1814 post->fil = j;
1815 }
1816 return ! rec ? 0 : rec->len;
1817 } /* search */
1818
1819
/*
	Find the last path separator in path.
	'/' is always a separator; with WIN32 defined, '\\' is one as well.
	Returns the index of the last separator, or -1 if path is a null
	pointer or contains no separator.
*/
static int ldb_last_path_sep (const char *path) {
	const char *hit;
	int last = -1;

	if (! path)
		return -1;
	hit = strrchr (path, '/');
	if (hit)
		last = hit - path;
#ifdef WIN32
	hit = strrchr (path, '\\');
	if (hit) {
		int bs = hit - path;
		if (bs > last)
			last = bs;
	}
#endif
	return last;
} /* ldb_last_path_sep */
1841
1842
1843 static int ldb_open (const char *dbname, Rec *dbpar, Rec *syspar, Fdt *fdt)
1844 {
1845 LDb ndb, *db;
1846 int i, plen, sz, dbid, lck = LIO_TLOCK; /* WLOCK only on special demand */
1847 int ret = 0, invret = -1, lbtret = 0, autoformat = 1, writable = -1;
1848 int uc = -1, gotopt = 0, txtfd = 0, copyidx = 0;
1849 char *autoenc = 0;
1850 char *p, *q;
1851 char buf[65536+1]; /* need 64K buf for copying DO NOT SHRINK !!! */
1852 char path[ PATH_MAX ];
1853
1854 memset( &ndb, 0, sizeof(ndb) );
1855 /* these should be 0 by memsetting to 0 anyway ... */
1856 ndb.path = 0; ndb.mmap = 0;
1857 ndb.flags |= DB_MMAP; /* it mean's: we'll try */
1858
1859 /* loglevel */
1860 if ( 0 <= (i = rInt2(dbpar, syspar, OPENISIS_SLOGV, -1)) )
1861 cLog( i, 0 );
1862
1863 /* prepare name ... */
1864 if (! dbname) {
1865 if (! dbpar)
1866 return log_msg( ERR_FAULT, "ldb_open: dbname not given");
1867 dbname = rString (dbpar, OPENISIS_DNAME, 0, buf, sizeof(buf));
1868 if (! dbname)
1869 return log_msg( ERR_FAULT, "ldb_open: no dbname parameter");
1870 }
1871 plen = strlen (dbname);
1872 if (0 >= plen)
1873 return log_msg( ERR_FAULT, "ldb_open: empty dbname");
1874 if ( 4 < plen ) {
1875 if ( !memcmp( ".mst", dbname+plen-4, 4 ) ) {
1876 uc = 0;
1877 plen -= 4;
1878 } else if ( !memcmp( ".MST", dbname+plen-4, 4 ) ) {
1879 uc = OPEN_UC;
1880 plen -= 4;
1881 }
1882 }
1883 if ( sizeof(buf) <= (unsigned)plen
1884 || sizeof(path) <= (unsigned)(plen + 4 + 1)
1885 )
1886 return log_msg( ERR_FAULT, "ldb_open: dbname too long '%s'", dbname);
1887 if ('/' == dbname[plen - 1]
1888 #ifdef WIN32
1889 || '\\' == dbname[plen - 1]
1890 #endif
1891 )
1892 return log_msg( ERR_FAULT,
1893 "ldb_open: must not specify directory as dbname '%s'", dbname);
1894 if (DBNLEN > plen)
1895 strcpy(ndb.head.name, dbname);
1896 else {
1897 int i1 = 1 + plen - DBNLEN ;
1898 int i2 = ldb_last_path_sep (dbname);
1899 if (0 <= i2 && plen > ++i2 && i2 > i1) {
1900 i1 = i2;
1901 }
1902 strncpy(ndb.head.name, dbname + i1, DBNLEN - 1) [DBNLEN - 1] = 0;
1903 log_msg( LOG_WARN, "ldb_open: truncating dbname '%s' to '%s'",
1904 dbname, ndb.head.name);
1905 }
1906 /* ... and path */
1907 strcpy(path, dbname);
1908 if (! IsAbsPath (path)) {
1909 int plen2;
1910 if ( (dbpar || syspar)
1911 && (p = rString2 (dbpar, syspar, OPENISIS_DPATH, buf, sizeof(buf)))
1912 ) {
1913 plen2 = strlen (p);
1914 if (sizeof(path) <= (unsigned)(plen + plen2 + 4 + 1 + 1))
1915 return log_msg( ERR_FAULT,
1916 "ldb_open: dbname or dbpath too long: %d %d '%s'",
1917 plen, plen2, path);
1918 memmove (path + 1 + plen2, path, 1 + plen);
1919 path[plen2] = '/';
1920 memcpy (path, p, plen2);
1921 plen += 1 + plen2;
1922 }
1923 if ( !IsAbsPath(path)
1924 && syspar
1925 && (p = rString(syspar, OPENISIS_SPATH, 0, buf, sizeof(buf)))
1926 ) {
1927 plen2 = strlen(p);
1928 if (sizeof(path) <= (unsigned)(plen + plen2 + 4 + 1 + 1))
1929 return log_msg( ERR_FAULT,
1930 "ldb_open: dbname or syspath too long: %d %d '%s'",
1931 plen, plen2, path);
1932 memmove(path + 1 + plen2, path, 1 + plen);
1933 path[plen2] = '/';
1934 memcpy(path, p, plen2);
1935 plen += 1 + plen2;
1936 }
1937 } /* name and path */
1938
1939 /* more init AFTER honoring verbosity */
1940 if ( ! init ) {
1941 lstr_auto(0);
1942 init = !0;
1943 }
1944
1945 for ( dbid=0; dbid<dbs_len; dbid++ ) {
1946 if ( dbs[dbid].flags &&
1947 !strcmp( ndb.head.name, dbs[dbid].head.name ) ) {
1948 log_msg( LOG_INFO, "reopening %d '%s'", dbid, ndb.head.name );
1949 return dbid;
1950 }
1951 }
1952 /* go for slot */
1953 if ( dbid == dbs_len )
1954 for ( dbid=0; dbid<dbs_len && dbs[dbid].flags; dbid++ )
1955 ;
1956 if ( dbid == dbs_len )
1957 return -1;
1958 db = &dbs[dbid];
1959 /* got slot */
1960 *db = ndb;
1961 db->head.dbid = dbid;
1962
1963 /* preset record sizes */
1964 db->mfc[0] = *DB_XSTR( db, LSET_MST, LSTR_MFC );
1965 db->xrf[0] = *DB_XSTR( db, LSET_MST, LSTR_XRF );
1966 db->cnt[0][0] =
1967 db->cnt[1][0] = *DB_XSTR( db, LSET_INV, LSTR_CNT );
1968 /* isis-1 index term lengths */
1969 db->tlen[0] = 10;
1970 db->tlen[1] = 30;
1971
1972 /* only the packed little endian ("DOS") format is writable
1973 test later ...
1974 if ( LVAR_PAC != (DB_VARI & db->flags) )
1975 writable = 0;
1976 */
1977
1978 db->path = mDup( path, plen+1 ); /* save path */
1979 memcpy( path+plen, ".???", 5 );
1980
1981 if ( dbpar )
1982 dbpar = rDup(dbpar, 0, 0);
1983 /* check options file and extension case */
1984 if ( 0 <= uc ) /* use case from dbname */
1985 i = lio_open( setext(path,EXT_TXT_OPT,uc), OPEN_RDIF );
1986 else if ( 0 > (i = lio_open( setext(path,EXT_TXT_OPT,uc=0), OPEN_RDIF ))
1987 && 0 > (i = lio_open( setext(path,EXT_TXT_OPT,uc=OPEN_UC), OPEN_RDIF ))
1988 )
1989 uc = autocase( db->path );
1990 if ( 0 < i ) {
1991 if ( 0 < (sz = lio_size(i)) ) {
1992 p = sz < (int)sizeof(buf) ? buf : mAlloc(sz);
1993 if ( (gotopt = (sz == lio_read( &i, p, sz ))) )
1994 rDeser( &dbpar, p, sz, 0 );
1995 log_msg( LOG_INFO, "reading %d bytes options from '%s' %s",
1996 sz, path, gotopt ? "ok" : "nok" );
1997 if ( buf != p )
1998 mFree( p );
1999 }
2000 lio_close( &i, LIO_INOUT );
2001 }
2002 lck |= uc;
2003
2004 if ( (dbpar || syspar) && 0 <= (i = rInt2(dbpar, syspar, OPENISIS_DRO, -1)))
2005 writable = !i; /* explicit 0/1 */
2006
2007 /* open files */
2008 /* trad. index is never openend writable. */
2009 invret = openfiles( db->inv, path, EXT_INV, INV_FILES, uc|OPEN_RDIF );
2010 if (dbpar || syspar) {
2011 char fmtstr[32];
2012 if (rString2 (dbpar, syspar, OPENISIS_DTYPE, fmtstr, sizeof(fmtstr))) {
2013 if (! strcmp ("aligned", fmtstr)) {
2014 db->flags |= LVAR_ALI;
2015 autoformat = 0;
2016 } else if (! strcmp ("naligned", fmtstr))
2017 autoformat = 0;
2018 }
2019 }
2020 if ( autoformat ) {
2021 if ( invret )
2022 log_msg( LOG_WARN, "cannot guess format -- no inverted file" );
2023 else {
2024 unsigned len = lio_size( db->inv[INV_CNT] );
2025 if ( 56L == len ) {
2026 db->flags |= LVAR_ALI;
2027 autoenc = "iso8859-1";
2028 /* writable = 0; we do not write aligned format */
2029 } else if ( 52L == len )
2030 autoenc = "cp850";
2031 else
2032 log_msg( LOG_WARN, "cannot guess format -- bad .cnt len %d", len );
2033 log_msg( LOG_INFO, "using autoformat %saligned for .cnt len %d",
2034 (db->flags & LVAR_ALI) ? "":"un", len );
2035 }
2036 }
2037
2038 /* data */
2039 #ifdef NOTXTDB
2040 if ( !(ret = openfiles( db->mst, path, EXT_MST, MST_FILES,
2041 lck|OPEN_ASIS|LIO_CREAT ))
2042 )
2043 writable = 0;
2044 else if (0 > ret)
2045 #else
2046 if ( 0 <= (ret = openfiles( &txtfd, path, EXT_TXT, 1,
2047 lck|LIO_SYNC|(writable?OPEN_ASIS:OPEN_RDIF) ))
2048 ) { /* .txt exists: use it */
2049 if ( ret )
2050 writable = 1;
2051 else if (1 == writable) {
2052 log_msg( LOG_ERROR, "file '%s' is readonly", path );
2053 goto cleanup;
2054 } else
2055 writable = 0;
2056 } else if (
2057 0 <= (ret = openfiles( db->mst, path, EXT_MST, MST_FILES,
2058 lck|((writable && !(db->flags & LVAR_ALI))?OPEN_ASIS:OPEN_RDIF) ))
2059 && (ret || 1!=writable)
2060 ) { /* trad. files are ok */
2061 if ( !ret )
2062 writable = 0;
2063 } else if ( 1 != (ret = openfiles( &txtfd, path, EXT_TXT, 1,
2064 lck|(ret ? LIO_SYNC : 0)|OPEN_NEW )) ) /* don't sync on autoconv */
2065 #endif
2066 goto cleanup;
2067
2068 /* MW: creation mode? KR: ugo+rw & ~umask */
2069 if ( 1 == (lbtret = openfiles( &db->oxi.fd, path, EXT_LBT, 1,
2070 lck|(writable?OPEN_ASIS:OPEN_RDIF) ))
2071 )
2072 lbtret = 0;
2073 else if ( !writable )
2074 ;/* no problem */
2075 else if ( !lbtret ) { /* exists ro */
2076 log_msg( LOG_ERROR, "file '%s' is readonly", path );
2077 goto cleanup;
2078 } else { /* create and copy to oxi */
2079 if ( 1 != openfiles( &db->oxi.fd, path, EXT_LBT, 1, lck|OPEN_NEW ) )
2080 goto cleanup;
2081 lbtret = 0;
2082 copyidx = 1;
2083 }
2084
2085 if ( db->mst[MST_MST] ) { /* care for the traditionals */
2086 if ( (ret = readlog(
2087 db->mfc, db->mst[MST_MST], 0, db, LSET_MST, LSTR_MFC ))
2088 ) {
2089 /* NEW goto cleanup; */
2090 memset( db->mfc, 0, sizeof(db->mfc) );
2091 db->mfc[LMFC_NMFN] = 1;
2092 db->mfc[LMFC_NMFB] = 1;
2093 db->mfc[LMFC_NMFP] = 64;
2094 db->mflen = 64;
2095 } else {
2096 /*
2097 int lastblock = (db->mflen = lio_size( db->mst[MST_MST] ))/512;
2098 if ( 511 & db->mflen ) lastblock++;
2099 counting from 1
2100 the next record's block should be either the last one we have
2101 or the next one to follow
2102 if ( db->mfc[LMFC_NMFB] != lastblock
2103 && db->mfc[LMFC_NMFB] != lastblock+1
2104 )
2105 log_msg( LOG_VERBOSE, "NMFB mismatch: NMFB %d ~ %d",
2106 db->mfc[LMFC_NMFB], lastblock );
2107 */
2108 /* set LOGICAL mf length */
2109 db->mflen = (db->mfc[LMFC_NMFB]-1)*512 + db->mfc[LMFC_NMFP];
2110 }
2111 db->ptrl = 512;
2112 db->xrlen = lio_size( db->mst[MST_XRF] ) / 512;
2113 if ( (DB_MMAP & db->flags)
2114 && db->xrlen
2115 && db->xrlen*512
2116 == lio_mmap( &db->mst[MST_XRF], (void**)&db->mmap, db->xrlen*512 )
2117 )
2118 db->mmlen = db->xrlen;
2119 }
2120
2121 if ( txtfd ) {
2122 int remake = 0;
2123 /* TODO: make on-demand preparation even faster using buffered IO */
2124 if ( !lio_size(txtfd) ) {
2125 const char newline = LF;
2126 if ( gotopt
2127 && 0 < (i = lio_open( setext(path,EXT_TXT_OPT,uc), LIO_RD ))
2128 ) { /* copy the options file */
2129 log_msg( LOG_INFO, "copying %d bytes options", lio_size(i) );
2130 while ( 0 < (sz = lio_read( &i, buf, sizeof(buf)-1 )) )
2131 lio_write( &txtfd, buf, sz );
2132 if ( LIO_INOUT & i ) { /* is supposed to autoclose */
2133 log_msg( LOG_WARN, "tss tss tss ..." );
2134 lio_close( &i, LIO_INOUT );
2135 }
2136 }
2137 lio_write( &txtfd, &newline, 1 );
2138 }
2139
2140 if ( db->mst[MST_MST] ) { /* copy to new empty txt */
2141 int end = db->mfc[LMFC_NMFN];
2142 /*
2143 max recsize for traditionals is 32K.
2144 field values may double, if consisting entirely of newlines.
2145 rec->used may be more than 32K, since we 12 bytes per field.
2146 However, we know there are only sign+5digits+tab+newline used per tag,
2147 fitting within 2* the original 6 bytes per field.
2148 */
2149
2150 log_msg( LOG_INFO, "copying traditional data" );
2151 db->flags |= DB_OPEN; /* pretend */
2152 for ( i=1; i<end; i++ ) {
2153 Rec *r = dRead( dbid, i );
2154 if ( !r )
2155 sz = 1;
2156 else if ( (int)sizeof(buf) <= (sz = rSerB( buf, r )) ) {
2157 log_msg( ERR_IDIOT, "serialized %d bytes" );
2158 exit(42);
2159 }
2160 lio_write( &txtfd, buf, sz );
2161 }
2162 db->flags &= ~DB_OPEN; /* pret end */
2163 remake = 1;
2164 if ( db->mmap )
2165 lio_mmap( 0, (void**)&db->mmap, db->mmlen*512 );
2166 db->mmlen = 0;
2167 closefiles( db->mst, MST_FILES );
2168 } /* copying */
2169 db->mst[MST_MST] = txtfd;
2170 db->mflen = lio_size( db->mst[MST_MST] );
2171
2172 db->ptr = 0x0134; /* should be config opt */
2173 if ( !remake ) { /* other reasons why we should remake */
2174 unsigned short ptr;
2175 unsigned isix = GETINT(ISIX);
2176 unsigned magic;
2177
2178 remake = 1;
2179 if ( 0 > (db->mst[MST_XRF] = lio_open( setext(path,EXT_TXT_PTR,uc),
2180 LIO_SEEK|(writable?LIO_RDWR:LIO_RD) ))
2181 )
2182 log_msg( LOG_INFO, "'%s' not found", path );
2183 else if ( 6 != lio_read(&db->mst[MST_XRF],buf,6) )
2184 log_msg( LOG_WARN, "'%s' too short", path );
2185 else if ( isix != (magic = GETINT(buf)) ) /* FOO! */
2186 log_msg( LOG_WARN, "'%s' has black magic 0x%08x", path, magic );
2187 /* TODO: save that foo if it doesn't read ISIX ? */
2188 else if ( 0xf000 & (ptr = GETSHORT(buf+4)) ) /* bad endianess */
2189 log_msg( LOG_WARN, "'%s' has bad endianess type 0x%04x", path, ptr );
2190 else if ( (db->ptr && db->ptr != ptr) ) /* other type configured */
2191 log_msg( LOG_WARN, "'%s' type 0x%04x != cfg 0x%04x", path, ptr, db->ptr );
2192 else if ( lio_time(db->mst[MST_XRF]) < lio_time(db->mst[MST_MST]) )
2193 log_msg( LOG_WARN, "'%s' older than data", path );
2194 else {
2195 db->ptr = ptr;
2196 remake = 0;
2197 }
2198 }
2199 if ( ! db->ptr ) {
2200 db->ptr = 0x0134; /* m*256 + l*16 + k, doc/Serialized */
2201 /* BTW: 0x34 is ASCII digit '4', so it's ISIX4^A on little endian */
2202 db->ptrl = 8;
2203 } else { /* fix unsupported type */
2204 unsigned m = 0xf&(db->ptr>>8);
2205 unsigned l = 0xf&(db->ptr>>4);
2206 unsigned k = 0xf&db->ptr;
2207 int mod = 0;
2208 if ( m > 4 ) { m = 4; mod = 1; }
2209 if ( l > 4 ) { l = 4; mod = 1; }
2210 if ( k > 4 ) { k = 4; mod = 1; } /* TODO: allow 8 with large files */
2211 /* total ptr bytes = sum(nibbles) <= 45, but won't use more than 8+4+4 */
2212 if ( mod ) {
2213 log_msg( LOG_WARN, "fixing unsupported ptr type 0x%04x", db->ptr );
2214 db->ptr = (unsigned short)(m<<8 | l<<4 | k);
2215 remake = 1;
2216 }
2217 db->ptrl = k+l+m;
2218 }
2219 if ( remake ) {
2220 Ptr pt;
2221 unsigned base = 0; /* of current block */
2222 unsigned pos = 0; /* of last record */
2223 unsigned fld = 0; /* of last record */
2224 unsigned nmfn = 0; /* next mfn = maxmfn+1 */
2225 unsigned xmfn = 0; /* explicitly given */
2226 char op = 0;
2227 int more; /* buf not empty flag */
2228 char *last; /* of current block */
2229
2230 lio_close( &db->mst[MST_XRF], LIO_INOUT );
2231 if ( 0 > (db->mst[MST_XRF] = lio_open(
2232 setext(path,EXT_TXT_PTR,uc), OPEN_BLANK ))
2233 )
2234 goto cleanup;
2235 /* write signature */
2236 memcpy( pt.r, "ISIX", 4 );
2237 memcpy( pt.r+4, &db->ptr, 2 );
2238 memcpy( pt.r+6, ":)", 2 );
2239 if ( 8 < db->ptrl )
2240 memset( pt.r+8, ')', db->ptrl - 8 );
2241 lio_pwrite( &db->mst[MST_XRF], pt.r, db->ptrl, 0 );
2242 /* loop the masterfile */
2243 lio_seek( &db->mst[MST_MST], 0 );
2244 last = (p = buf) + lio_read( &db->mst[MST_MST], buf, 8192 ) - 1;
2245 more = last > buf; /* one byte is no byte ;) */
2246 if ( more && LF == *p ) { /* no options: no \n\n */
2247 nmfn = pos = 1;
2248 p++;
2249 }
2250 for (;;) { /* records */
2251 unsigned len, mfn;
2252 for (;;) { /* lines and stuff to end of record */
2253 if ( p < last ) { /* have one lookahead */
2254 if ( LF != *p++ )
2255 continue; /* the tight loop ... or use memchr ? */
2256 if ( LF != *p ) { /* now p <= last */
2257 if ( fld || !(0xc0 & *p) ) { /* < '@', 'A', ... */
2258 if ( TAB != *p ) /* no continuation */
2259 fld++;
2260 continue;
2261 }
2262 fld++; /* count field, unless we really recognize a opline */
2263 if ( 'Z' < *p )
2264 continue;
2265 /* now we have '@'...'Z' at start of 1st line */
2266 sz = last - p; /* avail after p */
2267 if ( sz && TAB != p[1] ) /* no opline */
2268 continue;
2269 switch (*p) {
2270 case 'D':
2271 case 'I':
2272 case 'W':
2273 break; /* give it a try */
2274 default:
2275 log_msg( LOG_WARN, "unknown opline %c at mfn %d", *p, nmfn );
2276 continue;
2277 }
2278 if ( sz > 127 ) /* longer -> no opline */
2279 sz = 127;
2280 if ( ! sz || ! (q = memchr(p+1, LF, sz)) ) {
2281 if ( sz >= 127 || ! more )
2282 continue; /* too long or undelimited last */
2283 p--; /* back to \n, so we come here again */
2284 goto gimmemore;
2285 }
2286 if ( q < p+3 || p[2] < '0' || '9' < p[2] )
2287 continue;
2288 /* TODO:
2289 take a closer look at whether the whole line makes sense
2290 */
2291 if ( op ) { /* yeah, two metas in sequence! weird stuff! */
2292 p--; /* step back to newline */
2293 pos = base+(p-buf); /* fake pos as if we had no line at all */
2294 break; /* go handle the PREVIOUS opline */
2295 }
2296 op = *p;
2297 xmfn = a2i( p+2, q-p-2 );
2298 fld--; /* uncount this line */
2299 pos = base + (q-buf) + 1; /* start after q */
2300 continue;
2301 }
2302 break;
2303 }
2304 gimmemore:
2305 LOG_DBG( LOG_DEBUG, "MORE %d at pos %d base %d p +%d last +%d",
2306 more, pos, base, p-buf, last-buf );
2307 if ( !more )
2308 goto schicht; /* german: done */
2309 base += p - buf; /* shift out bytes before p */
2310 len = last-p; /* bytes to keep after p; < 128 */
2311 if ( len ) /* we're probing for more lookahead */
2312 memmove( buf, p, 1+last-p );
2313 else /* typically */
2314 *buf = *p; /* but save the last dance */
2315 p = buf;
2316 last = buf + len;
2317 /* reload */
2318 if ( 0 < (sz = lio_read( &db->mst[MST_MST], buf+1+len, 8192 )) ) {
2319 last += sz;
2320 continue;
2321 }
2322 more = 0; /* but yet, finish this up */
2323 /* since *buf = *last was the files last character,
2324 we'd expect a newline
2325 */
2326 if ( last == p )
2327 p = buf+(LF==*buf ? 1 : 2); /* pretend buf started \n */
2328 if ( ! len )
2329 break;
2330 /* else try again opline */
2331 } /* lines and stuff */
2332 /* now p is on a delimiting blank lines \n -- or such ... */
2333 len = base + (p-buf) - pos; /* >= 0 */
2334 mfn = xmfn ? xmfn : nmfn;
2335 log_msg( LOG_INFO, "ptr %c %d(%d/%d) pos %d len %d",
2336 op?op:'>', mfn, xmfn, nmfn, pos, len );
2337 if ( base + (p-buf) < pos ) /* FOO !!! */
2338 len = 0;
2339 if ( len ) /* could have been completely empty */
2340 len--; /* mute last \n */
2341 if ( 'D' == op && len ) /* FOO !!! */
2342 len = 0;
2343 if ( mfn && (len || op) )
2344 lio_pwrite( &db->mst[MST_XRF],
2345 mkptr( pt.r, db, pos, len, fld), db->ptrl, mfn*db->ptrl );
2346 pos = base + (p-buf) + 1; /* next starts after p */
2347 if ( 'D' != op ) { /* 'D'elete does not lead to implicit reuse */
2348 if ( op && nmfn < xmfn )
2349 nmfn = xmfn;
2350 nmfn++; /* continue after this */
2351 }
2352 xmfn = fld = op = 0;
2353 }
2354 schicht: ;
2355 } /* remake */
2356 db->mfc[LMFC_NMFN] =
2357 db->xrlen = lio_size( db->mst[MST_XRF] ) / db->ptrl;
2358 if ( (DB_MMAP & db->flags)
2359 && db->xrlen
2360 && db->xrlen*db->ptrl
2361 == lio_mmap( &db->mst[MST_XRF], (void**)&db->mmap, db->xrlen*db->ptrl )
2362 )
2363 db->mmlen = db->xrlen;
2364 log_msg( LOG_INFO, "mapped %d*%d = %d",
2365 db->xrlen, db->ptrl, db->xrlen*db->ptrl );
2366 db->flags |= DB_TXTOPEN;
2367 db->flags &= ~DB_VARI; /* clear alignment and such */
2368 } /* if ( txtfd ) */
2369
2370 /* supporting files, ctables */
2371 p = buf;
2372 if ( 0 >= (sz = lio_slurp( &p, sizeof(buf), setext(path,EXT_SUP_ACT,uc), 1 ))
2373 || lcs_mktab( db->ctab+LCS_CTYPE, p, sz, LCS_A )
2374 )
2375 memcpy( db->ctab+LCS_CTYPE, lcs_latin1_ct, sizeof(db->ctab[0]) );
2376 if ( 0 >= (sz = lio_slurp( &p, sizeof(buf), setext(path,EXT_SUP_UCT,uc), 1 ))
2377 || lcs_mktab( db->ctab+LCS_UCASE, p, sz, 0 )
2378 )
2379 memcpy( db->ctab+LCS_UCASE, lcs_latin1_uc, sizeof(db->ctab[0]) );
2380 /* fill header */
2381
2382 if (! fdt) {
2383 if ( (p = rString (dbpar, OPENISIS_DFDT, 0, buf, sizeof(buf))) ) {
2384 Rec *recfdt = 0;
2385 Db *dbfdt = nDbByName (openisis_stub0, p);
2386 if ( dbfdt)
2387 recfdt = dRead (dbfdt->dbid, 1);
2388 else {
2389 int idfdt = ldb_open (p, 0, syspar, 0);
2390 if (0 <= idfdt) {
2391 recfdt = dRead (idfdt, 1); /*MMM*/
2392 cDClose (idfdt);
2393 }
2394 }
2395 if (recfdt)
2396 fdt = fRec2Fdt (recfdt);
2397 } else if ( gotopt )
2398 fdt = fRec2Fdt(dbpar);
2399 if (! fdt)
2400 fdt = fFromFile (path);
2401 }
2402 db->head.fdt = fdt;
2403 if (fdt)
2404 log_msg( LOG_INFO, "have %d fdt entries for %s",
2405 fdt->len, db->head.name);
2406 else
2407 log_msg( LOG_INFO, "have no fdt for %s", db->head.name);
2408
2409 db->head.tms = timeUpd(0); /* what watch? */
2410 log_msg( LOG_INFO, "tms %d for %s", db->head.tms, db->head.name);
2411
2412
2413 /* set path and name */
2414 if (0 <= (i = ldb_last_path_sep (db->path))) {
2415 if (i)
2416 strncpy(path, db->path, i)[i] = 0;
2417 else
2418 strcpy (path, "/");
2419 dbpar = rSet (dbpar, RCHG | RDIS, OPENISIS_DPATH, path, 0);
2420 }
2421 dbpar = rSet (dbpar, RCHG | RDIS, OPENISIS_DNAME, db->head.name, 0);
2422
2423 /* set encoding */
2424 if (!(p = rString (dbpar, OPENISIS_DENC, 0, buf, sizeof(buf))))
2425 if ( (syspar
2426 && (p = rString (syspar, OPENISIS_DENC, 0, buf, sizeof(buf))))
2427 || (p = autoenc)
2428 )
2429 dbpar = rSet(dbpar, RDIS, OPENISIS_DENC, p, 0);
2430 if ( p )
2431 log_msg( LOG_INFO, "using encoding %s for %s", p, db->head.name);
2432
2433 db->head.cfg = dbpar;
2434
2435 /* done */
2436 db->flags |= DB_OPEN;
2437
2438 if ( writable && LVAR_PAC == (DB_VARI & db->flags) )
2439 db->flags |= DB_WRITABLE;
2440
2441 /*
2442 if ( (dbpar || syspar) && 0 < rInt2(dbpar, syspar, OPENISIS_DDUMP, -1) ) {
2443 int off = 0;
2444 int *r;
2445 do {
2446 if ( (r = ldb_readRecAtOff(dbid,off,&off)) )
2447 mFree( r );
2448 } while ( 0 < off );
2449 exit(0);
2450 }
2451 */
2452
2453 /* init oxi */
2454 if ( writable )
2455 db->oxi.flg |= LBT_WRITE;
2456 if ( (p = getenv("OXITYP")) && 0 < (i = atoi(p)) && 4 > i )
2457 db->oxi.typ = i << 4;
2458 if ( !lbtret && !lbt_init( &db->oxi ) )
2459 db->flags |= DB_LBTOPEN;
2460
2461 if ( ! invret
2462 && ! (ret = readlog( db->cnt[0], db->inv[INV_CNT],
2463 0, db, LSET_INV, LSTR_CNT ))
2464 && ! (ret = readlog( db->cnt[1], db->inv[INV_CNT],
2465 -1, db, LSET_INV, LSTR_CNT ))
2466 ) {
2467 if ( lbtret )
2468 db->flags |= DB_INVOPEN;
2469 else {
2470 if ( copyidx ) {
2471 DXLoop l;
2472 log_msg( LOG_INFO, "copying traditional index" );
2473 lbtret = 0;
2474 memset( &l, 0, sizeof(l) );
2475 l.me = & db->oxi;
2476 l.cb = (DXCb*)cXAdd;
2477 lbt_batch( & db->oxi, 5 );
2478 search( db, 0, 0, 0, &l );
2479 cXAdd( & db->oxi, 0, 0 );
2480 }
2481 closefiles( db->inv, INV_FILES );
2482 }
2483 }
2484
2485 return dbid;
2486
2487 cleanup:
2488 /* cleanup ... */
2489 db->flags = 0;
2490 closefiles( &db->oxi.fd, 1 );
2491 closefiles( &txtfd, 1 );
2492 closefiles( db->inv, INV_FILES );
2493 closefiles( db->mst, MST_FILES );
2494 return 0 > ret ? ret : ret ? -ret : -1;
2495 } /* ldb_open */
2496
2497
2498 /* ************************************************************
2499 package data
2500 */
2501
2502
2503
2504 /* ************************************************************
2505 package functions
2506 */
2507
2508 int *ldb_readRecAtOff ( int dbid, lxref off, int *nxtoff )
2509 {
2510 int *rec;
2511 LDb *db = getDb( dbid );
2512 if ( ! db ) {
2513 log_msg( LOG_ERROR, "\tat ldb_readRecAtOff" );
2514 return 0;
2515 }
2516 if ( 0 == off )
2517 off = 64;
2518 rec = getMfr( db, off, nxtoff );
2519 if ( ! rec )
2520 return 0;
2521 LOG_DBG( LOG_VERBOSE, "db %d off %d: got %hd bytes",
2522 dbid, off, !rec ? -1 : rec[LMFR_RECL] );
2523 return rec;
2524 } /* ldb_readRecAtOff */
2525
2526
2527
2528 int ldb_search ( int dbid, const char *key, LdbPost *post, Rec *rec )
2529 {
2530 LDb *db = getDb( dbid );
2531 Key k;
2532
2533 if ( ! db )
2534 return -ERR_BADF;
2535 if ( post ) { /* prepare for postings */
2536 if ( ! post->len )
2537 post->len = sizeof(post->p)/sizeof(post->p[0]); /* standard length */
2538 if ( LDB_NOT & post->mode )
2539 post->mode |= LDB_AND;
2540 }
2541 if ( DB_INVOPEN & db->flags )
2542 return search( db, key, post, rec, 0 );
2543 if ( !(DB_LBTOPEN & db->flags) )
2544 return -ERR_BADF;
2545 if ( db->oxi.bat )
2546 return -ERR_BUSY;
2547 memset( &k, 0, sizeof(k) );
2548 if ( ! key ) {
2549 k.byt[0] = '$';
2550 k.len = 1;
2551 } else {
2552 unsigned char *uk = (unsigned char*)key;
2553 int l = strlen( key );
2554 if ( l > 255 )
2555 l = 255;
2556 k.len = (unsigned char)l;
2557 while ( l-- )
2558 k.byt[l] = db->ctab[LCS_UCASE].c[ uk[l] ];
2559 }
2560 return lbt_search( &db->oxi, &k, post, rec );
2561 } /* ldb_search */
2562
2563
2564 int ldb_p2s ( Set *set, LdbPost *post )
2565 {
2566 int *s = set->id;
2567 int last=0, max = set->len;
2568 int i;
2569 set->len = 0;
2570 if ( ! max )
2571 max = OPENISIS_SETLEN;
2572 max--;
2573 if ( !post->fil )
2574 return 0L;
2575 s[0] = LDBP_ROW(post->p);
2576 for ( i=1; i<post->fil && last < max; i++ ) {
2577 int row = LDBP_ROW(post->p+i);
2578 if ( s[last] != row )
2579 s[++last] = row;
2580 }
2581 return set->len = last+1;
2582 } /* ldb_p2s */
2583
2584
#if 0
/* disabled: hand out the character tables of database dbid, 0 if unknown */
LcsTab *ldb_tabs( int dbid )
{
	LDb *db = getDb( dbid );
	if ( db )
		return db->ctab;
	return 0;
} /* ldb_tabs */
#endif
2592
2593
2594 Db *ldb_getdb (int dbid) {
2595 LDb *db = getDb (dbid);
2596 return db ? &db->head : 0;
2597 }
2598
2599 /* ************************************************************
2600 public functions
2601 */
2602 int dMaxId ( int dbid )
2603 {
2604 LDb *db = getDb( dbid );
2605 if ( ! db )
2606 return -ERR_BADF;
2607 return db->mfc[LMFC_NMFN] - 1;
2608 } /* dMaxId */
2609
2610
2611 Raw *dRaw ( int dbid, int rowid )
2612 {
2613 int off;
2614 int *rec = 0;
2615 LDb *db;
2616
2617 if ( LIO_LOCK() ) return 0;
2618 db = getDb( dbid );
2619 if ( ! db ) {
2620 log_msg( LOG_ERROR, "\tat openIsisReadRaw %d", rowid );
2621 goto done;
2622 }
2623 off = getOff( db, rowid, 0 );
2624 log_msg( LOG_INFO, "found xref 0x%08x for %d", off, rowid );
2625 if ( 0 >= off ) {
2626 log_msg( LOG_INFO, "found deleted xref 0x%08x for %d", off, rowid );
2627 goto done;
2628 }
2629 rec = getMfr( db, off, 0 );
2630 if ( ! rec ) {
2631 log_msg( LOG_WARN, "\tno record at %d rowid %d", off, rowid );
2632 goto done;
2633 }
2634 LOG_DBG( LOG_VERBOSE, "db %d row %d: got %hd bytes",
2635 dbid, rowid, !rec ? -1 : rec[LMFR_RECL] );
2636 if ( rec[LMFR_MFN] != rowid ) {
2637 log_msg( LOG_ERROR, "got mfn %d expected %d", rec[LMFR_MFN], rowid );
2638 mFree( rec );
2639 rec = 0;
2640 goto done;
2641 }
2642 done:
2643 (void)LIO_RELE();
2644 return (Raw*)rec;
2645 } /* dRaw */
2646
2647
2648 Rec *dRead ( int dbid, int rowid )
2649 {
2650 LDb *db = getDb( dbid );
2651 Rec *r;
2652 if ( DB_TXTOPEN & db->flags )
2653 return dText( db, rowid, 0 );
2654 if ( (r = (Rec *) dRaw( dbid, rowid )) ) {
2655 char * base = (char*)r;
2656 Field *f = r->field;
2657 int i = r->len;
2658 for ( ; i--; f++ )
2659 f->val = base + (int)f->val;
2660 assert( RECOK( r ) );
2661 }
2662 return r;
2663 } /* dRead */
2664
2665
2666 int dWritex ( int dbid, Rec *rec, Rec *idx )
2667 {
2668 LDb *db = getDb( dbid );
2669 int ret = 0;
2670
2671 if ( ! db )
2672 return -ERR_BADF;
2673 if ( !(DB_WRITABLE & db->flags) )
2674 return log_msg( ERR_INVAL, "db %d not writable", dbid );
2675 if ( rec && (ret =
2676 DB_TXTOPEN & db->flags ? pText( db, rec, 0 ) : putRec( db, rec )
2677 ) )
2678 return ret;
2679 if ( idx ) {
2680 const unsigned char *const uc = db->ctab[LCS_UCASE].c;
2681 int delmode = 0;
2682 int tag = -1;
2683 int mode = 'f'; /* 'w', 's' */
2684 int occ = 0;
2685 int pos = 0;
2686 int cut = 30;
2687 int mfn = rec ? rec->rowid : 0;
2688 Hit h;
2689 Key k;
2690 Field *f = idx->field, *last = f + idx->len - 1;
2691
2692 for ( ; f <= last; f++ ) {
2693 const char *val = f->val;
2694 int len = f->len;
2695 int del = delmode;
2696
2697 k.val.len = 0;
2698 switch ( f->tag ) {
2699 case XCTL: { /* index cmd [opt] */
2700 const char *cmd = val, *e = val + len;
2701 int cmdlen, opt = 0, haveopt;
2702 while ( val < e && 64 < *val ) /* eat ASCII letters */
2703 val++;
2704 cmdlen = val - cmd;
2705 if ( val < e && (TAB == *val || ' ' == *val) )
2706 val++;
2707 haveopt = val < e && a2il( val, e-val, &opt );
2708 if ( ! cmdlen ) {
2709 cut = haveopt ? opt : 30;
2710 continue;
2711 }
2712 switch (*cmd) {
2713 case 'f': /* fields */
2714 mode = 'f';
2715 occ = opt;
2716 pos = 0;
2717 continue;
2718 case 'w': /* words */
2719 mode = 'w';
2720 pos = opt;
2721 continue;
2722 case 's': /* split */
2723 mode = 's';
2724 pos = opt;
2725 continue;
2726 case 'a': /* add */
2727 delmode = 0;
2728 occ = pos = 0;
2729 continue;
2730 case 'd': /* del */
2731 delmode = 1;
2732 occ = pos = 0;
2733 continue;
2734 case 'm': /* mfn */
2735 mfn = opt;
2736 occ = pos = 0;
2737 continue;
2738 }
2739 return log_msg( ERR_INVAL, "bad index control '%.*s'", cmdlen, cmd );
2740 }
2741 case XHIT: {
2742 int i = 0, v[5], *pv = v;
2743 const char *e = val + len;
2744 if ( len )
2745 switch (*val) {
2746 case '+': del = 0; val++; break;
2747 case '-': del = 1; val++; break;
2748 }
2749 for ( ; val < e && i<5; i++ ) {
2750 int dig = a2il( val, e-val, v+i );
2751 val += dig;
2752 if ( val >= e || TAB == *val )
2753 break;
2754 if ( '.' != *val )
2755 return log_msg( ERR_INVAL,
2756 "bad HIT '%.*s' after %d", e-val, val, v[i] );
2757 val++;
2758 }
2759 h.dbn = 0;
2760 h.mfn = mfn;
2761 h.pos = pos;
2762 h.occ = occ;
2763 h.tag = tag;
2764 switch ( i ) {
2765 case 5: h.dbn = (unsigned short)*pv++;
2766 case 4: h.mfn = (unsigned)*pv++;
2767 case 3: h.pos = (unsigned short)pv[2];
2768 case 2: h.occ = (unsigned short)pv[1];
2769 case 1: h.tag = (unsigned short)pv[0];
2770 /* case 0: ! f->len */
2771 }
2772 if ( val < e && TAB == *val )
2773 val++;
2774 len = e - val;
2775 } break; /* case XHIT */
2776 case XFST:
2777 return log_msg( ERR_IDIOT, "sorry, XFST not implemented" );
2778 #if 0
2779 case XADD: /* binary key */
2780 /* if ( f->len < db->oxi.vsz )
2781 memset( k.val.byt, 0, db->oxi.vsz - f->len );
2782 */
2783 memcpy( k.val.byt
2784 + (f->len < (int)db->oxi.vsz ? (int)db->oxi.vsz - f->len : 0),
2785 f->val, f->len > (int)db->oxi.vsz ? (int)db->oxi.vsz : f->len );
2786 k.val.len = db->oxi.vsz;
2787 break;
2788 #endif
2789 default:
2790 if ( 0 > f->tag )
2791 return log_msg( ERR_INVAL, "bad index control tag %d", f->tag );
2792 switch ( mode ) { /* check for tag change */
2793 case 'f':
2794 if ( tag == f->tag )
2795 occ++;
2796 else
2797 occ = 0;
2798 break;
2799 case 'w':
2800 if ( tag == f->tag )
2801 pos++;
2802 else
2803 pos = 0;
2804 break;
2805 }
2806 tag = f->tag;
2807 h.dbn = 0;
2808 h.mfn = mfn;
2809 h.pos = pos;
2810 h.occ = occ;
2811 h.tag = f->tag;
2812 }
2813 if ( ! k.val.len ) { /* not ADD/DEL: use hit, val */
2814 unsigned char *dst = k.byt;
2815 const unsigned char *src = (const unsigned char *)val;
2816 if ( cut < len )
2817 len = cut;
2818 k.len = (unsigned char)len;
2819 while ( len-- )
2820 *dst++ = uc[ *src++ ];
2821 cXMkVal( &db->oxi, &k.val, &h );
2822 LOG_DBG( LOG_DEBUG, "#%d %c key '%.*s' hit %d.%d.%d.%d.%d",
2823 f - idx->field, del ? '-' : '+', k.len, k.byt,
2824 h.dbn, h.mfn, h.tag, h.occ, h.pos );
2825 }
2826 ret = del ? lbt_del( &db->oxi, &k ) : lbt_add( &db->oxi, &k );
2827 }
2828 }
2829 return ret;
2830 } /* dWritex */
2831
2832
2833 int dWrite ( int dbid, Rec *rec )
2834 {
2835 /* TODO: use FST lines as idx */
2836 return dWritex( dbid, rec, 0 );
2837 } /* dWrite */
2838
2839
2840 Rec* dTerm ( Rec *rec, int dbid, const char *key )
2841 {
2842 return 0 > ldb_search( dbid, key, 0, rec ) ? 0 : rec;
2843 } /* dTerm */
2844
2845
2846 int dXLoop ( int dbid, DXLoop *l )
2847 {
2848 LDb *db = getDb( dbid );
2849
2850 if ( !db )
2851 return -ERR_BADF;
2852 if ( OPENISIS_IDXTRAD & l->flg ) {
2853 if ( !(db->flags & DB_INVOPEN) )
2854 return -ERR_BUSY;
2855 return search( db, 0, 0, 0, l );
2856 }
2857 if ( !(db->flags & DB_LBTOPEN) || db->oxi.bat )
2858 return -ERR_BUSY;
2859 return lbt_loop( & db->oxi, l );
2860 } /* dXLoop */
2861
2862
2863
2864 int cInit ( int argc, const char **argv, CLockFunc lockfunc )
2865 {
2866 (void)argc; (void)argv;
2867 cOpen( 0 );
2868 if ( lockfunc )
2869 lio_lock = lockfunc;
2870 return 0;
2871 }
2872
2873
2874 Db* cDOpen (const char *dbname, Rec *dbpar, Rec *syspar, Fdt *fdt) {
2875 int dbid;
2876 if ( ! init )
2877 cOpen( 0 );
2878 dbid = ldb_open (dbname, dbpar, syspar, fdt);
2879 if (0 <= dbid) {
2880 return &dbs[dbid].head;
2881 }
2882 return 0;
2883 }
2884
2885 int cDOpenv ( const char *dbname, const char **argv, int argc )
2886 {
2887 Rec *dbpar = 0;
2888 int rt;
2889 if ( ! init )
2890 cOpen( 0 );
2891 if (argc) {
2892 dbpar = rSet (0, RARGV | RFDT | RNOC | RIGN | argc,
2893 openIsisFdtDbpar, argv);
2894 }
2895 rt = ldb_open (dbname, dbpar, 0, 0);
2896 if (dbpar) {
2897 mFree (dbpar);
2898 }
2899 return rt;
2900 }
2901
2902
2903 int cDClose ( int dbid )
2904 {
2905 LDb *db = getDb( dbid );
2906 if ( ! db )
2907 return -ERR_BADF;
2908 if ( LIO_LOCK() ) return -ERR_BUSY;
2909 if ( DB_MODIFIED == ((DB_MODIFIED|DB_TXTOPEN) & db->flags) ) {
2910 /* write back the MF control */
2911 Mfc mfc;
2912 /* if ( 498 < (db->mflen & 511) ) db->mflen = ~511 & (db->mflen + 14); */
2913 mfc.ctlm = rvi( db->mfc[LMFC_CTLM] );
2914 mfc.nmfn = rvi( db->mfc[LMFC_NMFN] );
2915 mfc.nmfb = rvi( 1 + (db->mflen >> 9) );
2916 mfc.nmfp = rvs( 511 & db->mflen );
2917 mfc.type = rvs( db->mfc[LMFC_TYPE] );
2918 mfc.rcnt = rvi( db->mfc[LMFC_RCNT] );
2919 mfc.mfx1 = rvi( db->mfc[LMFC_MFX1] );
2920 mfc.mfx2 = rvi( db->mfc[LMFC_MFX2] );
2921 mfc.mfx3 = rvi( db->mfc[LMFC_MFX3] );
2922 if ( sizeof(mfc) != lio_pwrite( &db->mst[MST_MST], &mfc, sizeof(mfc), 0) )
2923 log_msg( ERR_TRASH, "could not write MST header" );
2924 }
2925 if ( db->mmap ) {
2926 if ( (DB_MODIFIED|DB_TXTOPEN) == ((DB_MODIFIED|DB_TXTOPEN) & db->flags) )
2927 memcpy( db->mmap, ISIX, 4 ); /* force newer mtime on proper close */
2928 lio_mmap( 0, (void**)&db->mmap, db->mmlen*db->ptrl );
2929 }
2930 db->mmlen = 0;
2931 closefiles( db->mst, MST_FILES );
2932 if ( DB_INVOPEN & db->flags )
2933 closefiles( db->inv, INV_FILES );
2934 if ( DB_LBTOPEN & db->flags )
2935 lbt_close( &db->oxi );
2936 db->flags = 0L;
2937 if ( db->path ) mFree( (char*)db->path );
2938 if (db->head.cfg) mFree (db->head.cfg);
2939 if (db->head.fdt) fFree (db->head.fdt);
2940 memset( db, 0, sizeof(db) );
2941 (void)LIO_RELE();
2942 return 0;
2943 } /* cDClose */
2944
2945
2946 int cDCheck ( int dbid, int flags )
2947 {
2948 static char dot = '.';
2949 int *r;
2950 LDb *db = getDb( dbid );
2951 int nxtoff = 64, off;
2952
2953 if ( ! db )
2954 return -ERR_BADF;
2955 (void)flags;
2956 do {
2957 lio_write( &lio_out, &dot, 1 );
2958 if ( (r = ldb_readRecAtOff(dbid,off=nxtoff,&nxtoff)) ) {
2959 int o = getOff( db, r[LMFR_MFN], 0 );
2960 if ( o != off ) {
2961 log_msg( LOG_WARN, "mfn %d xrf %d != real %d\n",
2962 r[LMFR_MFN], o, off );
2963 }
2964 mFree( r );
2965 }
2966 } while ( 0 < nxtoff );
2967 return 0;
2968 } /* cDCheck */
2969
2970
2971 OpenIsisIdx *cXOpen ( int dbid, int mode )
2972 {
2973 LDb *db = getDb( dbid );
2974 if ( !db
2975 || !(db->flags & DB_LBTOPEN)
2976 || !(db->oxi.flg & LBT_WRITE) /* may be writable if db is not */
2977 || db->oxi.bat
2978 /*
2979 preliminary undocumented feature:
2980 mode -1 gives direct access in non-batch mode
2981 */
2982 || (0 <= mode && lbt_batch( & db->oxi, (unsigned char)mode ))
2983 )
2984 return 0;
2985 return & db->oxi;
2986 } /* cXOpen */

  ViewVC Help
Powered by ViewVC 1.1.26