/[webpac]/trunk2/openisis/ldb.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/openisis/ldb.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 239 - (hide annotations)
Mon Mar 8 17:49:13 2004 UTC (20 years ago) by dpavlin
Original Path: trunk/openisis/ldb.c
File MIME type: text/plain
File size: 80336 byte(s)
including openisis 0.9.0 into webpac tree

1 dpavlin 237 /*
2     openisis - an open implementation of the CDS/ISIS database
3     Version 0.8.x (patchlevel see file Version)
4     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14     Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     /*
24     $Id: ldb.c,v 1.95 2003/06/10 11:00:34 kripke Exp $
25     implementation of general db access functions.
26     */
27    
28     #include <stdlib.h>
29     #include <string.h>
30     #include <limits.h> /* PATH_MAX */
31     #include <errno.h>
32    
33    
34     /* special */
35     #if defined( __GNUC__ ) && defined ( alloca )
36     #include <alloca.h>
37     #endif
38    
39     /* gcc always defines a cpu type macro - we use this for byte order checking */
40     #if defined( sparc ) || defined( __ppc__ )
41     # define LDB_BIG_ENDIAN
42     /* TODO: figure out fastest "htonl" on those boxes that usually don't swap */
/* Return i with the order of its four bytes reversed (byte order swap).
   Like the rest of this file, assumes a 4-byte int.
*/
static int rvi ( int i )
{
	const char *in = (const char*)&i;
	int swapped;
	char *out = (char*)&swapped;
	int k;

	for ( k = 0; k < 4; k++ )
		out[k] = in[3 - k];
	return swapped;
}
/* Return i with its two bytes exchanged (byte order swap).
   Assumes a 2-byte short.
*/
static short rvs ( short i )
{
	const char *in = (const char*)&i;
	short swapped;
	char *out = (char*)&swapped;
	int k;

	for ( k = 0; k < 2; k++ )
		out[k] = in[1 - k];
	return swapped;
}
57     #define SWI( i ) i = rvi( i )
58     #define SWS( s ) s = rvs( s )
59     #else
60     # define rvi(i) i
61     # define rvs(s) s
62     #define SWI( i )
63     #define SWS( s )
64     #endif
65     #if defined( sparc )
66     # define LDB_NEEDALIGN
67     #endif
68     #if defined( LDB_NEEDALIGN )
/* Fetch 4 bytes from m into an unsigned, in host byte order.
   Goes through memcpy so that m does not have to be aligned.
*/
static unsigned GETINT ( const void *m )
{
	unsigned val;

	memcpy( &val, m, 4 );
	return val;
}
/* Fetch 2 bytes from m into an unsigned short, in host byte order.
   Goes through memcpy so that m does not have to be aligned.
*/
static unsigned short GETSHORT ( const void *m )
{
	unsigned short val;

	memcpy( &val, m, 2 );
	return val;
}
81     #else
82     #define GETINT( m ) (*(unsigned*)(m))
83     #define GETSHORT( m ) (*(unsigned short*)(m))
84     #endif
85    
86     #include "lstr.h"
87     #include "lio.h"
88     #include "lbt.h"
89     #include "lcs.h"
90     #include "ldb.h"
91     #include "lfdt.h"
92     #include "luti.h"
93    
94     #ifdef WIN32
95     #define IsAbsPath(p) \
96     ((p) && *(p) && ( \
97     '/' == *(p) || '\\' == *(p) || ( \
98     ':' == (p)[1] && ( \
99     '/' == (p)[2] || '\\' == (p)[2] \
100     ))))
101     #else
102     #define IsAbsPath(p) \
103     ((p) && '/' == *(p))
104     #endif
105    
106    
107     #define LF 10 /* LineFeed a.k.a. newline - '\n' isn't really well defined */
108     #define TAB 9 /* horizontal, that is */
109     #define VT 11 /* vertical, used as newline replacement */
110    
111     /* ************************************************************
112     private types
113     */
114    
115     /** extension of master file proper. */
116     static const char EXT_MST_MST[] = ".mst";
117     /** extension of master file xref. */
118     static const char EXT_MST_XRF[] = ".xrf";
119    
120     typedef enum {
121     MST_MST,
122     MST_XRF,
123     MST_FILES
124     } mst_file;
125    
126     static const char * const EXT_MST[MST_FILES] = {
127     EXT_MST_MST,
128     EXT_MST_XRF
129     };
130    
131     /** extension of inverted file short term nodes. */
132     static const char EXT_INV_N01[] = ".n01";
133     /** extension of inverted file short term leaves. */
134     static const char EXT_INV_L01[] = ".l01";
135     /** extension of inverted file long term nodes. */
136     static const char EXT_INV_N02[] = ".n02";
137     /** extension of inverted file long term leaves. */
138     static const char EXT_INV_L02[] = ".l02";
139     /** extension of inverted file control records. */
140     static const char EXT_INV_CNT[] = ".cnt";
141     /** extension of inverted file postings. */
142     static const char EXT_INV_IFP[] = ".ifp";
143    
144     typedef enum {
145     INV_N01,
146     INV_L01,
147     INV_N02,
148     INV_L02,
149     INV_CNT,
150     INV_IFP,
151     INV_FILES
152     } inv_file;
153    
154     static const char * const EXT_INV[INV_FILES] = {
155     EXT_INV_N01,
156     EXT_INV_L01,
157     EXT_INV_N02,
158     EXT_INV_L02,
159     EXT_INV_CNT,
160     EXT_INV_IFP
161     };
162    
163     /** extension of lbt B-Link-Tree.
164     It's named oxi because that is nicer than oix for OpenIsis indeX.
165     however, see http://www.oxicenter.com.br/
166     */
167     static const char EXT_LBT_OXI[] = ".oxi";
168     static const char * const EXT_LBT[] = {
169     EXT_LBT_OXI
170     };
171    
172    
173     /** plaintext master file
174     */
175     static const char EXT_TXT_TXT[] = ".txt";
176     static const char EXT_TXT_PTR[] = ".ptr";
177     static const char EXT_TXT_OPT[] = ".opt";
178     typedef enum {
179     TXT_TXT,
180     TXT_PTR,
181     TXT_FILES
182     } txt_file;
183     static const char * const EXT_TXT[] = {
184     EXT_TXT_TXT,
185     EXT_TXT_PTR
186     };
187    
188     static const char ISIX[] = "ISIX"; /* ptr magic */
189    
190    
191     /** extension of supporting file alpha character table. */
192     static const char EXT_SUP_ACT[] = ".act";
193     /** extension of supporting file uppercase table. */
194     static const char EXT_SUP_UCT[] = ".uct";
195    
196     typedef enum {
197     SUP_ACT,
198     SUP_UCT,
199     SUP_FILES
200     } sup_file;
201    
202     static const char * const EXT_SUP[SUP_FILES] = {
203     EXT_SUP_ACT,
204     EXT_SUP_UCT
205     };
206    
207    
208     typedef int lblk[128];
209    
210    
211     typedef struct {
212     Db head;
213     int flags;
214     const char *path;
215     int mst[MST_FILES]; /* master file */
216     int inv[INV_FILES]; /* primary inverted file */
217     int mfc[LMFC__FL]; /* master file control record */
218     unsigned mflen; /* master file length */
219     int xrf[129]; /* last used xrf block : THREAD THREAT */
220     int xrlen; /* length of xrf (in blocks) */
221     unsigned short ptr; /* type of pointer file (new style xrf) */
222     unsigned short ptrl; /* pointer bytes, 512 for old xrf */
223     char *mmap; /* memory map of xrf/ptr */
224     int mmlen; /* length of map (in ptrl) */
225     int cnt[LDB_INDEXES][LCNT__FL]; /* two cnt records */
226     short tlen[LDB_INDEXES]; /* max term length for each index */
227     LcsTab ctab[LCS__TABS];
228     Idx oxi;
229     } LDb;
230    
231    
232     typedef union {
233     lll bar;
234     char r[16];
235     } Ptr;
236    
237    
238     /* db flags */
239     #define DB_OPEN 0x010000
240     #define DB_INVOPEN 0x020000
241     #define DB_LBTOPEN 0x040000
242     #define DB_TXTOPEN 0x080000
243     #define DB_WRITABLE 0x100000
244     #define DB_MODIFIED 0x200000
245    
246     #define DB_TXTMODE 0x20
247     #define DB_MMAP 0x10
248     #define DB_VARI 0xf /* mask for variant */
249    
250    
251     /* get xstr for record rec in set */
252     #define DB_XSTR( db, set, rec ) \
253     lstrlib[ set ].desc[ DB_VARI & (db)->flags ][ rec ]
254     /* get record names for record rec in set */
255     #define DB_RNAM( db, set, rec ) \
256     lstrlib[ set ].name[ rec ]
257    
258    
259     /** packed little endian masterfile control structure.
260     */
261     typedef struct Mfc {
262     int ctlm;
263     int nmfn;
264     int nmfb;
265     short nmfp;
266     short type;
267     int rcnt;
268     int mfx1;
269     int mfx2;
270     int mfx3;
271     } Mfc;
272    
273    
274     /** packed little endian directory entry (tag, pos, len) of a masterfile record.
275     */
276     typedef struct Dict {
277     short tag;
278     short pos;
279     short len;
280     } Dict;
281    
282     /** packed little endian masterfile record.
283     */
284     typedef struct Mfr {
285     int mfn;
286     short recl; /* a.k.a. mfrl */
287     short bwbl; /* low part of int */
288     short bwbh; /* high part of int */
289     short bwp;
290     /* it is believed, that this first five fields up to here (12 bytes packed)
291     are to be in one 512-byte block; the manual mentiones even 14 bytes ... ???
292     */
293     short base;
294     short nvf;
295     short stat;
296     Dict dict[1];
297     } Mfr;
298    
299    
300    
301     /* ************************************************************
302     private data
303     */
304    
305     static LDb defdbspace[32];
306     /* array of open dbs. should expand dynamically. */
307     static LDb *dbs = defdbspace;
308     static int dbs_len = sizeof(defdbspace)/sizeof(defdbspace[0]);
309    
310     static int init;
311    
312    
313     /* ************************************************************
314     private functions
315     */
316     static LDb *getDb ( int id )
317     {
318     if ( 0 <= id && id < dbs_len && dbs[id].flags ) {
319     return &dbs[id];
320     }
321     log_msg( LOG_ERROR, "attempt to access bad db id %d", id );
322     return 0;
323     } /* getDb */
324    
325    
326     /* ************************************************************
327     start of io section
328     */
/* values for the `how` argument of openfiles / setext:
   two private flags plus ready-made combinations with the LIO_* flags
*/
enum {
	/* additional flags in the LIO_FD range */
	OPEN_TRY = 1, /* try writable, open readonly else */
	OPEN_UC = 2, /* use uppercase ext */
	/* commonly used combinations */
	/* 1) open as is, do not complain about any failure, do not create */
	OPEN_ASIS = LIO_SEEK|LIO_RDWR|OPEN_TRY,
	/* 2) open readonly, do not complain about any failure, do not create */
	OPEN_RDIF = LIO_SEEK|LIO_RD|LIO_TRY,
	/* 3) open readonly, complain about any failure */
	OPEN_RD = LIO_SEEK|LIO_RD,
	/* 4) open or create writable, complain on failure */
	OPEN_NEW = LIO_SEEK|LIO_RDWR|LIO_CREAT,
	/* 5) same flags as OPEN_NEW with LIO_TRUNC added */
	OPEN_BLANK = LIO_SEEK|LIO_RDWR|LIO_CREAT|LIO_TRUNC
};
344    
345     /* figure out wether to use uppercase extension on path.
346     if last path component (everything after the last / and \)
347     does contain an uppercase ascii and does not contain a lowercase ascii,
348     return OPEN_UC, else 0.
349     */
350     static int autocase ( const char *path )
351     {
352     int ret = 0;
353     const char *e = path + strlen( path );
354     while ( e-- > path )
355     if ( 'A'<=*e && *e<= 'Z' )
356     ret = OPEN_UC;
357     else if ( 'a'<=*e && *e<= 'z' )
358     return 0;
359     else if ( '/'==*e || '\\' == *e )
360     break;
361     return ret;
362     }
363    
364     /* set extension. fname MUST already end with .xxx.
365     if how has OPEN_UC set, use uppercase extension
366     */
367     static char *setext ( char *fname, const char *ext, int how )
368     {
369     int l = strlen( fname ) - 4;
370     memcpy( fname+l, ext, 4 );
371     if ( OPEN_UC & how ) {
372     char *p = fname+l;
373     for ( ;*p; p++ ) /* use uppercase extensions */
374     if ( 'a' <= *p && *p <= 'z' )
375     *p -= 'a'-'A';
376     }
377     return fname;
378     }
379    
380     /**
381     try to open all files according to how.
382     ldb is only interested in seekable readable true files.
383     @return
384     1 if all files could be opened writable
385     0 if all files could be opened readonly,
386     and that was requested by a RD mode or try write
387     something negative else
388     */
389     static int openfiles ( int *fid, char *path,
390     const char * const *ext, int nfiles, int how )
391     {
392     int i;
393     int wr = LIO_WR&how ? 1 : 0, mode = LIO_WANT & how;
394    
395     for ( i=0; i<nfiles; i++ ) {
396     setext( path, ext[i], how );
397     fid[i] = lio_open( path, mode & LIO_WANT );
398     log_msg( LOG_INFO, "opening file '%s' %c 0x%x",
399     path, wr ? 'w' : 'r', fid[i] );
400     if ( 0 < fid[i] ) { /* ok */
401     mode &= ~LIO_FLOCK; /* lock only leading file */
402     continue;
403     }
404     fid[i] = 0;
405     while ( i-- ) /* close others */
406     lio_close( &fid[i], LIO_INOUT );
407     if ( OPEN_TRY & how )
408     return openfiles( fid, path, ext, nfiles,
409     (how & ~(OPEN_TRY|LIO_WR)) | LIO_TRY );
410     return LIO_TRY&how ? -ERR_BADF /* silent */
411     : log_msg( LOG_SYSERR, "could not open file '%s' for %sing",
412     path, wr ? "writ" : "read" );
413     }
414     return wr; /* good */
415     } /* openfiles */
416    
417    
418     static int closefiles ( int *fid, int nfiles )
419     {
420     int ret = 0, i;
421     for ( i=0; i<nfiles; i++ )
422     if ( 0 < fid[i] && LIO_INOUT & fid[i] )
423     lio_close( &fid[i], LIO_INOUT );
424     return ret;
425     } /* closefiles */
426    
427    
428     static int readblk ( void *dst, int siz, int fid, int where )
429     {
430     int got;
431     got = lio_pread( &fid, dst, siz, where );
432     if ( 0 > got )
433     return got;
434     #ifndef NDEBUG
435     if ( LOG_DO( LOG_ALL ) )
436     LOG_HEX( dst, got );
437     #endif
438     if ( siz == (int)got )
439     return 0;
440     log_msg( LOG_WARN, "got %u bytes wanted %d at %d in 0x%x",
441     got, siz, where, fid );
442     return 1+(int)got;
443     } /* readblk */
444    
445    
446     /* ************************************************************
447     end of io section
448     */
449    
450     static int *nrec ( int *xstr )
451     {
452     int *dst = (int*)mAlloc( xstr[LSTR_ILEN] );
453     if ( dst )
454     *dst = *xstr;
455     return dst;
456     } /* nrec */
457    
458    
459    
460     typedef struct b8 { char x[8]; } b8;
461     typedef struct b4 { char x[4]; } b4;
462     typedef struct b2 { char x[2]; } b2;
463    
464    
/* convert an external (on-disk) record into its internal int-array form.
	dst  internal record buffer of xstr[LSTR_ILEN] bytes; its first int
	     must already equal *xstr (as set up by nrec). It is zeroed and
	     refilled: one int per member, raw member bytes appended behind
	     the first LSTRLEN(*dst) bytes.
	src  external record, xstr[LSTR_XLEN] bytes
	xstr structure descriptor: member descriptors start at xstr+LSTR_XMBR,
	     first those of the fixed part, then those of the repeated part
	@return 0 on success, else the result of log_msg for the error
*/
static int convert ( int *dst, char *src, int *xstr )
{
	static int pow2[] = { 1, 2, 4, 8 };
	int occ = -1;
	int *xmbrs = xstr+LSTR_XMBR;
	int nmbrs = LSTRFIX(*xstr);
	int *mbr;
	char *buf, *part = src, *srcend = src + xstr[LSTR_XLEN];

	if ( ! dst )
		return log_msg( ERR_NOMEM, "out of memory (no dst) in convert" );
	if ( LSTRLEN(*xstr) > xstr[LSTR_ILEN] )
		return log_msg( ERR_INVAL, "bad ilen %d need %d",
			xstr[LSTR_ILEN], (int)LSTRLEN(*xstr) );
	if ( *dst != *xstr )
		return log_msg( ERR_INVAL, "bad *dst 0x%08x need 0x%08x", *dst, *xstr );
	/* clean and re-init */
	memset( dst, 0, xstr[LSTR_ILEN] );
	*dst = *xstr;
	mbr = dst+1; /* next member slot to fill */
	buf = ((char*)dst) + LSTRLEN(*dst); /* raw data goes behind the member ints */

	/* cvt the fixed part (occ==-1) and each occurrence of repeated part. */
	for ( ;/* occ < LSTROCC(*dst) */; ) { /* cvt one part */
		int i;
		for ( i=0; i<nmbrs; i++, mbr++ ) { /* assign one xmbr */
			int xmbr = xmbrs[i];
			char *s = part + LONG2OFF(xmbr); /* member's bytes in src */
			int sbytes = srcend - s; /* bytes left in src from s on */
			int len,j;
			union {
				char buf[8];
				b8 x8;
				b4 x4;
				b2 x2;
				short s;
				int i;
				lll ll;
			} num;
			if ( LMBRISNUM( xmbr ) ) {
				/* numeric data */
#ifdef LDB_BIG_ENDIAN
# define NEEDSWAP(xmbr) ( ! ( LMBR_FHIE & (xmbr) ) )
#else
# define NEEDSWAP(xmbr) (LMBR_FHIE & (xmbr))
#endif
				int ld = LMBRLD(xmbr); /* log2 of the member's byte length */
#ifndef LDB_NEEDALIGN
				if ( ! NEEDSWAP( xmbr ) ) {
					/* much faster this way ... */
					/* TODO: len and bit checks; actually not needed yet ... */
					/* NOTE: this shortcut skips both the source length check
						and the bit shift/mask applied on the slow path below */
					switch ( ld ) {
					case 3: *mbr = *(lll*)s; break;
					case 2: *mbr = *(int*)s; break;
					case 1: *mbr = *(short*)s; break;
					case 0: *mbr = *s; break;
					}
					continue;
				}
#endif
				len = pow2[ LMBRLD(xmbr) ];
				if ( len > sbytes )
					return log_msg( ERR_INVAL,
						"srcbuf too short %d have %d need %d occ %d mbr %d",
						xstr[LSTR_XLEN], sbytes, len, occ, i );
				if ( !NEEDSWAP( xmbr ) )
					/* for ( j = len; j--; ) num.buf[j] = s[j]; */
					/* struct assignment copies the bytes from s */
					switch ( ld ) {
					case 3: num.x8 = *(b8*)s; break;
					case 2: num.x4 = *(b4*)s; break;
					case 1: num.x2 = *(b2*)s; break;
					case 0: num.buf[0] = *s; break;
					}
				else /* swap bytes */
					for ( j = len; j--; )
						num.buf[j] = s[len - 1 - j];
				switch ( len ) {
				case 8: *mbr = num.ll; break;
				/* TODO: defines for 16 and 64 bit compilers */
				case 4: *mbr = num.i; break;
				case 2: *mbr = num.s; break;
				case 1: *mbr = num.buf[0]; break;
				}
				if ( LMBRISBITS( xmbr ) ) { /* apply bit shift and mask */
					*mbr >>= LMBRBITOFF( xmbr );
					*mbr &= ~(-1L << LMBRBITLEN( xmbr ));
				}
				continue;
			}
			/* else raw data -- that's easy :) */
			{
				int offset = buf - (char*)dst;
				int need;
				len = LONG2LEN(xmbr);
				need = offset + len;
				if ( need > xstr[LSTR_ILEN] )
					return log_msg( ERR_INVAL,
						"bad buflen %d need %d+%d in occ %d mbr %d",
						xstr[LSTR_ILEN], offset, len, occ, i );
				if ( len > sbytes )
					return log_msg( ERR_INVAL,
						"srcbuf too short %d have %d need %d occ %d mbr %d",
						xstr[LSTR_XLEN], sbytes, len, occ, i );
				memcpy( buf, s, len );
				/* the member int holds the byte offset of the copy within dst */
				*mbr = buf - (char*)dst;
				buf += len;
			}
		} /* for mbrs */

		if ( ++occ >= LSTROCC(*dst) )
			break;
		if ( occ )
			part += (short)xstr[LSTR_XRLO]; /* adv. rep. part len */
		else { /* was the fixed part, setup for repeated */
			nmbrs = LSTRREP(*xstr);
			part += (short)(xstr[LSTR_XRLO]>>16); /* adv. rep. part off */
			xmbrs += i; /* descriptors of repeated part follow the fixed ones */
		}
	}
	return 0;
} /* convert */
586    
587    
588     static int readrec ( int *dst, int fid, int where, int *xstr )
589     {
590     char *buf = (char *)
591     #ifdef alloca
592     alloca( xstr[LSTR_XLEN] )
593     #else
594     mAlloc( xstr[LSTR_XLEN] )
595     #endif
596     ;
597     int got = 0;
598     int ret = 0;
599    
600     if ( ! buf )
601     return -ERR_NOMEM;
602     if ( 0 > where )
603     where = -where * xstr[LSTR_XLEN];
604     got = readblk( buf, xstr[LSTR_XLEN], fid, where );
605     ret = got ? got : convert( dst, buf, xstr );
606     #ifndef alloca
607     mFree( buf );
608     #endif
609    
610     return ret;
611     } /* readrec */
612    
613    
614     /* read and log */
615     static int readlog ( int *dst, int fid, int where, LDb *db, int set, int rec )
616     {
617     int ret = readrec( dst, fid, where, DB_XSTR( db, set, rec ) );
618     if ( !ret && LOG_DO( LOG_TRACE ) )
619     LOG_STR( dst, lstrlib[ set ].name[ rec ] );
620     return ret;
621     } /* readlog */
622    
623    
624    
625     /* ************************************************************
626     access functions for the record types
627     */
628    
629     static int getOff ( LDb *db, int rowid, int xr )
630     {
631     int rowix = rowid - 1; /* mfns count from 1 */
632     int xrf_block = rowix / 127;
633     int blkix = xrf_block + 1; /* ... so do xrf blocks */
634     int off;
635     if ( xrf_block < db->mmlen ) {
636     if ( xr )
637     ((lblk*)db->mmap)[xrf_block][1+(int)(rowix % 127)] = rvi( xr );
638     else
639     xr = rvi( ((lblk*)db->mmap)[xrf_block][1+(int)(rowix % 127)] );
640     } else {
641     int err = 0;
642     /* if ( LIO_LOCK() ) return -3; */
643     if ( xr ) { /* write */
644     if ( blkix <= db->xrlen ) {
645     SWI( xr );
646     if ( 4 != lio_pwrite( &db->mst[MST_XRF], &xr, 4,
647     xrf_block*512 + 4*(1 + (rowix % 127))) )
648     return 0;
649     } else { /* extent */
650     lblk extend;
651     memset( extend, 0, sizeof(extend) );
652     while ( db->xrlen < blkix ) { /* extend */
653     extend[0] = db->xrlen + 1; /* set blk id */
654     if ( blkix == extend[0] )
655     extend[1+(int)(rowix % 127)] = rvi( xr );
656     SWI( extend[0] );
657     if ( 512 != lio_pwrite( &db->mst[MST_XRF], extend, 512,
658     db->xrlen*512) )
659     return 0;
660     db->xrlen++;
661     }
662     }
663     if ( blkix == db->xrf[LXRF_XPOS] )
664     db->xrf[ LXRF_XREC + (int)(rowix % 127) ] = xr;
665     /* and go on read it back, just to check ... */
666     }
667     if ( blkix != db->xrf[LXRF_XPOS] ) {
668     int ret;
669     LOG_DBG( LOG_VERBOSE, "fetching xrf block %d had %d",
670     blkix, ! db->xrf ? -1 : db->xrf[LXRF_XPOS] );
671     ret = readlog( db->xrf, db->mst[MST_XRF],
672     -xrf_block, db, LSET_MST, LSTR_XRF );
673     if ( ret ) {
674     log_msg( LOG_ERROR, "\twhen fetching xrf block %d", blkix );
675     err = -1;
676     } else if ( blkix == -db->xrf[LXRF_XPOS] ) {
677     LOG_DBG( LOG_DEBUG, "hmmm ... negative" );
678     db->xrf[LXRF_XPOS] = blkix;
679     } else if ( blkix != db->xrf[LXRF_XPOS] ) {
680     log_msg( LOG_WARN, "bad xrf %d wanted %d",
681     db->xrf[LXRF_XPOS], blkix );
682     err = -2;
683     }
684     }
685     xr = db->xrf[ LXRF_XREC + (int)(rowix % 127) ];
686     /* LIO_RELE(); */
687     if ( err )
688     return err;
689     }
690     /*
691     21 bits (<<11) signed for the (512 byte) block ("xrmfb")
692     1 for the first block (offset 0)
693     0 means, never had such a record
694     -1 and xrmfp=0: record removed from MST
695     (there is no record at pos 0 in 1st block,
696     since there resides the MST header)
697     other negative value -x or pos!=0:
698     record logically deleted, was at +x
699     1 bit (1<<10): this record is new and not yet inverted
700     1 bit (1<<9): this record is changed and not yet re-inverted
701     9 bits for the block-relative position ("xrmfp")
702     */
703     off = (((xr & 0xfffff800) >> 2) - 0x200) | (0x1ff & xr);
704     if ( 0 < (xr & ~0x600) ) {
705     LOG_DBG( LOG_DEBUG,
706     "offset for rowid %d is %d (blk %d pos %d) flg 0x%08x at %d[%d]",
707     rowid, off, (xr>>11)&0xfffff, xr&0x1ff, xr&0x80000600, blkix, rowix%127 );
708     return off;
709     }
710     /* deleted */
711     log_msg( LOG_INFO,
712     "offset for rowid %d is %d (blk %d pos %d) flg 0x%08x at %d[%d]",
713     rowid, off, (xr>>11)&0xfffff, xr&0x1ff, xr&0x80000600, blkix, rowix%127 );
714     return 0;
715     } /* getOff */
716    
717    
/* read the master file record at byte offset off and convert it
	to an internal record.
	nxtoff  if not 0, receives the offset of the following record
	        (rounded up to even, and to the next 512 byte block where the
	        block position would exceed 498), or -1 if that lies beyond
	        the end given by the control record, or on error
	@return a newly allocated record (to be released with mFree) whose
		first int is set to db->head.dbid and whose field values are
		copied contiguously behind the internal directory;
		0 on any error, for a deleted record (negative length)
		and for a record with nonzero status
*/
static int* getMfr ( LDb *db, int off, int *nxtoff )
{
	/* private copy of the MFR descriptor, since it is modified below
		to describe this particular record */
	struct mfcxstr {
		int xstr[LSTR_LONGS(7+3)];
	} my = *(struct mfcxstr *)DB_XSTR( db, LSET_MST, LSTR_MFR );
	int head[1+7+3];
	int len = 0, base;
	int *rec = 0;
	char *buf = 0;
#ifdef alloca
	int notalloca = 0; /* set if buf came from mAlloc and must be freed */
#endif

	LOG_DBG( LOG_VERBOSE, "getting MFR at off %d", off );
	if ( 498 < off % 512 )
		log_msg( LOG_WARN, "blk pos > 498 in offset 0x%08x", off );
	/* first pass: read the record header only, to learn the length */
	*head = *my.xstr;
	if ( readrec( head, db->mst[MST_MST], off, my.xstr ) ) {
		log_msg( LOG_ERROR, "\twhen reading MFR head at %d", off );
		return 0;
	}
	/* log_str( LOG_VERBOSE, head, lstrlib[LSET_MST].name[LSTR_MFR] ); */
	len = head[LMFR_RECL];
	LOG_DBG( LOG_VERBOSE, "got MFR %d reclen %d", head[LMFR_MFN], len );
	if ( nxtoff ) {
		*nxtoff = off + (0 < len ? len : -len); /* an oddity */
		if ( 1 & *nxtoff ) /* an oddity */
			(*nxtoff)++; /* round up to even */
		if ( 498 < *nxtoff % 512 ) {
			/* round up to the start of the next block */
			*nxtoff += 512;
			*nxtoff &= ~0x1ff;
		}
		if ( *nxtoff > db->mfc[LMFC_NMFB]*512 + db->mfc[LMFC_NMFP] ) {
			LOG_DBG( LOG_VERBOSE, "at end of db: %d > %d*512+%hd",
				*nxtoff, db->mfc[LMFC_NMFB], db->mfc[LMFC_NMFP] );
			*nxtoff = -1;
		}
	}
	if ( len < 0 ) {
		log_msg( LOG_INFO, "found deleted rec len %hd at offset %d", len, off );
		return 0;
	}

	/* check external base length */
	base = LONG2OFF(my.xstr[LSTR_XRLO])
		+ head[LMFR_NVF]*LONG2LEN(my.xstr[LSTR_XRLO]);
	if ( 0 > head[LMFR_BASE] || 0 > head[LMFR_NVF]
		|| 0x8fff < head[LMFR_NVF]
		|| len < head[LMFR_BASE] || len < base
		|| (head[LMFR_NVF] && head[LMFR_BASE] < base)
	) {
		log_msg( LOG_ERROR,
			"bad len %d base %d nvf %d need base %d at offset %d",
			len, head[LMFR_BASE], head[LMFR_NVF], base, off );
		/* check alignment problem */
		/* repeat the test with the values shifted by one member; if that
			looks sane, the file probably uses the other alignment */
		base = LONG2OFF(my.xstr[LSTR_XRLO])
			+ head[LMFR_STAT]*LONG2LEN(my.xstr[LSTR_XRLO]);
		if ( 0 > head[LMFR_NVF] || 0 > head[LMFR_STAT]
			|| 0x8fff < head[LMFR_STAT]
			|| len < head[LMFR_NVF] || len < base
			|| (head[LMFR_STAT] && head[LMFR_NVF] < base)
		)
			;
		else
			log_msg( LOG_ERROR, "probably alignment problem, try -format aligned" );
		goto cleanup;
	}

	/* set up external structure for this rec */
	my.xstr[LSTR_SIZE] |= head[LMFR_NVF] << 16; /* occ of rep. part */
	my.xstr[LSTR_XLEN] = len;
	/* internal base length */
	base = LSTRLEN( *my.xstr );
	/* internal len adjusted for the slightly longer base */
	my.xstr[LSTR_ILEN] = len + base - head[LMFR_BASE]; /* the buffer */

	rec = nrec( my.xstr );
	if ( ! rec ) {
		log_msg( LOG_SYSERR, "could not alloc MFR of len %hd", my.xstr[LSTR_ILEN] );
		goto cleanup;
	}
	buf = (char *)
#ifdef alloca
		alloca( len );
	/* first try faster alloca, but stack may be too limited for large records */
	notalloca = ! buf;
	if ( notalloca )
		buf =
#endif
		mAlloc( len );

	if ( ! buf ) {
		log_msg( LOG_SYSERR, "could not alloc MFR of len %hd", len );
		goto cleanup;
	}
	/* second pass: read the complete external record */
	if ( readblk( buf, len, db->mst[MST_MST], off ) ) {
		log_msg( LOG_ERROR, "\twhen reading MFR" );
		goto cleanup;
	}
#ifndef LDB_BIG_ENDIAN
	if ( LVAR_PAC != (DB_VARI & db->flags) ) {
#endif
		if ( convert( rec, buf, my.xstr ) ) {
			log_msg( LOG_ERROR, "\twhen converting MFR" );
			goto cleanup;
		}
#ifndef LDB_BIG_ENDIAN
	} else { /* 10% faster */
		/* the LVAR_PAC variant on a little endian host: buf is read
			directly through struct Mfr instead of going through convert */
		Mfr *mfr = (Mfr*)buf;
		short *s = &mfr->dict->tag;
		int *f = rec + LMFR__FL;
		int *e = f + 3*head[LMFR_NVF];
		rec[LMFR_MFN] = mfr->mfn;
		rec[LMFR_RECL] = mfr->recl;
		/*
		rec[LMFR_BWB] = mfr->bwbh<<16 | mfr->bwbl;
		rec[LMFR_BWP] = mfr->bwp;
		*/
		rec[LMFR_BASE] = mfr->base;
		rec[LMFR_NVF] = mfr->nvf;
		rec[LMFR_STAT] = mfr->stat;
		/* widen each directory entry from three shorts to three ints */
		while ( f < e ) {
			*f++ = *s++;
			*f++ = *s++;
			*f++ = *s++;
		}
	}
#endif

	if ( rec[LMFR_STAT] ) {
		log_msg( LOG_WARN, "found status %hd", rec[LMFR_STAT] );
		goto cleanok; /* drop the record, but leave *nxtoff as computed */
	}

	/* do a consistency check */
	if ( rec[LMFR_NVF] < 0 || rec[LMFR_BASE] < 0 ) {
		log_msg( LOG_ERROR, "found neg. field nvf %hd base %hd",
			rec[LMFR_NVF], rec[LMFR_BASE] );
		goto cleanup;
	}

	/* now care for the field values */
	{
		char *valsrc = buf+rec[LMFR_BASE]; /* values in the external record */
		char *recsta = ((char*)rec);
		char *valdst = recsta + base; /* values go behind the internal base */
		int xbufl = rec[LMFR_RECL] - rec[LMFR_BASE]; /* external value bytes */
		int sumlens = 0;
		int i;
		for ( i=0; i < rec[LMFR_NVF]; i++ ) {
			int *d = &rec[LMFR__FL + i*LMFR__RL];
			if ( d[LMFR_POS] < 0 || d[LMFR_LEN] < 0 ) {
				log_msg( LOG_ERROR,
					"bad field %d at off %d: negativ pos %hd or len %hd",
					i, off, d[LMFR_POS], d[LMFR_LEN] );
				goto cleanup;
			}
			if ( d[LMFR_POS] + d[LMFR_LEN] > xbufl ) {
				log_msg( LOG_ERROR,
					"bad field %d at off %d: pos %hd + len %hd > buf %d",
					i, off, d[LMFR_POS], d[LMFR_LEN], xbufl );
				goto cleanup;
			}
			/* the running total keeps valdst within the record buffer */
			sumlens += d[LMFR_LEN];
			if ( sumlens > xbufl ) {
				log_msg( LOG_ERROR,
					"bad fields at off %d: sum of lengths %d > buf %d",
					off, sumlens, xbufl );
				goto cleanup;
			}
			memcpy( valdst, valsrc+d[LMFR_POS], d[LMFR_LEN] );
			/* pos now is the offset from the start of the internal record */
			d[LMFR_POS] = valdst - recsta;
			valdst += d[LMFR_LEN];
		}
	} /* consistency check */
	rec[LMFR_BWB] = /* "used" bytes */
	rec[LMFR_RECL] = my.xstr[LSTR_ILEN];
	rec[LMFR_BWP] = rec[LMFR_NVF]; /* avail fields = used fields */
	rec[LMFR_BASE] = base;

	if ( LOG_TRACE <= log_lev )
		LOG_STR( rec, lstrlib[LSET_MST].name[LSTR_MFR] );
	goto done;

cleanup: /* error: additionally invalidate *nxtoff */
	if ( nxtoff )
		*nxtoff = -1;
cleanok: /* no record will be returned */
	if ( rec ) {
		mFree( rec );
		rec = 0;
	}
done:
	/* buf is freed only if it did not come from alloca */
	if ( buf
#ifdef alloca
		&& notalloca
#endif
	)
		mFree( buf );
	if ( rec )
		*rec = db->head.dbid;
	return rec;
} /* getMfr */
921    
922    
923     /** write the record.
924     If it doesn't yet have a mfn, assign one.
925     NOTE: on a BIG_ENDIAN, anything but the mfn and recl will be frobbed
926     after this call
927     */
928     static int putMfr ( LDb *db, Mfr *mfr )
929     {
930     int oldpos, newpos;
931     int ret;
932    
933     if ( !(db->flags & DB_WRITABLE) ) {
934     log_msg( LOG_ERROR, "db is not writable" );
935     return -1;
936     }
937     /* minimalist sanity check */
938     if ( mfr->nvf < 0
939     || mfr->base != 18 + 6*mfr->nvf
940     || mfr->base > mfr->recl
941     ) {
942     log_msg( LOG_ERROR, "bad nvf/base/recl %d/%d/%d ",
943     mfr->nvf, mfr->base, mfr->recl );
944     return -2;
945     }
946     db->flags |= DB_MODIFIED;
947     if ( mfr->mfn ) {
948     int block;
949     if ( db->mfc[LMFC_NMFN] > mfr->mfn )
950     oldpos = getOff( db, mfr->mfn, 0 );
951     else {
952     db->mfc[LMFC_NMFN] = mfr->mfn+1;
953     oldpos = 0;
954     }
955     mfr->bwp = 511 & oldpos;
956     block = 1 + (oldpos >> 9); /* blockno counting from 1 */
957     mfr->bwbl = 0xffff & block;
958     mfr->bwbh = block >> 16;
959     if ( db->mfc[LMFC_NMFN] <= mfr->mfn )
960     db->mfc[LMFC_NMFN] = mfr->mfn+1;
961     } else {
962     mfr->mfn = db->mfc[LMFC_NMFN]++; /* assign new mfn */
963     oldpos = 0;
964     mfr->bwbl = mfr->bwbh = mfr->bwp = 0;
965     }
966     mfr->stat = 0;
967     newpos = db->mflen;
968     if ( 498 < (newpos & 511) ) /* round up to next block boundary */
969     newpos = ~511 & (newpos + 14);
970     if ( 1 & newpos )
971     newpos++;
972     db->mflen = newpos + mfr->recl;
973     #ifdef LDB_BIG_ENDIAN
974     { /* swap swap swap */
975     Dict *d = mfr->dict;
976     short nvf = mfr->nvf;
977     SWI( mfr->mfn ); SWS( mfr->recl ); SWS( mfr->bwbl ); SWS( mfr->bwbh );
978     SWS( mfr->bwp ); SWS( mfr->base ); SWS( mfr->nvf ); SWS( mfr->stat );
979     for ( ; nvf--; d++ ) {
980     SWS( d->tag );
981     SWS( d->pos );
982     SWS( d->len );
983     }
984     }
985     #endif
986     ret = lio_pwrite( &db->mst[MST_MST], (char*)mfr, rvs(mfr->recl), newpos );
987     #ifdef LDB_BIG_ENDIAN
988     /* restore mnf, recl */
989     SWI( mfr->mfn );
990     SWS( mfr->recl );
991     #endif
992     if ( ret != mfr->recl )
993     return log_msg( ERR_TRASH, "could not write Mfr %d bytes got %d",
994     mfr->recl, ret );
995     getOff( db, mfr->mfn, (1 << (oldpos ? 9 : 10))
996     | (((newpos & 0xfffffe00) + 0x200) << 2) | (0x1ff & newpos) );
997    
998     return 0;
999     } /* putMfr */
1000    
1001    
1002     static int putRec ( LDb *db, Rec *rec )
1003     {
1004     int ret = 0, i;
1005     Mfr *mfr = 0;
1006     int buflen = 0;
1007     int reclen = 0;
1008     int contig = 1;
1009     #ifdef alloca
1010     int notalloca = 0;
1011     #endif
1012     const char *rbase = ((char *)rec) + rec->base;
1013     Dict *d;
1014     Field *f = rec->field;
1015     /* TODO: if not rec->len, delete ? */
1016     for ( i = rec->len; i--; f++ ) {
1017     if ( ! f->len )
1018     continue;
1019     if ( ! f->val )
1020     LOG_OTO( cleanup, ( ERR_FAULT, "bad rec NULL val" ) );
1021     contig = contig && (f->val == rbase + buflen);
1022     buflen += f->len;
1023     }
1024     reclen = 18 + 6*rec->len + buflen;
1025     if ( 1 & reclen )
1026     reclen++;
1027     mfr = (Mfr*)
1028     #ifdef alloca
1029     alloca( reclen );
1030     notalloca = ! mfr;
1031     if ( notalloca )
1032     mfr = (Mfr*)
1033     #endif
1034     mAlloc( reclen );
1035     if ( ! mfr )
1036     LOG_OTO( cleanup,
1037     ( ERR_NOMEM, "could not alloc MFR of len %hd", reclen ) );
1038     mfr->mfn = rec->rowid;
1039     mfr->recl = reclen;
1040     mfr->bwbl = mfr->bwbh = mfr->bwp = 0;
1041     mfr->base = 18 + 6*rec->len;
1042     mfr->nvf = rec->len;
1043     mfr->stat = 0;
1044     d = mfr->dict;
1045     f = rec->field;
1046     buflen = 0;
1047     for ( i = rec->len; i--; d++, f++ ) {
1048     d->tag = f->tag;
1049     d->pos = buflen;
1050     buflen += (d->len = f->len);
1051     }
1052     if ( contig )
1053     memcpy( ((char*)mfr)+mfr->base, rbase, buflen );
1054     else {
1055     char *mbase = ((char*)mfr)+mfr->base;
1056     d = mfr->dict;
1057     f = rec->field;
1058     for ( i = rec->len; i--; d++, f++ )
1059     if ( d->len )
1060     memcpy( mbase + d->pos, f->val, d->len );
1061     }
1062     ret = putMfr( db, mfr );
1063     if ( !ret && !rec->rowid )
1064     rec->rowid = mfr->mfn;
1065    
1066     cleanup:
1067     if ( mfr
1068     #ifdef alloca
1069     && notalloca
1070     #endif
1071     )
1072     mFree( mfr );
1073     return ret;
1074     } /* putRec */
1075    
1076    
1077    
1078     /* ************************************************************
1079     access functions for plaintext db
1080     */
1081    
1082     /*
1083     create a pointer from the least significant bytes of pos, len, fld
1084     buf must have db->ptrl bytes (up to 16 = 8+4+4)
1085     and the most strict alignment (i.e. 4 or 8) possible for db->ptrl
1086     returns buf
1087     */
1088     static char *mkptr ( char *buf, LDb *db,
1089     unsigned pos, unsigned len, unsigned fld )
1090     {
1091     switch ( db->ptr ) {
1092     case 0x0134: /* '4' */
1093     ((unsigned*)buf)[0] = pos;
1094     if ( ~0xff & fld ) fld = 0;
1095     #ifdef LDB_BIG_ENDIAN /* the first = high order 3 bytes are len */
1096     ((unsigned*)buf)[1] = (0xff&fld) | len<<8;
1097     #else /* the first = low order 3 bytes are len */
1098     ((unsigned*)buf)[1] = (0xffffff&len) | fld<<24;
1099     #endif
1100     return buf;
1101     case 0x0044: /* 'D' */
1102     ((unsigned*)buf)[0] = pos;
1103     ((unsigned*)buf)[1] = len;
1104     return buf;
1105     case 0x0035: /* '5' */
1106     #ifdef LDB_BIG_ENDIAN /* the first = high order 5 bytes are pos */
1107     *(lll*)buf = (0xffffff&len) | ((lll)pos)<<24;
1108     #else /* the first = low order 5 bytes are pos */
1109     *(lll*)buf = pos | ((lll)len)<<40;
1110     #endif
1111     return buf;
1112     }
1113     /* TODO */
1114     (void)fld;
1115     assert( 0 );
1116     return 0;
1117     }
1118    
1119    
1120     /*
1121     read pointer, return len
1122     if 0x0f00 & db->ptr, fld must not be 0, else *fld is untouched
1123     */
1124     static unsigned rdptr ( unsigned *pos, unsigned *fld, LDb *db, char *buf )
1125     {
1126     switch ( db->ptr ) {
1127     case 0x0134:
1128     *pos = *(unsigned*)buf;
1129     *fld = ((unsigned char *)buf)[7];
1130     #ifdef LDB_BIG_ENDIAN
1131     return ((unsigned*)buf)[1] >> 8;
1132     #else
1133     return 0xffffff & ((unsigned*)buf)[1];
1134     #endif
1135     case 0x0044:
1136     *pos = *(unsigned*)buf;
1137     return ((unsigned*)buf)[1];
1138     case 0x0035:
1139     #ifdef LDB_BIG_ENDIAN
1140     /* *pos = (*(unsigned*)(buf+1)); would bus error on sparc */
1141     *pos = (unsigned) (*(lll*)buf >> 24);
1142     return 0xffffff & (unsigned)*(lll*)buf; /* last 3 bytes */
1143     #else /* guess there is no little endian that needs alignment ? */
1144     *pos = *(unsigned*)buf; /* use low order = first 4 of first 5 bytes */
1145     return 0xffffff & (*(unsigned*)(buf+5));
1146     #endif
1147     }
1148     /* TODO */
1149     (void)fld;
1150     assert( 0 );
1151     return 0;
1152     }
1153    
1154    
1155     static void setPtr ( LDb *db, int mfn,
1156     unsigned pos, unsigned len, unsigned fld )
1157     {
1158     Ptr pt;
1159     if ( mfn < db->mmlen ) {
1160     mkptr( db->mmap + mfn*db->ptrl, db, pos, len, fld );
1161     return;
1162     }
1163     lio_pwrite( &db->mst[MST_XRF],
1164     mkptr( pt.r, db, pos, len, fld), db->ptrl, mfn*db->ptrl );
1165     } /* setPtr */
1166    
1167    
1168     static unsigned getPtr ( unsigned *pos, unsigned *fld, LDb *db, int mfn )
1169     {
1170     Ptr pt;
1171     return mfn < db->mmlen
1172     ? rdptr( pos, fld, db, db->mmap + mfn*db->ptrl )
1173     : db->ptrl == lio_pread( &db->mst[MST_XRF],
1174     pt.r, db->ptrl, mfn*db->ptrl )
1175     ? rdptr( pos, fld, db, pt.r )
1176     : 0;
1177     } /* getPtr */
1178    
1179    
#if 0
/* disabled stub, never compiled: does nothing and reports success */
static int putPlain ( LDb *db, Rec *rec )
{
	(void)db;
	(void)rec;
	return 0;
} /* putPlain */
#endif
1186    
1187     /**
1188     get text
1189     the original text is read contigously at base.
1190     the record is then cooked as requested:
1191     0 well done: do full fixup,
1192     apply conversions and create fields.
1193     1 english: do not create fields (rec->fields is 0), apply no conversions,
1194     but set rec->len to actual number of fields (counting if necessary).
1195     2 raw:
1196     set len only if it's known from the pointer
1197     */
1198     static Rec *dText ( LDb *db, int mfn, int raw )
1199     {
1200     unsigned base, sz, pos, len, fld; /* #fields actually used */
1201     unsigned n = 0; /* known #fields */
1202     Rec *r, *x;
1203     Field *f, *fe;
1204     char *p, *q, *e;
1205    
1206     len = getPtr( &pos, &n, db, mfn );
1207     LOG_DBG( LOG_TRACE, "dText %d pos %d len %d fld %d", mfn, pos, len, n );
1208     if ( !len )
1209     return 0;
1210     if ( raw )
1211     fld = 0;
1212     else if ( !(fld = n) ) {
1213     fld = len / 36; /* assume one (costing 12 bytes) per 36 bytes data */
1214     if ( fld < 8 ) /* small record is likely to have some short fields */
1215     fld = 8;
1216     }
1217     base = BASESZ(fld);
1218     sz = base + len;
1219     r = (Rec*)mAlloc( sz );
1220     p = ((char*)r) + base;
1221     if ( (int)len != lio_pread( &db->mst[MST_MST], p, len, pos ) ) {
1222     mFree( r );
1223     return 0;
1224     }
1225     LOG_DBG( LOG_TRACE, "'%.*s'", len, p );
1226     r->dbid = db->head.dbid;
1227     r->rowid = mfn;
1228     r->used = r->bytes = sz;
1229     r->fields = fld;
1230     r->base = base;
1231     r->len = n;
1232     if ( raw && (n || 1 != raw) )
1233     return r;
1234     e = p + len;
1235     f = r->field; /* next field to assign */
1236     fe = f + fld; /* end of assignable fields */
1237     /*
1238     loop through buffer lines to count a/o assign
1239     count them in n
1240     while f < fe, also fix and assign them
1241     */
1242     for ( n=0;;) { /* possibly 2 passes needed */
1243     for ( ;p < e; p = q+1 ) {
1244     if ( !(q = memchr( p, LF, e-p )) )
1245     q = e; /* > p */
1246     if ( TAB != *p || !n ) {
1247     if ( f < fe ) {
1248     p += a2il( p, q-p, &f->tag );
1249     if ( p < q && TAB == *p )
1250     p++;
1251     f->len = q - (f->val = p);
1252     f++; /* f == r->field+n, as long as we don't hit fe */
1253     }
1254     n++;
1255     continue;
1256     }
1257     /* continuation line */
1258     if ( f != r->field+n )
1259     continue;
1260     /* we ARE assigning & didn't loose sync at fe */
1261     /* append to previous */ {
1262     char *dest = (char*)f[-1].val + f[-1].len;
1263     int dist = p - dest, l = q-p;
1264     *p = LF;
1265     memmove( dest, p, l );
1266     memset( q-dist, ' ', dist ); /* cleanup */
1267     f[-1].len += l;
1268     }
1269     }
1270     /* now n != 0, since initially p < e, since len != 0 */
1271     if ( r->len && r->len != (int)n ) {
1272     log_msg( LOG_WARN, "rec %d len %d != ptr %d", mfn, n, r->len );
1273     break;
1274     }
1275     if ( raw || (int)n <= r->fields ) /* all counted/assigned */
1276     break;
1277     /* extend the record to n fields */
1278     log_msg( LOG_INFO, "extending rec %d %d -> %d fields", mfn, fld, n );
1279     fld = n;
1280     base = BASESZ(fld);
1281     sz = base + len;
1282     x = (Rec*)mAlloc( sz );
1283     x->dbid = db->head.dbid;
1284     x->rowid = mfn;
1285     x->used = x->bytes = sz;
1286     x->fields = fld;
1287     x->base = base;
1288     x->len = n;
1289     p = ((char*)x) + base;
1290     e = p + len;
1291     q = ((char*)r) + r->base;
1292     memcpy( p, q, len );
1293     memcpy( x->field, r->field, r->fields*sizeof(Field) );
1294     for ( f=x->field, n=r->fields; n--; )
1295     (f++)->val += p-q;
1296     n = r->fields;
1297     mFree( r );
1298     r = x;
1299     f = r->field + n;
1300     fe = r->field + fld;
1301     p = (char*)f[-1].val + f[-1].len;
1302     /* seek behind the LF that delimited the last field */
1303     while (LF != *p++)
1304     ;
1305     }
1306     r->len = n;
1307     if ( !raw && (DB_TXTMODE & db->flags) )
1308     for ( f = r->field, fe = r->field + r->fields; f < fe; f++ )
1309     for ( p = (char*)f->val, e = p+f->len; (p = memchr(p,VT,e-p)); )
1310     *p++ = LF;
1311     return r;
1312     } /* dText */
1313    
1314    
1315     static int pText ( LDb *db, Rec *r, const char *mark )
1316     {
1317     char buf[128 + 65536];
1318     unsigned pos = 0, len = 0, fld, off;
1319     char *p, *b;
1320     int ret;
1321    
1322     if ( !(db->flags & DB_WRITABLE) ) {
1323     log_msg( LOG_ERROR, "db is not writable" );
1324     return -1;
1325     }
1326     if ( ! r->rowid )
1327     r->rowid = db->mfc[LMFC_NMFN]++; /* assign new mfn */
1328     else if ( db->mfc[LMFC_NMFN] <= r->rowid )
1329     db->mfc[LMFC_NMFN] = r->rowid + 1;
1330     else {
1331     fld = 0;
1332     len = getPtr( &pos, &fld, db, r->rowid );
1333     }
1334     p = b = 32768 >= r->used ? buf : mAlloc(128+2*r->used);
1335     *p++ = 'W';
1336     *p++ = TAB;
1337     p += u2a( p, r->rowid );
1338     *p++ = TAB;
1339     if ( pos ) {
1340     p += u2a( p, pos );
1341     *p++ = '.';
1342     p += u2a( p, len );
1343     if ( fld ) {
1344     *p++ = '.';
1345     p += u2a( p, fld );
1346     }
1347     }
1348     *p++ = TAB;
1349     if ( mark ) {
1350     int l = strlen(mark);
1351     if ( l > 31 ) {
1352     log_msg( LOG_WARN, "mark '%.48s'%s has length %d",
1353     mark, l<48 ? "" : "...", l );
1354     l = 31;
1355     }
1356     memcpy( p, mark, l );
1357     p += l;
1358     } else {
1359     timeGtfm( p, 0 );
1360     p += 17;
1361     }
1362     *p++ = LF;
1363     off = p - b;
1364     p += len = rSerB( p, r );
1365     if ( len > 1 ) /* don't count 2 trailing LFs */
1366     len -= 2;
1367     db->flags |= DB_MODIFIED;
1368     pos = db->mflen;
1369     db->mflen += p - b;
1370     ret = lio_pwrite( &db->mst[MST_MST], b, p - b, pos );
1371     if ( ret == p - b ) {
1372     setPtr( db, r->rowid, pos+off, len, r->len );
1373     ret = 0;
1374     }
1375     if ( buf != b )
1376     mFree( b );
1377     return ret;
1378     } /* pText */
1379    
1380    
1381     /* ************************************************************
1382     utilities
1383     */
1384    
1385     static int search ( LDb *db, const char *key, LdbPost *post,
1386     Rec *rec, DXLoop *lp )
1387     {
1388     int i, j, prefix, idx, ret, ock;
1389     int pos;
1390     int *leaf, *entry;
1391     char *term;
1392     int *xstr;
1393     struct { /* terms cursor */
1394     char key[LDB_MAX_KEYLEN+1]; /* key or key prefix */
1395     short klen; /* key length to compare */
1396     char imin; /* minimum index to search */
1397     char imax; /* maximum index to search */
1398     int leaf[LDB_INDEXES][LDB_TERMBUF]; /* one leaf buffer per index */
1399     short lpos[LDB_INDEXES]; /* next position in leaf, -1 if done */
1400     } crs;
1401     short klen; /* length for initial locate */
1402     int block[128]; /* buffer to read one block */
1403     int blockpos = 0;
1404     LdbP *p = 0;
1405    
1406     if ( ! key )
1407     key = "$";
1408     /* prepare cursor struct */
1409     memset( &crs, 0, sizeof(crs) ); /* tabula rasa */
1410     crs.klen = strlen( key );
1411     /* check for prefix match */
1412     if ( post )
1413     prefix = LDB_PFX & post->mode;
1414     else if ( (prefix = crs.klen && '$' == key[crs.klen - 1]) )
1415     crs.klen--;
1416     /* check out minimum index to search */
1417     for ( crs.imin=0; crs.klen > db->tlen[(int)crs.imin]; )
1418     if ( LDB_INDEXES == ++(crs.imin) )
1419     return log_msg( ERR_INVAL, "bad keylen %d key '%.64s'", crs.klen, key );
1420     /* prepare key */
1421     memset( crs.key, ' ', sizeof(crs.key)-1 );
1422     {
1423     unsigned char *uc = (unsigned char*)crs.key;
1424     unsigned char *uk = (unsigned char*)key;
1425     for ( i=crs.klen; i--; )
1426     uc[i] = db->ctab[LCS_UCASE].c[ uk[i] ];
1427     }
1428     if ( prefix )
1429     crs.imax = LDB_INDEXES-1;
1430     else {
1431     crs.imax = crs.imin;
1432     crs.klen = db->tlen[(int)crs.imin];
1433     }
1434     log_msg( LOG_INFO, "search for '%.*s'%c", crs.klen, crs.key, prefix?'$':' ' );
1435     key = crs.key;
1436     klen = crs.klen;
1437     if ( rec && rec->len ) {
1438     /* use last key from record to locate starting position */
1439     key = rec->field[rec->len-1].val;
1440     klen = rec->field[rec->len-1].len;
1441     rec->len = 0;
1442     }
1443    
1444     for ( i=crs.imin; i<=crs.imax; i++ ) { /* find leaf positions */
1445     int nFile = INV_N01 + 2*i; /* node file index */
1446     int nStr = LSTR_N01 + 2*i; /* node struct index */
1447     int *nstr = DB_XSTR( db, LSET_INV, nStr );
1448     int lvl;
1449     short cmplen = klen <= db->tlen[i] ? klen : db->tlen[i];
1450     pos = db->cnt[i][LCNT_POSR]; /* pos of root record */
1451     j = 0;
1452     for ( lvl = 0; 0<pos; lvl++ ) { /* traverse node levels */
1453     int node[102];
1454     LOG_DBG( LOG_DEBUG, "node %d at %d lvl %d", pos, j, lvl );
1455     assert( (int)sizeof(node) >= nstr[LSTR_ILEN] );
1456     *node = *nstr;
1457     ret = readlog( node, db->inv[nFile], 1-pos, db, LSET_INV, nStr );
1458     if ( pos != node[LN0X_POS] /* wrong address */
1459     || i+1 != node[LN0X_TYPE] /* wrong type */
1460     || node[LN0X_OCK] < 1 /* no keys */
1461     || 2*db->cnt[i][LCNT_ORDN] < node[LN0X_OCK] /* too many keys */
1462     )
1463     return log_msg( ERR_TRASH, "bad node pos %d type %d keys %d",
1464     node[LN0X_POS], node[LN0X_TYPE], node[LN0X_OCK]
1465     );
1466     ock = node[LN0X_OCK];
1467     for ( j=1;
1468     j<ock && 0 < (ret = memcmp( key,
1469     ((char*)node)+node[j*LN0X__RL+LN0X__FL+LN0X_KEY], cmplen ));
1470     j++ )
1471     ;
1472     /* now j is at end or on next index not less */
1473     if ( j==ock /* end */
1474     || ret /* index is greater than key */
1475     || prefix /* backtrack even on exact match */
1476     )
1477     j--; /* step into last ock with lower key */
1478     pos = node[LN0X__FL + j*LN0X__RL + LN0X_REF];
1479     } /* for lvl */
1480     /* got some negative ref to leaf; set leaf pos */
1481     crs.leaf[i][LL0X_PS] = -pos;
1482     /*
1483     since the lpos and LL0X_OCK are both 0 by the memset above,
1484     we will initially load the leaves
1485     */
1486     } /* for indexes */
1487     /* done preparing cursor */
1488    
1489     if ( post ) /* prepare for postings */
1490     p = post->p;
1491     xstr = DB_XSTR( db, LSET_INV, LSTR_IFP );
1492    
1493     for (;;) { /* loop terms in prefix mode */
1494     /* vars for postings: */
1495     int infb, infp; /* block and pos where to read postings */
1496     int added; /* postings added or marked per term */
1497     int blkno; /* postings block number */
1498     int remain = 0; /* postings to fetch from next block of segment */
1499     int ifp[LIFP__FL]; /* postings header */
1500    
1501     idx = -1; /* index to use */
1502     /* compare index terms, load leafes if needed */
1503     for ( i = crs.imin; i <= crs.imax; i++ ) {
1504     short cmplen = klen <= db->tlen[i] ? klen : db->tlen[i];
1505     leaf = crs.leaf[i];
1506     if ( leaf[LL0X_OCK] <= crs.lpos[i] ) { /* load */
1507     int lFile = INV_L01 + 2*i; /* leaf file index */
1508     int lStr = LSTR_L01 + 2*i; /* leaf struct index */
1509     int *lstr = DB_XSTR( db, LSET_INV, lStr );
1510    
1511     crs.lpos[i] = -1;
1512     reread:
1513     if ( ! (pos = leaf[LL0X_PS]) )
1514     continue;
1515     LOG_DBG( LOG_DEBUG, "leaf %d", pos );
1516     assert( (int)sizeof(crs.leaf[i]) >= lstr[LSTR_ILEN] );
1517     *leaf = *lstr;
1518     ret = readlog( leaf, db->inv[lFile], 1-pos, db, LSET_INV, lStr );
1519     if ( pos != leaf[LL0X_POS] /* wrong address */
1520     || i+1 != leaf[LL0X_TYPE] /* wrong type */
1521     || leaf[LL0X_OCK] < 1 /* no keys */
1522     || 2*db->cnt[i][LCNT_ORDN] < leaf[LL0X_OCK] /* too many keys */
1523     )
1524     return log_msg( ERR_TRASH, "bad leaf pos %d type %d keys %d",
1525     leaf[LL0X_POS], leaf[LL0X_TYPE], leaf[LL0X_OCK] );
1526     ock = leaf[LL0X_OCK];
1527     /* advance to first term which is not too small
1528     (should be needed only for first leaf of an index)
1529     */
1530     for ( j=0;
1531     j<ock && (0 < (ret = memcmp( key,
1532     ((char*)leaf)+leaf[LL0X__FL + j*LL0X__RL + LL0X_KEY], cmplen ))
1533     || (!ret && key!=crs.key) ); /* skip exact while locating */
1534     j++ )
1535     ;
1536     if ( ock == j )
1537     goto reread; /* start over w/ next leaf of same index */
1538     if ( 0 <= ret
1539     || (key!=crs.key && !memcmp( crs.key,
1540     ((char*)leaf)+leaf[LL0X__FL + j*LL0X__RL + LL0X_KEY], crs.klen ))
1541     )
1542     crs.lpos[i] = j;
1543     /* else let -1 */
1544     } /* if reload */
1545     if ( 0 > crs.lpos[i] )
1546     continue;
1547     if ( 0 > idx ) {
1548     idx = i;
1549     continue;
1550     }
1551     /* compare this index next term to that of index idx */
1552     /* assume that index w/ lower number has shorter keys */
1553     ret = memcmp(
1554     ((char*)leaf)+leaf[LL0X__FL + crs.lpos[i]*LL0X__RL + LL0X_KEY],
1555     ((char*)crs.leaf[idx])+
1556     crs.leaf[idx][LL0X__FL + crs.lpos[idx]*LL0X__RL + LL0X_KEY],
1557     db->tlen[idx] );
1558     if ( 0 > ret )
1559     idx = i;
1560     }
1561     if ( 0 > idx )
1562     goto done;
1563     j = crs.lpos[idx];
1564     leaf = crs.leaf[idx];
1565     entry = leaf + LL0X__FL + j*LL0X__RL;
1566     term = ((char*)leaf) + entry[LL0X_KEY];
1567     if ( memcmp( crs.key, term, crs.klen ) )
1568     goto done;
1569     crs.lpos[idx]++;
1570    
1571     if ( rec ) { /* record the term */
1572     /* field to assign */
1573     Field *f = rec->field + rec->len;
1574     short tlen = db->tlen[idx];
1575     /* end of available buffer */
1576     char *b = rec->len
1577     ? (char*)f[-1].val /* before previously assigned field */
1578     : ((char*)rec + rec->bytes); /* end of record */
1579     while ( tlen && ' ' == term[tlen-1] )
1580     tlen--;
1581     b -= tlen;
1582     if ( b < (char*)(f+1) ) /* no space left on device */
1583     goto done;
1584     /* probably we're nuking the locator now: */
1585     memcpy( b, term, tlen );
1586     f->tag = 0;
1587     f->val = b;
1588     f->len = tlen;
1589     rec->len++;
1590     /* reset key from locator to prefix */
1591     key = crs.key;
1592     klen = crs.klen;
1593     }
1594    
1595     if ( ! post && ! lp )
1596     continue;
1597     /* collect postings */
1598     infb = entry[LL0X_INFB];
1599     infp = entry[LL0X_INFP];
1600     /* the IFP file is organized in blocks of 128 longs.
1601     1st int is block number followed by 127 data.
1602     postings are organized in chained segments so that each segment
1603     fits within one such block. a segment has five longs header,
1604     giving number of postings and pointer to next segment.
1605     */
1606     added = 0;
1607     for ( blkno=0; infb; blkno++ ) { /* segments */
1608     LdbP merge[127/2]; /* buffer to collect new postings */
1609     int *base; /* start of data */
1610     int *b; /* start of postings */
1611     int n; /* max postings in this seg's 1st block */
1612     int xlen; /* external length to read */
1613     int f = post ? post->fil - 1 : 0; /* highest pos to consider in given postings */
1614     int m = 0; /* fill merge buffer */
1615     int k; /* loop segment */
1616    
1617     if ( infp > 127-2-5 ) {
1618     return log_msg( ERR_TRASH, "found bad IFP pos %d blk %d for %.*s",
1619     infp, blkno, klen, key );
1620     }
1621     if ( remain ) { /* consecutive block of same segment */
1622     n = remain;
1623     if ( n > 127/2 )
1624     n = 127/2;
1625     xlen = 8*n;
1626     } else {
1627     n = (127 - 5 - infp)/2;
1628     xlen = 20 + 8*n;
1629     }
1630     pos = (infb - 1) * 512 + (infp + 1) * 4;
1631     if ( blockpos
1632     && !((pos-blockpos) >> 9) /* 0 <= (pos-blockpos) < 512 */
1633     && pos+xlen <= blockpos+ 1 + (0x1ff & ~blockpos)
1634     )
1635     base = block + (pos - blockpos)/sizeof(int);
1636     else {
1637     int blklen = 1 + (0x1ff & ~pos);
1638     assert( xlen <= blklen );
1639     assert( blklen <= (int)sizeof(block) );
1640     assert( 0 == (0x1ff & (pos + blklen)) );
1641     ret = readblk( block, blklen, db->inv[INV_IFP], pos );
1642     if ( ret )
1643     return log_msg( ERR_IO, "\twhen reading IFP" );
1644     blockpos = pos;
1645     base = block;
1646     }
1647     if ( remain ) { /* no header to convert */
1648     remain -= n;
1649     b = base; /* no header */
1650     } else {
1651     assert( (int)sizeof(ifp) >= xstr[LSTR_ILEN] );
1652     *ifp = *xstr;
1653     ret = convert( ifp, (char *)base, xstr );
1654     if ( ret )
1655     return log_msg( ERR_TRASH, "\twhen converting IFP header" );
1656     if ( n > ifp[LIFP_SEGP] )
1657     n = ifp[LIFP_SEGP];
1658     remain = ifp[LIFP_SEGP] - n;
1659     b = base+5; /* after header */
1660     }
1661     LOG_DBG( LOG_VERBOSE,
1662     "key %d.%d '%.*s' blk %d post %d/%d r %d xlen %d at b/p %d.%d=%d",
1663     leaf[LL0X_PS], j, db->tlen[idx], term, blkno,
1664     n, ifp[LIFP_TOTP], remain, xlen, infb, infp, pos );
1665     if ( LOG_DO( LOG_TRACE ) )
1666     LOG_STR( ifp, lstrlib[ LSET_INV ].name[ LSTR_IFP ] );
1667     assert( (size_t)n <= sizeof(merge)/sizeof(merge[0]) );
1668     if ( lp ) {
1669     Key kbf;
1670     Hit hit;
1671     unsigned char tlen = (unsigned char) db->tlen[idx];
1672     while ( tlen && ' ' == term[tlen-1] )
1673     tlen--;
1674     memcpy( kbf.byt, term, kbf.len = tlen );
1675     for ( k=0; k<n; k++ ) { /* callback needs 'em sorted */
1676     int ppos;
1677     unsigned char *c = (unsigned char *)&b[k*2];
1678     LdbP e; /* the entry */
1679     #ifdef LDB_BIG_ENDIAN
1680     memcpy(e.bytes,c,8);
1681     #else
1682     e.bytes[0] = c[7]; e.bytes[1] = c[6];
1683     e.bytes[2] = c[5]; e.bytes[3] = c[4];
1684     e.bytes[4] = c[3]; e.bytes[5] = c[2];
1685     e.bytes[6] = c[1]; e.bytes[7] = c[0];
1686     #endif
1687     ppos = LDBP_POS( &e );
1688     hit.mfn = (unsigned)LDBP_ROW( &e );
1689     hit.tag = (unsigned short)LDBP_TAG( &e );
1690     hit.occ = (unsigned short)(ppos >> 16);
1691     hit.pos = (unsigned short)ppos;
1692     if ( lp->cb( lp->me, &kbf, &hit ) )
1693     goto done;
1694     }
1695     }
1696     if ( post ) for ( k=n; k--; ) {
1697     /* loop backwards (for the fun of it) postings in segment */
1698     int prow, ptag, ppos;
1699     unsigned char *c = (unsigned char *)&b[k*2];
1700     LdbP e; /* the entry */
1701     LdbP samerow; /* highest possible entry w/ same row as e */
1702     #ifdef LDB_BIG_ENDIAN
1703     /* the 8 bytes of a posting are BIG ENDIAN ! */
1704     memcpy(e.bytes,c,8);
1705     #else
1706     e.bytes[0] = c[7]; e.bytes[1] = c[6];
1707     e.bytes[2] = c[5]; e.bytes[3] = c[4];
1708     e.bytes[4] = c[3]; e.bytes[5] = c[2];
1709     e.bytes[6] = c[1]; e.bytes[7] = c[0];
1710     #endif
1711     prow = LDBP_ROW( &e );
1712     ptag = LDBP_TAG( &e );
1713     ppos = LDBP_POS( &e );
1714     LOG_DBG( LOG_VERBOSE, "post %d.%hd pos %06x key '%.*s'",
1715     prow, ptag, ppos, db->tlen[idx], term );
1716     if ( 0 >= ptag /* bad tag */
1717     || !prow || prow >= db->mfc[LMFC_NMFN] /* bad mfn */
1718     )
1719     continue;
1720     if ( ! post
1721     || (post->cut && prow >= post->cut)
1722     || (post->tag && post->tag != ptag)
1723     )
1724     continue;
1725     if ( prow < post->skp ) /* quickly bail out on skip condition */
1726     break;
1727     LDBP_SETROWTOP( &samerow, &e ); /* for mfn comparison */
1728     /* sweep down to postings for the same row as e ... */
1729     while ( f >= 0 && LDBP_GT( p+f, &samerow ) )
1730     f--;
1731     if ( LDB_AND & post->mode ) {
1732     int l;
1733     /* loop postings for same row, mark all (that are near enough) */
1734     LDBP_SETROWBOT( &samerow, &e ); /* for mfn comparison */
1735     /* NOTE: postings for row are GT than bottom even if marked */
1736     for ( l = f; l>=0 && LDBP_GT( p+l, &samerow ); l-- ) {
1737     if ( post->near ) {
1738     int dist;
1739     if ( ptag != LDBP_TAG( p+l ) ) continue;
1740     if ( LDB_NEAR_G != post->near ) {
1741     dist = LDBP_POS( p+l ) - LDBP_POS( &e );
1742     if ( dist < 0 ) dist = -dist;
1743     if ( 0 < post->near
1744     ? post->near < dist
1745     : -post->near != dist /* exact $$$$ */
1746     ) continue;
1747     }
1748     }
1749     LDBP_SETMARK( p+l );
1750     added++;
1751     }
1752     } else { /* OR mode */
1753     int add;
1754     if ( ! post->near ) /* add if row not found: ignore details */
1755     add = 0 > f || prow > LDBP_ROW( p+f );
1756     else { /* add if no exact match */
1757     int l;
1758     /* NOTE: we don't use mark bit in OR mode, do we ? */
1759     for ( l = f; l>=0 && LDBP_GT( p+l, &e ); l-- )
1760     ;
1761     add = 0 > l || LDBP_GT( &e, p+l );
1762     }
1763     if ( add )
1764     merge[ m++ ] = e;
1765     }
1766     } /* for postings in segment */
1767     if ( m ) { /* merge in the merge buffer */
1768     LdbP *mm = merge;
1769     added += m;
1770     for ( k = post->fil += m; m && k--; ) {
1771     LdbP src;
1772     if ( k < m || LDBP_GT( mm, &p[k-m] ) ) {
1773     src = *mm++;
1774     m--;
1775     LOG_DBG( LOG_DEBUG, "merging %d at %d", LDBP_ROW(&src), k );
1776     } else
1777     src = p[k-m];
1778     if ( k < post->len )
1779     p[k] = src;
1780     else { /* set cut */
1781     int row = LDBP_ROW( &src );
1782     if ( row < post->cut || !post->cut )
1783     post->cut = row;
1784     }
1785     }
1786     if ( post->fil > post->len )
1787     post->fil = post->len;
1788     if ( post->cut ) /* postings for cut row are unreliable */
1789     while ( post->fil && post->cut <= LDBP_ROW(p+post->fil-1) )
1790     post->fil--;
1791     }
1792     if ( remain ) { /* advance to start of next block */
1793     infb++;
1794     infp = 0;
1795     } else {
1796     infb = ifp[LIFP_NXTB];
1797     infp = ifp[LIFP_NXTP];
1798     }
1799     } /* for segments */
1800     LOG_DBG( LOG_VERBOSE, "added %d postings for key '%.*s'",
1801     added, db->tlen[idx], term );
1802     } /* for terms in prefix/postings mode */
1803     done:
1804     if ( post /* fixup */
1805     && LDB_AND & post->mode && !(LDB_KEEPMARKS & post->mode)
1806     ) {
1807     int mark = LDB_NOT & post->mode ? 0 : 0x8000;
1808     j=0;
1809     for ( i=0; i<post->fil; i++ )
1810     if ( mark == LDBP_MARK(p+i) ) {
1811     LDBP_CLRMARK(p+i);
1812     p[j++] = p[i];
1813     }
1814     post->fil = j;
1815     }
1816     return ! rec ? 0 : rec->len;
1817     } /* search */
1818    
1819    
/*
	Return the index of the last path separator in path,
	-1 if there is none or path is 0.
	'/' always counts as separator, '\\' only if WIN32 is defined.
*/
static int ldb_last_path_sep (const char *path) {
	const char *sep;
	int last = -1;

	if (! path) {
		return last;
	}
	sep = strrchr (path, '/');
	if (sep) {
		last = (int)(sep - path);
	}
#ifdef WIN32
	sep = strrchr (path, '\\');
	if (sep && (int)(sep - path) > last) {
		last = (int)(sep - path);
	}
#endif
	return last;
} /* ldb_last_path_sep */
1841    
1842    
1843     static int ldb_open (const char *dbname, Rec *dbpar, Rec *syspar, Fdt *fdt)
1844     {
1845     LDb ndb, *db;
1846     int i, plen, sz, dbid, lck = LIO_TLOCK; /* WLOCK only on special demand */
1847     int ret = 0, invret = -1, lbtret = 0, autoformat = 1, writable = -1;
1848     int uc = -1, gotopt = 0, txtfd = 0, copyidx = 0;
1849     char *autoenc = 0;
1850     char *p, *q;
1851     char buf[65536+1]; /* need 64K buf for copying DO NOT SHRINK !!! */
1852     char path[ PATH_MAX ];
1853    
1854     memset( &ndb, 0, sizeof(ndb) );
1855     /* these should be 0 by memsetting to 0 anyway ... */
1856     ndb.path = 0; ndb.mmap = 0;
1857     ndb.flags |= DB_MMAP; /* it mean's: we'll try */
1858    
1859     /* loglevel */
1860     if ( 0 <= (i = rInt2(dbpar, syspar, OPENISIS_SLOGV, -1)) )
1861     cLog( i, 0 );
1862    
1863     /* prepare name ... */
1864     if (! dbname) {
1865     if (! dbpar)
1866     return log_msg( ERR_FAULT, "ldb_open: dbname not given");
1867     dbname = rString (dbpar, OPENISIS_DNAME, 0, buf, sizeof(buf));
1868     if (! dbname)
1869     return log_msg( ERR_FAULT, "ldb_open: no dbname parameter");
1870     }
1871     plen = strlen (dbname);
1872     if (0 >= plen)
1873     return log_msg( ERR_FAULT, "ldb_open: empty dbname");
1874     if ( 4 < plen ) {
1875     if ( !memcmp( ".mst", dbname+plen-4, 4 ) ) {
1876     uc = 0;
1877     plen -= 4;
1878     } else if ( !memcmp( ".MST", dbname+plen-4, 4 ) ) {
1879     uc = OPEN_UC;
1880     plen -= 4;
1881     }
1882     }
1883     if ( sizeof(buf) <= (unsigned)plen
1884     || sizeof(path) <= (unsigned)(plen + 4 + 1)
1885     )
1886     return log_msg( ERR_FAULT, "ldb_open: dbname too long '%s'", dbname);
1887     if ('/' == dbname[plen - 1]
1888     #ifdef WIN32
1889     || '\\' == dbname[plen - 1]
1890     #endif
1891     )
1892     return log_msg( ERR_FAULT,
1893     "ldb_open: must not specify directory as dbname '%s'", dbname);
1894     if (DBNLEN > plen)
1895     strcpy(ndb.head.name, dbname);
1896     else {
1897     int i1 = 1 + plen - DBNLEN ;
1898     int i2 = ldb_last_path_sep (dbname);
1899     if (0 <= i2 && plen > ++i2 && i2 > i1) {
1900     i1 = i2;
1901     }
1902     strncpy(ndb.head.name, dbname + i1, DBNLEN - 1) [DBNLEN - 1] = 0;
1903     log_msg( LOG_WARN, "ldb_open: truncating dbname '%s' to '%s'",
1904     dbname, ndb.head.name);
1905     }
1906     /* ... and path */
1907     strcpy(path, dbname);
1908     if (! IsAbsPath (path)) {
1909     int plen2;
1910     if ( (dbpar || syspar)
1911     && (p = rString2 (dbpar, syspar, OPENISIS_DPATH, buf, sizeof(buf)))
1912     ) {
1913     plen2 = strlen (p);
1914     if (sizeof(path) <= (unsigned)(plen + plen2 + 4 + 1 + 1))
1915     return log_msg( ERR_FAULT,
1916     "ldb_open: dbname or dbpath too long: %d %d '%s'",
1917     plen, plen2, path);
1918     memmove (path + 1 + plen2, path, 1 + plen);
1919     path[plen2] = '/';
1920     memcpy (path, p, plen2);
1921     plen += 1 + plen2;
1922     }
1923     if ( !IsAbsPath(path)
1924     && syspar
1925     && (p = rString(syspar, OPENISIS_SPATH, 0, buf, sizeof(buf)))
1926     ) {
1927     plen2 = strlen(p);
1928     if (sizeof(path) <= (unsigned)(plen + plen2 + 4 + 1 + 1))
1929     return log_msg( ERR_FAULT,
1930     "ldb_open: dbname or syspath too long: %d %d '%s'",
1931     plen, plen2, path);
1932     memmove(path + 1 + plen2, path, 1 + plen);
1933     path[plen2] = '/';
1934     memcpy(path, p, plen2);
1935     plen += 1 + plen2;
1936     }
1937     } /* name and path */
1938    
1939     /* more init AFTER honoring verbosity */
1940     if ( ! init ) {
1941     lstr_auto(0);
1942     init = !0;
1943     }
1944    
1945     for ( dbid=0; dbid<dbs_len; dbid++ ) {
1946     if ( dbs[dbid].flags &&
1947     !strcmp( ndb.head.name, dbs[dbid].head.name ) ) {
1948     log_msg( LOG_INFO, "reopening %d '%s'", dbid, ndb.head.name );
1949     return dbid;
1950     }
1951     }
1952     /* go for slot */
1953     if ( dbid == dbs_len )
1954     for ( dbid=0; dbid<dbs_len && dbs[dbid].flags; dbid++ )
1955     ;
1956     if ( dbid == dbs_len )
1957     return -1;
1958     db = &dbs[dbid];
1959     /* got slot */
1960     *db = ndb;
1961     db->head.dbid = dbid;
1962    
1963     /* preset record sizes */
1964     db->mfc[0] = *DB_XSTR( db, LSET_MST, LSTR_MFC );
1965     db->xrf[0] = *DB_XSTR( db, LSET_MST, LSTR_XRF );
1966     db->cnt[0][0] =
1967     db->cnt[1][0] = *DB_XSTR( db, LSET_INV, LSTR_CNT );
1968     /* isis-1 index term lengths */
1969     db->tlen[0] = 10;
1970     db->tlen[1] = 30;
1971    
1972     /* only the packed little endian ("DOS") format is writable
1973     test later ...
1974     if ( LVAR_PAC != (DB_VARI & db->flags) )
1975     writable = 0;
1976     */
1977    
1978     db->path = mDup( path, plen+1 ); /* save path */
1979     memcpy( path+plen, ".???", 5 );
1980    
1981     if ( dbpar )
1982     dbpar = rDup(dbpar, 0, 0);
1983     /* check options file and extension case */
1984     if ( 0 <= uc ) /* use case from dbname */
1985     i = lio_open( setext(path,EXT_TXT_OPT,uc), OPEN_RDIF );
1986     else if ( 0 > (i = lio_open( setext(path,EXT_TXT_OPT,uc=0), OPEN_RDIF ))
1987     && 0 > (i = lio_open( setext(path,EXT_TXT_OPT,uc=OPEN_UC), OPEN_RDIF ))
1988     )
1989     uc = autocase( db->path );
1990     if ( 0 < i ) {
1991     if ( 0 < (sz = lio_size(i)) ) {
1992     p = sz < (int)sizeof(buf) ? buf : mAlloc(sz);
1993     if ( (gotopt = (sz == lio_read( &i, p, sz ))) )
1994     rDeser( &dbpar, p, sz, 0 );
1995     log_msg( LOG_INFO, "reading %d bytes options from '%s' %s",
1996     sz, path, gotopt ? "ok" : "nok" );
1997     if ( buf != p )
1998     mFree( p );
1999     }
2000     lio_close( &i, LIO_INOUT );
2001     }
2002     lck |= uc;
2003    
2004     if ( (dbpar || syspar) && 0 <= (i = rInt2(dbpar, syspar, OPENISIS_DRO, -1)))
2005     writable = !i; /* explicit 0/1 */
2006    
2007     /* open files */
2008     /* trad. index is never openend writable. */
2009     invret = openfiles( db->inv, path, EXT_INV, INV_FILES, uc|OPEN_RDIF );
2010     if (dbpar || syspar) {
2011     char fmtstr[32];
2012     if (rString2 (dbpar, syspar, OPENISIS_DTYPE, fmtstr, sizeof(fmtstr))) {
2013     if (! strcmp ("aligned", fmtstr)) {
2014     db->flags |= LVAR_ALI;
2015     autoformat = 0;
2016     } else if (! strcmp ("naligned", fmtstr))
2017     autoformat = 0;
2018     }
2019     }
2020     if ( autoformat ) {
2021     if ( invret )
2022     log_msg( LOG_WARN, "cannot guess format -- no inverted file" );
2023     else {
2024     unsigned len = lio_size( db->inv[INV_CNT] );
2025     if ( 56L == len ) {
2026     db->flags |= LVAR_ALI;
2027     autoenc = "iso8859-1";
2028     /* writable = 0; we do not write aligned format */
2029     } else if ( 52L == len )
2030     autoenc = "cp850";
2031     else
2032     log_msg( LOG_WARN, "cannot guess format -- bad .cnt len %d", len );
2033     log_msg( LOG_INFO, "using autoformat %saligned for .cnt len %d",
2034     (db->flags & LVAR_ALI) ? "":"un", len );
2035     }
2036     }
2037    
2038     /* data */
2039     #ifdef NOTXTDB
2040     if ( !(ret = openfiles( db->mst, path, EXT_MST, MST_FILES,
2041     lck|OPEN_ASIS|LIO_CREAT ))
2042     )
2043     writable = 0;
2044     else if (0 > ret)
2045     #else
2046     if ( 0 <= (ret = openfiles( &txtfd, path, EXT_TXT, 1,
2047     lck|LIO_SYNC|(writable?OPEN_ASIS:OPEN_RDIF) ))
2048     ) { /* .txt exists: use it */
2049     if ( ret )
2050     writable = 1;
2051     else if (1 == writable) {
2052     log_msg( LOG_ERROR, "file '%s' is readonly", path );
2053     goto cleanup;
2054     } else
2055     writable = 0;
2056     } else if (
2057     0 <= (ret = openfiles( db->mst, path, EXT_MST, MST_FILES,
2058     lck|((writable && !(db->flags & LVAR_ALI))?OPEN_ASIS:OPEN_RDIF) ))
2059     && (ret || 1!=writable)
2060     ) { /* trad. files are ok */
2061     if ( !ret )
2062     writable = 0;
2063     } else if ( 1 != (ret = openfiles( &txtfd, path, EXT_TXT, 1,
2064     lck|(ret ? LIO_SYNC : 0)|OPEN_NEW )) ) /* don't sync on autoconv */
2065     #endif
2066     goto cleanup;
2067    
2068     /* MW: creation mode? KR: ugo+rw & ~umask */
2069     if ( 1 == (lbtret = openfiles( &db->oxi.fd, path, EXT_LBT, 1,
2070     lck|(writable?OPEN_ASIS:OPEN_RDIF) ))
2071     )
2072     lbtret = 0;
2073     else if ( !writable )
2074     ;/* no problem */
2075     else if ( !lbtret ) { /* exists ro */
2076     log_msg( LOG_ERROR, "file '%s' is readonly", path );
2077     goto cleanup;
2078     } else { /* create and copy to oxi */
2079     if ( 1 != openfiles( &db->oxi.fd, path, EXT_LBT, 1, lck|OPEN_NEW ) )
2080     goto cleanup;
2081     lbtret = 0;
2082     copyidx = 1;
2083     }
2084    
2085     if ( db->mst[MST_MST] ) { /* care for the traditionals */
2086     if ( (ret = readlog(
2087     db->mfc, db->mst[MST_MST], 0, db, LSET_MST, LSTR_MFC ))
2088     ) {
2089     /* NEW goto cleanup; */
2090     memset( db->mfc, 0, sizeof(db->mfc) );
2091     db->mfc[LMFC_NMFN] = 1;
2092     db->mfc[LMFC_NMFB] = 1;
2093     db->mfc[LMFC_NMFP] = 64;
2094     db->mflen = 64;
2095     } else {
2096     /*
2097     int lastblock = (db->mflen = lio_size( db->mst[MST_MST] ))/512;
2098     if ( 511 & db->mflen ) lastblock++;
2099     counting from 1
2100     the next record's block should be either the last one we have
2101     or the next one to follow
2102     if ( db->mfc[LMFC_NMFB] != lastblock
2103     && db->mfc[LMFC_NMFB] != lastblock+1
2104     )
2105     log_msg( LOG_VERBOSE, "NMFB mismatch: NMFB %d ~ %d",
2106     db->mfc[LMFC_NMFB], lastblock );
2107     */
2108     /* set LOGICAL mf length */
2109     db->mflen = (db->mfc[LMFC_NMFB]-1)*512 + db->mfc[LMFC_NMFP];
2110     }
2111     db->ptrl = 512;
2112     db->xrlen = lio_size( db->mst[MST_XRF] ) / 512;
2113     if ( (DB_MMAP & db->flags)
2114     && db->xrlen
2115     && db->xrlen*512
2116     == lio_mmap( &db->mst[MST_XRF], (void**)&db->mmap, db->xrlen*512 )
2117     )
2118     db->mmlen = db->xrlen;
2119     }
2120    
2121     if ( txtfd ) {
2122     int remake = 0;
2123     /* TODO: make on-demand preparation even faster using buffered IO */
2124     if ( !lio_size(txtfd) ) {
2125     const char newline = LF;
2126     if ( gotopt
2127     && 0 < (i = lio_open( setext(path,EXT_TXT_OPT,uc), LIO_RD ))
2128     ) { /* copy the options file */
2129     log_msg( LOG_INFO, "copying %d bytes options", lio_size(i) );
2130     while ( 0 < (sz = lio_read( &i, buf, sizeof(buf)-1 )) )
2131     lio_write( &txtfd, buf, sz );
2132     if ( LIO_INOUT & i ) { /* is supposed to autoclose */
2133     log_msg( LOG_WARN, "tss tss tss ..." );
2134     lio_close( &i, LIO_INOUT );
2135     }
2136     }
2137     lio_write( &txtfd, &newline, 1 );
2138     }
2139    
2140     if ( db->mst[MST_MST] ) { /* copy to new empty txt */
2141     int end = db->mfc[LMFC_NMFN];
2142     /*
2143     max recsize for traditionals is 32K.
2144     field values may double, if consisting entirely of newlines.
2145     rec->used may be more than 32K, since we 12 bytes per field.
2146     However, we know there are only sign+5digits+tab+newline used per tag,
2147     fitting within 2* the original 6 bytes per field.
2148     */
2149    
2150     log_msg( LOG_INFO, "copying traditional data" );
2151     db->flags |= DB_OPEN; /* pretend */
2152     for ( i=1; i<end; i++ ) {
2153     Rec *r = dRead( dbid, i );
2154     if ( !r )
2155     sz = 1;
2156     else if ( (int)sizeof(buf) <= (sz = rSerB( buf, r )) ) {
2157     log_msg( ERR_IDIOT, "serialized %d bytes" );
2158     exit(42);
2159     }
2160     lio_write( &txtfd, buf, sz );
2161     }
2162     db->flags &= ~DB_OPEN; /* pret end */
2163     remake = 1;
2164     if ( db->mmap )
2165     lio_mmap( 0, (void**)&db->mmap, db->mmlen*512 );
2166     db->mmlen = 0;
2167     closefiles( db->mst, MST_FILES );
2168     } /* copying */
2169     db->mst[MST_MST] = txtfd;
2170     db->mflen = lio_size( db->mst[MST_MST] );
2171    
2172     db->ptr = 0x0134; /* should be config opt */
2173     if ( !remake ) { /* other reasons why we should remake */
2174     unsigned short ptr;
2175     unsigned isix = GETINT(ISIX);
2176     unsigned magic;
2177    
2178     remake = 1;
2179     if ( 0 > (db->mst[MST_XRF] = lio_open( setext(path,EXT_TXT_PTR,uc),
2180     LIO_SEEK|(writable?LIO_RDWR:LIO_RD) ))
2181     )
2182     log_msg( LOG_INFO, "'%s' not found", path );
2183     else if ( 6 != lio_read(&db->mst[MST_XRF],buf,6) )
2184     log_msg( LOG_WARN, "'%s' too short", path );
2185     else if ( isix != (magic = GETINT(buf)) ) /* FOO! */
2186     log_msg( LOG_WARN, "'%s' has black magic 0x%08x", path, magic );
2187     /* TODO: save that foo if it doesn't read ISIX ? */
2188     else if ( 0xf000 & (ptr = GETSHORT(buf+4)) ) /* bad endianess */
2189     log_msg( LOG_WARN, "'%s' has bad endianess type 0x%04x", path, ptr );
2190     else if ( (db->ptr && db->ptr != ptr) ) /* other type configured */
2191     log_msg( LOG_WARN, "'%s' type 0x%04x != cfg 0x%04x", path, ptr, db->ptr );
2192     else if ( lio_time(db->mst[MST_XRF]) < lio_time(db->mst[MST_MST]) )
2193     log_msg( LOG_WARN, "'%s' older than data", path );
2194     else {
2195     db->ptr = ptr;
2196     remake = 0;
2197     }
2198     }
2199     if ( ! db->ptr ) {
2200     db->ptr = 0x0134; /* m*256 + l*16 + k, doc/Serialized */
2201     /* BTW: 0x34 is ASCII digit '4', so it's ISIX4^A on little endian */
2202     db->ptrl = 8;
2203     } else { /* fix unsupported type */
2204     unsigned m = 0xf&(db->ptr>>8);
2205     unsigned l = 0xf&(db->ptr>>4);
2206     unsigned k = 0xf&db->ptr;
2207     int mod = 0;
2208     if ( m > 4 ) { m = 4; mod = 1; }
2209     if ( l > 4 ) { l = 4; mod = 1; }
2210     if ( k > 4 ) { k = 4; mod = 1; } /* TODO: allow 8 with large files */
2211     /* total ptr bytes = sum(nibbles) <= 45, but won't use more than 8+4+4 */
2212     if ( mod ) {
2213     log_msg( LOG_WARN, "fixing unsupported ptr type 0x%04x", db->ptr );
2214     db->ptr = (unsigned short)(m<<8 | l<<4 | k);
2215     remake = 1;
2216     }
2217     db->ptrl = k+l+m;
2218     }
2219     if ( remake ) {
2220     Ptr pt;
2221     unsigned base = 0; /* of current block */
2222     unsigned pos = 0; /* of last record */
2223     unsigned fld = 0; /* of last record */
2224     unsigned nmfn = 0; /* next mfn = maxmfn+1 */
2225     unsigned xmfn = 0; /* explicitly given */
2226     char op = 0;
2227     int more; /* buf not empty flag */
2228     char *last; /* of current block */
2229    
2230     lio_close( &db->mst[MST_XRF], LIO_INOUT );
2231     if ( 0 > (db->mst[MST_XRF] = lio_open(
2232     setext(path,EXT_TXT_PTR,uc), OPEN_BLANK ))
2233     )
2234     goto cleanup;
2235     /* write signature */
2236     memcpy( pt.r, "ISIX", 4 );
2237     memcpy( pt.r+4, &db->ptr, 2 );
2238     memcpy( pt.r+6, ":)", 2 );
2239     if ( 8 < db->ptrl )
2240     memset( pt.r+8, ')', db->ptrl - 8 );
2241     lio_pwrite( &db->mst[MST_XRF], pt.r, db->ptrl, 0 );
2242     /* loop the masterfile */
2243     lio_seek( &db->mst[MST_MST], 0 );
2244     last = (p = buf) + lio_read( &db->mst[MST_MST], buf, 8192 ) - 1;
2245     more = last > buf; /* one byte is no byte ;) */
2246     if ( more && LF == *p ) { /* no options: no \n\n */
2247     nmfn = pos = 1;
2248     p++;
2249     }
2250     for (;;) { /* records */
2251     unsigned len, mfn;
2252     for (;;) { /* lines and stuff to end of record */
2253     if ( p < last ) { /* have one lookahead */
2254     if ( LF != *p++ )
2255     continue; /* the tight loop ... or use memchr ? */
2256     if ( LF != *p ) { /* now p <= last */
2257     if ( fld || !(0xc0 & *p) ) { /* < '@', 'A', ... */
2258     if ( TAB != *p ) /* no continuation */
2259     fld++;
2260     continue;
2261     }
2262     fld++; /* count field, unless we really recognize a opline */
2263     if ( 'Z' < *p )
2264     continue;
2265     /* now we have '@'...'Z' at start of 1st line */
2266     sz = last - p; /* avail after p */
2267     if ( sz && TAB != p[1] ) /* no opline */
2268     continue;
2269     switch (*p) {
2270     case 'D':
2271     case 'I':
2272     case 'W':
2273     break; /* give it a try */
2274     default:
2275     log_msg( LOG_WARN, "unknown opline %c at mfn %d", *p, nmfn );
2276     continue;
2277     }
2278     if ( sz > 127 ) /* longer -> no opline */
2279     sz = 127;
2280     if ( ! sz || ! (q = memchr(p+1, LF, sz)) ) {
2281     if ( sz >= 127 || ! more )
2282     continue; /* too long or undelimited last */
2283     p--; /* back to \n, so we come here again */
2284     goto gimmemore;
2285     }
2286     if ( q < p+3 || p[2] < '0' || '9' < p[2] )
2287     continue;
2288     /* TODO:
2289     take a closer look at whether the whole line makes sense
2290     */
2291     if ( op ) { /* yeah, two metas in sequence! weird stuff! */
2292     p--; /* step back to newline */
2293     pos = base+(p-buf); /* fake pos as if we had no line at all */
2294     break; /* go handle the PREVIOUS opline */
2295     }
2296     op = *p;
2297     xmfn = a2i( p+2, q-p-2 );
2298     fld--; /* uncount this line */
2299     pos = base + (q-buf) + 1; /* start after q */
2300     continue;
2301     }
2302     break;
2303     }
2304     gimmemore:
2305     LOG_DBG( LOG_DEBUG, "MORE %d at pos %d base %d p +%d last +%d",
2306     more, pos, base, p-buf, last-buf );
2307     if ( !more )
2308     goto schicht; /* german: done */
2309     base += p - buf; /* shift out bytes before p */
2310     len = last-p; /* bytes to keep after p; < 128 */
2311     if ( len ) /* we're probing for more lookahead */
2312     memmove( buf, p, 1+last-p );
2313     else /* typically */
2314     *buf = *p; /* but save the last dance */
2315     p = buf;
2316     last = buf + len;
2317     /* reload */
2318     if ( 0 < (sz = lio_read( &db->mst[MST_MST], buf+1+len, 8192 )) ) {
2319     last += sz;
2320     continue;
2321     }
2322     more = 0; /* but yet, finish this up */
2323     /* since *buf = *last was the files last character,
2324     we'd expect a newline
2325     */
2326     if ( last == p )
2327     p = buf+(LF==*buf ? 1 : 2); /* pretend buf started \n */
2328     if ( ! len )
2329     break;
2330     /* else try again opline */
2331     } /* lines and stuff */
2332     /* now p is on a delimiting blank lines \n -- or such ... */
2333     len = base + (p-buf) - pos; /* >= 0 */
2334     mfn = xmfn ? xmfn : nmfn;
2335     log_msg( LOG_INFO, "ptr %c %d(%d/%d) pos %d len %d",
2336     op?op:'>', mfn, xmfn, nmfn, pos, len );
2337     if ( base + (p-buf) < pos ) /* FOO !!! */
2338     len = 0;
2339     if ( len ) /* could have been completely empty */
2340     len--; /* mute last \n */
2341     if ( 'D' == op && len ) /* FOO !!! */
2342     len = 0;
2343     if ( mfn && (len || op) )
2344     lio_pwrite( &db->mst[MST_XRF],
2345     mkptr( pt.r, db, pos, len, fld), db->ptrl, mfn*db->ptrl );
2346     pos = base + (p-buf) + 1; /* next starts after p */
2347     if ( 'D' != op ) { /* 'D'elete does not lead to implicit reuse */
2348     if ( op && nmfn < xmfn )
2349     nmfn = xmfn;
2350     nmfn++; /* continue after this */
2351     }
2352     xmfn = fld = op = 0;
2353     }
2354     schicht: ;
2355     } /* remake */
2356     db->mfc[LMFC_NMFN] =
2357     db->xrlen = lio_size( db->mst[MST_XRF] ) / db->ptrl;
2358     if ( (DB_MMAP & db->flags)
2359     && db->xrlen
2360     && db->xrlen*db->ptrl
2361     == lio_mmap( &db->mst[MST_XRF], (void**)&db->mmap, db->xrlen*db->ptrl )
2362     )
2363     db->mmlen = db->xrlen;
2364     log_msg( LOG_INFO, "mapped %d*%d = %d",
2365     db->xrlen, db->ptrl, db->xrlen*db->ptrl );
2366     db->flags |= DB_TXTOPEN;
2367     db->flags &= ~DB_VARI; /* clear alignment and such */
2368     } /* if ( txtfd ) */
2369    
2370     /* supporting files, ctables */
2371     p = buf;
2372     if ( 0 >= (sz = lio_slurp( &p, sizeof(buf), setext(path,EXT_SUP_ACT,uc), 1 ))
2373     || lcs_mktab( db->ctab+LCS_CTYPE, p, sz, LCS_A )
2374     )
2375     memcpy( db->ctab+LCS_CTYPE, lcs_latin1_ct, sizeof(db->ctab[0]) );
2376     if ( 0 >= (sz = lio_slurp( &p, sizeof(buf), setext(path,EXT_SUP_UCT,uc), 1 ))
2377     || lcs_mktab( db->ctab+LCS_UCASE, p, sz, 0 )
2378     )
2379     memcpy( db->ctab+LCS_UCASE, lcs_latin1_uc, sizeof(db->ctab[0]) );
2380     /* fill header */
2381    
2382     if (! fdt) {
2383     if ( (p = rString (dbpar, OPENISIS_DFDT, 0, buf, sizeof(buf))) ) {
2384     Rec *recfdt = 0;
2385     Db *dbfdt = nDbByName (openisis_stub0, p);
2386     if ( dbfdt)
2387     recfdt = dRead (dbfdt->dbid, 1);
2388     else {
2389     int idfdt = ldb_open (p, 0, syspar, 0);
2390     if (0 <= idfdt) {
2391     recfdt = dRead (idfdt, 1); /*MMM*/
2392     cDClose (idfdt);
2393     }
2394     }
2395     if (recfdt)
2396     fdt = fRec2Fdt (recfdt);
2397     } else if ( gotopt )
2398     fdt = fRec2Fdt(dbpar);
2399     if (! fdt)
2400     fdt = fFromFile (path);
2401     }
2402     db->head.fdt = fdt;
2403     if (fdt)
2404     log_msg( LOG_INFO, "have %d fdt entries for %s",
2405     fdt->len, db->head.name);
2406     else
2407     log_msg( LOG_INFO, "have no fdt for %s", db->head.name);
2408    
2409     db->head.tms = timeUpd(0); /* what watch? */
2410     log_msg( LOG_INFO, "tms %d for %s", db->head.tms, db->head.name);
2411    
2412    
2413     /* set path and name */
2414     if (0 <= (i = ldb_last_path_sep (db->path))) {
2415     if (i)
2416     strncpy(path, db->path, i)[i] = 0;
2417     else
2418     strcpy (path, "/");
2419     dbpar = rSet (dbpar, RCHG | RDIS, OPENISIS_DPATH, path, 0);
2420     }
2421     dbpar = rSet (dbpar, RCHG | RDIS, OPENISIS_DNAME, db->head.name, 0);
2422    
2423     /* set encoding */
2424     if (!(p = rString (dbpar, OPENISIS_DENC, 0, buf, sizeof(buf))))
2425     if ( (syspar
2426     && (p = rString (syspar, OPENISIS_DENC, 0, buf, sizeof(buf))))
2427     || (p = autoenc)
2428     )
2429     dbpar = rSet(dbpar, RDIS, OPENISIS_DENC, p, 0);
2430     if ( p )
2431     log_msg( LOG_INFO, "using encoding %s for %s", p, db->head.name);
2432    
2433     db->head.cfg = dbpar;
2434    
2435     /* done */
2436     db->flags |= DB_OPEN;
2437    
2438     if ( writable && LVAR_PAC == (DB_VARI & db->flags) )
2439     db->flags |= DB_WRITABLE;
2440    
2441     /*
2442     if ( (dbpar || syspar) && 0 < rInt2(dbpar, syspar, OPENISIS_DDUMP, -1) ) {
2443     int off = 0;
2444     int *r;
2445     do {
2446     if ( (r = ldb_readRecAtOff(dbid,off,&off)) )
2447     mFree( r );
2448     } while ( 0 < off );
2449     exit(0);
2450     }
2451     */
2452    
2453     /* init oxi */
2454     if ( writable )
2455     db->oxi.flg |= LBT_WRITE;
2456     if ( (p = getenv("OXITYP")) && 0 < (i = atoi(p)) && 4 > i )
2457     db->oxi.typ = i << 4;
2458     if ( !lbtret && !lbt_init( &db->oxi ) )
2459     db->flags |= DB_LBTOPEN;
2460    
2461     if ( ! invret
2462     && ! (ret = readlog( db->cnt[0], db->inv[INV_CNT],
2463     0, db, LSET_INV, LSTR_CNT ))
2464     && ! (ret = readlog( db->cnt[1], db->inv[INV_CNT],
2465     -1, db, LSET_INV, LSTR_CNT ))
2466     ) {
2467     if ( lbtret )
2468     db->flags |= DB_INVOPEN;
2469     else {
2470     if ( copyidx ) {
2471     DXLoop l;
2472     log_msg( LOG_INFO, "copying traditional index" );
2473     lbtret = 0;
2474     memset( &l, 0, sizeof(l) );
2475     l.me = & db->oxi;
2476     l.cb = (DXCb*)cXAdd;
2477     lbt_batch( & db->oxi, 5 );
2478     search( db, 0, 0, 0, &l );
2479     cXAdd( & db->oxi, 0, 0 );
2480     }
2481     closefiles( db->inv, INV_FILES );
2482     }
2483     }
2484    
2485     return dbid;
2486    
2487     cleanup:
2488     /* cleanup ... */
2489     db->flags = 0;
2490     closefiles( &db->oxi.fd, 1 );
2491     closefiles( &txtfd, 1 );
2492     closefiles( db->inv, INV_FILES );
2493     closefiles( db->mst, MST_FILES );
2494     return 0 > ret ? ret : ret ? -ret : -1;
2495     } /* ldb_open */
2496    
2497    
2498     /* ************************************************************
2499     package data
2500     */
2501    
2502    
2503    
2504     /* ************************************************************
2505     package functions
2506     */
2507    
2508     int *ldb_readRecAtOff ( int dbid, lxref off, int *nxtoff )
2509     {
2510     int *rec;
2511     LDb *db = getDb( dbid );
2512     if ( ! db ) {
2513     log_msg( LOG_ERROR, "\tat ldb_readRecAtOff" );
2514     return 0;
2515     }
2516     if ( 0 == off )
2517     off = 64;
2518     rec = getMfr( db, off, nxtoff );
2519     if ( ! rec )
2520     return 0;
2521     LOG_DBG( LOG_VERBOSE, "db %d off %d: got %hd bytes",
2522     dbid, off, !rec ? -1 : rec[LMFR_RECL] );
2523     return rec;
2524     } /* ldb_readRecAtOff */
2525    
2526    
2527    
2528     int ldb_search ( int dbid, const char *key, LdbPost *post, Rec *rec )
2529     {
2530     LDb *db = getDb( dbid );
2531     Key k;
2532    
2533     if ( ! db )
2534     return -ERR_BADF;
2535     if ( post ) { /* prepare for postings */
2536     if ( ! post->len )
2537     post->len = sizeof(post->p)/sizeof(post->p[0]); /* standard length */
2538     if ( LDB_NOT & post->mode )
2539     post->mode |= LDB_AND;
2540     }
2541     if ( DB_INVOPEN & db->flags )
2542     return search( db, key, post, rec, 0 );
2543     if ( !(DB_LBTOPEN & db->flags) )
2544     return -ERR_BADF;
2545     if ( db->oxi.bat )
2546     return -ERR_BUSY;
2547     memset( &k, 0, sizeof(k) );
2548     if ( ! key ) {
2549     k.byt[0] = '$';
2550     k.len = 1;
2551     } else {
2552     unsigned char *uk = (unsigned char*)key;
2553     int l = strlen( key );
2554     if ( l > 255 )
2555     l = 255;
2556     k.len = (unsigned char)l;
2557     while ( l-- )
2558     k.byt[l] = db->ctab[LCS_UCASE].c[ uk[l] ];
2559     }
2560     return lbt_search( &db->oxi, &k, post, rec );
2561     } /* ldb_search */
2562    
2563    
2564     int ldb_p2s ( Set *set, LdbPost *post )
2565     {
2566     int *s = set->id;
2567     int last=0, max = set->len;
2568     int i;
2569     set->len = 0;
2570     if ( ! max )
2571     max = OPENISIS_SETLEN;
2572     max--;
2573     if ( !post->fil )
2574     return 0L;
2575     s[0] = LDBP_ROW(post->p);
2576     for ( i=1; i<post->fil && last < max; i++ ) {
2577     int row = LDBP_ROW(post->p+i);
2578     if ( s[last] != row )
2579     s[++last] = row;
2580     }
2581     return set->len = last+1;
2582     } /* ldb_p2s */
2583    
2584    
#if 0
/* compiled out: hand out the character tables of a db, 0 if no such db */
LcsTab *ldb_tabs( int dbid )
{
	LDb *ldb = getDb( dbid );
	if ( ldb )
		return ldb->ctab;
	return 0;
} /* ldb_tabs */
#endif
2592    
2593    
2594     Db *ldb_getdb (int dbid) {
2595     LDb *db = getDb (dbid);
2596     return db ? &db->head : 0;
2597     }
2598    
2599     /* ************************************************************
2600     public functions
2601     */
2602     int dMaxId ( int dbid )
2603     {
2604     LDb *db = getDb( dbid );
2605     if ( ! db )
2606     return -ERR_BADF;
2607     return db->mfc[LMFC_NMFN] - 1;
2608     } /* dMaxId */
2609    
2610    
2611     Raw *dRaw ( int dbid, int rowid )
2612     {
2613     int off;
2614     int *rec = 0;
2615     LDb *db;
2616    
2617     if ( LIO_LOCK() ) return 0;
2618     db = getDb( dbid );
2619     if ( ! db ) {
2620     log_msg( LOG_ERROR, "\tat openIsisReadRaw %d", rowid );
2621     goto done;
2622     }
2623     off = getOff( db, rowid, 0 );
2624     log_msg( LOG_INFO, "found xref 0x%08x for %d", off, rowid );
2625     if ( 0 >= off ) {
2626     log_msg( LOG_INFO, "found deleted xref 0x%08x for %d", off, rowid );
2627     goto done;
2628     }
2629     rec = getMfr( db, off, 0 );
2630     if ( ! rec ) {
2631     log_msg( LOG_WARN, "\tno record at %d rowid %d", off, rowid );
2632     goto done;
2633     }
2634     LOG_DBG( LOG_VERBOSE, "db %d row %d: got %hd bytes",
2635     dbid, rowid, !rec ? -1 : rec[LMFR_RECL] );
2636     if ( rec[LMFR_MFN] != rowid ) {
2637     log_msg( LOG_ERROR, "got mfn %d expected %d", rec[LMFR_MFN], rowid );
2638     mFree( rec );
2639     rec = 0;
2640     goto done;
2641     }
2642     done:
2643     (void)LIO_RELE();
2644     return (Raw*)rec;
2645     } /* dRaw */
2646    
2647    
2648     Rec *dRead ( int dbid, int rowid )
2649     {
2650     LDb *db = getDb( dbid );
2651     Rec *r;
2652     if ( DB_TXTOPEN & db->flags )
2653     return dText( db, rowid, 0 );
2654     if ( (r = (Rec *) dRaw( dbid, rowid )) ) {
2655     char * base = (char*)r;
2656     Field *f = r->field;
2657     int i = r->len;
2658     for ( ; i--; f++ )
2659     f->val = base + (int)f->val;
2660     assert( RECOK( r ) );
2661     }
2662     return r;
2663     } /* dRead */
2664    
2665    
2666     int dWritex ( int dbid, Rec *rec, Rec *idx )
2667     {
2668     LDb *db = getDb( dbid );
2669     int ret = 0;
2670    
2671     if ( ! db )
2672     return -ERR_BADF;
2673     if ( !(DB_WRITABLE & db->flags) )
2674     return log_msg( ERR_INVAL, "db %d not writable", dbid );
2675     if ( rec && (ret =
2676     DB_TXTOPEN & db->flags ? pText( db, rec, 0 ) : putRec( db, rec )
2677     ) )
2678     return ret;
2679     if ( idx ) {
2680     const unsigned char *const uc = db->ctab[LCS_UCASE].c;
2681     int delmode = 0;
2682     int tag = -1;
2683     int mode = 'f'; /* 'w', 's' */
2684     int occ = 0;
2685     int pos = 0;
2686     int cut = 30;
2687     int mfn = rec ? rec->rowid : 0;
2688     Hit h;
2689     Key k;
2690     Field *f = idx->field, *last = f + idx->len - 1;
2691    
2692     for ( ; f <= last; f++ ) {
2693     const char *val = f->val;
2694     int len = f->len;
2695     int del = delmode;
2696    
2697     k.val.len = 0;
2698     switch ( f->tag ) {
2699     case XCTL: { /* index cmd [opt] */
2700     const char *cmd = val, *e = val + len;
2701     int cmdlen, opt = 0, haveopt;
2702     while ( val < e && 64 < *val ) /* eat ASCII letters */
2703     val++;
2704     cmdlen = val - cmd;
2705     if ( val < e && (TAB == *val || ' ' == *val) )
2706     val++;
2707     haveopt = val < e && a2il( val, e-val, &opt );
2708     if ( ! cmdlen ) {
2709     cut = haveopt ? opt : 30;
2710     continue;
2711     }
2712     switch (*cmd) {
2713     case 'f': /* fields */
2714     mode = 'f';
2715     occ = opt;
2716     pos = 0;
2717     continue;
2718     case 'w': /* words */
2719     mode = 'w';
2720     pos = opt;
2721     continue;
2722     case 's': /* split */
2723     mode = 's';
2724     pos = opt;
2725     continue;
2726     case 'a': /* add */
2727     delmode = 0;
2728     occ = pos = 0;
2729     continue;
2730     case 'd': /* del */
2731     delmode = 1;
2732     occ = pos = 0;
2733     continue;
2734     case 'm': /* mfn */
2735     mfn = opt;
2736     occ = pos = 0;
2737     continue;
2738     }
2739     return log_msg( ERR_INVAL, "bad index control '%.*s'", cmdlen, cmd );
2740     }
2741     case XHIT: {
2742     int i = 0, v[5], *pv = v;
2743     const char *e = val + len;
2744     if ( len )
2745     switch (*val) {
2746     case '+': del = 0; val++; break;
2747     case '-': del = 1; val++; break;
2748     }
2749     for ( ; val < e && i<5; i++ ) {
2750     int dig = a2il( val, e-val, v+i );
2751     val += dig;
2752     if ( val >= e || TAB == *val )
2753     break;
2754     if ( '.' != *val )
2755     return log_msg( ERR_INVAL,
2756     "bad HIT '%.*s' after %d", e-val, val, v[i] );
2757     val++;
2758     }
2759     h.dbn = 0;
2760     h.mfn = mfn;
2761     h.pos = pos;
2762     h.occ = occ;
2763     h.tag = tag;
2764     switch ( i ) {
2765     case 5: h.dbn = (unsigned short)*pv++;
2766     case 4: h.mfn = (unsigned)*pv++;
2767     case 3: h.pos = (unsigned short)pv[2];
2768     case 2: h.occ = (unsigned short)pv[1];
2769     case 1: h.tag = (unsigned short)pv[0];
2770     /* case 0: ! f->len */
2771     }
2772     if ( val < e && TAB == *val )
2773     val++;
2774     len = e - val;
2775     } break; /* case XHIT */
2776     case XFST:
2777     return log_msg( ERR_IDIOT, "sorry, XFST not implemented" );
2778     #if 0
2779     case XADD: /* binary key */
2780     /* if ( f->len < db->oxi.vsz )
2781     memset( k.val.byt, 0, db->oxi.vsz - f->len );
2782     */
2783     memcpy( k.val.byt
2784     + (f->len < (int)db->oxi.vsz ? (int)db->oxi.vsz - f->len : 0),
2785     f->val, f->len > (int)db->oxi.vsz ? (int)db->oxi.vsz : f->len );
2786     k.val.len = db->oxi.vsz;
2787     break;
2788     #endif
2789     default:
2790     if ( 0 > f->tag )
2791     return log_msg( ERR_INVAL, "bad index control tag %d", f->tag );
2792     switch ( mode ) { /* check for tag change */
2793     case 'f':
2794     if ( tag == f->tag )
2795     occ++;
2796     else
2797     occ = 0;
2798     break;
2799     case 'w':
2800     if ( tag == f->tag )
2801     pos++;
2802     else
2803     pos = 0;
2804     break;
2805     }
2806     tag = f->tag;
2807     h.dbn = 0;
2808     h.mfn = mfn;
2809     h.pos = pos;
2810     h.occ = occ;
2811     h.tag = f->tag;
2812     }
2813     if ( ! k.val.len ) { /* not ADD/DEL: use hit, val */
2814     unsigned char *dst = k.byt;
2815     const unsigned char *src = (const unsigned char *)val;
2816     if ( cut < len )
2817     len = cut;
2818     k.len = (unsigned char)len;
2819     while ( len-- )
2820     *dst++ = uc[ *src++ ];
2821     cXMkVal( &db->oxi, &k.val, &h );
2822     LOG_DBG( LOG_DEBUG, "#%d %c key '%.*s' hit %d.%d.%d.%d.%d",
2823     f - idx->field, del ? '-' : '+', k.len, k.byt,
2824     h.dbn, h.mfn, h.tag, h.occ, h.pos );
2825     }
2826     ret = del ? lbt_del( &db->oxi, &k ) : lbt_add( &db->oxi, &k );
2827     }
2828     }
2829     return ret;
2830     } /* dWritex */
2831    
2832    
2833     int dWrite ( int dbid, Rec *rec )
2834     {
2835     /* TODO: use FST lines as idx */
2836     return dWritex( dbid, rec, 0 );
2837     } /* dWrite */
2838    
2839    
2840     Rec* dTerm ( Rec *rec, int dbid, const char *key )
2841     {
2842     return 0 > ldb_search( dbid, key, 0, rec ) ? 0 : rec;
2843     } /* dTerm */
2844    
2845    
2846     int dXLoop ( int dbid, DXLoop *l )
2847     {
2848     LDb *db = getDb( dbid );
2849    
2850     if ( !db )
2851     return -ERR_BADF;
2852     if ( OPENISIS_IDXTRAD & l->flg ) {
2853     if ( !(db->flags & DB_INVOPEN) )
2854     return -ERR_BUSY;
2855     return search( db, 0, 0, 0, l );
2856     }
2857     if ( !(db->flags & DB_LBTOPEN) || db->oxi.bat )
2858     return -ERR_BUSY;
2859     return lbt_loop( & db->oxi, l );
2860     } /* dXLoop */
2861    
2862    
2863    
2864     int cInit ( int argc, const char **argv, CLockFunc lockfunc )
2865     {
2866     (void)argc; (void)argv;
2867     cOpen( 0 );
2868     if ( lockfunc )
2869     lio_lock = lockfunc;
2870     return 0;
2871     }
2872    
2873    
2874     Db* cDOpen (const char *dbname, Rec *dbpar, Rec *syspar, Fdt *fdt) {
2875     int dbid;
2876     if ( ! init )
2877     cOpen( 0 );
2878     dbid = ldb_open (dbname, dbpar, syspar, fdt);
2879     if (0 <= dbid) {
2880     return &dbs[dbid].head;
2881     }
2882     return 0;
2883     }
2884    
2885     int cDOpenv ( const char *dbname, const char **argv, int argc )
2886     {
2887     Rec *dbpar = 0;
2888     int rt;
2889     if ( ! init )
2890     cOpen( 0 );
2891     if (argc) {
2892     dbpar = rSet (0, RARGV | RFDT | RNOC | RIGN | argc,
2893     openIsisFdtDbpar, argv);
2894     }
2895     rt = ldb_open (dbname, dbpar, 0, 0);
2896     if (dbpar) {
2897     mFree (dbpar);
2898     }
2899     return rt;
2900     }
2901    
2902    
2903     int cDClose ( int dbid )
2904     {
2905     LDb *db = getDb( dbid );
2906     if ( ! db )
2907     return -ERR_BADF;
2908     if ( LIO_LOCK() ) return -ERR_BUSY;
2909     if ( DB_MODIFIED == ((DB_MODIFIED|DB_TXTOPEN) & db->flags) ) {
2910     /* write back the MF control */
2911     Mfc mfc;
2912     /* if ( 498 < (db->mflen & 511) ) db->mflen = ~511 & (db->mflen + 14); */
2913     mfc.ctlm = rvi( db->mfc[LMFC_CTLM] );
2914     mfc.nmfn = rvi( db->mfc[LMFC_NMFN] );
2915     mfc.nmfb = rvi( 1 + (db->mflen >> 9) );
2916     mfc.nmfp = rvs( 511 & db->mflen );
2917     mfc.type = rvs( db->mfc[LMFC_TYPE] );
2918     mfc.rcnt = rvi( db->mfc[LMFC_RCNT] );
2919     mfc.mfx1 = rvi( db->mfc[LMFC_MFX1] );
2920     mfc.mfx2 = rvi( db->mfc[LMFC_MFX2] );
2921     mfc.mfx3 = rvi( db->mfc[LMFC_MFX3] );
2922     if ( sizeof(mfc) != lio_pwrite( &db->mst[MST_MST], &mfc, sizeof(mfc), 0) )
2923     log_msg( ERR_TRASH, "could not write MST header" );
2924     }
2925     if ( db->mmap ) {
2926     if ( (DB_MODIFIED|DB_TXTOPEN) == ((DB_MODIFIED|DB_TXTOPEN) & db->flags) )
2927     memcpy( db->mmap, ISIX, 4 ); /* force newer mtime on proper close */
2928     lio_mmap( 0, (void**)&db->mmap, db->mmlen*db->ptrl );
2929     }
2930     db->mmlen = 0;
2931     closefiles( db->mst, MST_FILES );
2932     if ( DB_INVOPEN & db->flags )
2933     closefiles( db->inv, INV_FILES );
2934     if ( DB_LBTOPEN & db->flags )
2935     lbt_close( &db->oxi );
2936     db->flags = 0L;
2937     if ( db->path ) mFree( (char*)db->path );
2938     if (db->head.cfg) mFree (db->head.cfg);
2939     if (db->head.fdt) fFree (db->head.fdt);
2940     memset( db, 0, sizeof(db) );
2941     (void)LIO_RELE();
2942     return 0;
2943     } /* cDClose */
2944    
2945    
2946     int cDCheck ( int dbid, int flags )
2947     {
2948     static char dot = '.';
2949     int *r;
2950     LDb *db = getDb( dbid );
2951     int nxtoff = 64, off;
2952    
2953     if ( ! db )
2954     return -ERR_BADF;
2955     (void)flags;
2956     do {
2957     lio_write( &lio_out, &dot, 1 );
2958     if ( (r = ldb_readRecAtOff(dbid,off=nxtoff,&nxtoff)) ) {
2959     int o = getOff( db, r[LMFR_MFN], 0 );
2960     if ( o != off ) {
2961     log_msg( LOG_WARN, "mfn %d xrf %d != real %d\n",
2962     r[LMFR_MFN], o, off );
2963     }
2964     mFree( r );
2965     }
2966     } while ( 0 < nxtoff );
2967     return 0;
2968     } /* cDCheck */
2969    
2970    
2971     OpenIsisIdx *cXOpen ( int dbid, int mode )
2972     {
2973     LDb *db = getDb( dbid );
2974     if ( !db
2975     || !(db->flags & DB_LBTOPEN)
2976     || !(db->oxi.flg & LBT_WRITE) /* may be writable if db is not */
2977     || db->oxi.bat
2978     /*
2979     preliminary undocumented feature:
2980     mode -1 gives direct access in non-batch mode
2981     */
2982     || (0 <= mode && lbt_batch( & db->oxi, (unsigned char)mode ))
2983     )
2984     return 0;
2985     return & db->oxi;
2986     } /* cXOpen */

  ViewVC Help
Powered by ViewVC 1.1.26