/*
openisis - an open implementation of the CDS/ISIS database
Version 0.8.x (patchlevel see file Version)
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
see README for more information
EOH */
#ifndef LDB_H
/*
$Id: ldb.h,v 1.19 2003/04/08 00:20:52 kripke Exp $
package interface of the general db access functions.
*/
#include "luti.h"
#include "lll.h"
/* #define LDB_MAGIC 0x0FE91515 read OPENISIS */
/*
	MFR members.
	The enumerators form two groups: the first one counts from 1 (MFN)
	up to 8 (_FL), the second one restarts at 0 (TAG) and ends at 3 (_RL).
*/
enum {
	/* first group */
	LMFR_MFN  = 1, /* the rowid */
	LMFR_RECL = 2, /* total external length (even), negative for locked record */
	LMFR_BWB  = 3, /* block of rec's previous version */
	LMFR_BWP  = 4, /* pos of rec's previous version */
	LMFR_BASE = 5, /* offset of contents area */
	LMFR_NVF  = 6, /* number of fields */
	LMFR_STAT = 7, /* state, if != 0, about to be deleted */
	LMFR__FL  = 8, /* offset of repeated part */
	/* second group, numbering restarts at 0 */
	LMFR_TAG  = 0,
	LMFR_POS  = 1,
	LMFR_LEN  = 2,
	LMFR__RL  = 3  /* length of repeated part */
};
/**
	Fetch the raw isis record stored under a rowid.
	The returned memory belongs to the caller and must be freed after usage.
	NOTE(review): the result for a missing record or an error is not stated
	in this header -- presumably a null pointer; confirm in the implementation.
*/
extern int *ldb_readRec(int db, int rowid);
/**
	Numerical type holding a position within a records file.
	It is a typedef so that it may be widened (e.g. to lll)
	in order to support large DBs.
*/
typedef int lxref;
/**
	Fetch the raw isis record located at a given xref.
	The returned memory belongs to the caller and must be freed after usage.
	NOTE(review): nxtoff is declared as int*, not lxref* -- presumably it
	receives the offset of the following record; confirm, and keep its type
	in step with lxref should that ever be widened.
*/
extern int *ldb_readRecAtOff(int dbid, lxref off, int *nxtoff);
/*
** ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
** detailed search structures and functions
**
** ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
/* numeric limits of the search interface */
enum {
	LDB_INDEXES    = 2,
	LDB_MAX_KEYLEN = 31,  /* actually, it's 30 for isis-1 */
	LDB_TERMBUF    = 128  /* ints */
};
/**
the isis-1 posting is a 64bit number.
In the file it's in big endian order, to allow ordering using memcmp.
In the LdbP we have native order to use native 64bit.
Until we need more bits for postings, we may use the 64bit type
provided by gcc and some other compilers.
the 8 bytes are: highest 3 for the rowid, then 2 tag, then 3 pos.
the 3 pos bytes in turn are 1 byte for the occurrence
and 2 bytes for the word count.
(e.g. value 2<<16 | 7 if term was found in the 7th word of
the 2nd occurrence of field 'tag').
we usually don't care, since for a near condition we always
want a small distance w/in the same occurrence which may be
conveniently checked by the difference of 3 byte pos values.
BUT NOTE: ... unfortunately we DO NEED one more bit for marking
postings during AND operations. we abuse the highest bit of the
word counter (0x8000 of the value), since it's for sure useless
for any reasonable distance check.
*** BE CAREFUL WHEN COMPARING POSTINGS DURING "AND" OP ***
use the LDBP_xxM macros below.
*/
typedef union {
char bytes[8]; /* the posting seen as 8 single bytes */
lll val; /* the posting as one number; lll comes from lll.h (presumably the 64bit type) */
} LdbP;
/* all macros operate on pointers (to LdbP) */
/*
access parts.
the 64bit value is masked down to the wanted bits BEFORE it is narrowed,
so the narrowing never has to convert a value that does not fit
(converting an out-of-range value to a signed type is implementation-defined).
ROW, TAG and POS yield a long, MARK yields an int.
*/
#define LDBP_ROW( p ) ((long)(((p)->val >> 40) & 0xffffff)) /* highest 3 bytes */
#define LDBP_TAG( p ) ((long)(((p)->val >> 24) & 0xffff)) /* next 2 bytes */
#define LDBP_POS( p ) ((long)((p)->val & 0xff7fff)) /* lowest 3 bytes w/o the mark bit */
#define LDBP_MARK( p ) ((int)((p)->val & 0x8000)) /* != 0 if the mark bit is set */
/* set / clear the mark bit in place */
#define LDBP_SETMARK( p ) ((p)->val |= LLL( 0x8000))
#define LDBP_CLRMARK( p ) ((p)->val &= LLL(0xffffffffffff7fff))
/* value w/o the mark bit */
#define LDBP_IGNMARK( p ) ((p)->val & LLL(0xffffffffffff7fff))
/*
compare according to rowid, tag, pos.
this is a plain comparison of the val members, mark bit included.
NOTE(review): if lll is a signed type, a rowid having its highest bit
(0x800000) set makes val negative and thus sort below all others --
confirm that such rowids do not occur.
*/
#define LDBP_EQ( a, b ) ((a)->val == (b)->val)
#define LDBP_GT( a, b ) ((a)->val > (b)->val)
/* compare IGNORING MARK according to rowid, tag, pos */
#define LDBP_EQM( a, b ) (LDBP_IGNMARK(a) == LDBP_IGNMARK(b))
#define LDBP_GTM( a, b ) (LDBP_IGNMARK(a) > LDBP_IGNMARK(b))
/*
manipulation: set bottom / top for row of given posting.
d gets the rowid of s with all lower 40 bits (tag and pos) cleared (BOT)
or all lower 40 bits set (TOP).
*/
#define LDBP_SETROWBOT( d, s ) ((d)->val = (s)->val & LLL(0xffffff0000000000))
#define LDBP_SETROWTOP( d, s ) ((d)->val = (s)->val | LLL( 0xffffffffff))
/**
postings come in arrays with some header.
The standard structure gives an 8k buffer
(the array holds OPENISIS_SETLEN postings; that constant is defined elsewhere),
but member len may give other actual length.
Members are tagged "in" or "io" -- presumably "in" is only set by the caller
while "io" is also updated by the callee; confirm against the implementation.
*/
typedef struct {
short mode; /* in: merge flags (presumably the LDB_OR .. LDB_KEEPMARKS values) */
short near; /* in: near distance; in OR mode: collect pos info */
int tag; /* in: tag, to which postings are restricted */
int skp; /* in: ignore mfns < skp */
int len; /* in: length (# of postings) of buffer (if 0 : default length) */
int fil; /* io: number of postings actually used */
int cut; /* io: min mfn ignored due to buffer length */
LdbP p[OPENISIS_SETLEN];
/**
the postings themselves, each having
8 bytes as in IFP file: mfn[3],tag[2],occ[1],cnt[2]
highest bit of cnt (1LL<<15) is used as mark
*/
} LdbPost;
/* values 0..2 name an operation, 4 and 8 are single-bit values */
enum {
	LDB_OR        = 0,
	LDB_AND       = 1,
	LDB_NOT       = 2, /* like AND, but keep unmarked postings */
	LDB_PFX       = 4, /* prefix match */
	LDB_KEEPMARKS = 8  /* do not compact after AND/NOT */
};
/* special near values; both are the extremes of a 16 bit signed number */
enum {
	LDB_NEAR_G = -32768, /* -0x8000, the (G): same field */
	LDB_NEAR_F = 32767   /* 0x7fff, the (F): same occurrence of field */
};
/**
	search entry point of this package.
	NOTE(review): the contract is not spelled out in this header.
	Presumably key is looked up in the index of database db, with post
	carrying the parameters in and the postings out (see the in/io notes
	on the LdbPost members); the role of rec and the meaning of the
	returned int need to be confirmed in the implementation.
*/
extern int ldb_search(int db, const char *key,
	LdbPost *post, OpenIsisRec *rec);
/*
	NOTE(review): undocumented here -- judging by name and signature this
	presumably converts the postings in post to an OpenIsisSet; confirm
	the direction and the meaning of the returned int.
*/
extern int ldb_p2s(OpenIsisSet *set, LdbPost *post);
/*
	NOTE(review): undocumented here -- presumably yields the Db belonging
	to the given database id; confirm what is returned for an unknown id.
*/
extern Db *ldb_getdb(int dbid);
#define LDB_H
#endif /* LDB_H */