/* Revision 237 (by dpavlin, 2004/03/08 17:43:12) initial import of openisis 0.9.0 vendor drop */
/*
	openisis - an open implementation of the CDS/ISIS database
	Version 0.8.x (patchlevel see file Version)
	Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

	see README for more information
EOH */

/*
	$Id: lfmt.c,v 1.14 2003/04/08 00:20:52 kripke Exp $
	implementation of record cooking.
*/

#include <string.h> /* memset et al */

#include "ldb.h"


#define LFMT_DBG LOG_WARN /* level of debugging */

/* ************************************************************
	private types
*/
/*
	dictionary entry describing one format token.
	rFmt reads sign[0] as the type of a left operand ('_' for none)
	and sign[2] onwards as the types of the expected params.
*/
typedef struct {
	const char *name;	/* token text as written in a format */
	const char *sign;	/* signature string */
	int         tag;	/* token id */
} LfmtToken;

/* output mode of a format; rFmt copies values verbatim while mode is 0 */
typedef enum {
	LFMT_MP = 0,	/* proof */
	LFMT_MH = 1,	/* heading */
	LFMT_MD = 2	/* data */
} lfmtMode;

/*
	value iterator: state of one field selector inside a loop.
	An all-zero iterator is a freshly opened one (rFmt memsets it).
*/
typedef struct LfmtIter_ {
	/* where the iterator lives in the format text */
	const unsigned char *def;	/* position where iter was opened */
	const unsigned char *end;	/* position where iter was closed */
	/* cursor */
	int  pos;	/* next field pos in the record */
	int  off;	/* offset to start search for next subfield */
	int  occ;	/* next occ; rFmt sets it to -1 once exhausted */
	/* filter */
	int  tag;	/* field tag to match, 0 matches any field */
	char sub;	/* subfield character, 0 for whole field */
	char submode;	/* primary loop (counted by occ) is over subfields */
	int  from;	/* primary range start */
	int  to;	/* primary range end, 0 for open end */
	int  sfrom;	/* additional subfield range start */
	int  sto;	/* additional subfield range end */
} LfmtIter;

/* number of iterator slots per record context */
#define LFMT_NITER 8

/*
	record context -- pushed by the r in REF.
	Holds the record being formatted plus the state of the current loop
	and of the iterators used inside that loop.
*/
typedef struct LfmtRec_ {
	struct LfmtRec_ *prev;	/* enclosing record context */
	Rec  *r;	/* the record values are read from */
	int   frame;	/* stack frame associated with this record */
	const unsigned char *loop;	/* position in format where loop was opened */
	LfmtIter iter[LFMT_NITER];	/* iterators in loop */
	int   occ;	/* loop occ (1 on the first pass) */
	unsigned char i;	/* iterator count: index of the next iterator */
	char  more;	/* more flag: some iterator has a next occurence */
	char  had;	/* had flag: last dereferenced iterator emitted a value */
	char  sub;	/* database's subfield separator, typically ^ or $ */
} LfmtRec;

/* format context -- pushed by @; one per format string being parsed */
typedef struct LfmtFmt_ {
	struct LfmtFmt_ *prev;	/* enclosing format context */
	const unsigned char *f;	/* start of the format text */
	const unsigned char *e;	/* end -- just for paranoia check */
	lfmtMode mode;	/* current output mode */
	int      upcase;	/* uppercase flag */
} LfmtFmt;


/*
	parser stack frame: one open token collecting its params.
	The params are the output fields from index data onwards.
*/
typedef struct {
	const LfmtToken *tok;	/* recognized token */
	int  data;	/* start of params in output */
	int  used;	/* used len of output up to our params */
	char pos;	/* position in signature, i.e. index of current param */
	char want;	/* currently expected type */
	char expl;	/* explicit flag: frame was opened with a paren */
	char rec;	/* record flag: fields of a param are not concatenated */
} LfmtFrame;


/*
	list of special token tags after the format tags 1 .. 999.
	unlike formatting tokens, which just emit a field with their negative tag,
	the literal's tag is reset to 0,
	and all other tokens emit fields with tags based on their parameters.

	tokens are organized in groups of (up to) 16.
	the group of an operator determines its precedence.
*/
/* first tag of group g (16 tags per group) */
#define LFMT_DEFGROUP(g) ((g) << 4)
/* group a tag belongs to */
#define LFMT_GROUP(tag)  ((tag) >> 4)

/* token groups; operator groups are listed by decreasing precedence */
enum {
	/* literals: tags starting from 63*16 = 1008 */
	LFMT_LITERAL   = 63,
	/* syntactical groups: tags starting from 64*16 = 0x400 */
	LFMT_FUNCTIONS = 64,
	/* tags starting from 96*16 = 0x600 */
	LFMT_VALUES    = 96,
	LFMT_ITERATORS = 97,
	LFMT_OPITER    = 98,	/* iterator operators */
	LFMT_OPSTR     = 99,	/* string operators */
	LFMT_OPMUL     = 100,	/* multiplicative operators */
	LFMT_OPADD     = 101,	/* additive operators */
	LFMT_OPREL     = 102,	/* relational operators */
	LFMT_OPBOOL    = 103,	/* boolean operators */
	LFMT_OPASSIGN  = 104,	/* assignment */
	/* tags starting from 128*16 = 0x800 */
	LFMT_STOPPER   = 128,
	LFMT_SYNTAX    = 129
};

/*
	token tags. The first tag of each group is pinned with LFMT_DEFGROUP,
	the others of the group just count on from there.
*/
enum {
	/* literals */
	LFMT_LIT = LFMT_DEFGROUP( LFMT_LITERAL ),
	LFMT_COND, LFMT_REP, LFMT_COMM,
	LFMT_BARE,	/* field of bareword C&A literal */
	LFMT_NUMBER,	/* standalone numeric literal */

	/* functions */
	LFMT_OPEN = LFMT_DEFGROUP( LFMT_FUNCTIONS ),
	LFMT_AMP, LFMT_AT, LFMT_XREF, LFMT_S, LFMT_Z, LFMT_F,
	/* mode switches */
	LFMT_MDL, LFMT_MDU, LFMT_MHL, LFMT_MHU, LFMT_MPL, LFMT_MPU,

	/* values */
	LFMT_SI = LFMT_DEFGROUP( LFMT_VALUES ),
	LFMT_EI,

	/* iterators */
	LFMT_V = LFMT_DEFGROUP( LFMT_ITERATORS ),
	LFMT_D, LFMT_N,

	/* operator groups by decreasing precedence */
	LFMT_INDEX = LFMT_DEFGROUP( LFMT_OPITER ),
	LFMT_SUBFIELD,

	LFMT_STAR = LFMT_DEFGROUP( LFMT_OPSTR ),
	LFMT_DOT, LFMT_INDENT,

	LFMT_MUL = LFMT_DEFGROUP( LFMT_OPMUL ),
	LFMT_DIV,

	LFMT_PLUS = LFMT_DEFGROUP( LFMT_OPADD ),
	LFMT_MINUS,

	LFMT_EQ = LFMT_DEFGROUP( LFMT_OPREL ),
	LFMT_NE, LFMT_LT, LFMT_LE, LFMT_GT, LFMT_GE, LFMT_CT,

	LFMT_AND = LFMT_DEFGROUP( LFMT_OPBOOL ),
	LFMT_OR, LFMT_NOT,

	LFMT_ASSIGN = LFMT_DEFGROUP( LFMT_OPASSIGN ),

	/* stoppers */
	LFMT_COMMA = LFMT_DEFGROUP( LFMT_STOPPER ),
	LFMT_RANGE, LFMT_CLOSE,

	/* syntax */
	LFMT_BLANK = LFMT_DEFGROUP( LFMT_SYNTAX ),
	LFMT_LOOP, LFMT_WHILE, LFMT_REF,
	LFMT_IF, LFMT_THEN, LFMT_ELSE, LFMT_FI,
	LFMT_SELECT, LFMT_CASE, LFMT_ELSECASE, LFMT_ENDSEL,

	LFMT__LAST
};

/*
	while fields emitted by formatting and literal tokens
	have tags between -1 and -1023 (0xffffffff to 0xfffffc01)
	with all high bits set, numerics have lowest negative values
	ranging from 0x80000000 to 0xbfffffff (-2147483648 to -1073741825).
	That means the highest two bits are 1 and 0.
	All numeric fields have a val of NULL and use len to hold a number.
	Besides pure int or fixed-point numbers,
	there are other numerical types:
	next two bits may indicate a range or field selector.
	The lower 3 bytes may give fractional part or range end.
*/
/*
	Tags are kept in int fields, so a numeric tag is a negative int.
	Where long is wider than int, such a tag is sign extended when it meets
	the long constants below; the bit tests therefore only look at bits
	that are covered by a mask.
*/
#define LFMT_NMASK 0xf0000000L /* highest bits 1 and 0 mark a number */
#define LFMT_NUM 0x80000000L /* basic number */
#define LFMT_ITR 0xa0000000L /* with iterator bit */
#define LFMT_VAL 0x00ffffffL /* mask for 3 bytes additional info */
/* is any numeric */
#define LFMT_ISNUM(t) (LFMT_NUM == (LFMT_NMASK & (t)))
/* is pure int -- compare the low 32 bits only, so that a negative int tag
	is not compared as a sign extended long against positive LFMT_NUM */
#define LFMT_ISINT(t) (LFMT_NUM == (0xffffffffL & (t)))
/* is iterator */
#define LFMT_ISITR(t) (LFMT_ITR == (LFMT_NMASK & (t)))

/* ************************************************************
	private data
*/
/*
	anonymous numeric literal token.
	while a numeric *field* has large LFMT_NUM tag,
	the anonymous number *token* has a literal tag,
	so we don't take it as high precedence operator.
*/
static const LfmtToken number = {
	"", "_1i", LFMT_NUMBER
};
/* token of the outmost frame, pushed by rFmt before parsing starts */
static const LfmtToken blank = {
	"_", "_0o_", LFMT_BLANK
};
/* token of the frames rFmt pushes when it opens a loop */
static const LfmtToken loop = {
	"loop", "_0o_", LFMT_LOOP
};

/*
 32 =    33 = !  34 = "  35 = #  36 = $  37 = %  38 = &  39 = '
 40 = (  41 = )  42 = *  43 = +  44 = ,  45 = -  46 = .  47 = /
 48 = 0  49 = 1  50 = 2  51 = 3  52 = 4  53 = 5  54 = 6  55 = 7
 56 = 8  57 = 9  58 = :  59 = ;  60 = <  61 = =  62 = >  63 = ?
 64 = @  65 = A  66 = B  67 = C  68 = D  69 = E  70 = F  71 = G
 72 = H  73 = I  74 = J  75 = K  76 = L  77 = M  78 = N  79 = O
 80 = P  81 = Q  82 = R  83 = S  84 = T  85 = U  86 = V  87 = W
 88 = X  89 = Y  90 = Z  91 = [  92 = \  93 = ]  94 = ^  95 = _
 96 = `  97 = a  98 = b  99 = c 100 = d 101 = e 102 = f 103 = g
104 = h 105 = i 106 = j 107 = k 108 = l 109 = m 110 = n 111 = o
112 = p 113 = q 114 = r 115 = s 116 = t 117 = u 118 = v 119 = w
120 = x 121 = y 122 = z 123 = { 124 = | 125 = } 126 = ~ 
*/
/*
	stops dictionary search.
	The sentinel is recognized by address (rFmt compares dict->name against
	sentinel) and by content: its single character 0x7f is larger than
	anything the upcase table yields, so the lookup loop bails out on it.
*/
static const char sentinel[] = "\x7f";
#define ENDDICT { sentinel, NULL, 0 }

/*
	dictionary of tokens starting with a non-alphanumeric character.
	Entries must stay in ascending order of name, with a longer token
	right after its prefix (e.g. "<", "<=", "<>"): the lookup loop in rFmt
	walks the array forward and stops as soon as the input character is
	smaller than the character of the entry under test.
	The first entry with the empty name matches the end of the format.
*/
static const LfmtToken dictSpecial[] = {
	{ "", "_0", 0 }, /* eof */
	{ "!", "_1x", OPENISIS_FMT_ESC },
	{ "\"", "_1x", LFMT_COND },
	{ "#", "_0", OPENISIS_FMT_SHARP },
	/* { "$", "_0", LFMT_LIT }, */
	{ "%", "_0", OPENISIS_FMT_PERCENT },
	{ "&", "_1as_", LFMT_AMP },
	{ "'", "_1x", LFMT_LIT },
	{ "(", "_0", LFMT_OPEN },
	{ ")", "_0", LFMT_CLOSE },
	{ "*", "n1n", LFMT_MUL },
	{ "+", "n1n", LFMT_PLUS },
	{ ",", "_0", LFMT_COMMA },
	{ "-", "n1n", LFMT_MINUS },
	{ ".", "s1i", LFMT_DOT },
	{ "..","_0", LFMT_RANGE },
	{ "/", "n1n", LFMT_DIV }, /* alias { "/", "_0", OPENISIS_FMT_SLASH }, */
	{ "/*","_1x", LFMT_COMM },
	/* 0 - 9 */
	{ ":", "s1s", LFMT_CT },
	{ ":=","v1f", LFMT_ASSIGN },
	/* { ";", "b_", LFMT_LIT }, */
	{ "<", "n1n", LFMT_LT },
	{ "<=","n1n", LFMT_LE },
	{ "<>","n1n", LFMT_NE },
	{ "=", "n1n", LFMT_EQ },
	{ ">", "n1n", LFMT_GT },
	{ ">=","n1n", LFMT_GE },
	/* { "?", "b_", LFMT_LIT }, */
	{ "@", "_1a", LFMT_AT },
	/* A - Z */
	{ "[", "v1nn", LFMT_INDEX },
	/* { "\\", "_0", LFMT_LIT }, */
	{ "]", "_0", LFMT_CLOSE }, /* shares its tag with ")" */
	{ "^", "v1c", LFMT_SUBFIELD },
	/* { "_", "_0", LFMT_LIT }, */
	/* { "`", "_0", LFMT_LIT }, */
	/* a - z */
	{ "{", "_0", OPENISIS_FMT_OPEN },
	{ "|", "_1x", LFMT_REP },
	{ "}", "_0", OPENISIS_FMT_CLOSE },
	{ "~", "_1n", LFMT_NOT },
	ENDDICT };
/*
	one dictionary per initial letter A .. Z, each closed by ENDDICT.
	Names are given in upper case, since rFmt maps the format text through
	the upcase table before comparing.
	Within a dictionary, entries are in ascending order of name.
*/
static const LfmtToken dictA[] = {
	ENDDICT
};
static const LfmtToken dictB[] = {
	{ "B", "_0", OPENISIS_FMT_B },
	ENDDICT
};
static const LfmtToken dictC[] = {
	{ "C", "_1i", OPENISIS_FMT_C },
	ENDDICT
};
static const LfmtToken dictD[] = {
	{ "D", "_1i", LFMT_D },
	ENDDICT
};
static const LfmtToken dictE[] = {
	{ "E",    "_1i",  LFMT_EI },
	{ "ELSE", "_1s_", LFMT_ELSE },
	ENDDICT
};
static const LfmtToken dictF[] = {
	/* disabled variant: { "F", "_1nii", LFMT_F }, */
	{ "F", "_1i", OPENISIS_FMT_F },
	ENDDICT
};
static const LfmtToken dictG[] = {
	ENDDICT
};
static const LfmtToken dictH[] = {
	ENDDICT
};
static const LfmtToken dictI[] = {
	{ "I",  "_0",  OPENISIS_FMT_I },
	{ "IF", "_1n", LFMT_IF },
	ENDDICT
};
static const LfmtToken dictJ[] = {
	ENDDICT
};
static const LfmtToken dictK[] = {
	ENDDICT
};
static const LfmtToken dictL[] = {
	{ "LINK", "_1s_", OPENISIS_FMT_LINK },
	ENDDICT
};
static const LfmtToken dictM[] = {
	{ "M",   "_1ii", OPENISIS_FMT_M },
	{ "MDL", "_0",   LFMT_MDL },
	{ "MDU", "_0",   LFMT_MDU },
	{ "MHL", "_0",   LFMT_MHL },
	{ "MHU", "_0",   LFMT_MHU },
	{ "MPL", "_0",   LFMT_MPL },
	{ "MPU", "_0",   LFMT_MPU },
	ENDDICT
};
static const LfmtToken dictN[] = {
	{ "N",  "_1i", LFMT_N },
	{ "NC", "_0i", OPENISIS_FMT_NC },
	ENDDICT
};
static const LfmtToken dictO[] = {
	ENDDICT
};
static const LfmtToken dictP[] = {
	{ "PICT", "_1s", OPENISIS_FMT_PICT },
	ENDDICT
};
static const LfmtToken dictQ[] = {
	{ "QC", "_0", OPENISIS_FMT_QC },
	ENDDICT
};
static const LfmtToken dictR[] = {
	{ "REF", "_2rs_", LFMT_REF },
	ENDDICT
};
static const LfmtToken dictS[] = {
	{ "S", "_1s_", LFMT_S },
	ENDDICT
};
static const LfmtToken dictT[] = {
	{ "TAB", "_0i", OPENISIS_FMT_TAB },
	ENDDICT
};
static const LfmtToken dictU[] = {
	{ "UL", "_0", OPENISIS_FMT_UL },
	ENDDICT
};
static const LfmtToken dictV[] = {
	{ "V", "_1i", LFMT_V },
	ENDDICT
};
static const LfmtToken dictW[] = {
	ENDDICT
};
static const LfmtToken dictX[] = {
	{ "X", "_0i", OPENISIS_FMT_X },
	ENDDICT
};
static const LfmtToken dictY[] = {
	ENDDICT
};
static const LfmtToken dictZ[] = {
	{ "Z", "_1is", LFMT_Z },
	ENDDICT
};

/* alpha dictionaries by initial letter; rFmt indexes this with u-'A' */
static const LfmtToken * const dictAZ[26] = {
	dictA, dictB, dictC, dictD, dictE, dictF, dictG,	/* A - G */
	dictH, dictI, dictJ, dictK, dictL, dictM, dictN,	/* H - N */
	dictO, dictP, dictQ, dictR, dictS, dictT, dictU,	/* O - U */
	dictV, dictW, dictX, dictY, dictZ	/* V - Z */
};


/*
	character conversion.
	0 stays 0, the other control characters and 0x7f become blank,
	lower case ascii letters become upper case,
	all bytes from 0x80 become '?', anything else maps to itself.
*/
static const unsigned char upcase[256] = {
	/* 0x00 */   0, ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	/* 0x08 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	/* 0x18 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&', '\'',
	/* 0x28 */ '(', ')', '*', '+', ',', '-', '.', '/',
	/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',
	/* 0x38 */ '8', '9', ':', ';', '<', '=', '>', '?',
	/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	/* 0x48 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	/* 0x58 */ 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
	/* 0x60 */ '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	/* 0x68 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	/* 0x70 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	/* 0x78 */ 'X', 'Y', 'Z', '{', '|', '}', '~', ' ',
	/* 0x80 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0x88 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0x90 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0x98 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xa0 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xa8 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xb0 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xb8 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xc0 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xc8 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xd0 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xd8 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xe0 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xe8 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xf0 */ '?', '?', '?', '?', '?', '?', '?', '?',
	/* 0xf8 */ '?', '?', '?', '?', '?', '?', '?', '?'
};

/* character class bits as held by the ctype table */
enum {
	DIG = 0x01,	/* digit */
	ALP = 0x02,	/* ascii alphabetic */
	IDE = 0x04,	/* other identifier character */
	LIT = 0x10,	/* as signature type, eat literal (a,c,i,x) */
	ALT = ALP | LIT
};

/*
	character type.
	Digits are DIG, ascii letters are ALP, the underscore is IDE.
	The lower case letters a, c, i and x additionally carry LIT.
	Everything else, including all bytes from 0x80, has no class.
*/
static const unsigned char ctype[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0 - 9, then : ; < = > ? */
	DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG,
	DIG, DIG, 0, 0, 0, 0, 0, 0,
	/* @ A - G */
	0, ALP, ALP, ALP, ALP, ALP, ALP, ALP,
	/* H - O */
	ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP,
	/* P - W */
	ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP,
	/* X Y Z [ \ ] ^ _ */
	ALP, ALP, ALP, 0, 0, 0, 0, IDE,
	/* ` a - g */
	0, ALT, ALP, ALT, ALP, ALP, ALP, ALP,
	/* h - o */
	ALP, ALT, ALP, ALP, ALP, ALP, ALP, ALP,
	/* p - w */
	ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP,
	/* x y z { | } ~ del */
	ALT, ALP, ALP, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};


/* ************************************************************
	private functions
*/
/*
	character class tests on top of the ctype table.
	don't rely on braindead ctype.h, it may use locale.
	Each test yields the matching class bits, i.e. nonzero for a match.
*/
#define ISALPHA(u) (ctype[(unsigned char)(u)] & ALP)
#define ISDIGIT(u) (ctype[(unsigned char)(u)] & DIG)
#define ISALNUM(u) (ctype[(unsigned char)(u)] & (ALP|DIG))
#define ISIDENT(u) (ctype[(unsigned char)(u)] & (ALP|DIG|IDE))
#define ISLITER(u) (ctype[(unsigned char)(u)] & LIT)

/* ************************************************************
	package functions
*/
/* ************************************************************
	public functions
*/

/*
	format record irec according to the format string fmt.

	The format is tokenized and evaluated in a single pass: each token is
	looked up in the dictionaries, frames of tokens awaiting params are kept
	on a fixed size stack, and values and literals are appended as fields
	to the output record.

	buf   output record to append to; must not be NULL, it is dereferenced
	      before the first field is added.
	fmt   the NUL terminated format string.
	irec  the record to take values from; its dbid and rowid are copied
	      to the output.

	Returns the output record. This is whatever RADD, RCAT and rMsg left in
	o, so it may differ from buf, and it is NULL if one of them yielded NULL.
	On a format error a message field is added using rMsg and evaluation
	stops; the fields emitted so far are returned.
	Before returning, the tag of every field that has a value and a tag
	of -LFMT_LIT or below is reset to 0.
*/
Rec* rFmt ( Rec *buf, const char *fmt, Rec *irec )
{
	/* context */
	Rec *o = buf;
#define ADD( ntag, s, n ) do { \
		RADD( o, ntag, s, n, o!=buf ); \
		if ( NULL == o ) goto outofmem; \
	}	while(0)
#define CAT( s, n ) do { \
		RCAT( o, s, n, o!=buf ); \
		if ( NULL == o ) goto outofmem; \
	}	while(0)
/* field add */
#define FADD( f ) ADD( f.tag, f.val, f.len )
/* string add */
#define SADD( ntag, s ) ADD( ntag, s, strlen(s) )
#define SCAT( s ) CAT( s, strlen(s) )
/* error messages, prefixed by the offset in the format.
	the offset is a pointer difference, so cast it to match %d */
#define SERR( err ) do { \
		o = rMsg( o, o!=buf, -LOG_ERROR, \
			"@%d: %s", (int)(fp - f->f), err ); \
		if ( NULL == o ) goto outofmem; \
	}	while(0)
#define SERR1( fmt, a ) do { \
		o = rMsg( o, o!=buf, -LOG_ERROR, \
			"@%d: " fmt, (int)(fp - f->f), a ); \
		if ( NULL == o ) goto outofmem; \
	}	while(0)
#define SERR2( fmt, a, b ) do { \
		o = rMsg( o, o!=buf, -LOG_ERROR, \
			"@%d: " fmt, (int)(fp - f->f), a, b ); \
		if ( NULL == o ) goto outofmem; \
	}	while(0)
/* number add */
#define NADD( i, d ) do { \
		ADD( LFMT_NUM | (d), NULL, 0 ); \
		o->field[o->len - 1].val = NULL; \
		o->field[o->len - 1].len = i; \
	}	while(0)

	LfmtFrame stack[128], *s = stack; /* current frame (always == stack+sp) */
	int       sp = -1;
/* push new frame for token t */
#define PUSHFRAME( t ) do { \
		if ( ++sp == sizeof(stack)/sizeof(stack[0]) ) goto overflow; \
		s = stack+sp; \
		memset( s, 0, sizeof(*s) ); \
		s->data = o->len; \
		s->used = o->used; \
		s->tok = (t); \
		s->want = (t)->sign[2]; \
	}	while(0)

	LfmtFmt     fmt0, *f = NULL; /* default and current format */
	const unsigned char *fp = NULL; /* format pointer */
#define PUSHFORMAT( pfmt, fstr ) do { \
		memset( pfmt, 0, sizeof(*(pfmt)) ); \
		(pfmt)->prev = f; \
		f = (pfmt); \
		fp = f->f = (const unsigned char *)fstr; \
		f->e = fp + strlen(fstr); \
	}	while(0)

	LfmtRec     rec0, *r = NULL; /* default and current record */
#define PUSHRECORD( prec, nrec ) do { \
		memset( prec, 0, sizeof(*(prec)) ); \
		(prec)->prev = r; \
		r = (prec); \
		r->frame = sp; \
		r->r = nrec; \
		r->sub = '^'; \
	}	while(0)

	int tlen = 0; /* token length */

	
	PUSHFRAME( &blank );
	s->rec = 1;
	PUSHFORMAT( &fmt0, fmt );
	PUSHRECORD( &rec0, irec );

	o->dbid  = irec->dbid ;
	o->rowid = irec->rowid;


	/* one pass per token; tlen is the length of the token just handled */
	for ( ;; fp += tlen ) {
		const LfmtToken *tok = NULL;
		Field lit;
		unsigned char u;
		int start = s->data + s->pos; /* start of current param in output */
		int flds = o->len - start; /* # fields for current param */
		int vpos = 0; /* pos in dict where we had match of tlen chars */
		int dpos = 0; /* pos in dict */
		int expl = 0;
		unsigned char eatlit = 0;
		const LfmtToken *dict;
		enum {
			CLOSE_OP, /* don't close anything, operator wants field */
			CLOSE_VAR,	/* close variables only */
			CLOSE_FIELD,	/* coerce field only */
			CLOSE_PARAM,	/* compact param only (eat comma) */
			CLOSE_EXPL,	/* close frame explicitly (eat closing token) */
			CLOSE_IMPL	/* close frame implicitly, continue on token */
		} close;
		static const char *closename[] = {
			"", "var", "field", "param", "expl", "impl" };

		lit.tag = 0;
		tlen = 0; /* verified length */

		/*
		LOG_DBG( LFMT_DBG,
			"frame %s%c%s id %d pos %d got %d want %c%s",
			s->tok->name, s->expl ? '(' : ' ', s->tok->sign, s->tok->tag,
			s->pos, o->len - s->data, s->want, flds ? " (have)" : "" );
		*/
		/* lookup token */

		while ( ' ' == (u = upcase[*fp]) ) /* eat whitespace */
			fp++;

		if ( ISALPHA(u) ) {
			dict = dictAZ[u-'A'];
			/* all entries of a non-empty alpha dict share the 1st char */
			if ( sentinel != dict->name )
				u = upcase[ fp[tlen = 1] ];
		} else if ( ISDIGIT( *fp ) ) {
			tok = &number; /* read numeric literal */
			goto countnumber;
		} else
			dict = dictSpecial;

		/* tok is the last candidate that matched up to ITS length */
		for ( ;; dpos++ ) {
			unsigned char test;
			LOG_DBG( LOG_TRACE, "trying token '%s'", dict[dpos].name );
			if ( tlen && vpos != dpos
				&& strncmp( dict[dpos].name, dict[vpos].name, tlen ) )
				break;	/* ran out of verified length */
			if ( u == dict[dpos].name[tlen] )
				vpos = dpos;
			while ( (test = dict[dpos].name[tlen]) && u == test )
				/* test this entry -- same pos, next char */
				u = upcase[ fp[++tlen] ];
			if ( !test ) /* dict entry ends here -- a hit so far */
				tok = &dict[dpos];
				/*
				LOG_DBG( LFMT_DBG, "\thit on token %s(%s) len %d",
					tok->name, tok->sign, tlen );
				*/
			if ( u < test || !u ) /* too large - bail out*/
				break;
		}

		if ( NULL != tok ) { /* had some match */
			if ( tok < &dict[vpos] )	/* had later == longer match */
				tok = NULL;
			else if ( dict != dictSpecial ) {
				/* alpha-token must not be followed by alpha */
				assert( tlen == (int)strlen(tok->name) );
				if ( ISALPHA( u ) && ISALPHA( tok->name[tlen-1] ) )
					tok = NULL;
			}
		}

		if ( NULL == tok /* no hit in alphadict */
			|| (dictSpecial == tok && *fp) /* false eof hit in special dict */
		) {
			SERR2( "unrecognized token '%.*s'", tlen+1, fp );
			goto broken;
		}

		if ( ISLITER( tok->sign[2] ) ) /* eat literals */
		switch ( tok->sign[2] ) { /* c, a, i, x */
		case 'c': /* single character */
			if ( (lit.len = (eatlit = fp[tlen]) ? 1 : 0 ) ) {
				lit.tag = -LFMT_BARE;
				lit.val = (char *)fp + tlen;
				tlen++;
			}
			break;
		case 'a': /* bareword of identifier characters */
			if ( (eatlit = ISIDENT( fp[tlen] )) ) {
				lit.tag = -LFMT_BARE;
				lit.val = (char *)fp + tlen;
				lit.len = 0;
				while ( ISIDENT( fp[tlen] ) ) {
					lit.len++;
					tlen++;
				}
			}
			break;
		case 'i': /* integer, held in lit.len with a NULL val */
		countnumber:
			lit.val = NULL;
			lit.len = 0;
			if ( (eatlit = ISDIGIT( u = fp[tlen] )) ) {
				lit.tag = LFMT_NUM;
				/* u is unsigned, so chars below '0' wrap to large values */
				while ( u && 10 > (u -= '0') ) {
					lit.len = 10*lit.len + u;
					u = fp[++tlen];
				}
			}
			/* decimal digits on standalone numeric literal */
			if ( &number == tok
				&& (unsigned char)('.'-'0') == u
				&& '.' != fp[tlen+1] /* avoid eating 1..2 */
			) {
				int dec = 0;
				int v = 0;
				/* eat all digits, but keep at most the first 6 */
				while ( (u = fp[++tlen]) && 10 > (u -= '0') )
					if ( 6 > dec ) {
						v = 10*v + u;
						dec++;
					}
				/* scale to 6 decimal digits, so that .5 gives 500000 */
				while ( 6 > dec++ ) v *= 10;
				lit.tag = LFMT_NUM | (v & LFMT_VAL);
			}
			break;
		case 'x': /* delimited literal */
			switch ( tok->tag ) {
			case LFMT_COMM:
				eatlit='*';
				break;
			case OPENISIS_FMT_ESC:
				/* delimiter is the character following the token.
					at the end of the format there is none: bail out
					rather than scanning behind the terminating 0 */
				eatlit=fp[tlen];
				if ( !eatlit ) {
					SERR1( "unterminated %s-literal", tok->name );
					goto broken;
				}
				tlen++;
				break;
			default:
				eatlit=tok->name[0];
				break;
			}
			lit.tag = - tok->tag;
			lit.val = (char *)fp+1;
			/* scan for the delimiter; a comment closes on star slash only */
			while ( (u = fp[tlen++])
				&& (eatlit != u || (LFMT_COMM==tok->tag && '/' != fp[tlen]))
			)	;
			if ( !u ) {
				SERR1( "unterminated %s-literal", tok->name );
				goto broken;
			}
			lit.len = tlen - 2;
			if ( LFMT_COMM == tok->tag ) {
				tlen++;
				continue;
			}
		}

		if ( tok->sign[2] ) { /* token wants params */
			if ( (!eatlit || tok->sign[3]) && (expl = '(' == fp[tlen]) )
				tlen++; /* eat ( */
		}

		/* got token */
		LOG_DBG( LFMT_DBG,
			"at %d: '%.*s' %s(%s) id %d (%x) group %d pos %d%c of %s%c%s",
			(int)(fp - f->f), tlen > 10 ? 10 : tlen, fp,
			tok->name, tok->sign, tok->tag, tok->tag, LFMT_GROUP(tok->tag),
			s->pos, s->want, s->tok->name, s->expl?'(':' ', s->tok->sign
		);

		/* close what needs to be closed */
		do {
			int group = LFMT_GROUP( tok->tag );
			int dflt, wantnum, gotnum;

			close = CLOSE_FIELD;

			if ( '_' != tok->sign[0] /* token is operator ... */
				&& (s->expl /* ... within explicit frame or ... */
				|| group < LFMT_GROUP( s->tok->tag )) /* ... of higher prec */
			) /* we take it -- even as a variable ? */
				close = 'v' == tok->sign[0] ? CLOSE_OP : CLOSE_VAR;
			else switch ( tok->tag ) {
			case LFMT_COMMA:
			case LFMT_RANGE:
				close = CLOSE_PARAM;
				break;
			case LFMT_CLOSE:
				close = CLOSE_EXPL;
				break;
			}

			if ( CLOSE_VAR > close ) /* leave variable to operator */
				break; /* close loop */

			if ( flds && LFMT_ISITR( o->field[ o->len-1 ].tag ) ) {
				/* dereference iterator variable */
				LfmtIter * iter = r->iter + r->i;
				if ( ! iter->occ ) {
					if ( ! iter->from )
						iter->from = 1; /* field defaults to 1..0 (all) */
					if ( ! iter->sfrom )
						iter->sfrom = iter->sto = 1; /* subfield defaults to 1..1 (1st) */
				}

				LOG_DBG( LFMT_DBG, "iterator %d/%d V%d[%d..%d]^%c[%d..%d]%s",
					o->field[ o->len-1 ].len, r->occ,
					iter->tag, iter->from, iter->to,
					iter->sub ? iter->sub : ' ', iter->sfrom, iter->sto,
					iter->submode ? " subfield mode" : "" 
				);

				o->len--; /* kill the variable. */
				/*	move to next legal position.
					In standard mode, where we advance one field occurence at a time,
					this is just the occ >= from.
					A legal position may still emit no field,
					if a selected subfield is not available in the current field occ.
					In subfield mode, we may have to advance 0, 1 or several field
					occurences to find next occ of subfield.
				*/
				r->had = 0;
				do {
					int socc;
					if ( 0 > iter->occ ) /* we were already done */
						break;
					/* if have legal occurence, ADD it */
					if ( iter->occ >= iter->from ) for ( socc=1;; socc++ ) {
						/* not initialization pass. */
						Field *v = r->r->field + iter->pos;
						const char *src = v->val;
						int len = v->len;
						if ( iter->sub ) { /* find subfield */
							const char *p = src + iter->off, *e = src+len;
							while ( p < e && (
								r->sub != *p++
								|| e == p
								|| (iter->sub != *p++ && iter->sub != '*')
							) )
								;
							iter->off = p - src;
							if ( p >= e )
								break;
							if ( iter->sfrom && socc < iter->sfrom )
								continue;
							if ( iter->sto && socc > iter->sto )
								break;
							src = p;
							while ( p < e && r->sub != *p )
								p++;
							iter->off += len = p - src;
						}
						/* make sure there's enough room, +3 for DATA mode */
						ADD( v->tag, NULL, len+3 );
						if ( ! f->mode )
							CAT( src, len );
						else {
							/* heading and data mode: drop <..> markup
								and turn subfield separators into punctuation */
							const char *e = src + len;
							char *dst = (char *)o->field[ o->len - 1 ].val;
							for ( ; src < e; ) {
								if ( '<' == *src ) {
									for ( src++; src < e; *dst++ = *src++ )
										if ( '=' == *src || '>' == *src ) {
											while ( '>' != *src++ && src < e )
												;
											if ( src < e && '<' == *src ) { /* have >< */
												*dst++ = ';';
												*dst++ = ' ';
											}
											break;
										}
								} else if ( r->sub == *src ) {
									if ( ++src == e )
										break;
									if ( dst == o->field[ o->len - 1 ].val )
										; /* no separator */
									else if ( 'a' == *src )
										*dst++ = ';';
									else if ( 'j' > *src )
										*dst++ = ',';
									else
										*dst++ = '.';
									src++;
								} else
									*dst++ = *src++;
							}
							o->used +=
								o->field[ o->len - 1 ].len =
								dst - o->field[ o->len - 1 ].val;
							if ( LFMT_MD == f->mode )
								CAT( ".  ", 3 );
						}	/* f->mode */
						if ( f->upcase ) {
							char *p = (char *)o->field[ o->len - 1 ].val;
							char *e = p + o->field[ o->len - 1 ].len;
							for ( ; p < e; p++ )
								*p = upcase[(unsigned char)*p];
						}
						r->had = 1;
						if ( iter->submode || ! iter->sub )
							break;
					}
					/* advance to the next candidate position */
					if ( iter->submode ) {
						if ( iter->occ && iter->occ < iter->from ) /* was skipped */
							iter->off++;
						for ( ; iter->pos < r->r->len; iter->pos++, iter->off = 0 )
							if ( iter->tag == r->r->field[ iter->pos ].tag || ! iter->tag ) {
								Field *v = r->r->field + iter->pos;
								const char *p = v->val + iter->off, *e = v->val + v->len;
								while ( p < e && (
									r->sub != *p++
									|| e == p
									|| (iter->sub != *p++ && iter->sub != '*')
								) )
									;
								if ( p < e ) { /* hit */
									iter->off = (p - v->val) - 2;
									break;
								}
							}
					} else {
						if ( iter->occ )
							iter->pos++;
						while ( iter->pos < r->r->len
							&& iter->tag
							&& iter->tag != r->r->field[ iter->pos ].tag
						)
							iter->pos++;
						iter->off = 0;
					}
					iter->occ++;
					if ( iter->pos >= r->r->len /* end of record */
						|| (iter->to && iter->to < iter->occ) /* end of selected occ */
					) {
						iter->occ = -1;
						break;
					}
				} while ( iter->occ <= iter->from );

				if ( 0 < iter->occ ) /* we have a next occurence */
					r->more = 1;
				/* no value: take back a preceding repeatable literal */
				if ( ! r->had && flds && -LFMT_REP == o->field[ o->len - 1 ].tag ) {
					flds--;
					o->len--;
					o->used -= o->field[o->len].len;
				}
				r->i++; /* advance to next iterator */
			} /* dereference iterator variable */

			if ( CLOSE_FIELD > close ) /* leave field to operator */
				break; /* close loop */

			if ( ! flds ) { /* had no field */
				if ( CLOSE_PARAM > close ) /* nothing to do */
					break; /* close loop */
			} else if ( ! s->expl /* frame is implicit ... */
				&& ( LFMT_LOOP != s->tok->tag /* ... and not a loop ... */
					|| CLOSE_EXPL == close /* ... or we saw a hard closer ... */
					|| (r->i && CLOSE_PARAM == close)
					/* ... or a loop after 1st iterator on param closer */
				)
			) /** upgrade the closing mode to IMPL */
				close = CLOSE_IMPL;

			/* close the field, i.e. coerce it */
			dflt = !flds /* param was not given ... */
				&& (s->pos /* but was started explicitly by a comma like F( 3, ) */
				|| CLOSE_PARAM == close /* or is closed explicitly like F( ,3 ) */
			);
			wantnum = 'i' == s->want || 'n' == s->want;
			gotnum = flds && LFMT_ISNUM( o->field[ o->len-1 ].tag );

			LOG_DBG( LFMT_DBG,
				"\tclose %s %d%c pos %d%c of %s%c%s id %x",
				closename[close],
				flds, flds ? gotnum ? 'n' : 's' : dflt ? 'd' : '-', s->pos, s->want,
				s->tok->name, s->expl?'(':' ', s->tok->sign, s->tok->tag );

			/* close and fix last field */
			if ( gotnum && !wantnum ) {
				/* coerce number to string, so it may get concatenated */
				char b[32];
				int ll = lprint( b, o->field[ o->len-1 ].len );
				b[ll++] = ' ';
				o->len--;
				ADD( 0, b, ll );
				gotnum = 0;
			}

			if ( CLOSE_PARAM > close )
				break; /* close loop */

			/* close and fix param */
			if ( wantnum ) {
				if ( dflt ) { /* default 0 */
					NADD( 0, 0 );
					flds = 1;
				} else if ( 1 < flds ) {
					SERR( "multiple fields for numerical param" );
					goto broken;
				} else if ( flds && !gotnum ) {
					SERR( "expected number" );
					goto broken;
				}
				/* NOTE(review): this clears the fraction of the frame's
					first param (s->data), not of the current one (start)
					-- confirm this is intended */
				if ( 'i' == s->want ) /* kill decimal */
					o->field[s->data].tag &= ~LFMT_VAL;
			} else {
				if ( dflt ) {
					SADD( 0, "" );
					flds = 1;
				} else if ( gotnum ) {
					SERR( "expected string" ); /* shouldn't happen !? */
					goto broken;
				} else if ( 1 < flds	/* concat strings ... */
					&& ! s->rec /* ... unless in record mode frame (blank,loop,ref) */
				) {
					/* only the lengths are summed up; no bytes are moved */
					int i;
					for ( i = start+1; i < o->len; i++ )
						if ( o->field[i].val )
							o->field[ start ].len += o->field[ i ].len;
					o->len = start + 1;
				}
			}

			/* done fixing param */

			if ( CLOSE_IMPL > close ) {
				tok = NULL; /* eat token */
				if ( CLOSE_EXPL > close ) { /* i.e. CLOSE_PARAM */
					if ( flds ) { /* increment param pos, set next wanted type */
						int sl = strlen( s->tok->sign ); /* frame takes sl-2 params */
						if ( ++(s->pos) < sl-2 ) {
							if ( '_' != s->tok->sign[ 2 + s->pos ] ) /* repeated param */
								s->want = s->tok->sign[ 2 + s->pos ]; /* else keep last val */
						} else if ( '_' != s->tok->sign[sl-1] ) { /* too much */
							SERR2( "expected at most %d params for '%s'",
								sl-2, s->tok->name );
							goto broken;
						}
					}
					break; /* close loop */
				}
			}

			/* done with token -- close and fix frame */

/* drop the params of the current frame from the output */
#define	KILLARGS() do { \
		o->len = s->data; \
		o->used = s->used; \
	} while(0)

/* drop the params and the left operand preceding them */
#define	KILLOP() do { \
		o->len = s->data - 1; \
		o->used = s->used - (o->field[o->len].val ? o->field[o->len].len : 0); \
	} while(0)

			switch ( s->tok->tag ) {
			case LFMT_PLUS: /* arithmetic */
			case LFMT_MINUS:
			case LFMT_MUL:
			case LFMT_DIV: {
				lll a = o->field[s->data-1].len;
				lll b = o->field[s->data].len;
				/* LOG_DBG( LFMT_DBG, "\tarith on %Ld %Ld", a, b ); */
				switch ( s->tok->tag ) {
				case LFMT_PLUS:	a += b; break;
				case LFMT_MINUS:	a -= b; break;
				case LFMT_MUL:	a *= b; break;
				/* dividing by 0 leaves the left operand as is */
				case LFMT_DIV: if ( 0 != b ) a /= b; break;
				}
				KILLOP();
				NADD( (int)a, 0 );
			} break;
			case LFMT_F: {
				char b[32];
				int ll = lprint( b, o->field[ s->data ].len );
				KILLARGS();
				ADD( 0, b, ll );
			} break;
			case LFMT_V:
				r->iter[ r->i ].tag = o->field[ s->data ].len;
				/* set iterator field */
				o->field[ s->data ].tag = LFMT_ITR | r->iter[ r->i ].tag;
				o->field[ s->data ].len = r->i;
				break;
			case LFMT_INDEX: {
				LfmtIter * iter = r->iter + r->i;
				if ( 1 != r->occ ) {
					SERR( "OOPS! index not within loop" );
					goto internalerr;
				}
				if ( ! iter->sub || (iter->submode = !iter->from) ) {
					/* primary loop */
					if ( !(iter->from = (int) o->field[ s->data ].len) )
						iter->from = 1;
					iter->to = s->pos ? (int) o->field[ s->data+1 ].len : iter->from;
				} else {
					/* additional subfield loop */
					if ( !(iter->sfrom = (int) o->field[ s->data ].len) )
						iter->sfrom = 1;
					iter->sto = s->pos ? (int) o->field[ s->data+1 ].len : iter->sfrom;
				}
				iter->end = fp + tlen;
				KILLARGS();
			} break; /* case LFMT_INDEX: */
			case LFMT_SUBFIELD:
				if ( 1 != r->occ ) {
					SERR( "OOPS! subfield not within loop" );
					goto internalerr;
				}
				r->iter[ r->i ].sub = o->field[ s->data ].val[0];
				r->iter[ r->i ].end = fp + tlen;
				KILLARGS();
				break;
			case LFMT_LOOP:
				if ( r->more ) {
					fp = r->loop; /* start over */
					tlen = 0; /* care for += tlen */
					tok = NULL;
					r->occ++;
					r->more = 0;
					r->i = 0;
					goto doneclosing;
				}
				r->loop = 0;
				break;
			}

			/* close frame, unless it's the outmost */
			if ( ! sp )
				break;

			/* TODO: close loop ? */
			s = &stack[ --sp ];

			start = s->data + s->pos; /* start of current param in output */
			flds = o->len - start; /* # fields for current param */

		}	while ( CLOSE_IMPL == close );
		doneclosing:

		if ( NULL == tok )
			continue;
		if ( ! tok->tag ) /* eof token */
			goto done;

		/* add new token */
		if ( NULL == r->loop ) /* open loop ? */
			switch ( LFMT_GROUP(tok->tag) ) {
			case LFMT_FUNCTIONS:
				if ( LFMT_OPEN != tok->tag )
					break;
				goto openloop; /* else should do, but gcc doesn't grok it right */
			case LFMT_LITERAL:
				if ( LFMT_COND != tok->tag && LFMT_REP != tok->tag )
					break;
				/* conditional and repeatable literals open a loop, too */
			case LFMT_ITERATORS:
				openloop:
				LOG_DBG( LFMT_DBG, "\topening loop on %s", tok->name );
				PUSHFRAME( &loop );
				s->rec = stack[sp-1].rec; /* inherit record mode */
				r->loop = fp;
				r->occ = 1;
				r->more = 0;
				r->had = 0;
				r->i = 0;
				if ( (s->expl = (LFMT_OPEN == tok->tag)) ) {
					r->loop += tlen;
					continue;
				}
			}

		if ( tok->sign[2] && LFMT_LITERAL != LFMT_GROUP(tok->tag) ) {
			/* token opens standard frame */
			if ( LFMT_ITERATORS == LFMT_GROUP(tok->tag) ) {
				if ( 1 != r->occ ) { /* not first run */
					if ( r->iter[r->i].def != fp ) {
						SERR1( "internal error at %d. iterator", r->i + 1 );
						goto internalerr;
					}
					NADD( r->i, LFMT_ITR | r->iter[r->i].tag ); /* push iterator */
					fp = r->iter[r->i].end; /* skip parsing */
					tlen = 0; /* care for += tlen */
					continue; /* next token */
				}
				/* NOTE(review): this refuses the iterator with index
					LFMT_NITER-1, so at most LFMT_NITER-1 iterators can
					be used -- confirm whether the last slot is reserved */
				if ( LFMT_NITER-1 == r->i ) {
					SERR1( "maximum number of iterators %d exceeded", LFMT_NITER );
					goto overflow;
				}
				memset( r->iter + r->i, 0, sizeof(r->iter[0]) );
				r->iter[r->i].def = fp;
				r->iter[r->i].end = fp + tlen;
			}
			PUSHFRAME( tok );
			switch ( tok->tag ) { /* special treatment */
			case LFMT_INDEX:
				s->expl = 1;
				break;
			default:
				if ( expl )
					s->expl = 1;
			}
			LOG_DBG( LFMT_DBG, "\topen %s%c%s id %x",
				s->tok->name, s->expl?'(':' ', s->tok->sign, s->tok->tag );
		}

		if ( lit.tag ) { /* a literal */
			if ( NULL != lit.val ) { /* string literal */
				switch ( lit.tag ) {
				case -LFMT_COND:
					if ( r->i ? !r->more : r->occ > 1 )
						continue;
					break;
				case -LFMT_REP:
					if ( r->i && !r->had )
						continue;
					break;
				}
				ADD( lit.tag, lit.val, lit.len );
				LOG_DBG( LFMT_DBG, "\tlit %s%.*s id %x",
					tok->name, lit.len, lit.val, tok->tag );
			} else { /* numeric literal */
				NADD( lit.len, lit.tag );
				LOG_DBG( LFMT_DBG, "\tlit %d", lit.len );
			}
			/* new implicit frame immediatly closed by literal ?
				problem are implicit loops
			if ( o->len == s->data + 1 && ! s->expl ) {
				tok = NULL;
				goto closeframe;
			}
			*/
		} else switch ( tok->tag ) { /* other special tokens */
		case LFMT_MDL: f->mode = LFMT_MD; f->upcase = 0; break;
		case LFMT_MDU: f->mode = LFMT_MD; f->upcase = 1; break;
		case LFMT_MHL: f->mode = LFMT_MH; f->upcase = 0; break;
		case LFMT_MHU: f->mode = LFMT_MH; f->upcase = 1; break;
		case LFMT_MPL: f->mode = LFMT_MP; f->upcase = 0; break;
		case LFMT_MPU: f->mode = LFMT_MP; f->upcase = 1; break;
		}

	}	/* for token */

broken:
internalerr:
overflow:
outofmem:
done:
	/* reset the tags of literal fields; numeric fields (NULL val) stay */
	if ( NULL != o ) {
		int i = o->len;
		while ( i-- )
			if ( NULL == o->field[i].val ) {
				;
			} else if ( -LFMT_LIT >= o->field[i].tag )
				o->field[i].tag = 0;
	}
	return o;
}	/* rFmt */