1 |
dpavlin |
237 |
/* |
2 |
|
|
openisis - an open implementation of the CDS/ISIS database |
3 |
|
|
Version 0.8.x (patchlevel see file Version) |
4 |
|
|
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org |
5 |
|
|
|
6 |
|
|
This library is free software; you can redistribute it and/or |
7 |
|
|
modify it under the terms of the GNU Lesser General Public |
8 |
|
|
License as published by the Free Software Foundation; either |
9 |
|
|
version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
|
11 |
|
|
This library is distributed in the hope that it will be useful, |
12 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
Lesser General Public License for more details. |
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU Lesser General Public |
17 |
|
|
License along with this library; if not, write to the Free Software |
18 |
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
|
|
20 |
|
|
see README for more information |
21 |
|
|
EOH */ |
22 |
|
|
|
23 |
|
|
/* |
24 |
|
|
$Id: lfmt.c,v 1.14 2003/04/08 00:20:52 kripke Exp $ |
25 |
|
|
implementation of record cooking. |
26 |
|
|
*/ |
27 |
|
|
|
28 |
|
|
#include <string.h> /* memset et al */ |
29 |
|
|
|
30 |
|
|
#include "ldb.h" |
31 |
|
|
|
32 |
|
|
|
33 |
|
|
#define LFMT_DBG LOG_WARN /* level of debugging */ |
34 |
|
|
|
35 |
|
|
/* ************************************************************ |
36 |
|
|
private types |
37 |
|
|
*/ |
38 |
|
|
/*
	LfmtToken -- one dictionary entry (function definition) of the format
	language. Entries are written as positional initializers
	{ name, sign, tag }, so the member order must stay as is.
*/
typedef struct {
	/* token spelling as matched against the upcased format text
	   (hmmm ... a fixed char[] would probably be faster) */
	const char *name;
	/* signature string: sign[0] is '_' unless the token is an operator,
	   sign[2] is the first parameter type the token wants (0 if none) */
	const char *sign;
	/* token id, one of the LFMT_* or OPENISIS_FMT_* constants */
	int tag;
} LfmtToken;
43 |
|
|
|
44 |
|
|
/*
	output mode of a format.
	LFMT_MP is 0, so a format context cleared with memset starts in
	proof mode, where field contents are copied verbatim.
*/
typedef enum {
	LFMT_MP, /* proof */
	LFMT_MH, /* heading */
	LFMT_MD  /* data */
} lfmtMode;
49 |
|
|
|
50 |
|
|
/*
	LfmtIter -- value iterator over the occurrences of a field
	(and optionally of a subfield) within a record.
	A from/sfrom of 0 means "not set"; the defaults are filled in
	when the iterator is first dereferenced.
*/
typedef struct LfmtIter_ {
	const unsigned char *def; /* position in format where iter was opened */
	const unsigned char *end; /* position in format where iter was closed */
	int pos; /* next field pos */
	int off; /* offset to start search for next subfield */
	int occ; /* next occ; negative once the iterator is done */
	/* filter */
	int tag; /* field tag to select, 0 matches any tag */
	char sub; /* subfield id to select, 0 for the whole field, '*' for any */
	char submode; /* primary loop (counted by occ) is over subfields */
	int from; /* primary range */
	int to;
	int sfrom; /* additional subfield range */
	int sto;
} LfmtIter;
65 |
|
|
|
66 |
|
|
#define LFMT_NITER 8 |
67 |
|
|
|
68 |
|
|
typedef struct LfmtRec_ { /* record context -- pushed by the r in REF */ |
69 |
|
|
struct LfmtRec_ *prev; |
70 |
|
|
Rec *r; |
71 |
|
|
int frame; /* stack frame associated with this record */ |
72 |
|
|
const unsigned char *loop; /* position in format where loop was opened. */ |
73 |
|
|
LfmtIter iter[LFMT_NITER]; /* iterators in loop */ |
74 |
|
|
int occ; /* loop occ */ |
75 |
|
|
unsigned char i; /* iterator count */ |
76 |
|
|
char more; /* more flag */ |
77 |
|
|
char had; /* had flag */ |
78 |
|
|
char sub; /* database's subfield separator, typically ^ or $ */ |
79 |
|
|
} LfmtRec; |
80 |
|
|
|
81 |
|
|
typedef struct LfmtFmt_ { /* format context -- pushed by @ */ |
82 |
|
|
struct LfmtFmt_ *prev; |
83 |
|
|
const unsigned char *f; |
84 |
|
|
const unsigned char *e; /* end -- just for paranoia check */ |
85 |
|
|
lfmtMode mode; |
86 |
|
|
int upcase; /* uppercase flag */ |
87 |
|
|
} LfmtFmt; |
88 |
|
|
|
89 |
|
|
|
90 |
|
|
typedef struct { /* other type */ |
91 |
|
|
const LfmtToken *tok; /* recognized token */ |
92 |
|
|
int data; /* start of params in output */ |
93 |
|
|
int used; /* used len of output up to our params */ |
94 |
|
|
char pos; /* position in signature */ |
95 |
|
|
char want; /* currently expected type */ |
96 |
|
|
char expl; /* explicit flag */ |
97 |
|
|
char rec; /* record flag */ |
98 |
|
|
} LfmtFrame; |
99 |
|
|
|
100 |
|
|
|
101 |
|
|
/* |
102 |
|
|
list of special token tags after the format tags 1 .. 999. |
103 |
|
|
unlike formatting tokens, which just emit a field with their negative tag, |
104 |
|
|
the literal's tag is reset to 0, |
105 |
|
|
and all other tokens emit fields with tags based on their parameters. |
106 |
|
|
|
107 |
|
|
tokens are organized in groups of (up to) 16. |
108 |
|
|
	the group of an operator determines its precedence.
109 |
|
|
*/ |
110 |
|
|
/* first tag of token group i: groups are 16 tags wide */
#define LFMT_DEFGROUP(i) ((i) << 4) /* * 16 */
/* group that tag i belongs to (inverse of LFMT_DEFGROUP, drops the low 4 bits) */
#define LFMT_GROUP(i) ((i) >> 4)
112 |
|
|
|
113 |
|
|
/* token groups */ |
114 |
|
|
/*
	token group numbers; multiply by 16 (LFMT_DEFGROUP) to get the first
	tag of a group. Operator groups are listed by decreasing precedence,
	i.e. a smaller group number binds tighter.
*/
enum {
	LFMT_LITERAL = 63, /* literals group starting from 63*16 = 1008 */
	/* syntactical groups starting from 64*16 = 0x400 */
	LFMT_FUNCTIONS,
	LFMT_VALUES = 96, /* starting from 96*16 = 0x600 */
	LFMT_ITERATORS,
	LFMT_OPITER, /* iterator operators */
	LFMT_OPSTR, /* string operators */
	LFMT_OPMUL, /* multiplicative operators */
	LFMT_OPADD, /* additive operators */
	LFMT_OPREL, /* relational operators */
	LFMT_OPBOOL, /* boolean operators */
	LFMT_OPASSIGN, /* assignment */
	LFMT_STOPPER = 128, /* starting from 128*16 = 0x800 */
	LFMT_SYNTAX
};
130 |
|
|
|
131 |
|
|
enum { |
132 |
|
|
/* literals */ |
133 |
|
|
LFMT_LIT = LFMT_DEFGROUP( LFMT_LITERAL ), |
134 |
|
|
LFMT_COND, |
135 |
|
|
LFMT_REP, |
136 |
|
|
LFMT_COMM, |
137 |
|
|
LFMT_BARE, /* field of bareword C&A literal */ |
138 |
|
|
LFMT_NUMBER, /* standalone numeric literal */ |
139 |
|
|
/* functions */ |
140 |
|
|
LFMT_OPEN = LFMT_DEFGROUP( LFMT_FUNCTIONS ), |
141 |
|
|
LFMT_AMP, |
142 |
|
|
LFMT_AT, |
143 |
|
|
LFMT_XREF, |
144 |
|
|
LFMT_S, |
145 |
|
|
LFMT_Z, |
146 |
|
|
LFMT_F, |
147 |
|
|
/* */ |
148 |
|
|
LFMT_MDL, |
149 |
|
|
LFMT_MDU, |
150 |
|
|
LFMT_MHL, |
151 |
|
|
LFMT_MHU, |
152 |
|
|
LFMT_MPL, |
153 |
|
|
LFMT_MPU, |
154 |
|
|
/* values */ |
155 |
|
|
LFMT_SI = LFMT_DEFGROUP( LFMT_VALUES ), |
156 |
|
|
LFMT_EI, |
157 |
|
|
/* iterators */ |
158 |
|
|
LFMT_V = LFMT_DEFGROUP( LFMT_ITERATORS ), |
159 |
|
|
LFMT_D, |
160 |
|
|
LFMT_N, |
161 |
|
|
/* operator groups by decreasing precedence */ |
162 |
|
|
LFMT_INDEX = LFMT_DEFGROUP( LFMT_OPITER ), |
163 |
|
|
LFMT_SUBFIELD, |
164 |
|
|
LFMT_STAR = LFMT_DEFGROUP( LFMT_OPSTR ), |
165 |
|
|
LFMT_DOT, LFMT_INDENT, |
166 |
|
|
LFMT_MUL = LFMT_DEFGROUP( LFMT_OPMUL ), |
167 |
|
|
LFMT_DIV, |
168 |
|
|
LFMT_PLUS = LFMT_DEFGROUP( LFMT_OPADD ), |
169 |
|
|
LFMT_MINUS, |
170 |
|
|
LFMT_EQ = LFMT_DEFGROUP( LFMT_OPREL ), |
171 |
|
|
LFMT_NE, LFMT_LT, LFMT_LE, LFMT_GT, LFMT_GE, LFMT_CT, |
172 |
|
|
LFMT_AND = LFMT_DEFGROUP( LFMT_OPBOOL ), |
173 |
|
|
LFMT_OR, LFMT_NOT, |
174 |
|
|
LFMT_ASSIGN = LFMT_DEFGROUP( LFMT_OPASSIGN ), |
175 |
|
|
/* stoppers */ |
176 |
|
|
LFMT_COMMA = LFMT_DEFGROUP( LFMT_STOPPER ), |
177 |
|
|
LFMT_RANGE, |
178 |
|
|
LFMT_CLOSE, |
179 |
|
|
/* syntax */ |
180 |
|
|
LFMT_BLANK = LFMT_DEFGROUP( LFMT_SYNTAX ), |
181 |
|
|
LFMT_LOOP, |
182 |
|
|
LFMT_WHILE, |
183 |
|
|
LFMT_REF, |
184 |
|
|
LFMT_IF, LFMT_THEN, LFMT_ELSE, LFMT_FI, |
185 |
|
|
LFMT_SELECT, LFMT_CASE, LFMT_ELSECASE, LFMT_ENDSEL, |
186 |
|
|
LFMT__LAST |
187 |
|
|
}; |
188 |
|
|
|
189 |
|
|
/* |
190 |
|
|
while fields emitted by formatting and literal tokens |
191 |
|
|
have tags between -1 and -1023 (0xffffffff to 0xfffffc01) |
192 |
|
|
with all high bits set, numerics have lowest negative values |
193 |
|
|
ranging from 0x80000000 to 0xbfffffff (-2147483648 to -1073741825). |
194 |
|
|
That means the highest two bits are 1 and 0. |
195 |
|
|
All numeric fields have a val of NULL and use len to hold a number. |
196 |
|
|
Besides pure int or fixed-point numbers, |
197 |
|
|
there are other numerical types: |
198 |
|
|
next two bits may indicate a range or field selector. |
199 |
|
|
The lower 3 bytes may give fractional part or range end. |
200 |
|
|
*/ |
201 |
|
|
/*
	numeric tag layout (32 bit): the top four bits hold the number
	marker (bits 1,0) followed by two type bits, the low three bytes
	hold additional info.
	All tests work on the low 32 bits of the tag only, so they give the
	same answer for an int tag whether long is 32 or 64 bits wide.
*/
#define LFMT_NMASK 0xf0000000L /* marker bits 1,0 plus the two type bits */
#define LFMT_NUM 0x80000000L /* basic number */
#define LFMT_ITR 0xa0000000L /* with iterator bit */
#define LFMT_VAL 0x00ffffffL /* mask for 3 bytes additional info */
/* is basic number (no type bits set), with or without additional info */
#define LFMT_ISNUM(t) (LFMT_NUM == (LFMT_NMASK & (t)))
/* is pure int: basic number without any additional info.
   The tag is cut to 32 bits first: a negative int tag sign-extends when
   converted to a 64 bit long and would never equal the positive LFMT_NUM. */
#define LFMT_ISINT(t) (LFMT_NUM == (0xffffffffL & (t)))
/* is iterator */
#define LFMT_ISITR(t) (LFMT_ITR == (LFMT_NMASK & (t)))
211 |
|
|
|
212 |
|
|
/* ************************************************************ |
213 |
|
|
private data |
214 |
|
|
*/ |
215 |
|
|
static const LfmtToken number = { "", "_1i", LFMT_NUMBER }; |
216 |
|
|
/* while a numeric *field* has large LFMT_NUM tag, |
217 |
|
|
the anonymous number *token* has a literal tag, |
218 |
|
|
so we don't take it as high precedence operator. |
219 |
|
|
*/ |
220 |
|
|
static const LfmtToken blank = { "_", "_0o_", LFMT_BLANK }; |
221 |
|
|
static const LfmtToken loop = { "loop", "_0o_", LFMT_LOOP }; |
222 |
|
|
|
223 |
|
|
/* |
224 |
|
|
32 = 33 = ! 34 = " 35 = # 36 = $ 37 = % 38 = & 39 = ' |
225 |
|
|
40 = ( 41 = ) 42 = * 43 = + 44 = , 45 = - 46 = . 47 = / |
226 |
|
|
48 = 0 49 = 1 50 = 2 51 = 3 52 = 4 53 = 5 54 = 6 55 = 7 |
227 |
|
|
56 = 8 57 = 9 58 = : 59 = ; 60 = < 61 = = 62 = > 63 = ? |
228 |
|
|
64 = @ 65 = A 66 = B 67 = C 68 = D 69 = E 70 = F 71 = G |
229 |
|
|
72 = H 73 = I 74 = J 75 = K 76 = L 77 = M 78 = N 79 = O |
230 |
|
|
80 = P 81 = Q 82 = R 83 = S 84 = T 85 = U 86 = V 87 = W |
231 |
|
|
88 = X 89 = Y 90 = Z 91 = [ 92 = \ 93 = ] 94 = ^ 95 = _ |
232 |
|
|
96 = ` 97 = a 98 = b 99 = c 100 = d 101 = e 102 = f 103 = g |
233 |
|
|
104 = h 105 = i 106 = j 107 = k 108 = l 109 = m 110 = n 111 = o |
234 |
|
|
112 = p 113 = q 114 = r 115 = s 116 = t 117 = u 118 = v 119 = w |
235 |
|
|
120 = x 121 = y 122 = z 123 = { 124 = | 125 = } 126 = ~ |
236 |
|
|
*/ |
237 |
|
|
/*
	stops dictionary search: every dictionary ends with an entry whose
	name is this very string. 0x7f compares greater than any character
	the upcase table can produce, so the lookup loop bails out there.
	Empty dictionaries are recognized by comparing the name pointer
	against sentinel, so the string must not be duplicated.
*/
static const char sentinel[] = "\x7f";
#define ENDDICT { sentinel, NULL, 0 }
240 |
|
|
|
241 |
|
|
static const LfmtToken dictSpecial[] = { |
242 |
|
|
{ "", "_0", 0 }, /* eof */ |
243 |
|
|
{ "!", "_1x", OPENISIS_FMT_ESC }, |
244 |
|
|
{ "\"", "_1x", LFMT_COND }, |
245 |
|
|
{ "#", "_0", OPENISIS_FMT_SHARP }, |
246 |
|
|
/* { "$", "_0", LFMT_LIT }, */ |
247 |
|
|
{ "%", "_0", OPENISIS_FMT_PERCENT }, |
248 |
|
|
{ "&", "_1as_", LFMT_AMP }, |
249 |
|
|
{ "'", "_1x", LFMT_LIT }, |
250 |
|
|
{ "(", "_0", LFMT_OPEN }, |
251 |
|
|
{ ")", "_0", LFMT_CLOSE }, |
252 |
|
|
{ "*", "n1n", LFMT_MUL }, |
253 |
|
|
{ "+", "n1n", LFMT_PLUS }, |
254 |
|
|
{ ",", "_0", LFMT_COMMA }, |
255 |
|
|
{ "-", "n1n", LFMT_MINUS }, |
256 |
|
|
{ ".", "s1i", LFMT_DOT }, |
257 |
|
|
{ "..","_0", LFMT_RANGE }, |
258 |
|
|
{ "/", "n1n", LFMT_DIV }, /* alias { "/", "_0", OPENISIS_FMT_SLASH }, */ |
259 |
|
|
{ "/*","_1x", LFMT_COMM }, |
260 |
|
|
/* 0 - 9 */ |
261 |
|
|
{ ":", "s1s", LFMT_CT }, |
262 |
|
|
{ ":=","v1f", LFMT_ASSIGN }, |
263 |
|
|
/* { ";", "b_", LFMT_LIT }, */ |
264 |
|
|
{ "<", "n1n", LFMT_LT }, |
265 |
|
|
{ "<=","n1n", LFMT_LE }, |
266 |
|
|
{ "<>","n1n", LFMT_NE }, |
267 |
|
|
{ "=", "n1n", LFMT_EQ }, |
268 |
|
|
{ ">", "n1n", LFMT_GT }, |
269 |
|
|
{ ">=","n1n", LFMT_GE }, |
270 |
|
|
/* { "?", "b_", LFMT_LIT }, */ |
271 |
|
|
{ "@", "_1a", LFMT_AT }, |
272 |
|
|
/* A - Z */ |
273 |
|
|
{ "[", "v1nn", LFMT_INDEX }, |
274 |
|
|
/* { "\\", "_0", LFMT_LIT }, */ |
275 |
|
|
{ "]", "_0", LFMT_CLOSE }, |
276 |
|
|
{ "^", "v1c", LFMT_SUBFIELD }, |
277 |
|
|
/* { "_", "_0", LFMT_LIT }, */ |
278 |
|
|
/* { "`", "_0", LFMT_LIT }, */ |
279 |
|
|
/* a - z */ |
280 |
|
|
{ "{", "_0", OPENISIS_FMT_OPEN }, |
281 |
|
|
{ "|", "_1x", LFMT_REP }, |
282 |
|
|
{ "}", "_0", OPENISIS_FMT_CLOSE }, |
283 |
|
|
{ "~", "_1n", LFMT_NOT }, |
284 |
|
|
ENDDICT }; |
285 |
|
|
static const LfmtToken dictA[] = { ENDDICT }; |
286 |
|
|
static const LfmtToken dictB[] = { |
287 |
|
|
{ "B", "_0", OPENISIS_FMT_B }, |
288 |
|
|
ENDDICT }; |
289 |
|
|
static const LfmtToken dictC[] = { |
290 |
|
|
{ "C", "_1i", OPENISIS_FMT_C }, |
291 |
|
|
ENDDICT }; |
292 |
|
|
static const LfmtToken dictD[] = { |
293 |
|
|
{ "D", "_1i", LFMT_D }, |
294 |
|
|
ENDDICT }; |
295 |
|
|
static const LfmtToken dictE[] = { |
296 |
|
|
{ "E", "_1i", LFMT_EI }, |
297 |
|
|
{ "ELSE", "_1s_", LFMT_ELSE }, |
298 |
|
|
ENDDICT }; |
299 |
|
|
static const LfmtToken dictF[] = { |
300 |
|
|
/* { "F", "_1nii", LFMT_F }, */ |
301 |
|
|
{ "F", "_1i", OPENISIS_FMT_F }, |
302 |
|
|
ENDDICT }; |
303 |
|
|
static const LfmtToken dictG[] = { ENDDICT }; |
304 |
|
|
static const LfmtToken dictH[] = { ENDDICT }; |
305 |
|
|
static const LfmtToken dictI[] = { |
306 |
|
|
{ "I", "_0", OPENISIS_FMT_I }, |
307 |
|
|
{ "IF", "_1n", LFMT_IF }, |
308 |
|
|
ENDDICT }; |
309 |
|
|
static const LfmtToken dictJ[] = { ENDDICT }; |
310 |
|
|
static const LfmtToken dictK[] = { ENDDICT }; |
311 |
|
|
static const LfmtToken dictL[] = { |
312 |
|
|
{ "LINK", "_1s_", OPENISIS_FMT_LINK }, |
313 |
|
|
ENDDICT }; |
314 |
|
|
static const LfmtToken dictM[] = { |
315 |
|
|
{ "M", "_1ii", OPENISIS_FMT_M }, |
316 |
|
|
{ "MDL", "_0", LFMT_MDL }, |
317 |
|
|
{ "MDU", "_0", LFMT_MDU }, |
318 |
|
|
{ "MHL", "_0", LFMT_MHL }, |
319 |
|
|
{ "MHU", "_0", LFMT_MHU }, |
320 |
|
|
{ "MPL", "_0", LFMT_MPL }, |
321 |
|
|
{ "MPU", "_0", LFMT_MPU }, |
322 |
|
|
ENDDICT }; |
323 |
|
|
static const LfmtToken dictN[] = { |
324 |
|
|
{ "N", "_1i", LFMT_N }, |
325 |
|
|
{ "NC", "_0i", OPENISIS_FMT_NC }, |
326 |
|
|
ENDDICT }; |
327 |
|
|
static const LfmtToken dictO[] = { ENDDICT }; |
328 |
|
|
static const LfmtToken dictP[] = { |
329 |
|
|
{ "PICT", "_1s", OPENISIS_FMT_PICT }, |
330 |
|
|
ENDDICT }; |
331 |
|
|
static const LfmtToken dictQ[] = { |
332 |
|
|
{ "QC", "_0", OPENISIS_FMT_QC }, |
333 |
|
|
ENDDICT }; |
334 |
|
|
static const LfmtToken dictR[] = { |
335 |
|
|
{ "REF", "_2rs_", LFMT_REF }, |
336 |
|
|
ENDDICT }; |
337 |
|
|
static const LfmtToken dictS[] = { |
338 |
|
|
{ "S", "_1s_", LFMT_S }, |
339 |
|
|
ENDDICT }; |
340 |
|
|
static const LfmtToken dictT[] = { |
341 |
|
|
{ "TAB", "_0i", OPENISIS_FMT_TAB }, |
342 |
|
|
ENDDICT }; |
343 |
|
|
static const LfmtToken dictU[] = { |
344 |
|
|
{ "UL", "_0", OPENISIS_FMT_UL }, |
345 |
|
|
ENDDICT }; |
346 |
|
|
static const LfmtToken dictV[] = { |
347 |
|
|
{ "V", "_1i", LFMT_V }, |
348 |
|
|
ENDDICT }; |
349 |
|
|
static const LfmtToken dictW[] = { ENDDICT }; |
350 |
|
|
static const LfmtToken dictX[] = { |
351 |
|
|
{ "X", "_0i", OPENISIS_FMT_X }, |
352 |
|
|
ENDDICT }; |
353 |
|
|
static const LfmtToken dictY[] = { ENDDICT }; |
354 |
|
|
static const LfmtToken dictZ[] = { |
355 |
|
|
{ "Z", "_1is", LFMT_Z }, |
356 |
|
|
ENDDICT }; |
357 |
|
|
|
358 |
|
|
static const LfmtToken * const dictAZ[26] = { |
359 |
|
|
dictA, dictB, dictC, dictD, dictE, dictF, dictG, dictH, dictI, |
360 |
|
|
dictJ, dictK, dictL, dictM, dictN, dictO, dictP, dictQ, dictR, |
361 |
|
|
dictS, dictT, dictU, dictV, dictW, dictX, dictY, dictZ |
362 |
|
|
}; |
363 |
|
|
|
364 |
|
|
|
365 |
|
|
/*
	character conversion, indexed by unsigned char:
	- 0 stays 0,
	- control characters 1..31 and 127 become a blank,
	- a..z become A..Z, other printable ASCII maps to itself,
	- everything from 128 up becomes '?'.
*/
static const unsigned char upcase[256] = {
	/*   0 ..  15 */
	0,  ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',
	/*  16 ..  31 */
	' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',
	/*  32 ..  47 */
	' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
	/*  48 ..  63 */
	'0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
	/*  64 ..  79 */
	'@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
	/*  80 ..  95 */
	'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
	/*  96 .. 111: lowercase folded to uppercase */
	'`','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
	/* 112 .. 127 */
	'P','Q','R','S','T','U','V','W','X','Y','Z','{','|','}','~',' ',
	/* 128 .. 255 */
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
	'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?'
};
384 |
|
|
|
385 |
|
|
/* character class bits stored in the ctype table */
enum {
	DIG = 0x01, /* digit */
	ALP = 0x02, /* ascii alphabetic */
	IDE = 0x04, /* other identifier character */
	LIT = 0x10, /* as signature type, eat literal (a,c,i,x) */
	ALT = ALP|LIT
};

/*
	character type, indexed by unsigned char.
	Only ASCII is classified: 0..9 are DIG, A..Z and a..z are ALP,
	'_' is IDE; the lowercase letters a, c, i and x additionally carry
	LIT. Every other character, including 128..255, is 0.
*/
static const unsigned char ctype[256] = {
	/*   0 ..  47: controls and punctuation */
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* '0' .. '?' */
	DIG,DIG,DIG,DIG,DIG,DIG,DIG,DIG,DIG,DIG,0,  0,  0,  0,  0,  0,
	/* '@' .. 'O' */
	0,  ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,
	/* 'P' .. '_' */
	ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,0,  0,  0,  0,  IDE,
	/* '`' .. 'o': a, c and i are literal types */
	0,  ALT,ALP,ALT,ALP,ALP,ALP,ALP,ALP,ALT,ALP,ALP,ALP,ALP,ALP,ALP,
	/* 'p' .. 127: x is a literal type */
	ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALT,ALP,ALP,0,  0,  0,  0,  0,
	/* 128 .. 255 */
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
413 |
|
|
|
414 |
|
|
|
415 |
|
|
/* ************************************************************ |
416 |
|
|
private functions |
417 |
|
|
*/ |
418 |
|
|
/*
	character class tests on top of the ctype table.
	don't rely on braindead ctype.h, it may use locale.
	Each macro yields the matching class bits (nonzero) or 0; the
	argument is cast to unsigned char, so plain char values are safe.
*/
#define ISALPHA(u) (        ALP   & ctype[(unsigned char)(u)])
#define ISDIGIT(u) (        DIG   & ctype[(unsigned char)(u)])
#define ISALNUM(u) ((ALP|DIG)     & ctype[(unsigned char)(u)])
#define ISIDENT(u) ((ALP|DIG|IDE) & ctype[(unsigned char)(u)])
#define ISLITER(u) (        LIT   & ctype[(unsigned char)(u)])
424 |
|
|
|
425 |
|
|
/* ************************************************************ |
426 |
|
|
package functions |
427 |
|
|
*/ |
428 |
|
|
/* ************************************************************ |
429 |
|
|
public functions |
430 |
|
|
*/ |
431 |
|
|
|
432 |
|
|
Rec* rFmt ( Rec *buf, const char *fmt, Rec *irec ) |
433 |
|
|
{ |
434 |
|
|
/* context */ |
435 |
|
|
Rec *o = buf; |
436 |
|
|
#define ADD( ntag, s, n ) do { \ |
437 |
|
|
RADD( o, ntag, s, n, o!=buf ); \ |
438 |
|
|
if ( NULL == o ) goto outofmem; \ |
439 |
|
|
} while(0) |
440 |
|
|
#define CAT( s, n ) do { \ |
441 |
|
|
RCAT( o, s, n, o!=buf ); \ |
442 |
|
|
if ( NULL == o ) goto outofmem; \ |
443 |
|
|
} while(0) |
444 |
|
|
/* field add */ |
445 |
|
|
#define FADD( f ) ADD( f.tag, f.val, f.len ) |
446 |
|
|
/* string add */ |
447 |
|
|
#define SADD( ntag, s ) ADD( ntag, s, strlen(s) ) |
448 |
|
|
#define SCAT( s ) CAT( s, strlen(s) ) |
449 |
|
|
#define SERR( err ) do { \ |
450 |
|
|
o = rMsg( o, o!=buf, -LOG_ERROR, \ |
451 |
|
|
"@%d: %s", fp - f->f, err ); \ |
452 |
|
|
if ( NULL == o ) goto outofmem; \ |
453 |
|
|
} while(0) |
454 |
|
|
#define SERR1( fmt, a ) do { \ |
455 |
|
|
o = rMsg( o, o!=buf, -LOG_ERROR, \ |
456 |
|
|
"@%d: " fmt, fp - f->f, a ); \ |
457 |
|
|
if ( NULL == o ) goto outofmem; \ |
458 |
|
|
} while(0) |
459 |
|
|
#define SERR2( fmt, a, b ) do { \ |
460 |
|
|
o = rMsg( o, o!=buf, -LOG_ERROR, \ |
461 |
|
|
"@%d: " fmt, fp - f->f, a, b ); \ |
462 |
|
|
if ( NULL == o ) goto outofmem; \ |
463 |
|
|
} while(0) |
464 |
|
|
/* number add */ |
465 |
|
|
#define NADD( i, d ) do { \ |
466 |
|
|
ADD( LFMT_NUM | (d), NULL, 0 ); \ |
467 |
|
|
o->field[o->len - 1].val = NULL; \ |
468 |
|
|
o->field[o->len - 1].len = i; \ |
469 |
|
|
} while(0) |
470 |
|
|
|
471 |
|
|
LfmtFrame stack[128], *s = stack; /* current frame (always == stack+sp) */ |
472 |
|
|
int sp = -1; |
473 |
|
|
/* push new frame for token t */ |
474 |
|
|
#define PUSHFRAME( t ) do { \ |
475 |
|
|
if ( ++sp == sizeof(stack)/sizeof(stack[0]) ) goto overflow; \ |
476 |
|
|
s = stack+sp; \ |
477 |
|
|
memset( s, 0, sizeof(*s) ); \ |
478 |
|
|
s->data = o->len; \ |
479 |
|
|
s->used = o->used; \ |
480 |
|
|
s->tok = (t); \ |
481 |
|
|
s->want = (t)->sign[2]; \ |
482 |
|
|
} while(0) |
483 |
|
|
|
484 |
|
|
LfmtFmt fmt0, *f = NULL; /* default and current format */ |
485 |
|
|
const unsigned char *fp = NULL; /* format pointer */ |
486 |
|
|
#define PUSHFORMAT( pfmt, fstr ) do { \ |
487 |
|
|
memset( pfmt, 0, sizeof(*(pfmt)) ); \ |
488 |
|
|
(pfmt)->prev = f; \ |
489 |
|
|
f = (pfmt); \ |
490 |
|
|
fp = f->f = (const unsigned char *)fstr; \ |
491 |
|
|
f->e = fp + strlen(fstr); \ |
492 |
|
|
} while(0) |
493 |
|
|
|
494 |
|
|
LfmtRec rec0, *r = NULL; /* default and current record */ |
495 |
|
|
#define PUSHRECORD( prec, nrec ) do { \ |
496 |
|
|
memset( prec, 0, sizeof(*(prec)) ); \ |
497 |
|
|
(prec)->prev = r; \ |
498 |
|
|
r = (prec); \ |
499 |
|
|
r->frame = sp; \ |
500 |
|
|
r->r = nrec; \ |
501 |
|
|
r->sub = '^'; \ |
502 |
|
|
} while(0) |
503 |
|
|
|
504 |
|
|
int tlen = 0; /* token length */ |
505 |
|
|
|
506 |
|
|
|
507 |
|
|
PUSHFRAME( &blank ); |
508 |
|
|
s->rec = 1; |
509 |
|
|
PUSHFORMAT( &fmt0, fmt ); |
510 |
|
|
PUSHRECORD( &rec0, irec ); |
511 |
|
|
|
512 |
|
|
o->dbid = irec->dbid ; |
513 |
|
|
o->rowid = irec->rowid; |
514 |
|
|
|
515 |
|
|
|
516 |
|
|
for ( ;; fp += tlen ) { |
517 |
|
|
const LfmtToken *tok = NULL; |
518 |
|
|
Field lit; |
519 |
|
|
unsigned char u; |
520 |
|
|
int start = s->data + s->pos; /* start of current param in output */ |
521 |
|
|
int flds = o->len - start; /* # fields for current param */ |
522 |
|
|
int vpos = 0; /* pos in dict where we had match of tlen chars */ |
523 |
|
|
int dpos = 0; /* pos in dict */ |
524 |
|
|
int expl = 0; |
525 |
|
|
unsigned char eatlit = 0; |
526 |
|
|
const LfmtToken *dict; |
527 |
|
|
enum { |
528 |
|
|
CLOSE_OP, /* don't close anything, operator wants field */ |
529 |
|
|
CLOSE_VAR, /* close variables only */ |
530 |
|
|
CLOSE_FIELD, /* coerce field only */ |
531 |
|
|
CLOSE_PARAM, /* compact param only (eat comma) */ |
532 |
|
|
CLOSE_EXPL, /* close frame explicitly (eat closing token) */ |
533 |
|
|
CLOSE_IMPL /* close frame implicitly, continue on token */ |
534 |
|
|
} close; |
535 |
|
|
static const char *closename[] = { |
536 |
|
|
"", "var", "field", "param", "expl", "impl" }; |
537 |
|
|
|
538 |
|
|
lit.tag = 0; |
539 |
|
|
tlen = 0; /* verified length */ |
540 |
|
|
|
541 |
|
|
/* |
542 |
|
|
LOG_DBG( LFMT_DBG, |
543 |
|
|
"frame %s%c%s id %d pos %d got %d want %c%s", |
544 |
|
|
s->tok->name, s->expl ? '(' : ' ', s->tok->sign, s->tok->tag, |
545 |
|
|
s->pos, o->len - s->data, s->want, flds ? " (have)" : "" ); |
546 |
|
|
*/ |
547 |
|
|
/* lookup token */ |
548 |
|
|
|
549 |
|
|
while ( ' ' == (u = upcase[*fp]) ) /* eat whitespace */ |
550 |
|
|
fp++; |
551 |
|
|
|
552 |
|
|
if ( ISALPHA(u) ) { |
553 |
|
|
dict = dictAZ[u-'A']; |
554 |
|
|
if ( sentinel != dict->name ) |
555 |
|
|
u = upcase[ fp[tlen = 1] ]; |
556 |
|
|
} else if ( ISDIGIT( *fp ) ) { |
557 |
|
|
tok = &number; /* read numeric literal */ |
558 |
|
|
goto countnumber; |
559 |
|
|
} else |
560 |
|
|
dict = dictSpecial; |
561 |
|
|
|
562 |
|
|
/* tok is the last candidate that matched up to IT'S length */ |
563 |
|
|
for ( ;; dpos++ ) { |
564 |
|
|
unsigned char test; |
565 |
|
|
LOG_DBG( LOG_TRACE, "trying token '%s'", dict[dpos].name ); |
566 |
|
|
if ( tlen && vpos != dpos |
567 |
|
|
&& strncmp( dict[dpos].name, dict[vpos].name, tlen ) ) |
568 |
|
|
break; /* ran out of verified length */ |
569 |
|
|
if ( u == dict[dpos].name[tlen] ) |
570 |
|
|
vpos = dpos; |
571 |
|
|
while ( (test = dict[dpos].name[tlen]) && u == test ) |
572 |
|
|
/* test this entry -- same pos, next char */ |
573 |
|
|
u = upcase[ fp[++tlen] ]; |
574 |
|
|
if ( !test ) /* dict entry ends here -- a hit so far */ |
575 |
|
|
tok = &dict[dpos]; |
576 |
|
|
/* |
577 |
|
|
LOG_DBG( LFMT_DBG, "\thit on token %s(%s) len %d", |
578 |
|
|
tok->name, tok->sign, tlen ); |
579 |
|
|
*/ |
580 |
|
|
if ( u < test || !u ) /* too large - bail out*/ |
581 |
|
|
break; |
582 |
|
|
} |
583 |
|
|
|
584 |
|
|
if ( NULL != tok ) { /* had some match */ |
585 |
|
|
if ( tok < &dict[vpos] ) /* had later == longer match */ |
586 |
|
|
tok = NULL; |
587 |
|
|
else if ( dict != dictSpecial ) { |
588 |
|
|
/* alpha-token must not be followed by alpha */ |
589 |
|
|
assert( tlen == (int)strlen(tok->name) ); |
590 |
|
|
if ( ISALPHA( u ) && ISALPHA( tok->name[tlen-1] ) ) |
591 |
|
|
tok = NULL; |
592 |
|
|
} |
593 |
|
|
} |
594 |
|
|
|
595 |
|
|
if ( NULL == tok /* no hit in alphadict */ |
596 |
|
|
|| (dictSpecial == tok && *fp) /* false eof hit in special dict */ |
597 |
|
|
) { |
598 |
|
|
SERR2( "unrecognized token '%.*s'", tlen+1, fp ); |
599 |
|
|
goto broken; |
600 |
|
|
} |
601 |
|
|
|
602 |
|
|
if ( ISLITER( tok->sign[2] ) ) /* eat literals */ |
603 |
|
|
switch ( tok->sign[2] ) { /* c, a, i, x */ |
604 |
|
|
case 'c': |
605 |
|
|
if ( (lit.len = (eatlit = fp[tlen]) ? 1 : 0 ) ) { |
606 |
|
|
lit.tag = -LFMT_BARE; |
607 |
|
|
lit.val = (char *)fp + tlen; |
608 |
|
|
tlen++; |
609 |
|
|
} |
610 |
|
|
break; |
611 |
|
|
case 'a': |
612 |
|
|
if ( (eatlit = ISIDENT( fp[tlen] )) ) { |
613 |
|
|
lit.tag = -LFMT_BARE; |
614 |
|
|
lit.val = (char *)fp + tlen; |
615 |
|
|
lit.len = 0; |
616 |
|
|
while ( ISIDENT( fp[tlen] ) ) { |
617 |
|
|
lit.len++; |
618 |
|
|
tlen++; |
619 |
|
|
} |
620 |
|
|
} |
621 |
|
|
break; |
622 |
|
|
case 'i': |
623 |
|
|
countnumber: |
624 |
|
|
lit.val = NULL; |
625 |
|
|
lit.len = 0; |
626 |
|
|
if ( (eatlit = ISDIGIT( u = fp[tlen] )) ) { |
627 |
|
|
lit.tag = LFMT_NUM; |
628 |
|
|
while ( u && 10 > (u -= '0') ) { |
629 |
|
|
lit.len = 10*lit.len + u; |
630 |
|
|
u = fp[++tlen]; |
631 |
|
|
} |
632 |
|
|
} |
633 |
|
|
/* decimal digits on standalone numeric literal */ |
634 |
|
|
if ( &number == tok |
635 |
|
|
&& (unsigned char)('.'-'0') == u |
636 |
|
|
&& '.' != fp[tlen+1] /* avoid eating 1..2 */ |
637 |
|
|
) { |
638 |
|
|
int dec = 0; |
639 |
|
|
int v = 0; |
640 |
|
|
while ( (u = fp[++tlen]) && 10 > (u -= '0') ) |
641 |
|
|
if ( 6 > dec ) { |
642 |
|
|
v = 10*v + u; |
643 |
|
|
dec++; |
644 |
|
|
} |
645 |
|
|
while ( 6 < dec++ ) v *= 10; |
646 |
|
|
lit.tag = LFMT_NUM | (v & LFMT_VAL); |
647 |
|
|
} |
648 |
|
|
break; |
649 |
|
|
case 'x': |
650 |
|
|
switch ( tok->tag ) { |
651 |
|
|
case LFMT_COMM: |
652 |
|
|
eatlit='*'; |
653 |
|
|
break; |
654 |
|
|
case OPENISIS_FMT_ESC: |
655 |
|
|
eatlit=fp[tlen++]; |
656 |
|
|
break; |
657 |
|
|
default: |
658 |
|
|
eatlit=tok->name[0]; |
659 |
|
|
break; |
660 |
|
|
} |
661 |
|
|
lit.tag = - tok->tag; |
662 |
|
|
lit.val = (char *)fp+1; |
663 |
|
|
while ( (u = fp[tlen++]) |
664 |
|
|
&& (eatlit != u || (LFMT_COMM==tok->tag && '/' != fp[tlen])) |
665 |
|
|
) ; |
666 |
|
|
if ( !u ) { |
667 |
|
|
SERR1( "unterminated %s-literal", tok->name ); |
668 |
|
|
goto broken; |
669 |
|
|
} |
670 |
|
|
lit.len = tlen - 2; |
671 |
|
|
if ( LFMT_COMM == tok->tag ) { |
672 |
|
|
tlen++; |
673 |
|
|
continue; |
674 |
|
|
} |
675 |
|
|
} |
676 |
|
|
|
677 |
|
|
if ( tok->sign[2] ) { /* token wants params */ |
678 |
|
|
if ( (!eatlit || tok->sign[3]) && (expl = '(' == fp[tlen]) ) |
679 |
|
|
tlen++; /* eat ( */ |
680 |
|
|
} |
681 |
|
|
|
682 |
|
|
/* got token */ |
683 |
|
|
LOG_DBG( LFMT_DBG, |
684 |
|
|
"at %d: '%.*s' %s(%s) id %d (%x) group %d pos %d%c of %s%c%s", |
685 |
|
|
fp - f->f, tlen > 10 ? 10 : tlen, fp, |
686 |
|
|
tok->name, tok->sign, tok->tag, tok->tag, LFMT_GROUP(tok->tag), |
687 |
|
|
s->pos, s->want, s->tok->name, s->expl?'(':' ', s->tok->sign |
688 |
|
|
); |
689 |
|
|
|
690 |
|
|
/* close what needs to be closed */ |
691 |
|
|
do { |
692 |
|
|
int group = LFMT_GROUP( tok->tag ); |
693 |
|
|
int dflt, wantnum, gotnum; |
694 |
|
|
|
695 |
|
|
close = CLOSE_FIELD; |
696 |
|
|
|
697 |
|
|
if ( '_' != tok->sign[0] /* token is operator ... */ |
698 |
|
|
&& (s->expl /* ... within explicit frame or ... */ |
699 |
|
|
|| group < LFMT_GROUP( s->tok->tag )) /* ... of higher prec */ |
700 |
|
|
) /* we take it -- even as a variable ? */ |
701 |
|
|
close = 'v' == tok->sign[0] ? CLOSE_OP : CLOSE_VAR; |
702 |
|
|
else switch ( tok->tag ) { |
703 |
|
|
case LFMT_COMMA: |
704 |
|
|
case LFMT_RANGE: |
705 |
|
|
close = CLOSE_PARAM; |
706 |
|
|
break; |
707 |
|
|
case LFMT_CLOSE: |
708 |
|
|
close = CLOSE_EXPL; |
709 |
|
|
break; |
710 |
|
|
} |
711 |
|
|
|
712 |
|
|
if ( CLOSE_VAR > close ) /* leave variable to operator */ |
713 |
|
|
break; /* close loop */ |
714 |
|
|
|
715 |
|
|
if ( flds && LFMT_ISITR( o->field[ o->len-1 ].tag ) ) { |
716 |
|
|
/* dereference iterator variable */ |
717 |
|
|
LfmtIter * iter = r->iter + r->i; |
718 |
|
|
if ( ! iter->occ ) { |
719 |
|
|
if ( ! iter->from ) |
720 |
|
|
iter->from = 1; /* field defaults to 1..0 (all) */ |
721 |
|
|
if ( ! iter->sfrom ) |
722 |
|
|
iter->sfrom = iter->sto = 1; /* subfield defaults to 1..1 (1st) */ |
723 |
|
|
} |
724 |
|
|
|
725 |
|
|
LOG_DBG( LFMT_DBG, "iterator %d/%d V%d[%d..%d]^%c[%d..%d]%s", |
726 |
|
|
o->field[ o->len-1 ].len, r->occ, |
727 |
|
|
iter->tag, iter->from, iter->to, |
728 |
|
|
iter->sub ? iter->sub : ' ', iter->sfrom, iter->sto, |
729 |
|
|
iter->submode ? " subfield mode" : "" |
730 |
|
|
); |
731 |
|
|
|
732 |
|
|
o->len--; /* kill the variable. */ |
733 |
|
|
/* move to next legal position. |
734 |
|
|
In standard mode, where we advance one field occurence at a time, |
735 |
|
|
this is just the occ >= from. |
736 |
|
|
A legal position may still emit no field, |
737 |
|
|
if a selected subfield is not available in the current field occ. |
738 |
|
|
In subfield mode, we may have to advance 0, 1 or several field |
739 |
|
|
occurences to find next occ of subfield. |
740 |
|
|
*/ |
741 |
|
|
r->had = 0; |
742 |
|
|
do { |
743 |
|
|
int socc; |
744 |
|
|
if ( 0 > iter->occ ) /* we were already done */ |
745 |
|
|
break; |
746 |
|
|
/* if have legal occurence, ADD it */ |
747 |
|
|
if ( iter->occ >= iter->from ) for ( socc=1;; socc++ ) { |
748 |
|
|
/* not initialization pass. */ |
749 |
|
|
Field *v = r->r->field + iter->pos; |
750 |
|
|
const char *src = v->val; |
751 |
|
|
int len = v->len; |
752 |
|
|
if ( iter->sub ) { /* find subfield */ |
753 |
|
|
const char *p = src + iter->off, *e = src+len; |
754 |
|
|
while ( p < e && ( |
755 |
|
|
r->sub != *p++ |
756 |
|
|
|| e == p |
757 |
|
|
|| (iter->sub != *p++ && iter->sub != '*') |
758 |
|
|
) ) |
759 |
|
|
; |
760 |
|
|
iter->off = p - src; |
761 |
|
|
if ( p >= e ) |
762 |
|
|
break; |
763 |
|
|
if ( iter->sfrom && socc < iter->sfrom ) |
764 |
|
|
continue; |
765 |
|
|
if ( iter->sto && socc > iter->sto ) |
766 |
|
|
break; |
767 |
|
|
src = p; |
768 |
|
|
while ( p < e && r->sub != *p ) |
769 |
|
|
p++; |
770 |
|
|
iter->off += len = p - src; |
771 |
|
|
} |
772 |
|
|
/* make sure there's enough room, +3 for DATA mode */ |
773 |
|
|
ADD( v->tag, NULL, len+3 ); |
774 |
|
|
if ( ! f->mode ) |
775 |
|
|
CAT( src, len ); |
776 |
|
|
else { |
777 |
|
|
const char *e = src + len; |
778 |
|
|
char *dst = (char *)o->field[ o->len - 1 ].val; |
779 |
|
|
for ( ; src < e; ) { |
780 |
|
|
if ( '<' == *src ) { |
781 |
|
|
for ( src++; src < e; *dst++ = *src++ ) |
782 |
|
|
if ( '=' == *src || '>' == *src ) { |
783 |
|
|
while ( '>' != *src++ && src < e ) |
784 |
|
|
; |
785 |
|
|
if ( src < e && '<' == *src ) { /* have >< */ |
786 |
|
|
*dst++ = ';'; |
787 |
|
|
*dst++ = ' '; |
788 |
|
|
} |
789 |
|
|
break; |
790 |
|
|
} |
791 |
|
|
} else if ( r->sub == *src ) { |
792 |
|
|
if ( ++src == e ) |
793 |
|
|
break; |
794 |
|
|
if ( dst == o->field[ o->len - 1 ].val ) |
795 |
|
|
; /* no spearator */ |
796 |
|
|
else if ( 'a' == *src ) |
797 |
|
|
*dst++ = ';'; |
798 |
|
|
else if ( 'j' > *src ) |
799 |
|
|
*dst++ = ','; |
800 |
|
|
else |
801 |
|
|
*dst++ = '.'; |
802 |
|
|
src++; |
803 |
|
|
} else |
804 |
|
|
*dst++ = *src++; |
805 |
|
|
} |
806 |
|
|
o->used += |
807 |
|
|
o->field[ o->len - 1 ].len = |
808 |
|
|
dst - o->field[ o->len - 1 ].val; |
809 |
|
|
if ( LFMT_MD == f->mode ) |
810 |
|
|
CAT( ". ", 3 ); |
811 |
|
|
} /* f->mode */ |
812 |
|
|
if ( f->upcase ) { |
813 |
|
|
char *p = (char *)o->field[ o->len - 1 ].val; |
814 |
|
|
char *e = p + o->field[ o->len - 1 ].len; |
815 |
|
|
for ( ; p < e; p++ ) |
816 |
|
|
*p = upcase[(unsigned char)*p]; |
817 |
|
|
} |
818 |
|
|
r->had = 1; |
819 |
|
|
if ( iter->submode || ! iter->sub ) |
820 |
|
|
break; |
821 |
|
|
} |
822 |
|
|
if ( iter->submode ) { |
823 |
|
|
if ( iter->occ && iter->occ < iter->from ) /* was skipped */ |
824 |
|
|
iter->off++; |
825 |
|
|
for ( ; iter->pos < r->r->len; iter->pos++, iter->off = 0 ) |
826 |
|
|
if ( iter->tag == r->r->field[ iter->pos ].tag || ! iter->tag ) { |
827 |
|
|
Field *v = r->r->field + iter->pos; |
828 |
|
|
const char *p = v->val + iter->off, *e = v->val + v->len; |
829 |
|
|
while ( p < e && ( |
830 |
|
|
r->sub != *p++ |
831 |
|
|
|| e == p |
832 |
|
|
|| (iter->sub != *p++ && iter->sub != '*') |
833 |
|
|
) ) |
834 |
|
|
; |
835 |
|
|
if ( p < e ) { /* hit */ |
836 |
|
|
iter->off = (p - v->val) - 2; |
837 |
|
|
break; |
838 |
|
|
} |
839 |
|
|
} |
840 |
|
|
} else { |
841 |
|
|
if ( iter->occ ) |
842 |
|
|
iter->pos++; |
843 |
|
|
while ( iter->pos < r->r->len |
844 |
|
|
&& iter->tag |
845 |
|
|
&& iter->tag != r->r->field[ iter->pos ].tag |
846 |
|
|
) |
847 |
|
|
iter->pos++; |
848 |
|
|
iter->off = 0; |
849 |
|
|
} |
850 |
|
|
iter->occ++; |
851 |
|
|
if ( iter->pos >= r->r->len /* end of record */ |
852 |
|
|
|| (iter->to && iter->to < iter->occ) /* end of selected occ */ |
853 |
|
|
) { |
854 |
|
|
iter->occ = -1; |
855 |
|
|
break; |
856 |
|
|
} |
857 |
|
|
} while ( iter->occ <= iter->from ); |
858 |
|
|
|
859 |
|
|
if ( 0 < iter->occ ) /* we have a next occurence */ |
860 |
|
|
r->more = 1; |
861 |
|
|
if ( ! r->had && flds && -LFMT_REP == o->field[ o->len - 1 ].tag ) { |
862 |
|
|
flds--; |
863 |
|
|
o->len--; |
864 |
|
|
o->used -= o->field[o->len].len; |
865 |
|
|
} |
866 |
|
|
r->i++; /* advance to next iterator */ |
867 |
|
|
} /* dereference iterator variable */ |
868 |
|
|
|
869 |
|
|
if ( CLOSE_FIELD > close ) /* leave field to operator */ |
870 |
|
|
break; /* close loop */ |
871 |
|
|
|
872 |
|
|
if ( ! flds ) { /* had no field */ |
873 |
|
|
if ( CLOSE_PARAM > close ) /* nothing to do */ |
874 |
|
|
break; /* close loop */ |
875 |
|
|
} else if ( ! s->expl /* frame is implicit ... */ |
876 |
|
|
&& ( LFMT_LOOP != s->tok->tag /* ... and not a loop ... */ |
877 |
|
|
|| CLOSE_EXPL == close /* ... or we saw a hard closer ... */ |
878 |
|
|
|| (r->i && CLOSE_PARAM == close) |
879 |
|
|
/* ... or a loop after 1st iterator on param closer */ |
880 |
|
|
) |
881 |
|
|
) /** upgrade the closing mode to IMPL */ |
882 |
|
|
close = CLOSE_IMPL; |
883 |
|
|
|
884 |
|
|
/* close the field, i.e. coerce it */ |
885 |
|
|
dflt = !flds /* param was not given ... */ |
886 |
|
|
&& (s->pos /* but was started explicitly by a comma like F( 3, ) */ |
887 |
|
|
|| CLOSE_PARAM == close /* or is closed explicitly like F( ,3 ) */ |
888 |
|
|
); |
889 |
|
|
wantnum = 'i' == s->want || 'n' == s->want; |
890 |
|
|
gotnum = flds && LFMT_ISNUM( o->field[ o->len-1 ].tag ); |
891 |
|
|
|
892 |
|
|
LOG_DBG( LFMT_DBG, |
893 |
|
|
"\tclose %s %d%c pos %d%c of %s%c%s id %x", |
894 |
|
|
closename[close], |
895 |
|
|
flds, flds ? gotnum ? 'n' : 's' : dflt ? 'd' : '-', s->pos, s->want, |
896 |
|
|
s->tok->name, s->expl?'(':' ', s->tok->sign, s->tok->tag ); |
897 |
|
|
|
898 |
|
|
/* close and fix last field */ |
899 |
|
|
if ( gotnum && !wantnum ) { |
900 |
|
|
/* coerce number to string, so it may get concatenated */ |
901 |
|
|
char b[32]; |
902 |
|
|
int ll = lprint( b, o->field[ o->len-1 ].len ); |
903 |
|
|
b[ll++] = ' '; |
904 |
|
|
o->len--; |
905 |
|
|
ADD( 0, b, ll ); |
906 |
|
|
gotnum = 0; |
907 |
|
|
} |
908 |
|
|
|
909 |
|
|
if ( CLOSE_PARAM > close ) |
910 |
|
|
break; /* close loop */ |
911 |
|
|
|
912 |
|
|
/* close and fix param */ |
913 |
|
|
if ( wantnum ) { |
914 |
|
|
if ( dflt ) { /* default 0 */ |
915 |
|
|
NADD( 0, 0 ); |
916 |
|
|
flds = 1; |
917 |
|
|
} else if ( 1 < flds ) { |
918 |
|
|
SERR( "multiple fields for numerical param" ); |
919 |
|
|
goto broken; |
920 |
|
|
} else if ( flds && !gotnum ) { |
921 |
|
|
SERR( "expected number" ); |
922 |
|
|
goto broken; |
923 |
|
|
} |
924 |
|
|
if ( 'i' == s->want ) /* kill decimal */ |
925 |
|
|
o->field[s->data].tag &= ~LFMT_VAL; |
926 |
|
|
} else { |
927 |
|
|
if ( dflt ) { |
928 |
|
|
SADD( 0, "" ); |
929 |
|
|
flds = 1; |
930 |
|
|
} else if ( gotnum ) { |
931 |
|
|
SERR( "expected string" ); /* shouldn't happen !? */ |
932 |
|
|
goto broken; |
933 |
|
|
} else if ( 1 < flds /* concat strings ... */ |
934 |
|
|
&& ! s->rec /* ... unless in record mode frame (blank,loop,ref) */ |
935 |
|
|
) { |
936 |
|
|
int i; |
937 |
|
|
for ( i = start+1; i < o->len; i++ ) |
938 |
|
|
if ( o->field[i].val ) |
939 |
|
|
o->field[ start ].len += o->field[ i ].len; |
940 |
|
|
o->len = start + 1; |
941 |
|
|
} |
942 |
|
|
} |
943 |
|
|
|
944 |
|
|
/* done fixing param */ |
945 |
|
|
|
946 |
|
|
if ( CLOSE_IMPL > close ) { |
947 |
|
|
tok = NULL; /* eat token */ |
948 |
|
|
if ( CLOSE_EXPL > close ) { /* i.e. CLOSE_PARAM */ |
949 |
|
|
if ( flds ) { /* increment param pos, set next wanted type */ |
950 |
|
|
int sl = strlen( s->tok->sign ); /* frame takes sl-2 params */ |
951 |
|
|
if ( ++(s->pos) < sl-2 ) { |
952 |
|
|
if ( '_' != s->tok->sign[ 2 + s->pos ] ) /* repeated param */ |
953 |
|
|
s->want = s->tok->sign[ 2 + s->pos ]; /* else keep last val */ |
954 |
|
|
} else if ( '_' != s->tok->sign[sl-1] ) { /* too much */ |
955 |
|
|
SERR2( "expected at most %d params for '%s'", |
956 |
|
|
sl-2, s->tok->name ); |
957 |
|
|
goto broken; |
958 |
|
|
} |
959 |
|
|
} |
960 |
|
|
break; /* close loop */ |
961 |
|
|
} |
962 |
|
|
} |
963 |
|
|
|
964 |
|
|
/* done with token -- close and fix frame */ |
965 |
|
|
|
966 |
|
|
/* KILLARGS: truncate the output record back to s->data fields and reset
   o->used to s->used, i.e. discard every field at or after the index where
   the current frame's params start (cf. `start = s->data + s->pos` below).
   Relies on `o` and `s` being in scope at the point of use.
   NOTE(review): presumably s->used is the o->used value saved when the
   frame was pushed -- confirm against PUSHFRAME. */
#define KILLARGS() do { \
967 |
|
|
o->len = s->data; \
968 |
|
|
o->used = s->used; \
969 |
|
|
} while(0)
970 |
|
|
|
971 |
|
|
/* KILLOP: like KILLARGS, but additionally drops the field just before the
   frame's params (index s->data - 1), which the arithmetic cases below read
   as their left operand. o->used is reduced by that field's len only when
   the field has a non-NULL val.
   NOTE(review): fields with a NULL val appear to carry a number in len
   rather than occupying buffer bytes -- confirm against NADD/ADD. */
#define KILLOP() do { \
972 |
|
|
o->len = s->data - 1; \
973 |
|
|
o->used = s->used - (o->field[o->len].val ? o->field[o->len].len : 0); \
974 |
|
|
} while(0)
975 |
|
|
|
976 |
|
|
switch ( s->tok->tag ) { |
977 |
|
|
case LFMT_PLUS: /* arithmetic */ |
978 |
|
|
case LFMT_MINUS: |
979 |
|
|
case LFMT_MUL: |
980 |
|
|
case LFMT_DIV: { |
981 |
|
|
lll a = o->field[s->data-1].len; |
982 |
|
|
lll b = o->field[s->data].len; |
983 |
|
|
/* LOG_DBG( LFMT_DBG, "\tarith on %Ld %Ld", a, b ); */ |
984 |
|
|
switch ( s->tok->tag ) { |
985 |
|
|
case LFMT_PLUS: a += b; break; |
986 |
|
|
case LFMT_MINUS: a -= b; break; |
987 |
|
|
case LFMT_MUL: a *= b; break; |
988 |
|
|
case LFMT_DIV: if ( 0 != b ) a /= b; break; |
989 |
|
|
} |
990 |
|
|
KILLOP(); |
991 |
|
|
NADD( (int)a, 0 ); |
992 |
|
|
} break; |
993 |
|
|
case LFMT_F: { |
994 |
|
|
char b[32]; |
995 |
|
|
int ll = lprint( b, o->field[ s->data ].len ); |
996 |
|
|
KILLARGS(); |
997 |
|
|
ADD( 0, b, ll ); |
998 |
|
|
} break; |
999 |
|
|
case LFMT_V: |
1000 |
|
|
r->iter[ r->i ].tag = o->field[ s->data ].len; |
1001 |
|
|
/* set iterator field */ |
1002 |
|
|
o->field[ s->data ].tag = LFMT_ITR | r->iter[ r->i ].tag; |
1003 |
|
|
o->field[ s->data ].len = r->i; |
1004 |
|
|
break; |
1005 |
|
|
case LFMT_INDEX: { |
1006 |
|
|
LfmtIter * iter = r->iter + r->i; |
1007 |
|
|
if ( 1 != r->occ ) { |
1008 |
|
|
SERR( "OOPS! index not within loop" ); |
1009 |
|
|
goto internalerr; |
1010 |
|
|
} |
1011 |
|
|
if ( ! iter->sub || (iter->submode = !iter->from) ) { |
1012 |
|
|
/* primary loop */ |
1013 |
|
|
if ( !(iter->from = (int) o->field[ s->data ].len) ) |
1014 |
|
|
iter->from = 1; |
1015 |
|
|
iter->to = s->pos ? (int) o->field[ s->data+1 ].len : iter->from; |
1016 |
|
|
} else { |
1017 |
|
|
/* additional subfield loop */ |
1018 |
|
|
if ( !(iter->sfrom = (int) o->field[ s->data ].len) ) |
1019 |
|
|
iter->sfrom = 1; |
1020 |
|
|
iter->sto = s->pos ? (int) o->field[ s->data+1 ].len : iter->sfrom; |
1021 |
|
|
} |
1022 |
|
|
iter->end = fp + tlen; |
1023 |
|
|
KILLARGS(); |
1024 |
|
|
} break; /* case LFMT_INDEX: */ |
1025 |
|
|
case LFMT_SUBFIELD: |
1026 |
|
|
if ( 1 != r->occ ) { |
1027 |
|
|
SERR( "OOPS! subfield not within loop" ); |
1028 |
|
|
goto internalerr; |
1029 |
|
|
} |
1030 |
|
|
r->iter[ r->i ].sub = o->field[ s->data ].val[0]; |
1031 |
|
|
r->iter[ r->i ].end = fp + tlen; |
1032 |
|
|
KILLARGS(); |
1033 |
|
|
break; |
1034 |
|
|
case LFMT_LOOP: |
1035 |
|
|
if ( r->more ) { |
1036 |
|
|
fp = r->loop; /* start over */ |
1037 |
|
|
tlen = 0; /* care for += tlen */ |
1038 |
|
|
tok = NULL; |
1039 |
|
|
r->occ++; |
1040 |
|
|
r->more = 0; |
1041 |
|
|
r->i = 0; |
1042 |
|
|
goto doneclosing; |
1043 |
|
|
} |
1044 |
|
|
r->loop = 0; |
1045 |
|
|
break; |
1046 |
|
|
} |
1047 |
|
|
|
1048 |
|
|
/* close frame, unless it's the outmost */ |
1049 |
|
|
if ( ! sp ) |
1050 |
|
|
break; |
1051 |
|
|
|
1052 |
|
|
/* TODO: close loop ? */ |
1053 |
|
|
s = &stack[ --sp ]; |
1054 |
|
|
|
1055 |
|
|
start = s->data + s->pos; /* start of current param in output */ |
1056 |
|
|
flds = o->len - start; /* # fields for current param */ |
1057 |
|
|
|
1058 |
|
|
} while ( CLOSE_IMPL == close ); |
1059 |
|
|
doneclosing: |
1060 |
|
|
|
1061 |
|
|
if ( NULL == tok ) |
1062 |
|
|
continue; |
1063 |
|
|
if ( ! tok->tag ) |
1064 |
|
|
goto done; |
1065 |
|
|
|
1066 |
|
|
/* add new token */ |
1067 |
|
|
if ( NULL == r->loop ) /* open loop ? */ |
1068 |
|
|
switch ( LFMT_GROUP(tok->tag) ) { |
1069 |
|
|
case LFMT_FUNCTIONS: |
1070 |
|
|
if ( LFMT_OPEN != tok->tag ) |
1071 |
|
|
break; |
1072 |
|
|
goto openloop; /* else should do, but gcc doesn't grok it right */ |
1073 |
|
|
case LFMT_LITERAL: |
1074 |
|
|
if ( LFMT_COND != tok->tag && LFMT_REP != tok->tag ) |
1075 |
|
|
break; |
1076 |
|
|
case LFMT_ITERATORS: |
1077 |
|
|
openloop: |
1078 |
|
|
LOG_DBG( LFMT_DBG, "\topening loop on %s", tok->name ); |
1079 |
|
|
PUSHFRAME( &loop ); |
1080 |
|
|
s->rec = stack[sp-1].rec; /* inherit record mode */ |
1081 |
|
|
r->loop = fp; |
1082 |
|
|
r->occ = 1; |
1083 |
|
|
r->more = 0; |
1084 |
|
|
r->had = 0; |
1085 |
|
|
r->i = 0; |
1086 |
|
|
if ( (s->expl = (LFMT_OPEN == tok->tag)) ) { |
1087 |
|
|
r->loop += tlen; |
1088 |
|
|
continue; |
1089 |
|
|
} |
1090 |
|
|
} |
1091 |
|
|
|
1092 |
|
|
if ( tok->sign[2] && LFMT_LITERAL != LFMT_GROUP(tok->tag) ) { |
1093 |
|
|
/* token opens standard frame */ |
1094 |
|
|
if ( LFMT_ITERATORS == LFMT_GROUP(tok->tag) ) { |
1095 |
|
|
if ( 1 != r->occ ) { /* not first run */ |
1096 |
|
|
if ( r->iter[r->i].def != fp ) { |
1097 |
|
|
SERR1( "internal error at %d. iterator", r->i + 1 ); |
1098 |
|
|
goto internalerr; |
1099 |
|
|
} |
1100 |
|
|
NADD( r->i, LFMT_ITR | r->iter[r->i].tag ); /* push iterator */ |
1101 |
|
|
fp = r->iter[r->i].end; /* skip parsing */ |
1102 |
|
|
tlen = 0; /* care for += tlen */ |
1103 |
|
|
continue; /* next token */ |
1104 |
|
|
} |
1105 |
|
|
if ( LFMT_NITER-1 == r->i ) { |
1106 |
|
|
SERR1( "maximum number of iterators %d exceeded", LFMT_NITER ); |
1107 |
|
|
goto overflow; |
1108 |
|
|
} |
1109 |
|
|
memset( r->iter + r->i, 0, sizeof(r->iter[0]) ); |
1110 |
|
|
r->iter[r->i].def = fp; |
1111 |
|
|
r->iter[r->i].end = fp + tlen; |
1112 |
|
|
} |
1113 |
|
|
PUSHFRAME( tok ); |
1114 |
|
|
switch ( tok->tag ) { /* special treatment */ |
1115 |
|
|
case LFMT_INDEX: |
1116 |
|
|
s->expl = 1; |
1117 |
|
|
break; |
1118 |
|
|
default: |
1119 |
|
|
if ( expl ) |
1120 |
|
|
s->expl = 1; |
1121 |
|
|
} |
1122 |
|
|
LOG_DBG( LFMT_DBG, "\topen %s%c%s id %x", |
1123 |
|
|
s->tok->name, s->expl?'(':' ', s->tok->sign, s->tok->tag ); |
1124 |
|
|
} |
1125 |
|
|
|
1126 |
|
|
if ( lit.tag ) { /* a literal */ |
1127 |
|
|
if ( NULL != lit.val ) { /* string literal */ |
1128 |
|
|
switch ( lit.tag ) { |
1129 |
|
|
case -LFMT_COND: |
1130 |
|
|
if ( r->i ? !r->more : r->occ > 1 ) |
1131 |
|
|
continue; |
1132 |
|
|
break; |
1133 |
|
|
case -LFMT_REP: |
1134 |
|
|
if ( r->i && !r->had ) |
1135 |
|
|
continue; |
1136 |
|
|
break; |
1137 |
|
|
} |
1138 |
|
|
ADD( lit.tag, lit.val, lit.len ); |
1139 |
|
|
LOG_DBG( LFMT_DBG, "\tlit %s%.*s id %x", |
1140 |
|
|
tok->name, lit.len, lit.val, tok->tag ); |
1141 |
|
|
} else { /* numeric literal */ |
1142 |
|
|
NADD( lit.len, lit.tag ); |
1143 |
|
|
LOG_DBG( LFMT_DBG, "\tlit %d", lit.len ); |
1144 |
|
|
} |
1145 |
|
|
/* new implicit frame immediately closed by literal ?
1146 |
|
|
problem are implicit loops |
1147 |
|
|
if ( o->len == s->data + 1 && ! s->expl ) { |
1148 |
|
|
tok = NULL; |
1149 |
|
|
goto closeframe; |
1150 |
|
|
} |
1151 |
|
|
*/ |
1152 |
|
|
} else switch ( tok->tag ) { /* other special tokens */ |
1153 |
|
|
case LFMT_MDL: f->mode = LFMT_MD; f->upcase = 0; break; |
1154 |
|
|
case LFMT_MDU: f->mode = LFMT_MD; f->upcase = 1; break; |
1155 |
|
|
case LFMT_MHL: f->mode = LFMT_MH; f->upcase = 0; break; |
1156 |
|
|
case LFMT_MHU: f->mode = LFMT_MH; f->upcase = 1; break; |
1157 |
|
|
case LFMT_MPL: f->mode = LFMT_MP; f->upcase = 0; break; |
1158 |
|
|
case LFMT_MPU: f->mode = LFMT_MP; f->upcase = 1; break; |
1159 |
|
|
} |
1160 |
|
|
|
1161 |
|
|
} /* for token */ |
1162 |
|
|
|
1163 |
|
|
broken: |
1164 |
|
|
internalerr: |
1165 |
|
|
overflow: |
1166 |
|
|
outofmem: |
1167 |
|
|
done: |
1168 |
|
|
if ( NULL != o ) { |
1169 |
|
|
int i = o->len; |
1170 |
|
|
while ( i-- ) |
1171 |
|
|
if ( NULL == o->field[i].val ) { |
1172 |
|
|
; |
1173 |
|
|
} else if ( -LFMT_LIT >= o->field[i].tag ) |
1174 |
|
|
o->field[i].tag = 0; |
1175 |
|
|
} |
1176 |
|
|
return o; |
1177 |
|
|
} /* rFmt */ |