1 |
dpavlin |
604 |
/* |
2 |
|
|
The Malete project - the Z39.2/Z39.50 database framework of OpenIsis. |
3 |
|
|
Version 0.9.x (patchlevel see file Version) |
4 |
|
|
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org |
5 |
|
|
|
6 |
|
|
This library is free software; you can redistribute it and/or |
7 |
|
|
modify it under the terms of the GNU Lesser General Public |
8 |
|
|
License as published by the Free Software Foundation; either |
9 |
|
|
version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
|
11 |
|
|
This library is distributed in the hope that it will be useful, |
12 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
14 |
|
|
See the GNU Lesser General Public License for more details. |
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU Lesser General Public |
17 |
|
|
License along with this library; if not, write to the Free Software |
18 |
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
|
|
20 |
|
|
see README for more information |
21 |
|
|
EOH */ |
22 |
|
|
|
23 |
|
|
/* |
24 |
|
|
$Id: dbo.c,v 1.26 2004/11/03 17:39:21 kripke Exp $ |
25 |
|
|
the db object |
26 |
|
|
*/ |
27 |
|
|
|
28 |
|
|
#include "../pw/pw.h" |
29 |
|
|
|
30 |
|
|
#define Q_MAX_EXP 500 |
31 |
|
|
#define Q_MAX_DEPTH 50 |
32 |
|
|
|
33 |
|
|
enum { /* nodes in a query expression */ |
34 |
|
|
OP_NULL, OP_HEAD, /* blank node and head pseudo operator */ |
35 |
|
|
/* operators in increasing precedence; level is op>>1 */ |
36 |
|
|
OP_OR, |
37 |
|
|
OP_AND=4, OP_NOT, /* AND and NOT/WITHOUT */ |
38 |
|
|
OP_TAGS, /* /(...) tagmask, "kind of" unary */ |
39 |
|
|
OP_SAMEREP = 8, OP_SAMETAG, /* ,(F) ;(G) same field repetition/tag */ |
40 |
|
|
OP_DISTMAX, OP_DISTEX, /* ... and $$$ max/ex distance */ |
41 |
|
|
OP_IDENT, /* last operator, really first simple expression */ |
42 |
|
|
T_QRF = 16, /* #n ref */ |
43 |
|
|
/* term and relation */ |
44 |
|
|
T_EQ, T_PFX, /* term equality and prefix match */ |
45 |
|
|
T_GT, T_GTE, /* relational */ |
46 |
|
|
T_LT, T_LTE, /* relational */ |
47 |
|
|
T_CONT, T_RE /* contains and regexp */ |
48 |
|
|
}; |
49 |
|
|
static const char *opname[] = { |
50 |
|
|
"null", "head", |
51 |
|
|
"+", "", |
52 |
|
|
"*", "^", |
53 |
|
|
"/", "", |
54 |
|
|
",", ";", |
55 |
|
|
".", "$", |
56 |
|
|
"-" |
57 |
|
|
}; |
58 |
|
|
static const char *tname[] = { |
59 |
|
|
"#", |
60 |
|
|
"=", "%", |
61 |
|
|
">", ">=", |
62 |
|
|
"<", "<=", |
63 |
|
|
":", "~" |
64 |
|
|
}; |
65 |
|
|
static const int opdim[] = { |
66 |
|
|
0, 0, |
67 |
|
|
0, 0, |
68 |
|
|
0, 0, |
69 |
|
|
1, 0, /* / pseudo op */ |
70 |
|
|
2, 1, /* , ; */ |
71 |
|
|
3, 3, /* . $ */ |
72 |
|
|
3 /* IDENT does not actually imply a dimension */ |
73 |
|
|
}; |
74 |
|
|
|
75 |
|
|
typedef union Exp Exp; /* query expression */ |
76 |
|
|
|
77 |
|
|
typedef struct Bop { /* binary operator */ |
78 |
|
|
int tag; |
79 |
|
|
int dst; /* distance */ |
80 |
|
|
Exp *lft; /* index of left ... */ |
81 |
|
|
Exp *rgt; /* ... and right operand */ |
82 |
|
|
} Bop; |
83 |
|
|
|
84 |
|
|
typedef struct Tfl { /* tag filter */ |
85 |
|
|
int tag; |
86 |
|
|
int len; /* # tags */ |
87 |
|
|
Exp *lft; /* index of left expression, MUST match pos of Bop.lft */ |
88 |
|
|
unsigned short *lst; /* taglist */ |
89 |
|
|
} Tfl; |
90 |
|
|
|
91 |
|
|
typedef struct Qrf { /* query ref */ |
92 |
|
|
int tag; /* == T_QRF */ |
93 |
|
|
Qry *qry; |
94 |
|
|
} Qrf; |
95 |
|
|
|
96 |
|
|
typedef Fld Trm; |
97 |
|
|
|
98 |
|
|
union Exp { /* query expression */ |
99 |
|
|
int tag; |
100 |
|
|
Bop bop; |
101 |
|
|
Tfl tfl; |
102 |
|
|
Qrf qrf; |
103 |
|
|
Trm trm; |
104 |
|
|
}; |
105 |
|
|
|
106 |
|
|
struct Qry { /* parsed query and search result */ |
107 |
|
|
Qry *nxt; /* chained */ |
108 |
|
|
Dbo *dbo; /* our database */ |
109 |
|
|
lolo use; /* req of last use */ |
110 |
|
|
unsigned qid; /* our id */ |
111 |
|
|
char *txt; /* full unparsed expression */ |
112 |
|
|
unsigned sln; /* length of search expression, txt[sln] is '?' or 0 */ |
113 |
|
|
unsigned cur; /* sor */ |
114 |
|
|
unsigned len; /* of result set */ |
115 |
|
|
lulu *res; /* result set, 0 for all */ |
116 |
|
|
lulu skp; /* min rid to consider (skip to) */ |
117 |
|
|
lulu brk; /* min rid where set size limit was hit */ |
118 |
|
|
Tfl srt; /* sort filter */ |
119 |
|
|
Exp *exp; /* parsed search or filter expression */ |
120 |
|
|
char *buf; /* holding terms and taglists */ |
121 |
|
|
Exp ebf[Q_MAX_EXP]; /* exp buffer */ |
122 |
|
|
}; |
123 |
|
|
/* qry->exp is either 0 or ebf: |
124 |
|
|
after parsing the search part, it is ebf, possibly the OP_NULL expression. |
125 |
|
|
Then it is set to 0, if there is no '?' (no filter: record ids only), |
126 |
|
|
or to ebf for a filter expression (OP_NULL selecting all). |
127 |
|
|
*/ |
128 |
|
|
|
129 |
|
|
typedef struct Prv { /* dbo private data */ |
130 |
|
|
Dbo dbo; /* public */ |
131 |
|
|
Qry qry; /* the null query */ |
132 |
|
|
} Prv; |
133 |
|
|
|
134 |
|
|
|
135 |
|
|
typedef struct { /* level in tree during parsing */ |
136 |
|
|
Exp *exp; |
137 |
|
|
int par; /* number of open parentheses */ |
138 |
|
|
} Lev; |
139 |
|
|
|
140 |
|
|
|
141 |
|
|
/* see doc/Query.txt */ |
142 |
|
|
#define EXP_ISSIMPLE(e) (OP_IDENT <= (e)->tag) |
143 |
|
|
static int canmark ( Exp *e ) |
144 |
|
|
{ |
145 |
|
|
return EXP_ISSIMPLE(e) ? 1 |
146 |
|
|
: (OP_OR == e->tag /*|| OP_NOT == e->tag*/) |
147 |
|
|
? canmark(e->bop.lft) && EXP_ISSIMPLE(e->bop.rgt) |
148 |
|
|
: OP_TAGS == e->tag ? canmark(e->tfl.lft) : 0; |
149 |
|
|
} |
150 |
|
|
|
151 |
|
|
/* |
152 |
|
|
mode is 'c'reate, 'm'ark or 'f'ilter |
153 |
|
|
dim the required dimension |
154 |
|
|
return number of buffers required |
155 |
|
|
*/ |
156 |
|
|
static int explain (Exp *e, char mode, int dim, unsigned short tag) |
157 |
|
|
{ |
158 |
|
|
char lmode, rmode; |
159 |
|
|
int lcost, rcost, cost; |
160 |
|
|
if ( T_QRF <= e->tag ) { |
161 |
|
|
if ( T_QRF < e->tag ) |
162 |
|
|
eRr(LOG_VERBOSE, "%c%d %s \"%.*s\"/%d", |
163 |
|
|
mode, dim, tname[e->tag-T_QRF], e->trm.len, e->trm.val, tag); |
164 |
|
|
else if ( tag || dim ) { |
165 |
|
|
eRr(ERR_INVAL, "tag %d dim %d at ref#%d", tag, dim, e->qrf.qry->qid); |
166 |
|
|
return -1; |
167 |
|
|
} else |
168 |
|
|
eRr(LOG_VERBOSE, "%c%d #%d", mode, dim, e->qrf.qry->qid); |
169 |
|
|
return 'm' == mode ? 0 : 1; |
170 |
|
|
} |
171 |
|
|
if ( OP_TAGS == e->tag ) { |
172 |
|
|
eRr(LOG_VERBOSE, "/(%d,..)[%d]", *e->tfl.lst, e->tfl.len); |
173 |
|
|
return e->tfl.lft ? explain(e->tfl.lft, mode, dim, *e->tfl.lst) : 0; |
174 |
|
|
} |
175 |
|
|
if ( OP_DISTMAX == (e->tag&~1) ) |
176 |
|
|
eRr(LOG_VERBOSE, "%c%d %s%d", mode, dim, opname[e->tag], e->bop.dst); |
177 |
|
|
else |
178 |
|
|
eRr(LOG_VERBOSE, "%c%d %s", mode, dim, opname[e->tag]); |
179 |
|
|
if ( !e->tag ) |
180 |
|
|
return 0; |
181 |
|
|
/* bin op */ |
182 |
|
|
lmode = rmode = mode; |
183 |
|
|
cost = 0; |
184 |
|
|
if ( OP_OR == e->tag ) { |
185 |
|
|
if ( 'f' == mode ) |
186 |
|
|
cost = 1; /* because we need to keep the filter */ |
187 |
|
|
} else if (canmark(e->bop.rgt)) { |
188 |
|
|
rmode = 'm'; |
189 |
|
|
} else { |
190 |
|
|
lmode = 'f'; |
191 |
|
|
if ( 'f' == mode ) |
192 |
|
|
cost = 1; /* double filtering */ |
193 |
|
|
} |
194 |
|
|
if ( dim < opdim[e->tag] ) |
195 |
|
|
dim = opdim[e->tag]; |
196 |
|
|
lcost = e->bop.lft ? explain(e->bop.lft, lmode, dim, tag) : 0; |
197 |
|
|
if ( 0 > lcost ) return lcost; |
198 |
|
|
rcost = e->bop.rgt ? explain(e->bop.rgt, rmode, dim, tag) : 0; |
199 |
|
|
if ( 0 > rcost ) return rcost; |
200 |
|
|
cost += lcost > rcost ? lcost : rcost; |
201 |
|
|
return cost; |
202 |
|
|
} /* explain */ |
203 |
|
|
|
204 |
|
|
/* |
205 |
|
|
parse search or filter expression |
206 |
|
|
tokens are appended to an array of expressions. |
207 |
|
|
Operators form a tree in this array, with 0 being the head. |
208 |
|
|
We always operate on right childs, which, however, |
209 |
|
|
may become the left child of following operators. |
210 |
|
|
We maintain a path of right childs up to the current node. |
211 |
|
|
The current node can either be blank or a closed expression |
212 |
|
|
(after a term or closing parenthesis). |
213 |
|
|
If the next token is |
214 |
|
|
- an operator, the current node must not be blank for most operators |
215 |
|
|
(although some may use a default expression as left hand). |
216 |
|
|
While nodes on the path have same or higher precedence than the operator, |
217 |
|
|
and no node with open parentheses is encountered, |
218 |
|
|
the path is unwound. The then current node becomes the |
219 |
|
|
operators left child and is replaced by the operator. |
220 |
|
|
The operator opens a blank node as right child. |
221 |
|
|
- a terminal (term or ref), |
222 |
|
|
if the current node is not blank, a default AND is inserted. |
223 |
|
|
Then we have a blank node, which becomes the terminal. |
224 |
|
|
- a left parenthesis also produces a default AND, if the current |
225 |
|
|
node is not blank. Then the blank nodes parentheses count is increased. |
226 |
|
|
- a right parenthesis behaves like an operator in that the |
227 |
|
|
current node must not be blank. It then unwinds the path up to |
228 |
|
|
the first node whose parentheses count is not zero and decreases it. |
229 |
|
|
buffer usage: query length is sufficient. |
230 |
|
|
- quoted terms, without the quotes, which is 2 bytes less than |
231 |
|
|
in the query expression. may run on the 0 byte in unterminated literal, |
232 |
|
|
however, the inital quote accounts for that. |
233 |
|
|
- 2 bytes per tag in tagfilter. since every tag is at least 1 digit |
234 |
|
|
plus 1 extra character (the / or terminating ,), needs not more bytes |
235 |
|
|
then in the query expression. we may have to align to 2 after a |
236 |
|
|
proper quoted term, but the terms closing quote accounts for that. |
237 |
|
|
This may be called twice: |
238 |
|
|
- on the full query text, with q->exp unset and q->txt == txt |
239 |
|
|
- after the ?, reusing q->exp |
240 |
|
|
*/ |
241 |
|
|
static int parse (Qry *q, unsigned char *txt) |
242 |
|
|
{ |
243 |
|
|
Lev path[Q_MAX_DEPTH], *cur = path; |
244 |
|
|
Exp *exp; |
245 |
|
|
int n/*# used nodes*/, cost; |
246 |
|
|
unsigned char *buf = (unsigned char *)q->buf |
247 |
|
|
+ q->sln; /* buffer might still be used by q->srt */ |
248 |
|
|
unsigned char *p = txt, *pos=p; |
249 |
|
|
|
250 |
|
|
eRr(LOG_VERBOSE, "parsing '%s' as %s", |
251 |
|
|
txt, q->exp ? "filter" : "search"); |
252 |
|
|
if ( q->exp ) /* in filtering -- reuse exp buffer */ |
253 |
|
|
memset(exp = q->exp, 0, Q_MAX_EXP * sizeof(Exp)); |
254 |
|
|
else |
255 |
|
|
exp = q->ebf; |
256 |
|
|
memset(path, 0, sizeof(path)); |
257 |
|
|
/* add the pseudo op head with lowest precedence */ |
258 |
|
|
cur->exp = exp; |
259 |
|
|
exp->tag = OP_HEAD; |
260 |
|
|
exp->bop.rgt = (++cur)->exp = exp+1; /* add blank */ |
261 |
|
|
n = 2; |
262 |
|
|
#define QRAP( args ) do { eRr args; goto qrap; } while (0) /* query is crap */ |
263 |
|
|
for (;;) { |
264 |
|
|
Exp tok; |
265 |
|
|
/* get a token */ |
266 |
|
|
while (33 > *p) if ( !*p++ ) { p--; goto done; } /* skip white */ |
267 |
|
|
pos = p; |
268 |
|
|
LOG_DBG(LOG_DEBUG, "pos %d lev %d: %c", pos-txt, cur-path, *p); |
269 |
|
|
switch (*p) { |
270 |
|
|
case '+': tok.tag = OP_OR; p++; break; |
271 |
|
|
case '*': tok.tag = OP_AND; p++; break; |
272 |
|
|
case '^': tok.tag = OP_NOT; p++; break; |
273 |
|
|
case ';': tok.tag = OP_SAMETAG; p++; break; |
274 |
|
|
case ',': tok.tag = OP_SAMEREP; p++; break; |
275 |
|
|
case '.': tok.tag = OP_DISTMAX; |
276 |
|
|
for ( tok.bop.dst=1; '.' == *++p; ) tok.bop.dst++; |
277 |
|
|
break; |
278 |
|
|
case '$': tok.tag = OP_DISTEX; |
279 |
|
|
for ( tok.bop.dst=1; '$' == *++p; ) tok.bop.dst++; |
280 |
|
|
break; |
281 |
|
|
case '-': tok.tag = OP_IDENT; p++; break; |
282 |
|
|
case ')': |
283 |
|
|
eRr(LOG_VERBOSE, ") on lev %d", cur-path); |
284 |
|
|
if ( !cur->exp->tag ) |
285 |
|
|
QRAP((ERR_INVAL,"unexpected )")); |
286 |
|
|
while ( !cur->par ) |
287 |
|
|
if ( path == --cur ) |
288 |
|
|
QRAP((ERR_INVAL,"unbalanced )")); |
289 |
|
|
cur->par--; |
290 |
|
|
eRr(LOG_VERBOSE, "now on lev %d", cur-path); |
291 |
|
|
p++; |
292 |
|
|
continue; |
293 |
|
|
case '/': { |
294 |
|
|
int multi = '(' == *++p; |
295 |
|
|
unsigned short *lst; |
296 |
|
|
if ( multi ) |
297 |
|
|
p++; |
298 |
|
|
tok.tag = OP_TAGS; |
299 |
|
|
if ( 1 & (int)buf ) buf++; /* align */ |
300 |
|
|
lst = tok.tfl.lst = (unsigned short*)buf; |
301 |
|
|
for (;;) { |
302 |
|
|
int tag, got; |
303 |
|
|
while (*p && 33 > *p) p++; /* skip white */ |
304 |
|
|
got = a2il((char*)p, -1, &tag); |
305 |
|
|
if ( !got ) |
306 |
|
|
QRAP((ERR_INVAL, "/ expects digit")); |
307 |
|
|
if ( tag<0 || 65535<tag ) |
308 |
|
|
QRAP((ERR_INVAL, "tag %d out of range 0..65535", tag)); |
309 |
|
|
*lst++ = (unsigned short)tag; |
310 |
|
|
p += got; |
311 |
|
|
if ( !multi ) |
312 |
|
|
break; |
313 |
|
|
while (*p && 33 > *p) p++; /* skip white */ |
314 |
|
|
if ( ')' == *p++ ) |
315 |
|
|
break; |
316 |
|
|
if ( ',' != p[-1] ) |
317 |
|
|
QRAP((ERR_INVAL, "/ expects ,")); |
318 |
|
|
} |
319 |
|
|
tok.tfl.len = lst - tok.tfl.lst; |
320 |
|
|
buf = (unsigned char*)lst; |
321 |
|
|
if ( !cur->exp->tag && 2==n ) { /* save special case */ |
322 |
|
|
tok.tfl.lft = 0; |
323 |
|
|
if ( q->exp ) /* parsing filter */ |
324 |
|
|
*(exp->bop.lft = exp + n++) = tok; /* as left of head */ |
325 |
|
|
else |
326 |
|
|
q->srt = tok.tfl; /* as sort expression */ |
327 |
|
|
continue; |
328 |
|
|
} |
329 |
|
|
break; |
330 |
|
|
} /* tags */ |
331 |
|
|
case '?': |
332 |
|
|
if ( q->exp ) |
333 |
|
|
QRAP((ERR_INVAL, "unexpected ? in filter")); |
334 |
|
|
goto done; |
335 |
|
|
case '(': |
336 |
|
|
if ( ('F' == p[1] || 'G' == p[1]) && ')' == p[2] ) { /* legacy ops */ |
337 |
|
|
tok.tag = 'F' == p[1] ? OP_SAMEREP : OP_SAMETAG; |
338 |
|
|
p += 3; |
339 |
|
|
break; |
340 |
|
|
} |
341 |
|
|
if ( CT_D == lat1ct[p[1]] ) { /* (n) */ |
342 |
|
|
int dst, dig = a2il((char*)p+1,-1,&dst); |
343 |
|
|
if ( ')' == p[1+dig] ) { |
344 |
|
|
tok.tag = OP_DISTMAX; |
345 |
|
|
tok.bop.dst = dst; |
346 |
|
|
p += dig+2; |
347 |
|
|
break; |
348 |
|
|
} |
349 |
|
|
} |
350 |
|
|
default: /* terminal or parenthesis */ |
351 |
|
|
if ( cur->exp->tag ) { /* add default AND */ |
352 |
|
|
tok.tag = OP_AND; |
353 |
|
|
break; |
354 |
|
|
} |
355 |
|
|
if ( '(' == *p ) { |
356 |
|
|
eRr(LOG_VERBOSE, "( on lev %d", cur-path); |
357 |
|
|
cur->par++; |
358 |
|
|
p++; |
359 |
|
|
continue; |
360 |
|
|
} |
361 |
|
|
#ifdef REFS_ARE_NOW_FIXED |
362 |
|
|
if ( '#' == *p ) { |
363 |
|
|
unsigned qid; |
364 |
|
|
Qry *qq; |
365 |
|
|
if ( CT_D != lat1ct[*++p] ) |
366 |
|
|
QRAP((ERR_IDIOT,"#expects digit")); |
367 |
|
|
p += a2il((char*)p, -1, (int*)&qid); |
368 |
|
|
for (qq = q->dbo->qry; qq && qid != qq->qid; qq = qq->nxt) |
369 |
|
|
; |
370 |
|
|
if ( !qq ) |
371 |
|
|
QRAP((ERR_BADF, "query #%u not found (maybe expired)", qid)); |
372 |
|
|
if ( q->exp ) |
373 |
|
|
QRAP((ERR_IDIOT, "sorry, refs supported in search only")); |
374 |
|
|
tok.tag = T_QRF; |
375 |
|
|
tok.qrf.qry = qq; |
376 |
|
|
*cur->exp = tok; |
377 |
|
|
continue; |
378 |
|
|
} |
379 |
|
|
#endif |
380 |
|
|
/* check for relation */ |
381 |
|
|
tok.tag = T_EQ; |
382 |
|
|
switch (*p) { |
383 |
|
|
case '=': p++; break; |
384 |
|
|
case '%': tok.tag = T_PFX; p++; break; |
385 |
|
|
case '>': |
386 |
|
|
if ( '=' != *++p ) tok.tag = T_GT; else { tok.tag = T_GTE; p++; } |
387 |
|
|
break; |
388 |
|
|
case '<': |
389 |
|
|
if ( '=' != *++p ) tok.tag = T_LT; else { tok.tag = T_LTE; p++; } |
390 |
|
|
break; |
391 |
|
|
case ':': |
392 |
|
|
if ( !q->exp ) |
393 |
|
|
QRAP((ERR_INVAL, ": only in filter")); |
394 |
|
|
tok.tag = T_CONT; p++; |
395 |
|
|
break; |
396 |
|
|
case '~': |
397 |
|
|
/* tok.tag = T_RE; p++; break; */ |
398 |
|
|
QRAP((ERR_INVAL, "~ unsupported")); |
399 |
|
|
} |
400 |
|
|
while (33 > *p) if ( !*p++ ) goto done; /* skip white */ |
401 |
|
|
if ( '"' != *p ) { /* plain - point to expr */ |
402 |
|
|
if ( CT_S == lat1ct[*p] ) |
403 |
|
|
QRAP((ERR_INVAL,"bad character '%c'",*p)); |
404 |
|
|
tok.trm.val = (char*)p; |
405 |
|
|
while (!((CT_WHITE|CT_SPECL) & lat1ct[*p++])) |
406 |
|
|
; |
407 |
|
|
tok.trm.len = (char*)--p - tok.trm.val; |
408 |
|
|
} else { /* quoted - use buffer */ |
409 |
|
|
tok.trm.val = (char*)buf--; |
410 |
|
|
while ((*++buf = *++p) && ('"'!=*buf || '"'==*++p)) |
411 |
|
|
; |
412 |
|
|
if ( !*buf ) |
413 |
|
|
QRAP((ERR_INVAL,"unexpected end of query in literal")); |
414 |
|
|
tok.trm.len = (char*)buf - tok.trm.val; |
415 |
|
|
} |
416 |
|
|
if ( '$' == *p ) { |
417 |
|
|
tok.tag = T_PFX; |
418 |
|
|
p++; |
419 |
|
|
} |
420 |
|
|
*cur->exp = tok; |
421 |
|
|
continue; |
422 |
|
|
} |
423 |
|
|
/* not continued: got operator */ |
424 |
|
|
eRr(LOG_VERBOSE, "%s on lev %d", opname[tok.tag], cur-path); |
425 |
|
|
if ( !cur->exp->tag ) |
426 |
|
|
QRAP((ERR_INVAL,"unexpected operator '%s'", opname[tok.tag])); |
427 |
|
|
if ( OP_DISTMAX > tok.tag ) |
428 |
|
|
/* the DISTs are right associative with highest precedence */ |
429 |
|
|
/* for all others, unwind path up to lower precedence or parenthesis */ |
430 |
|
|
while ( !cur->par ) |
431 |
|
|
if ( ((--cur)->exp->tag&~1) < (tok.tag&~1) ) { |
432 |
|
|
cur++; |
433 |
|
|
break; |
434 |
|
|
} |
435 |
|
|
/* now cur->exp is the victim to become our left child */ |
436 |
|
|
tok.bop.lft = cur->exp; |
437 |
|
|
cur->exp = cur[-1].exp->bop.rgt = exp + n++; /* get new op exp */ |
438 |
|
|
if ( n > Q_MAX_EXP-1 ) /* allow one for right child */ |
439 |
|
|
QRAP((ERR_INVAL, "max #expressions %d exceeded", Q_MAX_EXP)); |
440 |
|
|
*cur->exp = tok; |
441 |
|
|
if ( OP_TAGS != tok.tag ) { |
442 |
|
|
Exp *child = cur->exp->bop.rgt = exp + n++; /* empty right child */ |
443 |
|
|
if ( (++cur - path) == Q_MAX_DEPTH ) |
444 |
|
|
QRAP((ERR_INVAL, "max depth %d exceeded", Q_MAX_DEPTH)); |
445 |
|
|
cur->exp = child; |
446 |
|
|
cur->par = 0; |
447 |
|
|
} |
448 |
|
|
} /* for ever */ |
449 |
|
|
done: |
450 |
|
|
if ( !cur->exp->tag && n>(exp->bop.lft ? 3 : 2) ) |
451 |
|
|
/* ok only if completely empty */ |
452 |
|
|
QRAP((ERR_INVAL,"unexpected end of query after operator")); |
453 |
|
|
/* to be strict, we should check for open parenthesis -- but who cares */ |
454 |
|
|
if ( !exp->bop.lft ) /* purge head unless we have a tagfilter */ |
455 |
|
|
*exp = *exp->bop.rgt; |
456 |
|
|
cost = explain(exp,'c',0,0); |
457 |
|
|
if ( 0 > cost ) |
458 |
|
|
QRAP((ERR_INVAL,"no plan")); |
459 |
|
|
eRr(LOG_VERBOSE, "successfully parsed %d bytes cost %d", p - txt, cost); |
460 |
|
|
q->exp = exp; |
461 |
|
|
return p - txt; |
462 |
|
|
qrap: /* cleanup */ |
463 |
|
|
#undef QRAP |
464 |
|
|
eRr(ERR_INVAL, "error in query '%s' near pos %d", txt, pos-txt); |
465 |
|
|
q->exp = 0; |
466 |
|
|
return ERR_INVAL; |
467 |
|
|
} /* parse */ |
468 |
|
|
|
469 |
|
|
|
470 |
|
|
typedef struct { /* long long pointer */ |
471 |
|
|
lulu rit; /* higher 6 rid, lower two tag */ |
472 |
|
|
unsigned pos; |
473 |
|
|
} Llp; |
474 |
|
|
#define RIDMSK LULU(0xffffffffffff0000) |
475 |
|
|
#define OCCMSK 0xffff0000U |
476 |
|
|
|
477 |
|
|
#define PGT(a, b) \ |
478 |
|
|
((a)->rit > (b)->rit ? 1 : (a)->rit < (b)->rit ? 0 : (a)->pos > (b)->pos) |
479 |
|
|
|
480 |
|
|
|
481 |
|
|
typedef struct Res { |
482 |
|
|
int len; |
483 |
|
|
Llp ptr[QDX_MAXVALPERLEAF]; /* really ses->s */ |
484 |
|
|
} Res; /* followed by ses->s marks */ |
485 |
|
|
#define RES_MARKS(res) (((char*)(res))+4+ses->s*sizeof(Ptr)) |
486 |
|
|
|
487 |
|
|
|
488 |
|
|
/* TODO: maintain pool */ |
489 |
|
|
static Res *rAlloc () |
490 |
|
|
{ |
491 |
|
|
return mAlloc(4 + ses->s*(sizeof(Ptr)+1)); |
492 |
|
|
} |
493 |
|
|
|
494 |
|
|
|
495 |
|
|
static void rFree (Res *res) |
496 |
|
|
{ |
497 |
|
|
mFree(res); |
498 |
|
|
} |
499 |
|
|
|
500 |
|
|
|
501 |
|
|
typedef struct { /* a filter */ |
502 |
|
|
Res *res; /* results against which to filter */ |
503 |
|
|
Bop *cnd; /* matching condition */ |
504 |
|
|
char *mrk; /* optional mark array */ |
505 |
|
|
char set; /* the mark to set/ positive or negative filtering */ |
506 |
|
|
} Flt; |
507 |
|
|
|
508 |
|
|
|
509 |
|
|
/* |
510 |
|
|
match res against flt on cnd |
511 |
|
|
with mrk, set mrk to set for corresponding entries in flt |
512 |
|
|
else compact res, keeping matches is set, nonmatches else |
513 |
|
|
TODO: prolly we'd rather use a copy of flt; however, compiler might do this |
514 |
|
|
*/ |
515 |
|
|
static void rMatch (Res *res, Flt *flt) |
516 |
|
|
{ |
517 |
|
|
Llp *p = res->ptr, *pe = p + res->len, *d = p; |
518 |
|
|
Llp *f = flt->res->ptr, *fe = f + flt->res->len; |
519 |
|
|
unsigned dim = opdim[flt->cnd->tag], pos; /* filter dim, pos */ |
520 |
|
|
char *m; |
521 |
|
|
#ifndef NDEBUG |
522 |
|
|
int mcnt = 0; |
523 |
|
|
# define MCNT mcnt++; |
524 |
|
|
#else |
525 |
|
|
# define MCNT |
526 |
|
|
#endif |
527 |
|
|
|
528 |
|
|
eRr(LOG_VERBOSE, |
529 |
|
|
"matching res len %d against flt len %d cnd %d set %d mrk %d dim %d", |
530 |
|
|
res->len, flt->res->len, flt->cnd->tag, flt->set, 0!=flt->mrk, dim); |
531 |
|
|
if ( !flt->res->len ) |
532 |
|
|
goto done; |
533 |
|
|
/* |
534 |
|
|
filter by sweep, costing O(flt->res->len + res->len) |
535 |
|
|
for res->len << flt->res->len/ld(flt->res->len), binsearch could be cheaper |
536 |
|
|
however, sweep is much simpler with varying dimensions |
537 |
|
|
and for marking, we'll do a full pass anyway |
538 |
|
|
TODO: lotsa other optimizations possible here |
539 |
|
|
obviously we could pull several case distinctions out of the loop |
540 |
|
|
to trade code size for speed |
541 |
|
|
*/ |
542 |
|
|
|
543 |
|
|
for ( ; p < pe; p++ ) { |
544 |
|
|
/* advance f, while it is too low */ |
545 |
|
|
lulu minrit = p->rit; |
546 |
|
|
if ( !dim ) minrit &= RIDMSK; |
547 |
|
|
while ( f->rit < minrit ) if ( fe == ++f ) goto done; |
548 |
|
|
if ( (RIDMSK&f->rit) > p->rit ) goto nomatch; |
549 |
|
|
if ( !dim ) goto match; |
550 |
|
|
if ( f->rit > p->rit ) goto nomatch; |
551 |
|
|
if ( 1 == dim ) goto match; |
552 |
|
|
/* got same rit: rid+tag */ |
553 |
|
|
pos = OCCMSK & p->pos; |
554 |
|
|
if ( 3 == dim && pos < p->pos - flt->cnd->dst ) |
555 |
|
|
pos = p->pos - flt->cnd->dst; |
556 |
|
|
while ( f->pos < pos ) { |
557 |
|
|
if ( fe == ++f ) goto done; |
558 |
|
|
if ( f->rit != p->rit ) goto nomatch; |
559 |
|
|
} |
560 |
|
|
/* still same rit */ |
561 |
|
|
if ( f->pos > (p->pos|0xffff) ) goto nomatch; /* occ mismatch */ |
562 |
|
|
/* and same occ */ |
563 |
|
|
if ( 2 == dim ) goto match; |
564 |
|
|
if ( f->pos > p->pos + flt->cnd->dst ) goto nomatch; /* far out */ |
565 |
|
|
if ( OP_DISTMAX == flt->cnd->tag ) goto match; |
566 |
|
|
/* the strange OP_DISTEX -- alas */ |
567 |
|
|
if ( f->pos == p->pos - flt->cnd->dst ) { /* lower match */ |
568 |
|
|
if ( !flt->mrk ) goto match; |
569 |
|
|
flt->mrk[f - flt->res->ptr] = flt->set; |
570 |
|
|
} |
571 |
|
|
if ( !(OCCMSK & (p->pos ^ (pos = p->pos + flt->cnd->dst))) ) { /* no ovfl */ |
572 |
|
|
Llp *f1 = f; /* scan for upper match */ |
573 |
|
|
while ( pos != f1->pos ) { |
574 |
|
|
if ( fe == ++f1 || f1->rit != p->rit || f1->pos > pos ) goto nomatch; |
575 |
|
|
} |
576 |
|
|
if ( !flt->mrk ) goto match; |
577 |
|
|
flt->mrk[f1 - flt->res->ptr] = flt->set; |
578 |
|
|
} |
579 |
|
|
/* had nomatch or marked marking match: go on */ |
580 |
|
|
nomatch: |
581 |
|
|
if ( flt->mrk ) continue; |
582 |
|
|
if ( flt->set ) continue; |
583 |
|
|
goto addit; |
584 |
|
|
match: /* filter matched */ |
585 |
|
|
if ( !flt->mrk ) { |
586 |
|
|
if ( flt->set ) { |
587 |
|
|
addit: |
588 |
|
|
if ( d < p ) *d = *p; |
589 |
|
|
d++; |
590 |
|
|
} |
591 |
|
|
continue; |
592 |
|
|
} |
593 |
|
|
LOG_DBG(LOG_DEBUG, "marking"); |
594 |
|
|
/* marking */ |
595 |
|
|
m = flt->mrk + (f - flt->res->ptr); |
596 |
|
|
switch (dim) { |
597 |
|
|
case 0: |
598 |
|
|
while ( !(RIDMSK & (p->rit ^ f->rit)) ) { |
599 |
|
|
MCNT *m++ = flt->set; if ( fe == ++f ) goto done; |
600 |
|
|
} |
601 |
|
|
break; |
602 |
|
|
case 1: |
603 |
|
|
while ( p->rit == f->rit ) { |
604 |
|
|
MCNT *m++ = flt->set; if ( fe == ++f ) goto done; |
605 |
|
|
} |
606 |
|
|
break; |
607 |
|
|
case 2: |
608 |
|
|
while ( p->rit == f->rit && !(OCCMSK & (p->pos^f->pos)) ) { |
609 |
|
|
MCNT *m++ = flt->set; if ( fe == ++f ) goto done; |
610 |
|
|
} |
611 |
|
|
break; |
612 |
|
|
case 3: /* DISTMAX only -- DISTEX done above */ |
613 |
|
|
if ( OCCMSK & (p->pos ^ (pos = p->pos + flt->cnd->dst)) ) /* overflow */ |
614 |
|
|
pos = p->pos | 0xffff; |
615 |
|
|
while ( p->rit == f->rit && !(OCCMSK & (p->pos^f->pos)) ) { |
616 |
|
|
MCNT *m++ = flt->set; if ( fe == ++f ) goto done; |
617 |
|
|
} |
618 |
|
|
break; |
619 |
|
|
} |
620 |
|
|
} /* for p < pe */ |
621 |
|
|
done: |
622 |
|
|
if ( flt->mrk ) { |
623 |
|
|
#ifndef NDEBUG |
624 |
|
|
eRr(LOG_VERBOSE, "marked %d", mcnt); |
625 |
|
|
#endif |
626 |
|
|
return; |
627 |
|
|
} |
628 |
|
|
if ( !flt->set && p < pe ) { /* negative filter exhausted */ |
629 |
|
|
if ( d == p ) return; /* none filtered -- keep full set */ |
630 |
|
|
memmove(d, p, (pe-p)*sizeof(*p)); |
631 |
|
|
d += pe-p; |
632 |
|
|
} |
633 |
|
|
res->len = d - res->ptr; |
634 |
|
|
} /* rMatch */ |
635 |
|
|
|
636 |
|
|
|
637 |
|
|
/* |
638 |
|
|
projection to rit (2 dimensions rid+tag) |
639 |
|
|
*/ |
640 |
|
|
static void rProj2 (Res *res) |
641 |
|
|
{ |
642 |
|
|
Llp *p = res->ptr, *e = p + res->len, *d; |
643 |
|
|
lulu last = 0; |
644 |
|
|
|
645 |
|
|
for (;;) { |
646 |
|
|
if ( e == p ) return; |
647 |
|
|
if ( last == p->rit ) break; |
648 |
|
|
last = p++->rit; |
649 |
|
|
} |
650 |
|
|
/* now p is 1st redundant */ |
651 |
|
|
for (d = p; ++p < e;) |
652 |
|
|
if ( last != p->rit ) { |
653 |
|
|
last = p->rit; |
654 |
|
|
*d++ = *p++; |
655 |
|
|
} |
656 |
|
|
res->len = d - res->ptr - 1; |
657 |
|
|
} /* rProj2 */ |
658 |
|
|
|
659 |
|
|
|
660 |
|
|
static lulu rMerge (Res *res, Res *add, int dim) |
661 |
|
|
{ |
662 |
|
|
lulu brk = 0; |
663 |
|
|
Llp *a = res->ptr + res->len-1, *end = res->ptr + ses->s; |
664 |
|
|
Llp *b, *set; |
665 |
|
|
if ( !res->len ) { |
666 |
|
|
if ( 2 > dim ) rProj2(res); |
667 |
|
|
memcpy(res, add, 4+add->len*sizeof(Ptr)); |
668 |
|
|
return res->len; |
669 |
|
|
} |
670 |
|
|
if ( 3 != dim ) { /* reduce the add set */ |
671 |
|
|
static const int tag[] = {OP_AND, OP_SAMETAG, OP_SAMEREP}; |
672 |
|
|
Bop bop; |
673 |
|
|
Flt flt; |
674 |
|
|
bop.tag = tag[dim]; |
675 |
|
|
flt.res = res; |
676 |
|
|
flt.cnd = &bop; |
677 |
|
|
flt.mrk = 0; |
678 |
|
|
flt.set = 0; |
679 |
|
|
rMatch(add, &flt); |
680 |
|
|
} |
681 |
|
|
LOG_DBG(LOG_DEBUG, "merging %d+%d dim %d", res->len, add->len, dim); |
682 |
|
|
b = add->ptr + add->len-1; |
683 |
|
|
for (set = a + add->len; set > a; set--) { |
684 |
|
|
Llp *gt = PGT(a, b) ? a : b; |
685 |
|
|
#if 0 |
686 |
|
|
LOG_DBG(LOG_DEBUG, "a %d b %d set %d use %c", |
687 |
|
|
a+1-res->ptr, b+1-add->ptr, set-res->ptr, b==gt ? 'b' : 'a'); |
688 |
|
|
#endif |
689 |
|
|
if ( set < end ) |
690 |
|
|
*set = *gt; |
691 |
|
|
else |
692 |
|
|
brk = gt->rit; |
693 |
|
|
if ( b == gt ) |
694 |
|
|
b--; /* if used last b, should bail out on set == a */ |
695 |
|
|
else if (res->ptr == a--) { /* used last a */ |
696 |
|
|
/* now the remaining bs are to become first in result */ |
697 |
|
|
LOG_DBG(LOG_DEBUG, "copying %d bs to %d slots", |
698 |
|
|
b+1-add->ptr, set-res->ptr); |
699 |
|
|
memcpy(res->ptr, add->ptr, ((char*)(b+1)) - ((char*)add->ptr)); |
700 |
|
|
break; |
701 |
|
|
} |
702 |
|
|
} |
703 |
|
|
LOG_DBG(LOG_DEBUG, "merged %d+%d brk %d untouched %d b %d", |
704 |
|
|
res->len, add->len, (unsigned)(brk>>16), set+1-res->ptr, b+1-add->ptr); |
705 |
|
|
if ( !brk ) { |
706 |
|
|
res->len += add->len; |
707 |
|
|
return 0; |
708 |
|
|
} |
709 |
|
|
/* kill entries on brk rid -- find last valid */ |
710 |
|
|
brk &= ~RIDMSK; |
711 |
|
|
for ( end = res->ptr+ses->s; end-- > res->ptr && brk < end->rit; ) ; |
712 |
|
|
res->len = end + 1 - res->ptr; |
713 |
|
|
return brk; |
714 |
|
|
} /* rMerge */ |
715 |
|
|
|
716 |
|
|
|
717 |
|
|
typedef struct { /* search context */ |
718 |
|
|
QLoop qlp; |
719 |
|
|
Res *res; /* array to fill in callback */ |
720 |
|
|
Qry *qry; /* query */ |
721 |
|
|
Tfl *tfl; /* current tag filter */ |
722 |
|
|
int dim; /* required dimension */ |
723 |
|
|
Flt flt; /* filter or mark set */ |
724 |
|
|
int dsc; /* discard filter */ |
725 |
|
|
} Scx; |
726 |
|
|
|
727 |
|
|
|
728 |
|
|
static int scb (Scx *scx) /* qLoop callback: search one term */ |
729 |
|
|
{ |
730 |
|
|
Res tmp; /* merge buffer */ |
731 |
|
|
unsigned i; |
732 |
|
|
const unsigned char *v = scx->qlp.vals; |
733 |
|
|
int typ = scx->qlp.qdx->ptr; |
734 |
|
|
Llp *p = tmp.ptr; |
735 |
|
|
|
736 |
|
|
eRr(LOG_VERBOSE, "scb '%b' %d", scx->qlp.cur.len, scx->qlp.cur.byt, |
737 |
|
|
scx->qlp.nvals); |
738 |
|
|
/* for tag-sorted (non FULTXT) index, we should pre-skip to tag */ |
739 |
|
|
for ( i=scx->qlp.nvals; i--; ) { |
740 |
|
|
#ifdef CPU_BIG_ENDIAN |
741 |
|
|
enum { RID5, RID4, RID3, RID2, RID1, RID0, TAG1, TAG0 }; |
742 |
|
|
enum { POS3, POS2, POS1, POS0 }; |
743 |
|
|
#else |
744 |
|
|
enum { TAG0, TAG1, RID0, RID1, RID2, RID3, RID4, RID5 }; |
745 |
|
|
enum { POS0, POS1, POS2, POS3 }; |
746 |
|
|
#endif |
747 |
|
|
unsigned char *rit = (unsigned char*)&p->rit; |
748 |
|
|
unsigned char *pos = (unsigned char*)&p->pos; |
749 |
|
|
p->rit = p->pos = 0; |
750 |
|
|
if ( !(QDX_FULTXT & typ) ) /* leading tag */ |
751 |
|
|
switch ( QDX_TAGMSK&typ ) { |
752 |
|
|
case QDX_TAG2: rit[TAG1] = *v++; |
753 |
|
|
case QDX_TAG1: rit[TAG0] = *v++; |
754 |
|
|
} |
755 |
|
|
switch ( QDX_RIDMSK&typ ) { |
756 |
|
|
case QDX_RID6: rit[RID5] = *v++; |
757 |
|
|
case QDX_RID5: rit[RID4] = *v++; |
758 |
|
|
case QDX_RID4: rit[RID3] = *v++; |
759 |
|
|
case QDX_RID3: |
760 |
|
|
rit[RID2] = *v++; |
761 |
|
|
rit[RID1] = *v++; |
762 |
|
|
rit[RID0] = *v++; |
763 |
|
|
} |
764 |
|
|
if ( QDX_FULTXT & typ ) |
765 |
|
|
switch ( QDX_TAGMSK&typ ) { |
766 |
|
|
case QDX_TAG2: rit[TAG1] = *v++; |
767 |
|
|
case QDX_TAG1: rit[TAG0] = *v++; |
768 |
|
|
} |
769 |
|
|
switch ( QDX_POSMSK&typ ) { |
770 |
|
|
case 4: pos[POS3] = *v++; |
771 |
|
|
case 3: pos[POS2] = *v++; |
772 |
|
|
case 2: pos[POS1] = *v++; |
773 |
|
|
case 1: pos[POS0] = *v++; |
774 |
|
|
} |
775 |
|
|
/* |
776 |
|
|
eRr(LOG_VERBOSE, "rid %d tag %d pos %d", |
777 |
|
|
(int)(p->rit >> 16), (int)(0xffff & p->rit), p->pos); |
778 |
|
|
*/ |
779 |
|
|
if ( !scx->tfl ) goto tagok; |
780 |
|
|
/* find in tag filter */ |
781 |
|
|
if ( 1 == scx->tfl->len ) { |
782 |
|
|
if ( (unsigned short)p->rit == *scx->tfl->lst ) goto tagok; |
783 |
|
|
} else { |
784 |
|
|
unsigned short *t = scx->tfl->lst, *te = t + scx->tfl->len; |
785 |
|
|
while ( t < te ) if ( (unsigned short)p->rit == *t++ ) goto tagok; |
786 |
|
|
} |
787 |
|
|
continue; |
788 |
|
|
tagok: |
789 |
|
|
if ( p->rit < scx->qry->skp ) continue; |
790 |
|
|
if ( scx->qry->brk && p->rit >= scx->qry->brk ) break; |
791 |
|
|
p++; |
792 |
|
|
} |
793 |
|
|
if ( !(tmp.len = p - tmp.ptr) ) goto done; |
794 |
|
|
if ( scx->flt.res ) { |
795 |
|
|
rMatch(&tmp, &scx->flt); |
796 |
|
|
if ( scx->flt.mrk ) goto done; |
797 |
|
|
} |
798 |
|
|
if ( scx->res->len ) |
799 |
|
|
rMerge(scx->res, &tmp, scx->dim); |
800 |
|
|
else |
801 |
|
|
memcpy(scx->res, &tmp, 4+tmp.len*sizeof(Llp)); |
802 |
|
|
done: |
803 |
|
|
return 0; |
804 |
|
|
} /* scb */ |
805 |
|
|
|
806 |
|
|
|
807 |
|
|
static Res *search (Scx *scx, Exp *exp) /* search one expression */ |
808 |
|
|
{ |
809 |
|
|
Res *a = 0, *b; |
810 |
|
|
int dim; |
811 |
|
|
|
812 |
|
|
if ( scx->flt.res ) |
813 |
|
|
eRr(LOG_VERBOSE, "%sing %d %c on %d", scx->flt.mrk?"mark":"filter", |
814 |
|
|
exp->tag, scx->flt.set?'+':'-', scx->flt.cnd->tag); |
815 |
|
|
else |
816 |
|
|
eRr(LOG_VERBOSE, "creating %d", exp->tag); |
817 |
|
|
if ( OP_IDENT <= exp->tag ) { /* simple expression */ |
818 |
|
|
scx->qlp.flg = 0; |
819 |
|
|
if ( OP_IDENT < exp->tag ) { |
820 |
|
|
qMkKey(scx->qlp.qdx, &scx->qlp.key, exp->trm.val, exp->trm.len); |
821 |
|
|
switch (exp->tag) { |
822 |
|
|
case T_EQ: scx->qlp.flg = QEQ; break; |
823 |
|
|
case T_PFX: scx->qlp.flg = QPF; break; |
824 |
|
|
} |
825 |
|
|
} else { |
826 |
|
|
scx->qlp.flg = QUPTO; |
827 |
|
|
qMkKey(scx->qlp.qdx, &scx->qlp.key, |
828 |
|
|
exp->bop.lft->trm.val, exp->bop.lft->trm.len); |
829 |
|
|
qMkKey(scx->qlp.qdx, &scx->qlp.to, |
830 |
|
|
exp->bop.rgt->trm.val, exp->bop.rgt->trm.len); |
831 |
|
|
} |
832 |
|
|
if ( !(QDX_FULTXT & scx->qlp.qdx->ptr) ) { |
833 |
|
|
if (!scx->tfl || 1 != scx->tfl->len) { |
834 |
|
|
eRr(ERR_INVAL, "need tag filter with non-fulltext db"); |
835 |
|
|
return 0; |
836 |
|
|
} |
837 |
|
|
} |
838 |
|
|
if ( !scx->flt.mrk ) |
839 |
|
|
scx->res = a = rAlloc(); |
840 |
|
|
qLoop(&scx->qlp); |
841 |
|
|
if ( a ) |
842 |
|
|
eRr(LOG_VERBOSE, "got %d", a->len); |
843 |
|
|
if ( scx->dsc ) { |
844 |
|
|
rFree(scx->flt.res); |
845 |
|
|
scx->flt.res = 0; |
846 |
|
|
} |
847 |
|
|
return a; |
848 |
|
|
} |
849 |
|
|
switch ( exp->tag ) { |
850 |
|
|
case OP_NULL: |
851 |
|
|
return 0; |
852 |
|
|
case OP_HEAD: |
853 |
|
|
return search(scx, exp->bop.rgt); |
854 |
|
|
case OP_OR: { |
855 |
|
|
int dsc = scx->dsc; |
856 |
|
|
scx->dsc = 0; /* delay discarding filter */ |
857 |
|
|
a = search(scx, exp->bop.lft); |
858 |
|
|
scx->dsc = dsc; |
859 |
|
|
b = search(scx, exp->bop.rgt); |
860 |
|
|
if ( b ) { |
861 |
|
|
if ( a ) |
862 |
|
|
rMerge(a, b, scx->dim); |
863 |
|
|
rFree(b); |
864 |
|
|
} |
865 |
|
|
eRr(LOG_VERBOSE, "got %d", a->len); |
866 |
|
|
return a; |
867 |
|
|
} /* case OR */ |
868 |
|
|
case OP_TAGS: { |
869 |
|
|
Tfl *tfl = scx->tfl; |
870 |
|
|
scx->tfl = &exp->tfl; |
871 |
|
|
a = search(scx, exp->tfl.lft); |
872 |
|
|
scx->tfl = tfl; |
873 |
|
|
return a; |
874 |
|
|
} /* case TAGS */ |
875 |
|
|
/* |
876 |
|
|
case OP_NOT: TODO: special case marking not impl. |
877 |
|
|
return 0; |
878 |
|
|
*/ |
879 |
|
|
default: |
880 |
|
|
if ( (dim = scx->dim) < opdim[exp->tag] ) |
881 |
|
|
scx->dim = opdim[exp->tag]; |
882 |
|
|
/* if not filtering and the right hand does not support direct marking, |
883 |
|
|
create it and use as left hand filter */ |
884 |
|
|
if ( !scx->flt.res && !canmark(exp->bop.rgt) ) { |
885 |
|
|
scx->flt.res = b = search(scx, exp->bop.rgt); /* create right hand */ |
886 |
|
|
scx->flt.cnd = &exp->bop; |
887 |
|
|
scx->flt.mrk = 0; |
888 |
|
|
scx->flt.set = OP_NOT != exp->tag; |
889 |
|
|
a = search(scx, exp->bop.lft); |
890 |
|
|
scx->flt.res = 0; |
891 |
|
|
break; |
892 |
|
|
} /* Else create the left hand (using any filter) */ |
893 |
|
|
if ( (a = search(scx, exp->bop.lft)) ) { |
894 |
|
|
Flt sav, use; |
895 |
|
|
use.res = a; |
896 |
|
|
use.cnd = &exp->bop; |
897 |
|
|
use.mrk = RES_MARKS(a); |
898 |
|
|
use.set = OP_NOT != exp->tag; |
899 |
|
|
memset(use.mrk, !use.set, ses->s); |
900 |
|
|
sav = scx->flt; |
901 |
|
|
/* evaluate the right hand */ |
902 |
|
|
if ( canmark(exp->bop.rgt) ) { /* using direct marking, if possible */ |
903 |
|
|
scx->flt = use; |
904 |
|
|
search(scx, exp->bop.rgt); /* direct marking on a */ |
905 |
|
|
} else { /* create it */ |
906 |
|
|
scx->flt.res = 0; /* non filtering */ |
907 |
|
|
if ( (b = search(scx, exp->bop.rgt)) ) { |
908 |
|
|
rMatch(b, &use); /* and mark */ |
909 |
|
|
rFree(b); |
910 |
|
|
} |
911 |
|
|
} |
912 |
|
|
if ( a->len ) { /* reduce the marked set */ |
913 |
|
|
Llp *p = a->ptr, *e = p + a->len, *d; |
914 |
|
|
char *m = use.mrk; |
915 |
|
|
eRr(LOG_VERBOSE, "reducing a->len %d", a->len); |
916 |
|
|
while ( *m++ ) if ( e == ++p ) goto done; |
917 |
|
|
eRr(LOG_VERBOSE, "1st %d were good", p - a->ptr); |
918 |
|
|
/* now *p is 1st unmarked */ |
919 |
|
|
for ( m--, d=p; p<e; p++ ) if ( *m++ ) *d++ = *p; |
920 |
|
|
a->len = d - a->ptr; |
921 |
|
|
eRr(LOG_VERBOSE, "... to a->len %d", a->len); |
922 |
|
|
} |
923 |
|
|
done: |
924 |
|
|
scx->flt = sav; |
925 |
|
|
} |
926 |
|
|
; |
927 |
|
|
} |
928 |
|
|
scx->dim = dim; |
929 |
|
|
return a; |
930 |
|
|
} /* search */ |
931 |
|
|
|
932 |
|
|
|
933 |
|
|
/* |
934 |
|
|
parse query and execute search from 'Q<TAB>*' msg |
935 |
|
|
*/ |
936 |
|
|
static Qry *Q ( Dbo *self, Msg msg ) |
937 |
|
|
{ |
938 |
|
|
Qry *q = 0, *p; |
939 |
|
|
char *exp = msg->val+2; |
940 |
|
|
int l = msg->len-2, i = 0; |
941 |
|
|
|
942 |
|
|
if ( !ses->qry ) |
943 |
|
|
ses->qry = mAlloc(ses->q * sizeof(Qry)); |
944 |
|
|
if ( !self->qry ) |
945 |
|
|
self->qry = &((Prv*)self)->qry; |
946 |
|
|
#ifdef REFS_ARE_NOW_FIXED |
947 |
|
|
if ( l && '#' == *exp && l-1 == a2il(exp+1,l-1,&i) ) { /* plain #n ref */ |
948 |
|
|
p = self->qry; |
949 |
|
|
if ( 1 == l ) |
950 |
|
|
((Prv*)self)->qry.cur = 0; /* reset 0 query */ |
951 |
|
|
if ( (unsigned)i == p->qid ) |
952 |
|
|
return p; |
953 |
|
|
for ( ; (q = p->nxt); p=q ) |
954 |
|
|
if ( (unsigned)i == q->qid ) { |
955 |
|
|
p->nxt = q->nxt; /* unlink */ |
956 |
|
|
q->nxt = self->qry; /* put at front */ |
957 |
|
|
return self->qry = q; |
958 |
|
|
} |
959 |
|
|
return 0; |
960 |
|
|
} |
961 |
|
|
#endif |
962 |
|
|
/* create new query */ |
963 |
|
|
p=q=ses->qry + ses->q -1; do { /* find slot */ |
964 |
|
|
if ( !p->dbo ) { /* unused */ |
965 |
|
|
q = p; |
966 |
|
|
goto gotslot; |
967 |
|
|
} |
968 |
|
|
if ( q->use < p->use ) |
969 |
|
|
q = p; |
970 |
|
|
} while ( ses->qry < --p ); |
971 |
|
|
/* unlink the lrused */ { |
972 |
|
|
Dbo *dbo = q->dbo; |
973 |
|
|
if ( q == dbo->qry ) |
974 |
|
|
dbo->qry = 0; /* kill current */ |
975 |
|
|
else { |
976 |
|
|
for (p = dbo->qry; q != p->nxt; ) |
977 |
|
|
if ( !(p = p->nxt) ) { |
978 |
|
|
eRr(ERR_IDIOT, "unlinked"); |
979 |
|
|
return 0; |
980 |
|
|
} |
981 |
|
|
p->nxt = q->nxt; /* unlink */ |
982 |
|
|
} |
983 |
|
|
mFree(q->txt); |
984 |
|
|
mFree(q->res); |
985 |
|
|
mFree(q->buf); |
986 |
|
|
} |
987 |
|
|
memset(q, 0, sizeof(*q)); |
988 |
|
|
gotslot: /* got clean q */ |
989 |
|
|
q->dbo = self; |
990 |
|
|
q->qid = self->qid+1; |
991 |
|
|
q->buf = mAlloc(l); /* see notes on buffer usage at parse */ |
992 |
|
|
if ( 0 > (i = parse(q, (unsigned char*)(q->txt = mDupz(exp, l)))) ) |
993 |
|
|
goto cleanup; |
994 |
|
|
q->sln = i; |
995 |
|
|
q->skp = 0x10000; /* always skip rid 0 entries */ |
996 |
|
|
if ( !q->exp->tag ) /* the NULL search: loop all */ |
997 |
|
|
q->len = self->db->rdx.mid; |
998 |
|
|
else { |
999 |
|
|
Scx scx; |
1000 |
|
|
Res *res; |
1001 |
|
|
lulu last = 0, *d; |
1002 |
|
|
Llp *r, *e; |
1003 |
|
|
|
1004 |
|
|
memset(&scx, 0, sizeof(scx)); |
1005 |
|
|
scx.qlp.qcb = (QCb*)scb; |
1006 |
|
|
scx.qlp.qdx = &self->db->qdx; |
1007 |
|
|
scx.qry = q; |
1008 |
|
|
if ( !(res = search(&scx, q->exp)) || !res->len ) |
1009 |
|
|
goto cleanup; |
1010 |
|
|
d = q->res = mAlloc(res->len * sizeof(lulu)); |
1011 |
|
|
r = res->ptr; |
1012 |
|
|
e = r + res->len; |
1013 |
|
|
do |
1014 |
|
|
if ( last != (*d = r->rit >> 16) ) last = *d++; |
1015 |
|
|
while (++r < e); |
1016 |
|
|
q->len = d - q->res; |
1017 |
|
|
} |
1018 |
|
|
if ( '?' == q->txt[i] ) |
1019 |
|
|
i = parse(q, (unsigned char*)q->txt+i+1); |
1020 |
|
|
else { |
1021 |
|
|
q->exp = 0; |
1022 |
|
|
MFREE(q->buf); |
1023 |
|
|
} |
1024 |
|
|
self->qid++; |
1025 |
|
|
q->nxt = self->qry; /* link */ |
1026 |
|
|
self->qry = q; |
1027 |
|
|
return q; |
1028 |
|
|
cleanup: |
1029 |
|
|
mFree(q->txt); |
1030 |
|
|
mFree(q->res); |
1031 |
|
|
mFree(q->buf); |
1032 |
|
|
memset(q, 0, sizeof(*q)); |
1033 |
|
|
return 0; |
1034 |
|
|
} /* Q */ |
1035 |
|
|
|
1036 |
|
|
|
1037 |
|
|
/* |
1038 |
|
|
apply filter expression to record rid. |
1039 |
|
|
return 1, if record passed, else 0. |
1040 |
|
|
*/ |
1041 |
|
|
static int filter ( Dbo *self, lulu rid, Exp *e ) |
1042 |
|
|
{ |
1043 |
|
|
if ( !e ) { |
1044 |
|
|
eOut(0, "%u", (unsigned)rid); |
1045 |
|
|
return 1; |
1046 |
|
|
} |
1047 |
|
|
if ( !e->tag ) { |
1048 |
|
|
if ( 0 <= rRead(&env.out->lst, &self->db->rdx, rid, self->p) ) { |
1049 |
|
|
SINK(env.out); |
1050 |
|
|
return 1; |
1051 |
|
|
} |
1052 |
|
|
return 0; |
1053 |
|
|
} |
1054 |
|
|
return 1; |
1055 |
|
|
} /* filter */ |
1056 |
|
|
|
1057 |
|
|
|
1058 |
|
|
typedef struct { /* search context */ |
1059 |
|
|
QLoop qlp; |
1060 |
|
|
int lim; /* limit #terms */ |
1061 |
|
|
int cnt; /* count for term so far */ |
1062 |
|
|
int det; /* detailled: restrict to tag, count precise rids */ |
1063 |
|
|
int rln; /* length of rid: 3..6 */ |
1064 |
|
|
int tln; /* length of tag to match: 0..2 */ |
1065 |
|
|
int rof; /* offset of rid in vals: 0 or taglength */ |
1066 |
|
|
int tof; /* offset of tag in vals: 0 or ridlength */ |
1067 |
|
|
char rid[6]; /* last rid seen */ |
1068 |
|
|
char tag[2]; /* tag to filter on */ |
1069 |
|
|
char key[512]; /* decoded term */ |
1070 |
|
|
int kln; /* key length */ |
1071 |
|
|
} Tcx; |
1072 |
|
|
|
1073 |
|
|
|
1074 |
|
|
static int tcb (Tcx *tcx) /* qLoop callback: list terms */ |
1075 |
|
|
{ |
1076 |
|
|
const unsigned char *v, *e; |
1077 |
|
|
char *rid; |
1078 |
|
|
int vsz; |
1079 |
|
|
|
1080 |
|
|
if ( !(QSAME & tcx->qlp.flg) ) { /* new term */ |
1081 |
|
|
if ( tcx->cnt ) |
1082 |
|
|
eOut(0, "%d\011%.*s", tcx->cnt, tcx->kln, tcx->key); |
1083 |
|
|
if ( !tcx->lim-- ) |
1084 |
|
|
return 1; /* break loop */ |
1085 |
|
|
tcx->kln = qRdKey(tcx->qlp.qdx, tcx->key, sizeof tcx->key, &tcx->qlp.cur); |
1086 |
|
|
tcx->cnt = 0; |
1087 |
|
|
} |
1088 |
|
|
if ( !tcx->det ) { |
1089 |
|
|
tcx->cnt += tcx->qlp.nvals; |
1090 |
|
|
return 0; |
1091 |
|
|
} |
1092 |
|
|
/* detailled */ |
1093 |
|
|
rid = tcx->rid; |
1094 |
|
|
if ( !(QSAME & tcx->qlp.flg) ) |
1095 |
|
|
memset(rid, 0, tcx->rln); |
1096 |
|
|
vsz = tcx->qlp.qdx->vsz; |
1097 |
|
|
v = tcx->qlp.vals; |
1098 |
|
|
e = v + tcx->qlp.nvals*vsz; |
1099 |
|
|
for ( ; v < e; v += vsz ) |
1100 |
|
|
if ( !(tcx->tln && memcmp(v+tcx->tof, tcx->tag, tcx->tln)) |
1101 |
|
|
&& memcmp(v+tcx->rof, rid, tcx->rln) |
1102 |
|
|
) { |
1103 |
|
|
tcx->cnt++; |
1104 |
|
|
rid = (char*)v+tcx->rof; |
1105 |
|
|
} |
1106 |
|
|
if ( tcx->rid != rid ) |
1107 |
|
|
memcpy(tcx->rid, rid, tcx->rln); |
1108 |
|
|
return 0; |
1109 |
|
|
} /* tcb */ |
1110 |
|
|
|
1111 |
|
|
|
1112 |
|
|
/* |
1113 |
|
|
*/ |
1114 |
|
|
static int T ( Db *db, Fld *msg ) /* terms */ |
1115 |
|
|
{ |
1116 |
|
|
char *v = msg->val+2, *e = msg->val + msg->len, *p; |
1117 |
|
|
Tcx tcx; |
1118 |
|
|
|
1119 |
|
|
memset(&tcx, 0, sizeof(tcx)); |
1120 |
|
|
tcx.qlp.qcb = (QCb*)tcb; |
1121 |
|
|
tcx.qlp.qdx = &db->qdx; |
1122 |
|
|
tcx.qlp.flg = QPF; |
1123 |
|
|
if ( !(tcx.lim = ses->s) ) |
1124 |
|
|
tcx.lim = 1; |
1125 |
|
|
if ( v < e ) { |
1126 |
|
|
for (p = v; v < e && TAB != *v; v++) ; |
1127 |
|
|
qMkKey(tcx.qlp.qdx, &tcx.qlp.key, p, v-p); |
1128 |
|
|
if ( v++ < e ) { |
1129 |
|
|
for (p = v; v < e && TAB != *v; v++) ; |
1130 |
|
|
if ( v == p ) |
1131 |
|
|
tcx.qlp.flg = QLOOP; /* endless */ |
1132 |
|
|
else { |
1133 |
|
|
tcx.qlp.flg = QUPTO; |
1134 |
|
|
qMkKey(tcx.qlp.qdx, &tcx.qlp.to, p, v-p); |
1135 |
|
|
} |
1136 |
|
|
if ( v++ < e ) { /* detailled */ |
1137 |
|
|
int typ = tcx.qlp.qdx->ptr, tag; |
1138 |
|
|
if ( QDX_LEAFPV & db->qdx.typ ) { |
1139 |
|
|
eOut(0, "\011-1\011plain values"); |
1140 |
|
|
return ERR_INVAL; |
1141 |
|
|
} |
1142 |
|
|
a2il(v, e-v, &tag); |
1143 |
|
|
tcx.det = !0; |
1144 |
|
|
tcx.rln = QDX_RIDMIN+((QDX_RIDMSK&typ)>>QDX_RIDSH); |
1145 |
|
|
tcx.tln = (QDX_TAGMSK&typ)>>QDX_TAGSH; |
1146 |
|
|
if ( !(QDX_FULTXT & typ) ) |
1147 |
|
|
tcx.rof = tcx.tln; |
1148 |
|
|
else if ( tag ) |
1149 |
|
|
tcx.tof = tcx.rln; |
1150 |
|
|
else |
1151 |
|
|
tcx.tln = 0; |
1152 |
|
|
if ( tcx.tln ) { /* get the tag bytes */ |
1153 |
|
|
Val val; |
1154 |
|
|
Ptr ptr; |
1155 |
|
|
ptr.tag = (unsigned short)tag; |
1156 |
|
|
qMkVal(&val, &ptr, typ); |
1157 |
|
|
memcpy(tcx.tag, val.byt+tcx.tof, tcx.tln); |
1158 |
|
|
} |
1159 |
|
|
} |
1160 |
|
|
} |
1161 |
|
|
} |
1162 |
|
|
eOut(0, "#\0110"); /* plain ok comment */ |
1163 |
|
|
qLoop(&tcx.qlp); |
1164 |
|
|
if ( tcx.cnt ) |
1165 |
|
|
eOut(0, "%d\011%.*s", tcx.cnt, tcx.kln, tcx.key); |
1166 |
|
|
return 0; |
1167 |
|
|
} /* T */ |
1168 |
|
|
|
1169 |
|
|
|
1170 |
|
|
|
1171 |
|
|
enum { |
1172 |
|
|
IDX_FIELD, |
1173 |
|
|
IDX_WORD, |
1174 |
|
|
IDX_SPLIT |
1175 |
|
|
}; |
1176 |
|
|
typedef struct { |
1177 |
|
|
QSet qst; |
1178 |
|
|
int mod; |
1179 |
|
|
int mhd; |
1180 |
|
|
} Idx; |
1181 |
|
|
|
1182 |
|
|
/* |
1183 |
|
|
process control fields for X method |
1184 |
|
|
if a tag is found, return it's pos |
1185 |
|
|
*/ |
1186 |
|
|
static char *idxctl (Idx *i, Fld *f) |
1187 |
|
|
{ |
1188 |
|
|
char *p = f->val, *e = p+f->len, *t = p; |
1189 |
|
|
for ( ; p<e; p = ++t) { |
1190 |
|
|
while (t < e && TAB != *t) t++; |
1191 |
|
|
switch (*p) { |
1192 |
|
|
case 'f': i->mod = IDX_FIELD; break; |
1193 |
|
|
case 'w': i->mod = IDX_WORD; break; |
1194 |
|
|
case 's': i->mod = IDX_SPLIT; break; |
1195 |
|
|
case 'a': i->qst.del = 0; break; |
1196 |
|
|
case 'd': i->qst.del = 1; break; |
1197 |
|
|
case 'm': i->mhd = t>p+1 && 'H'==p[1]; continue; |
1198 |
|
|
case 'p': |
1199 |
|
|
if ( t == p+1 ) |
1200 |
|
|
i->qst.pfx.len = 0; |
1201 |
|
|
else { |
1202 |
|
|
int len = t-p-1; |
1203 |
|
|
if ( 255 < len ) len = 255; |
1204 |
|
|
memcpy(i->qst.pfx.byt, p+1, i->qst.pfx.len=len); |
1205 |
|
|
} |
1206 |
|
|
continue; |
1207 |
|
|
case 'r': a2il(p+1, t-p-1, (int*)&i->qst.ptr.rid); continue; |
1208 |
|
|
case '+': i->qst.del = 0; |
1209 |
|
|
if (CT_D == lat1ct[(unsigned char)*++p]) return p; |
1210 |
|
|
continue; |
1211 |
|
|
case '-': i->qst.del = 1; |
1212 |
|
|
if (CT_D == lat1ct[(unsigned char)*++p]) return p; |
1213 |
|
|
continue; |
1214 |
|
|
default: |
1215 |
|
|
if (CT_D == lat1ct[(unsigned char)*p]) return p; |
1216 |
|
|
continue; |
1217 |
|
|
} |
1218 |
|
|
a2il(p+1, t-p-1, (int*)&i->qst.ptr.pos); |
1219 |
|
|
} |
1220 |
|
|
return 0; |
1221 |
|
|
} /* idxctl */ |
1222 |
|
|
|
1223 |
|
|
|
1224 |
|
|
/* |
1225 |
|
|
write a record. |
1226 |
|
|
header must have 'W<TAB>' prestripped. |
1227 |
|
|
*/ |
1228 |
|
|
static int W ( Db *db, Fld *rec ) |
1229 |
|
|
{ |
1230 |
|
|
int rid = 0; |
1231 |
|
|
int pos = -1; |
1232 |
|
|
unsigned siz = 0; |
1233 |
|
|
char *e = rec->val + rec->len; |
1234 |
|
|
/* strip rid@pos */ |
1235 |
|
|
rec->val += a2il(rec->val, rec->len, &rid); |
1236 |
|
|
if ( e > rec->val && '@' == *rec->val ) { |
1237 |
|
|
rec->val++; |
1238 |
|
|
rec->val += a2il(rec->val, e-rec->val, &pos); |
1239 |
|
|
} |
1240 |
|
|
if ( e > rec->val && TAB == *rec->val ) |
1241 |
|
|
rec->val++; |
1242 |
|
|
rec->len = e-rec->val; |
1243 |
|
|
if ( 0 < (rid = rWrite(&db->rdx, rec, rid, pos, siz)) ) |
1244 |
|
|
ses->rid = (unsigned)rid; |
1245 |
|
|
return rid; |
1246 |
|
|
} /* W */ |
1247 |
|
|
|
1248 |
|
|
|
1249 |
|
|
/* |
1250 |
|
|
*/ |
1251 |
|
|
int OPT_STDCALL oDbo ( Dbo *self, Msg msg ) |
1252 |
|
|
{ |
1253 |
|
|
eRr(LOG_VERBOSE, "oDBo '%.*s'", msg->len, msg->val); |
1254 |
|
|
if ( ! msg->len ) |
1255 |
|
|
goto simplewrite; |
1256 |
|
|
switch (*msg->val) { |
1257 |
|
|
case 'Q': { |
1258 |
|
|
Qry *q; |
1259 |
|
|
int i = ses->r; |
1260 |
|
|
unsigned len; |
1261 |
|
|
|
1262 |
|
|
if ( 1 == msg->len ) { |
1263 |
|
|
if ( !(q = self->qry) ) |
1264 |
|
|
return eRr(ERR_BADF, "query expired"); |
1265 |
|
|
} else if ( TAB != msg->val[1] ) |
1266 |
|
|
break; |
1267 |
|
|
else if ( !(q = Q(self, msg)) ) |
1268 |
|
|
return ERR_INVAL; |
1269 |
|
|
/* fetch results */ |
1270 |
|
|
len = q->res ? q->len : (unsigned)self->db->rdx.mid; |
1271 |
|
|
q->use = ses->req; |
1272 |
|
|
eRr(LOG_VERBOSE, "%d of %d results off %d", i, q->len, q->cur); |
1273 |
|
|
eOut(0, "#\t%d\t%d\t%d", len-q->cur, q->qid, (unsigned)(q->brk>>16)); |
1274 |
|
|
if ( q->res ) |
1275 |
|
|
while (q->cur < len && (i -= filter(self, q->res[q->cur++], q->exp))) |
1276 |
|
|
; |
1277 |
|
|
else |
1278 |
|
|
while (q->cur < len && (i -= filter(self, ++q->cur, q->exp))) |
1279 |
|
|
; |
1280 |
|
|
return 0; |
1281 |
|
|
} /* Q */ |
1282 |
|
|
case 'R': |
1283 |
|
|
if ( 1 == msg->len || TAB == msg->val[1] ) { |
1284 |
|
|
int rid, ret = 0; |
1285 |
|
|
EADD(0, "W", 1); |
1286 |
|
|
if ( 1 == msg->len ) { /* R multi */ |
1287 |
|
|
Fld *end = REND(msg); |
1288 |
|
|
unsigned mr = ses->r; |
1289 |
|
|
while ( ++msg < end |
1290 |
|
|
&& 0 <= (ret = ( a2il(msg->val, msg->len, &rid), |
1291 |
|
|
rRead(&env.out->lst, &self->db->rdx, rid, self->p))) |
1292 |
|
|
&& --mr /* allowing 4 billion if mr was 0 */ |
1293 |
|
|
) |
1294 |
|
|
SINK(env.out); |
1295 |
|
|
return ret; |
1296 |
|
|
} else { /* single or range */ |
1297 |
|
|
char *p = msg->val+2, *e = p + msg->len -2; |
1298 |
|
|
int count = 1; |
1299 |
|
|
p += a2il(p, e-p, &rid); |
1300 |
|
|
if ( rid > self->db->rdx.mid ) |
1301 |
|
|
return 0; |
1302 |
|
|
if ( e > p && TAB == *p ) { |
1303 |
|
|
p++; |
1304 |
|
|
a2il(p, e-p, &count); |
1305 |
|
|
if ( 0 >= count || count > (int)ses->r ) |
1306 |
|
|
count = (int)ses->r; |
1307 |
|
|
if ( !count || count > self->db->rdx.mid - rid + 1 ) |
1308 |
|
|
count = self->db->rdx.mid - rid + 1; |
1309 |
|
|
} |
1310 |
|
|
for ( ; 0 <= (ret = rRead(&env.out->lst, &self->db->rdx, rid, self->p)) |
1311 |
|
|
&& --count; |
1312 |
|
|
rid++ |
1313 |
|
|
) { |
1314 |
|
|
eRr(LOG_INFO, "R %u: %d lst %d", rid, ret, LLEN(&env.out->lst)); |
1315 |
|
|
SINK(env.out); |
1316 |
|
|
} |
1317 |
|
|
return ret; |
1318 |
|
|
} |
1319 |
|
|
} |
1320 |
|
|
break; |
1321 |
|
|
case 'T': |
1322 |
|
|
if ( 1 == msg->len || TAB == msg->val[1] ) |
1323 |
|
|
return T(self->db, msg); |
1324 |
|
|
break; |
1325 |
|
|
case 'W': |
1326 |
|
|
if ( 1 == msg->len ) { /* multi */ |
1327 |
|
|
int left = RLEN(msg)-1, l, ret; |
1328 |
|
|
EADD(0, "R", 1); |
1329 |
|
|
for ( msg++; left; left -= l, msg += l ) { |
1330 |
|
|
if ( !(l = RLEN(msg)) ) |
1331 |
|
|
msg->tag = -(l = left); |
1332 |
|
|
else if ( l < 0 || left < l ) |
1333 |
|
|
return eRr(ERR_INVAL, "whoopsy! bad embedded rec"); |
1334 |
|
|
if ( 0 > (ret = W(self->db, msg)) ) |
1335 |
|
|
return ret; |
1336 |
|
|
self->rid = (unsigned)ret; |
1337 |
|
|
eOut(0, "%d", ret); |
1338 |
|
|
} |
1339 |
|
|
return 0; |
1340 |
|
|
} else if ( TAB == msg->val[1] ) { /* single */ |
1341 |
|
|
msg->val += 2; /* strip W<TAB> */ |
1342 |
|
|
msg->len -= 2; |
1343 |
|
|
goto simplewrite; |
1344 |
|
|
} |
1345 |
|
|
/* else it's not a 'W' ... */ |
1346 |
|
|
break; |
1347 |
|
|
case '0': case '1': case '2': case '3': case '4': |
1348 |
|
|
case '5': case '6': case '7': case '8': case '9': |
1349 |
|
|
/* well ... we really should not see such messages ... alas */ |
1350 |
|
|
simplewrite: |
1351 |
|
|
{ |
1352 |
|
|
int ret = W(self->db, msg); |
1353 |
|
|
if ( 0 < ret ) { |
1354 |
|
|
eOut(0, "R\t%d", ret); |
1355 |
|
|
self->rid = (unsigned)ret; |
1356 |
|
|
} |
1357 |
|
|
return 0; |
1358 |
|
|
} |
1359 |
|
|
case 'X': |
1360 |
|
|
if ( 1 == msg->len || TAB == msg->val[1] ) { |
1361 |
|
|
char buf[0x2000], *mhd = buf, *val; |
1362 |
|
|
int mhdlen = sizeof(buf); |
1363 |
|
|
Fld *end = REND(msg); |
1364 |
|
|
int ctl = 1, lasttag = -1, cnt = 0, len = 0; |
1365 |
|
|
Idx idx; |
1366 |
|
|
|
1367 |
|
|
memset(&idx, 0, sizeof(idx)); |
1368 |
|
|
idx.qst.qdx = &self->db->qdx; |
1369 |
|
|
idx.qst.ptr.rid = self->rid; |
1370 |
|
|
for (;;) { |
1371 |
|
|
if ( !ctl ) { |
1372 |
|
|
idx.qst.ptr.tag = msg->tag; |
1373 |
|
|
val = msg->val; |
1374 |
|
|
len = msg->len; |
1375 |
|
|
} else if ( (val = idxctl(&idx, msg)) ) { |
1376 |
|
|
char *e = msg->val + msg->len; |
1377 |
|
|
int tag; |
1378 |
|
|
val += a2il(val, e-val, &tag); |
1379 |
|
|
idx.qst.ptr.tag = (unsigned short)tag; |
1380 |
|
|
if ( val < e && TAB == *val ) val++; |
1381 |
|
|
len = e - val; |
1382 |
|
|
} |
1383 |
|
|
if ( val ) { |
1384 |
|
|
if ( lasttag != idx.qst.ptr.tag ) { |
1385 |
|
|
idx.qst.ptr.pos = 0; |
1386 |
|
|
lasttag = idx.qst.ptr.tag; |
1387 |
|
|
} |
1388 |
|
|
if ( idx.mhd ) { |
1389 |
|
|
char *ev = val+len, *d; |
1390 |
|
|
if ( len > mhdlen ) { |
1391 |
|
|
if ( mhd != buf ) |
1392 |
|
|
mFree(mhd); |
1393 |
|
|
mhd = mAlloc(len); |
1394 |
|
|
} |
1395 |
|
|
for (d = mhd;;val++) { |
1396 |
|
|
char *s = val; |
1397 |
|
|
while ( val < ev && '<' != *val ) val++; |
1398 |
|
|
if ( val > s ) { |
1399 |
|
|
memcpy(d, s, val - s); |
1400 |
|
|
d += val - s; |
1401 |
|
|
} |
1402 |
|
|
while ( val < ev && '>' != *val && '=' != *val ) val++; |
1403 |
|
|
if ( val == ev ) break; |
1404 |
|
|
if ( '>' == *val ) continue; |
1405 |
|
|
s = ++val; |
1406 |
|
|
while ( val < ev && '>' != *val ) val++; |
1407 |
|
|
if ( val > s ) { |
1408 |
|
|
memcpy(d, s, val - s); |
1409 |
|
|
d += val - s; |
1410 |
|
|
} |
1411 |
|
|
} |
1412 |
|
|
val = mhd; |
1413 |
|
|
len = d - mhd; |
1414 |
|
|
eRr(LOG_DEBUG, "mhd len %d", len); |
1415 |
|
|
} |
1416 |
|
|
if ( IDX_SPLIT == idx.mod ) |
1417 |
|
|
cnt += qSetKeyVals(&idx.qst, val, len); |
1418 |
|
|
else { |
1419 |
|
|
if ( !qSetKeyVal(&idx.qst, val, len) ) |
1420 |
|
|
cnt++; |
1421 |
|
|
if (IDX_FIELD == idx.mod) |
1422 |
|
|
idx.qst.ptr.pos = (idx.qst.ptr.pos + 0x10000) & OCCMSK; |
1423 |
|
|
} |
1424 |
|
|
} |
1425 |
|
|
if ( end == ++msg ) |
1426 |
|
|
break; |
1427 |
|
|
ctl = !msg->tag; |
1428 |
|
|
} |
1429 |
|
|
if ( mhd != buf ) |
1430 |
|
|
mFree(mhd); |
1431 |
|
|
eOut(0, "#\t%d", cnt); |
1432 |
|
|
return 0; |
1433 |
|
|
} /* case 'X' */ |
1434 |
|
|
} /* switch */ |
1435 |
|
|
return oStruct((Struct*)self, msg); |
1436 |
|
|
} /* oDbo */ |
1437 |
|
|
|
1438 |
|
|
|
1439 |
|
|
extern Dbo *newDbo ( char *name ) |
1440 |
|
|
{ |
1441 |
|
|
Prv *prv; |
1442 |
|
|
Db *db = dOpen(name); |
1443 |
|
|
if ( !db ) |
1444 |
|
|
return 0; |
1445 |
|
|
prv = mAlloc(sizeof(Prv)); |
1446 |
|
|
prv->dbo.snd = (disp*)oDbo; |
1447 |
|
|
prv->dbo.db = db; |
1448 |
|
|
prv->dbo.qry = &prv->qry; |
1449 |
|
|
prv->qry.dbo = &prv->dbo; |
1450 |
|
|
prv->qry.exp = prv->qry.ebf; /* OP_NULL filter */ |
1451 |
|
|
return &prv->dbo; |
1452 |
|
|
} /* newDbo */ |