/[webpac]/openisis/0.9.9e/pw/dbo.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/0.9.9e/pw/dbo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (show annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 39411 byte(s)
import of new openisis release, 0.9.9e

1 /*
2 The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3 Version 0.9.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: dbo.c,v 1.26 2004/11/03 17:39:21 kripke Exp $
25 the db object
26 */
27
28 #include "../pw/pw.h"
29
30 #define Q_MAX_EXP 500
31 #define Q_MAX_DEPTH 50
32
33 enum { /* nodes in a query expression */
34 OP_NULL, OP_HEAD, /* blank node and head pseudo operator */
35 /* operators in increasing precedence; level is op>>1 */
36 OP_OR,
37 OP_AND=4, OP_NOT, /* AND and NOT/WITHOUT */
38 OP_TAGS, /* /(...) tagmask, "kind of" unary */
39 OP_SAMEREP = 8, OP_SAMETAG, /* ,(F) ;(G) same field repetition/tag */
40 OP_DISTMAX, OP_DISTEX, /* ... and $$$ max/ex distance */
41 OP_IDENT, /* last operator, really first simple expression */
42 T_QRF = 16, /* #n ref */
43 /* term and relation */
44 T_EQ, T_PFX, /* term equality and prefix match */
45 T_GT, T_GTE, /* relational */
46 T_LT, T_LTE, /* relational */
47 T_CONT, T_RE /* contains and regexp */
48 };
49 static const char *opname[] = {
50 "null", "head",
51 "+", "",
52 "*", "^",
53 "/", "",
54 ",", ";",
55 ".", "$",
56 "-"
57 };
58 static const char *tname[] = {
59 "#",
60 "=", "%",
61 ">", ">=",
62 "<", "<=",
63 ":", "~"
64 };
65 static const int opdim[] = {
66 0, 0,
67 0, 0,
68 0, 0,
69 1, 0, /* / pseudo op */
70 2, 1, /* , ; */
71 3, 3, /* . $ */
72 3 /* IDENT does not actually imply a dimension */
73 };
74
75 typedef union Exp Exp; /* query expression */
76
77 typedef struct Bop { /* binary operator */
78 int tag;
79 int dst; /* distance */
80 Exp *lft; /* index of left ... */
81 Exp *rgt; /* ... and right operand */
82 } Bop;
83
84 typedef struct Tfl { /* tag filter */
85 int tag;
86 int len; /* # tags */
87 Exp *lft; /* index of left expression, MUST match pos of Bop.lft */
88 unsigned short *lst; /* taglist */
89 } Tfl;
90
91 typedef struct Qrf { /* query ref */
92 int tag; /* == T_QRF */
93 Qry *qry;
94 } Qrf;
95
96 typedef Fld Trm;
97
98 union Exp { /* query expression */
99 int tag;
100 Bop bop;
101 Tfl tfl;
102 Qrf qrf;
103 Trm trm;
104 };
105
106 struct Qry { /* parsed query and search result */
107 Qry *nxt; /* chained */
108 Dbo *dbo; /* our database */
109 lolo use; /* req of last use */
110 unsigned qid; /* our id */
111 char *txt; /* full unparsed expression */
112 unsigned sln; /* length of search expression, txt[sln] is '?' or 0 */
113 unsigned cur; /* sor */
114 unsigned len; /* of result set */
115 lulu *res; /* result set, 0 for all */
116 lulu skp; /* min rid to consider (skip to) */
117 lulu brk; /* min rid where set size limit was hit */
118 Tfl srt; /* sort filter */
119 Exp *exp; /* parsed search or filter expression */
120 char *buf; /* holding terms and taglists */
121 Exp ebf[Q_MAX_EXP]; /* exp buffer */
122 };
123 /* qry->exp is either 0 or ebf:
124 after parsing the search part, it is ebf, possibly the OP_NULL expression.
125 Then it is set to 0, if there is no '?' (no filter: record ids only),
126 or to ebf for a filter expression (OP_NULL selecting all).
127 */
128
129 typedef struct Prv { /* dbo private data */
130 Dbo dbo; /* public */
131 Qry qry; /* the null query */
132 } Prv;
133
134
135 typedef struct { /* level in tree during parsing */
136 Exp *exp;
137 int par; /* number of open parentheses */
138 } Lev;
139
140
141 /* see doc/Query.txt */
142 #define EXP_ISSIMPLE(e) (OP_IDENT <= (e)->tag)
143 static int canmark ( Exp *e )
144 {
145 return EXP_ISSIMPLE(e) ? 1
146 : (OP_OR == e->tag /*|| OP_NOT == e->tag*/)
147 ? canmark(e->bop.lft) && EXP_ISSIMPLE(e->bop.rgt)
148 : OP_TAGS == e->tag ? canmark(e->tfl.lft) : 0;
149 }
150
151 /*
152 mode is 'c'reate, 'm'ark or 'f'ilter
153 dim the required dimension
154 return number of buffers required
155 */
156 static int explain (Exp *e, char mode, int dim, unsigned short tag)
157 {
158 char lmode, rmode;
159 int lcost, rcost, cost;
160 if ( T_QRF <= e->tag ) {
161 if ( T_QRF < e->tag )
162 eRr(LOG_VERBOSE, "%c%d %s \"%.*s\"/%d",
163 mode, dim, tname[e->tag-T_QRF], e->trm.len, e->trm.val, tag);
164 else if ( tag || dim ) {
165 eRr(ERR_INVAL, "tag %d dim %d at ref#%d", tag, dim, e->qrf.qry->qid);
166 return -1;
167 } else
168 eRr(LOG_VERBOSE, "%c%d #%d", mode, dim, e->qrf.qry->qid);
169 return 'm' == mode ? 0 : 1;
170 }
171 if ( OP_TAGS == e->tag ) {
172 eRr(LOG_VERBOSE, "/(%d,..)[%d]", *e->tfl.lst, e->tfl.len);
173 return e->tfl.lft ? explain(e->tfl.lft, mode, dim, *e->tfl.lst) : 0;
174 }
175 if ( OP_DISTMAX == (e->tag&~1) )
176 eRr(LOG_VERBOSE, "%c%d %s%d", mode, dim, opname[e->tag], e->bop.dst);
177 else
178 eRr(LOG_VERBOSE, "%c%d %s", mode, dim, opname[e->tag]);
179 if ( !e->tag )
180 return 0;
181 /* bin op */
182 lmode = rmode = mode;
183 cost = 0;
184 if ( OP_OR == e->tag ) {
185 if ( 'f' == mode )
186 cost = 1; /* because we need to keep the filter */
187 } else if (canmark(e->bop.rgt)) {
188 rmode = 'm';
189 } else {
190 lmode = 'f';
191 if ( 'f' == mode )
192 cost = 1; /* double filtering */
193 }
194 if ( dim < opdim[e->tag] )
195 dim = opdim[e->tag];
196 lcost = e->bop.lft ? explain(e->bop.lft, lmode, dim, tag) : 0;
197 if ( 0 > lcost ) return lcost;
198 rcost = e->bop.rgt ? explain(e->bop.rgt, rmode, dim, tag) : 0;
199 if ( 0 > rcost ) return rcost;
200 cost += lcost > rcost ? lcost : rcost;
201 return cost;
202 } /* explain */
203
204 /*
205 parse search or filter expression
206 tokens are appended to an array of expressions.
207 Operators form a tree in this array, with 0 being the head.
208 We always operate on right childs, which, however,
209 may become the left child of following operators.
210 We maintain a path of right childs up to the current node.
211 The current node can either be blank or a closed expression
212 (after a term or closing parenthesis).
213 If the next token is
214 - an operator, the current node must not be blank for most operators
215 (although some may use a default expression as left hand).
216 While nodes on the path have same or higher precedence than the operator,
217 and no node with open parentheses is encountered,
218 the path is unwound. The then current node becomes the
219 operators left child and is replaced by the operator.
220 The operator opens a blank node as right child.
221 - a terminal (term or ref),
222 if the current node is not blank, a default AND is inserted.
223 Then we have a blank node, which becomes the terminal.
224 - a left parenthesis also produces a default AND, if the current
225 node is not blank. Then the blank nodes parentheses count is increased.
226 - a right parenthesis behaves like an operator in that the
227 current node must not be blank. It then unwinds the path up to
228 the first node whose parentheses count is not zero and decreases it.
229 buffer usage: query length is sufficient.
230 - quoted terms, without the quotes, which is 2 bytes less than
231 in the query expression. may run on the 0 byte in unterminated literal,
232 however, the inital quote accounts for that.
233 - 2 bytes per tag in tagfilter. since every tag is at least 1 digit
234 plus 1 extra character (the / or terminating ,), needs not more bytes
235 then in the query expression. we may have to align to 2 after a
236 proper quoted term, but the terms closing quote accounts for that.
237 This may be called twice:
238 - on the full query text, with q->exp unset and q->txt == txt
239 - after the ?, reusing q->exp
240 */
241 static int parse (Qry *q, unsigned char *txt)
242 {
243 Lev path[Q_MAX_DEPTH], *cur = path;
244 Exp *exp;
245 int n/*# used nodes*/, cost;
246 unsigned char *buf = (unsigned char *)q->buf
247 + q->sln; /* buffer might still be used by q->srt */
248 unsigned char *p = txt, *pos=p;
249
250 eRr(LOG_VERBOSE, "parsing '%s' as %s",
251 txt, q->exp ? "filter" : "search");
252 if ( q->exp ) /* in filtering -- reuse exp buffer */
253 memset(exp = q->exp, 0, Q_MAX_EXP * sizeof(Exp));
254 else
255 exp = q->ebf;
256 memset(path, 0, sizeof(path));
257 /* add the pseudo op head with lowest precedence */
258 cur->exp = exp;
259 exp->tag = OP_HEAD;
260 exp->bop.rgt = (++cur)->exp = exp+1; /* add blank */
261 n = 2;
262 #define QRAP( args ) do { eRr args; goto qrap; } while (0) /* query is crap */
263 for (;;) {
264 Exp tok;
265 /* get a token */
266 while (33 > *p) if ( !*p++ ) { p--; goto done; } /* skip white */
267 pos = p;
268 LOG_DBG(LOG_DEBUG, "pos %d lev %d: %c", pos-txt, cur-path, *p);
269 switch (*p) {
270 case '+': tok.tag = OP_OR; p++; break;
271 case '*': tok.tag = OP_AND; p++; break;
272 case '^': tok.tag = OP_NOT; p++; break;
273 case ';': tok.tag = OP_SAMETAG; p++; break;
274 case ',': tok.tag = OP_SAMEREP; p++; break;
275 case '.': tok.tag = OP_DISTMAX;
276 for ( tok.bop.dst=1; '.' == *++p; ) tok.bop.dst++;
277 break;
278 case '$': tok.tag = OP_DISTEX;
279 for ( tok.bop.dst=1; '$' == *++p; ) tok.bop.dst++;
280 break;
281 case '-': tok.tag = OP_IDENT; p++; break;
282 case ')':
283 eRr(LOG_VERBOSE, ") on lev %d", cur-path);
284 if ( !cur->exp->tag )
285 QRAP((ERR_INVAL,"unexpected )"));
286 while ( !cur->par )
287 if ( path == --cur )
288 QRAP((ERR_INVAL,"unbalanced )"));
289 cur->par--;
290 eRr(LOG_VERBOSE, "now on lev %d", cur-path);
291 p++;
292 continue;
293 case '/': {
294 int multi = '(' == *++p;
295 unsigned short *lst;
296 if ( multi )
297 p++;
298 tok.tag = OP_TAGS;
299 if ( 1 & (int)buf ) buf++; /* align */
300 lst = tok.tfl.lst = (unsigned short*)buf;
301 for (;;) {
302 int tag, got;
303 while (*p && 33 > *p) p++; /* skip white */
304 got = a2il((char*)p, -1, &tag);
305 if ( !got )
306 QRAP((ERR_INVAL, "/ expects digit"));
307 if ( tag<0 || 65535<tag )
308 QRAP((ERR_INVAL, "tag %d out of range 0..65535", tag));
309 *lst++ = (unsigned short)tag;
310 p += got;
311 if ( !multi )
312 break;
313 while (*p && 33 > *p) p++; /* skip white */
314 if ( ')' == *p++ )
315 break;
316 if ( ',' != p[-1] )
317 QRAP((ERR_INVAL, "/ expects ,"));
318 }
319 tok.tfl.len = lst - tok.tfl.lst;
320 buf = (unsigned char*)lst;
321 if ( !cur->exp->tag && 2==n ) { /* save special case */
322 tok.tfl.lft = 0;
323 if ( q->exp ) /* parsing filter */
324 *(exp->bop.lft = exp + n++) = tok; /* as left of head */
325 else
326 q->srt = tok.tfl; /* as sort expression */
327 continue;
328 }
329 break;
330 } /* tags */
331 case '?':
332 if ( q->exp )
333 QRAP((ERR_INVAL, "unexpected ? in filter"));
334 goto done;
335 case '(':
336 if ( ('F' == p[1] || 'G' == p[1]) && ')' == p[2] ) { /* legacy ops */
337 tok.tag = 'F' == p[1] ? OP_SAMEREP : OP_SAMETAG;
338 p += 3;
339 break;
340 }
341 if ( CT_D == lat1ct[p[1]] ) { /* (n) */
342 int dst, dig = a2il((char*)p+1,-1,&dst);
343 if ( ')' == p[1+dig] ) {
344 tok.tag = OP_DISTMAX;
345 tok.bop.dst = dst;
346 p += dig+2;
347 break;
348 }
349 }
350 default: /* terminal or parenthesis */
351 if ( cur->exp->tag ) { /* add default AND */
352 tok.tag = OP_AND;
353 break;
354 }
355 if ( '(' == *p ) {
356 eRr(LOG_VERBOSE, "( on lev %d", cur-path);
357 cur->par++;
358 p++;
359 continue;
360 }
361 #ifdef REFS_ARE_NOW_FIXED
362 if ( '#' == *p ) {
363 unsigned qid;
364 Qry *qq;
365 if ( CT_D != lat1ct[*++p] )
366 QRAP((ERR_IDIOT,"#expects digit"));
367 p += a2il((char*)p, -1, (int*)&qid);
368 for (qq = q->dbo->qry; qq && qid != qq->qid; qq = qq->nxt)
369 ;
370 if ( !qq )
371 QRAP((ERR_BADF, "query #%u not found (maybe expired)", qid));
372 if ( q->exp )
373 QRAP((ERR_IDIOT, "sorry, refs supported in search only"));
374 tok.tag = T_QRF;
375 tok.qrf.qry = qq;
376 *cur->exp = tok;
377 continue;
378 }
379 #endif
380 /* check for relation */
381 tok.tag = T_EQ;
382 switch (*p) {
383 case '=': p++; break;
384 case '%': tok.tag = T_PFX; p++; break;
385 case '>':
386 if ( '=' != *++p ) tok.tag = T_GT; else { tok.tag = T_GTE; p++; }
387 break;
388 case '<':
389 if ( '=' != *++p ) tok.tag = T_LT; else { tok.tag = T_LTE; p++; }
390 break;
391 case ':':
392 if ( !q->exp )
393 QRAP((ERR_INVAL, ": only in filter"));
394 tok.tag = T_CONT; p++;
395 break;
396 case '~':
397 /* tok.tag = T_RE; p++; break; */
398 QRAP((ERR_INVAL, "~ unsupported"));
399 }
400 while (33 > *p) if ( !*p++ ) goto done; /* skip white */
401 if ( '"' != *p ) { /* plain - point to expr */
402 if ( CT_S == lat1ct[*p] )
403 QRAP((ERR_INVAL,"bad character '%c'",*p));
404 tok.trm.val = (char*)p;
405 while (!((CT_WHITE|CT_SPECL) & lat1ct[*p++]))
406 ;
407 tok.trm.len = (char*)--p - tok.trm.val;
408 } else { /* quoted - use buffer */
409 tok.trm.val = (char*)buf--;
410 while ((*++buf = *++p) && ('"'!=*buf || '"'==*++p))
411 ;
412 if ( !*buf )
413 QRAP((ERR_INVAL,"unexpected end of query in literal"));
414 tok.trm.len = (char*)buf - tok.trm.val;
415 }
416 if ( '$' == *p ) {
417 tok.tag = T_PFX;
418 p++;
419 }
420 *cur->exp = tok;
421 continue;
422 }
423 /* not continued: got operator */
424 eRr(LOG_VERBOSE, "%s on lev %d", opname[tok.tag], cur-path);
425 if ( !cur->exp->tag )
426 QRAP((ERR_INVAL,"unexpected operator '%s'", opname[tok.tag]));
427 if ( OP_DISTMAX > tok.tag )
428 /* the DISTs are right associative with highest precedence */
429 /* for all others, unwind path up to lower precedence or parenthesis */
430 while ( !cur->par )
431 if ( ((--cur)->exp->tag&~1) < (tok.tag&~1) ) {
432 cur++;
433 break;
434 }
435 /* now cur->exp is the victim to become our left child */
436 tok.bop.lft = cur->exp;
437 cur->exp = cur[-1].exp->bop.rgt = exp + n++; /* get new op exp */
438 if ( n > Q_MAX_EXP-1 ) /* allow one for right child */
439 QRAP((ERR_INVAL, "max #expressions %d exceeded", Q_MAX_EXP));
440 *cur->exp = tok;
441 if ( OP_TAGS != tok.tag ) {
442 Exp *child = cur->exp->bop.rgt = exp + n++; /* empty right child */
443 if ( (++cur - path) == Q_MAX_DEPTH )
444 QRAP((ERR_INVAL, "max depth %d exceeded", Q_MAX_DEPTH));
445 cur->exp = child;
446 cur->par = 0;
447 }
448 } /* for ever */
449 done:
450 if ( !cur->exp->tag && n>(exp->bop.lft ? 3 : 2) )
451 /* ok only if completely empty */
452 QRAP((ERR_INVAL,"unexpected end of query after operator"));
453 /* to be strict, we should check for open parenthesis -- but who cares */
454 if ( !exp->bop.lft ) /* purge head unless we have a tagfilter */
455 *exp = *exp->bop.rgt;
456 cost = explain(exp,'c',0,0);
457 if ( 0 > cost )
458 QRAP((ERR_INVAL,"no plan"));
459 eRr(LOG_VERBOSE, "successfully parsed %d bytes cost %d", p - txt, cost);
460 q->exp = exp;
461 return p - txt;
462 qrap: /* cleanup */
463 #undef QRAP
464 eRr(ERR_INVAL, "error in query '%s' near pos %d", txt, pos-txt);
465 q->exp = 0;
466 return ERR_INVAL;
467 } /* parse */
468
469
470 typedef struct { /* long long pointer */
471 lulu rit; /* higher 6 rid, lower two tag */
472 unsigned pos;
473 } Llp;
474 #define RIDMSK LULU(0xffffffffffff0000)
475 #define OCCMSK 0xffff0000U
476
477 #define PGT(a, b) \
478 ((a)->rit > (b)->rit ? 1 : (a)->rit < (b)->rit ? 0 : (a)->pos > (b)->pos)
479
480
481 typedef struct Res {
482 int len;
483 Llp ptr[QDX_MAXVALPERLEAF]; /* really ses->s */
484 } Res; /* followed by ses->s marks */
485 #define RES_MARKS(res) (((char*)(res))+4+ses->s*sizeof(Ptr))
486
487
488 /* TODO: maintain pool */
489 static Res *rAlloc ()
490 {
491 return mAlloc(4 + ses->s*(sizeof(Ptr)+1));
492 }
493
494
495 static void rFree (Res *res)
496 {
497 mFree(res);
498 }
499
500
501 typedef struct { /* a filter */
502 Res *res; /* results against which to filter */
503 Bop *cnd; /* matching condition */
504 char *mrk; /* optional mark array */
505 char set; /* the mark to set/ positive or negative filtering */
506 } Flt;
507
508
509 /*
510 match res against flt on cnd
511 with mrk, set mrk to set for corresponding entries in flt
512 else compact res, keeping matches is set, nonmatches else
513 TODO: prolly we'd rather use a copy of flt; however, compiler might do this
514 */
515 static void rMatch (Res *res, Flt *flt)
516 {
517 Llp *p = res->ptr, *pe = p + res->len, *d = p;
518 Llp *f = flt->res->ptr, *fe = f + flt->res->len;
519 unsigned dim = opdim[flt->cnd->tag], pos; /* filter dim, pos */
520 char *m;
521 #ifndef NDEBUG
522 int mcnt = 0;
523 # define MCNT mcnt++;
524 #else
525 # define MCNT
526 #endif
527
528 eRr(LOG_VERBOSE,
529 "matching res len %d against flt len %d cnd %d set %d mrk %d dim %d",
530 res->len, flt->res->len, flt->cnd->tag, flt->set, 0!=flt->mrk, dim);
531 if ( !flt->res->len )
532 goto done;
533 /*
534 filter by sweep, costing O(flt->res->len + res->len)
535 for res->len << flt->res->len/ld(flt->res->len), binsearch could be cheaper
536 however, sweep is much simpler with varying dimensions
537 and for marking, we'll do a full pass anyway
538 TODO: lotsa other optimizations possible here
539 obviously we could pull several case distinctions out of the loop
540 to trade code size for speed
541 */
542
543 for ( ; p < pe; p++ ) {
544 /* advance f, while it is too low */
545 lulu minrit = p->rit;
546 if ( !dim ) minrit &= RIDMSK;
547 while ( f->rit < minrit ) if ( fe == ++f ) goto done;
548 if ( (RIDMSK&f->rit) > p->rit ) goto nomatch;
549 if ( !dim ) goto match;
550 if ( f->rit > p->rit ) goto nomatch;
551 if ( 1 == dim ) goto match;
552 /* got same rit: rid+tag */
553 pos = OCCMSK & p->pos;
554 if ( 3 == dim && pos < p->pos - flt->cnd->dst )
555 pos = p->pos - flt->cnd->dst;
556 while ( f->pos < pos ) {
557 if ( fe == ++f ) goto done;
558 if ( f->rit != p->rit ) goto nomatch;
559 }
560 /* still same rit */
561 if ( f->pos > (p->pos|0xffff) ) goto nomatch; /* occ mismatch */
562 /* and same occ */
563 if ( 2 == dim ) goto match;
564 if ( f->pos > p->pos + flt->cnd->dst ) goto nomatch; /* far out */
565 if ( OP_DISTMAX == flt->cnd->tag ) goto match;
566 /* the strange OP_DISTEX -- alas */
567 if ( f->pos == p->pos - flt->cnd->dst ) { /* lower match */
568 if ( !flt->mrk ) goto match;
569 flt->mrk[f - flt->res->ptr] = flt->set;
570 }
571 if ( !(OCCMSK & (p->pos ^ (pos = p->pos + flt->cnd->dst))) ) { /* no ovfl */
572 Llp *f1 = f; /* scan for upper match */
573 while ( pos != f1->pos ) {
574 if ( fe == ++f1 || f1->rit != p->rit || f1->pos > pos ) goto nomatch;
575 }
576 if ( !flt->mrk ) goto match;
577 flt->mrk[f1 - flt->res->ptr] = flt->set;
578 }
579 /* had nomatch or marked marking match: go on */
580 nomatch:
581 if ( flt->mrk ) continue;
582 if ( flt->set ) continue;
583 goto addit;
584 match: /* filter matched */
585 if ( !flt->mrk ) {
586 if ( flt->set ) {
587 addit:
588 if ( d < p ) *d = *p;
589 d++;
590 }
591 continue;
592 }
593 LOG_DBG(LOG_DEBUG, "marking");
594 /* marking */
595 m = flt->mrk + (f - flt->res->ptr);
596 switch (dim) {
597 case 0:
598 while ( !(RIDMSK & (p->rit ^ f->rit)) ) {
599 MCNT *m++ = flt->set; if ( fe == ++f ) goto done;
600 }
601 break;
602 case 1:
603 while ( p->rit == f->rit ) {
604 MCNT *m++ = flt->set; if ( fe == ++f ) goto done;
605 }
606 break;
607 case 2:
608 while ( p->rit == f->rit && !(OCCMSK & (p->pos^f->pos)) ) {
609 MCNT *m++ = flt->set; if ( fe == ++f ) goto done;
610 }
611 break;
612 case 3: /* DISTMAX only -- DISTEX done above */
613 if ( OCCMSK & (p->pos ^ (pos = p->pos + flt->cnd->dst)) ) /* overflow */
614 pos = p->pos | 0xffff;
615 while ( p->rit == f->rit && !(OCCMSK & (p->pos^f->pos)) ) {
616 MCNT *m++ = flt->set; if ( fe == ++f ) goto done;
617 }
618 break;
619 }
620 } /* for p < pe */
621 done:
622 if ( flt->mrk ) {
623 #ifndef NDEBUG
624 eRr(LOG_VERBOSE, "marked %d", mcnt);
625 #endif
626 return;
627 }
628 if ( !flt->set && p < pe ) { /* negative filter exhausted */
629 if ( d == p ) return; /* none filtered -- keep full set */
630 memmove(d, p, (pe-p)*sizeof(*p));
631 d += pe-p;
632 }
633 res->len = d - res->ptr;
634 } /* rMatch */
635
636
637 /*
638 projection to rit (2 dimensions rid+tag)
639 */
640 static void rProj2 (Res *res)
641 {
642 Llp *p = res->ptr, *e = p + res->len, *d;
643 lulu last = 0;
644
645 for (;;) {
646 if ( e == p ) return;
647 if ( last == p->rit ) break;
648 last = p++->rit;
649 }
650 /* now p is 1st redundant */
651 for (d = p; ++p < e;)
652 if ( last != p->rit ) {
653 last = p->rit;
654 *d++ = *p++;
655 }
656 res->len = d - res->ptr - 1;
657 } /* rProj2 */
658
659
660 static lulu rMerge (Res *res, Res *add, int dim)
661 {
662 lulu brk = 0;
663 Llp *a = res->ptr + res->len-1, *end = res->ptr + ses->s;
664 Llp *b, *set;
665 if ( !res->len ) {
666 if ( 2 > dim ) rProj2(res);
667 memcpy(res, add, 4+add->len*sizeof(Ptr));
668 return res->len;
669 }
670 if ( 3 != dim ) { /* reduce the add set */
671 static const int tag[] = {OP_AND, OP_SAMETAG, OP_SAMEREP};
672 Bop bop;
673 Flt flt;
674 bop.tag = tag[dim];
675 flt.res = res;
676 flt.cnd = &bop;
677 flt.mrk = 0;
678 flt.set = 0;
679 rMatch(add, &flt);
680 }
681 LOG_DBG(LOG_DEBUG, "merging %d+%d dim %d", res->len, add->len, dim);
682 b = add->ptr + add->len-1;
683 for (set = a + add->len; set > a; set--) {
684 Llp *gt = PGT(a, b) ? a : b;
685 #if 0
686 LOG_DBG(LOG_DEBUG, "a %d b %d set %d use %c",
687 a+1-res->ptr, b+1-add->ptr, set-res->ptr, b==gt ? 'b' : 'a');
688 #endif
689 if ( set < end )
690 *set = *gt;
691 else
692 brk = gt->rit;
693 if ( b == gt )
694 b--; /* if used last b, should bail out on set == a */
695 else if (res->ptr == a--) { /* used last a */
696 /* now the remaining bs are to become first in result */
697 LOG_DBG(LOG_DEBUG, "copying %d bs to %d slots",
698 b+1-add->ptr, set-res->ptr);
699 memcpy(res->ptr, add->ptr, ((char*)(b+1)) - ((char*)add->ptr));
700 break;
701 }
702 }
703 LOG_DBG(LOG_DEBUG, "merged %d+%d brk %d untouched %d b %d",
704 res->len, add->len, (unsigned)(brk>>16), set+1-res->ptr, b+1-add->ptr);
705 if ( !brk ) {
706 res->len += add->len;
707 return 0;
708 }
709 /* kill entries on brk rid -- find last valid */
710 brk &= ~RIDMSK;
711 for ( end = res->ptr+ses->s; end-- > res->ptr && brk < end->rit; ) ;
712 res->len = end + 1 - res->ptr;
713 return brk;
714 } /* rMerge */
715
716
717 typedef struct { /* search context */
718 QLoop qlp;
719 Res *res; /* array to fill in callback */
720 Qry *qry; /* query */
721 Tfl *tfl; /* current tag filter */
722 int dim; /* required dimension */
723 Flt flt; /* filter or mark set */
724 int dsc; /* discard filter */
725 } Scx;
726
727
728 static int scb (Scx *scx) /* qLoop callback: search one term */
729 {
730 Res tmp; /* merge buffer */
731 unsigned i;
732 const unsigned char *v = scx->qlp.vals;
733 int typ = scx->qlp.qdx->ptr;
734 Llp *p = tmp.ptr;
735
736 eRr(LOG_VERBOSE, "scb '%b' %d", scx->qlp.cur.len, scx->qlp.cur.byt,
737 scx->qlp.nvals);
738 /* for tag-sorted (non FULTXT) index, we should pre-skip to tag */
739 for ( i=scx->qlp.nvals; i--; ) {
740 #ifdef CPU_BIG_ENDIAN
741 enum { RID5, RID4, RID3, RID2, RID1, RID0, TAG1, TAG0 };
742 enum { POS3, POS2, POS1, POS0 };
743 #else
744 enum { TAG0, TAG1, RID0, RID1, RID2, RID3, RID4, RID5 };
745 enum { POS0, POS1, POS2, POS3 };
746 #endif
747 unsigned char *rit = (unsigned char*)&p->rit;
748 unsigned char *pos = (unsigned char*)&p->pos;
749 p->rit = p->pos = 0;
750 if ( !(QDX_FULTXT & typ) ) /* leading tag */
751 switch ( QDX_TAGMSK&typ ) {
752 case QDX_TAG2: rit[TAG1] = *v++;
753 case QDX_TAG1: rit[TAG0] = *v++;
754 }
755 switch ( QDX_RIDMSK&typ ) {
756 case QDX_RID6: rit[RID5] = *v++;
757 case QDX_RID5: rit[RID4] = *v++;
758 case QDX_RID4: rit[RID3] = *v++;
759 case QDX_RID3:
760 rit[RID2] = *v++;
761 rit[RID1] = *v++;
762 rit[RID0] = *v++;
763 }
764 if ( QDX_FULTXT & typ )
765 switch ( QDX_TAGMSK&typ ) {
766 case QDX_TAG2: rit[TAG1] = *v++;
767 case QDX_TAG1: rit[TAG0] = *v++;
768 }
769 switch ( QDX_POSMSK&typ ) {
770 case 4: pos[POS3] = *v++;
771 case 3: pos[POS2] = *v++;
772 case 2: pos[POS1] = *v++;
773 case 1: pos[POS0] = *v++;
774 }
775 /*
776 eRr(LOG_VERBOSE, "rid %d tag %d pos %d",
777 (int)(p->rit >> 16), (int)(0xffff & p->rit), p->pos);
778 */
779 if ( !scx->tfl ) goto tagok;
780 /* find in tag filter */
781 if ( 1 == scx->tfl->len ) {
782 if ( (unsigned short)p->rit == *scx->tfl->lst ) goto tagok;
783 } else {
784 unsigned short *t = scx->tfl->lst, *te = t + scx->tfl->len;
785 while ( t < te ) if ( (unsigned short)p->rit == *t++ ) goto tagok;
786 }
787 continue;
788 tagok:
789 if ( p->rit < scx->qry->skp ) continue;
790 if ( scx->qry->brk && p->rit >= scx->qry->brk ) break;
791 p++;
792 }
793 if ( !(tmp.len = p - tmp.ptr) ) goto done;
794 if ( scx->flt.res ) {
795 rMatch(&tmp, &scx->flt);
796 if ( scx->flt.mrk ) goto done;
797 }
798 if ( scx->res->len )
799 rMerge(scx->res, &tmp, scx->dim);
800 else
801 memcpy(scx->res, &tmp, 4+tmp.len*sizeof(Llp));
802 done:
803 return 0;
804 } /* scb */
805
806
807 static Res *search (Scx *scx, Exp *exp) /* search one expression */
808 {
809 Res *a = 0, *b;
810 int dim;
811
812 if ( scx->flt.res )
813 eRr(LOG_VERBOSE, "%sing %d %c on %d", scx->flt.mrk?"mark":"filter",
814 exp->tag, scx->flt.set?'+':'-', scx->flt.cnd->tag);
815 else
816 eRr(LOG_VERBOSE, "creating %d", exp->tag);
817 if ( OP_IDENT <= exp->tag ) { /* simple expression */
818 scx->qlp.flg = 0;
819 if ( OP_IDENT < exp->tag ) {
820 qMkKey(scx->qlp.qdx, &scx->qlp.key, exp->trm.val, exp->trm.len);
821 switch (exp->tag) {
822 case T_EQ: scx->qlp.flg = QEQ; break;
823 case T_PFX: scx->qlp.flg = QPF; break;
824 }
825 } else {
826 scx->qlp.flg = QUPTO;
827 qMkKey(scx->qlp.qdx, &scx->qlp.key,
828 exp->bop.lft->trm.val, exp->bop.lft->trm.len);
829 qMkKey(scx->qlp.qdx, &scx->qlp.to,
830 exp->bop.rgt->trm.val, exp->bop.rgt->trm.len);
831 }
832 if ( !(QDX_FULTXT & scx->qlp.qdx->ptr) ) {
833 if (!scx->tfl || 1 != scx->tfl->len) {
834 eRr(ERR_INVAL, "need tag filter with non-fulltext db");
835 return 0;
836 }
837 }
838 if ( !scx->flt.mrk )
839 scx->res = a = rAlloc();
840 qLoop(&scx->qlp);
841 if ( a )
842 eRr(LOG_VERBOSE, "got %d", a->len);
843 if ( scx->dsc ) {
844 rFree(scx->flt.res);
845 scx->flt.res = 0;
846 }
847 return a;
848 }
849 switch ( exp->tag ) {
850 case OP_NULL:
851 return 0;
852 case OP_HEAD:
853 return search(scx, exp->bop.rgt);
854 case OP_OR: {
855 int dsc = scx->dsc;
856 scx->dsc = 0; /* delay discarding filter */
857 a = search(scx, exp->bop.lft);
858 scx->dsc = dsc;
859 b = search(scx, exp->bop.rgt);
860 if ( b ) {
861 if ( a )
862 rMerge(a, b, scx->dim);
863 rFree(b);
864 }
865 eRr(LOG_VERBOSE, "got %d", a->len);
866 return a;
867 } /* case OR */
868 case OP_TAGS: {
869 Tfl *tfl = scx->tfl;
870 scx->tfl = &exp->tfl;
871 a = search(scx, exp->tfl.lft);
872 scx->tfl = tfl;
873 return a;
874 } /* case TAGS */
875 /*
876 case OP_NOT: TODO: special case marking not impl.
877 return 0;
878 */
879 default:
880 if ( (dim = scx->dim) < opdim[exp->tag] )
881 scx->dim = opdim[exp->tag];
882 /* if not filtering and the right hand does not support direct marking,
883 create it and use as left hand filter */
884 if ( !scx->flt.res && !canmark(exp->bop.rgt) ) {
885 scx->flt.res = b = search(scx, exp->bop.rgt); /* create right hand */
886 scx->flt.cnd = &exp->bop;
887 scx->flt.mrk = 0;
888 scx->flt.set = OP_NOT != exp->tag;
889 a = search(scx, exp->bop.lft);
890 scx->flt.res = 0;
891 break;
892 } /* Else create the left hand (using any filter) */
893 if ( (a = search(scx, exp->bop.lft)) ) {
894 Flt sav, use;
895 use.res = a;
896 use.cnd = &exp->bop;
897 use.mrk = RES_MARKS(a);
898 use.set = OP_NOT != exp->tag;
899 memset(use.mrk, !use.set, ses->s);
900 sav = scx->flt;
901 /* evaluate the right hand */
902 if ( canmark(exp->bop.rgt) ) { /* using direct marking, if possible */
903 scx->flt = use;
904 search(scx, exp->bop.rgt); /* direct marking on a */
905 } else { /* create it */
906 scx->flt.res = 0; /* non filtering */
907 if ( (b = search(scx, exp->bop.rgt)) ) {
908 rMatch(b, &use); /* and mark */
909 rFree(b);
910 }
911 }
912 if ( a->len ) { /* reduce the marked set */
913 Llp *p = a->ptr, *e = p + a->len, *d;
914 char *m = use.mrk;
915 eRr(LOG_VERBOSE, "reducing a->len %d", a->len);
916 while ( *m++ ) if ( e == ++p ) goto done;
917 eRr(LOG_VERBOSE, "1st %d were good", p - a->ptr);
918 /* now *p is 1st unmarked */
919 for ( m--, d=p; p<e; p++ ) if ( *m++ ) *d++ = *p;
920 a->len = d - a->ptr;
921 eRr(LOG_VERBOSE, "... to a->len %d", a->len);
922 }
923 done:
924 scx->flt = sav;
925 }
926 ;
927 }
928 scx->dim = dim;
929 return a;
930 } /* search */
931
932
933 /*
934 parse query and execute search from 'Q<TAB>*' msg
935 */
936 static Qry *Q ( Dbo *self, Msg msg )
937 {
938 Qry *q = 0, *p;
939 char *exp = msg->val+2;
940 int l = msg->len-2, i = 0;
941
942 if ( !ses->qry )
943 ses->qry = mAlloc(ses->q * sizeof(Qry));
944 if ( !self->qry )
945 self->qry = &((Prv*)self)->qry;
946 #ifdef REFS_ARE_NOW_FIXED
947 if ( l && '#' == *exp && l-1 == a2il(exp+1,l-1,&i) ) { /* plain #n ref */
948 p = self->qry;
949 if ( 1 == l )
950 ((Prv*)self)->qry.cur = 0; /* reset 0 query */
951 if ( (unsigned)i == p->qid )
952 return p;
953 for ( ; (q = p->nxt); p=q )
954 if ( (unsigned)i == q->qid ) {
955 p->nxt = q->nxt; /* unlink */
956 q->nxt = self->qry; /* put at front */
957 return self->qry = q;
958 }
959 return 0;
960 }
961 #endif
962 /* create new query */
963 p=q=ses->qry + ses->q -1; do { /* find slot */
964 if ( !p->dbo ) { /* unused */
965 q = p;
966 goto gotslot;
967 }
968 if ( q->use < p->use )
969 q = p;
970 } while ( ses->qry < --p );
971 /* unlink the lrused */ {
972 Dbo *dbo = q->dbo;
973 if ( q == dbo->qry )
974 dbo->qry = 0; /* kill current */
975 else {
976 for (p = dbo->qry; q != p->nxt; )
977 if ( !(p = p->nxt) ) {
978 eRr(ERR_IDIOT, "unlinked");
979 return 0;
980 }
981 p->nxt = q->nxt; /* unlink */
982 }
983 mFree(q->txt);
984 mFree(q->res);
985 mFree(q->buf);
986 }
987 memset(q, 0, sizeof(*q));
988 gotslot: /* got clean q */
989 q->dbo = self;
990 q->qid = self->qid+1;
991 q->buf = mAlloc(l); /* see notes on buffer usage at parse */
992 if ( 0 > (i = parse(q, (unsigned char*)(q->txt = mDupz(exp, l)))) )
993 goto cleanup;
994 q->sln = i;
995 q->skp = 0x10000; /* always skip rid 0 entries */
996 if ( !q->exp->tag ) /* the NULL search: loop all */
997 q->len = self->db->rdx.mid;
998 else {
999 Scx scx;
1000 Res *res;
1001 lulu last = 0, *d;
1002 Llp *r, *e;
1003
1004 memset(&scx, 0, sizeof(scx));
1005 scx.qlp.qcb = (QCb*)scb;
1006 scx.qlp.qdx = &self->db->qdx;
1007 scx.qry = q;
1008 if ( !(res = search(&scx, q->exp)) || !res->len )
1009 goto cleanup;
1010 d = q->res = mAlloc(res->len * sizeof(lulu));
1011 r = res->ptr;
1012 e = r + res->len;
1013 do
1014 if ( last != (*d = r->rit >> 16) ) last = *d++;
1015 while (++r < e);
1016 q->len = d - q->res;
1017 }
1018 if ( '?' == q->txt[i] )
1019 i = parse(q, (unsigned char*)q->txt+i+1);
1020 else {
1021 q->exp = 0;
1022 MFREE(q->buf);
1023 }
1024 self->qid++;
1025 q->nxt = self->qry; /* link */
1026 self->qry = q;
1027 return q;
1028 cleanup:
1029 mFree(q->txt);
1030 mFree(q->res);
1031 mFree(q->buf);
1032 memset(q, 0, sizeof(*q));
1033 return 0;
1034 } /* Q */
1035
1036
1037 /*
1038 apply filter expression to record rid.
1039 return 1, if record passed, else 0.
1040 */
1041 static int filter ( Dbo *self, lulu rid, Exp *e )
1042 {
1043 if ( !e ) {
1044 eOut(0, "%u", (unsigned)rid);
1045 return 1;
1046 }
1047 if ( !e->tag ) {
1048 if ( 0 <= rRead(&env.out->lst, &self->db->rdx, rid, self->p) ) {
1049 SINK(env.out);
1050 return 1;
1051 }
1052 return 0;
1053 }
1054 return 1;
1055 } /* filter */
1056
1057
1058 typedef struct { /* search context */
1059 QLoop qlp;
1060 int lim; /* limit #terms */
1061 int cnt; /* count for term so far */
1062 int det; /* detailled: restrict to tag, count precise rids */
1063 int rln; /* length of rid: 3..6 */
1064 int tln; /* length of tag to match: 0..2 */
1065 int rof; /* offset of rid in vals: 0 or taglength */
1066 int tof; /* offset of tag in vals: 0 or ridlength */
1067 char rid[6]; /* last rid seen */
1068 char tag[2]; /* tag to filter on */
1069 char key[512]; /* decoded term */
1070 int kln; /* key length */
1071 } Tcx;
1072
1073
1074 static int tcb (Tcx *tcx) /* qLoop callback: list terms */
1075 {
1076 const unsigned char *v, *e;
1077 char *rid;
1078 int vsz;
1079
1080 if ( !(QSAME & tcx->qlp.flg) ) { /* new term */
1081 if ( tcx->cnt )
1082 eOut(0, "%d\011%.*s", tcx->cnt, tcx->kln, tcx->key);
1083 if ( !tcx->lim-- )
1084 return 1; /* break loop */
1085 tcx->kln = qRdKey(tcx->qlp.qdx, tcx->key, sizeof tcx->key, &tcx->qlp.cur);
1086 tcx->cnt = 0;
1087 }
1088 if ( !tcx->det ) {
1089 tcx->cnt += tcx->qlp.nvals;
1090 return 0;
1091 }
1092 /* detailled */
1093 rid = tcx->rid;
1094 if ( !(QSAME & tcx->qlp.flg) )
1095 memset(rid, 0, tcx->rln);
1096 vsz = tcx->qlp.qdx->vsz;
1097 v = tcx->qlp.vals;
1098 e = v + tcx->qlp.nvals*vsz;
1099 for ( ; v < e; v += vsz )
1100 if ( !(tcx->tln && memcmp(v+tcx->tof, tcx->tag, tcx->tln))
1101 && memcmp(v+tcx->rof, rid, tcx->rln)
1102 ) {
1103 tcx->cnt++;
1104 rid = (char*)v+tcx->rof;
1105 }
1106 if ( tcx->rid != rid )
1107 memcpy(tcx->rid, rid, tcx->rln);
1108 return 0;
1109 } /* tcb */
1110
1111
1112 /*
1113 */
1114 static int T ( Db *db, Fld *msg ) /* terms */
1115 {
1116 char *v = msg->val+2, *e = msg->val + msg->len, *p;
1117 Tcx tcx;
1118
1119 memset(&tcx, 0, sizeof(tcx));
1120 tcx.qlp.qcb = (QCb*)tcb;
1121 tcx.qlp.qdx = &db->qdx;
1122 tcx.qlp.flg = QPF;
1123 if ( !(tcx.lim = ses->s) )
1124 tcx.lim = 1;
1125 if ( v < e ) {
1126 for (p = v; v < e && TAB != *v; v++) ;
1127 qMkKey(tcx.qlp.qdx, &tcx.qlp.key, p, v-p);
1128 if ( v++ < e ) {
1129 for (p = v; v < e && TAB != *v; v++) ;
1130 if ( v == p )
1131 tcx.qlp.flg = QLOOP; /* endless */
1132 else {
1133 tcx.qlp.flg = QUPTO;
1134 qMkKey(tcx.qlp.qdx, &tcx.qlp.to, p, v-p);
1135 }
1136 if ( v++ < e ) { /* detailled */
1137 int typ = tcx.qlp.qdx->ptr, tag;
1138 if ( QDX_LEAFPV & db->qdx.typ ) {
1139 eOut(0, "\011-1\011plain values");
1140 return ERR_INVAL;
1141 }
1142 a2il(v, e-v, &tag);
1143 tcx.det = !0;
1144 tcx.rln = QDX_RIDMIN+((QDX_RIDMSK&typ)>>QDX_RIDSH);
1145 tcx.tln = (QDX_TAGMSK&typ)>>QDX_TAGSH;
1146 if ( !(QDX_FULTXT & typ) )
1147 tcx.rof = tcx.tln;
1148 else if ( tag )
1149 tcx.tof = tcx.rln;
1150 else
1151 tcx.tln = 0;
1152 if ( tcx.tln ) { /* get the tag bytes */
1153 Val val;
1154 Ptr ptr;
1155 ptr.tag = (unsigned short)tag;
1156 qMkVal(&val, &ptr, typ);
1157 memcpy(tcx.tag, val.byt+tcx.tof, tcx.tln);
1158 }
1159 }
1160 }
1161 }
1162 eOut(0, "#\0110"); /* plain ok comment */
1163 qLoop(&tcx.qlp);
1164 if ( tcx.cnt )
1165 eOut(0, "%d\011%.*s", tcx.cnt, tcx.kln, tcx.key);
1166 return 0;
1167 } /* T */
1168
1169
1170
1171 enum {
1172 IDX_FIELD,
1173 IDX_WORD,
1174 IDX_SPLIT
1175 };
1176 typedef struct {
1177 QSet qst;
1178 int mod;
1179 int mhd;
1180 } Idx;
1181
1182 /*
1183 process control fields for X method
1184 if a tag is found, return it's pos
1185 */
1186 static char *idxctl (Idx *i, Fld *f)
1187 {
1188 char *p = f->val, *e = p+f->len, *t = p;
1189 for ( ; p<e; p = ++t) {
1190 while (t < e && TAB != *t) t++;
1191 switch (*p) {
1192 case 'f': i->mod = IDX_FIELD; break;
1193 case 'w': i->mod = IDX_WORD; break;
1194 case 's': i->mod = IDX_SPLIT; break;
1195 case 'a': i->qst.del = 0; break;
1196 case 'd': i->qst.del = 1; break;
1197 case 'm': i->mhd = t>p+1 && 'H'==p[1]; continue;
1198 case 'p':
1199 if ( t == p+1 )
1200 i->qst.pfx.len = 0;
1201 else {
1202 int len = t-p-1;
1203 if ( 255 < len ) len = 255;
1204 memcpy(i->qst.pfx.byt, p+1, i->qst.pfx.len=len);
1205 }
1206 continue;
1207 case 'r': a2il(p+1, t-p-1, (int*)&i->qst.ptr.rid); continue;
1208 case '+': i->qst.del = 0;
1209 if (CT_D == lat1ct[(unsigned char)*++p]) return p;
1210 continue;
1211 case '-': i->qst.del = 1;
1212 if (CT_D == lat1ct[(unsigned char)*++p]) return p;
1213 continue;
1214 default:
1215 if (CT_D == lat1ct[(unsigned char)*p]) return p;
1216 continue;
1217 }
1218 a2il(p+1, t-p-1, (int*)&i->qst.ptr.pos);
1219 }
1220 return 0;
1221 } /* idxctl */
1222
1223
1224 /*
1225 write a record.
1226 header must have 'W<TAB>' prestripped.
1227 */
1228 static int W ( Db *db, Fld *rec )
1229 {
1230 int rid = 0;
1231 int pos = -1;
1232 unsigned siz = 0;
1233 char *e = rec->val + rec->len;
1234 /* strip rid@pos */
1235 rec->val += a2il(rec->val, rec->len, &rid);
1236 if ( e > rec->val && '@' == *rec->val ) {
1237 rec->val++;
1238 rec->val += a2il(rec->val, e-rec->val, &pos);
1239 }
1240 if ( e > rec->val && TAB == *rec->val )
1241 rec->val++;
1242 rec->len = e-rec->val;
1243 if ( 0 < (rid = rWrite(&db->rdx, rec, rid, pos, siz)) )
1244 ses->rid = (unsigned)rid;
1245 return rid;
1246 } /* W */
1247
1248
1249 /*
1250 */
1251 int OPT_STDCALL oDbo ( Dbo *self, Msg msg )
1252 {
1253 eRr(LOG_VERBOSE, "oDBo '%.*s'", msg->len, msg->val);
1254 if ( ! msg->len )
1255 goto simplewrite;
1256 switch (*msg->val) {
1257 case 'Q': {
1258 Qry *q;
1259 int i = ses->r;
1260 unsigned len;
1261
1262 if ( 1 == msg->len ) {
1263 if ( !(q = self->qry) )
1264 return eRr(ERR_BADF, "query expired");
1265 } else if ( TAB != msg->val[1] )
1266 break;
1267 else if ( !(q = Q(self, msg)) )
1268 return ERR_INVAL;
1269 /* fetch results */
1270 len = q->res ? q->len : (unsigned)self->db->rdx.mid;
1271 q->use = ses->req;
1272 eRr(LOG_VERBOSE, "%d of %d results off %d", i, q->len, q->cur);
1273 eOut(0, "#\t%d\t%d\t%d", len-q->cur, q->qid, (unsigned)(q->brk>>16));
1274 if ( q->res )
1275 while (q->cur < len && (i -= filter(self, q->res[q->cur++], q->exp)))
1276 ;
1277 else
1278 while (q->cur < len && (i -= filter(self, ++q->cur, q->exp)))
1279 ;
1280 return 0;
1281 } /* Q */
1282 case 'R':
1283 if ( 1 == msg->len || TAB == msg->val[1] ) {
1284 int rid, ret = 0;
1285 EADD(0, "W", 1);
1286 if ( 1 == msg->len ) { /* R multi */
1287 Fld *end = REND(msg);
1288 unsigned mr = ses->r;
1289 while ( ++msg < end
1290 && 0 <= (ret = ( a2il(msg->val, msg->len, &rid),
1291 rRead(&env.out->lst, &self->db->rdx, rid, self->p)))
1292 && --mr /* allowing 4 billion if mr was 0 */
1293 )
1294 SINK(env.out);
1295 return ret;
1296 } else { /* single or range */
1297 char *p = msg->val+2, *e = p + msg->len -2;
1298 int count = 1;
1299 p += a2il(p, e-p, &rid);
1300 if ( rid > self->db->rdx.mid )
1301 return 0;
1302 if ( e > p && TAB == *p ) {
1303 p++;
1304 a2il(p, e-p, &count);
1305 if ( 0 >= count || count > (int)ses->r )
1306 count = (int)ses->r;
1307 if ( !count || count > self->db->rdx.mid - rid + 1 )
1308 count = self->db->rdx.mid - rid + 1;
1309 }
1310 for ( ; 0 <= (ret = rRead(&env.out->lst, &self->db->rdx, rid, self->p))
1311 && --count;
1312 rid++
1313 ) {
1314 eRr(LOG_INFO, "R %u: %d lst %d", rid, ret, LLEN(&env.out->lst));
1315 SINK(env.out);
1316 }
1317 return ret;
1318 }
1319 }
1320 break;
1321 case 'T':
1322 if ( 1 == msg->len || TAB == msg->val[1] )
1323 return T(self->db, msg);
1324 break;
1325 case 'W':
1326 if ( 1 == msg->len ) { /* multi */
1327 int left = RLEN(msg)-1, l, ret;
1328 EADD(0, "R", 1);
1329 for ( msg++; left; left -= l, msg += l ) {
1330 if ( !(l = RLEN(msg)) )
1331 msg->tag = -(l = left);
1332 else if ( l < 0 || left < l )
1333 return eRr(ERR_INVAL, "whoopsy! bad embedded rec");
1334 if ( 0 > (ret = W(self->db, msg)) )
1335 return ret;
1336 self->rid = (unsigned)ret;
1337 eOut(0, "%d", ret);
1338 }
1339 return 0;
1340 } else if ( TAB == msg->val[1] ) { /* single */
1341 msg->val += 2; /* strip W<TAB> */
1342 msg->len -= 2;
1343 goto simplewrite;
1344 }
1345 /* else it's not a 'W' ... */
1346 break;
1347 case '0': case '1': case '2': case '3': case '4':
1348 case '5': case '6': case '7': case '8': case '9':
1349 /* well ... we really should not see such messages ... alas */
1350 simplewrite:
1351 {
1352 int ret = W(self->db, msg);
1353 if ( 0 < ret ) {
1354 eOut(0, "R\t%d", ret);
1355 self->rid = (unsigned)ret;
1356 }
1357 return 0;
1358 }
1359 case 'X':
1360 if ( 1 == msg->len || TAB == msg->val[1] ) {
1361 char buf[0x2000], *mhd = buf, *val;
1362 int mhdlen = sizeof(buf);
1363 Fld *end = REND(msg);
1364 int ctl = 1, lasttag = -1, cnt = 0, len = 0;
1365 Idx idx;
1366
1367 memset(&idx, 0, sizeof(idx));
1368 idx.qst.qdx = &self->db->qdx;
1369 idx.qst.ptr.rid = self->rid;
1370 for (;;) {
1371 if ( !ctl ) {
1372 idx.qst.ptr.tag = msg->tag;
1373 val = msg->val;
1374 len = msg->len;
1375 } else if ( (val = idxctl(&idx, msg)) ) {
1376 char *e = msg->val + msg->len;
1377 int tag;
1378 val += a2il(val, e-val, &tag);
1379 idx.qst.ptr.tag = (unsigned short)tag;
1380 if ( val < e && TAB == *val ) val++;
1381 len = e - val;
1382 }
1383 if ( val ) {
1384 if ( lasttag != idx.qst.ptr.tag ) {
1385 idx.qst.ptr.pos = 0;
1386 lasttag = idx.qst.ptr.tag;
1387 }
1388 if ( idx.mhd ) {
1389 char *ev = val+len, *d;
1390 if ( len > mhdlen ) {
1391 if ( mhd != buf )
1392 mFree(mhd);
1393 mhd = mAlloc(len);
1394 }
1395 for (d = mhd;;val++) {
1396 char *s = val;
1397 while ( val < ev && '<' != *val ) val++;
1398 if ( val > s ) {
1399 memcpy(d, s, val - s);
1400 d += val - s;
1401 }
1402 while ( val < ev && '>' != *val && '=' != *val ) val++;
1403 if ( val == ev ) break;
1404 if ( '>' == *val ) continue;
1405 s = ++val;
1406 while ( val < ev && '>' != *val ) val++;
1407 if ( val > s ) {
1408 memcpy(d, s, val - s);
1409 d += val - s;
1410 }
1411 }
1412 val = mhd;
1413 len = d - mhd;
1414 eRr(LOG_DEBUG, "mhd len %d", len);
1415 }
1416 if ( IDX_SPLIT == idx.mod )
1417 cnt += qSetKeyVals(&idx.qst, val, len);
1418 else {
1419 if ( !qSetKeyVal(&idx.qst, val, len) )
1420 cnt++;
1421 if (IDX_FIELD == idx.mod)
1422 idx.qst.ptr.pos = (idx.qst.ptr.pos + 0x10000) & OCCMSK;
1423 }
1424 }
1425 if ( end == ++msg )
1426 break;
1427 ctl = !msg->tag;
1428 }
1429 if ( mhd != buf )
1430 mFree(mhd);
1431 eOut(0, "#\t%d", cnt);
1432 return 0;
1433 } /* case 'X' */
1434 } /* switch */
1435 return oStruct((Struct*)self, msg);
1436 } /* oDbo */
1437
1438
1439 extern Dbo *newDbo ( char *name )
1440 {
1441 Prv *prv;
1442 Db *db = dOpen(name);
1443 if ( !db )
1444 return 0;
1445 prv = mAlloc(sizeof(Prv));
1446 prv->dbo.snd = (disp*)oDbo;
1447 prv->dbo.db = db;
1448 prv->dbo.qry = &prv->qry;
1449 prv->qry.dbo = &prv->dbo;
1450 prv->qry.exp = prv->qry.ebf; /* OP_NULL filter */
1451 return &prv->dbo;
1452 } /* newDbo */

  ViewVC Help
Powered by ViewVC 1.1.26