/[hyperestraier]/trunk/estseek.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/estseek.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (show annotations)
Fri Jul 29 21:57:20 2005 UTC (18 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 36945 byte(s)
make working copy from version 0.5.1

1 /*************************************************************************************************
2 * A sample searcher of Hyper Estraier
3 * Copyright (C) 2004-2005 Mikio Hirabayashi
4 * This file is part of Hyper Estraier.
5 * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6 * the GNU Lesser General Public License as published by the Free Software Foundation; either
7 * version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope
8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 * License for more details.
11 * You should have received a copy of the GNU Lesser General Public License along with Hyper
12 * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13 * Boston, MA 02111-1307 USA.
14 *************************************************************************************************/
15
16
17 #if defined(MYFCGI)
18 #include <fcgi_stdio.h>
19 #endif
20 #include "estraier.h"
21 #include "myconf.h"
22
/* compile-time settings of the searcher */
23 #define CONFSUFFIX ".conf" /* suffix of the configuration file; it replaces the extension of the script name */
24 #define KWDBNAME "kwords" /* name of the keyword database, looked up under the index directory */
25 #define DATTRLFILE "_lfile" /* name of the attribute of the local file name (fallback for the title) */
26 #define DATTRSCORE "#score" /* name of the pseudo-attribute under which the score is stored */
27 #define NUMBUFSIZ 32 /* size of a buffer for a formatted number */
28 #define OUTBUFSIZ 262144 /* size of the buffer given to stdout */
29 #define MINIBNUM 31 /* bucket number of maps for trivial use */
30 #define LOCKRETRYNUM 16 /* number of retries when opening the index fails because of a lock */
31 #define MISSRETRYNUM 3 /* number of retries of the search when documents are missing */
32 #define MISSINCRATIO 8 /* factor by which the search limit grows on each such retry */
33 #define DEFPERPAGE 10 /* default number of documents shown per page */
34 #define NAVIPAGES 10 /* number of pages in paging navigation */
35 #define SPCACHEMNUM 1048576 /* max number passed when enabling the special cache */
36
37
38 /* global variables for configurations; `realmain' fills them from the configuration file */
39 const char *g_conffile = NULL; /* path of the configuration file */
40 const char *g_indexname = NULL; /* name of the index */
41 const char *g_tmplfile = NULL; /* path of the template file */
42 const char *g_topfile = NULL; /* path of the top page file */
43 const char *g_logfile = NULL; /* path of the log file */
44 const char *g_lprefix = NULL; /* local prefix of the URI of each document */
45 const char *g_gprefix = NULL; /* global prefix of the URI of each document */
46 const char *g_gsuffix = NULL; /* global suffix of the URI of each document */
47 const char *g_dirindex = NULL; /* name of the index file in a directory */
48 const CBLIST *g_replexprs = NULL; /* list of URI replacement expressions (the "replace:" lines) */
49 const char *g_perpage = NULL; /* CSV of numbers of shown documents per page */
50 int g_attrselect = FALSE; /* whether to use select boxes for the extension form */
51 int g_showscore = FALSE; /* whether to show scores */
52 const CBLIST *g_extattrs = NULL; /* list of extra attributes of each document (the "extattr:" lines) */
53 int g_snipwwidth = -1; /* whole width of the snippet */
54 int g_sniphwidth = -1; /* width of the beginning of the text */
55 int g_snipawidth = -1; /* width around each highlighted word */
56 int g_condgstep = -1; /* step of N-gram; a "gstep=" request attribute overrides it */
57 int g_dotfidf = FALSE; /* whether to do TF-IDF tuning; a "tfidf=" request attribute overrides it */
58 int g_smplphrase = FALSE; /* whether to use simplified phrase */
59 int g_candetail = FALSE; /* whether to show detail link */
60 int g_smlrvnum = -1; /* number of elements of a vector for similarity */
61 const char *g_spcache = NULL; /* name of the attribute of special cache */
62
63
64 /* global variables for parameters; `realmain' fills them from the CGI parameters of the request */
65 const char *p_phrase = NULL; /* search phrase */
66 const char *p_attr = NULL; /* narrowing attribute */
67 const char *p_attrval = NULL; /* separated value of narrowing attribute */
68 const char *p_order = NULL; /* ordering attribute */
69 int p_perpage = 0; /* number of shown documents per page */
70 int p_pagenum = 0; /* number of the page, starting from 1 */
71 int p_detail = 0; /* ID of the document to be detailed, or 0 for none */
72 int p_similar = 0; /* ID of the seed document of similarity search, or 0 for none */
73
74
75 /* other global variables */
76 char g_outbuf[OUTBUFSIZ]; /* output buffer given to stdout */
77 const char *g_scriptname = NULL; /* name of the script, without directories */
78 const char *g_tmpltext = NULL; /* text of the template */
79 const char *g_toptext = NULL; /* text of the top page */
80 ESTDB *g_db = NULL; /* main database object */
81 CURIA *g_kwdb = NULL; /* keyword database object */
82 double g_etime = 0.0; /* elapsed time of the search (NOTE(review): presumably milliseconds, it is divided by 1000 before being shown as seconds) */
83 int g_tabidx = 0; /* counter of tab indexes */
84
85
86 /* function prototypes */
87 int main(int argc, char **argv); /* main routine */
88 static int realmain(int argc, char **argv); /* real main routine, serving one request */
89 static void showerror(const char *msg); /* show the error page and exit */
90 static const char *skiplabel(const char *str); /* skip the label of a configuration line */
91 static CBMAP *getparameters(void); /* get CGI parameters */
92 static void myestdbclose(ESTDB *db); /* close the database */
93 static void xmlprintf(const char *format, ...); /* output escaped string */
94 static CBMAP *vectorizer(void *db, int id, void *kwdb); /* create a vector of keywords */
95 static void setsimilarphrase(void); /* set the phrase for similarity search */
96 static void showpage(void); /* show the page */
97 static void showform(void); /* show the form */
98 static void showtop(void); /* show the top message */
99 static void showresult(ESTDOC **docs, int dnum, CBMAP *hints, int miss); /* show the result */
100 static void showdoc(ESTDOC *doc, const CBLIST *words, CBMAP *cnames, int detail); /* show a document */
101 static char *makeshownuri(const char *uri);
102 static void showinfo(void);
103 static void outputlog(void);
104
105
/*
 * Main routine.
 * Without MYFCGI the program serves one CGI request: it calls `realmain' and returns its result.
 * With MYFCGI it loops on `FCGI_Accept', resets the per-request state, calls `realmain' once for
 * each accepted request and returns 0 when the loop ends.
 */
int main(int argc, char **argv){
#if defined(MYFCGI)
  static int cnt = 0;
  while(FCGI_Accept() >= 0){
    /* every 256 requests, sweep and forget the database handles so that they are opened again;
       NOTE(review): `cbggcsweep' presumably releases the objects registered with `cbglobalgc' */
    if(++cnt >= 256){
      cbggcsweep();
      g_db = NULL;
      g_kwdb = NULL;
      cnt = 0;
    }
    /* reset the state which belongs to a single request */
    p_phrase = NULL;
    p_attr = NULL;
    p_attrval = NULL;
    p_order = NULL;
    p_perpage = 0;
    p_pagenum = 0;
    p_detail = 0;
    p_similar = 0;
    /* the counter of tab indexes is incremented while the form is printed; without this reset
       the `tabindex' attributes would keep growing as long as the process lives */
    g_tabidx = 0;
    realmain(argc, argv);
  }
  return 0;
#else
  return realmain(argc, argv);
#endif
}
132
133
134 /* real main routine */
135 static int realmain(int argc, char **argv){
136 CBLIST *lines, *rlist, *alist;
137 CBMAP *params;
138 const char *rp;
139 char *tmp, *wp;
140 int i, ecode;
141 /* set configurations */
142 cbstdiobin();
143 setvbuf(stdout, g_outbuf, _IOFBF, OUTBUFSIZ);
144 g_scriptname = argv[0];
145 if((rp = getenv("SCRIPT_NAME")) != NULL) g_scriptname = rp;
146 if((rp = strrchr(g_scriptname, '/')) != NULL) g_scriptname = rp + 1;
147 tmp = cbmalloc(strlen(g_scriptname) + strlen(CONFSUFFIX) + 1);
148 sprintf(tmp, "%s", g_scriptname);
149 cbglobalgc(tmp, free);
150 if(!(wp = strrchr(tmp, '.'))) wp = tmp + strlen(tmp);
151 sprintf(wp, "%s", CONFSUFFIX);
152 g_conffile = tmp;
153 if(!(lines = cbreadlines(g_conffile))) showerror("the configuration file is missing.");
154 cbglobalgc(lines, (void (*)(void *))cblistclose);
155 rlist = cblistopen();
156 cbglobalgc(rlist, (void (*)(void *))cblistclose);
157 alist = cblistopen();
158 cbglobalgc(alist, (void (*)(void *))cblistclose);
159 for(i = 0; i < cblistnum(lines); i++){
160 rp = cblistval(lines, i, NULL);
161 if(cbstrfwimatch(rp, "indexname:")){
162 g_indexname = skiplabel(rp);
163 } else if(cbstrfwimatch(rp, "tmplfile:")){
164 g_tmplfile = skiplabel(rp);
165 } else if(cbstrfwimatch(rp, "topfile:")){
166 g_topfile = skiplabel(rp);
167 } else if(cbstrfwimatch(rp, "logfile:")){
168 g_logfile = skiplabel(rp);
169 } else if(cbstrfwimatch(rp, "lprefix:")){
170 g_lprefix = skiplabel(rp);
171 } else if(cbstrfwimatch(rp, "gprefix:")){
172 g_gprefix = skiplabel(rp);
173 } else if(cbstrfwimatch(rp, "gsuffix:")){
174 g_gsuffix = skiplabel(rp);
175 } else if(cbstrfwimatch(rp, "dirindex:")){
176 g_dirindex = skiplabel(rp);
177 } else if(cbstrfwimatch(rp, "replace:")){
178 cblistpush(rlist, skiplabel(rp), -1);
179 } else if(cbstrfwimatch(rp, "perpage:")){
180 g_perpage = skiplabel(rp);
181 } else if(cbstrfwimatch(rp, "attrselect:")){
182 if(!cbstricmp(skiplabel(rp), "true")) g_attrselect = TRUE;
183 } else if(cbstrfwimatch(rp, "showscore:")){
184 if(!cbstricmp(skiplabel(rp), "true")) g_showscore = TRUE;
185 } else if(cbstrfwimatch(rp, "extattr:")){
186 cblistpush(alist, skiplabel(rp), -1);
187 } else if(cbstrfwimatch(rp, "snipwwidth:")){
188 g_snipwwidth = atoi(skiplabel(rp));
189 } else if(cbstrfwimatch(rp, "sniphwidth:")){
190 g_sniphwidth = atoi(skiplabel(rp));
191 } else if(cbstrfwimatch(rp, "snipawidth:")){
192 g_snipawidth = atoi(skiplabel(rp));
193 } else if(cbstrfwimatch(rp, "condgstep:")){
194 g_condgstep = atoi(skiplabel(rp));
195 } else if(cbstrfwimatch(rp, "dotfidf:")){
196 if(!cbstricmp(skiplabel(rp), "true")) g_dotfidf = TRUE;
197 } else if(cbstrfwimatch(rp, "smplphrase:")){
198 if(!cbstricmp(skiplabel(rp), "true")) g_smplphrase = TRUE;
199 } else if(cbstrfwimatch(rp, "candetail:")){
200 if(!cbstricmp(skiplabel(rp), "true")) g_candetail = TRUE;
201 } else if(cbstrfwimatch(rp, "smlrvnum:")){
202 g_smlrvnum = atoi(skiplabel(rp));
203 } else if(cbstrfwimatch(rp, "spcache:")){
204 g_spcache = skiplabel(rp);
205 }
206 }
207 if(!g_indexname) showerror("indexname is undefined.");
208 if(!g_tmplfile) showerror("tmplfile is undefined.");
209 if(!g_topfile) showerror("topfile is undefined.");
210 if(!g_logfile) showerror("logfile is undefined.");
211 if(!g_lprefix) showerror("lprefix is undefined.");
212 if(!g_gprefix) showerror("gprefix is undefined.");
213 if(!g_gsuffix) showerror("gsuffix is undefined.");
214 if(!g_dirindex) showerror("dirindex is undefined.");
215 g_replexprs = rlist;
216 if(!g_perpage) showerror("perpage is undefined.");
217 g_extattrs = alist;
218 if(g_snipwwidth < 0) showerror("snipwwidth is undefined.");
219 if(g_sniphwidth < 0) showerror("sniphwidth is undefined.");
220 if(g_snipawidth < 0) showerror("snipawidth is undefined.");
221 if(g_condgstep < 1) showerror("condgstep is undefined.");
222 if(!g_spcache) showerror("spcache is undefined.");
223 /* read parameters */
224 params = getparameters();
225 cbglobalgc(params, (void (*)(void *))cbmapclose);
226 if(!(p_phrase = cbmapget(params, "phrase", -1, NULL))) p_phrase = "";
227 while(*p_phrase == ' ' || *p_phrase == '\t'){
228 p_phrase++;
229 }
230 if(!(p_attr = cbmapget(params, "attr", -1, NULL))) p_attr = "";
231 while(*p_attr == ' ' || *p_attr == '\t'){
232 p_attr++;
233 }
234 if(!(p_attrval = cbmapget(params, "attrval", -1, NULL))) p_attrval = "";
235 while(*p_attrval == ' ' || *p_attrval == '\t'){
236 p_attrval++;
237 }
238 if(cbstrfwmatch(p_attr, "gstep=")){
239 g_condgstep = atoi(p_attr + 6);
240 p_attr = "";
241 }
242 if(cbstrfwmatch(p_attr, "tfidf=")){
243 g_dotfidf = !cbstricmp(p_attr + 6, "true");
244 p_attr = "";
245 }
246 if(!(p_order = cbmapget(params, "order", -1, NULL))) p_order = "";
247 while(*p_order == ' ' || *p_order == '\t'){
248 p_order++;
249 }
250 if((rp = cbmapget(params, "perpage", -1, NULL)) != NULL) p_perpage = atoi(rp);
251 if(p_perpage < 1) p_perpage = DEFPERPAGE;
252 if((rp = cbmapget(params, "detail", -1, NULL)) != NULL) p_detail = atoi(rp);
253 if(p_detail < 1) p_detail = 0;
254 if((rp = cbmapget(params, "similar", -1, NULL)) != NULL) p_similar = atoi(rp);
255 if(p_similar < 1) p_similar = 0;
256 if((rp = cbmapget(params, "pagenum", -1, NULL)) != NULL) p_pagenum = atoi(rp);
257 if(p_pagenum < 1) p_pagenum = 1;
258 if((rp = cbmapget(params, "enc", -1, NULL)) != NULL){
259 if((tmp = est_iconv(p_phrase, -1, rp, "UTF-8", NULL, NULL)) != NULL){
260 p_phrase = tmp;
261 cbglobalgc(tmp, free);
262 }
263 if((tmp = est_iconv(p_attr, -1, rp, "UTF-8", NULL, NULL)) != NULL){
264 p_attr = tmp;
265 cbglobalgc(tmp, free);
266 }
267 if((tmp = est_iconv(p_attrval, -1, rp, "UTF-8", NULL, NULL)) != NULL){
268 p_attrval = tmp;
269 cbglobalgc(tmp, free);
270 }
271 if((tmp = est_iconv(p_order, -1, rp, "UTF-8", NULL, NULL)) != NULL){
272 p_order = tmp;
273 cbglobalgc(tmp, free);
274 }
275 }
276 /* read the other files and the database */
277 if(!g_db){
278 if(!(tmp = cbreadfile(g_tmplfile, NULL))) showerror("the template file is missing.");
279 cbglobalgc(tmp, free);
280 g_tmpltext = tmp;
281 if(!(tmp = cbreadfile(g_topfile, NULL))) showerror("the top page file is missing.");
282 cbglobalgc(tmp, free);
283 g_toptext = tmp;
284 for(i = 0; i <= LOCKRETRYNUM; i++){
285 if((g_db = est_db_open(g_indexname, ESTDBREADER | ESTDBLCKNB, &ecode)) != NULL) break;
286 if(ecode != ESTELOCK) showerror("the index is missing or broken.");
287 est_usleep(1000 * 1000);
288 }
289 if(!g_db) showerror("the index is being updated now.");
290 cbglobalgc(g_db, (void (*)(void *))myestdbclose);
291 if(g_spcache[0] != '\0') est_db_set_special_cache(g_db, g_spcache, SPCACHEMNUM);
292 }
293 setsimilarphrase();
294 /* show the page */
295 showpage();
296 /* output the log message */
297 outputlog();
298 return 0;
299 }
300
301
/*
 * Print a "500 Internal Server Error" response in plain text carrying `msg', then terminate
 * the process with the exit status 1.  This function does not return.
 */
static void showerror(const char *msg){
  printf("Status: 500 Internal Server Error\r\n"
         "Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "Error: %s\n",
         msg);
  exit(1);
}
310
311
/*
 * Return the value part of a "label: value" line.
 * The result points into `str', just after the first colon and the spaces and tabs which follow
 * it.  A line without any colon yields the empty string.
 */
static const char *skiplabel(const char *str){
  const char *colon;
  colon = strchr(str, ':');
  if(colon == NULL) return "";
  colon++;
  return colon + strspn(colon, " \t");
}
321
322
323 /* get CGI parameters */
324 static CBMAP *getparameters(void){
325 int maxlen = 1024 * 1024 * 32;
326 CBMAP *map, *attrs;
327 CBLIST *pairs, *parts;
328 const char *rp, *body;
329 char *buf, *key, *val, *dkey, *dval, *wp, *bound, *fbuf, *aname;
330 int i, len, c, blen, flen;
331 map = cbmapopenex(37);
332 buf = NULL;
333 len = 0;
334 if((rp = getenv("REQUEST_METHOD")) != NULL && !strcmp(rp, "POST") &&
335 (rp = getenv("CONTENT_LENGTH")) != NULL && (len = atoi(rp)) > 0){
336 if(len > maxlen) len = maxlen;
337 buf = cbmalloc(len + 1);
338 for(i = 0; i < len && (c = getchar()) != EOF; i++){
339 buf[i] = c;
340 }
341 buf[i] = '\0';
342 if(i != len){
343 free(buf);
344 buf = NULL;
345 }
346 } else if((rp = getenv("QUERY_STRING")) != NULL){
347 buf = cbmemdup(rp, -1);
348 len = strlen(buf);
349 }
350 if(buf && len > 0){
351 if((rp = getenv("CONTENT_TYPE")) != NULL && cbstrfwmatch(rp, "multipart/form-data") &&
352 (rp = strstr(rp, "boundary=")) != NULL){
353 rp += 9;
354 bound = cbmemdup(rp, -1);
355 if((wp = strchr(bound, ';')) != NULL) *wp = '\0';
356 parts = cbmimeparts(buf, len, bound);
357 for(i = 0; i < cblistnum(parts); i++){
358 body = cblistval(parts, i, &blen);
359 attrs = cbmapopen();
360 fbuf = cbmimebreak(body, blen, attrs, &flen);
361 if((rp = cbmapget(attrs, "NAME", -1, NULL)) != NULL){
362 cbmapput(map, rp, -1, fbuf, flen, FALSE);
363 aname = cbsprintf("%s-filename", rp);
364 if((rp = cbmapget(attrs, "FILENAME", -1, NULL)) != NULL)
365 cbmapput(map, aname, -1, rp, -1, FALSE);
366 free(aname);
367 }
368 free(fbuf);
369 cbmapclose(attrs);
370 }
371 cblistclose(parts);
372 free(bound);
373 } else {
374 pairs = cbsplit(buf, -1, "&");
375 for(i = 0; i < cblistnum(pairs); i++){
376 key = cbmemdup(cblistval(pairs, i, NULL), -1);
377 if((val = strchr(key, '=')) != NULL){
378 *(val++) = '\0';
379 dkey = cburldecode(key, NULL);
380 dval = cburldecode(val, NULL);
381 cbmapput(map, dkey, -1, dval, -1, FALSE);
382 free(dval);
383 free(dkey);
384 }
385 free(key);
386 }
387 cblistclose(pairs);
388 }
389 }
390 free(buf);
391 return map;
392 }
393
394
395 /* close the database */
396 static void myestdbclose(ESTDB *db){
397 int ecode;
398 est_db_close(db, &ecode);
399 }
400
401
/*
 * Output a formatted string to the standard output, with two conversions for escaping.
 * A directive is '%', then any run of the characters "0123456789 .+-", then one conversion:
 *   s                string, printed as is ("(null)" for NULL)
 *   d                int
 *   o u x X c        unsigned int
 *   e E f g G        double
 *   @                string with '&', '<', '>' and '"' replaced by XML entities and the control
 *                    characters 0x01-0x08 and 0x0e-0x1f dropped ("(null)" for NULL)
 *   ?                string with every byte other than ASCII alphanumerics, '_', '-' and '.'
 *                    replaced by "%XX" ("(null)" for NULL)
 *   %                a literal '%'
 * A directive with any other conversion prints nothing and consumes no argument.  A '%' which
 * is not followed by a conversion before the end of `format' ends the output.
 */
static void xmlprintf(const char *format, ...){
  va_list ap;
  char *tmp, cbuf[32];
  unsigned char c;
  int cblen;
  va_start(ap, format);
  while(*format != '\0'){
    if(*format != '%'){
      putchar(*format);
      format++;
      continue;
    }
    /* rebuild the directive in `cbuf' so that printf can do the numeric conversions */
    cbuf[0] = '%';
    cblen = 1;
    format++;
    /* keep two bytes of `cbuf' for the conversion character and the terminator */
    while(*format != '\0' && strchr("0123456789 .+-", *format) &&
          cblen < (int)sizeof(cbuf) - 2){
      cbuf[cblen++] = *format;
      format++;
    }
    /* a dangling '%': stop here instead of stepping over the terminator of `format' */
    if(*format == '\0') break;
    cbuf[cblen++] = *format;
    cbuf[cblen] = '\0';
    switch(*format){
    case 's':
      tmp = va_arg(ap, char *);
      if(!tmp) tmp = "(null)";
      printf(cbuf, tmp);
      break;
    case 'd':
      printf(cbuf, va_arg(ap, int));
      break;
    case 'o': case 'u': case 'x': case 'X': case 'c':
      printf(cbuf, va_arg(ap, unsigned int));
      break;
    case 'e': case 'E': case 'f': case 'g': case 'G':
      printf(cbuf, va_arg(ap, double));
      break;
    case '@':
      tmp = va_arg(ap, char *);
      if(!tmp) tmp = "(null)";
      for(; *tmp != '\0'; tmp++){
        c = *(unsigned char *)tmp;
        switch(c){
        case '&': printf("&amp;"); break;
        case '<': printf("&lt;"); break;
        case '>': printf("&gt;"); break;
        case '"': printf("&quot;"); break;
        default:
          /* drop the control characters other than 0x09-0x0d */
          if(!(c <= 0x08 || (c >= 0x0e && c <= 0x1f))) putchar(c);
          break;
        }
      }
      break;
    case '?':
      tmp = va_arg(ap, char *);
      if(!tmp) tmp = "(null)";
      for(; *tmp != '\0'; tmp++){
        c = *(unsigned char *)tmp;
        if((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
           (c >= '0' && c <= '9') || strchr("_-.", c)){
          putchar(c);
        } else {
          printf("%%%02X", c);
        }
      }
      break;
    case '%':
      putchar('%');
      break;
    default:
      break;
    }
    format++;
  }
  va_end(ap);
}
476
477
478 /* create a vector of keywords */
479 static CBMAP *vectorizer(void *db, int id, void *kwdb){
480 CBMAP *kwords;
481 char *mbuf;
482 int msiz;
483 if(!(mbuf = crget((CURIA *)kwdb, (char *)&id, sizeof(int), 0, -1, &msiz))) return NULL;
484 kwords = cbmapload(mbuf, msiz);
485 free(mbuf);
486 return kwords;
487 }
488
489
490 /* set the phrase for similarity search */
491 static void setsimilarphrase(void){
492 ESTDOC *doc;
493 CBMAP *svmap;
494 CBDATUM *datum;
495 const char *kbuf, *vbuf;
496 char *ptr;
497 int ksiz, vsiz;
498 if(!cbstrfwimatch(p_phrase, ESTOPSIMILAR) && p_similar < 1) return;
499 if(g_smlrvnum < 1){
500 p_phrase = "";
501 return;
502 }
503 if(!g_kwdb){
504 ptr = cbsprintf("%s%c%s", g_indexname, ESTPATHCHR, KWDBNAME);
505 if((g_kwdb = cropen(ptr, CR_OREADER, -1, -1)) != NULL){
506 cbglobalgc(g_kwdb, (void (*)(void *))crclose);
507 est_db_set_vectorizer(g_db, vectorizer, g_kwdb);
508 }
509 free(ptr);
510 }
511 if(p_similar < 1) return;
512 svmap = g_kwdb ? vectorizer(g_db, p_similar, g_kwdb) : NULL;
513 if(!svmap && (doc = est_db_get_doc(g_db, p_similar, 0)) != NULL){
514 svmap = est_db_etch_doc(g_dotfidf ? g_db : NULL, doc, g_smlrvnum);
515 est_doc_delete(doc);
516 } else if(!svmap){
517 return;
518 }
519 datum = cbdatumopen(ESTOPSIMILAR, -1);
520 cbmapiterinit(svmap);
521 while((kbuf = cbmapiternext(svmap, &ksiz)) != NULL){
522 vbuf = cbmapget(svmap, kbuf, ksiz, &vsiz);
523 cbdatumcat(datum, " WITH ", -1);
524 cbdatumcat(datum, vbuf, vsiz);
525 cbdatumcat(datum, " ", 1);
526 cbdatumcat(datum, kbuf, ksiz);
527 }
528 ptr = cbdatumtomalloc(datum, NULL);
529 cbglobalgc(ptr, free);
530 p_phrase = ptr;
531 cbmapclose(svmap);
532 }
533
534
535 /* show the page */
536 static void showpage(void){
537 ESTCOND *cond;
538 ESTDOC **docs;
539 CBMAP *hints;
540 CBLIST *elems;
541 const char *rp;
542 char *tmp, numbuf[NUMBUFSIZ];
543 int i, tnum, max, *res, rnum, sc, dnum, miss;
544 printf("Cache-Control: no-cache, must-revalidate, no-transform\r\n");
545 printf("Pragma: no-cache\r\n");
546 printf("Content-Disposition: inline; filename=%s\r\n", g_scriptname);
547 printf("Content-Type: text/html; charset=UTF-8\r\n");
548 printf("\r\n");
549 g_etime = est_gettimeofday();
550 cond = est_cond_new();
551 if(p_phrase[0] != '\0') est_cond_set_phrase(cond, p_phrase);
552 if(p_attr[0] != '\0'){
553 if(p_attrval[0] != '\0'){
554 tmp = cbsprintf("%s %s", p_attr, p_attrval);
555 est_cond_add_attr(cond, tmp);
556 free(tmp);
557 } else {
558 est_cond_add_attr(cond, p_attr);
559 }
560 }
561 if(p_order[0] != '\0') est_cond_set_order(cond, p_order);
562 switch(g_condgstep){
563 case 1:
564 est_cond_set_options(cond, ESTCONDSURE);
565 break;
566 case 2:
567 est_cond_set_options(cond, ESTCONDUSU);
568 break;
569 case 3:
570 est_cond_set_options(cond, ESTCONDFAST);
571 break;
572 case 4:
573 est_cond_set_options(cond, ESTCONDAGIT);
574 break;
575 }
576 if(!g_dotfidf) est_cond_set_options(cond, ESTCONDNOIDF);
577 if(g_smplphrase) est_cond_set_options(cond, ESTCONDSIMPLE);
578 if(g_showscore) est_cond_set_options(cond, ESTCONDSCFB);
579 tnum = 0;
580 max = p_pagenum * p_perpage * 1.3 + 1;
581 do {
582 est_cond_set_max(cond, max);
583 hints = cbmapopenex(MINIBNUM);
584 res = est_db_search(g_db, cond, &rnum, hints);
585 if(g_candetail && p_detail > 0){
586 if(rnum < 1) cbmapput(hints, "", 0, "1", 1, TRUE);
587 free(res);
588 res = cbmalloc(sizeof(int));
589 res[0] = p_detail;
590 rnum = 1;
591 }
592 docs = cbmalloc(rnum * sizeof(ESTDOC *) + 1);
593 dnum = 0;
594 miss = 0;
595 for(i = 0; i < rnum; i++){
596 if(!(docs[dnum] = est_db_get_doc(g_db, res[i], dnum < p_pagenum * p_perpage ? 0 :
597 ESTGDNOATTR | ESTGDNOTEXT))){
598 miss++;
599 continue;
600 }
601 if((sc = est_cond_score(cond, i)) >= 0){
602 sprintf(numbuf, "%d", sc);
603 est_doc_add_attr(docs[dnum], DATTRSCORE, numbuf);
604 }
605 dnum++;
606 }
607 if(tnum <= MISSRETRYNUM && miss > 0 && max <= rnum && dnum < p_pagenum * p_perpage + 1){
608 for(i = 0; i < dnum; i++){
609 est_doc_delete(docs[i]);
610 }
611 free(docs);
612 free(res);
613 cbmapclose(hints);
614 max *= MISSINCRATIO;
615 tnum++;
616 continue;
617 }
618 break;
619 } while(TRUE);
620 g_etime = est_gettimeofday() - g_etime;
621 elems = cbxmlbreak(g_tmpltext, FALSE);
622 for(i = 0; i < cblistnum(elems); i++){
623 rp = cblistval(elems, i, NULL);
624 if(!strcmp(rp, "<!--ESTFORM-->")){
625 showform();
626 } else if(!strcmp(rp, "<!--ESTRESULT-->")){
627 if(p_phrase[0] == '\0' && p_attr[0] == '\0' && p_detail < 1){
628 showtop();
629 } else {
630 showresult(docs, dnum, hints, miss);
631 }
632 } else if(!strcmp(rp, "<!--ESTINFO-->")){
633 showinfo();
634 } else {
635 printf("%s", rp);
636 }
637 }
638 for(i = 0; i < dnum; i++){
639 est_doc_delete(docs[i]);
640 }
641 cblistclose(elems);
642 free(docs);
643 free(res);
644 cbmapclose(hints);
645 est_cond_delete(cond);
646 }
647
648
649 /* show the form */
650 static void showform(void){
651 CBLIST *list;
652 const char *elem;
653 int i, num;
654 xmlprintf("<div id=\"estform\" class=\"estform\">\n");
655 xmlprintf("<form action=\"%@\" method=\"get\" id=\"form_self\">\n", g_scriptname);
656 xmlprintf("<div class=\"form_basic\">\n");
657 xmlprintf("<input type=\"text\" name=\"phrase\" value=\"%@\""
658 " size=\"80\" id=\"phrase\" class=\"text\" tabindex=\"%d\" accesskey=\"0\" />\n",
659 p_phrase, ++g_tabidx);
660 xmlprintf("<input type=\"submit\" value=\"Search\""
661 " id=\"search\" class=\"submit\" tabindex=\"%d\" accesskey=\"1\" />\n",
662 ++g_tabidx);
663 xmlprintf("</div>\n");
664 xmlprintf("<div class=\"form_extension\">\n");
665 xmlprintf("<select name=\"perpage\" id=\"perpage\" tabindex=\"%d\">\n", ++g_tabidx);
666 list = cbsplit(g_perpage, -1, ",");
667 for(i = 0; i < cblistnum(list); i++){
668 elem = cblistval(list, i, NULL);
669 if(elem[0] == '\0') continue;
670 num = atoi(elem);
671 xmlprintf("<option value=\"%d\"%s>%d</option>\n",
672 num, num == p_perpage ? " selected=\"selected\"" : "", num);
673 }
674 cblistclose(list);
675 xmlprintf("</select>\n");
676 xmlprintf("per page, with\n");
677 if(g_attrselect){
678 xmlprintf("<select name=\"attr\" id=\"attr\" tabindex=\"%d\">\n", ++g_tabidx);
679 xmlprintf("<option value=\"\">--</option>\n");
680 xmlprintf("<option value=\"@title ISTRINC\"%s>title including</option>\n",
681 cbstrfwmatch(p_attr, "@title ISTRINC") ? " selected=\"selected\"" : "");
682 xmlprintf("<option value=\"@title ISTRBW\"%s>title beginning with</option>\n",
683 cbstrfwmatch(p_attr, "@title ISTRBW") ? " selected=\"selected\"" : "");
684 xmlprintf("<option value=\"@title ISTREW\"%s>title ending with</option>\n",
685 cbstrfwmatch(p_attr, "@title ISTREW") ? " selected=\"selected\"" : "");
686 xmlprintf("<option value=\"@author ISTRINC\"%s>author including</option>\n",
687 cbstrfwmatch(p_attr, "@author ISTRINC") ? " selected=\"selected\"" : "");
688 xmlprintf("<option value=\"@author ISTRBW\"%s>author beginning with</option>\n",
689 cbstrfwmatch(p_attr, "@author ISTRBW") ? " selected=\"selected\"" : "");
690 xmlprintf("<option value=\"@author ISTREW\"%s>author ending with</option>\n",
691 cbstrfwmatch(p_attr, "@author ISTREW") ? " selected=\"selected\"" : "");
692 xmlprintf("<option value=\"@mdate NUMLT\"%s>date less than</option>\n",
693 cbstrfwmatch(p_attr, "@mdate NUMLT") ? " selected=\"selected\"" : "");
694 xmlprintf("<option value=\"@mdate NUMGE\"%s>date not less than</option>\n",
695 cbstrfwmatch(p_attr, "@mdate NUMGE") ? " selected=\"selected\"" : "");
696 xmlprintf("<option value=\"@size NUMLT\"%s>size less than</option>\n",
697 cbstrfwmatch(p_attr, "@size NUMLT") ? " selected=\"selected\"" : "");
698 xmlprintf("<option value=\"@size NUMGE\"%s>size not less than</option>\n",
699 cbstrfwmatch(p_attr, "@size NUMGE") ? " selected=\"selected\"" : "");
700 xmlprintf("</select>\n");
701 xmlprintf("<input type=\"text\" name=\"attrval\" value=\"%@\""
702 " size=\"16\" id=\"attrval\" class=\"text\" tabindex=\"%d\" accesskey=\"2\" />\n",
703 p_attrval, ++g_tabidx);
704 xmlprintf(", order by\n");
705 xmlprintf("<select name=\"order\" id=\"order\" tabindex=\"%d\">\n", ++g_tabidx);
706 xmlprintf("<option value=\"\">score</option>\n");
707 xmlprintf("<option value=\"@title STRA\"%s>title (asc)</option>\n",
708 !strcmp(p_order, "@title STRA") ? " selected=\"selected\"" : "");
709 xmlprintf("<option value=\"@title STRD\"%s>title (desc)</option>\n",
710 !strcmp(p_order, "@title STRD") ? " selected=\"selected\"" : "");
711 xmlprintf("<option value=\"@author STRA\"%s>author (asc)</option>\n",
712 !strcmp(p_order, "@author STRA") ? " selected=\"selected\"" : "");
713 xmlprintf("<option value=\"@author STRD\"%s>author (desc)</option>\n",
714 !strcmp(p_order, "@author STRD") ? " selected=\"selected\"" : "");
715 xmlprintf("<option value=\"@mdate NUMA\"%s>date (asc)</option>\n",
716 !strcmp(p_order, "@mdate NUMA") ? " selected=\"selected\"" : "");
717 xmlprintf("<option value=\"@mdate NUMD\"%s>date (desc)</option>\n",
718 !strcmp(p_order, "@mdate NUMD") ? " selected=\"selected\"" : "");
719 xmlprintf("<option value=\"@size NUMA\"%s>size (asc)</option>\n",
720 !strcmp(p_order, "@size NUMA") ? " selected=\"selected\"" : "");
721 xmlprintf("<option value=\"@size NUMD\"%s>size (desc)</option>\n",
722 !strcmp(p_order, "@size NUMD") ? " selected=\"selected\"" : "");
723 xmlprintf("</select>\n");
724 } else {
725 xmlprintf("<input type=\"text\" name=\"attr\" value=\"%@\""
726 " size=\"24\" id=\"attr\" class=\"text\" tabindex=\"%d\" accesskey=\"2\" />\n",
727 p_attr, ++g_tabidx);
728 xmlprintf(", order by\n");
729 xmlprintf("<input type=\"text\" name=\"order\" value=\"%@\""
730 " size=\"24\" id=\"order\" class=\"text\" tabindex=\"%d\" accesskey=\"3\" />\n",
731 p_order, ++g_tabidx);
732 }
733 xmlprintf("</div>\n");
734 xmlprintf("</form>\n");
735 xmlprintf("</div>\n");
736 }
737
738
739 /* show the top message */
740 static void showtop(void){
741 printf("%s", g_toptext);
742 }
743
744
745 /* show the result */
746 static void showresult(ESTDOC **docs, int dnum, CBMAP *hints, int miss){
747 CBMAP *cnames;
748 CBLIST *words;
749 const char *key, *myphrase;
750 char cname[NUMBUFSIZ];
751 int i, hits, snum, start, end, cnum, pnum;
752 xmlprintf("<div id=\"estresult\" class=\"estresult\">\n");
753 hits = atoi(cbmapget(hints, "", 0, NULL)) - miss;
754 start = (p_pagenum - 1) * p_perpage;
755 end = p_pagenum * p_perpage;
756 if(end > dnum) end = dnum;
757 xmlprintf("<div class=\"resinfo\">");
758 xmlprintf("Results of <strong>%d</strong> - <strong>%d</strong>",
759 start + (hits > 0 ? 1 : 0), end);
760 xmlprintf(" of about <strong>%d</strong>", hits);
761 if(p_phrase[0] != '\0' && strlen(p_phrase) < 128)
762 xmlprintf(" for <strong>%@</strong>", p_phrase);
763 if(g_etime > 0.0) xmlprintf(" (%.3f sec.)", g_etime / 1000.0);
764 if(miss > p_perpage * p_pagenum) xmlprintf("*");
765 xmlprintf("</div>\n");
766 if(cbmaprnum(hints) > 2 || (p_phrase[0] != '\0' && p_attr[0] != '\0')){
767 xmlprintf("<div class=\"hints\">");
768 cbmapiterinit(hints);
769 i = 0;
770 while((key = cbmapiternext(hints, NULL)) != NULL){
771 if(key[0] == '\0') continue;
772 if(i++ > 0) xmlprintf(", ");
773 xmlprintf("<span class=\"hword\">%s (%s)</span>", key, cbmapget(hints, key, -1, NULL));
774 }
775 xmlprintf("</div>\n");
776 }
777 words = cblistopen();
778 cbmapiterinit(hints);
779 while((key = cbmapiternext(hints, NULL)) != NULL){
780 if(key[0] == '\0' || atoi(cbmapget(hints, key, -1, NULL)) < 0) continue;
781 cblistpush(words, key, -1);
782 }
783 cnames = cbmapopenex(MINIBNUM);
784 cnum = 0;
785 for(i = 0; i < cblistnum(words); i++){
786 sprintf(cname, "key%d", ++cnum);
787 cbmapput(cnames, cblistval(words, i, NULL), -1, cname, -1, FALSE);
788 }
789 for(snum = start; snum < end; snum++){
790 showdoc(docs[snum], words, cnames, g_candetail && p_detail > 0);
791 }
792 cbmapclose(cnames);
793 cblistclose(words);
794 if(dnum < 1) xmlprintf("<p class=\"note\">Your search did not match any documents.</p>\n");
795 myphrase = p_similar > 0 ? "" : p_phrase;
796 xmlprintf("<div class=\"paging\">\n");
797 if(p_pagenum > 1){
798 xmlprintf("<a href=\"%@?phrase=%?&amp;attr=%?&amp;attrval=%?&amp;order=%?"
799 "&amp;perpage=%d&amp;pagenum=%d&amp;similar=%d\" class=\"navi\">PREV</a>\n",
800 g_scriptname, myphrase, p_attr, p_attrval, p_order,
801 p_perpage, p_pagenum - 1, p_similar);
802 } else {
803 xmlprintf("<span class=\"void\">PREV</span>\n");
804 }
805 pnum = (hits - 1 - (hits - 1) % p_perpage + p_perpage) / p_perpage;
806 if(hits > 0 && p_detail < 1){
807 for(i = p_pagenum > NAVIPAGES ? p_pagenum - NAVIPAGES + 1 : 1;
808 i == 1 || (i <= pnum && i < p_pagenum + NAVIPAGES); i++){
809 if(i == p_pagenum){
810 printf("<span class=\"pnow\">%d</span>\n", i);
811 } else {
812 xmlprintf("<a href=\"%@?phrase=%?&amp;attr=%?&amp;attrval=%?&amp;order=%?"
813 "&amp;perpage=%d&amp;pagenum=%d&amp;similar=%d\" class=\"pnum\">%d</a>\n",
814 g_scriptname, myphrase, p_attr, p_attrval, p_order, p_perpage, i, p_similar, i);
815 }
816 }
817 }
818 if(snum < dnum){
819 xmlprintf("<a href=\"%@?phrase=%?&amp;attr=%?&amp;attrval=%?&amp;order=%?"
820 "&amp;perpage=%d&amp;pagenum=%d&amp;similar=%d\" class=\"navi\">NEXT</a>\n",
821 g_scriptname, myphrase, p_attr, p_attrval, p_order,
822 p_perpage, p_pagenum + 1, p_similar);
823 } else {
824 xmlprintf("<span class=\"void\">NEXT</span>\n");
825 }
826 xmlprintf("</div>\n");
827 xmlprintf("</div>\n");
828 }
829
830
831 /* show a document */
832 static void showdoc(ESTDOC *doc, const CBLIST *words, CBMAP *cnames, int detail){
833 CBMAP *kwords;
834 CBLIST *names, *lines;
835 const char *uri, *title, *score, *val, *name, *line, *cname;
836 char *turi, *tsv, *pv, *str;
837 int i, id;
838 id = est_doc_id(doc);
839 if(!(uri = est_doc_attr(doc, ESTDATTRURI))) uri = ".";
840 turi = makeshownuri(uri);
841 if(!(title = est_doc_attr(doc, ESTDATTRTITLE))) title = "";
842 if(title[0] == '\0' && !(title = est_doc_attr(doc, DATTRLFILE))) title = "";
843 if(title[0] == '\0' && ((pv = strrchr(uri, '/')) != NULL)) title = pv + 1;
844 if(title[0] == '\0') title = "(no title)";
845 if(!(score = est_doc_attr(doc, DATTRSCORE))) score = "";
846 xmlprintf("<dl class=\"doc\" id=\"doc_%d\">\n", id);
847 xmlprintf("<dt>");
848 xmlprintf("<a href=\"%@\" class=\"doc_title\">%@</a>", turi, title);
849 if(score[0] != '\0') xmlprintf(" <span class=\"doc_score\">%@</span>", score);
850 xmlprintf("</dt>\n");
851 if(detail){
852 names = est_doc_attr_names(doc);
853 for(i = 0; i < cblistnum(names); i++){
854 name = cblistval(names, i, NULL);
855 if(name[0] != '_' && strcmp(name, ESTDATTRURI) && strcmp(name, ESTDATTRTITLE) &&
856 (val = est_doc_attr(doc, name)) != NULL && val[0] != '\0'){
857 xmlprintf("<dd class=\"doc_attr\">");
858 xmlprintf("%@: <span class=\"doc_val\">%@</span>", name, val);
859 xmlprintf("</dd>\n");
860 }
861 }
862 cblistclose(names);
863 if(g_smlrvnum > 0){
864 xmlprintf("<dd class=\"doc_attr\">");
865 xmlprintf("#vector: <span class=\"doc_val\">");
866 kwords = est_db_etch_doc(g_db, doc, g_smlrvnum);
867 cbmapiterinit(kwords);
868 for(i = 0; (name = cbmapiternext(kwords, NULL)) != NULL; i++){
869 if(i > 0) xmlprintf(", ");
870 xmlprintf("%@ (%@)\n", name, cbmapget(kwords, name, -1, NULL));
871 }
872 cbmapclose(kwords);
873 xmlprintf("</span>");
874 xmlprintf("</dd>\n");
875 }
876 } else {
877 for(i = 0; i < cblistnum(g_extattrs); i++){
878 str = cbmemdup(cblistval(g_extattrs, i, NULL), -1);
879 if((pv = strchr(str, '|')) != NULL){
880 *pv = '\0';
881 pv++;
882 if((val = est_doc_attr(doc, str)) != NULL && val[0] != '\0'){
883 xmlprintf("<dd class=\"doc_attr\">");
884 xmlprintf("%@: <span class=\"doc_val\">%@</span>", pv, val);
885 xmlprintf("</dd>\n");
886 }
887 }
888 free(str);
889 }
890 }
891 xmlprintf("<dd class=\"doc_text\">");
892 tsv = est_doc_make_snippet(doc, words, detail ? INT_MAX : g_snipwwidth,
893 detail ? INT_MAX : g_sniphwidth, g_snipawidth);
894 lines = cbsplit(tsv, -1, "\n");
895 for(i = 0; i < cblistnum(lines); i++){
896 line = cblistval(lines, i, NULL);
897 if(line[0] == '\0'){
898 if(i < cblistnum(lines) - 1) xmlprintf(" ... ");
899 } else if((pv = strchr(line, '\t')) != NULL){
900 str = cbmemdup(line, pv - line);
901 if(!(cname = cbmapget(cnames, pv + 1, -1, NULL))) cname = "key0";
902 xmlprintf("<strong class=\"key %@\">%@</strong>", cname, str);
903 free(str);
904 } else {
905 xmlprintf("%@", line);
906 }
907 }
908 cblistclose(lines);
909 free(tsv);
910 xmlprintf("</dd>\n");
911 xmlprintf("<dd class=\"doc_navi\">\n");
912 xmlprintf("<span class=\"doc_uri\">%@</span>\n", turi);
913 if(g_candetail)
914 xmlprintf("- <a href=\"%@?phrase=%?&amp;detail=%d&amp;perpage=%d\" class=\"detail\">"
915 "[detail]</a>\n", g_scriptname, p_similar > 0 ? "" : p_phrase, id, p_perpage);
916 if(g_smlrvnum > 0)
917 xmlprintf("- <a href=\"%@?similar=%d&amp;perpage=%d\" class=\"similar\">[similar]</a>\n",
918 g_scriptname, id, p_perpage);
919 xmlprintf("</dd>\n");
920 xmlprintf("</dl>\n");
921 free(turi);
922 }
923
924
925 /* make a URI to be shown */
926 static char *makeshownuri(const char *uri){
927 const char *prefix;
928 char *turi, *file, *bef, *aft, *pv, *nuri, *wp;
929 int i;
930 if(cbstrfwimatch(uri, g_lprefix)) uri += strlen(g_lprefix);
931 prefix = g_gprefix;
932 if(cbstrfwimatch(uri, "file://") || cbstrfwimatch(uri, "ftp://") ||
933 cbstrfwimatch(uri, "http://") || cbstrfwimatch(uri, "https://")) prefix = "";
934 turi = cbsprintf("%s%s%s", prefix, uri, g_gsuffix);
935 if(g_dirindex[0] != '\0' && (file = strrchr(turi, '/')) != NULL &&
936 !cbstricmp(file + 1, g_dirindex)){
937 file[1] = '\0';
938 }
939 for(i = 0; i < cblistnum(g_replexprs); i++){
940 bef = cbmemdup(cblistval(g_replexprs, i, NULL), -1);
941 if((pv = strstr(bef, "{{!}}")) != NULL){
942 *pv = '\0';
943 aft = pv + 5;
944 } else {
945 aft = "";
946 }
947 if((pv = strstr(turi, bef)) != NULL){
948 nuri = cbmalloc(strlen(turi) + strlen(aft) + 1);
949 wp = nuri;
950 memcpy(wp, turi, pv - turi);
951 wp += pv - turi;
952 wp += sprintf(wp, "%s", aft);
953 sprintf(wp, "%s", pv + strlen(bef));
954 free(turi);
955 turi = nuri;
956 }
957 free(bef);
958 }
959 return turi;
960 }
961
962
963 /* show the top */
964 static void showinfo(void){
965 xmlprintf("<div id=\"estinfo\" class=\"estinfo\">");
966 xmlprintf("Powered by Hyper Estraier %@, with %d documents and %d words.",
967 est_version, est_db_doc_num(g_db), est_db_word_num(g_db));
968 xmlprintf("</div>\n");
969 }
970
971
972 /* output the log message */
973 static void outputlog(void){
974 FILE *ofp;
975 const char *val;
976 if(g_logfile[0] == '\0' || !(ofp = fopen(g_logfile, "ab"))) return;
977 if(!(val = getenv("REMOTE_ADDR"))) val = "0.0.0.0";
978 fprintf(ofp, "%s:", val);
979 if(!(val = getenv("REMOTE_PORT"))) val = "0";
980 fprintf(ofp, "%s\t", val);
981 fprintf(ofp, "%s\t", p_phrase);
982 if(!(val = getenv("HTTP_USER_AGENT"))) val = "*";
983 fprintf(ofp, "%s\n", val);
984 fclose(ofp);
985 }
986
987
988
989 /* END OF FILE */

  ViewVC Help
Powered by ViewVC 1.1.26