/[pgswish]/trunk/pgswish.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgswish.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 22 - (show annotations)
Sun May 29 22:41:20 2005 UTC (18 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 16026 byte(s)
Added pgswish_arr function using SFRM_Materialize mode. Integer properties
don't work yet.

/*
 * integrate swish-e into PostgreSQL
 *
 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-02-18
 *
 * TODO:
 * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx
 * - support composite type arguments
 * - split error_or_abort
 * - use getResultPropValue not SwishResultPropertyStr
 * - fix everything about pgswish_arr which is broken
 *
 * NOTES:
 * - clear structures with memset to support hash indexes (who would like
 *   to create hash index on table returned from function?)
 * - number of returned rows is set by PostgreSQL evaluator, see:
 *   http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
 *
 * Based on:
 * - C example from PostgreSQL documentation (BSD licence)
 * - swish-e example src/libtest.c (GPL)
 * - _textin/_textout from pgcurl.c (LGPL)
 *
 * This code is licenced under GPL
 */

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"
#include "utils/builtins.h"
#include "utils/array.h"
#include "utils/lsyscache.h"	/* get_typlenbyvalalign() */
#include "miscadmin.h"

#include <stdio.h>
#include <string.h>
#include <unistd.h>		/* dup2() */

#include <swish-e.h>

/*
 * Helpers for converting between PostgreSQL text values and C strings.
 * All four go through the built-in textin/textout functions:
 *
 *   _textin(str)    - C string in, Datum produced by textin out
 *   _textout(str)   - text pointer in, result of textout as a plain pointer
 *   GET_STR(textp)  - text pointer in, result of textout as a C string
 *   GET_TEXT(cstrp) - C string in, result of textin as a text pointer
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

40 /* Globals */
41 static SW_HANDLE swish_handle = NULL; /* Database handle */
42 static SW_SEARCH search = NULL; /* search handle -- search parameters */
43 static SW_RESULTS swish_results = NULL; /* results handle -- list of results */
44 static SW_RESULT *sw_res = NULL; /* one row from swish-e results */
45
46 /* define PostgreSQL v1 function */
47 PG_FUNCTION_INFO_V1(pgswish);
48 Datum pgswish(PG_FUNCTION_ARGS) {
49
50 FuncCallContext *funcctx;
51 int call_cntr;
52 int max_calls;
53 TupleDesc tupdesc;
54 TupleTableSlot *slot;
55 AttInMetadata *attinmeta;
56 char *index_path;
57 char *query;
58 FILE *logfh;
59
60 /* stuff done only on the first call of the function */
61 if (SRF_IS_FIRSTCALL()) {
62 MemoryContext oldcontext;
63
64 /* take arguments from function */
65 //index_path = _textout(PG_GETARG_TEXT_P(0));
66 index_path = _textout(PG_GETARG_TEXT_P(0));
67 query = _textout(PG_GETARG_TEXT_P(1));
68
69 /* create a function context for cross-call persistence */
70 funcctx = SRF_FIRSTCALL_INIT();
71
72 /* switch to memory context appropriate for multiple function calls */
73 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
74
75
76 /* Send any errors or warnings to log, as well as
77 * STDOUT and STDERR (just to be sure) */
78 if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
79 set_error_handle( logfh );
80 elog(INFO, "loggin swish-e errors to /tmp/pgswish.log");
81 /* redirect STDOUT and STDERR to log */
82 dup2(1, logfh);
83 dup2(2, logfh);
84 } else {
85 elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
86 }
87
88 elog(INFO, "pgswish: SwishInit(%s)", index_path);
89
90 swish_handle = SwishInit( index_path );
91
92 if ( SwishError( swish_handle ) )
93 elog(INFO, "pgswish: SwishInit(%s) failed: %s", index_path, SwishErrorString( swish_handle ));
94
95 elog(INFO, "handle: %08x", swish_handle);
96
97 if (! swish_handle) {
98 elog(ERROR, "pgswish: can't open %s", index_path);
99 SRF_RETURN_DONE(funcctx);
100 }
101
102 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
103 /* set ranking scheme. default is 0 */
104 SwishRankScheme( swish_handle, 0 );
105 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
106
107 elog(INFO, "pgswish: SwishQuery(%s)", query);
108 /* Here's a short-cut to searching that creates a search object and searches at the same time */
109 swish_results = SwishQuery( swish_handle, query);
110 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
111
112 /* total number of tuples to be returned */
113 funcctx->max_calls = SwishHits( swish_results );
114
115 /* check if results exists */
116 if ( 0 == funcctx->max_calls )
117 elog(INFO, "no results for: %s", query );
118
119 elog(INFO, "pgswish: SwishHits = %d", funcctx->max_calls);
120
121 /* Build a tuple description for a __pgswish tuple */
122 tupdesc = RelationNameGetTupleDesc("__pgswish");
123
124 /* allocate a slot for a tuple with this tupdesc */
125 slot = TupleDescGetSlot(tupdesc);
126
127 /* assign slot to function context */
128 funcctx->slot = slot;
129
130 /*
131 * generate attribute metadata needed later to produce tuples from raw
132 * C strings
133 */
134 attinmeta = TupleDescGetAttInMetadata(tupdesc);
135 funcctx->attinmeta = attinmeta;
136
137 MemoryContextSwitchTo(oldcontext);
138
139 elog(INFO, "SRF_IS_FIRSTCALL done");
140 }
141
142 /* stuff done on every call of the function */
143 funcctx = SRF_PERCALL_SETUP();
144
145 call_cntr = funcctx->call_cntr;
146 max_calls = funcctx->max_calls;
147 slot = funcctx->slot;
148 attinmeta = funcctx->attinmeta;
149
150 if (call_cntr < max_calls) {
151 char **values;
152 HeapTuple tuple;
153 Datum result;
154
155 elog(INFO, "pgswish: loop count %d", call_cntr);
156
157 if (! swish_results) {
158 elog(ERROR, "pgswish: no swish-e results");
159 SRF_RETURN_DONE(funcctx);
160 }
161
162 elog(DEBUG1, "pgswish: check for swish-e error");
163 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
164
165 /*
166 * Prepare a values array for storage in our slot.
167 * This should be an array of C strings which will
168 * be processed later by the type input functions.
169 */
170
171 sw_res = SwishNextResult( swish_results );
172 if (! sw_res) {
173 elog(ERROR, "pgswish: swish-e sort result list: %d rows expected %d", call_cntr, max_calls - 1);
174 Free_Results_Object( swish_results );
175 Free_Search_Object( search );
176 SRF_RETURN_DONE(funcctx);
177 }
178
179 elog(INFO, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
180 SwishResultPropertyStr ( sw_res, "swishdocpath" ),
181 SwishResultPropertyULong ( sw_res, "swishrank" ),
182 SwishResultPropertyULong ( sw_res, "swishdocsize" ),
183 SwishResultPropertyStr ( sw_res, "swishtitle"),
184 SwishResultPropertyStr ( sw_res, "swishdbfile" ),
185 SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
186 SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
187 SwishResultPropertyULong ( sw_res, "swishfilenum" )
188 );
189
190 values = (char **) palloc(4 * sizeof(char *));
191
192 values[0] = prop2int( sw_res, "swishrank" );
193 values[1] = prop2text( sw_res, "swishdocpath" );
194 values[2] = prop2text( sw_res, "swishtitle" );
195 values[3] = prop2int( sw_res, "swishdocsize" );
196
197 /*
198 values[0] = (char *) palloc(16 * sizeof(char));
199 snprintf(values[0], 16, "%d", 1);
200 values[1] = (char *) palloc(16 * sizeof(char));
201 snprintf(values[1], 16, "%d", 2);
202 values[2] = (char *) palloc(16 * sizeof(char));
203 snprintf(values[2], 16, "%d", 3);
204 values[3] = (char *) palloc(16 * sizeof(char));
205 snprintf(values[3], 16, "%d", 4);
206 */
207
208 /* build a tuple */
209 tuple = BuildTupleFromCStrings(attinmeta, values);
210
211 /* make the tuple into a datum */
212 result = TupleGetDatum(slot, tuple);
213
214 /* clean up ? */
215 pfree(values[0]);
216 pfree(values[1]);
217 pfree(values[2]);
218 pfree(values[3]);
219 pfree(values);
220
221 elog(DEBUG1, "row: %s|%s|%s|%s",values[0],values[1],values[2],values[3]);
222
223 SRF_RETURN_NEXT(funcctx, result);
224 } else {
225 elog(INFO, "loop over");
226
227 /* free swish object and close */
228 Free_Search_Object( search );
229 SwishClose( swish_handle );
230
231 /* do when there is no more left */
232 SRF_RETURN_DONE(funcctx);
233 }
234 }
235
236
237 /*
238 * new function with support for property selection
239 */
240
241 PG_FUNCTION_INFO_V1(pgswish_arr);
242 Datum pgswish_arr(PG_FUNCTION_ARGS)
243 {
244 ArrayType *prop_arr = PG_GETARG_ARRAYTYPE_P(5);
245 Oid prop_element_type = ARR_ELEMTYPE(prop_arr);
246 int prop_ndims = ARR_NDIM(prop_arr);
247 int *prop_dim_counts = ARR_DIMS(prop_arr);
248 int *prop_dim_lower_bounds = ARR_LBOUND(prop_arr);
249 int ncols = 0;
250 int nrows = 0;
251 int indx[MAXDIM];
252 int16 prop_len;
253 bool prop_byval;
254 char prop_align;
255 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
256 AttInMetadata *attinmeta;
257 TupleDesc tupdesc;
258 Tuplestorestate *tupstore = NULL;
259 HeapTuple tuple;
260 MemoryContext per_query_ctx;
261 MemoryContext oldcontext;
262 Datum dvalue;
263 char **values;
264 int rsinfo_ncols;
265 int i, j;
266 /* swish-e */
267 FILE *logfh;
268 int resnum;
269 int limit = 0;
270 int offset = 0;
271
272 char *index_path;
273 char *query;
274 char *attr;
275
276
277 /* only allow 1D input array */
278 if (prop_ndims == 1)
279 {
280 ncols = prop_dim_counts[0];
281 }
282 else
283 ereport(ERROR,
284 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
285 errmsg("invalid input array"),
286 errdetail("Input array must have 1 dimension")));
287
288 /* check to see if caller supports us returning a tuplestore */
289 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
290 ereport(ERROR,
291 (errcode(ERRCODE_SYNTAX_ERROR),
292 errmsg("materialize mode required, but it is not " \
293 "allowed in this context")));
294
295 /* get info about element type needed to construct the array */
296 get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align);
297
298 /* get the requested return tuple description */
299 tupdesc = rsinfo->expectedDesc;
300 rsinfo_ncols = tupdesc->natts;
301
302 /*
303 * The requested tuple description better match up with the array
304 * we were given.
305 */
306 if (rsinfo_ncols != ncols)
307 ereport(ERROR,
308 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
309 errmsg("invalid input array"),
310 errdetail("Number of elements in array must match number of query specified columns.")));
311
312 /* OK, use it */
313 attinmeta = TupleDescGetAttInMetadata(tupdesc);
314
315 /* Now go to work */
316 rsinfo->returnMode = SFRM_Materialize;
317
318 per_query_ctx = fcinfo->flinfo->fn_mcxt;
319 oldcontext = MemoryContextSwitchTo(per_query_ctx);
320
321 /* initialize our tuplestore */
322 tupstore = tuplestore_begin_heap(true, false, SortMem);
323
324
325 /* take rest of arguments from function */
326
327 /* index path */
328 if (PG_ARGISNULL(0)) {
329 ereport(ERROR,
330 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
331 errmsg("index path can't be null"),
332 errdetail("Index path must be valid full path to swish-e index")));
333 }
334 index_path = _textout(PG_GETARG_TEXT_P(0));
335
336 /* query string */
337 if (PG_ARGISNULL(0)) {
338 query = "";
339 } else {
340 query = _textout(PG_GETARG_TEXT_P(1));
341 }
342
343 /* atribute filter */
344 if (PG_ARGISNULL(2)) {
345 attr = "";
346 } else {
347 attr = _textout(PG_GETARG_TEXT_P(2));
348 }
349
350 /* limit */
351 if (PG_ARGISNULL(3)) {
352 limit = 0;
353 } else {
354 limit = PG_GETARG_INT32(3);
355 }
356
357 /* offset */
358 if (PG_ARGISNULL(4)) {
359 offset = 0;
360 } else {
361 offset = PG_GETARG_INT32(4);
362 }
363
364
365 /* Send any errors or warnings to log, as well as
366 * STDOUT and STDERR (just to be sure) */
367 if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
368 set_error_handle( logfh );
369 elog(INFO, "loggin swish-e errors to /tmp/pgswish.log");
370 /* redirect STDOUT and STDERR to log */
371 dup2(1, logfh);
372 dup2(2, logfh);
373 } else {
374 elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
375 }
376
377 elog(INFO, "pgswish: SwishInit(%s)", index_path);
378
379 swish_handle = SwishInit( index_path );
380
381 if ( SwishError( swish_handle ) || ! swish_handle )
382 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
383 errmsg("pgswish: SwishInit(%s) failed", index_path ),
384 errdetail( SwishErrorString( swish_handle ) )
385 ));
386
387 elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
388
389
390 /* set ranking scheme. default is 0 */
391 SwishRankScheme( swish_handle, 0 );
392 error_or_abort( swish_handle );
393
394 elog(INFO, "pgswish: SwishQuery(%s)", query);
395 /* Here's a short-cut to searching that creates a search object
396 * and searches at the same time */
397
398 /* set the search phrase to the search condition object */
399 if (! PG_ARGISNULL(1) && strlen(query) > 0)
400 swish_results = SwishQuery( swish_handle, query);
401 error_or_abort( swish_handle );
402
403 /* total number of tuples to be returned */
404 resnum = SwishHits( swish_results );
405
406 /* FIXME */
407 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
408 elog(DEBUG1,"ignored: %s", attr);
409 }
410
411 /* check if results exists */
412 if ( 0 == resnum ) {
413 elog(INFO, "pgswish: no results for: %s", query );
414 }
415
416 /* total number of tuples to be returned */
417 if (limit && limit < resnum) {
418 nrows = limit - offset;
419 } else {
420 nrows = resnum - offset;
421 }
422
423
424 elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query);
425
426
427 values = (char **) palloc(ncols * sizeof(char *));
428
429 for (i = 0; i < nrows; i++)
430 {
431 SwishSeekResult( swish_results, i + offset );
432 sw_res = SwishNextResult( swish_results );
433
434 /* get result from swish-e */
435 if (! ( SwishErrorString( swish_handle ) ) ) {
436 elog(INFO, "can't find result %d", i + offset);
437 } else {
438 elog(INFO, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
439 SwishResultPropertyStr ( sw_res, "swishdocpath" ),
440 SwishResultPropertyULong ( sw_res, "swishrank" ),
441 SwishResultPropertyULong ( sw_res, "swishdocsize" ),
442 SwishResultPropertyStr ( sw_res, "swishtitle"),
443 SwishResultPropertyStr ( sw_res, "swishdbfile" ),
444 SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
445 SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
446 SwishResultPropertyULong ( sw_res, "swishfilenum" )
447 );
448 }
449
450 /* iterate over results */
451 for (j = 0; j < ncols; j++)
452 {
453 bool isnull;
454
455 /* array value of this position */
456 indx[0] = j + prop_dim_lower_bounds[0];
457
458 dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull);
459
460 if (!isnull && sw_res)
461 values[j] = DatumGetCString(
462 prop2text( sw_res,
463 (char *)DirectFunctionCall1(textout, dvalue)
464 ));
465 else
466 values[j] = NULL;
467 }
468 /* construct the tuple */
469 tuple = BuildTupleFromCStrings(attinmeta, values);
470
471 /* now store it */
472 tuplestore_puttuple(tupstore, tuple);
473
474 }
475
476 tuplestore_donestoring(tupstore);
477 rsinfo->setResult = tupstore;
478
479 /*
480 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
481 * tuples are in our tuplestore and passed back through
482 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
483 * that we actually used to build our tuples with, so the caller can
484 * verify we did what it was expecting.
485 */
486 rsinfo->setDesc = tupdesc;
487 MemoryContextSwitchTo(oldcontext);
488
489 /* free swish object and close */
490 Free_Search_Object( search );
491 SwishClose( swish_handle );
492
493 return (Datum) 0;
494 }
495
496
497
498
499 /* make text var from property */
500 char *prop2text(SW_RESULT sw_res, char *propname) {
501 char *val;
502 char *prop;
503 int len;
504
505 elog(DEBUG2, "prop2text(%s)", propname);
506
507 prop = SwishResultPropertyStr( sw_res, propname );
508 if (error_or_abort( swish_handle )) return NULL;
509
510 len = strlen(prop);
511 elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len);
512
513 len++;
514 len *= sizeof(char);
515
516 elog(DEBUG2, "palloc(%d)", len);
517
518 val = palloc(len);
519
520 memset(val, 0, len);
521 strncpy(val, prop, len);
522
523 elog(DEBUG2, "val=%s", val);
524
525 return val;
526 }
527
528 /* make integer variable from property */
529 char *prop2int(SW_RESULT sw_res, char *propname) {
530 char *val;
531 unsigned long prop;
532 int len;
533
534 elog(DEBUG2, "prop2int(%s)", propname);
535
536 prop = SwishResultPropertyULong( sw_res, propname );
537 if (error_or_abort( swish_handle )) return NULL;
538
539 elog(DEBUG1, "prop2int(%s) = %lu", propname, prop);
540
541 len = 128 * sizeof(char);
542 elog(DEBUG2, "palloc(%d)", len);
543
544 val = palloc(len);
545 memset(val, 0, len);
546
547 snprintf(val, len, "%lu", prop);
548
549 elog(DEBUG2, "val=%s", val);
550
551 return val;
552 }
553
554
555 /*
556 * check if swish has returned error, and elog it.
557 */
558 static int error_or_abort( SW_HANDLE swish_handle ) {
559 if ( !SwishError( swish_handle ) )
560 return 0;
561
562 /* print a message */
563 elog(ERROR,
564 "pgswish error: Number [%d], Type [%s], Optional Message: [%s]\n",
565 SwishError( swish_handle ),
566 SwishErrorString( swish_handle ),
567 SwishLastErrorMsg( swish_handle )
568 );
569 if ( swish_results ) Free_Results_Object( swish_results );
570 if ( search ) Free_Search_Object( search );
571 SwishClose( swish_handle );
572
573 return 1;
574 }
575

  ViewVC Help
Powered by ViewVC 1.1.26