/[pgswish]/trunk/pgswish.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/pgswish.c

Parent Directory | Revision Log


Revision 23 - (hide annotations)
Sun May 29 23:00:19 2005 UTC (18 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 15946 byte(s)
make it less chatty at INFO logging level

1 dpavlin 8 /*
2     * integrate swish-e into PostgreSQL
3     *
4     * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-02-18
5     *
6     * TODO:
7     * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx
8     * - support composite type arguments
9 dpavlin 19 * - split error_or_abort
10     * - use getResultPropValue not SwishResultPropertyStr
11 dpavlin 22 * - fix everything about pgswish_arr which is broken
12 dpavlin 8 *
13     * NOTES:
14     * - clear structures with memset to support hash indexes (who would like
15     * to create hash index on table returned from function?)
16 dpavlin 10 * - number of returned rows is set by PostgreSQL evaluator, see:
17     * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
18 dpavlin 8 *
19 dpavlin 9 * Based on:
20     * - C example from PostgreSQL documentation (BSD licence)
21     * - swish-e example src/libtest.c (GPL)
22     * - _textin/_textout from pgcurl.c (LGPL)
23     *
24     * This code is licenced under GPL
25 dpavlin 8 */
26    
#include "postgres.h"

#include <unistd.h>

#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"

#include <swish-e.h>
34    
35 dpavlin 9 #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
36     #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
37 dpavlin 11 #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
38     #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
39 dpavlin 8
40 dpavlin 20 /* Globals */
41     static SW_HANDLE swish_handle = NULL; /* Database handle */
42     static SW_SEARCH search = NULL; /* search handle -- search parameters */
43     static SW_RESULTS swish_results = NULL; /* results handle -- list of results */
44     static SW_RESULT *sw_res = NULL; /* one row from swish-e results */
45 dpavlin 8
46     /* define PostgreSQL v1 function */
47     PG_FUNCTION_INFO_V1(pgswish);
48     Datum pgswish(PG_FUNCTION_ARGS) {
49    
50     FuncCallContext *funcctx;
51     int call_cntr;
52     int max_calls;
53     TupleDesc tupdesc;
54     TupleTableSlot *slot;
55     AttInMetadata *attinmeta;
56 dpavlin 9 char *index_path;
57     char *query;
58 dpavlin 20 FILE *logfh;
59 dpavlin 8
60     /* stuff done only on the first call of the function */
61     if (SRF_IS_FIRSTCALL()) {
62     MemoryContext oldcontext;
63    
64 dpavlin 9 /* take arguments from function */
65     //index_path = _textout(PG_GETARG_TEXT_P(0));
66     index_path = _textout(PG_GETARG_TEXT_P(0));
67     query = _textout(PG_GETARG_TEXT_P(1));
68    
69 dpavlin 8 /* create a function context for cross-call persistence */
70     funcctx = SRF_FIRSTCALL_INIT();
71    
72     /* switch to memory context appropriate for multiple function calls */
73     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
74    
75 dpavlin 9
76 dpavlin 20 /* Send any errors or warnings to log, as well as
77     * STDOUT and STDERR (just to be sure) */
78     if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
79     set_error_handle( logfh );
80 dpavlin 23 elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log");
81 dpavlin 20 /* redirect STDOUT and STDERR to log */
82     dup2(1, logfh);
83     dup2(2, logfh);
84     } else {
85     elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
86     }
87 dpavlin 8
88 dpavlin 23 elog(DEBUG1, "pgswish: SwishInit(%s)", index_path);
89 dpavlin 20
90 dpavlin 9 swish_handle = SwishInit( index_path );
91 dpavlin 8
92 dpavlin 23 if ( SwishError( swish_handle ) || ! swish_handle) {
93     elog(ERROR, "pgswish: SwishInit(%s) failed: %s", index_path, SwishErrorString( swish_handle ));
94 dpavlin 20
95 dpavlin 8 SRF_RETURN_DONE(funcctx);
96     }
97    
98 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
99 dpavlin 8 /* set ranking scheme. default is 0 */
100 dpavlin 10 SwishRankScheme( swish_handle, 0 );
101 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
102 dpavlin 8
103 dpavlin 23 elog(DEBUG1, "pgswish: SwishQuery(%s)", query);
104 dpavlin 8 /* Here's a short-cut to searching that creates a search object and searches at the same time */
105 dpavlin 11 swish_results = SwishQuery( swish_handle, query);
106 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
107 dpavlin 8
108     /* total number of tuples to be returned */
109 dpavlin 11 funcctx->max_calls = SwishHits( swish_results );
110 dpavlin 8
111     /* check if results exists */
112     if ( 0 == funcctx->max_calls )
113 dpavlin 9 elog(INFO, "no results for: %s", query );
114 dpavlin 8
115 dpavlin 23 elog(DEBUG1, "pgswish: SwishHits = %d", funcctx->max_calls);
116 dpavlin 9
117 dpavlin 8 /* Build a tuple description for a __pgswish tuple */
118     tupdesc = RelationNameGetTupleDesc("__pgswish");
119    
120     /* allocate a slot for a tuple with this tupdesc */
121     slot = TupleDescGetSlot(tupdesc);
122    
123     /* assign slot to function context */
124     funcctx->slot = slot;
125    
126     /*
127     * generate attribute metadata needed later to produce tuples from raw
128     * C strings
129     */
130     attinmeta = TupleDescGetAttInMetadata(tupdesc);
131     funcctx->attinmeta = attinmeta;
132    
133     MemoryContextSwitchTo(oldcontext);
134 dpavlin 13
135 dpavlin 23 elog(DEBUG1, "SRF_IS_FIRSTCALL done");
136 dpavlin 8 }
137    
138     /* stuff done on every call of the function */
139     funcctx = SRF_PERCALL_SETUP();
140    
141     call_cntr = funcctx->call_cntr;
142     max_calls = funcctx->max_calls;
143     slot = funcctx->slot;
144     attinmeta = funcctx->attinmeta;
145    
146     if (call_cntr < max_calls) {
147     char **values;
148     HeapTuple tuple;
149     Datum result;
150    
151 dpavlin 23 elog(DEBUG1, "pgswish: loop count %d", call_cntr);
152 dpavlin 11
153     if (! swish_results) {
154     elog(ERROR, "pgswish: no swish-e results");
155     SRF_RETURN_DONE(funcctx);
156     }
157    
158 dpavlin 14 elog(DEBUG1, "pgswish: check for swish-e error");
159 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
160 dpavlin 11
161 dpavlin 8 /*
162     * Prepare a values array for storage in our slot.
163     * This should be an array of C strings which will
164     * be processed later by the type input functions.
165     */
166    
167 dpavlin 11 sw_res = SwishNextResult( swish_results );
168     if (! sw_res) {
169     elog(ERROR, "pgswish: swish-e sort result list: %d rows expected %d", call_cntr, max_calls - 1);
170 dpavlin 19 Free_Results_Object( swish_results );
171     Free_Search_Object( search );
172 dpavlin 11 SRF_RETURN_DONE(funcctx);
173     }
174    
175 dpavlin 23 elog(DEBUG1, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
176 dpavlin 12 SwishResultPropertyStr ( sw_res, "swishdocpath" ),
177     SwishResultPropertyULong ( sw_res, "swishrank" ),
178     SwishResultPropertyULong ( sw_res, "swishdocsize" ),
179     SwishResultPropertyStr ( sw_res, "swishtitle"),
180     SwishResultPropertyStr ( sw_res, "swishdbfile" ),
181     SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
182     SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
183     SwishResultPropertyULong ( sw_res, "swishfilenum" )
184     );
185 dpavlin 11
186 dpavlin 13 values = (char **) palloc(4 * sizeof(char *));
187    
188 dpavlin 12 values[0] = prop2int( sw_res, "swishrank" );
189     values[1] = prop2text( sw_res, "swishdocpath" );
190     values[2] = prop2text( sw_res, "swishtitle" );
191     values[3] = prop2int( sw_res, "swishdocsize" );
192 dpavlin 13
193     /*
194     values[0] = (char *) palloc(16 * sizeof(char));
195     snprintf(values[0], 16, "%d", 1);
196     values[1] = (char *) palloc(16 * sizeof(char));
197     snprintf(values[1], 16, "%d", 2);
198     values[2] = (char *) palloc(16 * sizeof(char));
199     snprintf(values[2], 16, "%d", 3);
200     values[3] = (char *) palloc(16 * sizeof(char));
201     snprintf(values[3], 16, "%d", 4);
202     */
203 dpavlin 16
204 dpavlin 8 /* build a tuple */
205     tuple = BuildTupleFromCStrings(attinmeta, values);
206    
207     /* make the tuple into a datum */
208     result = TupleGetDatum(slot, tuple);
209    
210 dpavlin 12 /* clean up ? */
211 dpavlin 13 pfree(values[0]);
212     pfree(values[1]);
213     pfree(values[2]);
214     pfree(values[3]);
215     pfree(values);
216    
217 dpavlin 17 elog(DEBUG1, "row: %s|%s|%s|%s",values[0],values[1],values[2],values[3]);
218 dpavlin 12
219 dpavlin 8 SRF_RETURN_NEXT(funcctx, result);
220     } else {
221 dpavlin 23 elog(DEBUG1, "loop over");
222 dpavlin 13
223 dpavlin 8 /* free swish object and close */
224     Free_Search_Object( search );
225     SwishClose( swish_handle );
226    
227     /* do when there is no more left */
228     SRF_RETURN_DONE(funcctx);
229     }
230     }
231    
232    
233 dpavlin 22 /*
234     * new function with support for property selection
235     */
236    
237     PG_FUNCTION_INFO_V1(pgswish_arr);
238     Datum pgswish_arr(PG_FUNCTION_ARGS)
239     {
240     ArrayType *prop_arr = PG_GETARG_ARRAYTYPE_P(5);
241     Oid prop_element_type = ARR_ELEMTYPE(prop_arr);
242     int prop_ndims = ARR_NDIM(prop_arr);
243     int *prop_dim_counts = ARR_DIMS(prop_arr);
244     int *prop_dim_lower_bounds = ARR_LBOUND(prop_arr);
245     int ncols = 0;
246     int nrows = 0;
247     int indx[MAXDIM];
248     int16 prop_len;
249     bool prop_byval;
250     char prop_align;
251     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
252     AttInMetadata *attinmeta;
253     TupleDesc tupdesc;
254     Tuplestorestate *tupstore = NULL;
255     HeapTuple tuple;
256     MemoryContext per_query_ctx;
257     MemoryContext oldcontext;
258     Datum dvalue;
259     char **values;
260     int rsinfo_ncols;
261     int i, j;
262     /* swish-e */
263     FILE *logfh;
264     int resnum;
265     int limit = 0;
266     int offset = 0;
267    
268     char *index_path;
269     char *query;
270     char *attr;
271    
272    
273     /* only allow 1D input array */
274     if (prop_ndims == 1)
275     {
276     ncols = prop_dim_counts[0];
277     }
278     else
279     ereport(ERROR,
280     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
281     errmsg("invalid input array"),
282     errdetail("Input array must have 1 dimension")));
283    
284     /* check to see if caller supports us returning a tuplestore */
285     if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
286     ereport(ERROR,
287     (errcode(ERRCODE_SYNTAX_ERROR),
288     errmsg("materialize mode required, but it is not " \
289     "allowed in this context")));
290    
291     /* get info about element type needed to construct the array */
292     get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align);
293    
294     /* get the requested return tuple description */
295     tupdesc = rsinfo->expectedDesc;
296     rsinfo_ncols = tupdesc->natts;
297    
298     /*
299     * The requested tuple description better match up with the array
300     * we were given.
301     */
302     if (rsinfo_ncols != ncols)
303     ereport(ERROR,
304     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
305     errmsg("invalid input array"),
306     errdetail("Number of elements in array must match number of query specified columns.")));
307    
308     /* OK, use it */
309     attinmeta = TupleDescGetAttInMetadata(tupdesc);
310    
311     /* Now go to work */
312     rsinfo->returnMode = SFRM_Materialize;
313    
314     per_query_ctx = fcinfo->flinfo->fn_mcxt;
315     oldcontext = MemoryContextSwitchTo(per_query_ctx);
316    
317     /* initialize our tuplestore */
318     tupstore = tuplestore_begin_heap(true, false, SortMem);
319    
320    
321     /* take rest of arguments from function */
322    
323     /* index path */
324     if (PG_ARGISNULL(0)) {
325     ereport(ERROR,
326     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
327     errmsg("index path can't be null"),
328     errdetail("Index path must be valid full path to swish-e index")));
329     }
330     index_path = _textout(PG_GETARG_TEXT_P(0));
331    
332     /* query string */
333     if (PG_ARGISNULL(0)) {
334     query = "";
335     } else {
336     query = _textout(PG_GETARG_TEXT_P(1));
337     }
338    
339     /* atribute filter */
340     if (PG_ARGISNULL(2)) {
341     attr = "";
342     } else {
343     attr = _textout(PG_GETARG_TEXT_P(2));
344     }
345    
346     /* limit */
347     if (PG_ARGISNULL(3)) {
348     limit = 0;
349     } else {
350     limit = PG_GETARG_INT32(3);
351     }
352    
353     /* offset */
354     if (PG_ARGISNULL(4)) {
355     offset = 0;
356     } else {
357     offset = PG_GETARG_INT32(4);
358     }
359    
360    
361     /* Send any errors or warnings to log, as well as
362     * STDOUT and STDERR (just to be sure) */
363     if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
364     set_error_handle( logfh );
365 dpavlin 23 elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log");
366 dpavlin 22 /* redirect STDOUT and STDERR to log */
367     dup2(1, logfh);
368     dup2(2, logfh);
369     } else {
370     elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
371     }
372    
373 dpavlin 23 elog(DEBUG1, "pgswish: SwishInit(%s)", index_path);
374 dpavlin 22
375     swish_handle = SwishInit( index_path );
376    
377     if ( SwishError( swish_handle ) || ! swish_handle )
378     ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
379     errmsg("pgswish: SwishInit(%s) failed", index_path ),
380     errdetail( SwishErrorString( swish_handle ) )
381     ));
382    
383     elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
384    
385    
386     /* set ranking scheme. default is 0 */
387     SwishRankScheme( swish_handle, 0 );
388     error_or_abort( swish_handle );
389    
390 dpavlin 23 elog(DEBUG1, "pgswish: SwishQuery(%s)", query);
391 dpavlin 22 /* Here's a short-cut to searching that creates a search object
392     * and searches at the same time */
393    
394     /* set the search phrase to the search condition object */
395     if (! PG_ARGISNULL(1) && strlen(query) > 0)
396     swish_results = SwishQuery( swish_handle, query);
397     error_or_abort( swish_handle );
398    
399     /* total number of tuples to be returned */
400     resnum = SwishHits( swish_results );
401    
402     /* FIXME */
403     if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
404     elog(DEBUG1,"ignored: %s", attr);
405     }
406    
407     /* check if results exists */
408     if ( 0 == resnum ) {
409     elog(INFO, "pgswish: no results for: %s", query );
410     }
411    
412     /* total number of tuples to be returned */
413     if (limit && limit < resnum) {
414     nrows = limit - offset;
415     } else {
416     nrows = resnum - offset;
417     }
418    
419    
420     elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query);
421    
422    
423     values = (char **) palloc(ncols * sizeof(char *));
424    
425     for (i = 0; i < nrows; i++)
426     {
427     SwishSeekResult( swish_results, i + offset );
428     sw_res = SwishNextResult( swish_results );
429    
430     /* get result from swish-e */
431     if (! ( SwishErrorString( swish_handle ) ) ) {
432     elog(INFO, "can't find result %d", i + offset);
433     } else {
434 dpavlin 23 elog(DEBUG1, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
435 dpavlin 22 SwishResultPropertyStr ( sw_res, "swishdocpath" ),
436     SwishResultPropertyULong ( sw_res, "swishrank" ),
437     SwishResultPropertyULong ( sw_res, "swishdocsize" ),
438     SwishResultPropertyStr ( sw_res, "swishtitle"),
439     SwishResultPropertyStr ( sw_res, "swishdbfile" ),
440     SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
441     SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
442     SwishResultPropertyULong ( sw_res, "swishfilenum" )
443     );
444     }
445    
446     /* iterate over results */
447     for (j = 0; j < ncols; j++)
448     {
449     bool isnull;
450    
451     /* array value of this position */
452     indx[0] = j + prop_dim_lower_bounds[0];
453    
454     dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull);
455    
456     if (!isnull && sw_res)
457     values[j] = DatumGetCString(
458     prop2text( sw_res,
459     (char *)DirectFunctionCall1(textout, dvalue)
460     ));
461     else
462     values[j] = NULL;
463     }
464     /* construct the tuple */
465     tuple = BuildTupleFromCStrings(attinmeta, values);
466    
467     /* now store it */
468     tuplestore_puttuple(tupstore, tuple);
469    
470     }
471    
472     tuplestore_donestoring(tupstore);
473     rsinfo->setResult = tupstore;
474    
475     /*
476     * SFRM_Materialize mode expects us to return a NULL Datum. The actual
477     * tuples are in our tuplestore and passed back through
478     * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
479     * that we actually used to build our tuples with, so the caller can
480     * verify we did what it was expecting.
481     */
482     rsinfo->setDesc = tupdesc;
483     MemoryContextSwitchTo(oldcontext);
484    
485     /* free swish object and close */
486     Free_Search_Object( search );
487     SwishClose( swish_handle );
488    
489     return (Datum) 0;
490     }
491    
492    
493    
494    
495 dpavlin 19 /* make text var from property */
496 dpavlin 12 char *prop2text(SW_RESULT sw_res, char *propname) {
497     char *val;
498     char *prop;
499     int len;
500    
501 dpavlin 17 elog(DEBUG2, "prop2text(%s)", propname);
502 dpavlin 12
503     prop = SwishResultPropertyStr( sw_res, propname );
504 dpavlin 19 if (error_or_abort( swish_handle )) return NULL;
505 dpavlin 12
506     len = strlen(prop);
507 dpavlin 17 elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len);
508 dpavlin 12
509     len++;
510     len *= sizeof(char);
511    
512 dpavlin 17 elog(DEBUG2, "palloc(%d)", len);
513 dpavlin 12
514     val = palloc(len);
515    
516     memset(val, 0, len);
517     strncpy(val, prop, len);
518    
519 dpavlin 17 elog(DEBUG2, "val=%s", val);
520 dpavlin 12
521     return val;
522     }
523    
524 dpavlin 14 /* make integer variable from property */
525 dpavlin 12 char *prop2int(SW_RESULT sw_res, char *propname) {
526     char *val;
527     unsigned long prop;
528     int len;
529    
530 dpavlin 17 elog(DEBUG2, "prop2int(%s)", propname);
531 dpavlin 12
532     prop = SwishResultPropertyULong( sw_res, propname );
533 dpavlin 19 if (error_or_abort( swish_handle )) return NULL;
534 dpavlin 12
535 dpavlin 17 elog(DEBUG1, "prop2int(%s) = %lu", propname, prop);
536 dpavlin 12
537     len = 128 * sizeof(char);
538 dpavlin 17 elog(DEBUG2, "palloc(%d)", len);
539 dpavlin 12
540     val = palloc(len);
541     memset(val, 0, len);
542    
543     snprintf(val, len, "%lu", prop);
544    
545 dpavlin 17 elog(DEBUG2, "val=%s", val);
546 dpavlin 12
547     return val;
548     }
549    
550    
551 dpavlin 14 /*
552     * check if swish has returned error, and elog it.
553     */
554 dpavlin 19 static int error_or_abort( SW_HANDLE swish_handle ) {
555 dpavlin 8 if ( !SwishError( swish_handle ) )
556 dpavlin 19 return 0;
557 dpavlin 8
558     /* print a message */
559     elog(ERROR,
560     "pgswish error: Number [%d], Type [%s], Optional Message: [%s]\n",
561     SwishError( swish_handle ),
562     SwishErrorString( swish_handle ),
563     SwishLastErrorMsg( swish_handle )
564     );
565 dpavlin 19 if ( swish_results ) Free_Results_Object( swish_results );
566 dpavlin 8 if ( search ) Free_Search_Object( search );
567     SwishClose( swish_handle );
568 dpavlin 19
569     return 1;
570 dpavlin 8 }
571    

  ViewVC Help
Powered by ViewVC 1.1.26