/[pgswish]/trunk/pgswish.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/pgswish.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 22 - (hide annotations)
Sun May 29 22:41:20 2005 UTC (18 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 16026 byte(s)
Added pgswish_arr function using SFRM_Materialize mode. Integer properties
don't work yet.

1 dpavlin 8 /*
2     * integrate swish-e into PostgreSQL
3     *
4     * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-02-18
5     *
6     * TODO:
7     * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx
8     * - support composite type arguments
9 dpavlin 19 * - split error_or_abort
10     * - use getResultPropValue not SwishResultPropertyStr
11 dpavlin 22 * - fix everything about pgswish_arr which is broken
12 dpavlin 8 *
13     * NOTES:
14     * - clear structures with memset to support hash indexes (who would like
15     * to create a hash index on a table returned from a function?)
16 dpavlin 10 * - number of returned rows is set by PostgreSQL evaluator, see:
17     * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
18 dpavlin 8 *
19 dpavlin 9 * Based on:
20     * - C example from PostgreSQL documentation (BSD licence)
21     * - swish-e example src/libtest.c (GPL)
22     * - _textin/_textout from pgcurl.c (LGPL)
23     *
24     * This code is licenced under GPL
25 dpavlin 8 */
26    
#include "postgres.h"

#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"

#include <swish-e.h>
34    
35 dpavlin 9 #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
36     #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
37 dpavlin 11 #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
38     #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
39 dpavlin 8
40 dpavlin 20 /* Globals */
41     static SW_HANDLE swish_handle = NULL; /* Database handle */
42     static SW_SEARCH search = NULL; /* search handle -- search parameters */
43     static SW_RESULTS swish_results = NULL; /* results handle -- list of results */
44     static SW_RESULT *sw_res = NULL; /* one row from swish-e results */
45 dpavlin 8
46     /* define PostgreSQL v1 function */
47     PG_FUNCTION_INFO_V1(pgswish);
48     Datum pgswish(PG_FUNCTION_ARGS) {
49    
50     FuncCallContext *funcctx;
51     int call_cntr;
52     int max_calls;
53     TupleDesc tupdesc;
54     TupleTableSlot *slot;
55     AttInMetadata *attinmeta;
56 dpavlin 9 char *index_path;
57     char *query;
58 dpavlin 20 FILE *logfh;
59 dpavlin 8
60     /* stuff done only on the first call of the function */
61     if (SRF_IS_FIRSTCALL()) {
62     MemoryContext oldcontext;
63    
64 dpavlin 9 /* take arguments from function */
65     //index_path = _textout(PG_GETARG_TEXT_P(0));
66     index_path = _textout(PG_GETARG_TEXT_P(0));
67     query = _textout(PG_GETARG_TEXT_P(1));
68    
69 dpavlin 8 /* create a function context for cross-call persistence */
70     funcctx = SRF_FIRSTCALL_INIT();
71    
72     /* switch to memory context appropriate for multiple function calls */
73     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
74    
75 dpavlin 9
76 dpavlin 20 /* Send any errors or warnings to log, as well as
77     * STDOUT and STDERR (just to be sure) */
78     if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
79     set_error_handle( logfh );
80     elog(INFO, "loggin swish-e errors to /tmp/pgswish.log");
81     /* redirect STDOUT and STDERR to log */
82     dup2(1, logfh);
83     dup2(2, logfh);
84     } else {
85     elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
86     }
87 dpavlin 8
88 dpavlin 9 elog(INFO, "pgswish: SwishInit(%s)", index_path);
89 dpavlin 20
90 dpavlin 9 swish_handle = SwishInit( index_path );
91 dpavlin 8
92 dpavlin 20 if ( SwishError( swish_handle ) )
93     elog(INFO, "pgswish: SwishInit(%s) failed: %s", index_path, SwishErrorString( swish_handle ));
94    
95     elog(INFO, "handle: %08x", swish_handle);
96    
97 dpavlin 8 if (! swish_handle) {
98 dpavlin 9 elog(ERROR, "pgswish: can't open %s", index_path);
99 dpavlin 8 SRF_RETURN_DONE(funcctx);
100     }
101    
102 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
103 dpavlin 8 /* set ranking scheme. default is 0 */
104 dpavlin 10 SwishRankScheme( swish_handle, 0 );
105 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
106 dpavlin 8
107 dpavlin 9 elog(INFO, "pgswish: SwishQuery(%s)", query);
108 dpavlin 8 /* Here's a short-cut to searching that creates a search object and searches at the same time */
109 dpavlin 11 swish_results = SwishQuery( swish_handle, query);
110 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
111 dpavlin 8
112     /* total number of tuples to be returned */
113 dpavlin 11 funcctx->max_calls = SwishHits( swish_results );
114 dpavlin 8
115     /* check if results exists */
116     if ( 0 == funcctx->max_calls )
117 dpavlin 9 elog(INFO, "no results for: %s", query );
118 dpavlin 8
119 dpavlin 9 elog(INFO, "pgswish: SwishHits = %d", funcctx->max_calls);
120    
121 dpavlin 8 /* Build a tuple description for a __pgswish tuple */
122     tupdesc = RelationNameGetTupleDesc("__pgswish");
123    
124     /* allocate a slot for a tuple with this tupdesc */
125     slot = TupleDescGetSlot(tupdesc);
126    
127     /* assign slot to function context */
128     funcctx->slot = slot;
129    
130     /*
131     * generate attribute metadata needed later to produce tuples from raw
132     * C strings
133     */
134     attinmeta = TupleDescGetAttInMetadata(tupdesc);
135     funcctx->attinmeta = attinmeta;
136    
137     MemoryContextSwitchTo(oldcontext);
138 dpavlin 13
139     elog(INFO, "SRF_IS_FIRSTCALL done");
140 dpavlin 8 }
141    
142     /* stuff done on every call of the function */
143     funcctx = SRF_PERCALL_SETUP();
144    
145     call_cntr = funcctx->call_cntr;
146     max_calls = funcctx->max_calls;
147     slot = funcctx->slot;
148     attinmeta = funcctx->attinmeta;
149    
150     if (call_cntr < max_calls) {
151     char **values;
152     HeapTuple tuple;
153     Datum result;
154    
155 dpavlin 13 elog(INFO, "pgswish: loop count %d", call_cntr);
156 dpavlin 11
157     if (! swish_results) {
158     elog(ERROR, "pgswish: no swish-e results");
159     SRF_RETURN_DONE(funcctx);
160     }
161    
162 dpavlin 14 elog(DEBUG1, "pgswish: check for swish-e error");
163 dpavlin 19 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
164 dpavlin 11
165 dpavlin 8 /*
166     * Prepare a values array for storage in our slot.
167     * This should be an array of C strings which will
168     * be processed later by the type input functions.
169     */
170    
171 dpavlin 11 sw_res = SwishNextResult( swish_results );
172     if (! sw_res) {
173     elog(ERROR, "pgswish: swish-e sort result list: %d rows expected %d", call_cntr, max_calls - 1);
174 dpavlin 19 Free_Results_Object( swish_results );
175     Free_Search_Object( search );
176 dpavlin 11 SRF_RETURN_DONE(funcctx);
177     }
178    
179 dpavlin 12 elog(INFO, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
180     SwishResultPropertyStr ( sw_res, "swishdocpath" ),
181     SwishResultPropertyULong ( sw_res, "swishrank" ),
182     SwishResultPropertyULong ( sw_res, "swishdocsize" ),
183     SwishResultPropertyStr ( sw_res, "swishtitle"),
184     SwishResultPropertyStr ( sw_res, "swishdbfile" ),
185     SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
186     SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
187     SwishResultPropertyULong ( sw_res, "swishfilenum" )
188     );
189 dpavlin 11
190 dpavlin 13 values = (char **) palloc(4 * sizeof(char *));
191    
192 dpavlin 12 values[0] = prop2int( sw_res, "swishrank" );
193     values[1] = prop2text( sw_res, "swishdocpath" );
194     values[2] = prop2text( sw_res, "swishtitle" );
195     values[3] = prop2int( sw_res, "swishdocsize" );
196 dpavlin 13
197     /*
198     values[0] = (char *) palloc(16 * sizeof(char));
199     snprintf(values[0], 16, "%d", 1);
200     values[1] = (char *) palloc(16 * sizeof(char));
201     snprintf(values[1], 16, "%d", 2);
202     values[2] = (char *) palloc(16 * sizeof(char));
203     snprintf(values[2], 16, "%d", 3);
204     values[3] = (char *) palloc(16 * sizeof(char));
205     snprintf(values[3], 16, "%d", 4);
206     */
207 dpavlin 16
208 dpavlin 8 /* build a tuple */
209     tuple = BuildTupleFromCStrings(attinmeta, values);
210    
211     /* make the tuple into a datum */
212     result = TupleGetDatum(slot, tuple);
213    
214 dpavlin 12 /* clean up ? */
215 dpavlin 13 pfree(values[0]);
216     pfree(values[1]);
217     pfree(values[2]);
218     pfree(values[3]);
219     pfree(values);
220    
221 dpavlin 17 elog(DEBUG1, "row: %s|%s|%s|%s",values[0],values[1],values[2],values[3]);
222 dpavlin 12
223 dpavlin 8 SRF_RETURN_NEXT(funcctx, result);
224     } else {
225 dpavlin 13 elog(INFO, "loop over");
226    
227 dpavlin 8 /* free swish object and close */
228     Free_Search_Object( search );
229     SwishClose( swish_handle );
230    
231     /* do when there is no more left */
232     SRF_RETURN_DONE(funcctx);
233     }
234     }
235    
236    
237 dpavlin 22 /*
238     * new function with support for property selection
239     */
240    
241     PG_FUNCTION_INFO_V1(pgswish_arr);
242     Datum pgswish_arr(PG_FUNCTION_ARGS)
243     {
244     ArrayType *prop_arr = PG_GETARG_ARRAYTYPE_P(5);
245     Oid prop_element_type = ARR_ELEMTYPE(prop_arr);
246     int prop_ndims = ARR_NDIM(prop_arr);
247     int *prop_dim_counts = ARR_DIMS(prop_arr);
248     int *prop_dim_lower_bounds = ARR_LBOUND(prop_arr);
249     int ncols = 0;
250     int nrows = 0;
251     int indx[MAXDIM];
252     int16 prop_len;
253     bool prop_byval;
254     char prop_align;
255     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
256     AttInMetadata *attinmeta;
257     TupleDesc tupdesc;
258     Tuplestorestate *tupstore = NULL;
259     HeapTuple tuple;
260     MemoryContext per_query_ctx;
261     MemoryContext oldcontext;
262     Datum dvalue;
263     char **values;
264     int rsinfo_ncols;
265     int i, j;
266     /* swish-e */
267     FILE *logfh;
268     int resnum;
269     int limit = 0;
270     int offset = 0;
271    
272     char *index_path;
273     char *query;
274     char *attr;
275    
276    
277     /* only allow 1D input array */
278     if (prop_ndims == 1)
279     {
280     ncols = prop_dim_counts[0];
281     }
282     else
283     ereport(ERROR,
284     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
285     errmsg("invalid input array"),
286     errdetail("Input array must have 1 dimension")));
287    
288     /* check to see if caller supports us returning a tuplestore */
289     if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
290     ereport(ERROR,
291     (errcode(ERRCODE_SYNTAX_ERROR),
292     errmsg("materialize mode required, but it is not " \
293     "allowed in this context")));
294    
295     /* get info about element type needed to construct the array */
296     get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align);
297    
298     /* get the requested return tuple description */
299     tupdesc = rsinfo->expectedDesc;
300     rsinfo_ncols = tupdesc->natts;
301    
302     /*
303     * The requested tuple description better match up with the array
304     * we were given.
305     */
306     if (rsinfo_ncols != ncols)
307     ereport(ERROR,
308     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
309     errmsg("invalid input array"),
310     errdetail("Number of elements in array must match number of query specified columns.")));
311    
312     /* OK, use it */
313     attinmeta = TupleDescGetAttInMetadata(tupdesc);
314    
315     /* Now go to work */
316     rsinfo->returnMode = SFRM_Materialize;
317    
318     per_query_ctx = fcinfo->flinfo->fn_mcxt;
319     oldcontext = MemoryContextSwitchTo(per_query_ctx);
320    
321     /* initialize our tuplestore */
322     tupstore = tuplestore_begin_heap(true, false, SortMem);
323    
324    
325     /* take rest of arguments from function */
326    
327     /* index path */
328     if (PG_ARGISNULL(0)) {
329     ereport(ERROR,
330     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
331     errmsg("index path can't be null"),
332     errdetail("Index path must be valid full path to swish-e index")));
333     }
334     index_path = _textout(PG_GETARG_TEXT_P(0));
335    
336     /* query string */
337     if (PG_ARGISNULL(0)) {
338     query = "";
339     } else {
340     query = _textout(PG_GETARG_TEXT_P(1));
341     }
342    
343     /* atribute filter */
344     if (PG_ARGISNULL(2)) {
345     attr = "";
346     } else {
347     attr = _textout(PG_GETARG_TEXT_P(2));
348     }
349    
350     /* limit */
351     if (PG_ARGISNULL(3)) {
352     limit = 0;
353     } else {
354     limit = PG_GETARG_INT32(3);
355     }
356    
357     /* offset */
358     if (PG_ARGISNULL(4)) {
359     offset = 0;
360     } else {
361     offset = PG_GETARG_INT32(4);
362     }
363    
364    
365     /* Send any errors or warnings to log, as well as
366     * STDOUT and STDERR (just to be sure) */
367     if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
368     set_error_handle( logfh );
369     elog(INFO, "loggin swish-e errors to /tmp/pgswish.log");
370     /* redirect STDOUT and STDERR to log */
371     dup2(1, logfh);
372     dup2(2, logfh);
373     } else {
374     elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
375     }
376    
377     elog(INFO, "pgswish: SwishInit(%s)", index_path);
378    
379     swish_handle = SwishInit( index_path );
380    
381     if ( SwishError( swish_handle ) || ! swish_handle )
382     ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
383     errmsg("pgswish: SwishInit(%s) failed", index_path ),
384     errdetail( SwishErrorString( swish_handle ) )
385     ));
386    
387     elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
388    
389    
390     /* set ranking scheme. default is 0 */
391     SwishRankScheme( swish_handle, 0 );
392     error_or_abort( swish_handle );
393    
394     elog(INFO, "pgswish: SwishQuery(%s)", query);
395     /* Here's a short-cut to searching that creates a search object
396     * and searches at the same time */
397    
398     /* set the search phrase to the search condition object */
399     if (! PG_ARGISNULL(1) && strlen(query) > 0)
400     swish_results = SwishQuery( swish_handle, query);
401     error_or_abort( swish_handle );
402    
403     /* total number of tuples to be returned */
404     resnum = SwishHits( swish_results );
405    
406     /* FIXME */
407     if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
408     elog(DEBUG1,"ignored: %s", attr);
409     }
410    
411     /* check if results exists */
412     if ( 0 == resnum ) {
413     elog(INFO, "pgswish: no results for: %s", query );
414     }
415    
416     /* total number of tuples to be returned */
417     if (limit && limit < resnum) {
418     nrows = limit - offset;
419     } else {
420     nrows = resnum - offset;
421     }
422    
423    
424     elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query);
425    
426    
427     values = (char **) palloc(ncols * sizeof(char *));
428    
429     for (i = 0; i < nrows; i++)
430     {
431     SwishSeekResult( swish_results, i + offset );
432     sw_res = SwishNextResult( swish_results );
433    
434     /* get result from swish-e */
435     if (! ( SwishErrorString( swish_handle ) ) ) {
436     elog(INFO, "can't find result %d", i + offset);
437     } else {
438     elog(INFO, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
439     SwishResultPropertyStr ( sw_res, "swishdocpath" ),
440     SwishResultPropertyULong ( sw_res, "swishrank" ),
441     SwishResultPropertyULong ( sw_res, "swishdocsize" ),
442     SwishResultPropertyStr ( sw_res, "swishtitle"),
443     SwishResultPropertyStr ( sw_res, "swishdbfile" ),
444     SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
445     SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
446     SwishResultPropertyULong ( sw_res, "swishfilenum" )
447     );
448     }
449    
450     /* iterate over results */
451     for (j = 0; j < ncols; j++)
452     {
453     bool isnull;
454    
455     /* array value of this position */
456     indx[0] = j + prop_dim_lower_bounds[0];
457    
458     dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull);
459    
460     if (!isnull && sw_res)
461     values[j] = DatumGetCString(
462     prop2text( sw_res,
463     (char *)DirectFunctionCall1(textout, dvalue)
464     ));
465     else
466     values[j] = NULL;
467     }
468     /* construct the tuple */
469     tuple = BuildTupleFromCStrings(attinmeta, values);
470    
471     /* now store it */
472     tuplestore_puttuple(tupstore, tuple);
473    
474     }
475    
476     tuplestore_donestoring(tupstore);
477     rsinfo->setResult = tupstore;
478    
479     /*
480     * SFRM_Materialize mode expects us to return a NULL Datum. The actual
481     * tuples are in our tuplestore and passed back through
482     * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
483     * that we actually used to build our tuples with, so the caller can
484     * verify we did what it was expecting.
485     */
486     rsinfo->setDesc = tupdesc;
487     MemoryContextSwitchTo(oldcontext);
488    
489     /* free swish object and close */
490     Free_Search_Object( search );
491     SwishClose( swish_handle );
492    
493     return (Datum) 0;
494     }
495    
496    
497    
498    
499 dpavlin 19 /* make text var from property */
500 dpavlin 12 char *prop2text(SW_RESULT sw_res, char *propname) {
501     char *val;
502     char *prop;
503     int len;
504    
505 dpavlin 17 elog(DEBUG2, "prop2text(%s)", propname);
506 dpavlin 12
507     prop = SwishResultPropertyStr( sw_res, propname );
508 dpavlin 19 if (error_or_abort( swish_handle )) return NULL;
509 dpavlin 12
510     len = strlen(prop);
511 dpavlin 17 elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len);
512 dpavlin 12
513     len++;
514     len *= sizeof(char);
515    
516 dpavlin 17 elog(DEBUG2, "palloc(%d)", len);
517 dpavlin 12
518     val = palloc(len);
519    
520     memset(val, 0, len);
521     strncpy(val, prop, len);
522    
523 dpavlin 17 elog(DEBUG2, "val=%s", val);
524 dpavlin 12
525     return val;
526     }
527    
528 dpavlin 14 /* make integer variable from property */
529 dpavlin 12 char *prop2int(SW_RESULT sw_res, char *propname) {
530     char *val;
531     unsigned long prop;
532     int len;
533    
534 dpavlin 17 elog(DEBUG2, "prop2int(%s)", propname);
535 dpavlin 12
536     prop = SwishResultPropertyULong( sw_res, propname );
537 dpavlin 19 if (error_or_abort( swish_handle )) return NULL;
538 dpavlin 12
539 dpavlin 17 elog(DEBUG1, "prop2int(%s) = %lu", propname, prop);
540 dpavlin 12
541     len = 128 * sizeof(char);
542 dpavlin 17 elog(DEBUG2, "palloc(%d)", len);
543 dpavlin 12
544     val = palloc(len);
545     memset(val, 0, len);
546    
547     snprintf(val, len, "%lu", prop);
548    
549 dpavlin 17 elog(DEBUG2, "val=%s", val);
550 dpavlin 12
551     return val;
552     }
553    
554    
555 dpavlin 14 /*
556     * check if swish has returned error, and elog it.
557     */
558 dpavlin 19 static int error_or_abort( SW_HANDLE swish_handle ) {
559 dpavlin 8 if ( !SwishError( swish_handle ) )
560 dpavlin 19 return 0;
561 dpavlin 8
562     /* print a message */
563     elog(ERROR,
564     "pgswish error: Number [%d], Type [%s], Optional Message: [%s]\n",
565     SwishError( swish_handle ),
566     SwishErrorString( swish_handle ),
567     SwishLastErrorMsg( swish_handle )
568     );
569 dpavlin 19 if ( swish_results ) Free_Results_Object( swish_results );
570 dpavlin 8 if ( search ) Free_Search_Object( search );
571     SwishClose( swish_handle );
572 dpavlin 19
573     return 1;
574 dpavlin 8 }
575    

  ViewVC Help
Powered by ViewVC 1.1.26