Line # Revision Author
1 1 dpavlin /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
 * - clear structures with memset to support hash indexes (who would like
 *   to create a hash index on a table returned from a function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 41 dpavlin * - coreexample002.c and nodeexample002.c from Hyper Estraier (GPL)
18 1 dpavlin * - _textin/_textout from pgcurl.c (LGPL)
19 *
 * This code is licensed under GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 56 dpavlin #include "utils/lsyscache.h"
29 1 dpavlin #include "miscadmin.h"
30 61 dpavlin #include "commands/trigger.h"
31 #include "executor/spi.h"
32
33 1 dpavlin #include <estraier.h>
34 #include <cabin.h>
35 41 dpavlin #include <estnode.h>
36 1 dpavlin
/*
 * Helpers converting between C strings and PostgreSQL text values by calling
 * the textin/textout functions directly.
 *   _textin(str)    C string  -> Datum produced by textin
 *   _textout(str)   text      -> pointer to the C string produced by textout
 *   GET_STR(textp)  text      -> C string (same conversion as _textout)
 *   GET_TEXT(cstrp) C string  -> text pointer
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * SortMem got renamed in PostgreSQL 8.0.  Provide a fallback value; it is
 * passed as the memory budget to tuplestore_begin_heap().  The expansion is
 * parenthesized so it stays a single value inside larger expressions.
 */
#ifndef SortMem
#define SortMem (16 * 1024)
#endif

/* emit the module magic block only where the server headers provide it */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

/* string constants shared by the helpers of this file */
#define ATTR_DELIMITER "{{!}}"
#define HINTS_PREFIX "HINTS."
53 40 dpavlin
54 5 dpavlin /* prototype */
55 char *attr2text(ESTDOC *doc, char *attr);
56 41 dpavlin char *node_attr2text(ESTRESDOC *rdoc, char *attr);
57 56 dpavlin void cond_add_attr(ESTCOND *cond, char *attr);
58 1 dpavlin
59
60 /* work in progress */
61 19 dpavlin PG_FUNCTION_INFO_V1(pgest_attr);
62 Datum pgest_attr(PG_FUNCTION_ARGS)
63 1 dpavlin {
64 31 dpavlin ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6);
65 25 dpavlin Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
66 int attr_ndims = ARR_NDIM(attr_arr);
67 int *attr_dim_counts = ARR_DIMS(attr_arr);
68 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
69 19 dpavlin int ncols = 0;
70 int nrows = 0;
71 int indx[MAXDIM];
72 25 dpavlin int16 attr_len;
73 bool attr_byval;
74 char attr_align;
75 1 dpavlin ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
76 AttInMetadata *attinmeta;
77 TupleDesc tupdesc;
78 19 dpavlin Tuplestorestate *tupstore = NULL;
79 1 dpavlin HeapTuple tuple;
80 MemoryContext per_query_ctx;
81 MemoryContext oldcontext;
82 Datum dvalue;
83 char **values;
84 19 dpavlin int rsinfo_ncols;
85 1 dpavlin int i, j;
86 19 dpavlin /* estvars */
87 25 dpavlin ESTDB *db;
88 ESTCOND *cond;
89 ESTDOC *doc;
90 int ecode, *est_result, resnum;
91 int limit = 0;
92 int offset = 0;
93
94 19 dpavlin char *index_path;
95 char *query;
96 char *attr;
97 31 dpavlin char *order;
98 1 dpavlin
99 19 dpavlin
100 /* only allow 1D input array */
101 25 dpavlin if (attr_ndims == 1)
102 19 dpavlin {
103 25 dpavlin ncols = attr_dim_counts[0];
104 19 dpavlin }
105 else
106 ereport(ERROR,
107 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
108 errmsg("invalid input array"),
109 errdetail("Input array must have 1 dimension")));
110
111 1 dpavlin /* check to see if caller supports us returning a tuplestore */
112 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
113 ereport(ERROR,
114 (errcode(ERRCODE_SYNTAX_ERROR),
115 errmsg("materialize mode required, but it is not " \
116 "allowed in this context")));
117
118 19 dpavlin /* get info about element type needed to construct the array */
119 25 dpavlin get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
120 19 dpavlin
121 1 dpavlin /* get the requested return tuple description */
122 tupdesc = rsinfo->expectedDesc;
123 19 dpavlin rsinfo_ncols = tupdesc->natts;
124 1 dpavlin
125 /*
126 * The requested tuple description better match up with the array
127 * we were given.
128 */
129 19 dpavlin if (rsinfo_ncols != ncols)
130 ereport(ERROR,
131 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
132 errmsg("invalid input array"),
133 errdetail("Number of elements in array must match number of query specified columns.")));
134
135 1 dpavlin /* OK, use it */
136 attinmeta = TupleDescGetAttInMetadata(tupdesc);
137
138 /* Now go to work */
139 rsinfo->returnMode = SFRM_Materialize;
140
141 per_query_ctx = fcinfo->flinfo->fn_mcxt;
142 oldcontext = MemoryContextSwitchTo(per_query_ctx);
143
144 /* initialize our tuplestore */
145 tupstore = tuplestore_begin_heap(true, false, SortMem);
146
147 19 dpavlin
148 /* take rest of arguments from function */
149
150 /* index path */
151 if (PG_ARGISNULL(0)) {
152 ereport(ERROR,
153 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
154 errmsg("index path can't be null"),
155 errdetail("Index path must be valid full path to HyperEstraier index")));
156 }
157 index_path = _textout(PG_GETARG_TEXT_P(0));
158
159 /* query string */
160 31 dpavlin if (PG_ARGISNULL(1)) {
161 19 dpavlin query = "";
162 } else {
163 query = _textout(PG_GETARG_TEXT_P(1));
164 }
165
166 /* atribute filter */
167 if (PG_ARGISNULL(2)) {
168 attr = "";
169 } else {
170 attr = _textout(PG_GETARG_TEXT_P(2));
171 }
172 31 dpavlin
173 /* sort order */
174 if (PG_ARGISNULL(3)) {
175 order = "";
176 } else {
177 order = _textout(PG_GETARG_TEXT_P(3));
178 }
179 19 dpavlin
180 31 dpavlin
181 19 dpavlin /* limit */
182 31 dpavlin if (PG_ARGISNULL(4)) {
183 19 dpavlin limit = 0;
184 } else {
185 31 dpavlin limit = PG_GETARG_INT32(4);
186 19 dpavlin }
187
188 /* offset */
189 31 dpavlin if (PG_ARGISNULL(5)) {
190 19 dpavlin offset = 0;
191 } else {
192 31 dpavlin offset = PG_GETARG_INT32(5);
193 19 dpavlin }
194
195
196 /* open the database */
197 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
198
199 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
200 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
201 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
202 errdetail(est_err_msg(ecode))));
203 }
204
205 20 dpavlin elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
206 19 dpavlin
207 /* create a search condition object */
208 if (!(cond = est_cond_new())) {
209 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
210 errmsg("pgest_attr: est_cond_new failed")));
211 }
212
213 /* set the search phrase to the search condition object */
214 if (! PG_ARGISNULL(1) && strlen(query) > 0)
215 est_cond_set_phrase(cond, query);
216
217 /* minimum valid attribute length is 10: @a STREQ a */
218 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
219 40 dpavlin elog(DEBUG1,"attributes: %s", attr);
220 56 dpavlin cond_add_attr(cond, attr);
221 19 dpavlin }
222
223 31 dpavlin /* set the search phrase to the search condition object */
224 if (! PG_ARGISNULL(3) && strlen(order) > 0) {
225 elog(DEBUG1,"est_cond_set_order(%s)", order);
226 est_cond_set_order(cond, order);
227 }
228
229 38 dpavlin if (limit) {
230 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
231 est_cond_set_max(cond, limit + offset);
232 }
233
234 19 dpavlin /* get the result of search */
235 est_result = est_db_search(db, cond, &resnum, NULL);
236
237 /* check if results exists */
238 if ( 0 == resnum ) {
239 elog(INFO, "pgest_attr: no results for: %s", query );
240 }
241
242 /* total number of tuples to be returned */
243 if (limit && limit < resnum) {
244 31 dpavlin nrows = limit;
245 19 dpavlin } else {
246 nrows = resnum - offset;
247 }
248
249
250 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
251
252 1 dpavlin values = (char **) palloc(ncols * sizeof(char *));
253
254 for (i = 0; i < nrows; i++)
255 {
256 19 dpavlin
257 /* get result from estraier */
258 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
259 47 dpavlin elog(INFO, "pgest_attr: can't find result %d", i + offset);
260 19 dpavlin } else {
261 elog(DEBUG1, "URI: %s\n Title: %s\n",
262 est_doc_attr(doc, "@uri"),
263 est_doc_attr(doc, "@title")
264 );
265 }
266
267 /* iterate over results */
268 1 dpavlin for (j = 0; j < ncols; j++)
269 {
270 19 dpavlin bool isnull;
271
272 /* array value of this position */
273 25 dpavlin indx[0] = j + attr_dim_lower_bounds[0];
274 19 dpavlin
275 25 dpavlin dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
276 19 dpavlin
277 if (!isnull && doc)
278 values[j] = DatumGetCString(
279 attr2text(doc,
280 20 dpavlin (char *)DirectFunctionCall1(textout, dvalue)
281 19 dpavlin ));
282 else
283 values[j] = NULL;
284 1 dpavlin }
285 /* construct the tuple */
286 tuple = BuildTupleFromCStrings(attinmeta, values);
287
288 /* now store it */
289 tuplestore_puttuple(tupstore, tuple);
290 19 dpavlin
291 /* delete estraier document object */
292 47 dpavlin if (doc) est_doc_delete(doc);
293 1 dpavlin }
294
295 tuplestore_donestoring(tupstore);
296 rsinfo->setResult = tupstore;
297
298 /*
299 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
300 * tuples are in our tuplestore and passed back through
301 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
302 * that we actually used to build our tuples with, so the caller can
303 * verify we did what it was expecting.
304 */
305 rsinfo->setDesc = tupdesc;
306 MemoryContextSwitchTo(oldcontext);
307
308 31 dpavlin est_cond_delete(cond);
309
310 19 dpavlin if(!est_db_close(db, &ecode)){
311 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
312 errmsg("est_db_close: %d", ecode),
313 errdetail(est_err_msg(ecode))));
314 }
315
316 1 dpavlin return (Datum) 0;
317 }
318
319
320 /* make text var from attr */
321 char *attr2text(ESTDOC *doc, char *attr) {
322 char *val;
323 const char *attrval;
324 int len;
325 4 dpavlin int attrlen;
326 1 dpavlin
327 47 dpavlin if (! doc) return (Datum) NULL;
328
329 56 dpavlin elog(DEBUG1, "doc: %p, attr: %s", doc, attr);
330 1 dpavlin
331 4 dpavlin if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
332 val = (char *) palloc(attrlen * sizeof(char));
333 1 dpavlin } else {
334 return (Datum) NULL;
335 }
336
337 len = strlen(attrval);
338 2 dpavlin elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
339 1 dpavlin
340 len++;
341 len *= sizeof(char);
342
343 elog(DEBUG2, "palloc(%d)", len);
344
345 val = palloc(len);
346
347 memset(val, 0, len);
348 strncpy(val, attrval, len);
349
350 elog(DEBUG2, "val=%s", val);
351
352 return val;
353 }
354
355 41 dpavlin /*
356 * variation on theme: use node API which doesn't open index on
357 * every query which is much faster for large indexes
358 *
359 */
360
/*
 * Positions of the SQL arguments of pgest_node():
 *
 *   select * from pgest(node_uri, login, passwd, depth, query, attr,
 *                       order, limit, offset, attr_array)
 *     as (foo text, ... );
 */
#define _arg_node_uri	0	/* text: node URL, must not be NULL */
#define _arg_login	1	/* text: user name, must not be NULL */
#define _arg_passwd	2	/* text: password, must not be NULL */
#define _arg_depth	3	/* int4: depth of search, NULL means 0 */
#define _arg_query	4	/* text: search phrase */
#define _arg_attr	5	/* text: attribute filter */
#define _arg_order	6	/* text: sort order */
#define _arg_limit	7	/* int4: limit, NULL means 0 */
#define _arg_offset	8	/* int4: offset, NULL means 0 */
#define _arg_attr_array	9	/* array: attribute names, one per column */
373
374
375 41 dpavlin PG_FUNCTION_INFO_V1(pgest_node);
376 Datum pgest_node(PG_FUNCTION_ARGS)
377 {
378 48 dpavlin ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(_arg_attr_array);
379 41 dpavlin Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
380 int attr_ndims = ARR_NDIM(attr_arr);
381 int *attr_dim_counts = ARR_DIMS(attr_arr);
382 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
383 int ncols = 0;
384 int nrows = 0;
385 int indx[MAXDIM];
386 int16 attr_len;
387 bool attr_byval;
388 char attr_align;
389 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
390 AttInMetadata *attinmeta;
391 TupleDesc tupdesc;
392 Tuplestorestate *tupstore = NULL;
393 HeapTuple tuple;
394 MemoryContext per_query_ctx;
395 MemoryContext oldcontext;
396 Datum dvalue;
397 char **values;
398 int rsinfo_ncols;
399 int i, j;
400 /* estvars */
401 ESTNODE *node;
402 ESTCOND *cond;
403 ESTNODERES *nres;
404 ESTRESDOC *rdoc;
405 57 dpavlin CBMAP *hints;
406 41 dpavlin int resnum = 0;
407 int limit = 0;
408 int offset = 0;
409 49 dpavlin int depth = 0;
410 41 dpavlin
411 char *node_url;
412 char *user, *passwd;
413 char *query;
414 char *attr;
415 char *order;
416
417
418 /* only allow 1D input array */
419 if (attr_ndims == 1)
420 {
421 ncols = attr_dim_counts[0];
422 }
423 else
424 ereport(ERROR,
425 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
426 errmsg("invalid input array"),
427 errdetail("Input array must have 1 dimension")));
428
429 /* check to see if caller supports us returning a tuplestore */
430 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
431 ereport(ERROR,
432 (errcode(ERRCODE_SYNTAX_ERROR),
433 errmsg("materialize mode required, but it is not " \
434 "allowed in this context")));
435
436 /* get info about element type needed to construct the array */
437 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
438
439 /* get the requested return tuple description */
440 tupdesc = rsinfo->expectedDesc;
441 rsinfo_ncols = tupdesc->natts;
442
443 /*
444 * The requested tuple description better match up with the array
445 * we were given.
446 */
447 if (rsinfo_ncols != ncols)
448 ereport(ERROR,
449 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
450 errmsg("invalid input array"),
451 errdetail("Number of elements in array must match number of query specified columns.")));
452
453 /* OK, use it */
454 attinmeta = TupleDescGetAttInMetadata(tupdesc);
455
456 /* Now go to work */
457 rsinfo->returnMode = SFRM_Materialize;
458
459 per_query_ctx = fcinfo->flinfo->fn_mcxt;
460 oldcontext = MemoryContextSwitchTo(per_query_ctx);
461
462 /* initialize our tuplestore */
463 tupstore = tuplestore_begin_heap(true, false, SortMem);
464
465
466 /* take rest of arguments from function */
467
468 /* node URL */
469 48 dpavlin if (PG_ARGISNULL(_arg_node_uri)) {
470 41 dpavlin ereport(ERROR,
471 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
472 errmsg("node URL can't be null"),
473 errdetail("Node URL must be valid URL to HyperEstraier node")));
474 }
475 48 dpavlin node_url = _textout(PG_GETARG_TEXT_P(_arg_node_uri));
476 41 dpavlin
477 /* login and password */
478 48 dpavlin if (PG_ARGISNULL(_arg_login) || PG_ARGISNULL(_arg_passwd)) {
479 41 dpavlin ereport(ERROR,
480 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
481 errmsg("username and password can't be NULL"),
482 errdetail("You must specify valid username and password to HyperEstraier node")));
483 }
484 48 dpavlin user = _textout(PG_GETARG_TEXT_P(_arg_login));
485 passwd = _textout(PG_GETARG_TEXT_P(_arg_passwd));
486 41 dpavlin
487 49 dpavlin /* depth of search */
488 if (PG_ARGISNULL(_arg_depth)) {
489 depth = 0;
490 } else {
491 depth = PG_GETARG_INT32(_arg_depth);
492 }
493
494 41 dpavlin /* query string */
495 48 dpavlin if (PG_ARGISNULL(_arg_query)) {
496 41 dpavlin query = "";
497 } else {
498 48 dpavlin query = _textout(PG_GETARG_TEXT_P(_arg_query));
499 41 dpavlin }
500
501 /* atribute filter */
502 48 dpavlin if (PG_ARGISNULL(_arg_attr)) {
503 41 dpavlin attr = "";
504 } else {
505 48 dpavlin attr = _textout(PG_GETARG_TEXT_P(_arg_attr));
506 41 dpavlin }
507
508 /* sort order */
509 48 dpavlin if (PG_ARGISNULL(_arg_order)) {
510 41 dpavlin order = "";
511 } else {
512 48 dpavlin order = _textout(PG_GETARG_TEXT_P(_arg_order));
513 41 dpavlin }
514
515
516 /* limit */
517 48 dpavlin if (PG_ARGISNULL(_arg_limit)) {
518 41 dpavlin limit = 0;
519 } else {
520 48 dpavlin limit = PG_GETARG_INT32(_arg_limit);
521 41 dpavlin }
522
523 /* offset */
524 48 dpavlin if (PG_ARGISNULL(_arg_offset)) {
525 41 dpavlin offset = 0;
526 } else {
527 48 dpavlin offset = PG_GETARG_INT32(_arg_offset);
528 41 dpavlin }
529
530 /* initialize the network environment */
531 if(!est_init_net_env()){
532 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
533 errmsg("pgest_node: can't create network enviroment")));
534 }
535
536 /* create the node connection object */
537 elog(DEBUG1, "pgest_node: est_node_new(%s) as %s", node_url, user);
538 node = est_node_new(node_url);
539 est_node_set_auth(node, user, passwd);
540
541 49 dpavlin elog(DEBUG1, "pgest_node: node: %s (d:%d) query[%s] attr[%s] limit %d offset %d", node_url, depth, query, (PG_ARGISNULL(_arg_attr) ? "NULL" : attr), limit, offset);
542 41 dpavlin
543 /* create a search condition object */
544 if (!(cond = est_cond_new())) {
545 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
546 errmsg("pgest_node: est_cond_new failed")));
547 }
548
549 /* set the search phrase to the search condition object */
550 48 dpavlin if (! PG_ARGISNULL(_arg_query) && strlen(query) > 0)
551 41 dpavlin est_cond_set_phrase(cond, query);
552
553 /* minimum valid attribute length is 10: @a STREQ a */
554 48 dpavlin if (! PG_ARGISNULL(_arg_attr) && strlen(attr) >= 10) {
555 41 dpavlin elog(DEBUG1,"attributes: %s", attr);
556 56 dpavlin cond_add_attr(cond, attr);
557 41 dpavlin }
558
559 /* set the search phrase to the search condition object */
560 48 dpavlin if (! PG_ARGISNULL(_arg_order) && strlen(order) > 0) {
561 41 dpavlin elog(DEBUG1,"est_cond_set_order(%s)", order);
562 est_cond_set_order(cond, order);
563