#!/usr/bin/perl -w use strict; use Search::Estraier 0.07; use DBI; use Data::Dumper; use Encode qw/from_to/; use Time::HiRes qw/time/; use Getopt::Long; =head1 NAME pgest-index.pl - create full-text index of some columns in your database =cut my $c = { debug => 0, }; =head1 SYNOPSIS pgest-index.pl --create movies --sql "select id,title,year from movies" pgsql-index.pl --drop movies Options: =over 4 =item --create name Create index C and create triggers on table with same name =item --drop name Remove triggers from table C and node with same name =item --node-url http://localhost:1978/node/name Full URI to node. If it's not specified, it's assumed that you are using Hyper Estraier on C. =item --sql "select col1,col2 from name" SQL query which will return names of columns which are included in full-text index. Have in mind that you can't use aliases (as I) in this SQL query (or triggers will be created with wrong fields). If SQL query isn't specified, default one C<< select * from movies >> will be created. That will be B if all columns are not needed for search. =item --pk id Specify name of primary key column in SQL query. If you allready have primary key on table or unique index and it consists of simgle column (compaund keys are not supported) it will be picked up automatically. If you specify value which is not unique, you will get just last occurence of that item in index (which might be what you want). That's because specified C column will be used for C<@uri> in Hyper Estraier. If name of primary key begins with C<_> it will not be added into text indexing (so you won't be able to find prmary key value, but it will still be available as attribute value). =item --user login =item --passwd password Username and password to use when connecting to Hyper Estraier. If not specified, C and C will be used. =item --debug Dump debugging output. It may be specified multiple times for more verbose debugging. =back =cut my $usage = "$0 database_name (--create|--drop) table_name [--sql='select id,foo,bar from table'] [--pk=id]\n"; GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/); my $dbname = shift @ARGV || die $usage; $c->{dbi} = 'Pg:dbname=' . $dbname; warn "# c: ", Dumper($c) if ($c->{debug}); my $table = $c->{create} || $c->{drop} || die $usage; $c->{node_url} = 'http://localhost:1978/node/' . $table; $c->{user} ||= 'admin'; $c->{passwd} ||= 'admin'; # create and configure node my $node = new Search::Estraier::Node( url => $c->{node_url}, user => $c->{user}, passwd => $c->{passwd}, croak_on_error => 1, create => 1, debug => $c->{debug} >= 4 ? 1 : 0, ); # create DBI connection my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr; # drop existing triggers sub drop_triggers { my $table = shift || die "no table?"; my $sth = $dbh->prepare(qq{ SELECT relname,tgname FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid WHERE tgname LIKE 'pgest_trigger_%' AND relname = ? }) || $dbh->errstr; $sth->execute( $table ) || $sth->errstr(); warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3); while (my $row = $sth->fetchrow_hashref) { my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} ); #warn "# $sql\n"; $dbh->do( $sql ) || $dbh->errstr; } warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows); } if ($c->{drop}) { drop_triggers( $table ); warn "removing node $table\n"; $node->master( action => 'nodedel', name => $table, ); exit; } # clear existing node $node->master( action => 'nodeclr', name => $table, ); # create PostgreSQL functions $dbh->do(qq{ CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[]) RETURNS setof record AS 'pgest','pgest_node' LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT; CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER AS 'pgest', 'pgest_trigger' LANGUAGE 'C' STRICT; }) || die $dbh->errstr(); drop_triggers( $table ); if (! $c->{pk}) { warn "# finding primary key for $table\n" if ($c->{debug}); my $index_fmt = qq{ SELECT a.attname, t.typname FROM pg_type t, pg_attribute a WHERE t.oid = a.atttypid AND attrelid = ( SELECT indexrelid FROM pg_class c, pg_index i WHERE c.relname = '%s' AND c.oid = i.indrelid AND %s AND indnatts = 1 ) }; $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') ); $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') ); } die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk}); warn "using primary key $c->{pk}", $c->{pk} =~ m/^_/ ? " (not indexed)" : "", "\n"; $dbh->begin_work; $c->{sql} ||= "select * from $table"; my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr(); $sth->execute() || die $sth->errstr; my @cols = @{ $sth->{NAME} }; die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols); warn "# columns: ",join(",", @cols),"\n" if ($c->{debug}); my $total = $sth->rows; my $i = 1; my $t = time(); my $pk = $c->{pk} || 'id'; warn "indexing existing ",$sth->rows," rows\n"; while (my $row = $sth->fetchrow_hashref() ) { warn "# row: ",Dumper($row) if ($c->{debug} >= 3); # create document my $doc = new Search::Estraier::Document; if (my $id = $row->{$pk}) { $doc->add_attr('@uri', $id); } else { die "can't find pk column '$pk' in results\n"; } my $log = sprintf "%4d ",$i; while (my ($col,$val) = each %{$row}) { if ($val) { # add attributes (make column usable from attribute search) $doc->add_attr($col, $val); # add body text to document (make it searchable using full-text index) $doc->add_text($val) unless ($col =~ m/^_/); $log .= "R"; } else { $log .= "."; } } warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2); die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) }); $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) ); print STDERR $log; } my $cols = "'" . join("', '", @cols) . "'"; foreach my $t (qw/UPDATE INSERT DELETE/) { my $lc_t = lc($t); my $sql = qq{ CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t} ON ${table} FOR EACH ROW EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}', '$c->{pk}', $cols ) }; #warn "$sql\n"; $dbh->do( $sql ) || die $dbh->errstr; } warn "created consistency triggers\n"; $dbh->commit; =head1 SEARCHING At end of each run, this script will output example search SQL query on STDOUT. You can use it to quickly construct queries for your application. =cut my $col_names = join(', ', @cols); my $col_def = join(', ', map { "$_ text" } @cols); print " -- example SQL search query: SELECT $col_names FROM pgest( -- node, login, passwd, depth '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0, -- full text search 'foo bar', -- attribute filter, order, limit, offset null, null, null, null, -- return columns array[$cols] ) as ($col_def); "; __END__ =head1 AUTHOR Dobrica Pavlinusic L =head1 LICENSE This product is licensed under GNU Public License (GPL) v2 or later. =cut