Revision 9 (by dpavlin, 2007/04/26 22:19:40) move the maximum number of results fetched from the upstream server into the servers table
(maxres column). This allows fine-tuning of the query length per upstream server.
-- Upstream servers that search() can query, looked up by their short name.
CREATE TABLE servers (
	name     text NOT NULL,                   -- short name passed as first argument of search()
	host     text NOT NULL,                   -- host search() connects to
	port     int  NOT NULL DEFAULT 210,       -- port search() connects to
	database text          DEFAULT 'Default', -- passed to the connection as databaseName
	maxres   int  NOT NULL DEFAULT 100,       -- upper bound on records search() fetches per query
	PRIMARY KEY (name)
);

-- insert sample
-- Columns are named explicitly so these rows do not depend on the column
-- order of the servers table.
-- NOTE(review): maxres = 0 on 'nsk' makes search() cap that server at zero
-- fetched records -- confirm this is intended.
insert into servers (name, host, port, database, maxres) values ( 'nsk', '161.53.240.27', 8090, 'voyager', 0 );
insert into servers (name, host, port, database, maxres) values ( 'nsk-en', '161.53.240.27', 8190, 'voyager', 300 );
insert into servers (name, host, port, database, maxres) values ( 'loc', 'z3950.loc.gov', 7090, 'Voyager', 300 );
insert into servers (name, host, port, database, maxres) values ( 'ucs', 'library.usc.edu', 2200, 'unicorn', 100 );
--insert into servers (name, host, port, database, maxres) values ( '', '', , '', );

-- Row type returned by search(): one row per record fetched from the
-- upstream server.
CREATE TYPE item AS (
	title   text,  -- from the MARC record's title
	author  text,  -- from the MARC record's author
	edition text,  -- from the MARC record's edition
	date    text   -- publication date with leading/trailing non-digits removed
);

-- search(server, query): run a query against one upstream server from the
-- servers table and return one item row per fetched record.
--
--   $1  server  name of a row in servers (matched with ILIKE)
--   $2  query   sent as CQL when it contains whitespace, '=', '"' or a
--               stand-alone and/or/not; otherwise sent as PQF, where an
--               isbn:, title:, author: or issn: prefix is expanded into the
--               matching attribute set
--
-- At most servers.maxres records are fetched; a warning is emitted when the
-- result set is larger.  Dies when the server name matches no row.
create or replace function search(text,text)
returns setof item
language plperlu
as $$

use strict;

use ZOOM;
use MARC::Record;
use Encode qw/encode decode/;

my $debug = 0;

my ( $server, $query ) = @_;

# Look the server up through a prepared statement: $server is passed as a
# bind parameter and is never interpolated into the SQL text.
my $plan = spi_prepare(
	'select host,port,database,maxres from servers where name ilike $1',
	'text'
);
my $rv = spi_exec_prepared( $plan, { limit => 1 }, $server );
spi_freeplan( $plan );

die "can't find server $server in table servers\n" unless ( $rv->{processed} == 1 );

my $row = $rv->{rows}[0];
my ( $host, $port, $database, $max_res ) = @{$row}{qw/host port database maxres/};

# PQF templates for the supported "prefix:term" shortcuts
my $pqf = {
	isbn => '@attr 1=7 @attr 4=1 "%s"',
	title => '@attr 1=4 @attr 4=1 "%s"',
	author => '@attr 1=1003 @attr 4=1 "%s"',
	issn => '@attr 1=8 @attr 4=1 "%s"',
};

# The helpers below are anonymous subs on purpose: PL/Perl wraps this body in
# a subroutine, so named subs here would be nested named subs closing over
# lexicals, which the PL/Perl documentation warns against.

# Expand "prefix:term" into PQF using the table above; anything else
# (unknown prefix, no prefix) is returned unchanged.
my $to_pqf = sub {
	my $q = shift;
	if ( $q =~ m/^(\w+):\s*(.*)$/ ) {
		my ( $k, $v ) = ( $1, $2 );
		return sprintf( $pqf->{ $k }, $v ) if ( defined( $pqf->{ $k } ) );
	}
	return $q;
};

# Remove leading and trailing non-digit characters.
my $strip_non_digit = sub {
	my $d = shift;
	$d =~ s/^\D+//;
	$d =~ s/\D+$//;
	return $d;
};

# fix encoding
# Rewrites a fixed set of character sequences in record fields.
# NOTE(review): presumably repairs ISO-8859-2 letters as delivered by the
# upstream server (see the commented-out decode below) -- confirm.
my $fix_encoding = sub {
	my $t = shift;
	$t =~ s/éc/è/g;
	$t =~ s/âc/æ/g;
	$t =~ s/éz/¾/g;
	$t =~ s/és/¹/g;
	$t =~ s/³/ð/g;
	$t =~ s/éC/È/g;
	$t =~ s/âC/Æ/g;
	$t =~ s/éZ/®/g;
	$t =~ s/éS/©/g;
	$t =~ s/£/Ð/g;
	warn "## $t\n" if $debug;
#	$t = decode('iso-8859-2', $t);
#	return encode('utf-8',$t);
	return $t;
};

my $conn = ZOOM::Connection->new( $host, $port, databaseName => $database )
	or die "can't connect to ${host}:${port}/${database}\n";

$conn->option(preferredRecordSyntax => "usmarc");

my $rs;

my $notice;

# The boolean keywords must be stand-alone words: matching them as plain
# substrings would send e.g. 'title:mor' down the CQL path instead of
# expanding its prefix.
if ( $query =~ m/[\s="]|\b(?:and|or|not)\b/ ) {
	$rs = $conn->search( ZOOM::Query::CQL->new( $query ) );
	$notice = 'CQL';
} else {
	my $cqf = $to_pqf->( $query );
	$notice = "CQF: $cqf";
	$rs = $conn->search_pqf( $cqf );
}

my $n = $rs->size();
if ( $n > $max_res ) {
	warn "query returned $n results, fetching first $max_res\n";
	$n = $max_res;
}

# Prefetch all records we are going to return.  The second argument of
# records() is a record count, not a last index; skip the call when there is
# nothing to fetch.
$rs->records( 0, $n, 0 ) if $n > 0;

warn "$n results for '$query' $notice\n";

foreach my $i ( 0 .. $n - 1 ) {
	my $marc = MARC::Record->new_from_usmarc( $rs->record( $i )->raw() );

	return_next({
		title => $fix_encoding->( $marc->title ),
		author => $fix_encoding->( $marc->author ),
		edition => $fix_encoding->( $marc->edition ),
		date => $strip_non_digit->( $marc->publication_date ),
	});
}

# rows were already emitted through return_next
return undef;

$$;

-- if your terminal isn't iso-8859-2, change this!
-- set client_encoding  = 'iso-8859-2';

-- examples (first argument is the server name, second is the query)
-- select * from search('nsk','title:djece');
-- select * from search('nsk','osman');

--select * from search('nsk','title:mor');
--select * from search('nsk','grada');
--select * from search('nsk-en','restrictions');

--select * from search('ucs','human');

SELECT date,count(date)
FROM search('loc','human and computer and interaction')
GROUP BY date