--- trunk/html/swish.cgi 2003/03/16 21:44:42 16
+++ trunk/html/swish.cgi 2004/08/31 09:04:15 89
@@ -3,56 +3,100 @@
use strict;
use CGI qw/:standard -no_xhtml/;
use CGI::Carp qw(fatalsToBrowser);
-use SWISH;
+use SWISH::API;
use XML::Simple;
use Lingua::Spelling::Alternative;
use Text::Iconv;
+use Data::Pageset;
+use FormatResult;
-# output charset
-my $CHARSET='ISO-8859-2';
+# for pager
+my $pages_per_set = 20;
Text::Iconv->raise_error(0); # Conversion errors raise exceptions
-my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
-
my $config=XMLin(undef,
# keyattr => { label => "value" },
forcecontent => 0,
+ ForceArray => [ 'path' ],
);
-use Data::Dumper;
-#print Dumper($config);
+my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
+sub x {
+ return if (! defined $_[0]);
+ return $from_utf8->convert($_[0]);
+}
+
+# Escape <, >, & and ", and to produce valid XML
+my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"');
+my $escape_re = join '|' => keys %escape;
+sub e {
+ my $out;
+ foreach my $v (@_) {
+ $v =~ s/($escape_re)/$escape{$1}/g;
+ $out .= $v;
+ }
+ return $out;
+}
-my $spelling_alt;
+my @spellings;
# FIX: doesn't work very well
if ($config->{findaffix}) {
- $spelling_alt = new Lingua::Spelling::Alternative;
- $spelling_alt->load_findaffix($config->{affix});
+ foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
+ next if (! -f $findaffix);
+ my $spelling_alt = new Lingua::Spelling::Alternative;
+ $spelling_alt->load_findaffix($findaffix);
+ push @spellings,$spelling_alt;
+ }
}
if ($config->{affix}) {
- $spelling_alt = new Lingua::Spelling::Alternative;
- $spelling_alt->load_affix($config->{affix});
+ foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
+ next if (! -f $affix);
+ my $spelling_alt = new Lingua::Spelling::Alternative;
+ $spelling_alt->load_affix($affix);
+ push @spellings,$spelling_alt;
+ }
}
my $hits=0;
-my $max_hits=$config->{max_hits};
+my $max_hits=param('max_hits') || x($config->{max_hits});
my %labels;
foreach (@{$config->{labels}->{label}}) {
- $labels{$_->{value}} = $from_utf8->convert($_->{content});
+ next if (! $_->{value}); # skip unlimited (0)
+ $labels{$_->{value}} = x($_->{content});
}
+my $path;
+# limit to this path
+$path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
+my %path_label;
+my @path_name;
+foreach (@{$config->{paths}->{path}}) {
+ push @path_name,x($_->{limit});
+ $path_label{$_->{limit}} = x($_->{content});
+}
+
+my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
+
if ($config->{charset}) {
- print header(-charset=>$config->{charset});
+ print header(-charset=>x($config->{charset}));
} else {
print header;
}
-print start_html(-title=>$config->{title}),start_form;
-print $config->{text}->{search};
+print start_html(-title=>x($config->{title})),start_form;
+print x($config->{text}->{search});
print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
-print $config->{text}->{documents};
+print x($config->{text}->{documents});
print textfield('search');
-print submit(-value=> $config->{text}->{submit});
-print checkbox(-name=>'no_affix', -checked=>0, -label=>$config->{text}->{no_spell}) if ($spelling_alt);
+print submit(-value=> x($config->{text}->{submit}));
+print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
+print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
+if (@path_name) {
+ print br,x($config->{text}->{limit});
+ print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
+} elsif (param('path')) {
+ print hidden(-name=>'path',-values=>param('path'));
+}
print end_form,hr;
if (param('search')) {
@@ -60,69 +104,214 @@
my $s;
# re-write query from +/- to and/and not
+ my @s_highlite;
+
my $search = param('search');
- my $s_phrase = "";
+
+ # strip spaces
+ $search =~ s/^\s+//;
+ $search =~ s/\s+$//;
+ # fixup search string
+ $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
+ $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
+
+ # extract phrases and put them first
while ($search =~ s/\s*("[^"]+")\s*/ /) {
$s .= "$1 ";
+ push @s_highlite, $1;
}
- $search =~ s/^\s+//;
- $search =~ s/\s+$//;
+
+ my %words;
foreach (split(/\s+/,$search)) {
if (m/^([+-])(\S+)/) {
$s.= ($s) ? "and " : "";
$s.="not " if ($1 eq "-");
- if ($spelling_alt && !param('no_affix')) {
+ if (@spellings && !param('no_affix')) {
my $w = $2; $w =~ s/[\*\s]+//g;
$w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
- $s.="$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
+ my $or="";
+ foreach my $spelling_alt (@spellings) {
+ $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
+ $or = "or ";
+ }
} else {
$s.="$2* ";
}
+ push @s_highlite, $2 if ($1 ne "-");
} else {
- if ($spelling_alt && !param('no_affix')) {
+ if (@spellings && !param('no_affix')) {
my $w = $_; $w =~ s/[\*\s]+//g;
- #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
- $s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
+ my $or="";
+ foreach my $spelling_alt (@spellings) {
+ $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
+ $or = "or ";
+ }
} else {
$s.="$_* ";
}
+ push @s_highlite, $_;
}
}
- # fixup search string
- $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
- $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
+ # fix multiple stars
$s=~s/\*\*+/*/g;
- my $sh = SWISH->connect('Fork',
- prog => $config->{prog},
- indexes => $config->{index},
-# properties => [qw/god br nr/],
- results => sub {
- my ($sh,$hit) = @_;
-
- printf ("%s [%s]
\n","http://".virtual_host().$config->{url}.$hit->swishdocpath,$hit->swishtitle || 'untitled',$hit->swishrank);
-
-# print $_[1]->as_string,"
\n";
-# my @fields = $hit->field_names;
-# print "Field '$_' = '", $hit->$_, "'
\n" for sort @fields;
- },
- maxhits => param('max_hits') || $max_hits,
- );
+ # limit to some path
+ $s = "swishdocpath=($path) and $s" if ($path);
- die $SWISH::errstr unless $sh;
+ my %params; # optional parametars for swish
+ # default format for output
+ my $hit_fmt = "%s [%s]
\n";
- $hits = $sh->query($s);
+ if (@properties) {
+ $hit_fmt = x($config->{hit}) if (! param('no_properties'));
+ $params{properties} = \@properties;
+ } else {
+ $hit_fmt = x($config->{hit}) if (x($config->{hit}));
+ }
- if ($hits > 0) {
- print p,hr;
- printf ($config->{text}->{hits},$hits,param('max_hits') || $max_hits,$s);
+ my $swish = SWISH::API->new($config->{index});
+ $swish->AbortLastError if $swish->Error;
+ my $results = $swish->Query($s);
+ my $hits = $results->Hits;
+
+
+ # build pager
+ my $current_page = param('page') || 1;
+
+ my $pager = Data::Pageset->new({
+ 'total_entries' => $hits,
+ 'entries_per_page' => $max_hits,
+ 'current_page' => $current_page,
+ 'pages_per_set' => $pages_per_set,
+ });
+
+ $results->SeekResult( $pager->first - 1 );
+
+ # get number of entries on this page
+ my $i = $pager->entries_on_this_page;
+
+ # print number of hits or error message
+ if ( !$hits ) {
+ printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
} else {
- print p;
- printf ($config->{text}->{no_hits},$s,$sh->errstr);
+ printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
+ }
+
+ my %path2title;
+ foreach my $p (@{$config->{path2title}->{path}}) {
+ $path2title{$p->{dir}} = $p->{content};
}
+
+ # output start of table
+ print qq{
+
+ }; + my $tr_post = qq{ + |
+$nav_html + |