--- trunk/html/swish.cgi 2003/03/16 21:44:42 16 +++ trunk/html/swish.cgi 2004/08/31 09:04:15 89 @@ -3,56 +3,100 @@ use strict; use CGI qw/:standard -no_xhtml/; use CGI::Carp qw(fatalsToBrowser); -use SWISH; +use SWISH::API; use XML::Simple; use Lingua::Spelling::Alternative; use Text::Iconv; +use Data::Pageset; +use FormatResult; -# output charset -my $CHARSET='ISO-8859-2'; +# for pager +my $pages_per_set = 20; Text::Iconv->raise_error(0); # Conversion errors raise exceptions -my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET); - my $config=XMLin(undef, # keyattr => { label => "value" }, forcecontent => 0, + ForceArray => [ 'path' ], ); -use Data::Dumper; -#print Dumper($config); +my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset}); +sub x { + return if (! defined $_[0]); + return $from_utf8->convert($_[0]); +} + +# Escape <, >, & and ", and to produce valid XML +my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); +my $escape_re = join '|' => keys %escape; +sub e { + my $out; + foreach my $v (@_) { + $v =~ s/($escape_re)/$escape{$1}/g; + $out .= $v; + } + return $out; +} -my $spelling_alt; +my @spellings; # FIX: doesn't work very well if ($config->{findaffix}) { - $spelling_alt = new Lingua::Spelling::Alternative; - $spelling_alt->load_findaffix($config->{affix}); + foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) { + next if (! -f $findaffix); + my $spelling_alt = new Lingua::Spelling::Alternative; + $spelling_alt->load_findaffix($findaffix); + push @spellings,$spelling_alt; + } } if ($config->{affix}) { - $spelling_alt = new Lingua::Spelling::Alternative; - $spelling_alt->load_affix($config->{affix}); + foreach my $affix (split(/[, ]+/,x($config->{affix}))) { + next if (! -f $affix); + my $spelling_alt = new Lingua::Spelling::Alternative; + $spelling_alt->load_affix($affix); + push @spellings,$spelling_alt; + } } my $hits=0; -my $max_hits=$config->{max_hits}; +my $max_hits=param('max_hits') || x($config->{max_hits}); my %labels; foreach (@{$config->{labels}->{label}}) { - $labels{$_->{value}} = $from_utf8->convert($_->{content}); + next if (! $_->{value}); # skip unlimited (0) + $labels{$_->{value}} = x($_->{content}); } +my $path; +# limit to this path +$path .= '"'.join('*" or "',param('path')).'*"' if (param('path')); +my %path_label; +my @path_name; +foreach (@{$config->{paths}->{path}}) { + push @path_name,x($_->{limit}); + $path_label{$_->{limit}} = x($_->{content}); +} + +my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties}); + if ($config->{charset}) { - print header(-charset=>$config->{charset}); + print header(-charset=>x($config->{charset})); } else { print header; } -print start_html(-title=>$config->{title}),start_form; -print $config->{text}->{search}; +print start_html(-title=>x($config->{title})),start_form; +print x($config->{text}->{search}); print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits); -print $config->{text}->{documents}; +print x($config->{text}->{documents}); print textfield('search'); -print submit(-value=> $config->{text}->{submit}); -print checkbox(-name=>'no_affix', -checked=>0, -label=>$config->{text}->{no_spell}) if ($spelling_alt); +print submit(-value=> x($config->{text}->{submit})); +print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings); +print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties); +if (@path_name) { + print br,x($config->{text}->{limit}); + print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path); +} elsif (param('path')) { + print hidden(-name=>'path',-values=>param('path')); +} print end_form,hr; if (param('search')) { @@ -60,69 +104,214 @@ my $s; # re-write query from +/- to and/and not + my @s_highlite; + my $search = param('search'); - my $s_phrase = ""; + + # strip spaces + $search =~ s/^\s+//; + $search =~ s/\s+$//; + # fixup search string + $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 + $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/; + + # extract phrases and put them first while ($search =~ s/\s*("[^"]+")\s*/ /) { $s .= "$1 "; + push @s_highlite, $1; } - $search =~ s/^\s+//; - $search =~ s/\s+$//; + + my %words; foreach (split(/\s+/,$search)) { if (m/^([+-])(\S+)/) { $s.= ($s) ? "and " : ""; $s.="not " if ($1 eq "-"); - if ($spelling_alt && !param('no_affix')) { + if (@spellings && !param('no_affix')) { my $w = $2; $w =~ s/[\*\s]+//g; $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/; - $s.="$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 "; + my $or=""; + foreach my $spelling_alt (@spellings) { + $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 "; + $or = "or "; + } } else { $s.="$2* "; } + push @s_highlite, $2 if ($1 ne "-"); } else { - if ($spelling_alt && !param('no_affix')) { + if (@spellings && !param('no_affix')) { my $w = $_; $w =~ s/[\*\s]+//g; - #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) "; - $s.="(".join("* or ",$spelling_alt->alternatives($w))."*) "; + my $or=""; + foreach my $spelling_alt (@spellings) { + $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) "; + $or = "or "; + } } else { $s.="$_* "; } + push @s_highlite, $_; } } - # fixup search string - $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 - $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/; + # fix multiple stars $s=~s/\*\*+/*/g; - my $sh = SWISH->connect('Fork', - prog => $config->{prog}, - indexes => $config->{index}, -# properties => [qw/god br nr/], - results => sub { - my ($sh,$hit) = @_; - - printf ("%s [%s]
\n","http://".virtual_host().$config->{url}.$hit->swishdocpath,$hit->swishtitle || 'untitled',$hit->swishrank); - -# print $_[1]->as_string,"
\n"; -# my @fields = $hit->field_names; -# print "Field '$_' = '", $hit->$_, "'
\n" for sort @fields; - }, - maxhits => param('max_hits') || $max_hits, - ); + # limit to some path + $s = "swishdocpath=($path) and $s" if ($path); - die $SWISH::errstr unless $sh; + my %params; # optional parametars for swish + # default format for output + my $hit_fmt = "%s [%s]
\n"; - $hits = $sh->query($s); + if (@properties) { + $hit_fmt = x($config->{hit}) if (! param('no_properties')); + $params{properties} = \@properties; + } else { + $hit_fmt = x($config->{hit}) if (x($config->{hit})); + } - if ($hits > 0) { - print p,hr; - printf ($config->{text}->{hits},$hits,param('max_hits') || $max_hits,$s); + my $swish = SWISH::API->new($config->{index}); + $swish->AbortLastError if $swish->Error; + my $results = $swish->Query($s); + my $hits = $results->Hits; + + + # build pager + my $current_page = param('page') || 1; + + my $pager = Data::Pageset->new({ + 'total_entries' => $hits, + 'entries_per_page' => $max_hits, + 'current_page' => $current_page, + 'pages_per_set' => $pages_per_set, + }); + + $results->SeekResult( $pager->first - 1 ); + + # get number of entries on this page + my $i = $pager->entries_on_this_page; + + # print number of hits or error message + if ( !$hits ) { + printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString); } else { - print p; - printf ($config->{text}->{no_hits},$s,$sh->errstr); + printf (x($config->{text}->{hits}),$i,$results->Hits,$s); + } + + my %path2title; + foreach my $p (@{$config->{path2title}->{path}}) { + $path2title{$p->{dir}} = $p->{content}; } + + # output start of table + print qq{ + + }; + # html before and after each hit + my $tr_pre = qq{ + + }; + + for(my $i=$pager->first; $i<=$pager->last; $i++) { + + my $result = $results->NextResult; + last if (! $result); + + my @arr; + + foreach my $prop (@properties) { + if ($prop =~ m/swishdescription/) { + my $tmp = FormatResult::get_snippet( + e($result->Property($prop)), + @s_highlite, + ); + + push @arr, $tmp; + } else { + push @arr, $result->Property($prop); + } + } + + my $title = e($result->Property("swishtitle")) || 'untitled'; + my $rank = $result->Property("swishrank"); + my $host = $result->Property("swishdocpath"); + $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url}); + + foreach my $p (keys %path2title) { + if ($host =~ m/$p/i) { + $title =~ s/$path2title{$p}\s*[:-]+\s*//; + $title = $path2title{$p}." :: ".$title; + last; + } + } + + print $tr_pre,$i,". "; + # print collection name which is not link + if ($title =~ s/^(.+? :: )//) { + print $1; + } + + printf($hit_fmt, $host, $title || 'untitled', $rank, @arr); + print $tr_post; + + } + + # pager navigation + my $nav_html; + + my $nav_fmt=qq{ %s }; + + if ($pager->current_page() > $pager->first_page) { + param('page', $pager->current_page - 1); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'<<'); + } + + if ($pager->previous_set) { + param('page', $pager->previous_set); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..'); + } + + + foreach my $p (@{$pager->pages_in_set()}) { + next if ($p < 0); +# for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) { + if($p == $pager->current_page()) { + $nav_html .= "$p "; + } else { + param('page', $p); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p); + } + } + + if ($pager->next_set) { + param('page', $pager->next_set); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..'); + } + + if ($pager->current_page() < $pager->last_page) { + param('page', $pager->current_page + 1); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'>>'); + } + + if ($config->{text}->{pages}) { + $nav_html = x($config->{text}->{pages})." ".$nav_html; + } + + # end html table + print qq{ + +
+ }; + my $tr_post = qq{ +
+$nav_html +
+ }; + + + } else { - print p($config->{text}->{footer}); + print p(x($config->{text}->{footer})); }