--- trunk/html/swish.cgi 2004/04/06 19:21:07 73 +++ trunk/html/swish.cgi 2004/08/30 11:16:39 86 @@ -9,6 +9,53 @@ use Text::Iconv; use Data::Pageset; + +sub get_snippet { + my $context_chars = 100; + + my $desc = shift || return ''; + $desc = e($desc); + + # test if $desc contains any of our query words + my @snips; + + my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff}; + + # construct regex + my $re = qq/^.*?(.{$context_chars}?)(\Q/ . join("|",@_) . + qq/\E)(.{$context_chars})/; + + while ($desc =~ s/$re//si) { + my ($bef,$qm,$af) = ($1, $2, $3); + + # no partial words... + $bef =~ s,^\S+\s+|\s+\S+$,,gs; + $af =~ s,^\S+\s+|\s+\S+$,,gs; + + push @snips, "$bef $qm $af"; + } + + my $ellip = ' ... '; + my $snippet; + + if (@snips) { + $snippet = $ellip. join($ellip, @snips) . $ellip; + } else { + return ''; + } + + # color offset + my $i = 0; + + foreach my $w (@_) { + $snippet =~ s,(\Q$w\E),$1,gsi; + $i++; + $i = 0 if ($i > $#colors); + } + + return $snippet; +} + # for pager my $pages_per_set = 20; @@ -16,6 +63,7 @@ my $config=XMLin(undef, # keyattr => { label => "value" }, forcecontent => 0, + ForceArray => [ 'path' ], ); my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset}); @@ -40,6 +88,7 @@ # FIX: doesn't work very well if ($config->{findaffix}) { foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) { + next if (! -f $findaffix); my $spelling_alt = new Lingua::Spelling::Alternative; $spelling_alt->load_findaffix($findaffix); push @spellings,$spelling_alt; @@ -47,6 +96,7 @@ } if ($config->{affix}) { foreach my $affix (split(/[, ]+/,x($config->{affix}))) { + next if (! -f $affix); my $spelling_alt = new Lingua::Spelling::Alternative; $spelling_alt->load_affix($affix); push @spellings,$spelling_alt; @@ -62,12 +112,12 @@ $labels{$_->{value}} = x($_->{content}); } -my $path = param('path'); # limit to this path +my $path; +# limit to this path +$path .= '"'.join('*" or "',param('path')).'*"' if (param('path')); my %path_label; my @path_name; foreach (@{$config->{paths}->{path}}) { - -print STDERR "##: $_->{limit}",x($_->{content}),"\n"; push @path_name,x($_->{limit}); $path_label{$_->{limit}} = x($_->{content}); } @@ -90,6 +140,8 @@ if (@path_name) { print br,x($config->{text}->{limit}); print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path); +} elsif (param('path')) { + print hidden(-name=>'path',-values=>param('path')); } print end_form,hr; @@ -98,13 +150,22 @@ my $s; # re-write query from +/- to and/and not + my @s_elem; + my $search = param('search'); - my $s_phrase = ""; + + # strip spaces + $search =~ s/^\s+//; + $search =~ s/\s+$//; + # fixup search string + $search=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 + $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/; + + # extract phrases and put them first while ($search =~ s/\s*("[^"]+")\s*/ /) { $s .= "$1 "; + push @s_elem, $1; } - $search =~ s/^\s+//; - $search =~ s/\s+$//; my %words; @@ -123,10 +184,10 @@ } else { $s.="$2* "; } + push @s_elem, $2; } else { if (@spellings && !param('no_affix')) { my $w = $_; $w =~ s/[\*\s]+//g; - #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) "; my $or=""; foreach my $spelling_alt (@spellings) { $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) "; @@ -135,34 +196,21 @@ } else { $s.="$_* "; } + push @s_elem, $_; } } - # fixup search string - $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 - $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/; + # fix multiple stars $s=~s/\*\*+/*/g; # limit to some path - $s = "swishdocpath=(\"*$path*\") and $s" if ($path); + $s = "swishdocpath=($path) and $s" if ($path); my %params; # optional parametars for swish # default format for output my $hit_fmt = "%s [%s]
\n"; - # output start of table - print qq{ - - }; - # html before and after each hit - my $tr_pre = qq{ - - }; - if (@properties) { $hit_fmt = x($config->{hit}) if (! param('no_properties')); $params{properties} = \@properties; @@ -170,56 +218,12 @@ $hit_fmt = x($config->{hit}) if (x($config->{hit})); } -# my $sh = SWISH->connect('Fork', -# prog => x($config->{prog}), -# indexes => x($config->{index}), -# results => sub { -# my ($sh,$hit) = @_; -# -# if ($config->{url}) { -# printf ($hit_fmt ,"http://".virtual_host().x($config->{url}).$hit->swishdocpath,e($hit->swishtitle) || 'untitled',$hit->swishrank, map($hit->$_, @properties)); -# } else { -# printf ($hit_fmt ,$hit->swishdocpath,e($hit->swishtitle) || 'untitled',$hit->swishrank, map($hit->$_, @properties) ); -# -# } -# -## print $_[1]->as_string,"
\n"; -## my @fields = $hit->field_names; -## print "Field '$_' = '", $hit->$_, "'
\n" for sort @fields; -# }, -# maxhits => param('max_hits') || $max_hits, -# \%params, -# ); -# -# die $SWISH::errstr unless $sh; -# -# $hits = $sh->query($s); -# -# if ($hits && $hits > 0) { -# print p,hr; -# printf (x($config->{text}->{hits}),$hits,param('max_hits') || $max_hits,$s); -# } else { -# print p; -# printf (x($config->{text}->{no_hits}),$s,$sh->errstr); -# } -# if ($hits && $hits > 0) { -# print p,hr; -# printf (x($config->{text}->{hits}),$hits,param('max_hits') || $max_hits,$s); -# } else { -# print p; -# printf (x($config->{text}->{no_hits}),$s,$sh->errstr); -# } - my $swish = SWISH::API->new($config->{index}); - $swish->AbortLastError if $swish->Error; - my $results = $swish->Query($s); - my $hits = $results->Hits; - # build pager my $current_page = param('page') || 1; @@ -242,6 +246,22 @@ printf (x($config->{text}->{hits}),$i,$results->Hits,$s); } + my %path2title; + foreach my $p (@{$config->{path2title}->{path}}) { + $path2title{$p->{dir}} = $p->{content}; + } + + # output start of table + print qq{ +
- }; - my $tr_post = qq{ -
+ }; + # html before and after each hit + my $tr_pre = qq{ + + }; for(my $i=$pager->first; $i<=$pager->last; $i++) { @@ -249,10 +269,14 @@ last if (! $result); my @arr; + foreach my $prop (@properties) { if ($prop =~ m/swishdescription/) { - my $tmp = $result->Property($prop); - $tmp =~ s/<[^>]+>//g; + my $tmp = get_snippet( + $result->Property($prop), + @s_elem, + ); + push @arr, $tmp; } else { push @arr, $result->Property($prop); @@ -263,11 +287,21 @@ my $rank = $result->Property("swishrank"); my $host = $result->Property("swishdocpath"); $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url}); + + foreach my $p (keys %path2title) { + if ($host =~ m/$p/i) { + $title =~ s/$path2title{$p}\s*[:-]+\s*//; + $title = $path2title{$p}." :: ".$title; + last; + } + } + print $tr_pre,$i,". "; # print collection name which is not link if ($title =~ s/^(.+? :: )//) { print $1; } + printf($hit_fmt, $host, $title || 'untitled', $rank, @arr); print $tr_post; @@ -278,9 +312,14 @@ my $nav_fmt=qq{ %s }; + if ($pager->current_page() > $pager->first_page) { + param('page', $pager->current_page - 1); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'<<'); + } + if ($pager->previous_set) { param('page', $pager->previous_set); - $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'<<'); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..'); } @@ -297,13 +336,22 @@ if ($pager->next_set) { param('page', $pager->next_set); + $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..'); + } + + if ($pager->current_page() < $pager->last_page) { + param('page', $pager->current_page + 1); $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'>>'); } + if ($config->{text}->{pages}) { + $nav_html = x($config->{text}->{pages})." ".$nav_html; + } + # end html table print qq{
+ }; + my $tr_post = qq{ +
-Pages: $nav_html +$nav_html
};