--- trunk/html/swish.cgi 2004/04/06 19:21:07 73
+++ trunk/html/swish.cgi 2004/08/30 11:16:39 86
@@ -9,6 +9,53 @@
use Text::Iconv;
use Data::Pageset;
+
+# get_snippet($description, @query_elements)
+#
+# Build a short excerpt of $description around each place where one of the
+# query elements occurs, and highlight the hits.
+#
+#   $description    - raw property text; it is passed through e() before
+#                     anything else (presumably HTML-escaping -- e() is
+#                     defined elsewhere in this file, confirm there)
+#   @query_elements - words / phrases as collected from the search box;
+#                     they may still carry swish query decoration
+#                     (surrounding double quotes, wildcard stars)
+#
+# Returns the excerpt as a string of the form ' ... ctx hit ctx ... ', or
+# the empty string when the description is false, no usable query element
+# is given, or nothing matches.
+sub get_snippet {
+    # maximum number of characters of context kept on each side of a hit
+    my $context_chars = 100;
+
+    my $desc = shift || return '';
+    $desc = e($desc);
+
+    my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
+
+    # Turn every query element into a literal (quotemeta'd) pattern and
+    # assign it a highlight colour. Query decoration is stripped first,
+    # because neither the quotes nor the stars occur in the document text.
+    my (%color_of, @patterns);
+    my $i = 0;    # color offset
+
+    foreach my $w (@_) {
+        next unless defined $w;
+
+        (my $word = $w) =~ s/^["*\s]+//;
+        $word =~ s/["*\s]+$//;
+        next unless length $word;
+
+        # lower-cased, whitespace-normalised form identifies the element
+        my @parts = split(/\s+/, $word);
+        my $key = lc join(' ', @parts);
+        next if exists $color_of{$key};
+
+        $color_of{$key} = $colors[$i];
+        $i++;
+        $i = 0 if ($i > $#colors);
+
+        # words of a phrase may be separated by any run of whitespace
+        push @patterns, join('\s+', map { quotemeta($_) } @parts);
+    }
+
+    # an empty alternation would match everywhere
+    return '' unless @patterns;
+
+    # longest alternative first, so "foobar" is preferred over "foo"
+    my $alt = join('|', sort { length($b) <=> length($a) } @patterns);
+    my $re = qr/($alt)/i;
+
+    my @snips;
+    my $desc_len = length $desc;
+    my $consumed = 0;    # everything before this offset is already used
+
+    while ($desc =~ /$re/g) {
+        my $qm = $1;
+        my ($start, $end) = ($-[1], $+[1]);
+
+        # up to $context_chars before the hit, without reaching back
+        # into text that belongs to the previous snippet
+        my $bef_start = $start - $context_chars;
+        $bef_start = $consumed if ($bef_start < $consumed);
+        my $bef = substr($desc, $bef_start, $start - $bef_start);
+
+        # up to $context_chars after the hit
+        my $af = substr($desc, $end, $context_chars);
+
+        # continue searching after the trailing context
+        $consumed = $end + length $af;
+        pos($desc) = $consumed;
+
+        # no partial words... but only at the outer edges, and only when
+        # the context really was cut; words next to the hit are kept
+        $bef =~ s/^\S+\s+// if ($bef_start > 0);
+        $af =~ s/\s+\S+$// if ($consumed < $desc_len);
+
+        push @snips, $bef . $qm . $af;
+    }
+
+    return '' unless @snips;
+
+    my $ellip = ' ... ';
+    my $snippet = $ellip . join($ellip, @snips) . $ellip;
+
+    # Highlight all hits in one pass, so that markup inserted for one
+    # word can never be matched by another word.
+    # NOTE(review): the replacement markup was missing from this listing
+    # (the substitution replaced $1 with $1); a <span> with a background
+    # colour is assumed here -- confirm against the page templates.
+    $snippet =~ s{$re}{
+        my $hit = $1;
+        my $color = $color_of{ lc join(' ', split(/\s+/, $hit)) } || $colors[0];
+        '<span style="background-color: ' . $color . '">' . $hit . '</span>';
+    }ge;
+
+    return $snippet;
+}
+
# for pager
my $pages_per_set = 20;
@@ -16,6 +63,7 @@
my $config=XMLin(undef,
# keyattr => { label => "value" },
forcecontent => 0,
+ ForceArray => [ 'path' ],
);
my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
@@ -40,6 +88,7 @@
# FIX: doesn't work very well
if ($config->{findaffix}) {
foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
+ next if (! -f $findaffix);
my $spelling_alt = new Lingua::Spelling::Alternative;
$spelling_alt->load_findaffix($findaffix);
push @spellings,$spelling_alt;
@@ -47,6 +96,7 @@
}
if ($config->{affix}) {
foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
+ next if (! -f $affix);
my $spelling_alt = new Lingua::Spelling::Alternative;
$spelling_alt->load_affix($affix);
push @spellings,$spelling_alt;
@@ -62,12 +112,12 @@
$labels{$_->{value}} = x($_->{content});
}
-my $path = param('path'); # limit to this path
+my $path;
+# limit to this path
+$path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
my %path_label;
my @path_name;
foreach (@{$config->{paths}->{path}}) {
-
-print STDERR "##: $_->{limit}",x($_->{content}),"\n";
push @path_name,x($_->{limit});
$path_label{$_->{limit}} = x($_->{content});
}
@@ -90,6 +140,8 @@
if (@path_name) {
print br,x($config->{text}->{limit});
print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
+} elsif (param('path')) {
+ print hidden(-name=>'path',-values=>param('path'));
}
print end_form,hr;
@@ -98,13 +150,22 @@
my $s;
# re-write query from +/- to and/and not
+ my @s_elem;
+
my $search = param('search');
- my $s_phrase = "";
+
+ # strip spaces
+ $search =~ s/^\s+//;
+ $search =~ s/\s+$//;
+ # fixup search string
+ $search=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
+ $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
+
+ # extract phrases and put them first
while ($search =~ s/\s*("[^"]+")\s*/ /) {
$s .= "$1 ";
+ push @s_elem, $1;
}
- $search =~ s/^\s+//;
- $search =~ s/\s+$//;
my %words;
@@ -123,10 +184,10 @@
} else {
$s.="$2* ";
}
+ push @s_elem, $2;
} else {
if (@spellings && !param('no_affix')) {
my $w = $_; $w =~ s/[\*\s]+//g;
- #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
my $or="";
foreach my $spelling_alt (@spellings) {
$s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
@@ -135,34 +196,21 @@
} else {
$s.="$_* ";
}
+ push @s_elem, $_;
}
}
- # fixup search string
- $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
- $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
+ # fix multiple stars
$s=~s/\*\*+/*/g;
# limit to some path
- $s = "swishdocpath=(\"*$path*\") and $s" if ($path);
+ $s = "swishdocpath=($path) and $s" if ($path);
my %params; # optional parametars for swish
# default format for output
my $hit_fmt = "%s [%s]
\n";
- # output start of table
- print qq{
-
- };
- # html before and after each hit
- my $tr_pre = qq{
-
- };
- my $tr_post = qq{
- |
- };
-
if (@properties) {
$hit_fmt = x($config->{hit}) if (! param('no_properties'));
$params{properties} = \@properties;
@@ -170,56 +218,12 @@
$hit_fmt = x($config->{hit}) if (x($config->{hit}));
}
-# my $sh = SWISH->connect('Fork',
-# prog => x($config->{prog}),
-# indexes => x($config->{index}),
-# results => sub {
-# my ($sh,$hit) = @_;
-#
-# if ($config->{url}) {
-# printf ($hit_fmt ,"http://".virtual_host().x($config->{url}).$hit->swishdocpath,e($hit->swishtitle) || 'untitled',$hit->swishrank, map($hit->$_, @properties));
-# } else {
-# printf ($hit_fmt ,$hit->swishdocpath,e($hit->swishtitle) || 'untitled',$hit->swishrank, map($hit->$_, @properties) );
-#
-# }
-#
-## print $_[1]->as_string,"
\n";
-## my @fields = $hit->field_names;
-## print "Field '$_' = '", $hit->$_, "'
\n" for sort @fields;
-# },
-# maxhits => param('max_hits') || $max_hits,
-# \%params,
-# );
-#
-# die $SWISH::errstr unless $sh;
-#
-# $hits = $sh->query($s);
-#
-# if ($hits && $hits > 0) {
-# print p,hr;
-# printf (x($config->{text}->{hits}),$hits,param('max_hits') || $max_hits,$s);
-# } else {
-# print p;
-# printf (x($config->{text}->{no_hits}),$s,$sh->errstr);
-# }
-# if ($hits && $hits > 0) {
-# print p,hr;
-# printf (x($config->{text}->{hits}),$hits,param('max_hits') || $max_hits,$s);
-# } else {
-# print p;
-# printf (x($config->{text}->{no_hits}),$s,$sh->errstr);
-# }
-
my $swish = SWISH::API->new($config->{index});
-
$swish->AbortLastError if $swish->Error;
-
my $results = $swish->Query($s);
-
my $hits = $results->Hits;
-
# build pager
my $current_page = param('page') || 1;
@@ -242,6 +246,22 @@
printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
}
+ my %path2title;
+ foreach my $p (@{$config->{path2title}->{path}}) {
+ $path2title{$p->{dir}} = $p->{content};
+ }
+
+ # output start of table
+ print qq{
+
+ };
+ # html before and after each hit
+ my $tr_pre = qq{
+
+ };
+ my $tr_post = qq{
+ |
+ };
for(my $i=$pager->first; $i<=$pager->last; $i++) {
@@ -249,10 +269,14 @@
last if (! $result);
my @arr;
+
foreach my $prop (@properties) {
if ($prop =~ m/swishdescription/) {
- my $tmp = $result->Property($prop);
- $tmp =~ s/<[^>]+>//g;
+ my $tmp = get_snippet(
+ $result->Property($prop),
+ @s_elem,
+ );
+
push @arr, $tmp;
} else {
push @arr, $result->Property($prop);
@@ -263,11 +287,21 @@
my $rank = $result->Property("swishrank");
my $host = $result->Property("swishdocpath");
$host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
+
+ foreach my $p (keys %path2title) {
+ if ($host =~ m/$p/i) {
+ $title =~ s/$path2title{$p}\s*[:-]+\s*//;
+ $title = $path2title{$p}." :: ".$title;
+ last;
+ }
+ }
+
print $tr_pre,$i,". ";
# print collection name which is not link
if ($title =~ s/^(.+? :: )//) {
print $1;
}
+
printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
print $tr_post;
@@ -278,9 +312,14 @@
my $nav_fmt=qq{ %s };
+ if ($pager->current_page() > $pager->first_page) {
+ param('page', $pager->current_page - 1);
+ $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'<<');
+ }
+
if ($pager->previous_set) {
param('page', $pager->previous_set);
- $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'<<');
+ $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
}
@@ -297,13 +336,22 @@
if ($pager->next_set) {
param('page', $pager->next_set);
+ $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
+ }
+
+ if ($pager->current_page() < $pager->last_page) {
+ param('page', $pager->current_page + 1);
$nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'>>');
}
+ if ($config->{text}->{pages}) {
+ $nav_html = x($config->{text}->{pages})." ".$nav_html;
+ }
+
# end html table
print qq{
-Pages: $nav_html
+$nav_html
|
};