--- trunk/html/swish.cgi 2004/08/30 11:16:39 86
+++ trunk/html/swish.cgi 2004/08/30 16:59:17 87
@@ -9,50 +9,73 @@
use Text::Iconv;
use Data::Pageset;
-
sub get_snippet {
- my $context_chars = 100;
+
+ # maximum length of context in characters
+ my $cc = 50;
my $desc = shift || return '';
$desc = e($desc);
- # test if $desc contains any of our query words
- my @snips;
+ # sort words from longer to shorter (for highlighting later)
+ my @words = sort { length($b) <=> length($a) } @_;
+ # colors used for highlighting
my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
# construct regex
- my $re = qq/^.*?(.{$context_chars}?)(\Q/ . join("|",@_) .
- qq/\E)(.{$context_chars})/;
+ my $re = qq/^(.*?\\b)(/ . join('|', @words) . qq/)/;
+
+ my $ellip = ' ... ';
+ my $snippet = '';
+
+#print "
";
while ($desc =~ s/$re//si) {
- my ($bef,$qm,$af) = ($1, $2, $3);
-
- # no partial words...
- $bef =~ s,^\S+\s+|\s+\S+$,,gs;
- $af =~ s,^\S+\s+|\s+\S+$,,gs;
+ my ($foo, $match) = ($1,$2);
- push @snips, "$bef $qm $af";
- }
+#print "
desc: $desc\n";
+#print "
foo: $foo$match\n";
+
+ if (length($foo) < $cc * 2) {
+ $snippet .= $foo . $match;
+ } else {
+
+ if ($foo =~ m/^(.{0,$cc})(\s.*?\s|\s|)?(.{0,$cc})$/) {
+
+# print "- $snippet
+# ",length($1),": $1
+# ",length($2),": $2
+# ",length($3),": $3
+# $match\n";
+
+ if ($snippet) {
+ $snippet .= $1 . $ellip . $3 . $match;
+ } else {
+ $snippet = $ellip . $3 . $match ;
+ }
+
+ } else {
+# print " - SKIP $foo\n";
+ print STDERR "this shouldn't happen!\n";
+ }
+
+ }
- my $ellip = ' ... ';
- my $snippet;
-
- if (@snips) {
- $snippet = $ellip. join($ellip, @snips) . $ellip;
- } else {
- return '';
}
+#print "
";
# color offset
my $i = 0;
- foreach my $w (@_) {
- $snippet =~ s,(\Q$w\E),$1,gsi;
+ foreach my $w (@words) {
+ $snippet =~ s,(\b\Q$w\E),$1,gsi;
$i++;
$i = 0 if ($i > $#colors);
}
+ $snippet .= $ellip if ($snippet);
+
return $snippet;
}
@@ -158,7 +181,7 @@
$search =~ s/^\s+//;
$search =~ s/\s+$//;
# fixup search string
- $search=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
+ $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
$search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
# extract phrases and put them first