1 |
dpavlin |
89 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
package FormatResult; |
4 |
|
|
|
5 |
|
|
use strict; |
6 |
|
|
|
7 |
|
|
=head1 NAME |
8 |
|
|
|
9 |
|
|
FormatResult - functions to pretty print results from search engine |
10 |
|
|
|
11 |
|
|
=head1 SYNOPSIS |
12 |
|
|
|
13 |
|
|
print FormatResult::get_snippet($text, @words); |
14 |
|
|
|
15 |
|
|
print FormatResult::highlite_words(\$text, \@words); |
16 |
|
|
|
17 |
|
|
=head1 DESCRIPTION |
18 |
|
|
|
19 |
|
|
Big brown hack in current stage. |
20 |
|
|
|
21 |
|
|
=head2 get_snippet |
22 |
|
|
|
23 |
|
|
FormatResult::context_chars = 20; |
24 |
|
|
my $text = FormatResult::get_snippet($text, @words); |
25 |
|
|
|
26 |
|
|
This functions will return snippet with words in it, showing |
27 |
|
|
C<FormatResult::context_chars> characters around each searched word. |
28 |
|
|
It's clever enough to find word delimiters and generally do the right thing. |
29 |
|
|
|
30 |
|
|
It will call C<highlite_words> automatically. |
31 |
|
|
|
32 |
|
|
=cut |
33 |
|
|
|
34 |
|
|
# maximum length of context in characters |
35 |
|
|
my $context_chars = 50; |
36 |
|
|
|
37 |
|
|
sub get_snippet { |
38 |
|
|
|
39 |
|
|
my $desc = shift || return ''; |
40 |
|
|
|
41 |
|
|
# construct regex |
42 |
|
|
my $re = qq/^(.*?\\b)(/ . join('|', @_) . qq/)/; |
43 |
|
|
|
44 |
|
|
my $ellip = ' ... '; |
45 |
|
|
my $snippet = ''; |
46 |
|
|
|
47 |
|
|
#print "<ul>"; |
48 |
|
|
|
49 |
|
|
while ($desc =~ s/$re//si) { |
50 |
|
|
my ($foo, $match) = ($1,$2); |
51 |
|
|
|
52 |
|
|
#print "<br>desc: <small>$desc</small>\n"; |
53 |
|
|
#print "<br>foo: <small>$foo<b>$match</b></small>\n"; |
54 |
|
|
|
55 |
|
|
if (length($foo) < $context_chars * 2) { |
56 |
|
|
$snippet .= $foo . $match; |
57 |
|
|
} else { |
58 |
|
|
|
59 |
|
|
if ($foo =~ m/^(.{0,$context_chars})(\s.*?\s|\s|)?(.{0,$context_chars})$/) { |
60 |
|
|
|
61 |
|
|
# print "<li><small>$snippet</small><br> |
62 |
|
|
# ",length($1),": <i>$1</i><br> |
63 |
|
|
# ",length($2),": <span style=\"color:grey\">$2</span><br> |
64 |
|
|
# ",length($3),": <i>$3</i><br> |
65 |
|
|
# <b>$match</b>\n"; |
66 |
|
|
|
67 |
|
|
if ($snippet) { |
68 |
|
|
$snippet .= $1 . $ellip . $3 . $match; |
69 |
|
|
} else { |
70 |
|
|
$snippet = $ellip . $3 . $match ; |
71 |
|
|
} |
72 |
|
|
|
73 |
|
|
} else { |
74 |
|
|
# print "<li> <big>SKIP</big> $foo\n"; |
75 |
|
|
print STDERR "this shouldn't happen!\n"; |
76 |
|
|
} |
77 |
|
|
|
78 |
|
|
} |
79 |
|
|
|
80 |
|
|
} |
81 |
|
|
#print "</ul>"; |
82 |
|
|
|
83 |
|
|
$snippet .= $ellip if ($snippet); |
84 |
|
|
|
85 |
|
|
return highlite_words(\$snippet,\@_); |
86 |
|
|
} |
87 |
|
|
|
88 |
|
|
=head2 highlite_words |
89 |
|
|
|
90 |
|
|
my $text = FormatResult::highlite_words(\$text, \@words); |
91 |
|
|
|
92 |
|
|
This is stand-alone function which does highlite. It cycle colors |
93 |
|
|
and generate html. Have in mind that it takes ref arguments. |
94 |
|
|
|
95 |
dpavlin |
90 |
C<@words> can also be array of arrays if you have alternative spellings |
96 |
|
|
for each word (e.g. using C<Lingua::Spelling::Alternative>). |
97 |
|
|
|
98 |
dpavlin |
89 |
=cut |
99 |
|
|
|
100 |
|
|
sub highlite_words { |
101 |
|
|
my ($snip_ref,$words_ref) = @_; |
102 |
|
|
|
103 |
|
|
return '' if (! $snip_ref || ! $words_ref); |
104 |
|
|
|
105 |
|
|
# sort words from longer to shorter (for hilighting later) |
106 |
|
|
my @words = sort { length($b) <=> length($a) } @{$words_ref}; |
107 |
|
|
|
108 |
|
|
# colors to highlite |
109 |
dpavlin |
91 |
my @colors = ('#ffff66','#a0ffff','#99ff99','#ff9999','#ff66ff'); |
110 |
dpavlin |
89 |
|
111 |
|
|
# color offset |
112 |
|
|
my $i = 0; |
113 |
|
|
|
114 |
|
|
foreach my $w (@words) { |
115 |
dpavlin |
90 |
if ($w =~ m/ARRAY/) { |
116 |
|
|
foreach my $ww (sort { length($b) <=> length($a) } @{$w}) { |
117 |
dpavlin |
107 |
$$snip_ref =~ s,(\b\Q$ww\E\w*),<span style="background: $colors[$i]; color:black;">$1</span>,gsi; |
118 |
dpavlin |
90 |
} |
119 |
|
|
} else { |
120 |
dpavlin |
107 |
$$snip_ref =~ s,(\b\Q$w\E\w*),<span style="background: $colors[$i]; color:black;">$1</span>,gsi; |
121 |
dpavlin |
90 |
} |
122 |
dpavlin |
89 |
$i++; |
123 |
|
|
$i = 0 if ($i > $#colors); |
124 |
|
|
} |
125 |
|
|
|
126 |
|
|
return $$snip_ref; |
127 |
|
|
} |
128 |
|
|
|
129 |
|
|
=head1 AUTHOR |
130 |
|
|
|
131 |
|
|
Dobrica Pavlinusic C<dpavlin@rot13.org>, L<http://www.rot13.org/~dpavlin/> |
132 |
|
|
|
133 |
|
|
=cut |
134 |
|
|
|
135 |
|
|
1; |