/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 86 - (show annotations)
Mon Aug 30 11:16:39 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 8518 byte(s)
better snippets

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH::API;
7 use XML::Simple;
8 use Lingua::Spelling::Alternative;
9 use Text::Iconv;
10 use Data::Pageset;
11
12
13 sub get_snippet {
14 my $context_chars = 100;
15
16 my $desc = shift || return '';
17 $desc = e($desc);
18
19 # test if $desc contains any of our query words
20 my @snips;
21
22 my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
23
24 # construct regex
25 my $re = qq/^.*?(.{$context_chars}?)(\Q/ . join("|",@_) .
26 qq/\E)(.{$context_chars})/;
27
28 while ($desc =~ s/$re//si) {
29 my ($bef,$qm,$af) = ($1, $2, $3);
30
31 # no partial words...
32 $bef =~ s,^\S+\s+|\s+\S+$,,gs;
33 $af =~ s,^\S+\s+|\s+\S+$,,gs;
34
35 push @snips, "$bef $qm $af";
36 }
37
38 my $ellip = ' ... ';
39 my $snippet;
40
41 if (@snips) {
42 $snippet = $ellip. join($ellip, @snips) . $ellip;
43 } else {
44 return '';
45 }
46
47 # color offset
48 my $i = 0;
49
50 foreach my $w (@_) {
51 $snippet =~ s,(\Q$w\E),<span style="background: $colors[$i]; color:black;">$1</span>,gsi;
52 $i++;
53 $i = 0 if ($i > $#colors);
54 }
55
56 return $snippet;
57 }
58
59 # for pager
60 my $pages_per_set = 20;
61
62 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
63 my $config=XMLin(undef,
64 # keyattr => { label => "value" },
65 forcecontent => 0,
66 ForceArray => [ 'path' ],
67 );
68
69 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
70 sub x {
71 return if (! defined $_[0]);
72 return $from_utf8->convert($_[0]);
73 }
74
75 # Escape <, >, & and ", and to produce valid XML
76 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
77 my $escape_re = join '|' => keys %escape;
78 sub e {
79 my $out;
80 foreach my $v (@_) {
81 $v =~ s/($escape_re)/$escape{$1}/g;
82 $out .= $v;
83 }
84 return $out;
85 }
86
87 my @spellings;
88 # FIX: doesn't work very well
89 if ($config->{findaffix}) {
90 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
91 next if (! -f $findaffix);
92 my $spelling_alt = new Lingua::Spelling::Alternative;
93 $spelling_alt->load_findaffix($findaffix);
94 push @spellings,$spelling_alt;
95 }
96 }
97 if ($config->{affix}) {
98 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
99 next if (! -f $affix);
100 my $spelling_alt = new Lingua::Spelling::Alternative;
101 $spelling_alt->load_affix($affix);
102 push @spellings,$spelling_alt;
103 }
104 }
105
106 my $hits=0;
107 my $max_hits=param('max_hits') || x($config->{max_hits});
108
109 my %labels;
110 foreach (@{$config->{labels}->{label}}) {
111 next if (! $_->{value}); # skip unlimited (0)
112 $labels{$_->{value}} = x($_->{content});
113 }
114
115 my $path;
116 # limit to this path
117 $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
118 my %path_label;
119 my @path_name;
120 foreach (@{$config->{paths}->{path}}) {
121 push @path_name,x($_->{limit});
122 $path_label{$_->{limit}} = x($_->{content});
123 }
124
125 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
126
127 if ($config->{charset}) {
128 print header(-charset=>x($config->{charset}));
129 } else {
130 print header;
131 }
132 print start_html(-title=>x($config->{title})),start_form;
133 print x($config->{text}->{search});
134 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
135 print x($config->{text}->{documents});
136 print textfield('search');
137 print submit(-value=> x($config->{text}->{submit}));
138 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
139 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
140 if (@path_name) {
141 print br,x($config->{text}->{limit});
142 print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
143 } elsif (param('path')) {
144 print hidden(-name=>'path',-values=>param('path'));
145 }
146 print end_form,hr;
147
148 if (param('search')) {
149
150 my $s;
151 # re-write query from +/- to and/and not
152
153 my @s_elem;
154
155 my $search = param('search');
156
157 # strip spaces
158 $search =~ s/^\s+//;
159 $search =~ s/\s+$//;
160 # fixup search string
161 $search=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
162 $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
163
164 # extract phrases and put them first
165 while ($search =~ s/\s*("[^"]+")\s*/ /) {
166 $s .= "$1 ";
167 push @s_elem, $1;
168 }
169
170 my %words;
171
172 foreach (split(/\s+/,$search)) {
173 if (m/^([+-])(\S+)/) {
174 $s.= ($s) ? "and " : "";
175 $s.="not " if ($1 eq "-");
176 if (@spellings && !param('no_affix')) {
177 my $w = $2; $w =~ s/[\*\s]+//g;
178 $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
179 my $or="";
180 foreach my $spelling_alt (@spellings) {
181 $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
182 $or = "or ";
183 }
184 } else {
185 $s.="$2* ";
186 }
187 push @s_elem, $2;
188 } else {
189 if (@spellings && !param('no_affix')) {
190 my $w = $_; $w =~ s/[\*\s]+//g;
191 my $or="";
192 foreach my $spelling_alt (@spellings) {
193 $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
194 $or = "or ";
195 }
196 } else {
197 $s.="$_* ";
198 }
199 push @s_elem, $_;
200 }
201 }
202
203 # fix multiple stars
204 $s=~s/\*\*+/*/g;
205
206 # limit to some path
207 $s = "swishdocpath=($path) and $s" if ($path);
208
209 my %params; # optional parametars for swish
210
211 # default format for output
212 my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
213
214 if (@properties) {
215 $hit_fmt = x($config->{hit}) if (! param('no_properties'));
216 $params{properties} = \@properties;
217 } else {
218 $hit_fmt = x($config->{hit}) if (x($config->{hit}));
219 }
220
221 my $swish = SWISH::API->new($config->{index});
222 $swish->AbortLastError if $swish->Error;
223 my $results = $swish->Query($s);
224 my $hits = $results->Hits;
225
226
227 # build pager
228 my $current_page = param('page') || 1;
229
230 my $pager = Data::Pageset->new({
231 'total_entries' => $hits,
232 'entries_per_page' => $max_hits,
233 'current_page' => $current_page,
234 'pages_per_set' => $pages_per_set,
235 });
236
237 $results->SeekResult( $pager->first - 1 );
238
239 # get number of entries on this page
240 my $i = $pager->entries_on_this_page;
241
242 # print number of hits or error message
243 if ( !$hits ) {
244 printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
245 } else {
246 printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
247 }
248
249 my %path2title;
250 foreach my $p (@{$config->{path2title}->{path}}) {
251 $path2title{$p->{dir}} = $p->{content};
252 }
253
254 # output start of table
255 print qq{
256 <table border="0">
257 };
258 # html before and after each hit
259 my $tr_pre = qq{
260 <tr><td>
261 };
262 my $tr_post = qq{
263 </td></tr>
264 };
265
266 for(my $i=$pager->first; $i<=$pager->last; $i++) {
267
268 my $result = $results->NextResult;
269 last if (! $result);
270
271 my @arr;
272
273 foreach my $prop (@properties) {
274 if ($prop =~ m/swishdescription/) {
275 my $tmp = get_snippet(
276 $result->Property($prop),
277 @s_elem,
278 );
279
280 push @arr, $tmp;
281 } else {
282 push @arr, $result->Property($prop);
283 }
284 }
285
286 my $title = e($result->Property("swishtitle")) || 'untitled';
287 my $rank = $result->Property("swishrank");
288 my $host = $result->Property("swishdocpath");
289 $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
290
291 foreach my $p (keys %path2title) {
292 if ($host =~ m/$p/i) {
293 $title =~ s/$path2title{$p}\s*[:-]+\s*//;
294 $title = $path2title{$p}." :: ".$title;
295 last;
296 }
297 }
298
299 print $tr_pre,$i,". ";
300 # print collection name which is not link
301 if ($title =~ s/^(.+? :: )//) {
302 print $1;
303 }
304
305 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
306 print $tr_post;
307
308 }
309
310 # pager navigation
311 my $nav_html;
312
313 my $nav_fmt=qq{ <a href="%s">%s</a> };
314
315 if ($pager->current_page() > $pager->first_page) {
316 param('page', $pager->current_page - 1);
317 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
318 }
319
320 if ($pager->previous_set) {
321 param('page', $pager->previous_set);
322 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
323 }
324
325
326 foreach my $p (@{$pager->pages_in_set()}) {
327 next if ($p < 0);
328 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
329 if($p == $pager->current_page()) {
330 $nav_html .= "<b>$p</b> ";
331 } else {
332 param('page', $p);
333 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
334 }
335 }
336
337 if ($pager->next_set) {
338 param('page', $pager->next_set);
339 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
340 }
341
342 if ($pager->current_page() < $pager->last_page) {
343 param('page', $pager->current_page + 1);
344 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
345 }
346
347 if ($config->{text}->{pages}) {
348 $nav_html = x($config->{text}->{pages})." ".$nav_html;
349 }
350
351 # end html table
352 print qq{
353 <tr><td>
354 $nav_html
355 </td></tr>
356 </table>
357 };
358
359
360
361 } else {
362 print p(x($config->{text}->{footer}));
363 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26