/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (show annotations)
Tue Aug 31 07:47:05 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 9126 byte(s)
ignore not words (-computer) in queries when highliting

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH::API;
7 use XML::Simple;
8 use Lingua::Spelling::Alternative;
9 use Text::Iconv;
10 use Data::Pageset;
11
12 sub get_snippet {
13
14 # maximum length of context in characters
15 my $cc = 50;
16
17 my $desc = shift || return '';
18 $desc = e($desc);
19
20 # sort words from longer to shorter (for hilighting later)
21 my @words = sort { length($b) <=> length($a) } @_;
22
23 # colors to highlite
24 my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
25
26 # construct regex
27 my $re = qq/^(.*?\\b)(/ . join('|', @words) . qq/)/;
28
29 my $ellip = ' ... ';
30 my $snippet = '';
31
32 #print "<ul>";
33
34 while ($desc =~ s/$re//si) {
35 my ($foo, $match) = ($1,$2);
36
37 #print "<br>desc: <small>$desc</small>\n";
38 #print "<br>foo: <small>$foo<b>$match</b></small>\n";
39
40 if (length($foo) < $cc * 2) {
41 $snippet .= $foo . $match;
42 } else {
43
44 if ($foo =~ m/^(.{0,$cc})(\s.*?\s|\s|)?(.{0,$cc})$/) {
45
46 # print "<li><small>$snippet</small><br>
47 # ",length($1),": <i>$1</i><br>
48 # ",length($2),": <span style=\"color:grey\">$2</span><br>
49 # ",length($3),": <i>$3</i><br>
50 # <b>$match</b>\n";
51
52 if ($snippet) {
53 $snippet .= $1 . $ellip . $3 . $match;
54 } else {
55 $snippet = $ellip . $3 . $match ;
56 }
57
58 } else {
59 # print "<li> <big>SKIP</big> $foo\n";
60 print STDERR "this shouldn't happen!\n";
61 }
62
63 }
64
65 }
66 #print "</ul>";
67
68 # color offset
69 my $i = 0;
70
71 foreach my $w (@words) {
72 $snippet =~ s,(\b\Q$w\E),<span style="background: $colors[$i]; color:black;">$1</span>,gsi;
73 $i++;
74 $i = 0 if ($i > $#colors);
75 }
76
77 $snippet .= $ellip if ($snippet);
78
79 return $snippet;
80 }
81
82 # for pager
83 my $pages_per_set = 20;
84
85 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
86 my $config=XMLin(undef,
87 # keyattr => { label => "value" },
88 forcecontent => 0,
89 ForceArray => [ 'path' ],
90 );
91
92 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
93 sub x {
94 return if (! defined $_[0]);
95 return $from_utf8->convert($_[0]);
96 }
97
98 # Escape <, >, & and ", and to produce valid XML
99 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
100 my $escape_re = join '|' => keys %escape;
101 sub e {
102 my $out;
103 foreach my $v (@_) {
104 $v =~ s/($escape_re)/$escape{$1}/g;
105 $out .= $v;
106 }
107 return $out;
108 }
109
110 my @spellings;
111 # FIX: doesn't work very well
112 if ($config->{findaffix}) {
113 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
114 next if (! -f $findaffix);
115 my $spelling_alt = new Lingua::Spelling::Alternative;
116 $spelling_alt->load_findaffix($findaffix);
117 push @spellings,$spelling_alt;
118 }
119 }
120 if ($config->{affix}) {
121 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
122 next if (! -f $affix);
123 my $spelling_alt = new Lingua::Spelling::Alternative;
124 $spelling_alt->load_affix($affix);
125 push @spellings,$spelling_alt;
126 }
127 }
128
129 my $hits=0;
130 my $max_hits=param('max_hits') || x($config->{max_hits});
131
132 my %labels;
133 foreach (@{$config->{labels}->{label}}) {
134 next if (! $_->{value}); # skip unlimited (0)
135 $labels{$_->{value}} = x($_->{content});
136 }
137
138 my $path;
139 # limit to this path
140 $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
141 my %path_label;
142 my @path_name;
143 foreach (@{$config->{paths}->{path}}) {
144 push @path_name,x($_->{limit});
145 $path_label{$_->{limit}} = x($_->{content});
146 }
147
148 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
149
150 if ($config->{charset}) {
151 print header(-charset=>x($config->{charset}));
152 } else {
153 print header;
154 }
155 print start_html(-title=>x($config->{title})),start_form;
156 print x($config->{text}->{search});
157 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
158 print x($config->{text}->{documents});
159 print textfield('search');
160 print submit(-value=> x($config->{text}->{submit}));
161 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
162 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
163 if (@path_name) {
164 print br,x($config->{text}->{limit});
165 print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
166 } elsif (param('path')) {
167 print hidden(-name=>'path',-values=>param('path'));
168 }
169 print end_form,hr;
170
171 if (param('search')) {
172
173 my $s;
174 # re-write query from +/- to and/and not
175
176 my @s_highlite;
177
178 my $search = param('search');
179
180 # strip spaces
181 $search =~ s/^\s+//;
182 $search =~ s/\s+$//;
183 # fixup search string
184 $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
185 $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
186
187 # extract phrases and put them first
188 while ($search =~ s/\s*("[^"]+")\s*/ /) {
189 $s .= "$1 ";
190 push @s_highlite, $1;
191 }
192
193 my %words;
194
195 foreach (split(/\s+/,$search)) {
196 if (m/^([+-])(\S+)/) {
197 $s.= ($s) ? "and " : "";
198 $s.="not " if ($1 eq "-");
199 if (@spellings && !param('no_affix')) {
200 my $w = $2; $w =~ s/[\*\s]+//g;
201 $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
202 my $or="";
203 foreach my $spelling_alt (@spellings) {
204 $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
205 $or = "or ";
206 }
207 } else {
208 $s.="$2* ";
209 }
210 push @s_highlite, $2 if ($1 ne "-");
211 } else {
212 if (@spellings && !param('no_affix')) {
213 my $w = $_; $w =~ s/[\*\s]+//g;
214 my $or="";
215 foreach my $spelling_alt (@spellings) {
216 $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
217 $or = "or ";
218 }
219 } else {
220 $s.="$_* ";
221 }
222 push @s_highlite, $_;
223 }
224 }
225
226 # fix multiple stars
227 $s=~s/\*\*+/*/g;
228
229 # limit to some path
230 $s = "swishdocpath=($path) and $s" if ($path);
231
232 my %params; # optional parametars for swish
233
234 # default format for output
235 my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
236
237 if (@properties) {
238 $hit_fmt = x($config->{hit}) if (! param('no_properties'));
239 $params{properties} = \@properties;
240 } else {
241 $hit_fmt = x($config->{hit}) if (x($config->{hit}));
242 }
243
244 my $swish = SWISH::API->new($config->{index});
245 $swish->AbortLastError if $swish->Error;
246 my $results = $swish->Query($s);
247 my $hits = $results->Hits;
248
249
250 # build pager
251 my $current_page = param('page') || 1;
252
253 my $pager = Data::Pageset->new({
254 'total_entries' => $hits,
255 'entries_per_page' => $max_hits,
256 'current_page' => $current_page,
257 'pages_per_set' => $pages_per_set,
258 });
259
260 $results->SeekResult( $pager->first - 1 );
261
262 # get number of entries on this page
263 my $i = $pager->entries_on_this_page;
264
265 # print number of hits or error message
266 if ( !$hits ) {
267 printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
268 } else {
269 printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
270 }
271
272 my %path2title;
273 foreach my $p (@{$config->{path2title}->{path}}) {
274 $path2title{$p->{dir}} = $p->{content};
275 }
276
277 # output start of table
278 print qq{
279 <table border="0">
280 };
281 # html before and after each hit
282 my $tr_pre = qq{
283 <tr><td>
284 };
285 my $tr_post = qq{
286 </td></tr>
287 };
288
289 for(my $i=$pager->first; $i<=$pager->last; $i++) {
290
291 my $result = $results->NextResult;
292 last if (! $result);
293
294 my @arr;
295
296 foreach my $prop (@properties) {
297 if ($prop =~ m/swishdescription/) {
298 my $tmp = get_snippet(
299 $result->Property($prop),
300 @s_highlite,
301 );
302
303 push @arr, $tmp;
304 } else {
305 push @arr, $result->Property($prop);
306 }
307 }
308
309 my $title = e($result->Property("swishtitle")) || 'untitled';
310 my $rank = $result->Property("swishrank");
311 my $host = $result->Property("swishdocpath");
312 $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
313
314 foreach my $p (keys %path2title) {
315 if ($host =~ m/$p/i) {
316 $title =~ s/$path2title{$p}\s*[:-]+\s*//;
317 $title = $path2title{$p}." :: ".$title;
318 last;
319 }
320 }
321
322 print $tr_pre,$i,". ";
323 # print collection name which is not link
324 if ($title =~ s/^(.+? :: )//) {
325 print $1;
326 }
327
328 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
329 print $tr_post;
330
331 }
332
333 # pager navigation
334 my $nav_html;
335
336 my $nav_fmt=qq{ <a href="%s">%s</a> };
337
338 if ($pager->current_page() > $pager->first_page) {
339 param('page', $pager->current_page - 1);
340 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
341 }
342
343 if ($pager->previous_set) {
344 param('page', $pager->previous_set);
345 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
346 }
347
348
349 foreach my $p (@{$pager->pages_in_set()}) {
350 next if ($p < 0);
351 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
352 if($p == $pager->current_page()) {
353 $nav_html .= "<b>$p</b> ";
354 } else {
355 param('page', $p);
356 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
357 }
358 }
359
360 if ($pager->next_set) {
361 param('page', $pager->next_set);
362 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
363 }
364
365 if ($pager->current_page() < $pager->last_page) {
366 param('page', $pager->current_page + 1);
367 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
368 }
369
370 if ($config->{text}->{pages}) {
371 $nav_html = x($config->{text}->{pages})." ".$nav_html;
372 }
373
374 # end html table
375 print qq{
376 <tr><td>
377 $nav_html
378 </td></tr>
379 </table>
380 };
381
382
383
384 } else {
385 print p(x($config->{text}->{footer}));
386 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26