/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (hide annotations)
Tue Aug 31 07:47:05 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 9126 byte(s)
ignore not words (-computer) in queries when highliting

1 dpavlin 8 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 59 use SWISH::API;
7 dpavlin 8 use XML::Simple;
8     use Lingua::Spelling::Alternative;
9     use Text::Iconv;
10 dpavlin 59 use Data::Pageset;
11 dpavlin 8
12 dpavlin 81 sub get_snippet {
13    
14 dpavlin 87 # maximum length of context in characters
15     my $cc = 50;
16    
17 dpavlin 81 my $desc = shift || return '';
18 dpavlin 83 $desc = e($desc);
19 dpavlin 86
20 dpavlin 87 # sort words from longer to shorter (for hilighting later)
21     my @words = sort { length($b) <=> length($a) } @_;
22 dpavlin 81
23 dpavlin 87 # colors to highlite
24 dpavlin 81 my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
25    
26 dpavlin 86 # construct regex
27 dpavlin 87 my $re = qq/^(.*?\\b)(/ . join('|', @words) . qq/)/;
28 dpavlin 81
29 dpavlin 87 my $ellip = ' ... ';
30     my $snippet = '';
31    
32     #print "<ul>";
33    
34 dpavlin 86 while ($desc =~ s/$re//si) {
35 dpavlin 87 my ($foo, $match) = ($1,$2);
36 dpavlin 81
37 dpavlin 87 #print "<br>desc: <small>$desc</small>\n";
38     #print "<br>foo: <small>$foo<b>$match</b></small>\n";
39 dpavlin 86
40 dpavlin 87 if (length($foo) < $cc * 2) {
41     $snippet .= $foo . $match;
42     } else {
43    
44     if ($foo =~ m/^(.{0,$cc})(\s.*?\s|\s|)?(.{0,$cc})$/) {
45    
46     # print "<li><small>$snippet</small><br>
47     # ",length($1),": <i>$1</i><br>
48     # ",length($2),": <span style=\"color:grey\">$2</span><br>
49     # ",length($3),": <i>$3</i><br>
50     # <b>$match</b>\n";
51    
52     if ($snippet) {
53     $snippet .= $1 . $ellip . $3 . $match;
54     } else {
55     $snippet = $ellip . $3 . $match ;
56     }
57    
58     } else {
59     # print "<li> <big>SKIP</big> $foo\n";
60     print STDERR "this shouldn't happen!\n";
61     }
62    
63     }
64    
65 dpavlin 86 }
66 dpavlin 87 #print "</ul>";
67 dpavlin 86
68     # color offset
69     my $i = 0;
70    
71 dpavlin 87 foreach my $w (@words) {
72     $snippet =~ s,(\b\Q$w\E),<span style="background: $colors[$i]; color:black;">$1</span>,gsi;
73 dpavlin 86 $i++;
74     $i = 0 if ($i > $#colors);
75     }
76    
77 dpavlin 87 $snippet .= $ellip if ($snippet);
78    
79 dpavlin 81 return $snippet;
80     }
81    
82 dpavlin 59 # for pager
83     my $pages_per_set = 20;
84    
85 dpavlin 8 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
86     my $config=XMLin(undef,
87     # keyattr => { label => "value" },
88     forcecontent => 0,
89 dpavlin 80 ForceArray => [ 'path' ],
90 dpavlin 8 );
91    
92 dpavlin 18 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
93     sub x {
94 dpavlin 62 return if (! defined $_[0]);
95 dpavlin 18 return $from_utf8->convert($_[0]);
96     }
97    
98 dpavlin 29 # Escape <, >, & and ", and to produce valid XML
99     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
100     my $escape_re = join '|' => keys %escape;
101     sub e {
102     my $out;
103     foreach my $v (@_) {
104     $v =~ s/($escape_re)/$escape{$1}/g;
105     $out .= $v;
106     }
107     return $out;
108     }
109 dpavlin 8
110 dpavlin 39 my @spellings;
111 dpavlin 8 # FIX: doesn't work very well
112 dpavlin 16 if ($config->{findaffix}) {
113 dpavlin 41 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
114 dpavlin 80 next if (! -f $findaffix);
115 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
116     $spelling_alt->load_findaffix($findaffix);
117     push @spellings,$spelling_alt;
118     }
119 dpavlin 8 }
120 dpavlin 16 if ($config->{affix}) {
121 dpavlin 41 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
122 dpavlin 80 next if (! -f $affix);
123 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
124     $spelling_alt->load_affix($affix);
125     push @spellings,$spelling_alt;
126     }
127 dpavlin 16 }
128 dpavlin 8
129     my $hits=0;
130 dpavlin 59 my $max_hits=param('max_hits') || x($config->{max_hits});
131 dpavlin 8
132     my %labels;
133     foreach (@{$config->{labels}->{label}}) {
134 dpavlin 59 next if (! $_->{value}); # skip unlimited (0)
135 dpavlin 18 $labels{$_->{value}} = x($_->{content});
136 dpavlin 8 }
137    
138 dpavlin 81 my $path;
139     # limit to this path
140     $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
141 dpavlin 23 my %path_label;
142     my @path_name;
143     foreach (@{$config->{paths}->{path}}) {
144     push @path_name,x($_->{limit});
145     $path_label{$_->{limit}} = x($_->{content});
146     }
147    
148 dpavlin 62 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
149 dpavlin 32
150 dpavlin 16 if ($config->{charset}) {
151 dpavlin 18 print header(-charset=>x($config->{charset}));
152 dpavlin 16 } else {
153     print header;
154     }
155 dpavlin 18 print start_html(-title=>x($config->{title})),start_form;
156     print x($config->{text}->{search});
157 dpavlin 16 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
158 dpavlin 18 print x($config->{text}->{documents});
159 dpavlin 16 print textfield('search');
160 dpavlin 18 print submit(-value=> x($config->{text}->{submit}));
161 dpavlin 39 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
162 dpavlin 32 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
163 dpavlin 23 if (@path_name) {
164     print br,x($config->{text}->{limit});
165     print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
166 dpavlin 81 } elsif (param('path')) {
167     print hidden(-name=>'path',-values=>param('path'));
168 dpavlin 23 }
169 dpavlin 8 print end_form,hr;
170    
171     if (param('search')) {
172    
173     my $s;
174     # re-write query from +/- to and/and not
175    
176 dpavlin 88 my @s_highlite;
177 dpavlin 86
178 dpavlin 8 my $search = param('search');
179 dpavlin 86
180     # strip spaces
181     $search =~ s/^\s+//;
182     $search =~ s/\s+$//;
183     # fixup search string
184 dpavlin 87 $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
185 dpavlin 86 $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
186    
187     # extract phrases and put them first
188 dpavlin 8 while ($search =~ s/\s*("[^"]+")\s*/ /) {
189     $s .= "$1 ";
190 dpavlin 88 push @s_highlite, $1;
191 dpavlin 8 }
192    
193 dpavlin 41 my %words;
194    
195 dpavlin 8 foreach (split(/\s+/,$search)) {
196     if (m/^([+-])(\S+)/) {
197     $s.= ($s) ? "and " : "";
198     $s.="not " if ($1 eq "-");
199 dpavlin 39 if (@spellings && !param('no_affix')) {
200 dpavlin 8 my $w = $2; $w =~ s/[\*\s]+//g;
201     $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
202 dpavlin 39 my $or="";
203     foreach my $spelling_alt (@spellings) {
204     $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
205     $or = "or ";
206     }
207 dpavlin 8 } else {
208     $s.="$2* ";
209     }
210 dpavlin 88 push @s_highlite, $2 if ($1 ne "-");
211 dpavlin 8 } else {
212 dpavlin 39 if (@spellings && !param('no_affix')) {
213 dpavlin 8 my $w = $_; $w =~ s/[\*\s]+//g;
214 dpavlin 39 my $or="";
215     foreach my $spelling_alt (@spellings) {
216     $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
217     $or = "or ";
218     }
219 dpavlin 8 } else {
220     $s.="$_* ";
221     }
222 dpavlin 88 push @s_highlite, $_;
223 dpavlin 8 }
224     }
225    
226 dpavlin 86 # fix multiple stars
227 dpavlin 8 $s=~s/\*\*+/*/g;
228    
229 dpavlin 23 # limit to some path
230 dpavlin 81 $s = "swishdocpath=($path) and $s" if ($path);
231 dpavlin 23
232 dpavlin 22 my %params; # optional parametars for swish
233    
234 dpavlin 32 # default format for output
235     my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
236 dpavlin 21
237 dpavlin 32 if (@properties) {
238     $hit_fmt = x($config->{hit}) if (! param('no_properties'));
239 dpavlin 39 $params{properties} = \@properties;
240 dpavlin 32 } else {
241     $hit_fmt = x($config->{hit}) if (x($config->{hit}));
242     }
243    
244 dpavlin 59 my $swish = SWISH::API->new($config->{index});
245     $swish->AbortLastError if $swish->Error;
246     my $results = $swish->Query($s);
247     my $hits = $results->Hits;
248    
249    
250     # build pager
251     my $current_page = param('page') || 1;
252    
253     my $pager = Data::Pageset->new({
254     'total_entries' => $hits,
255     'entries_per_page' => $max_hits,
256     'current_page' => $current_page,
257     'pages_per_set' => $pages_per_set,
258     });
259    
260     $results->SeekResult( $pager->first - 1 );
261    
262     # get number of entries on this page
263     my $i = $pager->entries_on_this_page;
264    
265     # print number of hits or error message
266     if ( !$hits ) {
267     printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
268     } else {
269     printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
270 dpavlin 57 }
271    
272 dpavlin 80 my %path2title;
273     foreach my $p (@{$config->{path2title}->{path}}) {
274     $path2title{$p->{dir}} = $p->{content};
275     }
276 dpavlin 8
277 dpavlin 83 # output start of table
278     print qq{
279     <table border="0">
280     };
281     # html before and after each hit
282     my $tr_pre = qq{
283     <tr><td>
284     };
285     my $tr_post = qq{
286     </td></tr>
287     };
288    
289 dpavlin 59 for(my $i=$pager->first; $i<=$pager->last; $i++) {
290    
291     my $result = $results->NextResult;
292     last if (! $result);
293    
294     my @arr;
295 dpavlin 81
296 dpavlin 59 foreach my $prop (@properties) {
297     if ($prop =~ m/swishdescription/) {
298 dpavlin 81 my $tmp = get_snippet(
299     $result->Property($prop),
300 dpavlin 88 @s_highlite,
301 dpavlin 81 );
302    
303 dpavlin 59 push @arr, $tmp;
304 dpavlin 18 } else {
305 dpavlin 59 push @arr, $result->Property($prop);
306 dpavlin 18 }
307 dpavlin 59 }
308 dpavlin 18
309 dpavlin 73 my $title = e($result->Property("swishtitle")) || 'untitled';
310     my $rank = $result->Property("swishrank");
311     my $host = $result->Property("swishdocpath");
312     $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
313 dpavlin 80
314     foreach my $p (keys %path2title) {
315     if ($host =~ m/$p/i) {
316     $title =~ s/$path2title{$p}\s*[:-]+\s*//;
317     $title = $path2title{$p}." :: ".$title;
318     last;
319     }
320     }
321    
322 dpavlin 59 print $tr_pre,$i,". ";
323 dpavlin 73 # print collection name which is not link
324     if ($title =~ s/^(.+? :: )//) {
325     print $1;
326 dpavlin 59 }
327 dpavlin 80
328 dpavlin 73 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
329 dpavlin 59 print $tr_post;
330 dpavlin 8
331 dpavlin 59 }
332 dpavlin 8
333 dpavlin 59 # pager navigation
334     my $nav_html;
335 dpavlin 8
336 dpavlin 59 my $nav_fmt=qq{ <a href="%s">%s</a> };
337 dpavlin 8
338 dpavlin 75 if ($pager->current_page() > $pager->first_page) {
339     param('page', $pager->current_page - 1);
340     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
341     }
342    
343 dpavlin 59 if ($pager->previous_set) {
344     param('page', $pager->previous_set);
345 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
346 dpavlin 8 }
347 dpavlin 59
348    
349     foreach my $p (@{$pager->pages_in_set()}) {
350 dpavlin 60 next if ($p < 0);
351 dpavlin 59 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
352     if($p == $pager->current_page()) {
353     $nav_html .= "<b>$p</b> ";
354     } else {
355     param('page', $p);
356     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
357     }
358     }
359    
360     if ($pager->next_set) {
361     param('page', $pager->next_set);
362 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
363     }
364    
365     if ($pager->current_page() < $pager->last_page) {
366     param('page', $pager->current_page + 1);
367 dpavlin 59 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
368     }
369    
370 dpavlin 76 if ($config->{text}->{pages}) {
371     $nav_html = x($config->{text}->{pages})." ".$nav_html;
372     }
373    
374 dpavlin 59 # end html table
375     print qq{
376     <tr><td>
377 dpavlin 76 $nav_html
378 dpavlin 59 </td></tr>
379     </table>
380     };
381    
382    
383    
384 dpavlin 8 } else {
385 dpavlin 18 print p(x($config->{text}->{footer}));
386 dpavlin 8 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26