/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 86 - (hide annotations)
Mon Aug 30 11:16:39 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 8518 byte(s)
better snippets

1 dpavlin 8 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 59 use SWISH::API;
7 dpavlin 8 use XML::Simple;
8     use Lingua::Spelling::Alternative;
9     use Text::Iconv;
10 dpavlin 59 use Data::Pageset;
11 dpavlin 8
12 dpavlin 81
13     sub get_snippet {
14     my $context_chars = 100;
15    
16     my $desc = shift || return '';
17 dpavlin 83 $desc = e($desc);
18 dpavlin 86
19 dpavlin 81 # test if $desc contains any of our query words
20     my @snips;
21    
22     my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
23    
24 dpavlin 86 # construct regex
25     my $re = qq/^.*?(.{$context_chars}?)(\Q/ . join("|",@_) .
26     qq/\E)(.{$context_chars})/;
27 dpavlin 81
28 dpavlin 86 while ($desc =~ s/$re//si) {
29     my ($bef,$qm,$af) = ($1, $2, $3);
30 dpavlin 81
31 dpavlin 86 # no partial words...
32     $bef =~ s,^\S+\s+|\s+\S+$,,gs;
33     $af =~ s,^\S+\s+|\s+\S+$,,gs;
34 dpavlin 81
35 dpavlin 86 push @snips, "$bef $qm $af";
36 dpavlin 81 }
37 dpavlin 86
38 dpavlin 81 my $ellip = ' ... ';
39 dpavlin 86 my $snippet;
40    
41     if (@snips) {
42     $snippet = $ellip. join($ellip, @snips) . $ellip;
43     } else {
44     return '';
45     }
46    
47     # color offset
48     my $i = 0;
49    
50     foreach my $w (@_) {
51     $snippet =~ s,(\Q$w\E),<span style="background: $colors[$i]; color:black;">$1</span>,gsi;
52     $i++;
53     $i = 0 if ($i > $#colors);
54     }
55    
56 dpavlin 81 return $snippet;
57     }
58    
59 dpavlin 59 # for pager
60     my $pages_per_set = 20;
61    
62 dpavlin 8 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
63     my $config=XMLin(undef,
64     # keyattr => { label => "value" },
65     forcecontent => 0,
66 dpavlin 80 ForceArray => [ 'path' ],
67 dpavlin 8 );
68    
69 dpavlin 18 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
70     sub x {
71 dpavlin 62 return if (! defined $_[0]);
72 dpavlin 18 return $from_utf8->convert($_[0]);
73     }
74    
75 dpavlin 29 # Escape <, >, & and ", and to produce valid XML
76     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
77     my $escape_re = join '|' => keys %escape;
78     sub e {
79     my $out;
80     foreach my $v (@_) {
81     $v =~ s/($escape_re)/$escape{$1}/g;
82     $out .= $v;
83     }
84     return $out;
85     }
86 dpavlin 8
87 dpavlin 39 my @spellings;
88 dpavlin 8 # FIX: doesn't work very well
89 dpavlin 16 if ($config->{findaffix}) {
90 dpavlin 41 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
91 dpavlin 80 next if (! -f $findaffix);
92 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
93     $spelling_alt->load_findaffix($findaffix);
94     push @spellings,$spelling_alt;
95     }
96 dpavlin 8 }
97 dpavlin 16 if ($config->{affix}) {
98 dpavlin 41 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
99 dpavlin 80 next if (! -f $affix);
100 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
101     $spelling_alt->load_affix($affix);
102     push @spellings,$spelling_alt;
103     }
104 dpavlin 16 }
105 dpavlin 8
106     my $hits=0;
107 dpavlin 59 my $max_hits=param('max_hits') || x($config->{max_hits});
108 dpavlin 8
109     my %labels;
110     foreach (@{$config->{labels}->{label}}) {
111 dpavlin 59 next if (! $_->{value}); # skip unlimited (0)
112 dpavlin 18 $labels{$_->{value}} = x($_->{content});
113 dpavlin 8 }
114    
115 dpavlin 81 my $path;
116     # limit to this path
117     $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
118 dpavlin 23 my %path_label;
119     my @path_name;
120     foreach (@{$config->{paths}->{path}}) {
121     push @path_name,x($_->{limit});
122     $path_label{$_->{limit}} = x($_->{content});
123     }
124    
125 dpavlin 62 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
126 dpavlin 32
127 dpavlin 16 if ($config->{charset}) {
128 dpavlin 18 print header(-charset=>x($config->{charset}));
129 dpavlin 16 } else {
130     print header;
131     }
132 dpavlin 18 print start_html(-title=>x($config->{title})),start_form;
133     print x($config->{text}->{search});
134 dpavlin 16 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
135 dpavlin 18 print x($config->{text}->{documents});
136 dpavlin 16 print textfield('search');
137 dpavlin 18 print submit(-value=> x($config->{text}->{submit}));
138 dpavlin 39 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
139 dpavlin 32 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
140 dpavlin 23 if (@path_name) {
141     print br,x($config->{text}->{limit});
142     print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
143 dpavlin 81 } elsif (param('path')) {
144     print hidden(-name=>'path',-values=>param('path'));
145 dpavlin 23 }
146 dpavlin 8 print end_form,hr;
147    
148     if (param('search')) {
149    
150     my $s;
151     # re-write query from +/- to and/and not
152    
153 dpavlin 86 my @s_elem;
154    
155 dpavlin 8 my $search = param('search');
156 dpavlin 86
157     # strip spaces
158     $search =~ s/^\s+//;
159     $search =~ s/\s+$//;
160     # fixup search string
161     $search=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
162     $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
163    
164     # extract phrases and put them first
165 dpavlin 8 while ($search =~ s/\s*("[^"]+")\s*/ /) {
166     $s .= "$1 ";
167 dpavlin 86 push @s_elem, $1;
168 dpavlin 8 }
169    
170 dpavlin 41 my %words;
171    
172 dpavlin 8 foreach (split(/\s+/,$search)) {
173     if (m/^([+-])(\S+)/) {
174     $s.= ($s) ? "and " : "";
175     $s.="not " if ($1 eq "-");
176 dpavlin 39 if (@spellings && !param('no_affix')) {
177 dpavlin 8 my $w = $2; $w =~ s/[\*\s]+//g;
178     $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
179 dpavlin 39 my $or="";
180     foreach my $spelling_alt (@spellings) {
181     $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
182     $or = "or ";
183     }
184 dpavlin 8 } else {
185     $s.="$2* ";
186     }
187 dpavlin 86 push @s_elem, $2;
188 dpavlin 8 } else {
189 dpavlin 39 if (@spellings && !param('no_affix')) {
190 dpavlin 8 my $w = $_; $w =~ s/[\*\s]+//g;
191 dpavlin 39 my $or="";
192     foreach my $spelling_alt (@spellings) {
193     $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
194     $or = "or ";
195     }
196 dpavlin 8 } else {
197     $s.="$_* ";
198     }
199 dpavlin 86 push @s_elem, $_;
200 dpavlin 8 }
201     }
202    
203 dpavlin 86 # fix multiple stars
204 dpavlin 8 $s=~s/\*\*+/*/g;
205    
206 dpavlin 23 # limit to some path
207 dpavlin 81 $s = "swishdocpath=($path) and $s" if ($path);
208 dpavlin 23
209 dpavlin 22 my %params; # optional parametars for swish
210    
211 dpavlin 32 # default format for output
212     my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
213 dpavlin 21
214 dpavlin 32 if (@properties) {
215     $hit_fmt = x($config->{hit}) if (! param('no_properties'));
216 dpavlin 39 $params{properties} = \@properties;
217 dpavlin 32 } else {
218     $hit_fmt = x($config->{hit}) if (x($config->{hit}));
219     }
220    
221 dpavlin 59 my $swish = SWISH::API->new($config->{index});
222     $swish->AbortLastError if $swish->Error;
223     my $results = $swish->Query($s);
224     my $hits = $results->Hits;
225    
226    
227     # build pager
228     my $current_page = param('page') || 1;
229    
230     my $pager = Data::Pageset->new({
231     'total_entries' => $hits,
232     'entries_per_page' => $max_hits,
233     'current_page' => $current_page,
234     'pages_per_set' => $pages_per_set,
235     });
236    
237     $results->SeekResult( $pager->first - 1 );
238    
239     # get number of entries on this page
240     my $i = $pager->entries_on_this_page;
241    
242     # print number of hits or error message
243     if ( !$hits ) {
244     printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
245     } else {
246     printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
247 dpavlin 57 }
248    
249 dpavlin 80 my %path2title;
250     foreach my $p (@{$config->{path2title}->{path}}) {
251     $path2title{$p->{dir}} = $p->{content};
252     }
253 dpavlin 8
254 dpavlin 83 # output start of table
255     print qq{
256     <table border="0">
257     };
258     # html before and after each hit
259     my $tr_pre = qq{
260     <tr><td>
261     };
262     my $tr_post = qq{
263     </td></tr>
264     };
265    
266 dpavlin 59 for(my $i=$pager->first; $i<=$pager->last; $i++) {
267    
268     my $result = $results->NextResult;
269     last if (! $result);
270    
271     my @arr;
272 dpavlin 81
273 dpavlin 59 foreach my $prop (@properties) {
274     if ($prop =~ m/swishdescription/) {
275 dpavlin 81 my $tmp = get_snippet(
276     $result->Property($prop),
277 dpavlin 86 @s_elem,
278 dpavlin 81 );
279    
280 dpavlin 59 push @arr, $tmp;
281 dpavlin 18 } else {
282 dpavlin 59 push @arr, $result->Property($prop);
283 dpavlin 18 }
284 dpavlin 59 }
285 dpavlin 18
286 dpavlin 73 my $title = e($result->Property("swishtitle")) || 'untitled';
287     my $rank = $result->Property("swishrank");
288     my $host = $result->Property("swishdocpath");
289     $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
290 dpavlin 80
291     foreach my $p (keys %path2title) {
292     if ($host =~ m/$p/i) {
293     $title =~ s/$path2title{$p}\s*[:-]+\s*//;
294     $title = $path2title{$p}." :: ".$title;
295     last;
296     }
297     }
298    
299 dpavlin 59 print $tr_pre,$i,". ";
300 dpavlin 73 # print collection name which is not link
301     if ($title =~ s/^(.+? :: )//) {
302     print $1;
303 dpavlin 59 }
304 dpavlin 80
305 dpavlin 73 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
306 dpavlin 59 print $tr_post;
307 dpavlin 8
308 dpavlin 59 }
309 dpavlin 8
310 dpavlin 59 # pager navigation
311     my $nav_html;
312 dpavlin 8
313 dpavlin 59 my $nav_fmt=qq{ <a href="%s">%s</a> };
314 dpavlin 8
315 dpavlin 75 if ($pager->current_page() > $pager->first_page) {
316     param('page', $pager->current_page - 1);
317     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
318     }
319    
320 dpavlin 59 if ($pager->previous_set) {
321     param('page', $pager->previous_set);
322 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
323 dpavlin 8 }
324 dpavlin 59
325    
326     foreach my $p (@{$pager->pages_in_set()}) {
327 dpavlin 60 next if ($p < 0);
328 dpavlin 59 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
329     if($p == $pager->current_page()) {
330     $nav_html .= "<b>$p</b> ";
331     } else {
332     param('page', $p);
333     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
334     }
335     }
336    
337     if ($pager->next_set) {
338     param('page', $pager->next_set);
339 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
340     }
341    
342     if ($pager->current_page() < $pager->last_page) {
343     param('page', $pager->current_page + 1);
344 dpavlin 59 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
345     }
346    
347 dpavlin 76 if ($config->{text}->{pages}) {
348     $nav_html = x($config->{text}->{pages})." ".$nav_html;
349     }
350    
351 dpavlin 59 # end html table
352     print qq{
353     <tr><td>
354 dpavlin 76 $nav_html
355 dpavlin 59 </td></tr>
356     </table>
357     };
358    
359    
360    
361 dpavlin 8 } else {
362 dpavlin 18 print p(x($config->{text}->{footer}));
363 dpavlin 8 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26