/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 81 - (hide annotations)
Sat Aug 28 22:15:59 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 8391 byte(s)
implement snippets of content and highlighthing of words

1 dpavlin 8 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 59 use SWISH::API;
7 dpavlin 8 use XML::Simple;
8     use Lingua::Spelling::Alternative;
9     use Text::Iconv;
10 dpavlin 59 use Data::Pageset;
11 dpavlin 8
12 dpavlin 81
13     sub get_snippet {
14     my $context_chars = 100;
15    
16     my $desc = shift || return '';
17     # test if $desc contains any of our query words
18     my @snips;
19    
20     my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
21    
22     my $i = 0;
23    
24     for my $q (@_) {
25     if ($desc =~ m/(.*?)(\Q$q\E)(.*)/si) {
26     my $bef = $1;
27     my $qm = $2;
28     my $af = $3;
29     $bef = substr $bef, -$context_chars;
30     $af = substr $af, 0, $context_chars;
31    
32     # no partial words...
33     $af =~ s,^\S+\s+|\s+\S+$,,gs;
34     $bef =~ s,^\S+\s+|\s+\S+$,,gs;
35    
36     push(@snips, "$bef <span style=\"background:".$colors[$i]."\">$qm</span> $af");
37     $i++;
38     $i = 0 if ($i > $#colors);
39     }
40     }
41     my $ellip = ' ... ';
42     my $snippet = $ellip. join($ellip, @snips) . $ellip;
43    
44     return $snippet;
45     }
46    
47 dpavlin 59 # for pager
48     my $pages_per_set = 20;
49    
50 dpavlin 8 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
51     my $config=XMLin(undef,
52     # keyattr => { label => "value" },
53     forcecontent => 0,
54 dpavlin 80 ForceArray => [ 'path' ],
55 dpavlin 8 );
56    
57 dpavlin 18 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
58     sub x {
59 dpavlin 62 return if (! defined $_[0]);
60 dpavlin 18 return $from_utf8->convert($_[0]);
61     }
62    
63 dpavlin 29 # Escape <, >, & and ", and to produce valid XML
64     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
65     my $escape_re = join '|' => keys %escape;
66     sub e {
67     my $out;
68     foreach my $v (@_) {
69     $v =~ s/($escape_re)/$escape{$1}/g;
70     $out .= $v;
71     }
72     return $out;
73     }
74 dpavlin 8
75 dpavlin 39 my @spellings;
76 dpavlin 8 # FIX: doesn't work very well
77 dpavlin 16 if ($config->{findaffix}) {
78 dpavlin 41 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
79 dpavlin 80 next if (! -f $findaffix);
80 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
81     $spelling_alt->load_findaffix($findaffix);
82     push @spellings,$spelling_alt;
83     }
84 dpavlin 8 }
85 dpavlin 16 if ($config->{affix}) {
86 dpavlin 41 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
87 dpavlin 80 next if (! -f $affix);
88 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
89     $spelling_alt->load_affix($affix);
90     push @spellings,$spelling_alt;
91     }
92 dpavlin 16 }
93 dpavlin 8
94     my $hits=0;
95 dpavlin 59 my $max_hits=param('max_hits') || x($config->{max_hits});
96 dpavlin 8
97     my %labels;
98     foreach (@{$config->{labels}->{label}}) {
99 dpavlin 59 next if (! $_->{value}); # skip unlimited (0)
100 dpavlin 18 $labels{$_->{value}} = x($_->{content});
101 dpavlin 8 }
102    
103 dpavlin 81 my $path;
104     # limit to this path
105     $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
106 dpavlin 23 my %path_label;
107     my @path_name;
108     foreach (@{$config->{paths}->{path}}) {
109 dpavlin 39
110     print STDERR "##: $_->{limit}",x($_->{content}),"\n";
111 dpavlin 23 push @path_name,x($_->{limit});
112     $path_label{$_->{limit}} = x($_->{content});
113     }
114    
115 dpavlin 62 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
116 dpavlin 32
117 dpavlin 16 if ($config->{charset}) {
118 dpavlin 18 print header(-charset=>x($config->{charset}));
119 dpavlin 16 } else {
120     print header;
121     }
122 dpavlin 18 print start_html(-title=>x($config->{title})),start_form;
123     print x($config->{text}->{search});
124 dpavlin 16 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
125 dpavlin 18 print x($config->{text}->{documents});
126 dpavlin 16 print textfield('search');
127 dpavlin 18 print submit(-value=> x($config->{text}->{submit}));
128 dpavlin 39 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
129 dpavlin 32 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
130 dpavlin 23 if (@path_name) {
131     print br,x($config->{text}->{limit});
132     print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
133 dpavlin 81 } elsif (param('path')) {
134     print hidden(-name=>'path',-values=>param('path'));
135 dpavlin 23 }
136 dpavlin 8 print end_form,hr;
137    
138     if (param('search')) {
139    
140     my $s;
141     # re-write query from +/- to and/and not
142    
143     my $search = param('search');
144     my $s_phrase = "";
145     while ($search =~ s/\s*("[^"]+")\s*/ /) {
146     $s .= "$1 ";
147     }
148     $search =~ s/^\s+//;
149     $search =~ s/\s+$//;
150    
151 dpavlin 41 my %words;
152    
153 dpavlin 8 foreach (split(/\s+/,$search)) {
154     if (m/^([+-])(\S+)/) {
155     $s.= ($s) ? "and " : "";
156     $s.="not " if ($1 eq "-");
157 dpavlin 39 if (@spellings && !param('no_affix')) {
158 dpavlin 8 my $w = $2; $w =~ s/[\*\s]+//g;
159     $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
160 dpavlin 39 my $or="";
161     foreach my $spelling_alt (@spellings) {
162     $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
163     $or = "or ";
164     }
165 dpavlin 8 } else {
166     $s.="$2* ";
167     }
168     } else {
169 dpavlin 39 if (@spellings && !param('no_affix')) {
170 dpavlin 8 my $w = $_; $w =~ s/[\*\s]+//g;
171     #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
172 dpavlin 39 my $or="";
173     foreach my $spelling_alt (@spellings) {
174     $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
175     $or = "or ";
176     }
177 dpavlin 8 } else {
178     $s.="$_* ";
179     }
180     }
181     }
182    
183     # fixup search string
184     $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
185     $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
186     $s=~s/\*\*+/*/g;
187    
188 dpavlin 23 # limit to some path
189 dpavlin 81 $s = "swishdocpath=($path) and $s" if ($path);
190 dpavlin 23
191 dpavlin 22 my %params; # optional parametars for swish
192    
193 dpavlin 32 # default format for output
194     my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
195 dpavlin 21
196 dpavlin 59 # output start of table
197     print qq{
198     <table border="0">
199     };
200     # html before and after each hit
201     my $tr_pre = qq{
202     <tr><td>
203     };
204     my $tr_post = qq{
205     </td></tr>
206     };
207    
208 dpavlin 32 if (@properties) {
209     $hit_fmt = x($config->{hit}) if (! param('no_properties'));
210 dpavlin 39 $params{properties} = \@properties;
211 dpavlin 32 } else {
212     $hit_fmt = x($config->{hit}) if (x($config->{hit}));
213     }
214    
215 dpavlin 59 my $swish = SWISH::API->new($config->{index});
216     $swish->AbortLastError if $swish->Error;
217     my $results = $swish->Query($s);
218     my $hits = $results->Hits;
219    
220    
221     # build pager
222     my $current_page = param('page') || 1;
223    
224     my $pager = Data::Pageset->new({
225     'total_entries' => $hits,
226     'entries_per_page' => $max_hits,
227     'current_page' => $current_page,
228     'pages_per_set' => $pages_per_set,
229     });
230    
231     $results->SeekResult( $pager->first - 1 );
232    
233     # get number of entries on this page
234     my $i = $pager->entries_on_this_page;
235    
236     # print number of hits or error message
237     if ( !$hits ) {
238     printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
239     } else {
240     printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
241 dpavlin 57 }
242    
243 dpavlin 80 my %path2title;
244     use Data::Dumper;
245     foreach my $p (@{$config->{path2title}->{path}}) {
246     $path2title{$p->{dir}} = $p->{content};
247     }
248 dpavlin 8
249 dpavlin 59 for(my $i=$pager->first; $i<=$pager->last; $i++) {
250    
251     my $result = $results->NextResult;
252     last if (! $result);
253    
254     my @arr;
255 dpavlin 81
256 dpavlin 59 foreach my $prop (@properties) {
257     if ($prop =~ m/swishdescription/) {
258 dpavlin 81 my $tmp = get_snippet(
259     $result->Property($prop),
260     split(/\s+/,$search)
261     );
262    
263 dpavlin 59 push @arr, $tmp;
264 dpavlin 18 } else {
265 dpavlin 59 push @arr, $result->Property($prop);
266 dpavlin 18 }
267 dpavlin 59 }
268 dpavlin 18
269 dpavlin 73 my $title = e($result->Property("swishtitle")) || 'untitled';
270     my $rank = $result->Property("swishrank");
271     my $host = $result->Property("swishdocpath");
272     $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
273 dpavlin 80
274     foreach my $p (keys %path2title) {
275     if ($host =~ m/$p/i) {
276     $title =~ s/$path2title{$p}\s*[:-]+\s*//;
277     $title = $path2title{$p}." :: ".$title;
278     last;
279     }
280     }
281    
282 dpavlin 59 print $tr_pre,$i,". ";
283 dpavlin 73 # print collection name which is not link
284     if ($title =~ s/^(.+? :: )//) {
285     print $1;
286 dpavlin 59 }
287 dpavlin 80
288 dpavlin 73 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
289 dpavlin 59 print $tr_post;
290 dpavlin 8
291 dpavlin 59 }
292 dpavlin 8
293 dpavlin 59 # pager navigation
294     my $nav_html;
295 dpavlin 8
296 dpavlin 59 my $nav_fmt=qq{ <a href="%s">%s</a> };
297 dpavlin 8
298 dpavlin 75 if ($pager->current_page() > $pager->first_page) {
299     param('page', $pager->current_page - 1);
300     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
301     }
302    
303 dpavlin 59 if ($pager->previous_set) {
304     param('page', $pager->previous_set);
305 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
306 dpavlin 8 }
307 dpavlin 59
308    
309     foreach my $p (@{$pager->pages_in_set()}) {
310 dpavlin 60 next if ($p < 0);
311 dpavlin 59 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
312     if($p == $pager->current_page()) {
313     $nav_html .= "<b>$p</b> ";
314     } else {
315     param('page', $p);
316     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
317     }
318     }
319    
320     if ($pager->next_set) {
321     param('page', $pager->next_set);
322 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
323     }
324    
325     if ($pager->current_page() < $pager->last_page) {
326     param('page', $pager->current_page + 1);
327 dpavlin 59 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
328     }
329    
330 dpavlin 76 if ($config->{text}->{pages}) {
331     $nav_html = x($config->{text}->{pages})." ".$nav_html;
332     }
333    
334 dpavlin 59 # end html table
335     print qq{
336     <tr><td>
337 dpavlin 76 $nav_html
338 dpavlin 59 </td></tr>
339     </table>
340     };
341    
342    
343    
344 dpavlin 8 } else {
345 dpavlin 18 print p(x($config->{text}->{footer}));
346 dpavlin 8 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26