/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 82 - (hide annotations)
Sun Aug 29 18:17:15 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 8523 byte(s)
added maximum size of content to extract snippet from (16k), smaller other
improvements

1 dpavlin 8 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 59 use SWISH::API;
7 dpavlin 8 use XML::Simple;
8     use Lingua::Spelling::Alternative;
9     use Text::Iconv;
10 dpavlin 59 use Data::Pageset;
11 dpavlin 8
12 dpavlin 81
13     sub get_snippet {
14     my $context_chars = 100;
15 dpavlin 82 my $max_desc = 16384; # 16k to filter
16 dpavlin 81
17     my $desc = shift || return '';
18 dpavlin 82 $desc = substr($desc,0,$max_desc) if (length($desc) > $max_desc);
19 dpavlin 81 # test if $desc contains any of our query words
20     my @snips;
21    
22     my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
23    
24     my $i = 0;
25    
26     for my $q (@_) {
27     if ($desc =~ m/(.*?)(\Q$q\E)(.*)/si) {
28     my $bef = $1;
29     my $qm = $2;
30     my $af = $3;
31     $bef = substr $bef, -$context_chars;
32     $af = substr $af, 0, $context_chars;
33    
34     # no partial words...
35     $af =~ s,^\S+\s+|\s+\S+$,,gs;
36     $bef =~ s,^\S+\s+|\s+\S+$,,gs;
37    
38 dpavlin 82 push(@snips, "$bef <span style=\"background:".$colors[$i]."; color:black;\">$qm</span> $af");
39 dpavlin 81 $i++;
40     $i = 0 if ($i > $#colors);
41     }
42     }
43     my $ellip = ' ... ';
44 dpavlin 82 my $snippet = $ellip. join($ellip, @snips) . $ellip if (@snips);
45 dpavlin 81
46     return $snippet;
47     }
48    
49 dpavlin 59 # for pager
50     my $pages_per_set = 20;
51    
52 dpavlin 8 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
53     my $config=XMLin(undef,
54     # keyattr => { label => "value" },
55     forcecontent => 0,
56 dpavlin 80 ForceArray => [ 'path' ],
57 dpavlin 8 );
58    
59 dpavlin 18 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
60     sub x {
61 dpavlin 62 return if (! defined $_[0]);
62 dpavlin 18 return $from_utf8->convert($_[0]);
63     }
64    
65 dpavlin 29 # Escape <, >, & and ", and to produce valid XML
66     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
67     my $escape_re = join '|' => keys %escape;
68     sub e {
69     my $out;
70     foreach my $v (@_) {
71     $v =~ s/($escape_re)/$escape{$1}/g;
72     $out .= $v;
73     }
74     return $out;
75     }
76 dpavlin 8
77 dpavlin 39 my @spellings;
78 dpavlin 8 # FIX: doesn't work very well
79 dpavlin 16 if ($config->{findaffix}) {
80 dpavlin 41 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
81 dpavlin 80 next if (! -f $findaffix);
82 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
83     $spelling_alt->load_findaffix($findaffix);
84     push @spellings,$spelling_alt;
85     }
86 dpavlin 8 }
87 dpavlin 16 if ($config->{affix}) {
88 dpavlin 41 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
89 dpavlin 80 next if (! -f $affix);
90 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
91     $spelling_alt->load_affix($affix);
92     push @spellings,$spelling_alt;
93     }
94 dpavlin 16 }
95 dpavlin 8
96     my $hits=0;
97 dpavlin 59 my $max_hits=param('max_hits') || x($config->{max_hits});
98 dpavlin 8
99     my %labels;
100     foreach (@{$config->{labels}->{label}}) {
101 dpavlin 59 next if (! $_->{value}); # skip unlimited (0)
102 dpavlin 18 $labels{$_->{value}} = x($_->{content});
103 dpavlin 8 }
104    
105 dpavlin 81 my $path;
106     # limit to this path
107     $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
108 dpavlin 23 my %path_label;
109     my @path_name;
110     foreach (@{$config->{paths}->{path}}) {
111 dpavlin 39
112     print STDERR "##: $_->{limit}",x($_->{content}),"\n";
113 dpavlin 23 push @path_name,x($_->{limit});
114     $path_label{$_->{limit}} = x($_->{content});
115     }
116    
117 dpavlin 62 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
118 dpavlin 32
119 dpavlin 16 if ($config->{charset}) {
120 dpavlin 18 print header(-charset=>x($config->{charset}));
121 dpavlin 16 } else {
122     print header;
123     }
124 dpavlin 18 print start_html(-title=>x($config->{title})),start_form;
125     print x($config->{text}->{search});
126 dpavlin 16 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
127 dpavlin 18 print x($config->{text}->{documents});
128 dpavlin 16 print textfield('search');
129 dpavlin 18 print submit(-value=> x($config->{text}->{submit}));
130 dpavlin 39 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
131 dpavlin 32 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
132 dpavlin 23 if (@path_name) {
133     print br,x($config->{text}->{limit});
134     print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
135 dpavlin 81 } elsif (param('path')) {
136     print hidden(-name=>'path',-values=>param('path'));
137 dpavlin 23 }
138 dpavlin 8 print end_form,hr;
139    
140     if (param('search')) {
141    
142     my $s;
143     # re-write query from +/- to and/and not
144    
145     my $search = param('search');
146     my $s_phrase = "";
147     while ($search =~ s/\s*("[^"]+")\s*/ /) {
148     $s .= "$1 ";
149     }
150     $search =~ s/^\s+//;
151     $search =~ s/\s+$//;
152    
153 dpavlin 41 my %words;
154    
155 dpavlin 8 foreach (split(/\s+/,$search)) {
156     if (m/^([+-])(\S+)/) {
157     $s.= ($s) ? "and " : "";
158     $s.="not " if ($1 eq "-");
159 dpavlin 39 if (@spellings && !param('no_affix')) {
160 dpavlin 8 my $w = $2; $w =~ s/[\*\s]+//g;
161     $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
162 dpavlin 39 my $or="";
163     foreach my $spelling_alt (@spellings) {
164     $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
165     $or = "or ";
166     }
167 dpavlin 8 } else {
168     $s.="$2* ";
169     }
170     } else {
171 dpavlin 39 if (@spellings && !param('no_affix')) {
172 dpavlin 8 my $w = $_; $w =~ s/[\*\s]+//g;
173     #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
174 dpavlin 39 my $or="";
175     foreach my $spelling_alt (@spellings) {
176     $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
177     $or = "or ";
178     }
179 dpavlin 8 } else {
180     $s.="$_* ";
181     }
182     }
183     }
184    
185     # fixup search string
186     $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
187     $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
188     $s=~s/\*\*+/*/g;
189    
190 dpavlin 23 # limit to some path
191 dpavlin 81 $s = "swishdocpath=($path) and $s" if ($path);
192 dpavlin 23
193 dpavlin 22 my %params; # optional parametars for swish
194    
195 dpavlin 32 # default format for output
196     my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
197 dpavlin 21
198 dpavlin 59 # output start of table
199     print qq{
200     <table border="0">
201     };
202     # html before and after each hit
203     my $tr_pre = qq{
204     <tr><td>
205     };
206     my $tr_post = qq{
207     </td></tr>
208     };
209    
210 dpavlin 32 if (@properties) {
211     $hit_fmt = x($config->{hit}) if (! param('no_properties'));
212 dpavlin 39 $params{properties} = \@properties;
213 dpavlin 32 } else {
214     $hit_fmt = x($config->{hit}) if (x($config->{hit}));
215     }
216    
217 dpavlin 59 my $swish = SWISH::API->new($config->{index});
218     $swish->AbortLastError if $swish->Error;
219     my $results = $swish->Query($s);
220     my $hits = $results->Hits;
221    
222    
223     # build pager
224     my $current_page = param('page') || 1;
225    
226     my $pager = Data::Pageset->new({
227     'total_entries' => $hits,
228     'entries_per_page' => $max_hits,
229     'current_page' => $current_page,
230     'pages_per_set' => $pages_per_set,
231     });
232    
233     $results->SeekResult( $pager->first - 1 );
234    
235     # get number of entries on this page
236     my $i = $pager->entries_on_this_page;
237    
238     # print number of hits or error message
239     if ( !$hits ) {
240     printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
241     } else {
242     printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
243 dpavlin 57 }
244    
245 dpavlin 80 my %path2title;
246     use Data::Dumper;
247     foreach my $p (@{$config->{path2title}->{path}}) {
248     $path2title{$p->{dir}} = $p->{content};
249     }
250 dpavlin 8
251 dpavlin 59 for(my $i=$pager->first; $i<=$pager->last; $i++) {
252    
253     my $result = $results->NextResult;
254     last if (! $result);
255    
256     my @arr;
257 dpavlin 81
258 dpavlin 59 foreach my $prop (@properties) {
259     if ($prop =~ m/swishdescription/) {
260 dpavlin 81 my $tmp = get_snippet(
261     $result->Property($prop),
262     split(/\s+/,$search)
263     );
264    
265 dpavlin 59 push @arr, $tmp;
266 dpavlin 18 } else {
267 dpavlin 59 push @arr, $result->Property($prop);
268 dpavlin 18 }
269 dpavlin 59 }
270 dpavlin 18
271 dpavlin 73 my $title = e($result->Property("swishtitle")) || 'untitled';
272     my $rank = $result->Property("swishrank");
273     my $host = $result->Property("swishdocpath");
274     $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
275 dpavlin 80
276     foreach my $p (keys %path2title) {
277     if ($host =~ m/$p/i) {
278     $title =~ s/$path2title{$p}\s*[:-]+\s*//;
279     $title = $path2title{$p}." :: ".$title;
280     last;
281     }
282     }
283    
284 dpavlin 59 print $tr_pre,$i,". ";
285 dpavlin 73 # print collection name which is not link
286     if ($title =~ s/^(.+? :: )//) {
287     print $1;
288 dpavlin 59 }
289 dpavlin 80
290 dpavlin 73 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
291 dpavlin 59 print $tr_post;
292 dpavlin 8
293 dpavlin 59 }
294 dpavlin 8
295 dpavlin 59 # pager navigation
296     my $nav_html;
297 dpavlin 8
298 dpavlin 59 my $nav_fmt=qq{ <a href="%s">%s</a> };
299 dpavlin 8
300 dpavlin 75 if ($pager->current_page() > $pager->first_page) {
301     param('page', $pager->current_page - 1);
302     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
303     }
304    
305 dpavlin 59 if ($pager->previous_set) {
306     param('page', $pager->previous_set);
307 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
308 dpavlin 8 }
309 dpavlin 59
310    
311     foreach my $p (@{$pager->pages_in_set()}) {
312 dpavlin 60 next if ($p < 0);
313 dpavlin 59 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
314     if($p == $pager->current_page()) {
315     $nav_html .= "<b>$p</b> ";
316     } else {
317     param('page', $p);
318     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
319     }
320     }
321    
322     if ($pager->next_set) {
323     param('page', $pager->next_set);
324 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
325     }
326    
327     if ($pager->current_page() < $pager->last_page) {
328     param('page', $pager->current_page + 1);
329 dpavlin 59 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
330     }
331    
332 dpavlin 76 if ($config->{text}->{pages}) {
333     $nav_html = x($config->{text}->{pages})." ".$nav_html;
334     }
335    
336 dpavlin 59 # end html table
337     print qq{
338     <tr><td>
339 dpavlin 76 $nav_html
340 dpavlin 59 </td></tr>
341     </table>
342     };
343    
344    
345    
346 dpavlin 8 } else {
347 dpavlin 18 print p(x($config->{text}->{footer}));
348 dpavlin 8 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26