/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 83 - (show annotations)
Sun Aug 29 18:26:58 2004 UTC (19 years, 7 months ago) by dpavlin
File size: 8523 byte(s)
produce valid html, escape characters in snippet

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH::API;
7 use XML::Simple;
8 use Lingua::Spelling::Alternative;
9 use Text::Iconv;
10 use Data::Pageset;
11
12
13 sub get_snippet {
14 my $context_chars = 100;
15 my $max_desc = 16384; # 16k to filter
16
17 my $desc = shift || return '';
18 $desc = substr($desc,0,$max_desc) if (length($desc) > $max_desc);
19 $desc = e($desc);
20 # test if $desc contains any of our query words
21 my @snips;
22
23 my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
24
25 my $i = 0;
26
27 for my $q (@_) {
28 if ($desc =~ m/(.*?)(\Q$q\E)(.*)/si) {
29 my $bef = $1;
30 my $qm = $2;
31 my $af = $3;
32 $bef = substr $bef, -$context_chars;
33 $af = substr $af, 0, $context_chars;
34
35 # no partial words...
36 $af =~ s,^\S+\s+|\s+\S+$,,gs;
37 $bef =~ s,^\S+\s+|\s+\S+$,,gs;
38
39 push(@snips, "$bef <span style=\"background:".$colors[$i]."; color:black;\">$qm</span> $af");
40 $i++;
41 $i = 0 if ($i > $#colors);
42 }
43 }
44 my $ellip = ' ... ';
45 my $snippet = $ellip. join($ellip, @snips) . $ellip if (@snips);
46
47 return $snippet;
48 }
49
50 # for pager
51 my $pages_per_set = 20;
52
53 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
54 my $config=XMLin(undef,
55 # keyattr => { label => "value" },
56 forcecontent => 0,
57 ForceArray => [ 'path' ],
58 );
59
60 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
61 sub x {
62 return if (! defined $_[0]);
63 return $from_utf8->convert($_[0]);
64 }
65
66 # Escape <, >, & and ", and to produce valid XML
67 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
68 my $escape_re = join '|' => keys %escape;
69 sub e {
70 my $out;
71 foreach my $v (@_) {
72 $v =~ s/($escape_re)/$escape{$1}/g;
73 $out .= $v;
74 }
75 return $out;
76 }
77
78 my @spellings;
79 # FIX: doesn't work very well
80 if ($config->{findaffix}) {
81 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
82 next if (! -f $findaffix);
83 my $spelling_alt = new Lingua::Spelling::Alternative;
84 $spelling_alt->load_findaffix($findaffix);
85 push @spellings,$spelling_alt;
86 }
87 }
88 if ($config->{affix}) {
89 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
90 next if (! -f $affix);
91 my $spelling_alt = new Lingua::Spelling::Alternative;
92 $spelling_alt->load_affix($affix);
93 push @spellings,$spelling_alt;
94 }
95 }
96
97 my $hits=0;
98 my $max_hits=param('max_hits') || x($config->{max_hits});
99
100 my %labels;
101 foreach (@{$config->{labels}->{label}}) {
102 next if (! $_->{value}); # skip unlimited (0)
103 $labels{$_->{value}} = x($_->{content});
104 }
105
106 my $path;
107 # limit to this path
108 $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
109 my %path_label;
110 my @path_name;
111 foreach (@{$config->{paths}->{path}}) {
112
113 print STDERR "##: $_->{limit}",x($_->{content}),"\n";
114 push @path_name,x($_->{limit});
115 $path_label{$_->{limit}} = x($_->{content});
116 }
117
118 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
119
120 if ($config->{charset}) {
121 print header(-charset=>x($config->{charset}));
122 } else {
123 print header;
124 }
125 print start_html(-title=>x($config->{title})),start_form;
126 print x($config->{text}->{search});
127 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
128 print x($config->{text}->{documents});
129 print textfield('search');
130 print submit(-value=> x($config->{text}->{submit}));
131 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
132 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
133 if (@path_name) {
134 print br,x($config->{text}->{limit});
135 print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
136 } elsif (param('path')) {
137 print hidden(-name=>'path',-values=>param('path'));
138 }
139 print end_form,hr;
140
141 if (param('search')) {
142
143 my $s;
144 # re-write query from +/- to and/and not
145
146 my $search = param('search');
147 my $s_phrase = "";
148 while ($search =~ s/\s*("[^"]+")\s*/ /) {
149 $s .= "$1 ";
150 }
151 $search =~ s/^\s+//;
152 $search =~ s/\s+$//;
153
154 my %words;
155
156 foreach (split(/\s+/,$search)) {
157 if (m/^([+-])(\S+)/) {
158 $s.= ($s) ? "and " : "";
159 $s.="not " if ($1 eq "-");
160 if (@spellings && !param('no_affix')) {
161 my $w = $2; $w =~ s/[\*\s]+//g;
162 $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
163 my $or="";
164 foreach my $spelling_alt (@spellings) {
165 $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
166 $or = "or ";
167 }
168 } else {
169 $s.="$2* ";
170 }
171 } else {
172 if (@spellings && !param('no_affix')) {
173 my $w = $_; $w =~ s/[\*\s]+//g;
174 #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
175 my $or="";
176 foreach my $spelling_alt (@spellings) {
177 $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
178 $or = "or ";
179 }
180 } else {
181 $s.="$_* ";
182 }
183 }
184 }
185
186 # fixup search string
187 $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
188 $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
189 $s=~s/\*\*+/*/g;
190
191 # limit to some path
192 $s = "swishdocpath=($path) and $s" if ($path);
193
194 my %params; # optional parametars for swish
195
196 # default format for output
197 my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
198
199 if (@properties) {
200 $hit_fmt = x($config->{hit}) if (! param('no_properties'));
201 $params{properties} = \@properties;
202 } else {
203 $hit_fmt = x($config->{hit}) if (x($config->{hit}));
204 }
205
206 my $swish = SWISH::API->new($config->{index});
207 $swish->AbortLastError if $swish->Error;
208 my $results = $swish->Query($s);
209 my $hits = $results->Hits;
210
211
212 # build pager
213 my $current_page = param('page') || 1;
214
215 my $pager = Data::Pageset->new({
216 'total_entries' => $hits,
217 'entries_per_page' => $max_hits,
218 'current_page' => $current_page,
219 'pages_per_set' => $pages_per_set,
220 });
221
222 $results->SeekResult( $pager->first - 1 );
223
224 # get number of entries on this page
225 my $i = $pager->entries_on_this_page;
226
227 # print number of hits or error message
228 if ( !$hits ) {
229 printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
230 } else {
231 printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
232 }
233
234 my %path2title;
235 foreach my $p (@{$config->{path2title}->{path}}) {
236 $path2title{$p->{dir}} = $p->{content};
237 }
238
239 # output start of table
240 print qq{
241 <table border="0">
242 };
243 # html before and after each hit
244 my $tr_pre = qq{
245 <tr><td>
246 };
247 my $tr_post = qq{
248 </td></tr>
249 };
250
251 for(my $i=$pager->first; $i<=$pager->last; $i++) {
252
253 my $result = $results->NextResult;
254 last if (! $result);
255
256 my @arr;
257
258 foreach my $prop (@properties) {
259 if ($prop =~ m/swishdescription/) {
260 my $tmp = get_snippet(
261 $result->Property($prop),
262 split(/\s+/,$search)
263 );
264
265 push @arr, $tmp;
266 } else {
267 push @arr, $result->Property($prop);
268 }
269 }
270
271 my $title = e($result->Property("swishtitle")) || 'untitled';
272 my $rank = $result->Property("swishrank");
273 my $host = $result->Property("swishdocpath");
274 $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
275
276 foreach my $p (keys %path2title) {
277 if ($host =~ m/$p/i) {
278 $title =~ s/$path2title{$p}\s*[:-]+\s*//;
279 $title = $path2title{$p}." :: ".$title;
280 last;
281 }
282 }
283
284 print $tr_pre,$i,". ";
285 # print collection name which is not link
286 if ($title =~ s/^(.+? :: )//) {
287 print $1;
288 }
289
290 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
291 print $tr_post;
292
293 }
294
295 # pager navigation
296 my $nav_html;
297
298 my $nav_fmt=qq{ <a href="%s">%s</a> };
299
300 if ($pager->current_page() > $pager->first_page) {
301 param('page', $pager->current_page - 1);
302 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
303 }
304
305 if ($pager->previous_set) {
306 param('page', $pager->previous_set);
307 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
308 }
309
310
311 foreach my $p (@{$pager->pages_in_set()}) {
312 next if ($p < 0);
313 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
314 if($p == $pager->current_page()) {
315 $nav_html .= "<b>$p</b> ";
316 } else {
317 param('page', $p);
318 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
319 }
320 }
321
322 if ($pager->next_set) {
323 param('page', $pager->next_set);
324 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
325 }
326
327 if ($pager->current_page() < $pager->last_page) {
328 param('page', $pager->current_page + 1);
329 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
330 }
331
332 if ($config->{text}->{pages}) {
333 $nav_html = x($config->{text}->{pages})." ".$nav_html;
334 }
335
336 # end html table
337 print qq{
338 <tr><td>
339 $nav_html
340 </td></tr>
341 </table>
342 };
343
344
345
346 } else {
347 print p(x($config->{text}->{footer}));
348 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26