/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 81 - (show annotations)
Sat Aug 28 22:15:59 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 8391 byte(s)
implement snippets of content and highlighthing of words

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH::API;
7 use XML::Simple;
8 use Lingua::Spelling::Alternative;
9 use Text::Iconv;
10 use Data::Pageset;
11
12
13 sub get_snippet {
14 my $context_chars = 100;
15
16 my $desc = shift || return '';
17 # test if $desc contains any of our query words
18 my @snips;
19
20 my @colors = qw{#ffff66 #a0ffff #99ff99 #ff9999 #ff66ff};
21
22 my $i = 0;
23
24 for my $q (@_) {
25 if ($desc =~ m/(.*?)(\Q$q\E)(.*)/si) {
26 my $bef = $1;
27 my $qm = $2;
28 my $af = $3;
29 $bef = substr $bef, -$context_chars;
30 $af = substr $af, 0, $context_chars;
31
32 # no partial words...
33 $af =~ s,^\S+\s+|\s+\S+$,,gs;
34 $bef =~ s,^\S+\s+|\s+\S+$,,gs;
35
36 push(@snips, "$bef <span style=\"background:".$colors[$i]."\">$qm</span> $af");
37 $i++;
38 $i = 0 if ($i > $#colors);
39 }
40 }
41 my $ellip = ' ... ';
42 my $snippet = $ellip. join($ellip, @snips) . $ellip;
43
44 return $snippet;
45 }
46
47 # for pager
48 my $pages_per_set = 20;
49
50 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
51 my $config=XMLin(undef,
52 # keyattr => { label => "value" },
53 forcecontent => 0,
54 ForceArray => [ 'path' ],
55 );
56
57 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
58 sub x {
59 return if (! defined $_[0]);
60 return $from_utf8->convert($_[0]);
61 }
62
63 # Escape <, >, & and ", and to produce valid XML
64 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
65 my $escape_re = join '|' => keys %escape;
66 sub e {
67 my $out;
68 foreach my $v (@_) {
69 $v =~ s/($escape_re)/$escape{$1}/g;
70 $out .= $v;
71 }
72 return $out;
73 }
74
75 my @spellings;
76 # FIX: doesn't work very well
77 if ($config->{findaffix}) {
78 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
79 next if (! -f $findaffix);
80 my $spelling_alt = new Lingua::Spelling::Alternative;
81 $spelling_alt->load_findaffix($findaffix);
82 push @spellings,$spelling_alt;
83 }
84 }
85 if ($config->{affix}) {
86 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
87 next if (! -f $affix);
88 my $spelling_alt = new Lingua::Spelling::Alternative;
89 $spelling_alt->load_affix($affix);
90 push @spellings,$spelling_alt;
91 }
92 }
93
94 my $hits=0;
95 my $max_hits=param('max_hits') || x($config->{max_hits});
96
97 my %labels;
98 foreach (@{$config->{labels}->{label}}) {
99 next if (! $_->{value}); # skip unlimited (0)
100 $labels{$_->{value}} = x($_->{content});
101 }
102
103 my $path;
104 # limit to this path
105 $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
106 my %path_label;
107 my @path_name;
108 foreach (@{$config->{paths}->{path}}) {
109
110 print STDERR "##: $_->{limit}",x($_->{content}),"\n";
111 push @path_name,x($_->{limit});
112 $path_label{$_->{limit}} = x($_->{content});
113 }
114
115 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
116
117 if ($config->{charset}) {
118 print header(-charset=>x($config->{charset}));
119 } else {
120 print header;
121 }
122 print start_html(-title=>x($config->{title})),start_form;
123 print x($config->{text}->{search});
124 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
125 print x($config->{text}->{documents});
126 print textfield('search');
127 print submit(-value=> x($config->{text}->{submit}));
128 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
129 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
130 if (@path_name) {
131 print br,x($config->{text}->{limit});
132 print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
133 } elsif (param('path')) {
134 print hidden(-name=>'path',-values=>param('path'));
135 }
136 print end_form,hr;
137
138 if (param('search')) {
139
140 my $s;
141 # re-write query from +/- to and/and not
142
143 my $search = param('search');
144 my $s_phrase = "";
145 while ($search =~ s/\s*("[^"]+")\s*/ /) {
146 $s .= "$1 ";
147 }
148 $search =~ s/^\s+//;
149 $search =~ s/\s+$//;
150
151 my %words;
152
153 foreach (split(/\s+/,$search)) {
154 if (m/^([+-])(\S+)/) {
155 $s.= ($s) ? "and " : "";
156 $s.="not " if ($1 eq "-");
157 if (@spellings && !param('no_affix')) {
158 my $w = $2; $w =~ s/[\*\s]+//g;
159 $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
160 my $or="";
161 foreach my $spelling_alt (@spellings) {
162 $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
163 $or = "or ";
164 }
165 } else {
166 $s.="$2* ";
167 }
168 } else {
169 if (@spellings && !param('no_affix')) {
170 my $w = $_; $w =~ s/[\*\s]+//g;
171 #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
172 my $or="";
173 foreach my $spelling_alt (@spellings) {
174 $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
175 $or = "or ";
176 }
177 } else {
178 $s.="$_* ";
179 }
180 }
181 }
182
183 # fixup search string
184 $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
185 $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
186 $s=~s/\*\*+/*/g;
187
188 # limit to some path
189 $s = "swishdocpath=($path) and $s" if ($path);
190
191 my %params; # optional parametars for swish
192
193 # default format for output
194 my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
195
196 # output start of table
197 print qq{
198 <table border="0">
199 };
200 # html before and after each hit
201 my $tr_pre = qq{
202 <tr><td>
203 };
204 my $tr_post = qq{
205 </td></tr>
206 };
207
208 if (@properties) {
209 $hit_fmt = x($config->{hit}) if (! param('no_properties'));
210 $params{properties} = \@properties;
211 } else {
212 $hit_fmt = x($config->{hit}) if (x($config->{hit}));
213 }
214
215 my $swish = SWISH::API->new($config->{index});
216 $swish->AbortLastError if $swish->Error;
217 my $results = $swish->Query($s);
218 my $hits = $results->Hits;
219
220
221 # build pager
222 my $current_page = param('page') || 1;
223
224 my $pager = Data::Pageset->new({
225 'total_entries' => $hits,
226 'entries_per_page' => $max_hits,
227 'current_page' => $current_page,
228 'pages_per_set' => $pages_per_set,
229 });
230
231 $results->SeekResult( $pager->first - 1 );
232
233 # get number of entries on this page
234 my $i = $pager->entries_on_this_page;
235
236 # print number of hits or error message
237 if ( !$hits ) {
238 printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
239 } else {
240 printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
241 }
242
243 my %path2title;
244 use Data::Dumper;
245 foreach my $p (@{$config->{path2title}->{path}}) {
246 $path2title{$p->{dir}} = $p->{content};
247 }
248
249 for(my $i=$pager->first; $i<=$pager->last; $i++) {
250
251 my $result = $results->NextResult;
252 last if (! $result);
253
254 my @arr;
255
256 foreach my $prop (@properties) {
257 if ($prop =~ m/swishdescription/) {
258 my $tmp = get_snippet(
259 $result->Property($prop),
260 split(/\s+/,$search)
261 );
262
263 push @arr, $tmp;
264 } else {
265 push @arr, $result->Property($prop);
266 }
267 }
268
269 my $title = e($result->Property("swishtitle")) || 'untitled';
270 my $rank = $result->Property("swishrank");
271 my $host = $result->Property("swishdocpath");
272 $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
273
274 foreach my $p (keys %path2title) {
275 if ($host =~ m/$p/i) {
276 $title =~ s/$path2title{$p}\s*[:-]+\s*//;
277 $title = $path2title{$p}." :: ".$title;
278 last;
279 }
280 }
281
282 print $tr_pre,$i,". ";
283 # print collection name which is not link
284 if ($title =~ s/^(.+? :: )//) {
285 print $1;
286 }
287
288 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
289 print $tr_post;
290
291 }
292
293 # pager navigation
294 my $nav_html;
295
296 my $nav_fmt=qq{ <a href="%s">%s</a> };
297
298 if ($pager->current_page() > $pager->first_page) {
299 param('page', $pager->current_page - 1);
300 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
301 }
302
303 if ($pager->previous_set) {
304 param('page', $pager->previous_set);
305 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
306 }
307
308
309 foreach my $p (@{$pager->pages_in_set()}) {
310 next if ($p < 0);
311 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
312 if($p == $pager->current_page()) {
313 $nav_html .= "<b>$p</b> ";
314 } else {
315 param('page', $p);
316 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
317 }
318 }
319
320 if ($pager->next_set) {
321 param('page', $pager->next_set);
322 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
323 }
324
325 if ($pager->current_page() < $pager->last_page) {
326 param('page', $pager->current_page + 1);
327 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
328 }
329
330 if ($config->{text}->{pages}) {
331 $nav_html = x($config->{text}->{pages})." ".$nav_html;
332 }
333
334 # end html table
335 print qq{
336 <tr><td>
337 $nav_html
338 </td></tr>
339 </table>
340 };
341
342
343
344 } else {
345 print p(x($config->{text}->{footer}));
346 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26