/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 89 - (show annotations)
Tue Aug 31 09:04:15 2004 UTC (19 years, 7 months ago) by dpavlin
File size: 7726 byte(s)
extract snippet and highlite into separate module

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH::API;
7 use XML::Simple;
8 use Lingua::Spelling::Alternative;
9 use Text::Iconv;
10 use Data::Pageset;
11 use FormatResult;
12
13 # for pager
14 my $pages_per_set = 20;
15
16 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
17 my $config=XMLin(undef,
18 # keyattr => { label => "value" },
19 forcecontent => 0,
20 ForceArray => [ 'path' ],
21 );
22
23 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
24 sub x {
25 return if (! defined $_[0]);
26 return $from_utf8->convert($_[0]);
27 }
28
29 # Escape <, >, & and ", and to produce valid XML
30 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
31 my $escape_re = join '|' => keys %escape;
32 sub e {
33 my $out;
34 foreach my $v (@_) {
35 $v =~ s/($escape_re)/$escape{$1}/g;
36 $out .= $v;
37 }
38 return $out;
39 }
40
41 my @spellings;
42 # FIX: doesn't work very well
43 if ($config->{findaffix}) {
44 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
45 next if (! -f $findaffix);
46 my $spelling_alt = new Lingua::Spelling::Alternative;
47 $spelling_alt->load_findaffix($findaffix);
48 push @spellings,$spelling_alt;
49 }
50 }
51 if ($config->{affix}) {
52 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
53 next if (! -f $affix);
54 my $spelling_alt = new Lingua::Spelling::Alternative;
55 $spelling_alt->load_affix($affix);
56 push @spellings,$spelling_alt;
57 }
58 }
59
60 my $hits=0;
61 my $max_hits=param('max_hits') || x($config->{max_hits});
62
63 my %labels;
64 foreach (@{$config->{labels}->{label}}) {
65 next if (! $_->{value}); # skip unlimited (0)
66 $labels{$_->{value}} = x($_->{content});
67 }
68
69 my $path;
70 # limit to this path
71 $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
72 my %path_label;
73 my @path_name;
74 foreach (@{$config->{paths}->{path}}) {
75 push @path_name,x($_->{limit});
76 $path_label{$_->{limit}} = x($_->{content});
77 }
78
79 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
80
81 if ($config->{charset}) {
82 print header(-charset=>x($config->{charset}));
83 } else {
84 print header;
85 }
86 print start_html(-title=>x($config->{title})),start_form;
87 print x($config->{text}->{search});
88 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
89 print x($config->{text}->{documents});
90 print textfield('search');
91 print submit(-value=> x($config->{text}->{submit}));
92 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
93 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
94 if (@path_name) {
95 print br,x($config->{text}->{limit});
96 print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
97 } elsif (param('path')) {
98 print hidden(-name=>'path',-values=>param('path'));
99 }
100 print end_form,hr;
101
102 if (param('search')) {
103
104 my $s;
105 # re-write query from +/- to and/and not
106
107 my @s_highlite;
108
109 my $search = param('search');
110
111 # strip spaces
112 $search =~ s/^\s+//;
113 $search =~ s/\s+$//;
114 # fixup search string
115 $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
116 $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
117
118 # extract phrases and put them first
119 while ($search =~ s/\s*("[^"]+")\s*/ /) {
120 $s .= "$1 ";
121 push @s_highlite, $1;
122 }
123
124 my %words;
125
126 foreach (split(/\s+/,$search)) {
127 if (m/^([+-])(\S+)/) {
128 $s.= ($s) ? "and " : "";
129 $s.="not " if ($1 eq "-");
130 if (@spellings && !param('no_affix')) {
131 my $w = $2; $w =~ s/[\*\s]+//g;
132 $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
133 my $or="";
134 foreach my $spelling_alt (@spellings) {
135 $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
136 $or = "or ";
137 }
138 } else {
139 $s.="$2* ";
140 }
141 push @s_highlite, $2 if ($1 ne "-");
142 } else {
143 if (@spellings && !param('no_affix')) {
144 my $w = $_; $w =~ s/[\*\s]+//g;
145 my $or="";
146 foreach my $spelling_alt (@spellings) {
147 $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
148 $or = "or ";
149 }
150 } else {
151 $s.="$_* ";
152 }
153 push @s_highlite, $_;
154 }
155 }
156
157 # fix multiple stars
158 $s=~s/\*\*+/*/g;
159
160 # limit to some path
161 $s = "swishdocpath=($path) and $s" if ($path);
162
163 my %params; # optional parametars for swish
164
165 # default format for output
166 my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
167
168 if (@properties) {
169 $hit_fmt = x($config->{hit}) if (! param('no_properties'));
170 $params{properties} = \@properties;
171 } else {
172 $hit_fmt = x($config->{hit}) if (x($config->{hit}));
173 }
174
175 my $swish = SWISH::API->new($config->{index});
176 $swish->AbortLastError if $swish->Error;
177 my $results = $swish->Query($s);
178 my $hits = $results->Hits;
179
180
181 # build pager
182 my $current_page = param('page') || 1;
183
184 my $pager = Data::Pageset->new({
185 'total_entries' => $hits,
186 'entries_per_page' => $max_hits,
187 'current_page' => $current_page,
188 'pages_per_set' => $pages_per_set,
189 });
190
191 $results->SeekResult( $pager->first - 1 );
192
193 # get number of entries on this page
194 my $i = $pager->entries_on_this_page;
195
196 # print number of hits or error message
197 if ( !$hits ) {
198 printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
199 } else {
200 printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
201 }
202
203 my %path2title;
204 foreach my $p (@{$config->{path2title}->{path}}) {
205 $path2title{$p->{dir}} = $p->{content};
206 }
207
208 # output start of table
209 print qq{
210 <table border="0">
211 };
212 # html before and after each hit
213 my $tr_pre = qq{
214 <tr><td>
215 };
216 my $tr_post = qq{
217 </td></tr>
218 };
219
220 for(my $i=$pager->first; $i<=$pager->last; $i++) {
221
222 my $result = $results->NextResult;
223 last if (! $result);
224
225 my @arr;
226
227 foreach my $prop (@properties) {
228 if ($prop =~ m/swishdescription/) {
229 my $tmp = FormatResult::get_snippet(
230 e($result->Property($prop)),
231 @s_highlite,
232 );
233
234 push @arr, $tmp;
235 } else {
236 push @arr, $result->Property($prop);
237 }
238 }
239
240 my $title = e($result->Property("swishtitle")) || 'untitled';
241 my $rank = $result->Property("swishrank");
242 my $host = $result->Property("swishdocpath");
243 $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
244
245 foreach my $p (keys %path2title) {
246 if ($host =~ m/$p/i) {
247 $title =~ s/$path2title{$p}\s*[:-]+\s*//;
248 $title = $path2title{$p}." :: ".$title;
249 last;
250 }
251 }
252
253 print $tr_pre,$i,". ";
254 # print collection name which is not link
255 if ($title =~ s/^(.+? :: )//) {
256 print $1;
257 }
258
259 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
260 print $tr_post;
261
262 }
263
264 # pager navigation
265 my $nav_html;
266
267 my $nav_fmt=qq{ <a href="%s">%s</a> };
268
269 if ($pager->current_page() > $pager->first_page) {
270 param('page', $pager->current_page - 1);
271 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
272 }
273
274 if ($pager->previous_set) {
275 param('page', $pager->previous_set);
276 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
277 }
278
279
280 foreach my $p (@{$pager->pages_in_set()}) {
281 next if ($p < 0);
282 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
283 if($p == $pager->current_page()) {
284 $nav_html .= "<b>$p</b> ";
285 } else {
286 param('page', $p);
287 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
288 }
289 }
290
291 if ($pager->next_set) {
292 param('page', $pager->next_set);
293 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
294 }
295
296 if ($pager->current_page() < $pager->last_page) {
297 param('page', $pager->current_page + 1);
298 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
299 }
300
301 if ($config->{text}->{pages}) {
302 $nav_html = x($config->{text}->{pages})." ".$nav_html;
303 }
304
305 # end html table
306 print qq{
307 <tr><td>
308 $nav_html
309 </td></tr>
310 </table>
311 };
312
313
314
315 } else {
316 print p(x($config->{text}->{footer}));
317 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26