/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 89 - (hide annotations)
Tue Aug 31 09:04:15 2004 UTC (19 years, 7 months ago) by dpavlin
File size: 7726 byte(s)
extract snippet and highlite into separate module

1 dpavlin 8 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 59 use SWISH::API;
7 dpavlin 8 use XML::Simple;
8     use Lingua::Spelling::Alternative;
9     use Text::Iconv;
10 dpavlin 59 use Data::Pageset;
11 dpavlin 89 use FormatResult;
12 dpavlin 8
13 dpavlin 59 # for pager
14     my $pages_per_set = 20;
15    
16 dpavlin 8 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
17     my $config=XMLin(undef,
18     # keyattr => { label => "value" },
19     forcecontent => 0,
20 dpavlin 80 ForceArray => [ 'path' ],
21 dpavlin 8 );
22    
23 dpavlin 18 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
24     sub x {
25 dpavlin 62 return if (! defined $_[0]);
26 dpavlin 18 return $from_utf8->convert($_[0]);
27     }
28    
29 dpavlin 29 # Escape <, >, & and ", and to produce valid XML
30     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
31     my $escape_re = join '|' => keys %escape;
32     sub e {
33     my $out;
34     foreach my $v (@_) {
35     $v =~ s/($escape_re)/$escape{$1}/g;
36     $out .= $v;
37     }
38     return $out;
39     }
40 dpavlin 8
41 dpavlin 39 my @spellings;
42 dpavlin 8 # FIX: doesn't work very well
43 dpavlin 16 if ($config->{findaffix}) {
44 dpavlin 41 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
45 dpavlin 80 next if (! -f $findaffix);
46 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
47     $spelling_alt->load_findaffix($findaffix);
48     push @spellings,$spelling_alt;
49     }
50 dpavlin 8 }
51 dpavlin 16 if ($config->{affix}) {
52 dpavlin 41 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
53 dpavlin 80 next if (! -f $affix);
54 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
55     $spelling_alt->load_affix($affix);
56     push @spellings,$spelling_alt;
57     }
58 dpavlin 16 }
59 dpavlin 8
60     my $hits=0;
61 dpavlin 59 my $max_hits=param('max_hits') || x($config->{max_hits});
62 dpavlin 8
63     my %labels;
64     foreach (@{$config->{labels}->{label}}) {
65 dpavlin 59 next if (! $_->{value}); # skip unlimited (0)
66 dpavlin 18 $labels{$_->{value}} = x($_->{content});
67 dpavlin 8 }
68    
69 dpavlin 81 my $path;
70     # limit to this path
71     $path .= '"'.join('*" or "',param('path')).'*"' if (param('path'));
72 dpavlin 23 my %path_label;
73     my @path_name;
74     foreach (@{$config->{paths}->{path}}) {
75     push @path_name,x($_->{limit});
76     $path_label{$_->{limit}} = x($_->{content});
77     }
78    
79 dpavlin 62 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
80 dpavlin 32
81 dpavlin 16 if ($config->{charset}) {
82 dpavlin 18 print header(-charset=>x($config->{charset}));
83 dpavlin 16 } else {
84     print header;
85     }
86 dpavlin 18 print start_html(-title=>x($config->{title})),start_form;
87     print x($config->{text}->{search});
88 dpavlin 16 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
89 dpavlin 18 print x($config->{text}->{documents});
90 dpavlin 16 print textfield('search');
91 dpavlin 18 print submit(-value=> x($config->{text}->{submit}));
92 dpavlin 39 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
93 dpavlin 32 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
94 dpavlin 23 if (@path_name) {
95     print br,x($config->{text}->{limit});
96     print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
97 dpavlin 81 } elsif (param('path')) {
98     print hidden(-name=>'path',-values=>param('path'));
99 dpavlin 23 }
100 dpavlin 8 print end_form,hr;
101    
102     if (param('search')) {
103    
104     my $s;
105     # re-write query from +/- to and/and not
106    
107 dpavlin 88 my @s_highlite;
108 dpavlin 86
109 dpavlin 8 my $search = param('search');
110 dpavlin 86
111     # strip spaces
112     $search =~ s/^\s+//;
113     $search =~ s/\s+$//;
114     # fixup search string
115 dpavlin 87 $search=~tr/¹ð¾èæ©Ð®ÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
116 dpavlin 86 $search=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
117    
118     # extract phrases and put them first
119 dpavlin 8 while ($search =~ s/\s*("[^"]+")\s*/ /) {
120     $s .= "$1 ";
121 dpavlin 88 push @s_highlite, $1;
122 dpavlin 8 }
123    
124 dpavlin 41 my %words;
125    
126 dpavlin 8 foreach (split(/\s+/,$search)) {
127     if (m/^([+-])(\S+)/) {
128     $s.= ($s) ? "and " : "";
129     $s.="not " if ($1 eq "-");
130 dpavlin 39 if (@spellings && !param('no_affix')) {
131 dpavlin 8 my $w = $2; $w =~ s/[\*\s]+//g;
132     $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
133 dpavlin 39 my $or="";
134     foreach my $spelling_alt (@spellings) {
135     $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
136     $or = "or ";
137     }
138 dpavlin 8 } else {
139     $s.="$2* ";
140     }
141 dpavlin 88 push @s_highlite, $2 if ($1 ne "-");
142 dpavlin 8 } else {
143 dpavlin 39 if (@spellings && !param('no_affix')) {
144 dpavlin 8 my $w = $_; $w =~ s/[\*\s]+//g;
145 dpavlin 39 my $or="";
146     foreach my $spelling_alt (@spellings) {
147     $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
148     $or = "or ";
149     }
150 dpavlin 8 } else {
151     $s.="$_* ";
152     }
153 dpavlin 88 push @s_highlite, $_;
154 dpavlin 8 }
155     }
156    
157 dpavlin 86 # fix multiple stars
158 dpavlin 8 $s=~s/\*\*+/*/g;
159    
160 dpavlin 23 # limit to some path
161 dpavlin 81 $s = "swishdocpath=($path) and $s" if ($path);
162 dpavlin 23
163 dpavlin 22 my %params; # optional parametars for swish
164    
165 dpavlin 32 # default format for output
166     my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
167 dpavlin 21
168 dpavlin 32 if (@properties) {
169     $hit_fmt = x($config->{hit}) if (! param('no_properties'));
170 dpavlin 39 $params{properties} = \@properties;
171 dpavlin 32 } else {
172     $hit_fmt = x($config->{hit}) if (x($config->{hit}));
173     }
174    
175 dpavlin 59 my $swish = SWISH::API->new($config->{index});
176     $swish->AbortLastError if $swish->Error;
177     my $results = $swish->Query($s);
178     my $hits = $results->Hits;
179    
180    
181     # build pager
182     my $current_page = param('page') || 1;
183    
184     my $pager = Data::Pageset->new({
185     'total_entries' => $hits,
186     'entries_per_page' => $max_hits,
187     'current_page' => $current_page,
188     'pages_per_set' => $pages_per_set,
189     });
190    
191     $results->SeekResult( $pager->first - 1 );
192    
193     # get number of entries on this page
194     my $i = $pager->entries_on_this_page;
195    
196     # print number of hits or error message
197     if ( !$hits ) {
198     printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
199     } else {
200     printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
201 dpavlin 57 }
202    
203 dpavlin 80 my %path2title;
204     foreach my $p (@{$config->{path2title}->{path}}) {
205     $path2title{$p->{dir}} = $p->{content};
206     }
207 dpavlin 8
208 dpavlin 83 # output start of table
209     print qq{
210     <table border="0">
211     };
212     # html before and after each hit
213     my $tr_pre = qq{
214     <tr><td>
215     };
216     my $tr_post = qq{
217     </td></tr>
218     };
219    
220 dpavlin 59 for(my $i=$pager->first; $i<=$pager->last; $i++) {
221    
222     my $result = $results->NextResult;
223     last if (! $result);
224    
225     my @arr;
226 dpavlin 81
227 dpavlin 59 foreach my $prop (@properties) {
228     if ($prop =~ m/swishdescription/) {
229 dpavlin 89 my $tmp = FormatResult::get_snippet(
230     e($result->Property($prop)),
231 dpavlin 88 @s_highlite,
232 dpavlin 81 );
233    
234 dpavlin 59 push @arr, $tmp;
235 dpavlin 18 } else {
236 dpavlin 59 push @arr, $result->Property($prop);
237 dpavlin 18 }
238 dpavlin 59 }
239 dpavlin 18
240 dpavlin 73 my $title = e($result->Property("swishtitle")) || 'untitled';
241     my $rank = $result->Property("swishrank");
242     my $host = $result->Property("swishdocpath");
243     $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
244 dpavlin 80
245     foreach my $p (keys %path2title) {
246     if ($host =~ m/$p/i) {
247     $title =~ s/$path2title{$p}\s*[:-]+\s*//;
248     $title = $path2title{$p}." :: ".$title;
249     last;
250     }
251     }
252    
253 dpavlin 59 print $tr_pre,$i,". ";
254 dpavlin 73 # print collection name which is not link
255     if ($title =~ s/^(.+? :: )//) {
256     print $1;
257 dpavlin 59 }
258 dpavlin 80
259 dpavlin 73 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
260 dpavlin 59 print $tr_post;
261 dpavlin 8
262 dpavlin 59 }
263 dpavlin 8
264 dpavlin 59 # pager navigation
265     my $nav_html;
266 dpavlin 8
267 dpavlin 59 my $nav_fmt=qq{ <a href="%s">%s</a> };
268 dpavlin 8
269 dpavlin 75 if ($pager->current_page() > $pager->first_page) {
270     param('page', $pager->current_page - 1);
271     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
272     }
273    
274 dpavlin 59 if ($pager->previous_set) {
275     param('page', $pager->previous_set);
276 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
277 dpavlin 8 }
278 dpavlin 59
279    
280     foreach my $p (@{$pager->pages_in_set()}) {
281 dpavlin 60 next if ($p < 0);
282 dpavlin 59 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
283     if($p == $pager->current_page()) {
284     $nav_html .= "<b>$p</b> ";
285     } else {
286     param('page', $p);
287     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
288     }
289     }
290    
291     if ($pager->next_set) {
292     param('page', $pager->next_set);
293 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
294     }
295    
296     if ($pager->current_page() < $pager->last_page) {
297     param('page', $pager->current_page + 1);
298 dpavlin 59 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
299     }
300    
301 dpavlin 76 if ($config->{text}->{pages}) {
302     $nav_html = x($config->{text}->{pages})." ".$nav_html;
303     }
304    
305 dpavlin 59 # end html table
306     print qq{
307     <tr><td>
308 dpavlin 76 $nav_html
309 dpavlin 59 </td></tr>
310     </table>
311     };
312    
313    
314    
315 dpavlin 8 } else {
316 dpavlin 18 print p(x($config->{text}->{footer}));
317 dpavlin 8 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26