/[swish]/trunk/html/swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/html/swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 80 - (hide annotations)
Sat May 22 18:33:33 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 7508 byte(s)
major improvement: added <path2title> to configuration so that you can specify
part of path to add prefix (collection title) to results,
code cleanup (removed unused parts of code), specified but non-existant
affix and findaffix files will be skipped

1 dpavlin 8 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 59 use SWISH::API;
7 dpavlin 8 use XML::Simple;
8     use Lingua::Spelling::Alternative;
9     use Text::Iconv;
10 dpavlin 59 use Data::Pageset;
11 dpavlin 8
12 dpavlin 59 # for pager
13     my $pages_per_set = 20;
14    
15 dpavlin 8 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
16     my $config=XMLin(undef,
17     # keyattr => { label => "value" },
18     forcecontent => 0,
19 dpavlin 80 ForceArray => [ 'path' ],
20 dpavlin 8 );
21    
22 dpavlin 18 my $from_utf8 = Text::Iconv->new('UTF8', $config->{charset});
23     sub x {
24 dpavlin 62 return if (! defined $_[0]);
25 dpavlin 18 return $from_utf8->convert($_[0]);
26     }
27    
28 dpavlin 29 # Escape <, >, & and ", and to produce valid XML
29     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
30     my $escape_re = join '|' => keys %escape;
31     sub e {
32     my $out;
33     foreach my $v (@_) {
34     $v =~ s/($escape_re)/$escape{$1}/g;
35     $out .= $v;
36     }
37     return $out;
38     }
39 dpavlin 8
40 dpavlin 39 my @spellings;
41 dpavlin 8 # FIX: doesn't work very well
42 dpavlin 16 if ($config->{findaffix}) {
43 dpavlin 41 foreach my $findaffix (split(/[, ]+/,x($config->{findaffix}))) {
44 dpavlin 80 next if (! -f $findaffix);
45 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
46     $spelling_alt->load_findaffix($findaffix);
47     push @spellings,$spelling_alt;
48     }
49 dpavlin 8 }
50 dpavlin 16 if ($config->{affix}) {
51 dpavlin 41 foreach my $affix (split(/[, ]+/,x($config->{affix}))) {
52 dpavlin 80 next if (! -f $affix);
53 dpavlin 41 my $spelling_alt = new Lingua::Spelling::Alternative;
54     $spelling_alt->load_affix($affix);
55     push @spellings,$spelling_alt;
56     }
57 dpavlin 16 }
58 dpavlin 8
59     my $hits=0;
60 dpavlin 59 my $max_hits=param('max_hits') || x($config->{max_hits});
61 dpavlin 8
62     my %labels;
63     foreach (@{$config->{labels}->{label}}) {
64 dpavlin 59 next if (! $_->{value}); # skip unlimited (0)
65 dpavlin 18 $labels{$_->{value}} = x($_->{content});
66 dpavlin 8 }
67    
68 dpavlin 23 my $path = param('path'); # limit to this path
69     my %path_label;
70     my @path_name;
71     foreach (@{$config->{paths}->{path}}) {
72 dpavlin 39
73     print STDERR "##: $_->{limit}",x($_->{content}),"\n";
74 dpavlin 23 push @path_name,x($_->{limit});
75     $path_label{$_->{limit}} = x($_->{content});
76     }
77    
78 dpavlin 62 my @properties = split(/\s+/,x($config->{properties})) if ($config->{properties});
79 dpavlin 32
80 dpavlin 16 if ($config->{charset}) {
81 dpavlin 18 print header(-charset=>x($config->{charset}));
82 dpavlin 16 } else {
83     print header;
84     }
85 dpavlin 18 print start_html(-title=>x($config->{title})),start_form;
86     print x($config->{text}->{search});
87 dpavlin 16 print popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits);
88 dpavlin 18 print x($config->{text}->{documents});
89 dpavlin 16 print textfield('search');
90 dpavlin 18 print submit(-value=> x($config->{text}->{submit}));
91 dpavlin 39 print br,checkbox(-name=>'no_affix', -checked=>0, -label=>x($config->{text}->{no_spell})) if (@spellings);
92 dpavlin 32 print checkbox(-name=>'no_properties', -checked=>0, -label=>($config->{text}->{no_properties})) if (@properties);
93 dpavlin 23 if (@path_name) {
94     print br,x($config->{text}->{limit});
95     print popup_menu(-name=>'path',-values=>\@path_name,-labels=>\%path_label,-default=>$path);
96     }
97 dpavlin 8 print end_form,hr;
98    
99     if (param('search')) {
100    
101     my $s;
102     # re-write query from +/- to and/and not
103    
104     my $search = param('search');
105     my $s_phrase = "";
106     while ($search =~ s/\s*("[^"]+")\s*/ /) {
107     $s .= "$1 ";
108     }
109     $search =~ s/^\s+//;
110     $search =~ s/\s+$//;
111    
112 dpavlin 41 my %words;
113    
114 dpavlin 8 foreach (split(/\s+/,$search)) {
115     if (m/^([+-])(\S+)/) {
116     $s.= ($s) ? "and " : "";
117     $s.="not " if ($1 eq "-");
118 dpavlin 39 if (@spellings && !param('no_affix')) {
119 dpavlin 8 my $w = $2; $w =~ s/[\*\s]+//g;
120     $w =~ s/^(['"]*)([^'"]+)(['"]*)/$2/;
121 dpavlin 39 my $or="";
122     foreach my $spelling_alt (@spellings) {
123     $s.="$or$1(".join("* or ",$spelling_alt->alternatives($w))."*)$3 ";
124     $or = "or ";
125     }
126 dpavlin 8 } else {
127     $s.="$2* ";
128     }
129     } else {
130 dpavlin 39 if (@spellings && !param('no_affix')) {
131 dpavlin 8 my $w = $_; $w =~ s/[\*\s]+//g;
132     #$s.="(".join("* or ",$spelling_alt->alternatives($w))."*) ";
133 dpavlin 39 my $or="";
134     foreach my $spelling_alt (@spellings) {
135     $s.="$or(".join("* or ",$spelling_alt->alternatives($w))."*) ";
136     $or = "or ";
137     }
138 dpavlin 8 } else {
139     $s.="$_* ";
140     }
141     }
142     }
143    
144     # fixup search string
145     $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
146     $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
147     $s=~s/\*\*+/*/g;
148    
149 dpavlin 23 # limit to some path
150 dpavlin 39 $s = "swishdocpath=(\"*$path*\") and $s" if ($path);
151 dpavlin 23
152 dpavlin 22 my %params; # optional parametars for swish
153    
154 dpavlin 32 # default format for output
155     my $hit_fmt = "<a href=\"%s\">%s</a> [%s]<br>\n";
156 dpavlin 21
157 dpavlin 59 # output start of table
158     print qq{
159     <table border="0">
160     };
161     # html before and after each hit
162     my $tr_pre = qq{
163     <tr><td>
164     };
165     my $tr_post = qq{
166     </td></tr>
167     };
168    
169 dpavlin 32 if (@properties) {
170     $hit_fmt = x($config->{hit}) if (! param('no_properties'));
171 dpavlin 39 $params{properties} = \@properties;
172 dpavlin 32 } else {
173     $hit_fmt = x($config->{hit}) if (x($config->{hit}));
174     }
175    
176 dpavlin 59 my $swish = SWISH::API->new($config->{index});
177     $swish->AbortLastError if $swish->Error;
178     my $results = $swish->Query($s);
179     my $hits = $results->Hits;
180    
181    
182     # build pager
183     my $current_page = param('page') || 1;
184    
185     my $pager = Data::Pageset->new({
186     'total_entries' => $hits,
187     'entries_per_page' => $max_hits,
188     'current_page' => $current_page,
189     'pages_per_set' => $pages_per_set,
190     });
191    
192     $results->SeekResult( $pager->first - 1 );
193    
194     # get number of entries on this page
195     my $i = $pager->entries_on_this_page;
196    
197     # print number of hits or error message
198     if ( !$hits ) {
199     printf (x($config->{text}->{no_hits}),$s,$swish->ErrorString);
200     } else {
201     printf (x($config->{text}->{hits}),$i,$results->Hits,$s);
202 dpavlin 57 }
203    
204 dpavlin 80 my %path2title;
205     use Data::Dumper;
206     foreach my $p (@{$config->{path2title}->{path}}) {
207     $path2title{$p->{dir}} = $p->{content};
208     }
209 dpavlin 8
210 dpavlin 59 for(my $i=$pager->first; $i<=$pager->last; $i++) {
211    
212     my $result = $results->NextResult;
213     last if (! $result);
214    
215     my @arr;
216     foreach my $prop (@properties) {
217     if ($prop =~ m/swishdescription/) {
218     my $tmp = $result->Property($prop);
219     $tmp =~ s/<[^>]+>//g;
220     push @arr, $tmp;
221 dpavlin 18 } else {
222 dpavlin 59 push @arr, $result->Property($prop);
223 dpavlin 18 }
224 dpavlin 59 }
225 dpavlin 18
226 dpavlin 73 my $title = e($result->Property("swishtitle")) || 'untitled';
227     my $rank = $result->Property("swishrank");
228     my $host = $result->Property("swishdocpath");
229     $host = "http://".virtual_host().x($config->{url}).$result->Property("swishdocpath") if ($config->{url});
230 dpavlin 80
231     foreach my $p (keys %path2title) {
232     if ($host =~ m/$p/i) {
233     $title =~ s/$path2title{$p}\s*[:-]+\s*//;
234     $title = $path2title{$p}." :: ".$title;
235     last;
236     }
237     }
238    
239 dpavlin 59 print $tr_pre,$i,". ";
240 dpavlin 73 # print collection name which is not link
241     if ($title =~ s/^(.+? :: )//) {
242     print $1;
243 dpavlin 59 }
244 dpavlin 80
245 dpavlin 73 printf($hit_fmt, $host, $title || 'untitled', $rank, @arr);
246 dpavlin 59 print $tr_post;
247 dpavlin 8
248 dpavlin 59 }
249 dpavlin 8
250 dpavlin 59 # pager navigation
251     my $nav_html;
252 dpavlin 8
253 dpavlin 59 my $nav_fmt=qq{ <a href="%s">%s</a> };
254 dpavlin 8
255 dpavlin 75 if ($pager->current_page() > $pager->first_page) {
256     param('page', $pager->current_page - 1);
257     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&lt;&lt;');
258     }
259    
260 dpavlin 59 if ($pager->previous_set) {
261     param('page', $pager->previous_set);
262 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
263 dpavlin 8 }
264 dpavlin 59
265    
266     foreach my $p (@{$pager->pages_in_set()}) {
267 dpavlin 60 next if ($p < 0);
268 dpavlin 59 # for (my $p=$pager->previous_set; $p <= $pager->next_set; $p++) {
269     if($p == $pager->current_page()) {
270     $nav_html .= "<b>$p</b> ";
271     } else {
272     param('page', $p);
273     $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),$p);
274     }
275     }
276    
277     if ($pager->next_set) {
278     param('page', $pager->next_set);
279 dpavlin 75 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'..');
280     }
281    
282     if ($pager->current_page() < $pager->last_page) {
283     param('page', $pager->current_page + 1);
284 dpavlin 59 $nav_html .= sprintf($nav_fmt,url(-relative=>1, -query=>1),'&gt;&gt;');
285     }
286    
287 dpavlin 76 if ($config->{text}->{pages}) {
288     $nav_html = x($config->{text}->{pages})." ".$nav_html;
289     }
290    
291 dpavlin 59 # end html table
292     print qq{
293     <tr><td>
294 dpavlin 76 $nav_html
295 dpavlin 59 </td></tr>
296     </table>
297     };
298    
299    
300    
301 dpavlin 8 } else {
302 dpavlin 18 print p(x($config->{text}->{footer}));
303 dpavlin 8 }

Properties

Name Value
cvs2svn:cvs-rev 1.16
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26