1 |
package WebPac; |
2 |
|
3 |
use base 'CGI::Application'; |
4 |
use strict; |
5 |
|
6 |
use HTML::FillInForm; |
7 |
use SWISH::API; |
8 |
use Text::Iconv; |
9 |
use DBI; |
10 |
use Config::IniFiles; |
11 |
use Text::Unaccent; |
12 |
use Data::Pageset; |
13 |
use POSIX qw(locale_h); |
14 |
|
15 |
use lib '..'; |
16 |
use index_DBI_filter; |
17 |
use back2html; |
18 |
|
19 |
|
20 |
# read global.conf configuration |
21 |
my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'"; |
22 |
|
23 |
# configuration options from global.conf |
24 |
my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac"; |
25 |
my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1'; |
26 |
my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e'; |
27 |
my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac"; |
28 |
my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0; |
29 |
my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10; |
30 |
my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1; |
31 |
my $TEMPLATE =$cfg_global->val('webpac', 'template'); |
32 |
my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter'); |
33 |
my $BASE_PATH =$cfg_global->val('webpac', 'base_path'); |
34 |
# for pager |
35 |
my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10; |
36 |
my $locale = $cfg_global->val('locale') || 'hr_HR'; |
37 |
|
38 |
Text::Iconv->raise_error(0); # Conversion errors raise exceptions |
39 |
|
40 |
my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET); |
41 |
|
42 |
setlocale(LC_CTYPE, $locale); |
43 |
setlocale(LC_COLLATE, $locale); |
44 |
|
45 |
if ($UNAC_FILTER) { |
46 |
require $UNAC_FILTER; |
47 |
} else { |
48 |
sub WebPac::my_unac_string { |
49 |
my ($charset, $string) = (@_); |
50 |
return $string; |
51 |
} |
52 |
} |
53 |
|
54 |
# use path from cgi script to support templates in subdirs |
55 |
sub url_ex { |
56 |
my $q = shift || die "suff2file needs CGI object!"; |
57 |
my $tpl = shift || die "url_ex needs template name!"; |
58 |
return suff2file($BASE_PATH, $q->url(-absolute => 1,-path => 1),$TEMPLATE_PATH,$tpl); |
59 |
} |
60 |
|
61 |
sub suff2file($$$$) { |
62 |
my ($base_path, $p, $path, $tpl) = @_; |
63 |
|
64 |
return $tpl if (! $base_path); |
65 |
|
66 |
# strip everything to and including base path, leaving only |
67 |
# additional (virtual) path |
68 |
if ($base_path eq "/") { |
69 |
$p =~ s,/*,,g; |
70 |
my ($name,$ext) = split(/\./,$tpl); |
71 |
$p = $name . "-" . $p . "." . $ext; |
72 |
} elsif ($p =~ s,^.*?$base_path,,) { |
73 |
$p =~ s,/*,,g; |
74 |
my ($name,$ext) = split(/\./,$tpl); |
75 |
$p = $name . $p . "." . $ext; |
76 |
} else { |
77 |
# if unable reset it! |
78 |
$p = $tpl; |
79 |
} |
80 |
|
81 |
if ( -e "$path/$p") { |
82 |
return $p; |
83 |
} else { |
84 |
return $tpl; |
85 |
} |
86 |
|
87 |
} |
88 |
|
89 |
sub setup { |
90 |
my $self = shift; |
91 |
$self->tmpl_path($TEMPLATE_PATH); |
92 |
$self->run_modes( |
93 |
'search' => 'show_search_form', |
94 |
'results' => 'show_results_list', |
95 |
# 'user' => 'show_user_detail', |
96 |
'index' => 'show_index', |
97 |
); |
98 |
$self->start_mode('search'); |
99 |
$self->mode_param('rm'); |
100 |
|
101 |
$self->header_props(-charset=>$CHARSET); |
102 |
} |
103 |
|
104 |
sub in_template { |
105 |
my $q = shift || die "need CGI object!"; |
106 |
my $html = shift || die "This page is left unintentionally blank"; |
107 |
return $html if (! defined($TEMPLATE)); |
108 |
|
109 |
my ($dir,$tpl); |
110 |
if ($TEMPLATE =~ m,^(.*?/*)([^/]+)$,) { |
111 |
($dir,$tpl) = ($1,$2); |
112 |
} else { |
113 |
die "can't parse TEMPLATE path"; |
114 |
} |
115 |
|
116 |
my $master_tpl = suff2file($BASE_PATH, $q->url(-absolute => 1, -path => 1),$dir,$tpl); |
117 |
if (open(T, $master_tpl)) { |
118 |
my $template_html = join("\n",<T>); |
119 |
close(T); |
120 |
$template_html =~ s/##webpac##/$html/gsi; |
121 |
return $template_html; |
122 |
} else { |
123 |
return "Can't read template '$master_tpl'"; |
124 |
} |
125 |
} |
126 |
|
127 |
#-------------------------------------------------------------------------- |
128 |
|
129 |
# |
130 |
# make pager navigation and fill template variables |
131 |
# compatibile with HTML::Pager |
132 |
# |
133 |
|
134 |
sub make_pager($$$) { |
135 |
my ($q,$tmpl,$pager) = @_; |
136 |
|
137 |
# |
138 |
# pager navigation |
139 |
# |
140 |
my ($pager_prev,$pager_next, $pager_jump) = ('','',''); |
141 |
|
142 |
my $nav_fmt=qq{ <a href="%s">%s</a> }; |
143 |
|
144 |
if ($pager->current_page() > $pager->first_page) { |
145 |
$q->param('PAGER_offset', $pager->current_page - 1); |
146 |
$pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'<<'); |
147 |
} |
148 |
|
149 |
if ($pager->previous_set) { |
150 |
$q->param('PAGER_offset', $pager->previous_set); |
151 |
$pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..'); |
152 |
} |
153 |
|
154 |
|
155 |
foreach my $p (@{$pager->pages_in_set()}) { |
156 |
next if ($p <= 0); |
157 |
if($p == $pager->current_page()) { |
158 |
$pager_jump .= "<b>$p</b> "; |
159 |
} else { |
160 |
$q->param('PAGER_offset', $p); |
161 |
$pager_jump .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),$p); |
162 |
} |
163 |
} |
164 |
|
165 |
if ($pager->next_set) { |
166 |
$q->param('PAGER_offset', $pager->next_set); |
167 |
$pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..'); |
168 |
} |
169 |
|
170 |
if ($pager->current_page() < $pager->last_page) { |
171 |
$q->param('PAGER_offset', $pager->current_page + 1); |
172 |
$pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'>>'); |
173 |
} |
174 |
|
175 |
$tmpl->param('PAGER_PREV', $pager_prev); |
176 |
$tmpl->param('PAGER_JUMP', $pager_jump); |
177 |
$tmpl->param('PAGER_NEXT', $pager_next); |
178 |
|
179 |
} |
180 |
|
181 |
# |
182 |
# put persisten variables in template |
183 |
# |
184 |
|
185 |
sub make_pager_vars { |
186 |
my $q = shift @_; |
187 |
my $tmpl = shift @_; |
188 |
my @persist_vars = @_; |
189 |
my $hidden_vars = ''; |
190 |
my $hidden_search = ''; |
191 |
foreach my $v (@persist_vars) { |
192 |
foreach my $val ($q->param($v)) { |
193 |
next if (! $val || $val eq ''); |
194 |
$val =~ s/"/"/g; |
195 |
$hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n"; |
196 |
$hidden_search .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n" if ($v ne "rm"); |
197 |
} |
198 |
} |
199 |
|
200 |
$tmpl->param('PAGER_HIDDEN', $hidden_vars); |
201 |
$tmpl->param('SEARCH_HIDDEN', $hidden_search); |
202 |
$tmpl->param('PAGER_JAVASCRIPT', qq# |
203 |
<SCRIPT LANGUAGE="Javascript"> |
204 |
<!-- Begin |
205 |
// dummy emulator for HTML::Pager templates |
206 |
function PAGER_set_offset_and_submit() { |
207 |
return true; |
208 |
} |
209 |
// End --> |
210 |
</script> |
211 |
#); |
212 |
} |
213 |
|
214 |
#-------------------------------------------------------------------------- |
215 |
|
216 |
sub show_search_form { |
217 |
my $self = shift; |
218 |
|
219 |
# Get the CGI.pm query object |
220 |
my $q = $self->query(); |
221 |
|
222 |
my $tmpl = $self->load_tmpl(url_ex($q,'search.html')); |
223 |
my $html = $tmpl->output; |
224 |
|
225 |
my $fif = new HTML::FillInForm; |
226 |
|
227 |
return in_template($q,$fif->fill(scalarref => \$html, fobject => $q, |
228 |
target => 'search')); |
229 |
} |
230 |
|
231 |
sub show_results_list { |
232 |
my $self = shift; |
233 |
|
234 |
my $q = $self->query(); |
235 |
|
236 |
# submit was reset? |
237 |
if ($q->param('reset')) { |
238 |
$q->delete_all; |
239 |
return $self->show_search_form(); |
240 |
} |
241 |
|
242 |
# load template for this page |
243 |
|
244 |
my @s_arr; # all queries are located here |
245 |
|
246 |
my @path_arr = $q->param('path'); |
247 |
my $full = $q->param('full'); |
248 |
|
249 |
my @persist_vars = ( 'rm', 'persist_search' ); |
250 |
my $url_params = { |
251 |
'rm' => 'results', |
252 |
'show_full' => 1, |
253 |
'last_PAGER_offset' => ($q->param('PAGER_offset') || 0), |
254 |
}; |
255 |
|
256 |
my @persist_search_vars; |
257 |
my $url_params_persist = {}; |
258 |
if ($q->param("persist_search")) { |
259 |
@persist_search_vars = split(/\s*,\s*/, $q->param("persist_search")); |
260 |
$url_params_persist->{'persist_search'} = $q->url_param("persist_search"); |
261 |
$url_params->{'persist_search'} = $q->url_param("persist_search"); |
262 |
} |
263 |
|
264 |
# support parametars "f" and "v" for start |
265 |
for(my $i = 0; $i <=30; $i++) { |
266 |
|
267 |
$i = '' if ($i == 0); |
268 |
|
269 |
return show_index($self, $i) if ($q->param("f".$i."_index")); |
270 |
|
271 |
next if (! $q->param("v$i") || $q->param("v$i") eq ''); |
272 |
next if (! $q->param("f$i")); |
273 |
|
274 |
my $persist = grep(/^$i$/,@persist_search_vars); |
275 |
|
276 |
push @persist_vars, "f$i"; |
277 |
push @persist_vars, "v$i"; |
278 |
push @persist_vars, "e$i" if ($q->param("e$i")); |
279 |
|
280 |
# create url parametars (and persistent ones) |
281 |
|
282 |
$url_params->{"f$i"} = $q->url_param("f$i"); |
283 |
$url_params_persist->{"f$i"} = $q->url_param("f$i") if ($persist); |
284 |
|
285 |
foreach my $v ($q->url_param("v$i")) { |
286 |
# escape quotes so that phrase search work |
287 |
$v =~ s/"/%22/g; |
288 |
$url_params->{"v$i"} = $v; |
289 |
$url_params_persist->{"v$i"} = $v if ($persist); |
290 |
} |
291 |
|
292 |
if ($q->param("e$i")) { |
293 |
$url_params->{"e$i"} = $q->url_param("e$i"); |
294 |
# $url_params_persist->{"e$i"} = $q->url_param("e$i"); |
295 |
} |
296 |
|
297 |
my $wc="*"; # swish wildcard |
298 |
$wc="" if ($i eq ""); # don't apply wildcard on field 0 |
299 |
|
300 |
# re-write query from +/- to and/and not |
301 |
my @param_vals = $q->param("v$i"); |
302 |
my @swish_q; |
303 |
my ($pre,$post,$exact) = ('','',''); |
304 |
while (my $search = shift @param_vals) { |
305 |
my $s; |
306 |
# remove accents |
307 |
$search = my_unac_string($CHARSET,$search); |
308 |
while ($search =~ s/\s*("[^"]+")\s*/ /) { |
309 |
$s .= "$1 "; |
310 |
} |
311 |
$search =~ s/^\s+//; |
312 |
$search =~ s/\s+$//; |
313 |
|
314 |
# filed e[nr] is exact match bitmask |
315 |
# 1 = beginning, 2=end, 3=both |
316 |
my $exact_flag = $q->param("e$i") || 0; |
317 |
$pre = '"xxbxx ' if ($exact_flag & 1); |
318 |
$post = ' xxexx"' if ($exact_flag & 2); |
319 |
# add qotes on other side |
320 |
if ($q->param("e$i")) { |
321 |
$pre = '"' if (! $pre); |
322 |
$post = '"' if (! $post); |
323 |
# what about wildcards? |
324 |
$wc = ''; |
325 |
$wc = '*' if ($q->param("e$i") & 4); |
326 |
$exact = '_exact'; |
327 |
} |
328 |
|
329 |
foreach (split(/\s+/,$search)) { |
330 |
if (m/^([+-])(\S+)/) { |
331 |
$s.= ($s) ? "and " : ""; |
332 |
$s.="not " if ($1 eq "-"); |
333 |
$s.=$2.$wc." "; |
334 |
} elsif (m/^\s*(and|or|not)\s*$/i) { |
335 |
$s.=$_." "; |
336 |
# don't add * to words with less than x chars |
337 |
} elsif (length($_) <= $MIN_WILDCARD) { |
338 |
$s.=$_." "; |
339 |
} else { |
340 |
$s.=$_.$wc." "; |
341 |
} |
342 |
} |
343 |
$s =~ s/\*+/*/g; |
344 |
$s =~ s/[()]//g; # () are used in query language |
345 |
$s = $pre.$s.$post if ($q->param("e$i")); |
346 |
push @swish_q,$s; |
347 |
} |
348 |
# FIXME default operator for multi-value fields is or. There is |
349 |
# no way to change it, except here for now. Is there need? |
350 |
push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")"; |
351 |
} |
352 |
|
353 |
my $tmpl = $self->load_tmpl(url_ex($q,'results.html'), global_vars => 1, die_on_bad_params => 0); |
354 |
|
355 |
sub esc_html { |
356 |
my $html = shift; |
357 |
$html =~ s/</</g; |
358 |
$html =~ s/>/>/g; |
359 |
return $html; |
360 |
} |
361 |
|
362 |
my $sort = 'swishrank'; |
363 |
if ($q->param("sort")) { |
364 |
$sort = 'headline'; |
365 |
push @persist_vars, "sort"; |
366 |
} |
367 |
|
368 |
my $sortby = $q->param("sortby"); |
369 |
if ($sortby) { |
370 |
$sort = $sortby; |
371 |
push @persist_vars, "sortby"; |
372 |
} |
373 |
# used to filter entries in index and swish |
374 |
my $filter = $q->param("filter"); |
375 |
|
376 |
# construct swish query |
377 |
my $sw_q = join(" and ",@s_arr); |
378 |
if (@path_arr && $q->param('show_full')) { |
379 |
$sw_q .= " and (swishdocpath=\""; |
380 |
$sw_q .= join("\" or swishdocpath=\"",@path_arr); |
381 |
$sw_q .= "\")"; |
382 |
$tmpl->param('full',1); # show full records |
383 |
# } elsif (@path_arr && $#path_arr == 0) { |
384 |
# # I will assume that it's a filter since there isn't show_full |
385 |
# $filter = shift @path_arr; |
386 |
} elsif ($q->param('show_full')) { |
387 |
# just show full path, no path defined |
388 |
$tmpl->param('full',1); |
389 |
} else { |
390 |
$tmpl->param('full',0); |
391 |
} |
392 |
|
393 |
if ($filter) { |
394 |
$sw_q .= " and (swishdocpath=\"$filter\")" unless (@path_arr); |
395 |
push @persist_vars, "filter"; |
396 |
$url_params->{'filter'} = $filter; |
397 |
$url_params_persist->{'filter'} = $filter; |
398 |
} |
399 |
|
400 |
my $swish_msg = ' '; |
401 |
|
402 |
# create new swish instance |
403 |
my $swish = SWISH::API->new($INDEX); |
404 |
$swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error; |
405 |
|
406 |
# execute query and get number of results from SWISH-E |
407 |
my $search = $swish->New_Search_Object; |
408 |
|
409 |
$search->SetSort($sort); |
410 |
|
411 |
my $results = $search->Execute($sw_q); |
412 |
$swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error; |
413 |
|
414 |
my $hits = $results->Hits; |
415 |
|
416 |
$tmpl->param('hits',$hits); |
417 |
my $search_msg = $sw_q; |
418 |
$search_msg .= '<em>'.$swish_msg.'</em>' if ($swish_msg); |
419 |
$tmpl->param('search', $search_msg); |
420 |
|
421 |
$tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0); |
422 |
$tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0); |
423 |
|
424 |
# URL parametars for search results |
425 |
sub cook_url_params { |
426 |
my $hash = shift || return; |
427 |
return join("&", map { $_."=".$hash->{$_} } keys %{$hash}); |
428 |
} |
429 |
|
430 |
$tmpl->param('url_params',"?".cook_url_params($url_params)); |
431 |
$tmpl->param('url_params_paths',"?".cook_url_params($url_params).'&'.join("&",map { my $t = $_; $t =~ s/\#/%23/g; "path=$t"; } @path_arr)); |
432 |
|
433 |
|
434 |
|
435 |
# |
436 |
# build pager |
437 |
# |
438 |
|
439 |
my $current_page = $q->param('PAGER_offset') || 1; |
440 |
|
441 |
my $pager = Data::Pageset->new({ |
442 |
'total_entries' => $hits, |
443 |
'entries_per_page' => $ON_PAGE, |
444 |
'current_page' => $current_page, |
445 |
'pages_per_set' => $pages_per_set, |
446 |
}); |
447 |
|
448 |
$results->SeekResult( $pager->first - 1 ); |
449 |
|
450 |
# get number of entries on this page |
451 |
my $i = $pager->entries_on_this_page; |
452 |
|
453 |
# results from swish for template |
454 |
my @pager_data_list; |
455 |
|
456 |
for(my $i=$pager->first; $i<=$pager->last; $i++) { |
457 |
|
458 |
my $result = $results->NextResult; |
459 |
last if (! $result); |
460 |
|
461 |
my $r = { |
462 |
nr => $i, |
463 |
path => $result->Property('swishdocpath'), |
464 |
headline => esc_html($from_utf8->convert($result->Property('headline'))), |
465 |
rank => $result->Property('swishrank') |
466 |
}; |
467 |
|
468 |
#$r->{html} = back2html($from_utf8->convert($result->Property('html')), cook_url_params($url_params_persist)) if ($q->param('show_full')); |
469 |
$r->{html} = back2html($from_utf8->convert($result->Property('html')), $filter ? 'filter='.$filter : '') if ($q->param('show_full')); |
470 |
|
471 |
push @pager_data_list, $r; |
472 |
} |
473 |
|
474 |
|
475 |
|
476 |
# put something in template |
477 |
make_pager($q, $tmpl, $pager); |
478 |
make_pager_vars($q, $tmpl, @persist_vars); |
479 |
$tmpl->param('PAGER_DATA_LIST', \@pager_data_list); |
480 |
|
481 |
my $html = $tmpl->output; |
482 |
|
483 |
return in_template($q,$html); |
484 |
} |
485 |
|
486 |
sub show_index { |
487 |
my $self = shift; |
488 |
my $i = shift; # field number |
489 |
|
490 |
my $q = $self->query(); |
491 |
|
492 |
my $field = $q->param("f$i"); |
493 |
my $limit = $q->param("v$i"); |
494 |
|
495 |
my $filter = $q->param("filter"); |
496 |
|
497 |
my $html; |
498 |
|
499 |
my $index = new index_DBI( |
500 |
$cfg_global->val('global', 'dbi_dbd'), |
501 |
$cfg_global->val('global', 'dbi_dsn'), |
502 |
$cfg_global->val('global', 'dbi_user'), |
503 |
$cfg_global->val('global', 'dbi_passwd') || '' |
504 |
); |
505 |
|
506 |
my $total = $index->count($field,$limit,$filter); |
507 |
|
508 |
if (! defined($total)) { |
509 |
my $tmpl = $self->load_tmpl(url_ex($q,'no_index.html')); |
510 |
$tmpl->param('field',$field); |
511 |
$html = $tmpl->output; |
512 |
return $html; |
513 |
} |
514 |
|
515 |
my $tmpl = $self->load_tmpl(url_ex($q,'index_res.html'), global_vars => 1, die_on_bad_params => 0); |
516 |
$tmpl->param('field',$field); |
517 |
$tmpl->param('limit',$limit); |
518 |
$tmpl->param('total',$total); |
519 |
$tmpl->param('filter',$filter); |
520 |
|
521 |
# FIXME I should set offset and leave out limit from fetch!! |
522 |
# if (! $q->param("PAGER_offset") { |
523 |
# $q->param("Pager_offet) |
524 |
# } |
525 |
|
526 |
|
527 |
# |
528 |
# build pager |
529 |
# |
530 |
my $pager = Data::Pageset->new({ |
531 |
'total_entries' => $total, |
532 |
'entries_per_page' => $ON_PAGE, |
533 |
'current_page' => $q->param('PAGER_offset') || 1, |
534 |
'pages_per_set' => $pages_per_set |
535 |
}); |
536 |
|
537 |
my @persist_vars = qw{rm f$i v$i f$i_index offset}; |
538 |
|
539 |
make_pager($q, $tmpl, $pager); |
540 |
make_pager_vars($q, $tmpl, @persist_vars); |
541 |
|
542 |
my @pager_data_list = $index->fetch($field,$limit, $pager->first - 1, $pager->entries_on_this_page, $filter); |
543 |
$tmpl->param('PAGER_DATA_LIST', \@pager_data_list); |
544 |
|
545 |
return in_template($q,$tmpl->output); |
546 |
} |
547 |
|
548 |
1; |