Revision 733 (by dpavlin, 2006/05/24 13:32:07) strip script name from self URL
package WebPac;

use base 'CGI::Application';
use strict;

use HTML::FillInForm;
use SWISH::API;
use Text::Iconv;
use DBI;
use Config::IniFiles;
use Text::Unaccent;
use Data::Pageset;
use POSIX qw(locale_h);

use lib '..';
use index_DBI_filter;
use back2html;


# read global.conf configuration
my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";

# configuration options from global.conf
my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
my $TEMPLATE =$cfg_global->val('webpac', 'template');
my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter');
my $BASE_PATH =$cfg_global->val('webpac', 'base_path');
# for pager
my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10;
my $locale = $cfg_global->val('locale') || 'hr_HR';

Text::Iconv->raise_error(0);     # Conversion errors raise exceptions

my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);

setlocale(LC_CTYPE, $locale);
setlocale(LC_COLLATE, $locale);

if ($UNAC_FILTER) {
	require $UNAC_FILTER;
} else {
	sub WebPac::my_unac_string {
		my ($charset, $string) = (@_);
		return $string;
	}
}

# use path from cgi script to support templates in subdirs
sub url_ex {
	my $q = shift || die "suff2file needs CGI object!";
	my $tpl = shift || die "url_ex needs template name!";
	return suff2file($BASE_PATH, $q->url(-absolute => 1,-path => 1),$TEMPLATE_PATH,$tpl);
}

sub suff2file($$$$) {
	my ($base_path, $p, $path, $tpl) = @_;

	return $tpl if (! $base_path);

	#warn "base_path: $base_path, p: $p, path: $path, tpl: $tpl\n";

	$p =~ s#/[^/]*$##;

	# strip everything to and including base path, leaving only
	# additional (virtual) path
	if ($base_path eq "/") {
		$p =~ s,/*,,g;
		my ($name,$ext) = split(/\./,$tpl);
		$p = $name . "-" . $p . "." . $ext;
	} elsif ($p =~ s,^.*?$base_path,,) {
		$p =~ s,/*,,g;
		my ($name,$ext) = split(/\./,$tpl);
		$p = $name . $p . "." . $ext;
	} else {
		# if unable reset it!
		$p = $tpl;
	}

	if ( -e "$path/$p") {
		return $p;
	} else {
		return $tpl;
	}

}

sub setup {
	my $self = shift;
	$self->tmpl_path($TEMPLATE_PATH);
	$self->run_modes(
		'search' => 'show_search_form',
		'results' => 'show_results_list',
#		'user' => 'show_user_detail',
		'index' => 'show_index',
	);
	$self->start_mode('search');
	$self->mode_param('rm');

	$self->header_props(-charset=>$CHARSET);
}

sub in_template {
	my $q = shift || die "need CGI object!";
	my $html = shift || die "This page is left unintentionally blank";
	return $html if (! defined($TEMPLATE));

	my ($dir,$tpl);
	if ($TEMPLATE =~ m,^(.*?/*)([^/]+)$,) {
		($dir,$tpl) = ($1,$2);
	} else {
		die "can't parse TEMPLATE path";
	}

	my $master_tpl = suff2file($BASE_PATH, $q->url(-absolute => 1, -path => 1),$dir,$tpl);
	if (open(T, $master_tpl)) {
		my $template_html = join("\n",<T>);
		close(T);
		$template_html =~ s/##webpac##/$html/gsi;
		return $template_html;
	} else {
		return "Can't read template '$master_tpl'";
	}
}

#--------------------------------------------------------------------------

#
# make pager navigation and fill template variables
# compatibile with HTML::Pager
#

sub make_pager($$$) {
	my ($q,$tmpl,$pager) = @_;

	#
	# pager navigation
	#
	my ($pager_prev,$pager_next, $pager_jump) = ('','','');

	my $nav_fmt=qq{ <a href="%s">%s</a> };

	if ($pager->current_page() > $pager->first_page) {
		$q->param('PAGER_offset', $pager->current_page - 1);
		$pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'&lt;&lt;');
	}

	if ($pager->previous_set) {
		$q->param('PAGER_offset', $pager->previous_set);
		$pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..');
	}


	foreach my $p (@{$pager->pages_in_set()}) {
		next if ($p <= 0);
		if($p == $pager->current_page()) {
			$pager_jump .= "<b>$p</b> ";
		} else {
			$q->param('PAGER_offset', $p);
			$pager_jump .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),$p);
		}
	}

	if ($pager->next_set) {
		$q->param('PAGER_offset', $pager->next_set);
		$pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..');
	}

	if ($pager->current_page() < $pager->last_page) {
		$q->param('PAGER_offset', $pager->current_page + 1);
		$pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'&gt;&gt;');
	}

	$tmpl->param('PAGER_PREV', $pager_prev);
	$tmpl->param('PAGER_JUMP', $pager_jump);
	$tmpl->param('PAGER_NEXT', $pager_next);

}

#
# put persisten variables in template
#

sub make_pager_vars {
	my $q = shift @_;
	my $tmpl = shift @_;
	my @persist_vars = @_;
	my $hidden_vars = '';
	my $hidden_search = '';
	foreach my $v (@persist_vars) {
		foreach my $val ($q->param($v)) {
			next if (! $val || $val eq '');
			$val =~ s/"/&quot;/g;
			$hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n";
			$hidden_search .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n" if ($v ne "rm");
		}
	}

	$tmpl->param('PAGER_HIDDEN', $hidden_vars);
	$tmpl->param('SEARCH_HIDDEN', $hidden_search);
	$tmpl->param('PAGER_JAVASCRIPT', qq#
<SCRIPT LANGUAGE="Javascript">
<!-- Begin
	// dummy emulator for HTML::Pager templates
	function PAGER_set_offset_and_submit() {
		return true;
	}
// End -->
</script>  
	#);
}

#--------------------------------------------------------------------------

sub show_search_form {
	my $self = shift;

	# Get the CGI.pm query object
	my $q = $self->query();

	my $tmpl = $self->load_tmpl(url_ex($q,'search.html'));
	my $html = $tmpl->output;

	my $fif = new HTML::FillInForm;

	return in_template($q,$fif->fill(scalarref => \$html, fobject => $q,
		target => 'search'));
}
 
sub show_results_list {
	my $self = shift;

	my $q = $self->query();

	# submit was reset?
	if ($q->param('reset')) {
		$q->delete_all;
		return $self->show_search_form();
	}

	# load template for this page

	my @s_arr;	# all queries are located here

	my @path_arr = $q->param('path');
	my $full = $q->param('full');

	my @persist_vars = ( 'rm', 'persist_search' );
	my $url_params = {
		'rm' => 'results',
		'show_full' => 1,
		'last_PAGER_offset' => ($q->param('PAGER_offset') || 0),
	};

	my @persist_search_vars;
	my $url_params_persist = {};
	if ($q->param("persist_search")) {
		@persist_search_vars = split(/\s*,\s*/, $q->param("persist_search"));
		$url_params_persist->{'persist_search'} = $q->url_param("persist_search");
		$url_params->{'persist_search'} = $q->url_param("persist_search");
	}

	# support parametars "f" and "v" for start
	for(my $i = 0; $i <=30; $i++) {

		$i = '' if ($i == 0);

		return show_index($self, $i) if ($q->param("f".$i."_index"));

		next if (! $q->param("v$i") || $q->param("v$i") eq '');
		next if (! $q->param("f$i"));

		my $persist = grep(/^$i$/,@persist_search_vars);
	
		push @persist_vars, "f$i";
		push @persist_vars, "v$i";
		push @persist_vars, "e$i" if ($q->param("e$i"));

		# create url parametars (and persistent ones)

		$url_params->{"f$i"} = $q->url_param("f$i");
		$url_params_persist->{"f$i"} = $q->url_param("f$i") if ($persist);

		my @v;

		foreach my $v ($q->url_param("v$i")) {
			# escape quotes so that phrase search work
			$v =~ s/"/%22/g;
			push @v, $v;
		}
		$url_params->{"v$i"} = \@v;
		$url_params_persist->{"v$i"} = \@v if ($persist);

		if ($q->param("e$i")) {
			$url_params->{"e$i"} = $q->url_param("e$i");
#			$url_params_persist->{"e$i"} = $q->url_param("e$i");
		}

		my $wc="*";	# swish wildcard
		$wc="" if ($i eq "");	# don't apply wildcard on field 0

		# re-write query from +/- to and/and not
		my @param_vals = $q->param("v$i");
		my @swish_q;
		my ($pre,$post,$exact) = ('','','');
		while (my $search = shift @param_vals) {
			my $s;
			# remove accents
			$search = my_unac_string($CHARSET,$search);
			while ($search =~ s/\s*("[^"]+")\s*/ /) {
				$s .= "$1 ";
			}
			$search =~ s/^\s+//;
			$search =~ s/\s+$//;

			# filed e[nr] is exact match bitmask
			# 1 = beginning, 2=end, 3=both
			my $exact_flag = $q->param("e$i") || 0;
			$pre = '"xxbxx ' if ($exact_flag & 1);
			$post = ' xxexx"' if ($exact_flag & 2);
			# add qotes on other side
			if ($q->param("e$i")) {
				$pre = '"' if (! $pre);
				$post = '"' if (! $post);
				# what about wildcards?
				$wc = '';
				$wc = '*' if ($q->param("e$i") & 4);
				$exact = '_exact';
			}

			foreach (split(/\s+/,$search)) {
				if (m/^([+-])(\S+)/) {
					$s.= ($s) ? "and " : "";
					$s.="not " if ($1 eq "-");
					$s.=$2.$wc." ";
				} elsif (m/^\s*(and|or|not)\s*$/i) {
					$s.=$_." ";
				# don't add * to words with less than x chars
				} elsif (length($_) <= $MIN_WILDCARD) {
					$s.=$_." ";
				} else {
					$s.=$_.$wc." ";
				}
			}
			$s =~ s/\*+/*/g;
			$s =~ s/[()]//g;	# () are used in query language
			$s = $pre.$s.$post if ($q->param("e$i"));
			push @swish_q,$s;
		}
		# FIXME default operator for multi-value fields is or. There is
		# no way to change it, except here for now. Is there need?
		push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
	}

	my $tmpl = $self->load_tmpl(url_ex($q,'results.html'), global_vars => 1, die_on_bad_params => 0);

	sub esc_html {
		my $html = shift;
		$html =~ s/</&lt;/g;
		$html =~ s/>/&gt;/g;
		return $html;
	}

	my $sort = 'swishrank';
	if ($q->param("sort")) {
		$sort = 'headline';
		push @persist_vars, "sort";
	}

	my $sortby = $q->param("sortby");
	if ($sortby) {
		$sort = $sortby;
		push @persist_vars, "sortby";
	}
	# used to filter entries in index and swish
	my $filter = $q->param("filter");

	# construct swish query
	my $sw_q = join(" and ",@s_arr);
	if (@path_arr && $q->param('show_full')) {
		$sw_q .= " and (swishdocpath=\"";
		$sw_q .= join("\" or swishdocpath=\"",@path_arr);
		$sw_q .= "\")";
		$tmpl->param('full',1);	# show full records
#	} elsif (@path_arr && $#path_arr == 0) {
#		# I will assume that it's a filter since there isn't show_full
#		$filter = shift @path_arr;
	} elsif ($q->param('show_full')) {
		# just show full path, no path defined
		$tmpl->param('full',1);
	} else {
		$tmpl->param('full',0);
	}

	if ($filter) {
		$sw_q .= " and (swishdocpath=\"$filter\")" unless (@path_arr);
		push @persist_vars, "filter";
		$url_params->{'filter'} = $filter;
		$url_params_persist->{'filter'} = $filter;
	}

	my $swish_msg = ' ';

	# create new swish instance
	my $swish = SWISH::API->new($INDEX);
	$swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error;

	# execute query and get number of results from SWISH-E
	my $search = $swish->New_Search_Object;

	$search->SetSort($sort);

	my $results = $search->Execute($sw_q);
	$swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error;

	my $hits = $results->Hits;

	$tmpl->param('hits',$hits);
	my $search_msg = $sw_q;
	$search_msg .= '<em>'.$swish_msg.'</em>' if ($swish_msg);
	$tmpl->param('search', $search_msg);

	$tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
	$tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);

	# URL parametars for search results
	sub cook_url_params {
		my $hash = shift || return;
		return join("&", map {
			my $var = $_;
			if (ref($hash->{$var}) eq 'ARRAY') {
				join('&',
					map { $var.'='.$_ } @{$hash->{$var}}
				);
			} else {
				$var."=".$hash->{$var};
			}
		} keys %{$hash});
	}

	$tmpl->param('url_params',"?".cook_url_params($url_params));
	$tmpl->param('url_params_paths',"?".cook_url_params($url_params).'&'.join("&",map { my $t = $_; $t =~ s/\#/%23/g; "path=$t"; } @path_arr));



	#
	# build pager
	#

	my $current_page = $q->param('PAGER_offset') || 1;

	my $pager = Data::Pageset->new({
		'total_entries' => $hits,
		'entries_per_page' => $ON_PAGE,
		'current_page' => $current_page,
		'pages_per_set' => $pages_per_set,
	});

	$results->SeekResult( $pager->first - 1 );

	# get number of entries on this page
	my $i = $pager->entries_on_this_page;

	# results from swish for template
	my @pager_data_list;

	for(my $i=$pager->first; $i<=$pager->last; $i++) {

		my $result = $results->NextResult;
		last if (! $result);

		my $r = {
			nr => $i,
			path => $result->Property('swishdocpath'),
			headline => esc_html($from_utf8->convert($result->Property('headline'))),
			rank => $result->Property('swishrank')
		};

		#$r->{html} = back2html($from_utf8->convert($result->Property('html')), cook_url_params($url_params_persist)) if ($q->param('show_full'));
		$r->{html} = back2html($from_utf8->convert($result->Property('html')), $filter ? 'filter='.$filter : '') if ($q->param('show_full'));

		push @pager_data_list, $r;
	}



	# put something in template
	make_pager($q, $tmpl, $pager);
	make_pager_vars($q, $tmpl, @persist_vars);
	$tmpl->param('PAGER_DATA_LIST', \@pager_data_list);

	my $html = $tmpl->output;

	return in_template($q,$html);
}
 
sub show_index {
	my $self = shift;
	my $i = shift;		# field number

	my $q = $self->query();

	my $field = $q->param("f$i");
	my $limit = $q->param("v$i");

	my $filter = $q->param("filter");

	my $html;

	my $index = new index_DBI(
		$cfg_global->val('global', 'dbi_dbd'),
		$cfg_global->val('global', 'dbi_dsn'),
		$cfg_global->val('global', 'dbi_user'),
		$cfg_global->val('global', 'dbi_passwd') || ''
	);

	my $total = $index->count($field,$limit,$filter);

	if (! defined($total)) {
		my $tmpl = $self->load_tmpl(url_ex($q,'no_index.html'));
		$tmpl->param('field',$field);
		$html = $tmpl->output;
		return $html;
	}

	my $tmpl = $self->load_tmpl(url_ex($q,'index_res.html'), global_vars => 1, die_on_bad_params => 0);
	$tmpl->param('field',$field);
	$tmpl->param('limit',$limit);
	$tmpl->param('total',$total);
	$tmpl->param('filter',$filter);

# FIXME I should set offset and leave out limit from fetch!!
#	if (! $q->param("PAGER_offset") {
#		$q->param("Pager_offet)
#	}


	#
	# build pager
	#
	my $pager = Data::Pageset->new({
		'total_entries' => $total,
		'entries_per_page' => $ON_PAGE,
		'current_page' => $q->param('PAGER_offset') || 1,
		'pages_per_set' => $pages_per_set
	});

	my @persist_vars = qw{rm f$i v$i f$i_index offset};

	make_pager($q, $tmpl, $pager);
	make_pager_vars($q, $tmpl, @persist_vars);

	my @pager_data_list = $index->fetch($field,$limit, $pager->first - 1, $pager->entries_on_this_page, $filter);
	$tmpl->param('PAGER_DATA_LIST', \@pager_data_list);

	return in_template($q,$tmpl->output);
}

1;