--- trunk/WebPac.pm 2003/01/22 20:24:32 11
+++ trunk/WebPac.pm 2003/10/30 00:10:09 140
@@ -6,20 +6,32 @@
use HTML::Pager;
use HTML::FillInForm;
use SWISH;
-use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset);
+use Text::Iconv;
use DBI;
+use Config::IniFiles;
+use Text::Unaccent;
use lib '..';
-use index_DBI;
+use index_DBI_cache;
+use back2html;
-# configuration options
-# FIX: they really should go in configuration file!
-my $TEMPLATE_PATH = '/data/webpac/template_html';
-my $CHARSET = 'ISO-8859-2';
-my $SWISH = '/usr/local/bin/swish-e';
-my $INDEX = '/data/webpac/index/isis.index';
-my $MAX_HITS = 500;
-my $ON_PAGE = 10;
+
+# read global.conf configuration
+my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";
+
+# configuration options from global.conf
+my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
+my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
+my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
+my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
+my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
+my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
+my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
+
+
+Text::Iconv->raise_error(0); # 0 = conversion errors do NOT raise exceptions; convert() returns undef instead
+
+my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
sub setup {
@@ -63,63 +75,110 @@
my @s_arr; # all queries are located here
- for(my $i = 1; $i <=10; $i++) {
+ my @path_arr = $q->param('path');
+ my $full = $q->param('full');
+
+ my @persist_vars = ( 'rm' );
+ my @url_params = ( 'rm=results', 'show_full=1', 'last_PAGER_offset='.$q->param('PAGER_offset') || 0 );
+
+ for(my $i = 1; $i <=30; $i++) {
return show_index($self, $i) if ($q->param("f".$i."_index"));
- next if (! $q->param("f$i"));
+
next if (! $q->param("v$i"));
+ next if (! $q->param("f$i"));
+
+ push @persist_vars, "f$i";
+ push @persist_vars, "v$i";
+
+ push @url_params,"f$i=".$q->url_param("f$i");
+ push @url_params,"v$i=".$q->url_param("v$i");
# re-write query from +/- to and/and not
- my $s;
- my $search = $q->param("v$i");
- while ($search =~ s/\s*("[^"]+")\s*/ /) {
- $s .= "$1 ";
- }
- $search =~ s/^\s+//;
- $search =~ s/\s+$//;
+ my @param_vals = $q->param("v$i");
+ my @swish_q;
+ while (my $search = shift @param_vals) {
+ my $s;
+ # remove accents
+ $search = unac_string($CHARSET,$search);
+ while ($search =~ s/\s*("[^"]+")\s*/ /) {
+ $s .= "$1 ";
+ }
+ $search =~ s/^\s+//;
+ $search =~ s/\s+$//;
- foreach (split(/\s+/,$search)) {
- if (m/^([+-])(\S+)/) {
- $s.= ($s) ? "and " : "";
- $s.="not " if ($1 eq "-");
- $s.="$2* ";
- } else {
- $s.="$_* ";
+ foreach (split(/\s+/,$search)) {
+ if (m/^([+-])(\S+)/) {
+ $s.= ($s) ? "and " : "";
+ $s.="not " if ($1 eq "-");
+ $s.="$2* ";
+ } elsif (m/^\s*(and|or|not)\s*$/i) {
+ $s.="$_ ";
+ # don't add * to words with less than x chars
+ } elsif (length($_) <= $MIN_WILDCARD) {
+ $s.="$_ ";
+ } else {
+ $s.="$_* ";
+ }
}
+ $s =~ s/\*+/*/g;
+ push @swish_q,$s;
}
-
- push @s_arr,$q->param("f$i")."_swish=($s)";
+ # FIXME default operator for multi-value fields is or. There is
+ # no way to change it, except here for now. Is there need?
+ push @s_arr, $q->param("f$i")."_swish=(".join(" or ",@swish_q).")";
}
- my $tmpl = $self->load_tmpl('results.html');
+ my $tmpl = $self->load_tmpl('results.html', global_vars => 1);
+
+ sub esc_html {
+ my $html = shift;
+ $html =~ s/</&lt;/g;
+ $html =~ s/>/&gt;/g;
+ return $html;
+ }
# call swish
my $sh = SWISH->connect('Fork',
prog => $SWISH,
indexes => $INDEX,
- #properties => [qw/god br nr/],
+ properties => [qw/swishdocpath swishrank swishtitle headline html/],
results => sub {
my ($sh,$hit) = @_;
push @swish_results, {
nr => ($#swish_results + 2),
path => $hit->swishdocpath,
- title => to_utf8({ -string => $hit->swishtitle, -charset => $CHARSET }),
+ headline => esc_html($from_utf8->convert($hit->headline)),
+ html => back2html($from_utf8->convert($hit->html)),
rank => $hit->swishrank };
-# my @fields = $hit->field_names;
-# print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields;
},
#startnum => 0,
- maxhits => $MAX_HITS,
+ maxhits => $MAX_HITS
);
die $SWISH::errstr unless $sh;
+ # construct swish query
+ my $sw_q = join(" and ",@s_arr);
+ if (@path_arr && $q->param('show_full')) {
+ $sw_q .= "and (swishdocpath=\"";
+ $sw_q .= join("\" or swishdocpath=\"",@path_arr);
+ $sw_q .= "\")";
+ $tmpl->param('full',1); # show full records
+ } else {
+ $tmpl->param('full',0);
+ }
- my $hits = $sh->query(join(" and ",@s_arr)) || 0; # FIX: and/or
+ my $hits = $sh->query($sw_q);
$tmpl->param('hits',$hits);
- $tmpl->param('search',join(" and ",@s_arr));
+ $tmpl->param('search',$sw_q);
+
+ $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
+ $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
+
+ $tmpl->param('url_params',"?".join("&",@url_params));
# create a Pager object
my $pager = HTML::Pager->new(
@@ -130,25 +189,21 @@
my @result;
for (my $i=0; $i<$rows; $i++) {
- push @result, $swish_results[$offset+$i] if $swish_results[$offset+$i];
+ my $r = $swish_results[$offset+$i];
+ if ($r && $tmpl->param('full')) {
+ push @result, $r;
+ } elsif ($r) {
+ # if not full output, skip html
+ delete $r->{html};
+ push @result, $r;
+ }
}
return \@result;
},
rows => $hits,
page_size => $ON_PAGE,
# some optional parameters
- persist_vars => [
- 'rm',
- 'f1', 'v1',
- 'f2', 'v2',
- 'f3', 'v3',
- 'f4', 'v4',
- 'f5', 'v5',
- 'f6', 'v6',
- 'f7', 'v7',
- 'f8', 'v8',
- 'f9', 'v9',
- ],
+ persist_vars => [ @persist_vars ],
#cell_space_color => '#000000',
#cell_background_color => '#ffffff',
#nav_background_color => '#dddddd',
@@ -171,28 +226,53 @@
my $field = $q->param("f$i");
my $limit = $q->param("v$i");
-
my $html;
- my $index = new index_DBI();
+ my $index = new index_DBI(
+ $cfg_global->val('global', 'dbi_dbd'),
+ $cfg_global->val('global', 'dbi_dsn'),
+ $cfg_global->val('global', 'dbi_user'),
+ $cfg_global->val('global', 'dbi_passwd') || ''
+ );
- if (! $index->check($field)) {
+ my $total = $index->count($field,$limit);
+ if (! $total) {
my $tmpl = $self->load_tmpl('no_index.html');
$tmpl->param('field',$field);
$html = $tmpl->output;
return $html;
}
- my @index_arr = $index->fetch($field,'item',$limit);
+ my $tmpl = $self->load_tmpl('index_res.html', global_vars => 1);
+ $tmpl->param('field',$field);
+ $tmpl->param('limit',$limit);
+ $tmpl->param('total',$total);
+
+# FIXME I should set offset and leave out limit from fetch!!
+# if (! $q->param("PAGER_offset") {
+# $q->param("Pager_offet)
+# }
- $html .= "show index of $field";
- $html .= " for $limit" if ($limit);
+ my $pager = HTML::Pager->new(
+ query => $q,
+ get_data_callback => sub {
+ my ($offset, $rows) = @_;
- while (my $row = shift @index_arr) {
- $html .= "
".$row->{item}."\n";
- }
+ my @result = $index->fetch($field,$limit, $offset, $rows);
+ return \@result;
+ },
+ rows => $total,
+ page_size => $ON_PAGE,
+ persist_vars => [
+ 'rm',
+ "f$i", "v$i", "f".$i."_index",
+ 'offset',
+ ],
+ debug => 1,
+ template => $tmpl,
+ );
- return $html;
+ return $pager->output;
}
1;