/[webpac]/trunk2/WebPac.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/WebPac.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 163 - (show annotations)
Thu Nov 20 21:23:40 2003 UTC (20 years, 4 months ago) by dpavlin
Original Path: trunk/WebPac.pm
File size: 7797 byte(s)
Added type="swish_exact" to save data into swish index with boundaries
xxbxx data xxexxx. This is helpful to implement exact match from beginning
of query and exact match to full query which are defined using e[nr] field
in web user interface (with same [nr] as f[nr] and v[nr] fields) which
have to have value 1 (from beginning) 2 (from end, not that useful...) or
3 (1+2 - exact match)

1 package WebPac;
2
3 use base 'CGI::Application';
4 use strict;
5
6 use HTML::Pager;
7 use HTML::FillInForm;
8 use SWISH;
9 use Text::Iconv;
10 use DBI;
11 use Config::IniFiles;
12 use Text::Unaccent;
13
14 use lib '..';
15 use index_DBI_cache;
16 use back2html;
17
18
19 # read global.conf configuration
20 my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";
21
22 # configuration options from global.conf
23 my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
24 my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
25 my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
26 my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
27 my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
28 my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
29 my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
30 my $TEMPLATE =$cfg_global->val('webpac', 'template');
31
32
33 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
34
35 my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
36
37
38 sub setup {
39 my $self = shift;
40 $self->tmpl_path($TEMPLATE_PATH);
41 $self->run_modes(
42 'search' => 'show_search_form',
43 'results' => 'show_results_list',
44 # 'user' => 'show_user_detail',
45 'index' => 'show_index',
46 );
47 $self->start_mode('search');
48 $self->mode_param('rm');
49
50 $self->header_props(-charset=>$CHARSET);
51 }
52
53 sub in_template {
54 my $html = shift || "This page is left unintentionally blank";
55 return $html if (! defined($TEMPLATE));
56 if (open(T, $TEMPLATE)) {
57 my $template_html = join("\n",<T>);
58 close(T);
59 $template_html =~ s/##webpac##/$html/gsi;
60 return $template_html;
61 } else {
62 return "Can't read template '$TEMPLATE'";
63 }
64 }
65
66 sub show_search_form {
67 my $self = shift;
68
69 # Get the CGI.pm query object
70 my $q = $self->query();
71
72 my $tmpl = $self->load_tmpl('search.html');
73 my $html = $tmpl->output;
74
75 my $fif = new HTML::FillInForm;
76
77 return in_template($fif->fill(scalarref => \$html, fobject => $q,
78 target => 'search'));
79 }
80
81 sub show_results_list {
82 my $self = shift;
83
84 my $q = $self->query();
85
86 my @swish_results; # results from swish
87
88 # load template for this page
89
90 my @s_arr; # all queries are located here
91
92 my @path_arr = $q->param('path');
93 my $full = $q->param('full');
94
95 my @persist_vars = ( 'rm' );
96 my @url_params = ( 'rm=results', 'show_full=1', 'last_PAGER_offset='.$q->param('PAGER_offset') || 0 );
97
98 # support parametars "f" and "v" for start
99 for(my $i = ""; $i <=30; $i++) {
100
101 return show_index($self, $i) if ($q->param("f".$i."_index"));
102
103 next if (! $q->param("v$i"));
104 next if (! $q->param("f$i"));
105
106 push @persist_vars, "f$i";
107 push @persist_vars, "v$i";
108
109 push @url_params,"f$i=".$q->url_param("f$i");
110 foreach my $v ($q->url_param("v$i")) {
111 push @url_params,"v$i=$v";
112 }
113
114 my $wc="*"; # swish wildcard
115 $wc="" if ($i eq ""); # don't apply wildcard on field 0
116
117 # re-write query from +/- to and/and not
118 my @param_vals = $q->param("v$i");
119 my @swish_q;
120 my ($pre,$post,$exact) = ('','','');
121 while (my $search = shift @param_vals) {
122 my $s;
123 # remove accents
124 $search = unac_string($CHARSET,$search);
125 while ($search =~ s/\s*("[^"]+")\s*/ /) {
126 $s .= "$1 ";
127 }
128 $search =~ s/^\s+//;
129 $search =~ s/\s+$//;
130
131 # filed e[nr] is exact match bitmask
132 # 1 = beginning, 2=end, 3=both
133 $pre = '"xxbxx ' if ($q->param("e$i") & 1);
134 $post = ' xxexx"' if ($q->param("e$i") & 2);
135 # add qotes on other side
136 if ($q->param("e$i")) {
137 $pre = '"' if (! $pre);
138 $post = '"' if (! $post);
139 $wc = ''; # don't use windcard in exact
140 $exact = '_exact';
141 }
142
143 foreach (split(/\s+/,$search)) {
144 if (m/^([+-])(\S+)/) {
145 $s.= ($s) ? "and " : "";
146 $s.="not " if ($1 eq "-");
147 $s.=$2.$wc." ";
148 } elsif (m/^\s*(and|or|not)\s*$/i) {
149 $s.=$_." ";
150 # don't add * to words with less than x chars
151 } elsif (length($_) <= $MIN_WILDCARD) {
152 $s.=$_." ";
153 } else {
154 $s.=$_.$wc." ";
155 }
156 }
157 $s =~ s/\*+/*/g;
158 $s = $pre.$s.$post if ($q->param("e$i"));
159 push @swish_q,$s;
160 }
161 # FIXME default operator for multi-value fields is or. There is
162 # no way to change it, except here for now. Is there need?
163 push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
164 }
165
166 my $tmpl = $self->load_tmpl('results.html', global_vars => 1);
167
168 sub esc_html {
169 my $html = shift;
170 $html =~ s/</&lt;/g;
171 $html =~ s/>/&gt;/g;
172 return $html;
173 }
174
175 # call swish
176 my $sh = SWISH->connect('Fork',
177 prog => $SWISH,
178 indexes => $INDEX,
179 properties => [qw/swishdocpath swishrank swishtitle headline html/],
180 results => sub {
181 my ($sh,$hit) = @_;
182
183 push @swish_results, {
184 nr => ($#swish_results + 2),
185 path => $hit->swishdocpath,
186 headline => esc_html($from_utf8->convert($hit->headline)),
187 html => back2html($from_utf8->convert($hit->html)),
188 rank => $hit->swishrank };
189
190 },
191 #startnum => 0,
192 maxhits => $MAX_HITS
193 );
194
195 die $SWISH::errstr unless $sh;
196 # construct swish query
197 my $sw_q = join(" and ",@s_arr);
198 if (@path_arr && $q->param('show_full')) {
199 $sw_q .= "and (swishdocpath=\"";
200 $sw_q .= join("\" or swishdocpath=\"",@path_arr);
201 $sw_q .= "\")";
202 $tmpl->param('full',1); # show full records
203 } elsif ($q->param('show_full')) {
204 # just show full path, no path defined
205 $tmpl->param('full',1);
206 } else {
207 $tmpl->param('full',0);
208 }
209
210 my $hits = $sh->query($sw_q);
211
212 $tmpl->param('hits',$hits);
213 $tmpl->param('search',$sw_q);
214
215 $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
216 $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
217
218 $tmpl->param('url_params',"?".join("&",@url_params));
219
220 # create a Pager object
221 my $pager = HTML::Pager->new(
222 # required parameters
223 query => $q,
224 get_data_callback => sub {
225 my ($offset, $rows) = @_;
226
227 my @result;
228 for (my $i=0; $i<$rows; $i++) {
229 my $r = $swish_results[$offset+$i];
230 if ($r && $tmpl->param('full')) {
231 push @result, $r;
232 } elsif ($r) {
233 # if not full output, skip html
234 delete $r->{html};
235 push @result, $r;
236 }
237 }
238 return \@result;
239 },
240 rows => $hits,
241 page_size => $ON_PAGE,
242 # some optional parameters
243 persist_vars => [ @persist_vars ],
244 #cell_space_color => '#000000',
245 #cell_background_color => '#ffffff',
246 #nav_background_color => '#dddddd',
247 #javascript_presubmit => 'last_minute_javascript()',
248 debug => 1,
249 template => $tmpl,
250 );
251
252 my $html = $pager->output;
253
254 return in_template($html);
255 }
256
257 sub show_index {
258 my $self = shift;
259 my $i = shift; # field number
260
261 my $q = $self->query();
262
263 my $field = $q->param("f$i");
264 my $limit = $q->param("v$i");
265
266 my $html;
267
268 my $index = new index_DBI(
269 $cfg_global->val('global', 'dbi_dbd'),
270 $cfg_global->val('global', 'dbi_dsn'),
271 $cfg_global->val('global', 'dbi_user'),
272 $cfg_global->val('global', 'dbi_passwd') || ''
273 );
274
275 my $total = $index->count($field,$limit);
276 if (! $total) {
277 my $tmpl = $self->load_tmpl('no_index.html');
278 $tmpl->param('field',$field);
279 $html = $tmpl->output;
280 return $html;
281 }
282
283 my $tmpl = $self->load_tmpl('index_res.html', global_vars => 1);
284 $tmpl->param('field',$field);
285 $tmpl->param('limit',$limit);
286 $tmpl->param('total',$total);
287
288 # FIXME I should set offset and leave out limit from fetch!!
289 # if (! $q->param("PAGER_offset") {
290 # $q->param("Pager_offet)
291 # }
292
293 my $pager = HTML::Pager->new(
294 query => $q,
295 get_data_callback => sub {
296 my ($offset, $rows) = @_;
297
298 my @result = $index->fetch($field,$limit, $offset, $rows);
299 return \@result;
300 },
301 rows => $total,
302 page_size => $ON_PAGE,
303 persist_vars => [
304 'rm',
305 "f$i", "v$i", "f".$i."_index",
306 'offset',
307 ],
308 debug => 1,
309 template => $tmpl,
310 );
311
312 return in_template($pager->output);
313 }
314
315 1;

Properties

Name Value
cvs2svn:cvs-rev 1.34

  ViewVC Help
Powered by ViewVC 1.1.26