/[webpac]/trunk/WebPac.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/WebPac.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 641 - (show annotations)
Sun Jan 23 02:02:10 2005 UTC (15 years ago) by dpavlin
File size: 12611 byte(s)
New implementation of indexes: now it uses only two tables (index for all
data and tags for all tags). Currently, it doesn't enforce relation between
them on RDBMS level (I have to test this code against SQLite and MySQL
before enforcing that).
Removed swish-e output while indexing, database is used as default tag to
enable filtering by database (there isn't possiblity to set tag to something
else yet!). Output usage count in index.

1 package WebPac;
2
3 use base 'CGI::Application';
4 use strict;
5
6 use HTML::FillInForm;
7 use SWISH::API;
8 use Text::Iconv;
9 use DBI;
10 use Config::IniFiles;
11 use Text::Unaccent;
12 use Data::Pageset;
13
14 use lib '..';
15 use index_DBI_tag;
16 use back2html;
17
18
19 # read global.conf configuration
20 my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";
21
22 # configuration options from global.conf
23 my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
24 my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
25 my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
26 my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
27 my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
28 my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
29 my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
30 my $TEMPLATE =$cfg_global->val('webpac', 'template');
31 my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter');
32 my $BASE_PATH =$cfg_global->val('webpac', 'base_path');
33 # for pager
34 my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10;
35
36 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
37
38 my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
39
40 if ($UNAC_FILTER) {
41 require $UNAC_FILTER;
42 } else {
43 sub WebPac::my_unac_string {
44 my ($charset, $string) = (@_);
45 return $string;
46 }
47 }
48
49 # use path from cgi script to support templates in subdirs
50 sub url_ex {
51 my $q = shift || die "suff2file needs CGI object!";
52 my $tpl = shift || die "url_ex needs template name!";
53 return suff2file($BASE_PATH, $q->url(-absolute => 1,-path => 1),$TEMPLATE_PATH,$tpl);
54 }
55
56 sub suff2file($$$$) {
57 my ($base_path, $p, $path, $tpl) = @_;
58
59 return $tpl if (! $base_path);
60
61 # strip everything to and including base path, leaving only
62 # additional (virtual) path
63 if ($base_path eq "/") {
64 $p =~ s,/*,,g;
65 my ($name,$ext) = split(/\./,$tpl);
66 $p = $name . "-" . $p . "." . $ext;
67 } elsif ($p =~ s,^.*?$base_path,,) {
68 $p =~ s,/*,,g;
69 my ($name,$ext) = split(/\./,$tpl);
70 $p = $name . $p . "." . $ext;
71 } else {
72 # if unable reset it!
73 $p = $tpl;
74 }
75
76 if ( -e "$path/$p") {
77 return $p;
78 } else {
79 return $tpl;
80 }
81
82 }
83
84 sub setup {
85 my $self = shift;
86 $self->tmpl_path($TEMPLATE_PATH);
87 $self->run_modes(
88 'search' => 'show_search_form',
89 'results' => 'show_results_list',
90 # 'user' => 'show_user_detail',
91 'index' => 'show_index',
92 );
93 $self->start_mode('search');
94 $self->mode_param('rm');
95
96 $self->header_props(-charset=>$CHARSET);
97 }
98
99 sub in_template {
100 my $q = shift || die "need CGI object!";
101 my $html = shift || die "This page is left unintentionally blank";
102 return $html if (! defined($TEMPLATE));
103
104 my ($dir,$tpl);
105 if ($TEMPLATE =~ m,^(.*?/*)([^/]+)$,) {
106 ($dir,$tpl) = ($1,$2);
107 } else {
108 die "can't parse TEMPLATE path";
109 }
110
111 my $master_tpl = suff2file($BASE_PATH, $q->url(-absolute => 1, -path => 1),$dir,$tpl);
112 if (open(T, $master_tpl)) {
113 my $template_html = join("\n",<T>);
114 close(T);
115 $template_html =~ s/##webpac##/$html/gsi;
116 return $template_html;
117 } else {
118 return "Can't read template '$master_tpl'";
119 }
120 }
121
122 #--------------------------------------------------------------------------
123
124 #
125 # make pager navigation and fill template variables
126 # compatibile with HTML::Pager
127 #
128
129 sub make_pager($$$) {
130 my ($q,$tmpl,$pager) = @_;
131
132 #
133 # pager navigation
134 #
135 my ($pager_prev,$pager_next, $pager_jump) = ('','','');
136
137 my $nav_fmt=qq{ <a href="%s">%s</a> };
138
139 if ($pager->current_page() > $pager->first_page) {
140 $q->param('PAGER_offset', $pager->current_page - 1);
141 $pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'&lt;&lt;');
142 }
143
144 if ($pager->previous_set) {
145 $q->param('PAGER_offset', $pager->previous_set);
146 $pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..');
147 }
148
149
150 foreach my $p (@{$pager->pages_in_set()}) {
151 next if ($p < 0);
152 if($p == $pager->current_page()) {
153 $pager_jump .= "<b>$p</b> ";
154 } else {
155 $q->param('PAGER_offset', $p);
156 $pager_jump .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),$p);
157 }
158 }
159
160 if ($pager->next_set) {
161 $q->param('PAGER_offset', $pager->next_set);
162 $pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..');
163 }
164
165 if ($pager->current_page() < $pager->last_page) {
166 $q->param('PAGER_offset', $pager->current_page + 1);
167 $pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'&gt;&gt;');
168 }
169
170 $tmpl->param('PAGER_PREV', $pager_prev);
171 $tmpl->param('PAGER_JUMP', $pager_jump);
172 $tmpl->param('PAGER_NEXT', $pager_next);
173
174 }
175
176 #
177 # put persisten variables in template
178 #
179
180 sub make_pager_vars {
181 my $q = shift @_;
182 my $tmpl = shift @_;
183 my @persist_vars = @_;
184 my $hidden_vars = '';
185 foreach my $v (@persist_vars) {
186 foreach my $val ($q->param($v)) {
187 next if (! $val || $val eq '');
188 $hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n";
189 }
190 }
191
192 $tmpl->param('PAGER_HIDDEN', $hidden_vars);
193 $tmpl->param('PAGER_JAVASCRIPT', qq#
194 <SCRIPT LANGUAGE="Javascript">
195 <!-- Begin
196 // dummy emulator for HTML::Pager templates
197 function PAGER_set_offset_and_submit() {
198 return true;
199 }
200 // End -->
201 </script>
202 #);
203 }
204
205 #--------------------------------------------------------------------------
206
207 sub show_search_form {
208 my $self = shift;
209
210 # Get the CGI.pm query object
211 my $q = $self->query();
212
213 my $tmpl = $self->load_tmpl(url_ex($q,'search.html'));
214 my $html = $tmpl->output;
215
216 my $fif = new HTML::FillInForm;
217
218 return in_template($q,$fif->fill(scalarref => \$html, fobject => $q,
219 target => 'search'));
220 }
221
222 sub show_results_list {
223 my $self = shift;
224
225 my $q = $self->query();
226
227 # load template for this page
228
229 my @s_arr; # all queries are located here
230
231 my @path_arr = $q->param('path');
232 my $full = $q->param('full');
233
234 my @persist_vars = ( 'rm', 'persist_search' );
235 my @url_params = ( 'rm=results', 'show_full=1', 'last_PAGER_offset='.($q->param('PAGER_offset') || 0) );
236
237 my @persist_search_vars;
238 my @url_params_persist;
239 if ($q->param("persist_search")) {
240 @persist_search_vars = split(/\s*,\s*/, $q->param("persist_search"));
241 push @url_params_persist, "persist_search=".$q->url_param("persist_search");
242 push @url_params,"persist_search=".$q->url_param("persist_search");
243 }
244
245 # support parametars "f" and "v" for start
246 for(my $i = 0; $i <=30; $i++) {
247
248 $i = '' if ($i == 0);
249
250 return show_index($self, $i) if ($q->param("f".$i."_index"));
251
252 next if (! $q->param("v$i") || $q->param("v$i") eq '');
253 next if (! $q->param("f$i"));
254
255 my $persist = grep(/^$i$/,@persist_search_vars);
256
257 push @persist_vars, "f$i";
258 push @persist_vars, "v$i";
259 push @persist_vars, "e$i" if ($q->param("e$i"));
260
261 # create url parametars (and persistent ones)
262
263 push @url_params,"f$i=".$q->url_param("f$i");
264 push @url_params_persist,"f$i=".$q->url_param("f$i") if ($persist);
265
266 foreach my $v ($q->url_param("v$i")) {
267 push @url_params,"v$i=$v";
268 push @url_params_persist,"v$i=$v" if ($persist);
269 }
270
271 if ($q->param("e$i")) {
272 push @url_params,"e$i=".$q->url_param("e$i");
273 # push @url_params_persist,"e$i=".$q->url_param("e$i");
274 }
275
276 my $wc="*"; # swish wildcard
277 $wc="" if ($i eq ""); # don't apply wildcard on field 0
278
279 # re-write query from +/- to and/and not
280 my @param_vals = $q->param("v$i");
281 my @swish_q;
282 my ($pre,$post,$exact) = ('','','');
283 while (my $search = shift @param_vals) {
284 my $s;
285 # remove accents
286 $search = my_unac_string($CHARSET,$search);
287 while ($search =~ s/\s*("[^"]+")\s*/ /) {
288 $s .= "$1 ";
289 }
290 $search =~ s/^\s+//;
291 $search =~ s/\s+$//;
292
293 # filed e[nr] is exact match bitmask
294 # 1 = beginning, 2=end, 3=both
295 my $exact_flag = $q->param("e$i") || 0;
296 $pre = '"xxbxx ' if ($exact_flag & 1);
297 $post = ' xxexx"' if ($exact_flag & 2);
298 # add qotes on other side
299 if ($q->param("e$i")) {
300 $pre = '"' if (! $pre);
301 $post = '"' if (! $post);
302 # what about wildcards?
303 $wc = '';
304 $wc = '*' if ($q->param("e$i") & 4);
305 $exact = '_exact';
306 }
307
308 foreach (split(/\s+/,$search)) {
309 if (m/^([+-])(\S+)/) {
310 $s.= ($s) ? "and " : "";
311 $s.="not " if ($1 eq "-");
312 $s.=$2.$wc." ";
313 } elsif (m/^\s*(and|or|not)\s*$/i) {
314 $s.=$_." ";
315 # don't add * to words with less than x chars
316 } elsif (length($_) <= $MIN_WILDCARD) {
317 $s.=$_." ";
318 } else {
319 $s.=$_.$wc." ";
320 }
321 }
322 $s =~ s/\*+/*/g;
323 $s = $pre.$s.$post if ($q->param("e$i"));
324 push @swish_q,$s;
325 }
326 # FIXME default operator for multi-value fields is or. There is
327 # no way to change it, except here for now. Is there need?
328 push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
329 }
330
331 my $tmpl = $self->load_tmpl(url_ex($q,'results.html'), global_vars => 1);
332
333 $tmpl->param('url_params',"?".join("&",@url_params));
334
335 sub esc_html {
336 my $html = shift;
337 $html =~ s/</&lt;/g;
338 $html =~ s/>/&gt;/g;
339 return $html;
340 }
341
342 my $sort = 'swishrank';
343 if ($q->param("sort")) {
344 $sort = 'headline';
345 push @persist_vars, "sort";
346 }
347
348 # construct swish query
349 my $sw_q = join(" and ",@s_arr);
350 if (@path_arr && $q->param('show_full')) {
351 $sw_q .= "and (swishdocpath=\"";
352 $sw_q .= join("\" or swishdocpath=\"",@path_arr);
353 $sw_q .= "\")";
354 $tmpl->param('full',1); # show full records
355 } elsif ($q->param('show_full')) {
356 # just show full path, no path defined
357 $tmpl->param('full',1);
358 } else {
359 $tmpl->param('full',0);
360 }
361
362 # create new swish instance
363 my $swish = SWISH::API->new($INDEX);
364 die $swish->ErrorString.": ".$swish->LastErrorMsg if $swish->Error;
365
366 # execute query and get number of results from SWISH-E
367 my $search = $swish->New_Search_Object;
368
369 $search->SetSort($sort);
370
371 my $results = $search->Execute($sw_q);
372 die $swish->ErrorString.": ".$swish->LastErrorMsg if $swish->Error;
373
374 my $hits = $results->Hits;
375
376 $tmpl->param('hits',$hits);
377 $tmpl->param('search',$sw_q);
378
379 $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
380 $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
381
382 #
383 # build pager
384 #
385
386 my $current_page = $q->param('PAGER_offset') || 1;
387
388 my $pager = Data::Pageset->new({
389 'total_entries' => $hits,
390 'entries_per_page' => $ON_PAGE,
391 'current_page' => $current_page,
392 'pages_per_set' => $pages_per_set,
393 });
394
395 $results->SeekResult( $pager->first - 1 );
396
397 # get number of entries on this page
398 my $i = $pager->entries_on_this_page;
399
400 # results from swish for template
401 my @pager_data_list;
402
403 for(my $i=$pager->first; $i<=$pager->last; $i++) {
404
405 my $result = $results->NextResult;
406 last if (! $result);
407
408 my $r = {
409 nr => $i,
410 path => $result->Property('swishdocpath'),
411 headline => esc_html($from_utf8->convert($result->Property('headline'))),
412 rank => $result->Property('swishrank')
413 };
414
415 $r->{html} = back2html($from_utf8->convert($result->Property('html')), join("&",@url_params_persist)) if ($q->param('show_full'));
416
417 push @pager_data_list, $r;
418 }
419
420
421
422 # put something in template
423 make_pager($q, $tmpl, $pager);
424 make_pager_vars($q, $tmpl, @persist_vars);
425 $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
426
427 my $html = $tmpl->output;
428
429 return in_template($q,$html);
430 }
431
432 sub show_index {
433 my $self = shift;
434 my $i = shift; # field number
435
436 my $q = $self->query();
437
438 my $field = $q->param("f$i");
439 my $limit = $q->param("v$i");
440
441 my $html;
442
443 my $index = new index_DBI(
444 $cfg_global->val('global', 'dbi_dbd'),
445 $cfg_global->val('global', 'dbi_dsn'),
446 $cfg_global->val('global', 'dbi_user'),
447 $cfg_global->val('global', 'dbi_passwd') || ''
448 );
449
450 my $total = $index->count($field,$limit);
451
452 if (! $total) {
453 my $tmpl = $self->load_tmpl(url_ex($q,'no_index.html'));
454 $tmpl->param('field',$field);
455 $html = $tmpl->output;
456 return $html;
457 }
458
459 my $tmpl = $self->load_tmpl(url_ex($q,'index_res.html'), global_vars => 1);
460 $tmpl->param('field',$field);
461 $tmpl->param('limit',$limit);
462 $tmpl->param('total',$total);
463
464 # FIXME I should set offset and leave out limit from fetch!!
465 # if (! $q->param("PAGER_offset") {
466 # $q->param("Pager_offet)
467 # }
468
469
470 #
471 # build pager
472 #
473 my $pager = Data::Pageset->new({
474 'total_entries' => $total,
475 'entries_per_page' => $ON_PAGE,
476 'current_page' => $q->param('PAGER_offset') || 1,
477 'pages_per_set' => $pages_per_set
478 });
479
480 my @persist_vars = qw{rm f$i v$i f$i_index offset};
481
482 make_pager($q, $tmpl, $pager);
483 make_pager_vars($q, $tmpl, @persist_vars);
484
485 my @pager_data_list = $index->fetch($field,$limit, $pager->first - 1, $pager->entries_on_this_page);
486 $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
487
488 return in_template($q,$tmpl->output);
489 }
490
491 1;

Properties

Name Value
cvs2svn:cvs-rev 1.40

  ViewVC Help
Powered by ViewVC 1.1.26