/[webpac]/trunk/WebPac.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/WebPac.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 641 - (hide annotations)
Sun Jan 23 02:02:10 2005 UTC (17 years, 3 months ago) by dpavlin
File size: 12611 byte(s)
New implementation of indexes: now it uses only two tables (index for all
data and tags for all tags). Currently, it doesn't enforce relation between
them on RDBMS level (I have to test this code against SQLite and MySQL
before enforcing that).
Removed swish-e output while indexing, database is used as default tag to
enable filtering by database (there isn't possiblity to set tag to something
else yet!). Output usage count in index.

1 dpavlin 7 package WebPac;
2    
3     use base 'CGI::Application';
4     use strict;
5    
6     use HTML::FillInForm;
7 dpavlin 304 use SWISH::API;
8 dpavlin 14 use Text::Iconv;
9 dpavlin 9 use DBI;
10 dpavlin 53 use Config::IniFiles;
11 dpavlin 73 use Text::Unaccent;
12 dpavlin 304 use Data::Pageset;
13 dpavlin 7
14 dpavlin 11 use lib '..';
15 dpavlin 641 use index_DBI_tag;
16 dpavlin 13 use back2html;
17 dpavlin 11
18 dpavlin 7
19 dpavlin 76 # read global.conf configuration
20     my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";
21    
22     # configuration options from global.conf
23     my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
24     my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
25     my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
26     my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
27     my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
28     my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
29 dpavlin 120 my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
30 dpavlin 147 my $TEMPLATE =$cfg_global->val('webpac', 'template');
31 dpavlin 320 my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter');
32 dpavlin 198 my $BASE_PATH =$cfg_global->val('webpac', 'base_path');
33 dpavlin 304 # for pager
34     my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10;
35 dpavlin 76
36 dpavlin 320 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
37 dpavlin 304
38 dpavlin 320 my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
39    
40 dpavlin 164 if ($UNAC_FILTER) {
41     require $UNAC_FILTER;
42 dpavlin 320 } else {
43     sub WebPac::my_unac_string {
44     my ($charset, $string) = (@_);
45     return $string;
46     }
47 dpavlin 164 }
48 dpavlin 76
49 dpavlin 198 # use path from cgi script to support templates in subdirs
50     sub url_ex {
51     my $q = shift || die "suff2file needs CGI object!";
52     my $tpl = shift || die "url_ex needs template name!";
53 dpavlin 302 return suff2file($BASE_PATH, $q->url(-absolute => 1,-path => 1),$TEMPLATE_PATH,$tpl);
54 dpavlin 198 }
55 dpavlin 53
56 dpavlin 198 sub suff2file($$$$) {
57     my ($base_path, $p, $path, $tpl) = @_;
58    
59     return $tpl if (! $base_path);
60    
61     # strip everything to and including base path, leaving only
62     # additional (virtual) path
63 dpavlin 302 if ($base_path eq "/") {
64 dpavlin 198 $p =~ s,/*,,g;
65     my ($name,$ext) = split(/\./,$tpl);
66 dpavlin 302 $p = $name . "-" . $p . "." . $ext;
67     } elsif ($p =~ s,^.*?$base_path,,) {
68     $p =~ s,/*,,g;
69     my ($name,$ext) = split(/\./,$tpl);
70 dpavlin 198 $p = $name . $p . "." . $ext;
71     } else {
72     # if unable reset it!
73     $p = $tpl;
74     }
75    
76     if ( -e "$path/$p") {
77     return $p;
78     } else {
79     return $tpl;
80     }
81    
82     }
83    
84 dpavlin 7 sub setup {
85     my $self = shift;
86     $self->tmpl_path($TEMPLATE_PATH);
87     $self->run_modes(
88     'search' => 'show_search_form',
89     'results' => 'show_results_list',
90 dpavlin 9 # 'user' => 'show_user_detail',
91     'index' => 'show_index',
92 dpavlin 7 );
93     $self->start_mode('search');
94     $self->mode_param('rm');
95    
96     $self->header_props(-charset=>$CHARSET);
97     }
98    
99 dpavlin 147 sub in_template {
100 dpavlin 198 my $q = shift || die "need CGI object!";
101     my $html = shift || die "This page is left unintentionally blank";
102 dpavlin 147 return $html if (! defined($TEMPLATE));
103 dpavlin 198
104     my ($dir,$tpl);
105     if ($TEMPLATE =~ m,^(.*?/*)([^/]+)$,) {
106     ($dir,$tpl) = ($1,$2);
107     } else {
108     die "can't parse TEMPLATE path";
109     }
110    
111 dpavlin 302 my $master_tpl = suff2file($BASE_PATH, $q->url(-absolute => 1, -path => 1),$dir,$tpl);
112 dpavlin 198 if (open(T, $master_tpl)) {
113 dpavlin 147 my $template_html = join("\n",<T>);
114     close(T);
115     $template_html =~ s/##webpac##/$html/gsi;
116     return $template_html;
117     } else {
118 dpavlin 198 return "Can't read template '$master_tpl'";
119 dpavlin 147 }
120     }
121    
122 dpavlin 304 #--------------------------------------------------------------------------
123    
124     #
125     # make pager navigation and fill template variables
126     # compatibile with HTML::Pager
127     #
128    
129     sub make_pager($$$) {
130     my ($q,$tmpl,$pager) = @_;
131    
132     #
133     # pager navigation
134     #
135     my ($pager_prev,$pager_next, $pager_jump) = ('','','');
136    
137     my $nav_fmt=qq{ <a href="%s">%s</a> };
138    
139     if ($pager->current_page() > $pager->first_page) {
140     $q->param('PAGER_offset', $pager->current_page - 1);
141     $pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'&lt;&lt;');
142     }
143    
144     if ($pager->previous_set) {
145     $q->param('PAGER_offset', $pager->previous_set);
146     $pager_prev .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..');
147     }
148    
149    
150     foreach my $p (@{$pager->pages_in_set()}) {
151     next if ($p < 0);
152     if($p == $pager->current_page()) {
153     $pager_jump .= "<b>$p</b> ";
154     } else {
155     $q->param('PAGER_offset', $p);
156     $pager_jump .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),$p);
157     }
158     }
159    
160     if ($pager->next_set) {
161     $q->param('PAGER_offset', $pager->next_set);
162     $pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'..');
163     }
164    
165     if ($pager->current_page() < $pager->last_page) {
166     $q->param('PAGER_offset', $pager->current_page + 1);
167     $pager_next .= sprintf($nav_fmt,$q->url(-relative=>1, -query=>1),'&gt;&gt;');
168     }
169    
170     $tmpl->param('PAGER_PREV', $pager_prev);
171     $tmpl->param('PAGER_JUMP', $pager_jump);
172     $tmpl->param('PAGER_NEXT', $pager_next);
173    
174     }
175    
176     #
177     # put persisten variables in template
178     #
179    
180     sub make_pager_vars {
181     my $q = shift @_;
182     my $tmpl = shift @_;
183     my @persist_vars = @_;
184     my $hidden_vars = '';
185     foreach my $v (@persist_vars) {
186 dpavlin 457 foreach my $val ($q->param($v)) {
187 dpavlin 636 next if (! $val || $val eq '');
188 dpavlin 457 $hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n";
189     }
190 dpavlin 304 }
191    
192     $tmpl->param('PAGER_HIDDEN', $hidden_vars);
193 dpavlin 636 $tmpl->param('PAGER_JAVASCRIPT', qq#
194 dpavlin 304 <SCRIPT LANGUAGE="Javascript">
195     <!-- Begin
196     // dummy emulator for HTML::Pager templates
197     function PAGER_set_offset_and_submit() {
198     return true;
199     }
200     // End -->
201     </script>
202 dpavlin 636 #);
203 dpavlin 304 }
204    
205     #--------------------------------------------------------------------------
206    
207 dpavlin 7 sub show_search_form {
208     my $self = shift;
209    
210     # Get the CGI.pm query object
211     my $q = $self->query();
212    
213 dpavlin 198 my $tmpl = $self->load_tmpl(url_ex($q,'search.html'));
214 dpavlin 7 my $html = $tmpl->output;
215    
216     my $fif = new HTML::FillInForm;
217    
218 dpavlin 198 return in_template($q,$fif->fill(scalarref => \$html, fobject => $q,
219 dpavlin 147 target => 'search'));
220 dpavlin 7 }
221    
222     sub show_results_list {
223     my $self = shift;
224    
225     my $q = $self->query();
226    
227     # load template for this page
228    
229     my @s_arr; # all queries are located here
230    
231 dpavlin 47 my @path_arr = $q->param('path');
232     my $full = $q->param('full');
233    
234 dpavlin 636 my @persist_vars = ( 'rm', 'persist_search' );
235 dpavlin 304 my @url_params = ( 'rm=results', 'show_full=1', 'last_PAGER_offset='.($q->param('PAGER_offset') || 0) );
236 dpavlin 112
237 dpavlin 636 my @persist_search_vars;
238     my @url_params_persist;
239     if ($q->param("persist_search")) {
240     @persist_search_vars = split(/\s*,\s*/, $q->param("persist_search"));
241     push @url_params_persist, "persist_search=".$q->url_param("persist_search");
242     push @url_params,"persist_search=".$q->url_param("persist_search");
243     }
244    
245 dpavlin 150 # support parametars "f" and "v" for start
246 dpavlin 636 for(my $i = 0; $i <=30; $i++) {
247 dpavlin 7
248 dpavlin 636 $i = '' if ($i == 0);
249    
250 dpavlin 9 return show_index($self, $i) if ($q->param("f".$i."_index"));
251 dpavlin 112
252 dpavlin 636 next if (! $q->param("v$i") || $q->param("v$i") eq '');
253 dpavlin 9 next if (! $q->param("f$i"));
254 dpavlin 7
255 dpavlin 636 my $persist = grep(/^$i$/,@persist_search_vars);
256    
257 dpavlin 112 push @persist_vars, "f$i";
258     push @persist_vars, "v$i";
259 dpavlin 186 push @persist_vars, "e$i" if ($q->param("e$i"));
260 dpavlin 112
261 dpavlin 636 # create url parametars (and persistent ones)
262    
263 dpavlin 126 push @url_params,"f$i=".$q->url_param("f$i");
264 dpavlin 636 push @url_params_persist,"f$i=".$q->url_param("f$i") if ($persist);
265    
266 dpavlin 158 foreach my $v ($q->url_param("v$i")) {
267     push @url_params,"v$i=$v";
268 dpavlin 636 push @url_params_persist,"v$i=$v" if ($persist);
269 dpavlin 158 }
270 dpavlin 126
271 dpavlin 636 if ($q->param("e$i")) {
272     push @url_params,"e$i=".$q->url_param("e$i");
273 dpavlin 639 # push @url_params_persist,"e$i=".$q->url_param("e$i");
274 dpavlin 636 }
275    
276 dpavlin 150 my $wc="*"; # swish wildcard
277     $wc="" if ($i eq ""); # don't apply wildcard on field 0
278    
279 dpavlin 7 # re-write query from +/- to and/and not
280 dpavlin 73 my @param_vals = $q->param("v$i");
281     my @swish_q;
282 dpavlin 163 my ($pre,$post,$exact) = ('','','');
283 dpavlin 73 while (my $search = shift @param_vals) {
284     my $s;
285     # remove accents
286 dpavlin 320 $search = my_unac_string($CHARSET,$search);
287 dpavlin 73 while ($search =~ s/\s*("[^"]+")\s*/ /) {
288     $s .= "$1 ";
289     }
290     $search =~ s/^\s+//;
291     $search =~ s/\s+$//;
292 dpavlin 7
293 dpavlin 163 # filed e[nr] is exact match bitmask
294     # 1 = beginning, 2=end, 3=both
295 dpavlin 636 my $exact_flag = $q->param("e$i") || 0;
296     $pre = '"xxbxx ' if ($exact_flag & 1);
297     $post = ' xxexx"' if ($exact_flag & 2);
298 dpavlin 163 # add qotes on other side
299     if ($q->param("e$i")) {
300     $pre = '"' if (! $pre);
301     $post = '"' if (! $post);
302 dpavlin 190 # what about wildcards?
303     $wc = '';
304     $wc = '*' if ($q->param("e$i") & 4);
305 dpavlin 163 $exact = '_exact';
306     }
307    
308 dpavlin 73 foreach (split(/\s+/,$search)) {
309     if (m/^([+-])(\S+)/) {
310     $s.= ($s) ? "and " : "";
311     $s.="not " if ($1 eq "-");
312 dpavlin 163 $s.=$2.$wc." ";
313 dpavlin 122 } elsif (m/^\s*(and|or|not)\s*$/i) {
314 dpavlin 163 $s.=$_." ";
315 dpavlin 120 # don't add * to words with less than x chars
316     } elsif (length($_) <= $MIN_WILDCARD) {
317 dpavlin 163 $s.=$_." ";
318 dpavlin 73 } else {
319 dpavlin 163 $s.=$_.$wc." ";
320 dpavlin 73 }
321 dpavlin 7 }
322 dpavlin 73 $s =~ s/\*+/*/g;
323 dpavlin 163 $s = $pre.$s.$post if ($q->param("e$i"));
324 dpavlin 73 push @swish_q,$s;
325 dpavlin 7 }
326 dpavlin 73 # FIXME default operator for multi-value fields is or. There is
327     # no way to change it, except here for now. Is there need?
328 dpavlin 163 push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
329 dpavlin 7 }
330    
331 dpavlin 198 my $tmpl = $self->load_tmpl(url_ex($q,'results.html'), global_vars => 1);
332 dpavlin 9
333 dpavlin 304 $tmpl->param('url_params',"?".join("&",@url_params));
334    
335 dpavlin 80 sub esc_html {
336     my $html = shift;
337     $html =~ s/</&lt;/g;
338     $html =~ s/>/&gt;/g;
339     return $html;
340     }
341    
342 dpavlin 202 my $sort = 'swishrank';
343     if ($q->param("sort")) {
344     $sort = 'headline';
345     push @persist_vars, "sort";
346     }
347    
348 dpavlin 47 # construct swish query
349     my $sw_q = join(" and ",@s_arr);
350 dpavlin 636 if (@path_arr && $q->param('show_full')) {
351 dpavlin 47 $sw_q .= "and (swishdocpath=\"";
352     $sw_q .= join("\" or swishdocpath=\"",@path_arr);
353     $sw_q .= "\")";
354 dpavlin 636 $tmpl->param('full',1); # show full records
355     } elsif ($q->param('show_full')) {
356 dpavlin 149 # just show full path, no path defined
357     $tmpl->param('full',1);
358 dpavlin 121 } else {
359     $tmpl->param('full',0);
360 dpavlin 47 }
361 dpavlin 7
362 dpavlin 304 # create new swish instance
363     my $swish = SWISH::API->new($INDEX);
364 dpavlin 639 die $swish->ErrorString.": ".$swish->LastErrorMsg if $swish->Error;
365 dpavlin 47
366 dpavlin 304 # execute query and get number of results from SWISH-E
367     my $search = $swish->New_Search_Object;
368    
369     $search->SetSort($sort);
370    
371     my $results = $search->Execute($sw_q);
372 dpavlin 639 die $swish->ErrorString.": ".$swish->LastErrorMsg if $swish->Error;
373 dpavlin 304
374     my $hits = $results->Hits;
375    
376 dpavlin 7 $tmpl->param('hits',$hits);
377 dpavlin 47 $tmpl->param('search',$sw_q);
378 dpavlin 7
379 dpavlin 76 $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
380     $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
381 dpavlin 51
382 dpavlin 304 #
383     # build pager
384     #
385 dpavlin 126
386 dpavlin 304 my $current_page = $q->param('PAGER_offset') || 1;
387 dpavlin 7
388 dpavlin 304 my $pager = Data::Pageset->new({
389     'total_entries' => $hits,
390     'entries_per_page' => $ON_PAGE,
391     'current_page' => $current_page,
392     'pages_per_set' => $pages_per_set,
393     });
394 dpavlin 7
395 dpavlin 304 $results->SeekResult( $pager->first - 1 );
396 dpavlin 7
397 dpavlin 304 # get number of entries on this page
398     my $i = $pager->entries_on_this_page;
399    
400     # results from swish for template
401     my @pager_data_list;
402    
403     for(my $i=$pager->first; $i<=$pager->last; $i++) {
404    
405     my $result = $results->NextResult;
406     last if (! $result);
407    
408     my $r = {
409     nr => $i,
410     path => $result->Property('swishdocpath'),
411     headline => esc_html($from_utf8->convert($result->Property('headline'))),
412     rank => $result->Property('swishrank')
413     };
414    
415 dpavlin 636 $r->{html} = back2html($from_utf8->convert($result->Property('html')), join("&",@url_params_persist)) if ($q->param('show_full'));
416 dpavlin 304
417     push @pager_data_list, $r;
418     }
419    
420    
421    
422     # put something in template
423     make_pager($q, $tmpl, $pager);
424     make_pager_vars($q, $tmpl, @persist_vars);
425     $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
426    
427     my $html = $tmpl->output;
428    
429 dpavlin 198 return in_template($q,$html);
430 dpavlin 7 }
431    
432 dpavlin 9 sub show_index {
433     my $self = shift;
434     my $i = shift; # field number
435    
436     my $q = $self->query();
437    
438 dpavlin 11 my $field = $q->param("f$i");
439     my $limit = $q->param("v$i");
440    
441 dpavlin 9 my $html;
442    
443 dpavlin 53 my $index = new index_DBI(
444     $cfg_global->val('global', 'dbi_dbd'),
445     $cfg_global->val('global', 'dbi_dsn'),
446     $cfg_global->val('global', 'dbi_user'),
447     $cfg_global->val('global', 'dbi_passwd') || ''
448     );
449 dpavlin 9
450 dpavlin 140 my $total = $index->count($field,$limit);
451 dpavlin 304
452 dpavlin 12 if (! $total) {
453 dpavlin 198 my $tmpl = $self->load_tmpl(url_ex($q,'no_index.html'));
454 dpavlin 11 $tmpl->param('field',$field);
455     $html = $tmpl->output;
456     return $html;
457     }
458 dpavlin 9
459 dpavlin 198 my $tmpl = $self->load_tmpl(url_ex($q,'index_res.html'), global_vars => 1);
460 dpavlin 12 $tmpl->param('field',$field);
461     $tmpl->param('limit',$limit);
462     $tmpl->param('total',$total);
463 dpavlin 11
464 dpavlin 51 # FIXME I should set offset and leave out limit from fetch!!
465 dpavlin 16 # if (! $q->param("PAGER_offset") {
466     # $q->param("Pager_offet)
467     # }
468    
469 dpavlin 11
470 dpavlin 304 #
471     # build pager
472     #
473     my $pager = Data::Pageset->new({
474     'total_entries' => $total,
475     'entries_per_page' => $ON_PAGE,
476     'current_page' => $q->param('PAGER_offset') || 1,
477     'pages_per_set' => $pages_per_set
478     });
479 dpavlin 11
480 dpavlin 304 my @persist_vars = qw{rm f$i v$i f$i_index offset};
481    
482     make_pager($q, $tmpl, $pager);
483     make_pager_vars($q, $tmpl, @persist_vars);
484    
485     my @pager_data_list = $index->fetch($field,$limit, $pager->first - 1, $pager->entries_on_this_page);
486     $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
487    
488     return in_template($q,$tmpl->output);
489 dpavlin 9 }
490    
491 dpavlin 7 1;

Properties

Name Value
cvs2svn:cvs-rev 1.40

  ViewVC Help
Powered by ViewVC 1.1.26