/[webpac]/branches/tehnika/WebPac.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/tehnika/WebPac.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 163 - (hide annotations)
Thu Nov 20 21:23:40 2003 UTC (20 years, 4 months ago) by dpavlin
Original Path: trunk/WebPac.pm
File size: 7797 byte(s)
Added type="swish_exact" to save data into swish index with boundaries
xxbxx data xxexxx. This is helpful to implement exact match from beginning
of query and exact match to full query which are defined using e[nr] field
in web user interface (with same [nr] as f[nr] and v[nr] fields) which
have to have value 1 (from beginning) 2 (from end, not that useful...) or
3 (1+2 - exact match)

1 dpavlin 7 package WebPac;
2    
3     use base 'CGI::Application';
4     use strict;
5    
6     use HTML::Pager;
7     use HTML::FillInForm;
8     use SWISH;
9 dpavlin 14 use Text::Iconv;
10 dpavlin 9 use DBI;
11 dpavlin 53 use Config::IniFiles;
12 dpavlin 73 use Text::Unaccent;
13 dpavlin 7
14 dpavlin 11 use lib '..';
15 dpavlin 128 use index_DBI_cache;
16 dpavlin 13 use back2html;
17 dpavlin 11
18 dpavlin 7
19 dpavlin 76 # read global.conf configuration
20     my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";
21    
22     # configuration options from global.conf
23     my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
24     my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
25     my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
26     my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
27     my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
28     my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
29 dpavlin 120 my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
30 dpavlin 147 my $TEMPLATE =$cfg_global->val('webpac', 'template');
31 dpavlin 76
32    
33 dpavlin 30 Text::Iconv->raise_error(0); # Conversion errors raise exceptions
34 dpavlin 7
35 dpavlin 14 my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
36    
37 dpavlin 53
38 dpavlin 7 sub setup {
39     my $self = shift;
40     $self->tmpl_path($TEMPLATE_PATH);
41     $self->run_modes(
42     'search' => 'show_search_form',
43     'results' => 'show_results_list',
44 dpavlin 9 # 'user' => 'show_user_detail',
45     'index' => 'show_index',
46 dpavlin 7 );
47     $self->start_mode('search');
48     $self->mode_param('rm');
49    
50     $self->header_props(-charset=>$CHARSET);
51     }
52    
53 dpavlin 147 sub in_template {
54     my $html = shift || "This page is left unintentionally blank";
55     return $html if (! defined($TEMPLATE));
56     if (open(T, $TEMPLATE)) {
57     my $template_html = join("\n",<T>);
58     close(T);
59     $template_html =~ s/##webpac##/$html/gsi;
60     return $template_html;
61     } else {
62     return "Can't read template '$TEMPLATE'";
63     }
64     }
65    
66 dpavlin 7 sub show_search_form {
67     my $self = shift;
68    
69     # Get the CGI.pm query object
70     my $q = $self->query();
71    
72     my $tmpl = $self->load_tmpl('search.html');
73     my $html = $tmpl->output;
74    
75     my $fif = new HTML::FillInForm;
76    
77 dpavlin 147 return in_template($fif->fill(scalarref => \$html, fobject => $q,
78     target => 'search'));
79 dpavlin 7 }
80    
81     sub show_results_list {
82     my $self = shift;
83    
84     my $q = $self->query();
85    
86     my @swish_results; # results from swish
87    
88     # load template for this page
89    
90     my @s_arr; # all queries are located here
91    
92 dpavlin 47 my @path_arr = $q->param('path');
93     my $full = $q->param('full');
94    
95 dpavlin 112 my @persist_vars = ( 'rm' );
96 dpavlin 126 my @url_params = ( 'rm=results', 'show_full=1', 'last_PAGER_offset='.$q->param('PAGER_offset') || 0 );
97 dpavlin 112
98 dpavlin 150 # support parametars "f" and "v" for start
99     for(my $i = ""; $i <=30; $i++) {
100 dpavlin 7
101 dpavlin 9 return show_index($self, $i) if ($q->param("f".$i."_index"));
102 dpavlin 112
103 dpavlin 71 next if (! $q->param("v$i"));
104 dpavlin 9 next if (! $q->param("f$i"));
105 dpavlin 7
106 dpavlin 112 push @persist_vars, "f$i";
107     push @persist_vars, "v$i";
108    
109 dpavlin 126 push @url_params,"f$i=".$q->url_param("f$i");
110 dpavlin 158 foreach my $v ($q->url_param("v$i")) {
111     push @url_params,"v$i=$v";
112     }
113 dpavlin 126
114 dpavlin 150 my $wc="*"; # swish wildcard
115     $wc="" if ($i eq ""); # don't apply wildcard on field 0
116    
117 dpavlin 7 # re-write query from +/- to and/and not
118 dpavlin 73 my @param_vals = $q->param("v$i");
119     my @swish_q;
120 dpavlin 163 my ($pre,$post,$exact) = ('','','');
121 dpavlin 73 while (my $search = shift @param_vals) {
122     my $s;
123     # remove accents
124     $search = unac_string($CHARSET,$search);
125     while ($search =~ s/\s*("[^"]+")\s*/ /) {
126     $s .= "$1 ";
127     }
128     $search =~ s/^\s+//;
129     $search =~ s/\s+$//;
130 dpavlin 7
131 dpavlin 163 # filed e[nr] is exact match bitmask
132     # 1 = beginning, 2=end, 3=both
133     $pre = '"xxbxx ' if ($q->param("e$i") & 1);
134     $post = ' xxexx"' if ($q->param("e$i") & 2);
135     # add qotes on other side
136     if ($q->param("e$i")) {
137     $pre = '"' if (! $pre);
138     $post = '"' if (! $post);
139     $wc = ''; # don't use windcard in exact
140     $exact = '_exact';
141     }
142    
143 dpavlin 73 foreach (split(/\s+/,$search)) {
144     if (m/^([+-])(\S+)/) {
145     $s.= ($s) ? "and " : "";
146     $s.="not " if ($1 eq "-");
147 dpavlin 163 $s.=$2.$wc." ";
148 dpavlin 122 } elsif (m/^\s*(and|or|not)\s*$/i) {
149 dpavlin 163 $s.=$_." ";
150 dpavlin 120 # don't add * to words with less than x chars
151     } elsif (length($_) <= $MIN_WILDCARD) {
152 dpavlin 163 $s.=$_." ";
153 dpavlin 73 } else {
154 dpavlin 163 $s.=$_.$wc." ";
155 dpavlin 73 }
156 dpavlin 7 }
157 dpavlin 73 $s =~ s/\*+/*/g;
158 dpavlin 163 $s = $pre.$s.$post if ($q->param("e$i"));
159 dpavlin 73 push @swish_q,$s;
160 dpavlin 7 }
161 dpavlin 73 # FIXME default operator for multi-value fields is or. There is
162     # no way to change it, except here for now. Is there need?
163 dpavlin 163 push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
164 dpavlin 7 }
165    
166 dpavlin 126 my $tmpl = $self->load_tmpl('results.html', global_vars => 1);
167 dpavlin 9
168 dpavlin 80 sub esc_html {
169     my $html = shift;
170     $html =~ s/</&lt;/g;
171     $html =~ s/>/&gt;/g;
172     return $html;
173     }
174    
175 dpavlin 7 # call swish
176     my $sh = SWISH->connect('Fork',
177     prog => $SWISH,
178     indexes => $INDEX,
179 dpavlin 13 properties => [qw/swishdocpath swishrank swishtitle headline html/],
180 dpavlin 7 results => sub {
181     my ($sh,$hit) = @_;
182    
183     push @swish_results, {
184     nr => ($#swish_results + 2),
185     path => $hit->swishdocpath,
186 dpavlin 80 headline => esc_html($from_utf8->convert($hit->headline)),
187 dpavlin 41 html => back2html($from_utf8->convert($hit->html)),
188 dpavlin 7 rank => $hit->swishrank };
189    
190     },
191     #startnum => 0,
192 dpavlin 47 maxhits => $MAX_HITS
193 dpavlin 7 );
194    
195     die $SWISH::errstr unless $sh;
196 dpavlin 47 # construct swish query
197     my $sw_q = join(" and ",@s_arr);
198 dpavlin 111 if (@path_arr && $q->param('show_full')) {
199 dpavlin 47 $sw_q .= "and (swishdocpath=\"";
200     $sw_q .= join("\" or swishdocpath=\"",@path_arr);
201     $sw_q .= "\")";
202     $tmpl->param('full',1); # show full records
203 dpavlin 149 } elsif ($q->param('show_full')) {
204     # just show full path, no path defined
205     $tmpl->param('full',1);
206 dpavlin 121 } else {
207     $tmpl->param('full',0);
208 dpavlin 47 }
209 dpavlin 7
210 dpavlin 47 my $hits = $sh->query($sw_q);
211    
212 dpavlin 7 $tmpl->param('hits',$hits);
213 dpavlin 47 $tmpl->param('search',$sw_q);
214 dpavlin 7
215 dpavlin 76 $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
216     $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
217 dpavlin 51
218 dpavlin 126 $tmpl->param('url_params',"?".join("&",@url_params));
219    
220 dpavlin 7 # create a Pager object
221     my $pager = HTML::Pager->new(
222     # required parameters
223     query => $q,
224     get_data_callback => sub {
225     my ($offset, $rows) = @_;
226    
227     my @result;
228     for (my $i=0; $i<$rows; $i++) {
229 dpavlin 114 my $r = $swish_results[$offset+$i];
230 dpavlin 121 if ($r && $tmpl->param('full')) {
231 dpavlin 114 push @result, $r;
232     } elsif ($r) {
233     # if not full output, skip html
234     delete $r->{html};
235     push @result, $r;
236     }
237 dpavlin 7 }
238     return \@result;
239     },
240     rows => $hits,
241     page_size => $ON_PAGE,
242     # some optional parameters
243 dpavlin 112 persist_vars => [ @persist_vars ],
244 dpavlin 7 #cell_space_color => '#000000',
245     #cell_background_color => '#ffffff',
246     #nav_background_color => '#dddddd',
247     #javascript_presubmit => 'last_minute_javascript()',
248     debug => 1,
249     template => $tmpl,
250     );
251    
252     my $html = $pager->output;
253    
254 dpavlin 147 return in_template($html);
255 dpavlin 7 }
256    
257 dpavlin 9 sub show_index {
258     my $self = shift;
259     my $i = shift; # field number
260    
261     my $q = $self->query();
262    
263 dpavlin 11 my $field = $q->param("f$i");
264     my $limit = $q->param("v$i");
265    
266 dpavlin 9 my $html;
267    
268 dpavlin 53 my $index = new index_DBI(
269     $cfg_global->val('global', 'dbi_dbd'),
270     $cfg_global->val('global', 'dbi_dsn'),
271     $cfg_global->val('global', 'dbi_user'),
272     $cfg_global->val('global', 'dbi_passwd') || ''
273     );
274 dpavlin 9
275 dpavlin 140 my $total = $index->count($field,$limit);
276 dpavlin 12 if (! $total) {
277 dpavlin 11 my $tmpl = $self->load_tmpl('no_index.html');
278     $tmpl->param('field',$field);
279     $html = $tmpl->output;
280     return $html;
281     }
282 dpavlin 9
283 dpavlin 124 my $tmpl = $self->load_tmpl('index_res.html', global_vars => 1);
284 dpavlin 12 $tmpl->param('field',$field);
285     $tmpl->param('limit',$limit);
286     $tmpl->param('total',$total);
287 dpavlin 11
288 dpavlin 51 # FIXME I should set offset and leave out limit from fetch!!
289 dpavlin 16 # if (! $q->param("PAGER_offset") {
290     # $q->param("Pager_offet)
291     # }
292    
293 dpavlin 12 my $pager = HTML::Pager->new(
294     query => $q,
295     get_data_callback => sub {
296     my ($offset, $rows) = @_;
297 dpavlin 11
298 dpavlin 140 my @result = $index->fetch($field,$limit, $offset, $rows);
299 dpavlin 12 return \@result;
300     },
301     rows => $total,
302     page_size => $ON_PAGE,
303     persist_vars => [
304     'rm',
305     "f$i", "v$i", "f".$i."_index",
306     'offset',
307     ],
308     debug => 1,
309     template => $tmpl,
310     );
311 dpavlin 11
312 dpavlin 147 return in_template($pager->output);
313 dpavlin 9 }
314    
315 dpavlin 7 1;

Properties

Name Value
cvs2svn:cvs-rev 1.34

  ViewVC Help
Powered by ViewVC 1.1.26