/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 359 - (show annotations)
Wed Jun 16 15:41:16 2004 UTC (15 years, 7 months ago) by dpavlin
File size: 8349 byte(s)
implemeted eval{...}

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 use Data::Dumper;
8
9 =head1 NAME
10
11 WebPAC - base class for WebPAC
12
13 =head1 DESCRIPTION
14
15 This module implements methods used by WebPAC.
16
17 =head1 METHODS
18
19 =head2 new
20
21 This will create new instance of WebPAC using configuration specified by C<config_file>.
22
23 my $webpac = new WebPAC(
24 config_file => 'name.conf',
25 [code_page => 'ISO-8859-2',]
26 );
27
28 Default C<code_page> is C<ISO-8859-2>.
29
30 It will also read configuration files
31 C<global.conf> (used by indexer and Web font-end)
32 and configuration file specified by C<config_file>
33 which describes databases to be indexed.
34
35 =cut
36
37 sub new {
38 my $class = shift;
39 my $self = {@_};
40 bless($self, $class);
41
42 # fill in default values
43 # output codepage
44 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
45
46 #
47 # read global.conf
48 #
49
50 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
51
52 # read global config parametars
53 foreach my $var (qw(
54 dbi_dbd
55 dbi_dsn
56 dbi_user
57 dbi_passwd
58 show_progress
59 my_unac_filter
60 )) {
61 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
62 }
63
64 #
65 # read indexer config file
66 #
67
68 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
69
70 # read global config parametars
71 foreach my $var (qw(
72 dbi_dbd
73 dbi_dsn
74 dbi_user
75 dbi_passwd
76 show_progress
77 my_unac_filter
78 )) {
79 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
80 }
81
82 return $self;
83 }
84
85 =head2 open_isis
86
87 Open CDS/ISIS database using OpenIsis module and read all records to memory.
88
89 $webpac->open_isis(
90 filename => '/data/ISIS/ISIS',
91 code_page => '852',
92 limit_mfn => '500',
93 lookup => [ ... ],
94 );
95
96 By default, ISIS code page is assumed to be C<852>.
97
98 If optional parametar C<limit_mfn> is set, it will read just 500 records
99 from database in example above.
100
101 Returns number of last record read into memory (size of database, really).
102
103 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
104 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
105 value in index.
106
107 lookup => [
108 { 'key' => 'd:v900', 'val' => 'v250^a' },
109 { 'eval' => '"v901^a" eq "Podruèje"',
110 'key' => 'pa:v561^4:v562^4:v461^1',
111 'val' => 'v900' },
112 ]
113
114 =cut
115
116 sub open_isis {
117 my $self = shift;
118 my $arg = {@_};
119
120 croak "need filename" if (! $arg->{'filename'});
121 my $code_page = $arg->{'code_page'} || '852';
122
123 use OpenIsis;
124
125 #$self->{'isis_code_page'} = $code_page;
126
127 # create Text::Iconv object
128 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
129
130 my $isis_db = OpenIsis::open($arg->{'filename'});
131
132 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
133
134 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
135
136 # read database
137 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
138
139 # read record
140 my $row = OpenIsis::read( $isis_db, $mfn );
141 foreach my $k (keys %{$row}) {
142 if ($k ne "mfn") {
143 foreach my $l (@{$row->{$k}}) {
144 $l = $cp->convert($l);
145 # has subfields?
146 my $val;
147 if ($l =~ m/\^/) {
148 foreach my $t (split(/\^/,$l)) {
149 next if (! $t);
150 $val->{substr($t,0,1)} = substr($t,1);
151 }
152 } else {
153 $val = $l;
154 }
155
156 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
157 }
158 }
159
160 }
161
162 # create lookup
163 my $rec = $self->{'data'}->{$mfn};
164 $self->create_lookup($rec, @{$arg->{'lookup'}});
165
166 }
167
168 # store max mfn and return it.
169 return $self->{'max_mfn'} = $maxmfn;
170 }
171
172 =head2 create_lookup
173
174 Create lookup from record using lookup definition.
175
176 =cut
177
178 sub create_lookup {
179 my $self = shift;
180
181 my $rec = shift || confess "need record to create lookup";
182 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
183
184 foreach my $i (@_) {
185 if ($i->{'eval'}) {
186 my $eval = $self->fill_in($rec,$i->{'eval'});
187 my $key = $self->fill_in($rec,$i->{'key'});
188 my @val = $self->fill_in($rec,$i->{'val'});
189 if ($key && @val && eval $eval) {
190 push @{$self->{'lookup'}->{$key}}, @val;
191 }
192 } else {
193 my $key = $self->fill_in($rec,$i->{'key'});
194 my @val = $self->fill_in($rec,$i->{'val'});
195 if ($key && @val) {
196 push @{$self->{'lookup'}->{$key}}, @val;
197 }
198 }
199 }
200 }
201
202 =head2 get_data
203
204 Returns value from record.
205
206 $self->get_data(\$rec,$f,$sf,$i,\$found);
207
208 Arguments are:
209 record reference C<$rec>,
210 field C<$f>,
211 optional subfiled C<$sf>,
212 index for repeatable values C<$i>.
213
214 Optinal variable C<$found> will be incremeted if thre
215 is field.
216
217 Returns value or empty string.
218
219 =cut
220
221 sub get_data {
222 my $self = shift;
223
224 my ($rec,$f,$sf,$i,$found) = @_;
225 if ($$rec->{$f}) {
226 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
227 $$found++ if (defined($$found));
228 return $$rec->{$f}->[$i]->{$sf};
229 } elsif ($$rec->{$f}->[$i]) {
230 $$found++ if (defined($$found));
231 return $$rec->{$f}->[$i];
232 }
233 } else {
234 return '';
235 }
236 }
237
238 =head2 fill_in
239
240 Workhourse of all: takes record from in-memory structure of database and
241 strings with placeholders and returns string or array of with substituted
242 values from record.
243
244 $webpac->fill_in($rec,'v250^a');
245
246 Optional argument is ordinal number for repeatable fields. By default,
247 it's assume to be first repeatable field (fields are perl array, so first
248 element is 0).
249 Following example will read second value from repeatable field.
250
251 $webpac->fill_in($rec,'Title: v250^a',1);
252
253 This function B<does not> perform parsing of format to inteligenty skip
254 delimiters before fields which aren't used.
255
256 =cut
257
258 sub fill_in {
259 my $self = shift;
260
261 my $rec = shift || confess "need data record";
262 my $format = shift || confess "need format to parse";
263 # iteration (for repeatable fields)
264 my $i = shift || 0;
265
266 # FIXME remove for speedup?
267 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
268
269 my $found = 0;
270
271 my $eval_code;
272 # remove eval{...} from beginning
273 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
274
275 # do actual replacement of placeholders
276 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
277
278 if ($found) {
279 if ($eval_code) {
280 my $eval = $self->fill_in($rec,$eval_code,$i);
281 return if (! eval $eval);
282 }
283 # do we have lookups?
284 if ($format =~ /\[[^\[\]]+\]/o) {
285 return $self->lookup($format);
286 } else {
287 return $format;
288 }
289 } else {
290 return;
291 }
292 }
293
294 =head2 lookup
295
296 Perform lookups on format supplied to it.
297
298 my $txt = $self->lookup('[v900]');
299
300 Lookups can be nested (like C<[d:[a:[v900]]]>).
301
302 =cut
303
304 sub lookup {
305 my $self = shift;
306
307 my $tmp = shift || confess "need format";
308
309 if ($tmp =~ /\[[^\[\]]+\]/o) {
310 my @in = ( $tmp );
311 #print "##lookup $tmp\n";
312 my @out;
313 while (my $f = shift @in) {
314 if ($f =~ /\[([^\[\]]+)\]/) {
315 my $k = $1;
316 if ($self->{'lookup'}->{$k}) {
317 #print "## lookup key = $k\n";
318 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
319 my $tmp2 = $f;
320 $tmp2 =~ s/\[$k\]/$nv/g;
321 push @in, $tmp2;
322 #print "## lookup in => $tmp2\n";
323 }
324 } else {
325 undef $f;
326 }
327 } elsif ($f) {
328 push @out, $f;
329 #print "## lookup out => $f\n";
330 }
331 }
332 return @out;
333 } else {
334 return $tmp;
335 }
336 }
337
338 =head2 parse
339
340 Perform smart parsing of string, skipping delimiters for fields which aren't
341 defined. It can also eval code in format starting with C<eval{...}> and
342 return output or nothing depending on eval code.
343
344 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
345
346 =cut
347
348 sub parse {
349 my $self = shift;
350
351 my ($rec, $format, $i) = @_;
352
353 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
354
355 $i = 0 if (! $i);
356
357 my @out;
358
359 my $eval_code;
360 # remove eval{...} from beginning
361 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
362
363 my $prefix;
364 my $all_found=0;
365
366 #print "## $format\n";
367 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
368 #print "## [ $1 | $2 | $3 ] $format\n";
369
370 my $del = $1 || '';
371 $prefix ||= $del if ($all_found == 0);
372
373 my $found = 0;
374 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
375
376 if ($found) {
377 push @out, $del;
378 push @out, $tmp;
379 $all_found += $found;
380 }
381 }
382
383 return if (! $all_found);
384
385 my $out = join('',@out) . $format;
386
387 # add prefix if not there
388 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
389
390 if ($eval_code) {
391 my $eval = $self->fill_in($rec,$eval_code,$i);
392 return if (! eval $eval);
393 }
394
395 return $out;
396 }
397
398 1;

  ViewVC Help
Powered by ViewVC 1.1.26