/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 359 - (hide annotations)
Wed Jun 16 15:41:16 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 8349 byte(s)
implemeted eval{...}

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3     use Carp;
4 dpavlin 353 use Text::Iconv;
5     use Config::IniFiles;
6 dpavlin 352
7 dpavlin 358 use Data::Dumper;
8    
9 dpavlin 352 =head1 NAME
10    
11 dpavlin 354 WebPAC - base class for WebPAC
12 dpavlin 352
13     =head1 DESCRIPTION
14    
15 dpavlin 354 This module implements methods used by WebPAC.
16 dpavlin 352
17     =head1 METHODS
18    
19     =head2 new
20    
21 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
22 dpavlin 352
23 dpavlin 354 my $webpac = new WebPAC(
24 dpavlin 352 config_file => 'name.conf',
25     [code_page => 'ISO-8859-2',]
26     );
27    
28     Default C<code_page> is C<ISO-8859-2>.
29    
30 dpavlin 353 It will also read configuration files
31     C<global.conf> (used by indexer and Web font-end)
32     and configuration file specified by C<config_file>
33     which describes databases to be indexed.
34    
35 dpavlin 352 =cut
36    
37     sub new {
38     my $class = shift;
39     my $self = {@_};
40     bless($self, $class);
41    
42     # fill in default values
43     # output codepage
44     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
45    
46 dpavlin 353 #
47     # read global.conf
48     #
49 dpavlin 352
50     $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
51    
52     # read global config parametars
53     foreach my $var (qw(
54     dbi_dbd
55     dbi_dsn
56     dbi_user
57     dbi_passwd
58     show_progress
59     my_unac_filter
60     )) {
61     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
62     }
63    
64 dpavlin 353 #
65     # read indexer config file
66     #
67 dpavlin 352
68     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
69    
70     # read global config parametars
71     foreach my $var (qw(
72     dbi_dbd
73     dbi_dsn
74     dbi_user
75     dbi_passwd
76     show_progress
77     my_unac_filter
78     )) {
79     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
80     }
81    
82     return $self;
83     }
84    
85     =head2 open_isis
86    
87     Open CDS/ISIS database using OpenIsis module and read all records to memory.
88    
89     $webpac->open_isis(
90     filename => '/data/ISIS/ISIS',
91     code_page => '852',
92     limit_mfn => '500',
93     lookup => [ ... ],
94     );
95    
96     By default, ISIS code page is assumed to be C<852>.
97    
98 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
99     from database in example above.
100 dpavlin 352
101     Returns number of last record read into memory (size of database, really).
102    
103     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
104     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
105     value in index.
106    
107     lookup => [
108     { 'key' => 'd:v900', 'val' => 'v250^a' },
109     { 'eval' => '"v901^a" eq "Podruèje"',
110     'key' => 'pa:v561^4:v562^4:v461^1',
111     'val' => 'v900' },
112     ]
113    
114     =cut
115    
116     sub open_isis {
117     my $self = shift;
118     my $arg = {@_};
119    
120     croak "need filename" if (! $arg->{'filename'});
121     my $code_page = $arg->{'code_page'} || '852';
122    
123 dpavlin 353 use OpenIsis;
124    
125 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
126    
127     # create Text::Iconv object
128     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
129    
130     my $isis_db = OpenIsis::open($arg->{'filename'});
131    
132     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
133    
134 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
135    
136 dpavlin 352 # read database
137     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
138    
139     # read record
140     my $row = OpenIsis::read( $isis_db, $mfn );
141     foreach my $k (keys %{$row}) {
142     if ($k ne "mfn") {
143     foreach my $l (@{$row->{$k}}) {
144     $l = $cp->convert($l);
145     # has subfields?
146     my $val;
147     if ($l =~ m/\^/) {
148     foreach my $t (split(/\^/,$l)) {
149     next if (! $t);
150     $val->{substr($t,0,1)} = substr($t,1);
151     }
152     } else {
153     $val = $l;
154     }
155    
156     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
157     }
158     }
159    
160     }
161    
162     # create lookup
163 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
164     $self->create_lookup($rec, @{$arg->{'lookup'}});
165 dpavlin 352
166     }
167    
168     # store max mfn and return it.
169     return $self->{'max_mfn'} = $maxmfn;
170     }
171    
172 dpavlin 355 =head2 create_lookup
173    
174     Create lookup from record using lookup definition.
175    
176     =cut
177    
178     sub create_lookup {
179     my $self = shift;
180    
181     my $rec = shift || confess "need record to create lookup";
182     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
183    
184     foreach my $i (@_) {
185     if ($i->{'eval'}) {
186     my $eval = $self->fill_in($rec,$i->{'eval'});
187     my $key = $self->fill_in($rec,$i->{'key'});
188     my @val = $self->fill_in($rec,$i->{'val'});
189     if ($key && @val && eval $eval) {
190     push @{$self->{'lookup'}->{$key}}, @val;
191     }
192     } else {
193     my $key = $self->fill_in($rec,$i->{'key'});
194     my @val = $self->fill_in($rec,$i->{'val'});
195     if ($key && @val) {
196     push @{$self->{'lookup'}->{$key}}, @val;
197     }
198     }
199     }
200     }
201    
202 dpavlin 356 =head2 get_data
203    
204     Returns value from record.
205    
206     $self->get_data(\$rec,$f,$sf,$i,\$found);
207    
208     Arguments are:
209     record reference C<$rec>,
210     field C<$f>,
211     optional subfiled C<$sf>,
212     index for repeatable values C<$i>.
213    
214     Optinal variable C<$found> will be incremeted if thre
215     is field.
216    
217     Returns value or empty string.
218    
219     =cut
220    
221     sub get_data {
222     my $self = shift;
223    
224     my ($rec,$f,$sf,$i,$found) = @_;
225     if ($$rec->{$f}) {
226     if ($sf && $$rec->{$f}->[$i]->{$sf}) {
227     $$found++ if (defined($$found));
228     return $$rec->{$f}->[$i]->{$sf};
229     } elsif ($$rec->{$f}->[$i]) {
230     $$found++ if (defined($$found));
231     return $$rec->{$f}->[$i];
232     }
233     } else {
234     return '';
235     }
236     }
237    
238 dpavlin 352 =head2 fill_in
239    
240     Workhourse of all: takes record from in-memory structure of database and
241     strings with placeholders and returns string or array of with substituted
242     values from record.
243    
244     $webpac->fill_in($rec,'v250^a');
245    
246     Optional argument is ordinal number for repeatable fields. By default,
247 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
248     element is 0).
249     Following example will read second value from repeatable field.
250 dpavlin 352
251 dpavlin 353 $webpac->fill_in($rec,'Title: v250^a',1);
252    
253     This function B<does not> perform parsing of format to inteligenty skip
254     delimiters before fields which aren't used.
255    
256 dpavlin 352 =cut
257    
258     sub fill_in {
259     my $self = shift;
260    
261     my $rec = shift || confess "need data record";
262     my $format = shift || confess "need format to parse";
263     # iteration (for repeatable fields)
264     my $i = shift || 0;
265    
266     # FIXME remove for speedup?
267 dpavlin 355 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
268 dpavlin 352
269     my $found = 0;
270    
271 dpavlin 359 my $eval_code;
272     # remove eval{...} from beginning
273     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
274    
275 dpavlin 352 # do actual replacement of placeholders
276 dpavlin 356 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
277 dpavlin 352
278 dpavlin 353 if ($found) {
279 dpavlin 359 if ($eval_code) {
280     my $eval = $self->fill_in($rec,$eval_code,$i);
281     return if (! eval $eval);
282     }
283 dpavlin 353 # do we have lookups?
284     if ($format =~ /\[[^\[\]]+\]/o) {
285     return $self->lookup($format);
286     } else {
287     return $format;
288     }
289 dpavlin 352 } else {
290     return;
291     }
292     }
293    
294     =head2 lookup
295    
296 dpavlin 355 Perform lookups on format supplied to it.
297 dpavlin 352
298     my $txt = $self->lookup('[v900]');
299    
300 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
301    
302 dpavlin 352 =cut
303    
304     sub lookup {
305     my $self = shift;
306    
307     my $tmp = shift || confess "need format";
308    
309 dpavlin 353 if ($tmp =~ /\[[^\[\]]+\]/o) {
310 dpavlin 352 my @in = ( $tmp );
311 dpavlin 353 #print "##lookup $tmp\n";
312 dpavlin 352 my @out;
313     while (my $f = shift @in) {
314     if ($f =~ /\[([^\[\]]+)\]/) {
315     my $k = $1;
316     if ($self->{'lookup'}->{$k}) {
317 dpavlin 353 #print "## lookup key = $k\n";
318 dpavlin 352 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
319     my $tmp2 = $f;
320     $tmp2 =~ s/\[$k\]/$nv/g;
321     push @in, $tmp2;
322 dpavlin 353 #print "## lookup in => $tmp2\n";
323 dpavlin 352 }
324     } else {
325     undef $f;
326     }
327     } elsif ($f) {
328     push @out, $f;
329 dpavlin 353 #print "## lookup out => $f\n";
330 dpavlin 352 }
331     }
332     return @out;
333     } else {
334     return $tmp;
335     }
336     }
337    
338 dpavlin 356 =head2 parse
339    
340     Perform smart parsing of string, skipping delimiters for fields which aren't
341     defined. It can also eval code in format starting with C<eval{...}> and
342     return output or nothing depending on eval code.
343    
344     $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
345    
346     =cut
347    
348     sub parse {
349     my $self = shift;
350    
351     my ($rec, $format, $i) = @_;
352    
353 dpavlin 358 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
354    
355     $i = 0 if (! $i);
356    
357 dpavlin 356 my @out;
358    
359     my $eval_code;
360     # remove eval{...} from beginning
361     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
362    
363 dpavlin 358 my $prefix;
364     my $all_found=0;
365 dpavlin 356
366 dpavlin 359 #print "## $format\n";
367 dpavlin 358 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
368 dpavlin 359 #print "## [ $1 | $2 | $3 ] $format\n";
369 dpavlin 356
370 dpavlin 358 my $del = $1 || '';
371 dpavlin 359 $prefix ||= $del if ($all_found == 0);
372 dpavlin 358
373     my $found = 0;
374     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
375    
376 dpavlin 356 if ($found) {
377 dpavlin 358 push @out, $del;
378     push @out, $tmp;
379     $all_found += $found;
380 dpavlin 356 }
381     }
382    
383 dpavlin 358 return if (! $all_found);
384 dpavlin 356
385 dpavlin 358 my $out = join('',@out) . $format;
386    
387     # add prefix if not there
388     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
389    
390 dpavlin 359 if ($eval_code) {
391     my $eval = $self->fill_in($rec,$eval_code,$i);
392     return if (! eval $eval);
393     }
394    
395 dpavlin 358 return $out;
396 dpavlin 356 }
397    
398 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26