/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 371 - (show annotations)
Thu Jun 17 20:44:45 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 13265 byte(s)
use local (more relaxed) eval, report errors in eval

1 package WebPAC;
2
3 use warnings;
4 use strict;
5
6 use Carp;
7 use Text::Iconv;
8 use Config::IniFiles;
9 use XML::Simple;
10 use Template;
11
12 use Data::Dumper;
13
14 =head1 NAME
15
16 WebPAC - base class for WebPAC
17
18 =head1 DESCRIPTION
19
20 This module implements methods used by WebPAC.
21
22 =head1 METHODS
23
24 =head2 new
25
26 This will create new instance of WebPAC using configuration specified by C<config_file>.
27
28 my $webpac = new WebPAC(
29 config_file => 'name.conf',
30 [code_page => 'ISO-8859-2',]
31 );
32
33 Default C<code_page> is C<ISO-8859-2>.
34
35 It will also read configuration files
36 C<global.conf> (used by indexer and Web font-end)
37 and configuration file specified by C<config_file>
38 which describes databases to be indexed.
39
40 =cut
41
42 # mapping between data type and tag which specify
43 # format in XML file
44 my %type2tag = (
45 'isis' => 'isis',
46 # 'excel' => 'column',
47 # 'marc' => 'marc',
48 # 'feed' => 'feed'
49 );
50
51 sub new {
52 my $class = shift;
53 my $self = {@_};
54 bless($self, $class);
55
56 # fill in default values
57 # output codepage
58 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
59
60 #
61 # read global.conf
62 #
63
64 my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
65
66 # read global config parametars
67 foreach my $var (qw(
68 dbi_dbd
69 dbi_dsn
70 dbi_user
71 dbi_passwd
72 show_progress
73 my_unac_filter
74 output_template
75 )) {
76 $self->{'global_config'}->{$var} = $config->val('global', $var);
77 }
78
79 #
80 # read indexer config file
81 #
82
83 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
84
85 # create UTF-8 convertor for import_xml files
86 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
87
88 # create Template toolkit instance
89 $self->{'tt'} = Template->new(
90 INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
91 # FILTERS => {
92 # 'foo' => \&foo_filter,
93 # },
94 EVAL_PERL => 1,
95 );
96
97 return $self;
98 }
99
100 =head2 open_isis
101
102 Open CDS/ISIS database using OpenIsis module and read all records to memory.
103
104 $webpac->open_isis(
105 filename => '/data/ISIS/ISIS',
106 code_page => '852',
107 limit_mfn => '500',
108 lookup => [ ... ],
109 );
110
111 By default, ISIS code page is assumed to be C<852>.
112
113 If optional parametar C<limit_mfn> is set, it will read just 500 records
114 from database in example above.
115
116 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
117 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
118 value in index.
119
120 lookup => [
121 { 'key' => 'd:v900', 'val' => 'v250^a' },
122 { 'eval' => '"v901^a" eq "Podruèje"',
123 'key' => 'pa:v561^4:v562^4:v461^1',
124 'val' => 'v900' },
125 ]
126
127 Returns number of last record read into memory (size of database, really).
128
129 =cut
130
131 sub open_isis {
132 my $self = shift;
133 my $arg = {@_};
134
135 croak "need filename" if (! $arg->{'filename'});
136 my $code_page = $arg->{'code_page'} || '852';
137
138 use OpenIsis;
139
140 #$self->{'isis_code_page'} = $code_page;
141
142 # create Text::Iconv object
143 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
144
145 print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
146
147 my $isis_db = OpenIsis::open($arg->{'filename'});
148
149 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
150
151 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
152
153 print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
154
155 # read database
156 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
157
158 # read record
159 my $row = OpenIsis::read( $isis_db, $mfn );
160 foreach my $k (keys %{$row}) {
161 if ($k ne "mfn") {
162 foreach my $l (@{$row->{$k}}) {
163 $l = $cp->convert($l);
164 # has subfields?
165 my $val;
166 if ($l =~ m/\^/) {
167 foreach my $t (split(/\^/,$l)) {
168 next if (! $t);
169 $val->{substr($t,0,1)} = substr($t,1);
170 }
171 } else {
172 $val = $l;
173 }
174
175 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
176 }
177 }
178
179 }
180
181 # create lookup
182 my $rec = $self->{'data'}->{$mfn};
183 $self->create_lookup($rec, @{$arg->{'lookup'}});
184
185 }
186
187 $self->{'current_mfn'} = 1;
188
189 # store max mfn and return it.
190 return $self->{'max_mfn'} = $maxmfn;
191 }
192
193 =head2 fetch_rec
194
195 Fetch next record from database. It will also display progress bar (once
196 it's implemented, that is).
197
198 my $rec = $webpac->fetch_rec;
199
200 =cut
201
202 sub fetch_rec {
203 my $self = shift;
204
205 my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
206
207 if ($mfn > $self->{'max_mfn'}) {
208 $self->{'current_mfn'} = $self->{'max_mfn'};
209 return;
210 }
211
212 return $self->{'data'}->{$mfn};
213 }
214
215 =head2 open_import_xml
216
217 Read file from C<import_xml/> directory and parse it.
218
219 $webpac->open_import_xml(type => 'isis');
220
221 =cut
222
223 sub open_import_xml {
224 my $self = shift;
225
226 my $arg = {@_};
227 confess "need type to load file from import_xml/" if (! $arg->{'type'});
228
229 $self->{'type'} = $arg->{'type'};
230
231 my $type_base = $arg->{'type'};
232 $type_base =~ s/_.*$//g;
233
234 $self->{'tag'} = $type2tag{$type_base};
235
236 print STDERR "using type '",$self->{'type'},"' tag <",$self->{'tag'},">\n" if ($self->{'debug'});
237
238 my $f = "./import_xml/".$self->{'type'}.".xml";
239 confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
240
241 print STDERR "reading '$f'\n" if ($self->{'debug'});
242
243 $self->{'import_xml'} = XMLin($f,
244 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
245 ForceContent => 1
246 );
247
248 }
249
250 =head2 create_lookup
251
252 Create lookup from record using lookup definition.
253
254 $self->create_lookup($rec, @lookups);
255
256 Called internally by C<open_*> methods.
257
258 =cut
259
260 sub create_lookup {
261 my $self = shift;
262
263 my $rec = shift || confess "need record to create lookup";
264 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
265
266 foreach my $i (@_) {
267 if ($i->{'eval'}) {
268 my $eval = $self->fill_in($rec,$i->{'eval'});
269 my $key = $self->fill_in($rec,$i->{'key'});
270 my @val = $self->fill_in($rec,$i->{'val'});
271 if ($key && @val && eval $eval) {
272 push @{$self->{'lookup'}->{$key}}, @val;
273 }
274 } else {
275 my $key = $self->fill_in($rec,$i->{'key'});
276 my @val = $self->fill_in($rec,$i->{'val'});
277 if ($key && @val) {
278 push @{$self->{'lookup'}->{$key}}, @val;
279 }
280 }
281 }
282 }
283
284 =head2 get_data
285
286 Returns value from record.
287
288 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
289
290 Arguments are:
291 record reference C<$rec>,
292 field C<$f>,
293 optional subfiled C<$sf>,
294 index for repeatable values C<$i>.
295
296 Optinal variable C<$found> will be incremeted if there
297 is field.
298
299 Returns value or empty string.
300
301 =cut
302
303 sub get_data {
304 my $self = shift;
305
306 my ($rec,$f,$sf,$i,$found) = @_;
307
308 if ($$rec->{$f}) {
309 return '' if (! $$rec->{$f}->[$i]);
310 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
311 $$found++ if (defined($$found));
312 return $$rec->{$f}->[$i]->{$sf};
313 } elsif ($$rec->{$f}->[$i]) {
314 $$found++ if (defined($$found));
315 # it still might have subfield, just
316 # not specified, so we'll dump all
317 if ($$rec->{$f}->[$i] =~ /HASH/o) {
318 my $out;
319 foreach my $k (keys %{$$rec->{$f}->[$i]}) {
320 $out .= $$rec->{$f}->[$i]->{$k}." ";
321 }
322 return $out;
323 } else {
324 return $$rec->{$f}->[$i];
325 }
326 }
327 } else {
328 return '';
329 }
330 }
331
332 =head2 fill_in
333
334 Workhourse of all: takes record from in-memory structure of database and
335 strings with placeholders and returns string or array of with substituted
336 values from record.
337
338 my $text = $webpac->fill_in($rec,'v250^a');
339
340 Optional argument is ordinal number for repeatable fields. By default,
341 it's assume to be first repeatable field (fields are perl array, so first
342 element is 0).
343 Following example will read second value from repeatable field.
344
345 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
346
347 This function B<does not> perform parsing of format to inteligenty skip
348 delimiters before fields which aren't used.
349
350 =cut
351
352 # internal function to eval code
353 sub _eval {
354 my $self = shift;
355
356 my $code = shift || return;
357 no strict 'subs';
358 my $ret = eval $code;
359 if ($@) {
360 print STDERR "problem with eval code [$code]: $@\n";
361 }
362 return $ret;
363 }
364
365 sub fill_in {
366 my $self = shift;
367
368 my $rec = shift || confess "need data record";
369 my $format = shift || confess "need format to parse";
370 # iteration (for repeatable fields)
371 my $i = shift || 0;
372
373 # FIXME remove for speedup?
374 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
375
376 my $found = 0;
377
378 my $eval_code;
379 # remove eval{...} from beginning
380 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
381
382 # do actual replacement of placeholders
383 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
384
385 if ($found) {
386 if ($eval_code) {
387 my $eval = $self->fill_in($rec,$eval_code,$i);
388 return if (! $self->_eval($eval));
389 }
390 # do we have lookups?
391 if ($format =~ /\[[^\[\]]+\]/o) {
392 print "## probable lookup: $format\n";
393 return $self->lookup($format);
394 } else {
395 return $format;
396 }
397 } else {
398 return;
399 }
400 }
401
402 =head2 lookup
403
404 Perform lookups on format supplied to it.
405
406 my $text = $self->lookup('[v900]');
407
408 Lookups can be nested (like C<[d:[a:[v900]]]>).
409
410 =cut
411
412 sub lookup {
413 my $self = shift;
414
415 my $tmp = shift || confess "need format";
416
417 if ($tmp =~ /\[[^\[\]]+\]/o) {
418 my @in = ( $tmp );
419 print "## lookup $tmp\n";
420 my @out;
421 while (my $f = shift @in) {
422 if ($f =~ /\[([^\[\]]+)\]/) {
423 my $k = $1;
424 if ($self->{'lookup'}->{$k}) {
425 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
426 my $tmp2 = $f;
427 $tmp2 =~ s/\[$k\]/$nv/g;
428 push @in, $tmp2;
429 }
430 } else {
431 undef $f;
432 }
433 } elsif ($f) {
434 push @out, $f;
435 }
436 }
437 return @out;
438 } else {
439 return $tmp;
440 }
441 }
442
443 =head2 parse
444
445 Perform smart parsing of string, skipping delimiters for fields which aren't
446 defined. It can also eval code in format starting with C<eval{...}> and
447 return output or nothing depending on eval code.
448
449 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
450
451 =cut
452
453 sub parse {
454 my $self = shift;
455
456 my ($rec, $format_utf8, $i) = @_;
457
458 return if (! $format_utf8);
459
460 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
461 confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
462
463 $i = 0 if (! $i);
464
465 my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
466
467 my @out;
468
469 my $eval_code;
470 # remove eval{...} from beginning
471 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
472
473 my $prefix;
474 my $all_found=0;
475
476 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
477
478 my $del = $1 || '';
479 $prefix ||= $del if ($all_found == 0);
480
481 my $found = 0;
482 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
483
484 if ($found) {
485 push @out, $del;
486 push @out, $tmp;
487 $all_found += $found;
488 }
489 }
490
491 return if (! $all_found);
492
493 my $out = join('',@out) . $format;
494
495 # add prefix if not there
496 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
497
498 if ($eval_code) {
499 my $eval = $self->fill_in($rec,$eval_code,$i);
500 return if (! $self->_eval($eval));
501 }
502
503 return $out;
504 }
505
506 =head2 parse_to_arr
507
508 Similar to C<parse>, but returns array of all repeatable fields
509
510 my @arr = $webpac->parse_to_arr($rec,'v250^a');
511
512 =cut
513
514 sub parse_to_arr {
515 my $self = shift;
516
517 my ($rec, $format_utf8) = @_;
518
519 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
520 return if (! $format_utf8);
521
522 my $i = 0;
523 my @arr;
524
525 while (my $v = $self->parse($rec,$format_utf8,$i++)) {
526 push @arr, $v;
527 }
528
529 return @arr;
530 }
531
532 =head2 data_structure
533
534 Create in-memory data structure which represents layout from C<import_xml>.
535 It is used later to produce output.
536
537 my @ds = $webpac->data_structure($rec);
538
539 =cut
540
541 # private method _sort_by_order
542 # sort subrouting using order="" attribute
543 sub _sort_by_order {
544 my $self = shift;
545
546 my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
547 $self->{'import_xml'}->{'indexer'}->{$a};
548 my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
549 $self->{'import_xml'}->{'indexer'}->{$b};
550
551 return $va <=> $vb;
552 }
553
554 sub data_structure {
555 my $self = shift;
556
557 my $rec = shift;
558 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
559
560 my @sorted_tags;
561 if ($self->{tags_by_order}) {
562 @sorted_tags = @{$self->{tags_by_order}};
563 } else {
564 @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
565 $self->{tags_by_order} = \@sorted_tags;
566 }
567
568 my @ds;
569
570 foreach my $field (@sorted_tags) {
571
572 my $row;
573
574 #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
575
576 foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
577 my @v = $self->parse_to_arr($rec,$tag->{'content'});
578
579 next if (! @v);
580
581 # does tag have type?
582 if ($tag->{'type'}) {
583 push @{$row->{$tag->{'type'}}}, @v;
584 } else {
585 push @{$row->{'display'}}, @v;
586 push @{$row->{'swish'}}, @v;
587 }
588 }
589
590 if ($row) {
591 $row->{'tag'} = $field;
592 push @ds, $row;
593 }
594
595 }
596
597 return @ds;
598
599 }
600
601 =head2 output
602
603 Create output from in-memory data structure using Template Toolkit template.
604
605 my $text = $webpac->output( template => 'text.tt', data => @ds );
606
607 =cut
608
609 sub output {
610 my $self = shift;
611
612 my $args = {@_};
613
614 confess("need template name") if (! $args->{'template'});
615 confess("need data array") if (! $args->{'data'});
616
617 my $out;
618
619 $self->{'tt'}->process(
620 $args->{'template'},
621 $args,
622 \$out
623 ) || confess $self->{'tt'}->error();
624
625 return $out;
626 }
627
628 1;

  ViewVC Help
Powered by ViewVC 1.1.26