1 |
package WebPAC; |
package WebPAC; |
2 |
|
|
3 |
|
use warnings; |
4 |
|
use strict; |
5 |
|
|
6 |
use Carp; |
use Carp; |
7 |
use Text::Iconv; |
use Text::Iconv; |
8 |
use Config::IniFiles; |
use Config::IniFiles; |
9 |
use XML::Simple; |
use XML::Simple; |
10 |
|
use Template; |
11 |
|
|
12 |
use Data::Dumper; |
use Data::Dumper; |
13 |
|
|
61 |
# read global.conf |
# read global.conf |
62 |
# |
# |
63 |
|
|
64 |
$self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'"; |
my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'"; |
65 |
|
|
66 |
# read global config parametars |
# read global config parametars |
67 |
foreach my $var (qw( |
foreach my $var (qw( |
71 |
dbi_passwd |
dbi_passwd |
72 |
show_progress |
show_progress |
73 |
my_unac_filter |
my_unac_filter |
74 |
|
output_template |
75 |
)) { |
)) { |
76 |
$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); |
$self->{'global_config'}->{$var} = $config->val('global', $var); |
77 |
} |
} |
78 |
|
|
79 |
# |
# |
82 |
|
|
83 |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
84 |
|
|
85 |
|
# create UTF-8 convertor for import_xml files |
86 |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
87 |
|
|
88 |
|
# create Template toolkit instance |
89 |
|
$self->{'tt'} = Template->new( |
90 |
|
INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'), |
91 |
|
# FILTERS => { |
92 |
|
# 'foo' => \&foo_filter, |
93 |
|
# }, |
94 |
|
EVAL_PERL => 1, |
95 |
|
); |
96 |
|
|
97 |
return $self; |
return $self; |
98 |
} |
} |
99 |
|
|
113 |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
114 |
from database in example above. |
from database in example above. |
115 |
|
|
|
Returns number of last record read into memory (size of database, really). |
|
|
|
|
116 |
C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and |
C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and |
117 |
C<val>. Optional parametar C<eval> is perl code to evaluate before storing |
C<val>. Optional parametar C<eval> is perl code to evaluate before storing |
118 |
value in index. |
value in index. |
124 |
'val' => 'v900' }, |
'val' => 'v900' }, |
125 |
] |
] |
126 |
|
|
127 |
|
Returns number of last record read into memory (size of database, really). |
128 |
|
|
129 |
=cut |
=cut |
130 |
|
|
131 |
sub open_isis { |
sub open_isis { |
245 |
ForceContent => 1 |
ForceContent => 1 |
246 |
); |
); |
247 |
|
|
|
print Dumper($self->{'import_xml'}); |
|
|
|
|
248 |
} |
} |
249 |
|
|
250 |
=head2 create_lookup |
=head2 create_lookup |
251 |
|
|
252 |
Create lookup from record using lookup definition. |
Create lookup from record using lookup definition. |
253 |
|
|
254 |
|
$self->create_lookup($rec, @lookups); |
255 |
|
|
256 |
|
Called internally by C<open_*> methods. |
257 |
|
|
258 |
=cut |
=cut |
259 |
|
|
260 |
sub create_lookup { |
sub create_lookup { |
285 |
|
|
286 |
Returns value from record. |
Returns value from record. |
287 |
|
|
288 |
$self->get_data(\$rec,$f,$sf,$i,\$found); |
my $text = $self->get_data(\$rec,$f,$sf,$i,\$found); |
289 |
|
|
290 |
Arguments are: |
Arguments are: |
291 |
record reference C<$rec>, |
record reference C<$rec>, |
293 |
optional subfiled C<$sf>, |
optional subfiled C<$sf>, |
294 |
index for repeatable values C<$i>. |
index for repeatable values C<$i>. |
295 |
|
|
296 |
Optinal variable C<$found> will be incremeted if thre |
Optinal variable C<$found> will be incremeted if there |
297 |
is field. |
is field. |
298 |
|
|
299 |
Returns value or empty string. |
Returns value or empty string. |
304 |
my $self = shift; |
my $self = shift; |
305 |
|
|
306 |
my ($rec,$f,$sf,$i,$found) = @_; |
my ($rec,$f,$sf,$i,$found) = @_; |
307 |
|
|
308 |
if ($$rec->{$f}) { |
if ($$rec->{$f}) { |
309 |
|
return '' if (! $$rec->{$f}->[$i]); |
310 |
if ($sf && $$rec->{$f}->[$i]->{$sf}) { |
if ($sf && $$rec->{$f}->[$i]->{$sf}) { |
311 |
$$found++ if (defined($$found)); |
$$found++ if (defined($$found)); |
312 |
return $$rec->{$f}->[$i]->{$sf}; |
return $$rec->{$f}->[$i]->{$sf}; |
335 |
strings with placeholders and returns string or array of with substituted |
strings with placeholders and returns string or array of with substituted |
336 |
values from record. |
values from record. |
337 |
|
|
338 |
$webpac->fill_in($rec,'v250^a'); |
my $text = $webpac->fill_in($rec,'v250^a'); |
339 |
|
|
340 |
Optional argument is ordinal number for repeatable fields. By default, |
Optional argument is ordinal number for repeatable fields. By default, |
341 |
it's assume to be first repeatable field (fields are perl array, so first |
it's assume to be first repeatable field (fields are perl array, so first |
342 |
element is 0). |
element is 0). |
343 |
Following example will read second value from repeatable field. |
Following example will read second value from repeatable field. |
344 |
|
|
345 |
$webpac->fill_in($rec,'Title: v250^a',1); |
my $text = $webpac->fill_in($rec,'Title: v250^a',1); |
346 |
|
|
347 |
This function B<does not> perform parsing of format to inteligenty skip |
This function B<does not> perform parsing of format to inteligenty skip |
348 |
delimiters before fields which aren't used. |
delimiters before fields which aren't used. |
389 |
|
|
390 |
Perform lookups on format supplied to it. |
Perform lookups on format supplied to it. |
391 |
|
|
392 |
my $txt = $self->lookup('[v900]'); |
my $text = $self->lookup('[v900]'); |
393 |
|
|
394 |
Lookups can be nested (like C<[d:[a:[v900]]]>). |
Lookups can be nested (like C<[d:[a:[v900]]]>). |
395 |
|
|
402 |
|
|
403 |
if ($tmp =~ /\[[^\[\]]+\]/o) { |
if ($tmp =~ /\[[^\[\]]+\]/o) { |
404 |
my @in = ( $tmp ); |
my @in = ( $tmp ); |
|
#print "##lookup $tmp\n"; |
|
405 |
my @out; |
my @out; |
406 |
while (my $f = shift @in) { |
while (my $f = shift @in) { |
407 |
if ($f =~ /\[([^\[\]]+)\]/) { |
if ($f =~ /\[([^\[\]]+)\]/) { |
408 |
my $k = $1; |
my $k = $1; |
409 |
if ($self->{'lookup'}->{$k}) { |
if ($self->{'lookup'}->{$k}) { |
|
#print "## lookup key = $k\n"; |
|
410 |
foreach my $nv (@{$self->{'lookup'}->{$k}}) { |
foreach my $nv (@{$self->{'lookup'}->{$k}}) { |
411 |
my $tmp2 = $f; |
my $tmp2 = $f; |
412 |
$tmp2 =~ s/\[$k\]/$nv/g; |
$tmp2 =~ s/\[$k\]/$nv/g; |
413 |
push @in, $tmp2; |
push @in, $tmp2; |
|
#print "## lookup in => $tmp2\n"; |
|
414 |
} |
} |
415 |
} else { |
} else { |
416 |
undef $f; |
undef $f; |
417 |
} |
} |
418 |
} elsif ($f) { |
} elsif ($f) { |
419 |
push @out, $f; |
push @out, $f; |
|
#print "## lookup out => $f\n"; |
|
420 |
} |
} |
421 |
} |
} |
422 |
return @out; |
return @out; |
431 |
defined. It can also eval code in format starting with C<eval{...}> and |
defined. It can also eval code in format starting with C<eval{...}> and |
432 |
return output or nothing depending on eval code. |
return output or nothing depending on eval code. |
433 |
|
|
434 |
$webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i); |
my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i); |
435 |
|
|
436 |
=cut |
=cut |
437 |
|
|
458 |
my $prefix; |
my $prefix; |
459 |
my $all_found=0; |
my $all_found=0; |
460 |
|
|
|
#print "## $format\n"; |
|
461 |
while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { |
while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { |
|
#print "## [ $1 | $2 | $3 ] $format\n"; |
|
462 |
|
|
463 |
my $del = $1 || ''; |
my $del = $1 || ''; |
464 |
$prefix ||= $del if ($all_found == 0); |
$prefix ||= $del if ($all_found == 0); |
479 |
|
|
480 |
# add prefix if not there |
# add prefix if not there |
481 |
$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); |
$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); |
482 |
|
|
483 |
if ($eval_code) { |
if ($eval_code) { |
484 |
my $eval = $self->fill_in($rec,$eval_code,$i); |
my $eval = $self->fill_in($rec,$eval_code,$i); |
485 |
return if (! eval $eval); |
return if (! eval $eval); |
488 |
return $out; |
return $out; |
489 |
} |
} |
490 |
|
|
491 |
|
=head2 parse_to_arr |
492 |
|
|
493 |
|
Similar to C<parse>, but returns array of all repeatable fields |
494 |
|
|
495 |
|
my @arr = $webpac->parse_to_arr($rec,'v250^a'); |
496 |
|
|
497 |
|
=cut |
498 |
|
|
499 |
|
sub parse_to_arr { |
500 |
|
my $self = shift; |
501 |
|
|
502 |
|
my ($rec, $format_utf8) = @_; |
503 |
|
|
504 |
|
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
505 |
|
return if (! $format_utf8); |
506 |
|
|
507 |
|
my $i = 0; |
508 |
|
my @arr; |
509 |
|
|
510 |
|
while (my $v = $self->parse($rec,$format_utf8,$i++)) { |
511 |
|
push @arr, $v; |
512 |
|
} |
513 |
|
|
514 |
|
return @arr; |
515 |
|
} |
516 |
|
|
517 |
=head2 data_structure |
=head2 data_structure |
518 |
|
|
519 |
Create in-memory data structure which represents layout from C<import_xml>. |
Create in-memory data structure which represents layout from C<import_xml>. |
520 |
It is used later to produce output. |
It is used later to produce output. |
521 |
|
|
522 |
my $ds = $webpac->data_structure($rec); |
my @ds = $webpac->data_structure($rec); |
523 |
|
|
524 |
=cut |
=cut |
525 |
|
|
550 |
$self->{tags_by_order} = \@sorted_tags; |
$self->{tags_by_order} = \@sorted_tags; |
551 |
} |
} |
552 |
|
|
553 |
my $ds; |
my @ds; |
554 |
|
|
555 |
foreach my $field (@sorted_tags) { |
foreach my $field (@sorted_tags) { |
556 |
|
|
557 |
my $row; |
my $row; |
|
my $i = 0; |
|
558 |
|
|
559 |
#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}); |
#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}); |
560 |
|
|
561 |
foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) { |
foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) { |
562 |
|
my @v = $self->parse_to_arr($rec,$tag->{'content'}); |
563 |
|
|
564 |
my $v = $self->parse($rec,$tag->{'content'},$i); |
next if (! @v); |
|
print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n"; |
|
|
|
|
|
next if (!$v || $v && $v eq ''); |
|
565 |
|
|
566 |
# does tag have type? |
# does tag have type? |
567 |
if ($tag->{'type'}) { |
if ($tag->{'type'}) { |
568 |
push @{$row->{$tag->{'type'}}}, $v; |
push @{$row->{$tag->{'type'}}}, @v; |
569 |
} else { |
} else { |
570 |
push @{$row->{'display'}}, $v; |
push @{$row->{'display'}}, @v; |
571 |
push @{$row->{'swish'}}, $v; |
push @{$row->{'swish'}}, @v; |
572 |
} |
} |
573 |
} |
} |
574 |
|
|
575 |
push @{$ds->{$field}}, $row if ($row); |
if ($row) { |
576 |
|
$row->{'tag'} = $field; |
577 |
|
push @ds, $row; |
578 |
|
} |
579 |
|
|
580 |
} |
} |
581 |
|
|
582 |
print Dumper($ds); |
return @ds; |
583 |
|
|
584 |
|
} |
585 |
|
|
586 |
|
=head2 output |
587 |
|
|
588 |
|
Create output from in-memory data structure using Template Toolkit template. |
589 |
|
|
590 |
|
my $text = $webpac->output( template => 'text.tt', data => @ds ); |
591 |
|
|
592 |
|
=cut |
593 |
|
|
594 |
|
sub output { |
595 |
|
my $self = shift; |
596 |
|
|
597 |
|
my $args = {@_}; |
598 |
|
|
599 |
|
confess("need template name") if (! $args->{'template'}); |
600 |
|
confess("need data array") if (! $args->{'data'}); |
601 |
|
|
602 |
|
my $out; |
603 |
|
|
604 |
|
$self->{'tt'}->process( |
605 |
|
$args->{'template'}, |
606 |
|
$args, |
607 |
|
\$out |
608 |
|
) || confess $self->{'tt'}->error(); |
609 |
|
|
610 |
|
return $out; |
611 |
} |
} |
612 |
|
|
613 |
1; |
1; |