7 |
use Text::Iconv; |
use Text::Iconv; |
8 |
use Config::IniFiles; |
use Config::IniFiles; |
9 |
use XML::Simple; |
use XML::Simple; |
10 |
|
use Template; |
11 |
|
use Log::Log4perl qw(get_logger :levels); |
12 |
|
|
13 |
use Data::Dumper; |
use Data::Dumper; |
14 |
|
|
54 |
my $self = {@_}; |
my $self = {@_}; |
55 |
bless($self, $class); |
bless($self, $class); |
56 |
|
|
57 |
|
my $log_file = $self->{'log'} || "log.conf"; |
58 |
|
Log::Log4perl->init($log_file); |
59 |
|
|
60 |
|
my $log = $self->_get_logger(); |
61 |
|
|
62 |
# fill in default values |
# fill in default values |
63 |
# output codepage |
# output codepage |
64 |
$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'}); |
$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'}); |
66 |
# |
# |
67 |
# read global.conf |
# read global.conf |
68 |
# |
# |
69 |
|
$log->debug("read 'global.conf'"); |
70 |
|
|
71 |
$self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'"; |
my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'"); |
72 |
|
|
73 |
# read global config parametars |
# read global config parametars |
74 |
foreach my $var (qw( |
foreach my $var (qw( |
78 |
dbi_passwd |
dbi_passwd |
79 |
show_progress |
show_progress |
80 |
my_unac_filter |
my_unac_filter |
81 |
|
output_template |
82 |
)) { |
)) { |
83 |
$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); |
$self->{'global_config'}->{$var} = $config->val('global', $var); |
84 |
} |
} |
85 |
|
|
86 |
# |
# |
87 |
# read indexer config file |
# read indexer config file |
88 |
# |
# |
89 |
|
|
90 |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'"); |
91 |
|
|
92 |
|
# create UTF-8 convertor for import_xml files |
93 |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
94 |
|
|
95 |
|
# create Template toolkit instance |
96 |
|
$self->{'tt'} = Template->new( |
97 |
|
INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'), |
98 |
|
# FILTERS => { |
99 |
|
# 'foo' => \&foo_filter, |
100 |
|
# }, |
101 |
|
EVAL_PERL => 1, |
102 |
|
); |
103 |
|
|
104 |
return $self; |
return $self; |
105 |
} |
} |
106 |
|
|
139 |
my $self = shift; |
my $self = shift; |
140 |
my $arg = {@_}; |
my $arg = {@_}; |
141 |
|
|
142 |
croak "need filename" if (! $arg->{'filename'}); |
my $log = $self->_get_logger(); |
143 |
|
|
144 |
|
$log->logcroak("need filename") if (! $arg->{'filename'}); |
145 |
my $code_page = $arg->{'code_page'} || '852'; |
my $code_page = $arg->{'code_page'} || '852'; |
146 |
|
|
147 |
use OpenIsis; |
use OpenIsis; |
151 |
# create Text::Iconv object |
# create Text::Iconv object |
152 |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
153 |
|
|
154 |
print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); |
$log->info("reading ISIS database '",$arg->{'filename'},"'"); |
155 |
|
|
156 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
157 |
|
|
159 |
|
|
160 |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
161 |
|
|
162 |
print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); |
$log->info("processing $maxmfn records..."); |
163 |
|
|
164 |
# read database |
# read database |
165 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
211 |
sub fetch_rec { |
sub fetch_rec { |
212 |
my $self = shift; |
my $self = shift; |
213 |
|
|
214 |
my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; |
my $log = $self->_get_logger(); |
215 |
|
|
216 |
|
my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!"); |
217 |
|
|
218 |
if ($mfn > $self->{'max_mfn'}) { |
if ($mfn > $self->{'max_mfn'}) { |
219 |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
234 |
sub open_import_xml { |
sub open_import_xml { |
235 |
my $self = shift; |
my $self = shift; |
236 |
|
|
237 |
|
my $log = $self->_get_logger(); |
238 |
|
|
239 |
my $arg = {@_}; |
my $arg = {@_}; |
240 |
confess "need type to load file from import_xml/" if (! $arg->{'type'}); |
$log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'}); |
241 |
|
|
242 |
$self->{'type'} = $arg->{'type'}; |
$self->{'type'} = $arg->{'type'}; |
243 |
|
|
246 |
|
|
247 |
$self->{'tag'} = $type2tag{$type_base}; |
$self->{'tag'} = $type2tag{$type_base}; |
248 |
|
|
249 |
print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'}); |
$log->debug("using type '",$self->{'type'},"' tag <",$self->{'tag'},">") if ($self->{'debug'}); |
250 |
|
|
251 |
my $f = "./import_xml/".$self->{'type'}.".xml"; |
my $f = "./import_xml/".$self->{'type'}.".xml"; |
252 |
confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); |
$log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f"); |
253 |
|
|
254 |
print STDERR "reading '$f'\n" if ($self->{'debug'}); |
$log->debug("reading '$f'") if ($self->{'debug'}); |
255 |
|
|
256 |
$self->{'import_xml'} = XMLin($f, |
$self->{'import_xml'} = XMLin($f, |
257 |
ForceArray => [ $self->{'tag'}, 'config', 'format' ], |
ForceArray => [ $self->{'tag'}, 'config', 'format' ], |
258 |
ForceContent => 1 |
ForceContent => 1 |
259 |
); |
); |
260 |
|
|
|
print Dumper($self->{'import_xml'}); |
|
|
|
|
261 |
} |
} |
262 |
|
|
263 |
=head2 create_lookup |
=head2 create_lookup |
273 |
sub create_lookup { |
sub create_lookup { |
274 |
my $self = shift; |
my $self = shift; |
275 |
|
|
276 |
my $rec = shift || confess "need record to create lookup"; |
my $log = $self->_get_logger(); |
277 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
|
278 |
|
my $rec = shift || $log->logconfess("need record to create lookup"); |
279 |
|
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
280 |
|
|
281 |
foreach my $i (@_) { |
foreach my $i (@_) { |
282 |
if ($i->{'eval'}) { |
if ($i->{'eval'}) { |
367 |
sub fill_in { |
sub fill_in { |
368 |
my $self = shift; |
my $self = shift; |
369 |
|
|
370 |
my $rec = shift || confess "need data record"; |
my $log = $self->_get_logger(); |
371 |
my $format = shift || confess "need format to parse"; |
|
372 |
|
my $rec = shift || $log->logconfess("need data record"); |
373 |
|
my $format = shift || $log->logconfess("need format to parse"); |
374 |
# iteration (for repeatable fields) |
# iteration (for repeatable fields) |
375 |
my $i = shift || 0; |
my $i = shift || 0; |
376 |
|
|
377 |
# FIXME remove for speedup? |
# FIXME remove for speedup? |
378 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
379 |
|
|
380 |
my $found = 0; |
my $found = 0; |
381 |
|
|
389 |
if ($found) { |
if ($found) { |
390 |
if ($eval_code) { |
if ($eval_code) { |
391 |
my $eval = $self->fill_in($rec,$eval_code,$i); |
my $eval = $self->fill_in($rec,$eval_code,$i); |
392 |
return if (! eval $eval); |
return if (! $self->_eval($eval)); |
393 |
} |
} |
394 |
# do we have lookups? |
# do we have lookups? |
395 |
|
$log->debug("test format '$format' for lookups"); |
396 |
if ($format =~ /\[[^\[\]]+\]/o) { |
if ($format =~ /\[[^\[\]]+\]/o) { |
397 |
return $self->lookup($format); |
return $self->lookup($format); |
398 |
} else { |
} else { |
416 |
sub lookup { |
sub lookup { |
417 |
my $self = shift; |
my $self = shift; |
418 |
|
|
419 |
my $tmp = shift || confess "need format"; |
my $log = $self->_get_logger(); |
420 |
|
|
421 |
|
my $tmp = shift || $log->logconfess("need format"); |
422 |
|
|
423 |
if ($tmp =~ /\[[^\[\]]+\]/o) { |
if ($tmp =~ /\[[^\[\]]+\]/o) { |
424 |
my @in = ( $tmp ); |
my @in = ( $tmp ); |
425 |
|
|
426 |
|
$log->debug("lookup for: ",$tmp); |
427 |
|
|
428 |
my @out; |
my @out; |
429 |
while (my $f = shift @in) { |
while (my $f = shift @in) { |
430 |
if ($f =~ /\[([^\[\]]+)\]/) { |
if ($f =~ /\[([^\[\]]+)\]/) { |
465 |
|
|
466 |
return if (! $format_utf8); |
return if (! $format_utf8); |
467 |
|
|
468 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
my $log = $self->_get_logger(); |
469 |
confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); |
|
470 |
|
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
471 |
|
$log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); |
472 |
|
|
473 |
$i = 0 if (! $i); |
$i = 0 if (! $i); |
474 |
|
|
475 |
my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); |
my $format = $self->{'utf2cp'}->convert($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); |
476 |
|
|
477 |
my @out; |
my @out; |
478 |
|
|
507 |
|
|
508 |
if ($eval_code) { |
if ($eval_code) { |
509 |
my $eval = $self->fill_in($rec,$eval_code,$i); |
my $eval = $self->fill_in($rec,$eval_code,$i); |
510 |
return if (! eval $eval); |
$log->debug("about to eval ",$eval," [$out]"); |
511 |
|
return if (! $self->_eval($eval)); |
512 |
} |
} |
513 |
|
|
514 |
return $out; |
return $out; |
527 |
|
|
528 |
my ($rec, $format_utf8) = @_; |
my ($rec, $format_utf8) = @_; |
529 |
|
|
530 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
my $log = $self->_get_logger(); |
531 |
|
|
532 |
|
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
533 |
return if (! $format_utf8); |
return if (! $format_utf8); |
534 |
|
|
535 |
my $i = 0; |
my $i = 0; |
547 |
Create in-memory data structure which represents layout from C<import_xml>. |
Create in-memory data structure which represents layout from C<import_xml>. |
548 |
It is used later to produce output. |
It is used later to produce output. |
549 |
|
|
550 |
my $ds = $webpac->data_structure($rec); |
my @ds = $webpac->data_structure($rec); |
551 |
|
|
552 |
=cut |
=cut |
553 |
|
|
|
# private method _sort_by_order |
|
|
# sort subrouting using order="" attribute |
|
|
sub _sort_by_order { |
|
|
my $self = shift; |
|
|
|
|
|
my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} || |
|
|
$self->{'import_xml'}->{'indexer'}->{$a}; |
|
|
my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} || |
|
|
$self->{'import_xml'}->{'indexer'}->{$b}; |
|
|
|
|
|
return $va <=> $vb; |
|
|
} |
|
|
|
|
554 |
sub data_structure { |
sub data_structure { |
555 |
my $self = shift; |
my $self = shift; |
556 |
|
|
557 |
|
my $log = $self->_get_logger(); |
558 |
|
|
559 |
my $rec = shift; |
my $rec = shift; |
560 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
561 |
|
|
562 |
my @sorted_tags; |
my @sorted_tags; |
563 |
if ($self->{tags_by_order}) { |
if ($self->{tags_by_order}) { |
567 |
$self->{tags_by_order} = \@sorted_tags; |
$self->{tags_by_order} = \@sorted_tags; |
568 |
} |
} |
569 |
|
|
570 |
my $ds; |
my @ds; |
571 |
|
|
572 |
foreach my $field (@sorted_tags) { |
foreach my $field (@sorted_tags) { |
573 |
|
|
589 |
} |
} |
590 |
} |
} |
591 |
|
|
592 |
push @{$ds->{$field}}, $row if ($row); |
if ($row) { |
593 |
|
$row->{'tag'} = $field; |
594 |
|
push @ds, $row; |
595 |
|
} |
596 |
|
|
597 |
|
} |
598 |
|
|
599 |
|
return @ds; |
600 |
|
|
601 |
|
} |
602 |
|
|
603 |
|
=head2 output |
604 |
|
|
605 |
|
Create output from in-memory data structure using Template Toolkit template. |
606 |
|
|
607 |
|
my $text = $webpac->output( template => 'text.tt', data => @ds ); |
608 |
|
|
609 |
|
=cut |
610 |
|
|
611 |
|
sub output { |
612 |
|
my $self = shift; |
613 |
|
|
614 |
|
my $args = {@_}; |
615 |
|
|
616 |
|
my $log = $self->_get_logger(); |
617 |
|
|
618 |
|
$log->logconfess("need template name") if (! $args->{'template'}); |
619 |
|
$log->logconfess("need data array") if (! $args->{'data'}); |
620 |
|
|
621 |
|
my $out; |
622 |
|
|
623 |
|
$self->{'tt'}->process( |
624 |
|
$args->{'template'}, |
625 |
|
$args, |
626 |
|
\$out |
627 |
|
) || confess $self->{'tt'}->error(); |
628 |
|
|
629 |
|
return $out; |
630 |
|
} |
631 |
|
|
632 |
|
# |
633 |
|
# |
634 |
|
# |
635 |
|
|
636 |
|
=head1 INTERNAL METHODS |
637 |
|
|
638 |
|
Here is a quick list of internal methods, mostly useful to turn debugging |
639 |
|
on them (see L<LOGGING> below for explanation). |
640 |
|
|
641 |
|
=cut |
642 |
|
|
643 |
|
=head2 _eval |
644 |
|
|
645 |
|
Internal function to eval code without C<strict 'subs'>. |
646 |
|
|
647 |
|
=cut |
648 |
|
|
649 |
|
sub _eval { |
650 |
|
my $self = shift; |
651 |
|
|
652 |
|
my $code = shift || return; |
653 |
|
|
654 |
|
my $log = $self->_get_logger(); |
655 |
|
|
656 |
|
no strict 'subs'; |
657 |
|
my $ret = eval $code; |
658 |
|
if ($@) { |
659 |
|
$log->error("problem with eval code [$code]: $@"); |
660 |
} |
} |
661 |
|
|
662 |
print "data_structure => ",Dumper($ds); |
$log->debug("eval: ",$code," [",$ret,"]"); |
663 |
|
|
664 |
|
return $ret || 0; |
665 |
|
} |
666 |
|
|
667 |
|
=head2 _sort_by_order |
668 |
|
|
669 |
|
Sort xml tags data structure accoding to C<order=""> attribute. |
670 |
|
|
671 |
|
=cut |
672 |
|
|
673 |
|
sub _sort_by_order { |
674 |
|
my $self = shift; |
675 |
|
|
676 |
|
my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} || |
677 |
|
$self->{'import_xml'}->{'indexer'}->{$a}; |
678 |
|
my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} || |
679 |
|
$self->{'import_xml'}->{'indexer'}->{$b}; |
680 |
|
|
681 |
|
return $va <=> $vb; |
682 |
} |
} |
683 |
|
|
684 |
|
sub _get_logger { |
685 |
|
my $self = shift; |
686 |
|
|
687 |
|
my @c = caller(1); |
688 |
|
return get_logger($c[3]); |
689 |
|
} |
690 |
|
|
691 |
|
# |
692 |
|
# |
693 |
|
# |
694 |
|
|
695 |
|
=head1 LOGGING |
696 |
|
|
697 |
|
Logging in WebPAC is performed by L<Log::Log4perl> with config file |
698 |
|
C<log.conf>. |
699 |
|
|
700 |
|
Methods defined above have different levels of logging, so |
701 |
|
it's descriptions will be useful to turn (mostry B<debug> logging) on |
702 |
|
or off to see why WabPAC isn't perforing as you expect it (it might even |
703 |
|
be a bug!). |
704 |
|
|
705 |
|
B<This is different from normal Log4perl behaviour>. To repeat, you can |
706 |
|
also use method names, and not only classes (which are just few) |
707 |
|
to filter logging. |
708 |
|
|
709 |
|
=cut |
710 |
|
|
711 |
1; |
1; |