8 |
use Config::IniFiles; |
use Config::IniFiles; |
9 |
use XML::Simple; |
use XML::Simple; |
10 |
use Template; |
use Template; |
11 |
|
use Log::Log4perl qw(get_logger :levels); |
12 |
|
|
13 |
use Data::Dumper; |
use Data::Dumper; |
14 |
|
|
54 |
my $self = {@_}; |
my $self = {@_}; |
55 |
bless($self, $class); |
bless($self, $class); |
56 |
|
|
57 |
|
my $log_file = $self->{'log'} || "log.conf"; |
58 |
|
Log::Log4perl->init($log_file); |
59 |
|
|
60 |
|
my $log = $self->_get_logger(); |
61 |
|
|
62 |
# fill in default values |
# fill in default values |
63 |
# output codepage |
# output codepage |
64 |
$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'}); |
$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'}); |
66 |
# |
# |
67 |
# read global.conf |
# read global.conf |
68 |
# |
# |
69 |
|
$log->debug("read 'global.conf'"); |
70 |
|
|
71 |
my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'"; |
my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'"); |
72 |
|
|
73 |
# read global config parametars |
# read global config parametars |
74 |
foreach my $var (qw( |
foreach my $var (qw( |
87 |
# read indexer config file |
# read indexer config file |
88 |
# |
# |
89 |
|
|
90 |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'"); |
91 |
|
|
92 |
# create UTF-8 convertor for import_xml files |
# create UTF-8 convertor for import_xml files |
93 |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
139 |
my $self = shift; |
my $self = shift; |
140 |
my $arg = {@_}; |
my $arg = {@_}; |
141 |
|
|
142 |
croak "need filename" if (! $arg->{'filename'}); |
my $log = $self->_get_logger(); |
143 |
|
|
144 |
|
$log->logcroak("need filename") if (! $arg->{'filename'}); |
145 |
my $code_page = $arg->{'code_page'} || '852'; |
my $code_page = $arg->{'code_page'} || '852'; |
146 |
|
|
147 |
use OpenIsis; |
use OpenIsis; |
151 |
# create Text::Iconv object |
# create Text::Iconv object |
152 |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
153 |
|
|
154 |
print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); |
$log->info("reading ISIS database '",$arg->{'filename'},"'"); |
155 |
|
|
156 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
157 |
|
|
159 |
|
|
160 |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
161 |
|
|
162 |
print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); |
$log->info("processing $maxmfn records..."); |
163 |
|
|
164 |
# read database |
# read database |
165 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
211 |
sub fetch_rec { |
sub fetch_rec { |
212 |
my $self = shift; |
my $self = shift; |
213 |
|
|
214 |
my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; |
my $log = $self->_get_logger(); |
215 |
|
|
216 |
|
my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!"); |
217 |
|
|
218 |
if ($mfn > $self->{'max_mfn'}) { |
if ($mfn > $self->{'max_mfn'}) { |
219 |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
234 |
sub open_import_xml { |
sub open_import_xml { |
235 |
my $self = shift; |
my $self = shift; |
236 |
|
|
237 |
|
my $log = $self->_get_logger(); |
238 |
|
|
239 |
my $arg = {@_}; |
my $arg = {@_}; |
240 |
confess "need type to load file from import_xml/" if (! $arg->{'type'}); |
$log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'}); |
241 |
|
|
242 |
$self->{'type'} = $arg->{'type'}; |
$self->{'type'} = $arg->{'type'}; |
243 |
|
|
246 |
|
|
247 |
$self->{'tag'} = $type2tag{$type_base}; |
$self->{'tag'} = $type2tag{$type_base}; |
248 |
|
|
249 |
print STDERR "using type '",$self->{'type'},"' tag <",$self->{'tag'},">\n" if ($self->{'debug'}); |
$log->debug("using type '",$self->{'type'},"' tag <",$self->{'tag'},">") if ($self->{'debug'}); |
250 |
|
|
251 |
my $f = "./import_xml/".$self->{'type'}.".xml"; |
my $f = "./import_xml/".$self->{'type'}.".xml"; |
252 |
confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); |
$log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f"); |
253 |
|
|
254 |
print STDERR "reading '$f'\n" if ($self->{'debug'}); |
$log->debug("reading '$f'") if ($self->{'debug'}); |
255 |
|
|
256 |
$self->{'import_xml'} = XMLin($f, |
$self->{'import_xml'} = XMLin($f, |
257 |
ForceArray => [ $self->{'tag'}, 'config', 'format' ], |
ForceArray => [ $self->{'tag'}, 'config', 'format' ], |
273 |
sub create_lookup { |
sub create_lookup { |
274 |
my $self = shift; |
my $self = shift; |
275 |
|
|
276 |
my $rec = shift || confess "need record to create lookup"; |
my $log = $self->_get_logger(); |
277 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
|
278 |
|
my $rec = shift || $log->logconfess("need record to create lookup"); |
279 |
|
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
280 |
|
|
281 |
foreach my $i (@_) { |
foreach my $i (@_) { |
282 |
if ($i->{'eval'}) { |
if ($i->{'eval'}) { |
364 |
|
|
365 |
=cut |
=cut |
366 |
|
|
|
# internal function to eval code |
|
|
# Evaluate a string of Perl code with "strict subs" switched off and hand
# back whatever the code produced.
#
# Arguments (after the invocant):
#   $code - source text to evaluate; a false value (undef, '', 0) makes the
#           method return immediately with a bare "return".
#
# Returns the scalar result of the evaluated code (undef if it died).
# Evaluation errors are reported on STDERR and are not re-thrown.
#
# NOTE: string eval can see the lexicals declared here, so $self and $code
# intentionally keep their names - evaluated snippets may refer to them.
# NOTE(review): $code is executed as-is; confirm it only ever comes from
# trusted configuration.
sub _eval {
    my $self = shift;

    my $code = shift;
    return unless $code;

    # let barewords inside the evaluated snippet pass as plain strings
    no strict 'subs';
    my $ret = eval $code;

    print STDERR "problem with eval code [$code]: $@\n" if $@;

    return $ret;
}
|
|
|
|
367 |
sub fill_in { |
sub fill_in { |
368 |
my $self = shift; |
my $self = shift; |
369 |
|
|
370 |
my $rec = shift || confess "need data record"; |
my $log = $self->_get_logger(); |
371 |
my $format = shift || confess "need format to parse"; |
|
372 |
|
my $rec = shift || $log->logconfess("need data record"); |
373 |
|
my $format = shift || $log->logconfess("need format to parse"); |
374 |
# iteration (for repeatable fields) |
# iteration (for repeatable fields) |
375 |
my $i = shift || 0; |
my $i = shift || 0; |
376 |
|
|
377 |
# FIXME remove for speedup? |
# FIXME remove for speedup? |
378 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
379 |
|
|
380 |
my $found = 0; |
my $found = 0; |
381 |
|
|
392 |
return if (! $self->_eval($eval)); |
return if (! $self->_eval($eval)); |
393 |
} |
} |
394 |
# do we have lookups? |
# do we have lookups? |
395 |
|
$log->debug("test format '$format' for lookups"); |
396 |
if ($format =~ /\[[^\[\]]+\]/o) { |
if ($format =~ /\[[^\[\]]+\]/o) { |
|
print "## probable lookup: $format\n"; |
|
397 |
return $self->lookup($format); |
return $self->lookup($format); |
398 |
} else { |
} else { |
399 |
return $format; |
return $format; |
416 |
sub lookup { |
sub lookup { |
417 |
my $self = shift; |
my $self = shift; |
418 |
|
|
419 |
my $tmp = shift || confess "need format"; |
my $log = $self->_get_logger(); |
420 |
|
|
421 |
|
my $tmp = shift || $log->logconfess("need format"); |
422 |
|
|
423 |
if ($tmp =~ /\[[^\[\]]+\]/o) { |
if ($tmp =~ /\[[^\[\]]+\]/o) { |
424 |
my @in = ( $tmp ); |
my @in = ( $tmp ); |
425 |
print "## lookup $tmp\n"; |
|
426 |
|
$log->debug("lookup for: ",$tmp); |
427 |
|
|
428 |
my @out; |
my @out; |
429 |
while (my $f = shift @in) { |
while (my $f = shift @in) { |
430 |
if ($f =~ /\[([^\[\]]+)\]/) { |
if ($f =~ /\[([^\[\]]+)\]/) { |
465 |
|
|
466 |
return if (! $format_utf8); |
return if (! $format_utf8); |
467 |
|
|
468 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
my $log = $self->_get_logger(); |
469 |
confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); |
|
470 |
|
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
471 |
|
$log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); |
472 |
|
|
473 |
$i = 0 if (! $i); |
$i = 0 if (! $i); |
474 |
|
|
475 |
my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); |
my $format = $self->{'utf2cp'}->convert($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); |
476 |
|
|
477 |
my @out; |
my @out; |
478 |
|
|
507 |
|
|
508 |
if ($eval_code) { |
if ($eval_code) { |
509 |
my $eval = $self->fill_in($rec,$eval_code,$i); |
my $eval = $self->fill_in($rec,$eval_code,$i); |
510 |
|
$log->debug("about to eval ",$eval," [$out]"); |
511 |
return if (! $self->_eval($eval)); |
return if (! $self->_eval($eval)); |
512 |
} |
} |
513 |
|
|
527 |
|
|
528 |
my ($rec, $format_utf8) = @_; |
my ($rec, $format_utf8) = @_; |
529 |
|
|
530 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
my $log = $self->_get_logger(); |
531 |
|
|
532 |
|
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
533 |
return if (! $format_utf8); |
return if (! $format_utf8); |
534 |
|
|
535 |
my $i = 0; |
my $i = 0; |
551 |
|
|
552 |
=cut |
=cut |
553 |
|
|
|
# private method _sort_by_order |
|
|
# sort subroutine using order="" attribute |
|
|
# Comparison routine meant to be called from inside a sort block: it reads
# the package variables $a and $b, treats them as keys of
# $self->{import_xml}{indexer}, and compares the two entries numerically.
#
# For each key the value used is the entry's 'order' field; when that field
# is false (missing, 0 or empty) the entry itself is used instead.
#
# Returns -1, 0 or 1 as produced by <=>.
sub _sort_by_order {
    my $self = shift;

    # Look each key up through $self every time (no cached sub-hash), so
    # the hash accesses behave exactly like the long-hand form.
    my @rank;
    for my $tag ($a, $b) {
        my $value = $self->{import_xml}{indexer}{$tag}{order}
            || $self->{import_xml}{indexer}{$tag};
        push @rank, $value;
    }

    return $rank[0] <=> $rank[1];
}
|
|
|
|
554 |
sub data_structure { |
sub data_structure { |
555 |
my $self = shift; |
my $self = shift; |
556 |
|
|
557 |
|
my $log = $self->_get_logger(); |
558 |
|
|
559 |
my $rec = shift; |
my $rec = shift; |
560 |
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
561 |
|
|
562 |
my @sorted_tags; |
my @sorted_tags; |
563 |
if ($self->{tags_by_order}) { |
if ($self->{tags_by_order}) { |
613 |
|
|
614 |
my $args = {@_}; |
my $args = {@_}; |
615 |
|
|
616 |
confess("need template name") if (! $args->{'template'}); |
my $log = $self->_get_logger(); |
617 |
confess("need data array") if (! $args->{'data'}); |
|
618 |
|
$log->logconfess("need template name") if (! $args->{'template'}); |
619 |
|
$log->logconfess("need data array") if (! $args->{'data'}); |
620 |
|
|
621 |
my $out; |
my $out; |
622 |
|
|
629 |
return $out; |
return $out; |
630 |
} |
} |
631 |
|
|
632 |
|
# |
633 |
|
# |
634 |
|
# |
635 |
|
|
636 |
|
=head1 INTERNAL METHODS |
637 |
|
|
638 |
|
Here is a quick list of internal methods, mostly useful to turn debugging |
639 |
|
on them (see L<LOGGING> below for explanation). |
640 |
|
|
641 |
|
=cut |
642 |
|
|
643 |
|
=head2 _eval |
644 |
|
|
645 |
|
Internal function to eval code without C<strict 'subs'>. |
646 |
|
|
647 |
|
=cut |
648 |
|
|
649 |
|
# Evaluate a string of Perl code with "strict subs" switched off.
#
# Arguments (after the invocant):
#   $code - source text to evaluate; a false value (undef, '', 0) makes the
#           method return immediately with a bare "return".
#
# Returns the result of the evaluated code, or 0 when that result is false
# (including when the code died). Evaluation errors are sent to the logger
# at "error" level and are not re-thrown; every evaluation is also traced
# at "debug" level.
#
# NOTE: string eval can see the lexicals declared here, so $self, $code and
# $log intentionally keep their names - evaluated snippets may refer to them.
# NOTE(review): $code is executed as-is; confirm it only ever comes from
# trusted configuration.
sub _eval {
    my $self = shift;

    my $code = shift;
    return unless $code;

    my $log = $self->_get_logger();

    # let barewords inside the evaluated snippet pass as plain strings
    no strict 'subs';
    my $ret = eval $code;

    $log->error("problem with eval code [$code]: $@") if $@;

    $log->debug("eval: ",$code," [",$ret,"]");

    return $ret ? $ret : 0;
}
666 |
|
|
667 |
|
=head2 _sort_by_order |
668 |
|
|
669 |
|
Sort xml tags data structure according to C<order=""> attribute. |
670 |
|
|
671 |
|
=cut |
672 |
|
|
673 |
|
# Comparison routine meant to be called from inside a sort block: it reads
# the package variables $a and $b, treats them as keys of
# $self->{import_xml}{indexer}, and compares the two entries numerically.
#
# For each key the value used is the entry's 'order' field; when that field
# is false (missing, 0 or empty) the entry itself is used instead.
#
# Returns -1, 0 or 1 as produced by <=>.
sub _sort_by_order {
    my $self = shift;

    # Resolve the sort value for one key. The lookup always goes through
    # $self (no cached sub-hash), so the hash accesses behave exactly like
    # the long-hand form.
    my $rank_of = sub {
        my $tag = shift;
        return $self->{import_xml}{indexer}{$tag}{order}
            || $self->{import_xml}{indexer}{$tag};
    };

    return $rank_of->($a) <=> $rank_of->($b);
}
683 |
|
|
684 |
|
# Return a logger whose category is the fully qualified name of the
# subroutine that called _get_logger (frame 1 of the call stack), so log
# output can be filtered per method rather than only per package.
#
# The name is passed to get_logger() as exactly one argument, even when
# there is no enclosing subroutine and the value is therefore undef.
sub _get_logger {
    my $self = shift;

    # element 3 of caller() is the subroutine name for that frame
    my $calling_sub = (caller(1))[3];

    return get_logger($calling_sub);
}
690 |
|
|
691 |
|
# |
692 |
|
# |
693 |
|
# |
694 |
|
|
695 |
|
=head1 LOGGING |
696 |
|
|
697 |
|
Logging in WebPAC is performed by L<Log::Log4perl> with config file |
698 |
|
C<log.conf>. |
699 |
|
|
700 |
|
Methods defined above have different levels of logging, so |
701 |
|
their descriptions will be useful to turn (mostly B<debug> logging) on |
702 |
|
or off to see why WebPAC isn't performing as you expect it (it might even |
703 |
|
be a bug!). |
704 |
|
|
705 |
|
B<This is different from normal Log4perl behaviour>. To repeat, you can |
706 |
|
also use method names, and not only classes (which are just few) |
707 |
|
to filter logging. |
708 |
|
|
709 |
|
=cut |
710 |
|
|
711 |
1; |
1; |