--- trunk2/lib/WebPAC.pm 2004/06/17 17:25:12 370
+++ trunk2/lib/WebPAC.pm 2004/09/05 22:22:37 411
@@ -8,9 +8,15 @@
use Config::IniFiles;
use XML::Simple;
use Template;
+use Log::Log4perl qw(get_logger :levels);
use Data::Dumper;
+#my $LOOKUP_REGEX = '\[[^\[\]]+\]';
+#my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
+my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
+my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
+
=head1 NAME
WebPAC - base class for WebPAC
@@ -53,6 +59,11 @@
my $self = {@_};
bless($self, $class);
+ my $log_file = $self->{'log'} || "log.conf";
+ Log::Log4perl->init($log_file);
+
+ my $log = $self->_get_logger();
+
# fill in default values
# output codepage
$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
@@ -60,8 +71,9 @@
#
# read global.conf
#
+ $log->debug("read 'global.conf'");
- my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
+ my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
# read global config parametars
foreach my $var (qw(
@@ -80,7 +92,7 @@
# read indexer config file
#
- $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
+ $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
# create UTF-8 convertor for import_xml files
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
@@ -132,7 +144,9 @@
my $self = shift;
my $arg = {@_};
- croak "need filename" if (! $arg->{'filename'});
+ my $log = $self->_get_logger();
+
+ $log->logcroak("need filename") if (! $arg->{'filename'});
my $code_page = $arg->{'code_page'} || '852';
use OpenIsis;
@@ -142,7 +156,7 @@
# create Text::Iconv object
my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
- print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
+ $log->info("reading ISIS database '",$arg->{'filename'},"'");
my $isis_db = OpenIsis::open($arg->{'filename'});
@@ -150,7 +164,7 @@
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
- print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
+ $log->info("processing $maxmfn records...");
# read database
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
@@ -174,6 +188,8 @@
push @{$self->{'data'}->{$mfn}->{$k}}, $val;
}
+ } else {
+ push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
}
}
@@ -182,9 +198,12 @@
my $rec = $self->{'data'}->{$mfn};
$self->create_lookup($rec, @{$arg->{'lookup'}});
+ $self->progress_bar($mfn,$maxmfn);
+
}
$self->{'current_mfn'} = 1;
+ $self->{'last_pcnt'} = 0;
# store max mfn and return it.
return $self->{'max_mfn'} = $maxmfn;
@@ -202,16 +221,55 @@
sub fetch_rec {
my $self = shift;
- my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
+ my $log = $self->_get_logger();
+
+ my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
if ($mfn > $self->{'max_mfn'}) {
$self->{'current_mfn'} = $self->{'max_mfn'};
+ $log->debug("at EOF");
return;
}
+ $self->progress_bar($mfn,$self->{'max_mfn'});
+
return $self->{'data'}->{$mfn};
}
+=head2 progress_bar
+
+Draw progress bar on STDERR.
+
+ $webpac->progress_bar($current, $max);
+
+=cut
+
+sub progress_bar {
+ my $self = shift;
+
+ my ($curr,$max) = @_;
+
+ my $log = $self->_get_logger();
+
+ $log->logconfess("no current value!") if (! $curr);
+ $log->logconfess("no maximum value!") if (! $max);
+
+ if ($curr > $max) {
+ $max = $curr;
+ $log->debug("overflow to $curr");
+ }
+
+ $self->{'last_pcnt'} ||= 1;
+
+ $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
+
+ my $p = int($curr * 100 / $max);
+ if ($p != $self->{'last_pcnt'}) {
+ printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
+ $self->{'last_pcnt'} = $p;
+ }
+}
+
=head2 open_import_xml
Read file from C directory and parse it.
@@ -223,8 +281,10 @@
sub open_import_xml {
my $self = shift;
+ my $log = $self->_get_logger();
+
my $arg = {@_};
- confess "need type to load file from import_xml/" if (! $arg->{'type'});
+ $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
$self->{'type'} = $arg->{'type'};
@@ -233,18 +293,21 @@
$self->{'tag'} = $type2tag{$type_base};
- print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
+ $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
my $f = "./import_xml/".$self->{'type'}.".xml";
- confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
+ $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
+
+ $log->info("reading '$f'");
- print STDERR "reading '$f'\n" if ($self->{'debug'});
+ $self->{'import_xml_file'} = $f;
$self->{'import_xml'} = XMLin($f,
ForceArray => [ $self->{'tag'}, 'config', 'format' ],
- ForceContent => 1
);
+ $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
+
}
=head2 create_lookup
@@ -260,8 +323,10 @@
sub create_lookup {
my $self = shift;
- my $rec = shift || confess "need record to create lookup";
- confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ my $log = $self->_get_logger();
+
+ my $rec = shift || $log->logconfess("need record to create lookup");
+ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
foreach my $i (@_) {
if ($i->{'eval'}) {
@@ -269,12 +334,14 @@
my $key = $self->fill_in($rec,$i->{'key'});
my @val = $self->fill_in($rec,$i->{'val'});
if ($key && @val && eval $eval) {
+ $log->debug("stored $key = ",sub { join(" | ",@val) });
push @{$self->{'lookup'}->{$key}}, @val;
}
} else {
my $key = $self->fill_in($rec,$i->{'key'});
my @val = $self->fill_in($rec,$i->{'val'});
if ($key && @val) {
+ $log->debug("stored $key = ",sub { join(" | ",@val) });
push @{$self->{'lookup'}->{$key}}, @val;
}
}
@@ -307,6 +374,7 @@
if ($$rec->{$f}) {
return '' if (! $$rec->{$f}->[$i]);
+ no strict 'refs';
if ($sf && $$rec->{$f}->[$i]->{$sf}) {
$$found++ if (defined($$found));
return $$rec->{$f}->[$i]->{$sf};
@@ -347,18 +415,27 @@
This function B perform parsing of format to inteligenty skip
delimiters before fields which aren't used.
+This method will automatically decode UTF-8 string to local code page
+if needed.
+
=cut
sub fill_in {
my $self = shift;
- my $rec = shift || confess "need data record";
- my $format = shift || confess "need format to parse";
+ my $log = $self->_get_logger();
+
+ my $rec = shift || $log->logconfess("need data record");
+ my $format = shift || $log->logconfess("need format to parse");
# iteration (for repeatable fields)
my $i = shift || 0;
# FIXME remove for speedup?
- confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+
+ if (utf8::is_utf8($format)) {
+ $format = $self->_x($format);
+ }
my $found = 0;
@@ -367,15 +444,17 @@
$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
# do actual replacement of placeholders
- $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
+ $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
if ($found) {
+ $log->debug("format: $format");
if ($eval_code) {
my $eval = $self->fill_in($rec,$eval_code,$i);
- return if (! eval $eval);
+ return if (! $self->_eval($eval));
}
# do we have lookups?
- if ($format =~ /\[[^\[\]]+\]/o) {
+ if ($format =~ /$LOOKUP_REGEX/o) {
+ $log->debug("format '$format' has lookup");
return $self->lookup($format);
} else {
return $format;
@@ -398,18 +477,23 @@
sub lookup {
my $self = shift;
- my $tmp = shift || confess "need format";
+ my $log = $self->_get_logger();
- if ($tmp =~ /\[[^\[\]]+\]/o) {
+ my $tmp = shift || $log->logconfess("need format");
+
+ if ($tmp =~ /$LOOKUP_REGEX/o) {
my @in = ( $tmp );
+
+ $log->debug("lookup for: ",$tmp);
+
my @out;
while (my $f = shift @in) {
- if ($f =~ /\[([^\[\]]+)\]/) {
+ if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
my $k = $1;
if ($self->{'lookup'}->{$k}) {
foreach my $nv (@{$self->{'lookup'}->{$k}}) {
my $tmp2 = $f;
- $tmp2 =~ s/\[$k\]/$nv/g;
+ $tmp2 =~ s/lookup{$k}/$nv/g;
push @in, $tmp2;
}
} else {
@@ -419,6 +503,7 @@
push @out, $f;
}
}
+ $log->logconfess("return is array and it's not expected!") unless wantarray;
return @out;
} else {
return $tmp;
@@ -442,15 +527,19 @@
return if (! $format_utf8);
- confess("need HASH as first argument!") if ($rec !~ /HASH/o);
- confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
+ my $log = $self->_get_logger();
+
+ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
$i = 0 if (! $i);
- my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
+ my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
my @out;
+ $log->debug("format: $format");
+
my $eval_code;
# remove eval{...} from beginning
$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
@@ -458,7 +547,7 @@
my $prefix;
my $all_found=0;
- while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
+ while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
my $del = $1 || '';
$prefix ||= $del if ($all_found == 0);
@@ -475,14 +564,22 @@
return if (! $all_found);
- my $out = join('',@out) . $format;
+ my $out = join('',@out);
+
+ if ($out) {
+ # add rest of format (suffix)
+ $out .= $format;
- # add prefix if not there
- $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+ # add prefix if not there
+ $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+
+ $log->debug("result: $out");
+ }
if ($eval_code) {
my $eval = $self->fill_in($rec,$eval_code,$i);
- return if (! eval $eval);
+ $log->debug("about to eval{",$eval,"} format: $out");
+ return if (! $self->_eval($eval));
}
return $out;
@@ -501,7 +598,9 @@
my ($rec, $format_utf8) = @_;
- confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ my $log = $self->_get_logger();
+
+ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
return if (! $format_utf8);
my $i = 0;
@@ -511,36 +610,67 @@
push @arr, $v;
}
+ $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
+
return @arr;
}
-=head2 data_structure
+=head2 fill_in_to_arr
-Create in-memory data structure which represents layout from C.
-It is used later to produce output.
+Similar to C, but returns array of all repeatable fields. Usable
+for fields which have lookups, so they shouldn't be parsed but rather
+Ced.
- my @ds = $webpac->data_structure($rec);
+ my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
=cut
-# private method _sort_by_order
-# sort subrouting using order="" attribute
-sub _sort_by_order {
+sub fill_in_to_arr {
my $self = shift;
- my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
- $self->{'import_xml'}->{'indexer'}->{$a};
- my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
- $self->{'import_xml'}->{'indexer'}->{$b};
+ my ($rec, $format_utf8) = @_;
- return $va <=> $vb;
+ my $log = $self->_get_logger();
+
+ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ return if (! $format_utf8);
+
+ my $i = 0;
+ my @arr;
+
+ while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
+ push @arr, @v;
+ }
+
+ $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
+
+ return @arr;
}
+
+=head2 data_structure
+
+Create in-memory data structure which represents layout from C.
+It is used later to produce output.
+
+ my @ds = $webpac->data_structure($rec);
+
+This method will also set C<$webpac->{'currnet_filename'}> if there is
+ tag in C and C<$webpac->{'headline'}> if there is
+ tag.
+
+=cut
+
sub data_structure {
my $self = shift;
+ my $log = $self->_get_logger();
+
my $rec = shift;
- confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+
+ undef $self->{'currnet_filename'};
+ undef $self->{'headline'};
my @sorted_tags;
if ($self->{tags_by_order}) {
@@ -552,6 +682,8 @@
my @ds;
+ $log->debug("tags: ",sub { join(", ",@sorted_tags) });
+
foreach my $field (@sorted_tags) {
my $row;
@@ -559,10 +691,32 @@
#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
- my @v = $self->parse_to_arr($rec,$tag->{'content'});
+ my $format = $tag->{'value'} || $tag->{'content'};
+
+ $log->debug("format: $format");
+ my @v;
+ if ($format =~ /$LOOKUP_REGEX/o) {
+ @v = $self->fill_in_to_arr($rec,$format);
+ } else {
+ @v = $self->parse_to_arr($rec,$format);
+ }
next if (! @v);
+ # use format?
+ if ($tag->{'format_name'}) {
+ @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
+ }
+
+ if ($field eq 'filename') {
+ $self->{'current_filename'} = join('',@v);
+ $log->debug("filename: ",$self->{'current_filename'});
+ } elsif ($field eq 'headline') {
+ $self->{'headline'} .= join('',@v);
+ $log->debug("headline: ",$self->{'headline'});
+ next; # don't return headline in data_structure!
+ }
+
# does tag have type?
if ($tag->{'type'}) {
push @{$row->{$tag->{'type'}}}, @v;
@@ -570,11 +724,20 @@
push @{$row->{'display'}}, @v;
push @{$row->{'swish'}}, @v;
}
+
+
}
if ($row) {
$row->{'tag'} = $field;
+
+ # TODO: name_sigular, name_plural
+ my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
+ $row->{'name'} = $name ? $self->_x($name) : $field;
+
push @ds, $row;
+
+ $log->debug("row $field: ",sub { Dumper($row) });
}
}
@@ -596,8 +759,10 @@
my $args = {@_};
- confess("need template name") if (! $args->{'template'});
- confess("need data array") if (! $args->{'data'});
+ my $log = $self->_get_logger();
+
+ $log->logconfess("need template name") if (! $args->{'template'});
+ $log->logconfess("need data array") if (! $args->{'data'});
my $out;
@@ -610,4 +775,181 @@
return $out;
}
+=head2 output_file
+
+Create output from in-memory data structure using Template Toolkit template
+to a file.
+
+ $webpac->output_file(
+ file => 'out.txt',
+ template => 'text.tt',
+ data => @ds
+ );
+
+=cut
+
+sub output_file {
+ my $self = shift;
+
+ my $args = {@_};
+
+ my $log = $self->_get_logger();
+
+ $log->logconfess("need file name") if (! $args->{'file'});
+
+ $log->debug("creating file ",$args->{'file'});
+
+ open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!");
+ print $fh $self->output(
+ template => $args->{'template'},
+ data => $args->{'data'},
+ ) || $log->logdie("print: $!");
+ close($fh) || $log->logdie("close: $!");
+}
+
+=head2 apply_format
+
+Apply format specified in tag with C and
+C.
+
+ my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
+
+Formats can contain C if you need them.
+
+=cut
+
+sub apply_format {
+ my $self = shift;
+
+ my ($name,$delimiter,$data) = @_;
+
+ my $log = $self->_get_logger();
+
+ if (! $self->{'import_xml'}->{'format'}->{$name}) {
+ $log->warn(" is not defined in ",$self->{'import_xml_file'});
+ return $data;
+ }
+
+ $log->warn("no delimiter for format $name") if (! $delimiter);
+
+ my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
+
+ my @data = split(/\Q$delimiter\E/, $data);
+
+ my $out = sprintf($format, @data);
+ $log->debug("using format $name [$format] on $data to produce: $out");
+
+ if ($out =~ m/$LOOKUP_REGEX/o) {
+ return $self->lookup($out);
+ } else {
+ return $out;
+ }
+
+}
+
+
+#
+#
+#
+
+=head1 INTERNAL METHODS
+
+Here is a quick list of internal methods, mostly useful to turn debugging
+on them (see L below for explanation).
+
+=cut
+
+=head2 _eval
+
+Internal function to eval code without C.
+
+=cut
+
+sub _eval {
+ my $self = shift;
+
+ my $code = shift || return;
+
+ my $log = $self->_get_logger();
+
+ no strict 'subs';
+ my $ret = eval $code;
+ if ($@) {
+ $log->error("problem with eval code [$code]: $@");
+ }
+
+ $log->debug("eval: ",$code," [",$ret,"]");
+
+ return $ret || 0;
+}
+
+=head2 _sort_by_order
+
+Sort xml tags data structure accoding to C attribute.
+
+=cut
+
+sub _sort_by_order {
+ my $self = shift;
+
+ my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
+ $self->{'import_xml'}->{'indexer'}->{$a};
+ my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
+ $self->{'import_xml'}->{'indexer'}->{$b};
+
+ return $va <=> $vb;
+}
+
+=head2 _get_logger
+
+Get C object with a twist: domains are defined for each
+method
+
+ my $log = $webpac->_get_logger();
+
+=cut
+
+sub _get_logger {
+ my $self = shift;
+
+ my $name = (caller(1))[3] || caller;
+ return get_logger($name);
+}
+
+=head2 _x
+
+Convert string from UTF-8 to code page defined in C.
+
+ my $text = $webpac->_x('utf8 text');
+
+=cut
+
+sub _x {
+ my $self = shift;
+ my $utf8 = shift || return;
+
+ return $self->{'utf2cp'}->convert($utf8) ||
+ $self->_get_logger()->logwarn("can't convert '$utf8'");
+}
+
+#
+#
+#
+
+=head1 LOGGING
+
+Logging in WebPAC is performed by L with config file
+C.
+
+Methods defined above have different levels of logging, so
+it's descriptions will be useful to turn (mostry B logging) on
+or off to see why WabPAC isn't perforing as you expect it (it might even
+be a bug!).
+
+B. To repeat, you can
+also use method names, and not only classes (which are just few)
+to filter logging.
+
+=cut
+
1;