--- trunk2/lib/WebPAC.pm 2004/06/20 16:57:52 374
+++ trunk2/lib/WebPAC.pm 2004/09/09 18:08:38 418
@@ -169,6 +169,9 @@
# read database
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
+
+ $log->debug("mfn: $mfn\n");
+
# read record
my $row = OpenIsis::read( $isis_db, $mfn );
foreach my $k (keys %{$row}) {
@@ -195,12 +198,17 @@
}
# create lookup
- my $rec = $self->{'data'}->{$mfn};
+ my $rec = $self->{'data'}->{$mfn} || $log->confess("record $mfn empty?");
$self->create_lookup($rec, @{$arg->{'lookup'}});
+ $self->progress_bar($mfn,$maxmfn);
+
}
$self->{'current_mfn'} = 1;
+ $self->{'last_pcnt'} = 0;
+
+ $log->debug("max mfn: $maxmfn");
# store max mfn and return it.
return $self->{'max_mfn'} = $maxmfn;
@@ -228,9 +236,46 @@
return;
}
+ $self->progress_bar($mfn,$self->{'max_mfn'});
+
return $self->{'data'}->{$mfn};
}
+=head2 progress_bar
+
+Draw progress bar on STDERR.
+
+ $webpac->progress_bar($current, $max);
+
+=cut
+
+sub progress_bar {
+ my $self = shift;
+
+ my ($curr,$max) = @_;
+
+ my $log = $self->_get_logger();
+
+ $log->logconfess("no current value!") if (! $curr);
+ $log->logconfess("no maximum value!") if (! $max);
+
+ if ($curr > $max) {
+ $max = $curr;
+ $log->debug("overflow to $curr");
+ }
+
+ $self->{'last_pcnt'} ||= 1;
+
+ $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
+
+ my $p = int($curr * 100 / $max);
+ if ($p != $self->{'last_pcnt'}) {
+ printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
+ $self->{'last_pcnt'} = $p;
+ }
+ print STDERR "\n" if ($p == 100);
+}
+
=head2 open_import_xml
Read file from C directory and parse it.
@@ -254,17 +299,21 @@
$self->{'tag'} = $type2tag{$type_base};
- $log->debug("using type '",$self->{'type'},"' tag <",$self->{'tag'},">") if ($self->{'debug'});
+ $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
my $f = "./import_xml/".$self->{'type'}.".xml";
$log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
- $log->debug("reading '$f'") if ($self->{'debug'});
+ $log->info("reading '$f'");
+
+ $self->{'import_xml_file'} = $f;
$self->{'import_xml'} = XMLin($f,
ForceArray => [ $self->{'tag'}, 'config', 'format' ],
);
+ $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
+
}
=head2 create_lookup
@@ -286,21 +335,23 @@
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
foreach my $i (@_) {
- if ($i->{'eval'}) {
- my $eval = $self->fill_in($rec,$i->{'eval'});
- my $key = $self->fill_in($rec,$i->{'key'});
- my @val = $self->fill_in($rec,$i->{'val'});
- if ($key && @val && eval $eval) {
+ $log->logconfess("need key") unless defined($i->{'key'});
+ $log->logconfess("need val") unless defined($i->{'val'});
+
+ if (defined($i->{'eval'})) {
+ # eval first, so we can skip fill_in for key and val
+ my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
+ if ($self->_eval($eval)) {
+ my $key = $self->fill_in($rec,$i->{'key'}) || next;
+ my @val = $self->fill_in($rec,$i->{'val'}) || next;
$log->debug("stored $key = ",sub { join(" | ",@val) });
push @{$self->{'lookup'}->{$key}}, @val;
}
} else {
- my $key = $self->fill_in($rec,$i->{'key'});
- my @val = $self->fill_in($rec,$i->{'val'});
- if ($key && @val) {
- $log->debug("stored $key = ",sub { join(" | ",@val) });
- push @{$self->{'lookup'}->{$key}}, @val;
- }
+ my $key = $self->fill_in($rec,$i->{'key'}) || next;
+ my @val = $self->fill_in($rec,$i->{'val'}) || next;
+ $log->debug("stored $key = ",sub { join(" | ",@val) });
+ push @{$self->{'lookup'}->{$key}}, @val;
}
}
}
@@ -331,6 +382,7 @@
if ($$rec->{$f}) {
return '' if (! $$rec->{$f}->[$i]);
+ no strict 'refs';
if ($sf && $$rec->{$f}->[$i]->{$sf}) {
$$found++ if (defined($$found));
return $$rec->{$f}->[$i]->{$sf};
@@ -371,6 +423,9 @@
This function B perform parsing of format to inteligenty skip
delimiters before fields which aren't used.
+This method will automatically decode UTF-8 string to local code page
+if needed.
+
=cut
sub fill_in {
@@ -386,6 +441,10 @@
# FIXME remove for speedup?
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ if (utf8::is_utf8($format)) {
+ $format = $self->_x($format);
+ }
+
my $found = 0;
my $eval_code;
@@ -483,7 +542,7 @@
$i = 0 if (! $i);
- my $format = $self->{'utf2cp'}->convert($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
+ my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
my @out;
@@ -605,7 +664,8 @@
my @ds = $webpac->data_structure($rec);
This method will also set C<$webpac->{'currnet_filename'}> if there is
- tag in C.
+ tag in C and C<$webpac->{'headline'}> if there is
+ tag.
=cut
@@ -618,6 +678,7 @@
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
undef $self->{'currnet_filename'};
+ undef $self->{'headline'};
my @sorted_tags;
if ($self->{tags_by_order}) {
@@ -650,6 +711,20 @@
}
next if (! @v);
+ # use format?
+ if ($tag->{'format_name'}) {
+ @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
+ }
+
+ if ($field eq 'filename') {
+ $self->{'current_filename'} = join('',@v);
+ $log->debug("filename: ",$self->{'current_filename'});
+ } elsif ($field eq 'headline') {
+ $self->{'headline'} .= join('',@v);
+ $log->debug("headline: ",$self->{'headline'});
+ next; # don't return headline in data_structure!
+ }
+
# does tag have type?
if ($tag->{'type'}) {
push @{$row->{$tag->{'type'}}}, @v;
@@ -658,15 +733,16 @@
push @{$row->{'swish'}}, @v;
}
- if ($field eq 'filename') {
- $self->{'current_filename'} = join('',@v);
- $log->debug("filename: ",$self->{'current_filename'});
- }
}
if ($row) {
$row->{'tag'} = $field;
+
+ # TODO: name_sigular, name_plural
+ my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
+ $row->{'name'} = $name ? $self->_x($name) : $field;
+
push @ds, $row;
$log->debug("row $field: ",sub { Dumper($row) });
@@ -707,6 +783,79 @@
return $out;
}
+=head2 output_file
+
+Create output from in-memory data structure using Template Toolkit template
+to a file.
+
+ $webpac->output_file(
+ file => 'out.txt',
+ template => 'text.tt',
+ data => @ds
+ );
+
+=cut
+
+sub output_file {
+ my $self = shift;
+
+ my $args = {@_};
+
+ my $log = $self->_get_logger();
+
+ $log->logconfess("need file name") if (! $args->{'file'});
+
+ $log->debug("creating file ",$args->{'file'});
+
+ open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!");
+ print $fh $self->output(
+ template => $args->{'template'},
+ data => $args->{'data'},
+ ) || $log->logdie("print: $!");
+ close($fh) || $log->logdie("close: $!");
+}
+
+=head2 apply_format
+
+Apply format specified in tag with C and
+C.
+
+ my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
+
+Formats can contain C if you need them.
+
+=cut
+
+sub apply_format {
+ my $self = shift;
+
+ my ($name,$delimiter,$data) = @_;
+
+ my $log = $self->_get_logger();
+
+ if (! $self->{'import_xml'}->{'format'}->{$name}) {
+ $log->warn(" is not defined in ",$self->{'import_xml_file'});
+ return $data;
+ }
+
+ $log->warn("no delimiter for format $name") if (! $delimiter);
+
+ my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
+
+ my @data = split(/\Q$delimiter\E/, $data);
+
+ my $out = sprintf($format, @data);
+ $log->debug("using format $name [$format] on $data to produce: $out");
+
+ if ($out =~ m/$LOOKUP_REGEX/o) {
+ return $self->lookup($out);
+ } else {
+ return $out;
+ }
+
+}
+
+
#
#
#
@@ -759,6 +908,15 @@
return $va <=> $vb;
}
+=head2 _get_logger
+
+Get C object with a twist: domains are defined for each
+method
+
+ my $log = $webpac->_get_logger();
+
+=cut
+
sub _get_logger {
my $self = shift;
@@ -766,6 +924,22 @@
return get_logger($name);
}
+=head2 _x
+
+Convert string from UTF-8 to code page defined in C.
+
+ my $text = $webpac->_x('utf8 text');
+
+=cut
+
+sub _x {
+ my $self = shift;
+ my $utf8 = shift || return;
+
+ return $self->{'utf2cp'}->convert($utf8) ||
+ $self->_get_logger()->logwarn("can't convert '$utf8'");
+}
+
#
#
#