--- trunk2/lib/WebPAC.pm 2004/06/20 15:49:09 373 +++ trunk2/lib/WebPAC.pm 2004/09/09 18:08:38 418 @@ -169,6 +169,9 @@ # read database for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { + + $log->debug("mfn: $mfn\n"); + # read record my $row = OpenIsis::read( $isis_db, $mfn ); foreach my $k (keys %{$row}) { @@ -188,17 +191,24 @@ push @{$self->{'data'}->{$mfn}->{$k}}, $val; } + } else { + push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn; } } # create lookup - my $rec = $self->{'data'}->{$mfn}; + my $rec = $self->{'data'}->{$mfn} || $log->confess("record $mfn empty?"); $self->create_lookup($rec, @{$arg->{'lookup'}}); + $self->progress_bar($mfn,$maxmfn); + } $self->{'current_mfn'} = 1; + $self->{'last_pcnt'} = 0; + + $log->debug("max mfn: $maxmfn"); # store max mfn and return it. return $self->{'max_mfn'} = $maxmfn; @@ -226,9 +236,46 @@ return; } + $self->progress_bar($mfn,$self->{'max_mfn'}); + return $self->{'data'}->{$mfn}; } +=head2 progress_bar + +Draw progress bar on STDERR. + + $webpac->progress_bar($current, $max); + +=cut + +sub progress_bar { + my $self = shift; + + my ($curr,$max) = @_; + + my $log = $self->_get_logger(); + + $log->logconfess("no current value!") if (! $curr); + $log->logconfess("no maximum value!") if (! $max); + + if ($curr > $max) { + $max = $curr; + $log->debug("overflow to $curr"); + } + + $self->{'last_pcnt'} ||= 1; + + $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'}); + + my $p = int($curr * 100 / $max); + if ($p != $self->{'last_pcnt'}) { + printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p ); + $self->{'last_pcnt'} = $p; + } + print STDERR "\n" if ($p == 100); +} + =head2 open_import_xml Read file from C directory and parse it. @@ -252,17 +299,21 @@ $self->{'tag'} = $type2tag{$type_base}; - $log->debug("using type '",$self->{'type'},"' tag <",$self->{'tag'},">") if ($self->{'debug'}); + $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">"); my $f = "./import_xml/".$self->{'type'}.".xml"; $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f"); - $log->debug("reading '$f'") if ($self->{'debug'}); + $log->info("reading '$f'"); + + $self->{'import_xml_file'} = $f; $self->{'import_xml'} = XMLin($f, ForceArray => [ $self->{'tag'}, 'config', 'format' ], ); + $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }); + } =head2 create_lookup @@ -284,21 +335,23 @@ $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); foreach my $i (@_) { - if ($i->{'eval'}) { - my $eval = $self->fill_in($rec,$i->{'eval'}); - my $key = $self->fill_in($rec,$i->{'key'}); - my @val = $self->fill_in($rec,$i->{'val'}); - if ($key && @val && eval $eval) { + $log->logconfess("need key") unless defined($i->{'key'}); + $log->logconfess("need val") unless defined($i->{'val'}); + + if (defined($i->{'eval'})) { + # eval first, so we can skip fill_in for key and val + my $eval = $self->fill_in($rec,$i->{'eval'}) || next; + if ($self->_eval($eval)) { + my $key = $self->fill_in($rec,$i->{'key'}) || next; + my @val = $self->fill_in($rec,$i->{'val'}) || next; $log->debug("stored $key = ",sub { join(" | ",@val) }); push @{$self->{'lookup'}->{$key}}, @val; } } else { - my $key = $self->fill_in($rec,$i->{'key'}); - my @val = $self->fill_in($rec,$i->{'val'}); - if ($key && @val) { - $log->debug("stored $key = ",sub { join(" | ",@val) }); - push @{$self->{'lookup'}->{$key}}, @val; - } + my $key = $self->fill_in($rec,$i->{'key'}) || next; + my @val = $self->fill_in($rec,$i->{'val'}) || next; + $log->debug("stored $key = ",sub { join(" | ",@val) }); + push @{$self->{'lookup'}->{$key}}, @val; } } } @@ -329,6 +382,7 @@ if ($$rec->{$f}) { return '' if (! $$rec->{$f}->[$i]); + no strict 'refs'; if ($sf && $$rec->{$f}->[$i]->{$sf}) { $$found++ if (defined($$found)); return $$rec->{$f}->[$i]->{$sf}; @@ -369,6 +423,9 @@ This function B perform parsing of format to inteligenty skip delimiters before fields which aren't used. +This method will automatically decode UTF-8 string to local code page +if needed. + =cut sub fill_in { @@ -384,6 +441,10 @@ # FIXME remove for speedup? $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); + if (utf8::is_utf8($format)) { + $format = $self->_x($format); + } + my $found = 0; my $eval_code; @@ -481,7 +542,7 @@ $i = 0 if (! $i); - my $format = $self->{'utf2cp'}->convert($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); + my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); my @out; @@ -602,6 +663,10 @@ my @ds = $webpac->data_structure($rec); +This method will also set C<$webpac->{'currnet_filename'}> if there is + tag in C and C<$webpac->{'headline'}> if there is + tag. + =cut sub data_structure { @@ -612,6 +677,9 @@ my $rec = shift; $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); + undef $self->{'currnet_filename'}; + undef $self->{'headline'}; + my @sorted_tags; if ($self->{tags_by_order}) { @sorted_tags = @{$self->{tags_by_order}}; @@ -643,6 +711,20 @@ } next if (! @v); + # use format? + if ($tag->{'format_name'}) { + @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; + } + + if ($field eq 'filename') { + $self->{'current_filename'} = join('',@v); + $log->debug("filename: ",$self->{'current_filename'}); + } elsif ($field eq 'headline') { + $self->{'headline'} .= join('',@v); + $log->debug("headline: ",$self->{'headline'}); + next; # don't return headline in data_structure! + } + # does tag have type? if ($tag->{'type'}) { push @{$row->{$tag->{'type'}}}, @v; @@ -651,11 +733,18 @@ push @{$row->{'swish'}}, @v; } + } if ($row) { $row->{'tag'} = $field; + + # TODO: name_sigular, name_plural + my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; + $row->{'name'} = $name ? $self->_x($name) : $field; + push @ds, $row; + $log->debug("row $field: ",sub { Dumper($row) }); } @@ -694,6 +783,79 @@ return $out; } +=head2 output_file + +Create output from in-memory data structure using Template Toolkit template +to a file. + + $webpac->output_file( + file => 'out.txt', + template => 'text.tt', + data => @ds + ); + +=cut + +sub output_file { + my $self = shift; + + my $args = {@_}; + + my $log = $self->_get_logger(); + + $log->logconfess("need file name") if (! $args->{'file'}); + + $log->debug("creating file ",$args->{'file'}); + + open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!"); + print $fh $self->output( + template => $args->{'template'}, + data => $args->{'data'}, + ) || $log->logdie("print: $!"); + close($fh) || $log->logdie("close: $!"); +} + +=head2 apply_format + +Apply format specified in tag with C and +C. + + my $text = $webpac->apply_format($format_name,$format_delimiter,$data); + +Formats can contain C if you need them. + +=cut + +sub apply_format { + my $self = shift; + + my ($name,$delimiter,$data) = @_; + + my $log = $self->_get_logger(); + + if (! $self->{'import_xml'}->{'format'}->{$name}) { + $log->warn(" is not defined in ",$self->{'import_xml_file'}); + return $data; + } + + $log->warn("no delimiter for format $name") if (! $delimiter); + + my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'"); + + my @data = split(/\Q$delimiter\E/, $data); + + my $out = sprintf($format, @data); + $log->debug("using format $name [$format] on $data to produce: $out"); + + if ($out =~ m/$LOOKUP_REGEX/o) { + return $self->lookup($out); + } else { + return $out; + } + +} + + # # # @@ -746,11 +908,36 @@ return $va <=> $vb; } +=head2 _get_logger + +Get C object with a twist: domains are defined for each +method + + my $log = $webpac->_get_logger(); + +=cut + sub _get_logger { my $self = shift; - my @c = caller(1); - return get_logger($c[3]); + my $name = (caller(1))[3] || caller; + return get_logger($name); +} + +=head2 _x + +Convert string from UTF-8 to code page defined in C. + + my $text = $webpac->_x('utf8 text'); + +=cut + +sub _x { + my $self = shift; + my $utf8 = shift || return; + + return $self->{'utf2cp'}->convert($utf8) || + $self->_get_logger()->logwarn("can't convert '$utf8'"); } #