--- trunk2/lib/WebPAC.pm 2004/06/19 18:16:20 372 +++ trunk2/lib/WebPAC.pm 2004/06/20 17:52:41 375 @@ -12,6 +12,11 @@ use Data::Dumper; +#my $LOOKUP_REGEX = '\[[^\[\]]+\]'; +#my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]'; +my $LOOKUP_REGEX = 'lookup{[^\{\}]+}'; +my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}'; + =head1 NAME WebPAC - base class for WebPAC @@ -183,6 +188,8 @@ push @{$self->{'data'}->{$mfn}->{$k}}, $val; } + } else { + push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn; } } @@ -217,6 +224,7 @@ if ($mfn > $self->{'max_mfn'}) { $self->{'current_mfn'} = $self->{'max_mfn'}; + $log->debug("at EOF"); return; } @@ -246,18 +254,21 @@ $self->{'tag'} = $type2tag{$type_base}; - $log->debug("using type '",$self->{'type'},"' tag <",$self->{'tag'},">") if ($self->{'debug'}); + $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">"); my $f = "./import_xml/".$self->{'type'}.".xml"; $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f"); - $log->debug("reading '$f'") if ($self->{'debug'}); + $log->info("reading '$f'"); + + $self->{'import_xml_file'} = $f; $self->{'import_xml'} = XMLin($f, ForceArray => [ $self->{'tag'}, 'config', 'format' ], - ForceContent => 1 ); + $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }); + } =head2 create_lookup @@ -284,12 +295,14 @@ my $key = $self->fill_in($rec,$i->{'key'}); my @val = $self->fill_in($rec,$i->{'val'}); if ($key && @val && eval $eval) { + $log->debug("stored $key = ",sub { join(" | ",@val) }); push @{$self->{'lookup'}->{$key}}, @val; } } else { my $key = $self->fill_in($rec,$i->{'key'}); my @val = $self->fill_in($rec,$i->{'val'}); if ($key && @val) { + $log->debug("stored $key = ",sub { join(" | ",@val) }); push @{$self->{'lookup'}->{$key}}, @val; } } @@ -384,16 +397,17 @@ $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); # do actual replacement of placeholders - $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; + $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; if ($found) { + $log->debug("format: $format"); if ($eval_code) { my $eval = $self->fill_in($rec,$eval_code,$i); return if (! $self->_eval($eval)); } # do we have lookups? - $log->debug("test format '$format' for lookups"); - if ($format =~ /\[[^\[\]]+\]/o) { + if ($format =~ /$LOOKUP_REGEX/o) { + $log->debug("format '$format' has lookup"); return $self->lookup($format); } else { return $format; @@ -420,19 +434,19 @@ my $tmp = shift || $log->logconfess("need format"); - if ($tmp =~ /\[[^\[\]]+\]/o) { + if ($tmp =~ /$LOOKUP_REGEX/o) { my @in = ( $tmp ); $log->debug("lookup for: ",$tmp); my @out; while (my $f = shift @in) { - if ($f =~ /\[([^\[\]]+)\]/) { + if ($f =~ /$LOOKUP_REGEX_SAVE/o) { my $k = $1; if ($self->{'lookup'}->{$k}) { foreach my $nv (@{$self->{'lookup'}->{$k}}) { my $tmp2 = $f; - $tmp2 =~ s/\[$k\]/$nv/g; + $tmp2 =~ s/lookup{$k}/$nv/g; push @in, $tmp2; } } else { @@ -442,6 +456,7 @@ push @out, $f; } } + $log->logconfess("return is array and it's not expected!") unless wantarray; return @out; } else { return $tmp; @@ -472,10 +487,12 @@ $i = 0 if (! $i); - my $format = $self->{'utf2cp'}->convert($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); + my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); my @out; + $log->debug("format: $format"); + my $eval_code; # remove eval{...} from beginning $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); @@ -483,7 +500,7 @@ my $prefix; my $all_found=0; - while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { + while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) { my $del = $1 || ''; $prefix ||= $del if ($all_found == 0); @@ -500,14 +517,21 @@ return if (! $all_found); - my $out = join('',@out) . $format; + my $out = join('',@out); + + if ($out) { + # add rest of format (suffix) + $out .= $format; - # add prefix if not there - $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); + # add prefix if not there + $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); + + $log->debug("result: $out"); + } if ($eval_code) { my $eval = $self->fill_in($rec,$eval_code,$i); - $log->debug("about to eval ",$eval," [$out]"); + $log->debug("about to eval{",$eval,"} format: $out"); return if (! $self->_eval($eval)); } @@ -539,9 +563,44 @@ push @arr, $v; } + $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr); + + return @arr; +} + +=head2 fill_in_to_arr + +Similar to C, but returns array of all repeatable fields. Usable +for fields which have lookups, so they shouldn't be parsed but rather +Ced. + + my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]'); + +=cut + +sub fill_in_to_arr { + my $self = shift; + + my ($rec, $format_utf8) = @_; + + my $log = $self->_get_logger(); + + $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); + return if (! $format_utf8); + + my $i = 0; + my @arr; + + while (my @v = $self->fill_in($rec,$format_utf8,$i++)) { + push @arr, @v; + } + + $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr); + return @arr; } + =head2 data_structure Create in-memory data structure which represents layout from C. @@ -549,6 +608,9 @@ my @ds = $webpac->data_structure($rec); +This method will also set C<$webpac->{'currnet_filename'}> if there is + tag in C. + =cut sub data_structure { @@ -559,6 +621,8 @@ my $rec = shift; $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); + undef $self->{'currnet_filename'}; + my @sorted_tags; if ($self->{tags_by_order}) { @sorted_tags = @{$self->{tags_by_order}}; @@ -569,6 +633,8 @@ my @ds; + $log->debug("tags: ",sub { join(", ",@sorted_tags) }); + foreach my $field (@sorted_tags) { my $row; @@ -576,10 +642,23 @@ #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}); foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) { - my @v = $self->parse_to_arr($rec,$tag->{'content'}); + my $format = $tag->{'value'} || $tag->{'content'}; + + $log->debug("format: $format"); + my @v; + if ($format =~ /$LOOKUP_REGEX/o) { + @v = $self->fill_in_to_arr($rec,$format); + } else { + @v = $self->parse_to_arr($rec,$format); + } next if (! @v); + # use format? + if ($tag->{'format_name'}) { + @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; + } + # does tag have type? if ($tag->{'type'}) { push @{$row->{$tag->{'type'}}}, @v; @@ -587,11 +666,24 @@ push @{$row->{'display'}}, @v; push @{$row->{'swish'}}, @v; } + + if ($field eq 'filename') { + $self->{'current_filename'} = join('',@v); + $log->debug("filename: ",$self->{'current_filename'}); + } + } if ($row) { $row->{'tag'} = $field; + + # TODO: name_sigular, name_plural + my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; + $row->{'name'} = $name ? $self->_x($name) : $field; + push @ds, $row; + + $log->debug("row $field: ",sub { Dumper($row) }); } } @@ -629,6 +721,47 @@ return $out; } +=head2 apply_format + +Apply format specified in tag with C and +C. + + my $text = $webpac->apply_format($format_name,$format_delimiter,$data); + +Formats can contain C if you need them. + +=cut + +sub apply_format { + my $self = shift; + + my ($name,$delimiter,$data) = @_; + + my $log = $self->_get_logger(); + + if (! $self->{'import_xml'}->{'format'}->{$name}) { + $log->warn(" is not defined in ",$self->{'import_xml_file'}); + return $data; + } + + $log->warn("no delimiter for format $name") if (! $delimiter); + + my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'"); + + my @data = split(/\Q$delimiter\E/, $data); + + my $out = sprintf($format, @data); + $log->debug("using format $name [$format] on $data to produce: $out"); + + if ($out =~ m/$LOOKUP_REGEX/o) { + return $self->lookup($out); + } else { + return $out; + } + +} + + # # # @@ -681,11 +814,36 @@ return $va <=> $vb; } +=head2 _get_logger + +Get C object with a twist: domains are defined for each +method + + my $log = $webpac->_get_logger(); + +=cut + sub _get_logger { my $self = shift; - my @c = caller(1); - return get_logger($c[3]); + my $name = (caller(1))[3] || caller; + return get_logger($name); +} + +=head2 _x + +Convert string from UTF-8 to code page defined in C. + + my $text = $webpac->_x('utf8 text'); + +=cut + +sub _x { + my $self = shift; + my $utf8 = shift || return; + + return $self->{'utf2cp'}->convert($utf8) || + $self->_get_logger()->logwarn("can't convert '$utf8'"); } #