--- trunk/parse_format.pm 2003/11/24 01:16:04 176 +++ trunk/parse_format.pm 2005/02/27 23:07:35 678 @@ -10,13 +10,17 @@ my $i = shift || 0; # isis repeatable number my $codepage = shift || die "parse_format must be called with codepage!"; if ($type eq "isis") { - return parse_iso_format($format,$row,$i,$codepage,'isis_sf'); + return parse_iso_format($format,$row,$i,$codepage,'hash_sf'); } elsif ($type eq "excel") { return parse_excel_format($format,$row,$i,$codepage); } elsif ($type eq "marc") { return parse_iso_format($format,$row,$i,$codepage,'marc_sf'); } elsif ($type eq "feed") { return parse_feed_format($format,$row,$i,$codepage); + } elsif ($type eq "dbf") { + return parse_iso_format($format,$row,$i,$codepage,'hash_sf'); + } else { + confess "FATAL: unknown type '$type'"; } } @@ -55,6 +59,9 @@ my $f = $format; + my $eval; + $eval = $1 if ($f =~ s/^eval{([^}]+?)}//); + if ($f =~ s/^([^\d]+)//) { if ($f) { # there is more to parse push @fmt,$1; @@ -80,7 +87,7 @@ # this might be our local scpeciality -- fields 10 and 11 # (as opposed to 010 and 011) so they are strictly listed # here - } elsif ($f =~ s/^(1[01])//) { + } elsif ($f =~ s/^(1[01]\w?)//) { push @fmt,$1; push @fmt,undef; } elsif ($f =~ s/^mfn//i) { @@ -111,9 +118,12 @@ } } push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix + + $cache->{format_eval}->{$format} = $eval; # store eval string (if any) + $cache->{format}->{$format} = \@fmt; - print STDERR "storing format for '$format': [",join("|",@fmt),"]\n"; +# print STDERR "storing format for '$format': [",join("|",@fmt),"]\n"; # print STDERR "storing format for '$format':",Dumper(@fmt),"\n"; # print STDERR Dumper($cache->{format}->{$format}); } @@ -151,11 +161,33 @@ $sufix = shift @fmt; } $display = $prefix.$display.$sufix if ($display); - print STDERR "format left unused: [",join("|",@fmt),"]\n" if (@fmt); - print STDERR "format: [",join("|",@{$tmp}),"]\n" if (@fmt); + my $eval = $cache->{format_eval}->{$format}; + if ($eval) { + sub fld2str { + my ($func,$row,$f,$sf,$i) = @_; +#print STDERR "## in fld2str\n"; + my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || $codepage->convert(&$func($row,$f,$sf,0)) || ''; + return "'$tmp'"; + } + + $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg; +#print STDERR "## eval: $eval\n"; + if (eval "$eval") { + die "eval error: eval{$eval}: $@" if ($@); + return ($swish,$display); + } else { + die "eval error: eval{$eval}: $@" if ($@); + return (undef,undef); + } + } + + if (@fmt) { + print STDERR "format left unused: [",join("|",@fmt),"]\n"; + print STDERR "format: [",join("|",@{$tmp}),"]\n"; + } -# print STDERR "display: $display swish: $swish\n"; +# print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n"; return ($swish,$display); } @@ -166,7 +198,14 @@ my $format = shift; my $row = shift; my $i = shift; - my $codepage = shift; + #my $codepage = shift; + # + # data allready comes in utf-8 due to change in + # SpreadSheet::ParseExcel::FmtDefault line 69 from + # return pack('C*', unpack('n*', $sTxt)); + # to following which returns utf-8: + # return pack('U*', unpack('n*', $sTxt)); + # return if ($i > 0); # Excel doesn't support repeatable fields @@ -187,9 +226,6 @@ #print STDERR "--$1-> $format -[",length($format),"] "; if ($row->{$1}) { my $tmp = $row->{$1}; - if ($codepage) { - $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert"; - } $display .= $prefix . $tmp; $swish .= $tmp." "; #print STDERR " == $tmp"; @@ -198,7 +234,7 @@ } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) { $prefix .= $1 if ($display); } else { - print STDERR "unparsed format: $format\n"; + #print STDERR "unparsed format: $format\n"; $prefix .= $format; $format = ""; }