--- trunk/parse_format.pm 2003/07/04 20:11:48 62 +++ branches/humanistika/parse_format.pm 2004/03/12 16:02:06 267 @@ -3,7 +3,6 @@ # parse_format(...) # - sub parse_format { my $type = shift || die "parset_format must be called with type!"; my $format = shift || die "parse_format must be called with format!"; @@ -16,6 +15,8 @@ return parse_excel_format($format,$row,$i,$codepage); } elsif ($type eq "marc") { return parse_iso_format($format,$row,$i,$codepage,'marc_sf'); + } elsif ($type eq "feed") { + return parse_feed_format($format,$row,$i,$codepage); } } @@ -35,61 +36,152 @@ my $out; my $out_swish; - my $prefix = ""; - if ($format =~ s/^([^\d]+)//) { - $prefix = $1; - } - my $display; my $swish; sub cnv_cp { - my $tmp = shift; + my $codepage = shift; + my $tmp = shift || return; if ($codepage) { - $tmp = $codepage->convert($tmp) || print STDERR "$1$2 = '$tmp' can't convert"; + $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n"; } return $tmp; } - while ($format) { -#print STDERR "\n#### $format"; - # this is EBSCO special to support numeric subfield in - # form of 856#3 - if ($format =~ s/^(\d\d\d)#*(\w?)//) { - my $tmp = get_sf($row,$1,$2,$i); - if ($tmp) { - $display .= $prefix.cnv_cp($tmp); - $swish .= $tmp." "; -#print STDERR " == $tmp"; + # if format doesn't exits, store it in cache + if (! defined($cache->{format}->{$format})) { +# print STDERR "parsing format for '$format'\n"; + my @fmt; + + my $f = $format; + + my $eval; + $eval = $1 if ($f =~ s/^eval{([^}]+)}//); + + if ($f =~ s/^([^\d]+)//) { + if ($f) { # there is more to parse + push @fmt,$1; + } else { + @fmt = ('',$1,undef,''); +#print STDERR "just one field: $1\n"; } - $prefix = ""; - # this might be our local scpeciality -- fields 10 and 11 - # (as opposed to 010 and 011) so they are strictly listed - # here - } elsif ($format =~ s/^(1[01])//) { - my $tmp = get_sf($row,$1,undef,$i); - if ($tmp) { - $display .= $prefix.cnv_cp($tmp); + } else { + push @fmt,''; + } + + while ($f) { +# print STDERR "\n#### $f"; + # this is EBSCO special to support numeric subfield in + # form of 856#3 + if ($f =~ s/^(\d\d\d)#*(\w?)//) { + push @fmt,$1; + if ($2) { + push @fmt,$2; + } else { + push @fmt,undef; + } + # this might be our local scpeciality -- fields 10 and 11 + # (as opposed to 010 and 011) so they are strictly listed + # here + } elsif ($f =~ s/^(1[01]\w?)//) { + push @fmt,$1; + push @fmt,undef; + } elsif ($f =~ s/^mfn//i) { + push @fmt,'mfn'; + push @fmt,''; + } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) { + # still prefix? + if ($#fmt == 0) { + $fmt[0] .= $1; + } else { + push @fmt,$1; + } + } elsif ($f =~ s/^([^\d]+\d{0,2})//) { + if ($#fmt == 0) { + $fmt[0] .= $1; + } else { + push @fmt,$1; + } + } elsif ($f =~ s/^(\d{1,2})//) { + if ($#fmt == 0) { + $fmt[0] .= $1; + } else { + push @fmt,$1; + } + } else { + print STDERR "unparsed format: $f\n"; + $f = ""; + } + } + push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix + + $cache->{format_eval}->{$format} = $eval; # store eval string (if any) + + $cache->{format}->{$format} = \@fmt; + +# print STDERR "storing format for '$format': [",join("|",@fmt),"]\n"; +# print STDERR "storing format for '$format':",Dumper(@fmt),"\n"; +# print STDERR Dumper($cache->{format}->{$format}); + } + + # now produce actual record + my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'"; + my @fmt = @{$tmp}; +# print STDERR "using format for '$format':",Dumper(@fmt),"\n"; +# print STDERR "tmp ",Dumper($tmp); +# print STDERR "cache: ",Dumper($cache->{format}->{$format}); + + # prefix + my $prefix = shift @fmt; + my $sufix; + while($#fmt > 1) { + my $f = shift @fmt || die "BUG: field name can't be empty!"; + my $sf = shift @fmt; + + if ($f eq 'mfn' && $i == 0) { + $display .= $sufix if ($display); + $display .= $row->{mfn}; + } else { + my $val = &$func($row,$f,$sf,$i); + if ($val) { +# print STDERR "val: $val\n"; + my $tmp = cnv_cp($codepage,$val); + if ($display) { + $display .= $sufix.$tmp; + } else { + $display = $tmp; + } $swish .= $tmp." "; } - $prefix = ""; - } elsif ($format =~ s/^mfn//i) { - $display .= $prefix . $row->{mfn}; - $prefix = ""; - } elsif ($format =~ s/^([^\d]+)(\d{0,3})/$2/) { - $prefix .= $1 if ($display); - } elsif ($format =~ s/^([^\d]+\d{0,2})//) { - $prefix .= $1 if ($display); - } elsif ($format =~ s/^(\d{1,2})//) { - $prefix .= $1 if ($display); + } + $sufix = shift @fmt; + } + $display = $prefix.$display.$sufix if ($display); + + my $eval = $cache->{format_eval}->{$format}; + if ($eval) { + sub fld2str { + my ($func,$row,$f,$sf,$i) = @_; +#print STDERR "## in fld2str\n"; + my $tmp = &$func($row,$f,$sf,$i) || ''; + return "'$tmp'"; + } + + $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg; +#print STDERR "## eval: $eval\n"; + if (eval "$eval") { + return ($swish,$display); } else { - print STDERR "unparsed format: $format\n"; - $prefix .= $format; - $format = ""; + return (undef,undef); } } - # add suffix - $display .= $prefix if ($display); + + if (@fmt) { + print STDERR "format left unused: [",join("|",@fmt),"]\n"; + print STDERR "format: [",join("|",@{$tmp}),"]\n"; + } + +# print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n"; return ($swish,$display); } @@ -102,6 +194,8 @@ my $i = shift; my $codepage = shift; + return if ($i > 0); # Excel doesn't support repeatable fields + my $out; my $out_swish; @@ -142,4 +236,58 @@ return ($swish,$display); } +#------------------------------------------------------------- + +sub parse_feed_format { + my $format = shift; + my $data = shift; + my $i = shift; + my $codepage = shift; + + # XXX feed doesn't support repeatable fields, but they really + # should, This is a bug. It should be fixed! + return if ($i > 0); + + my $out; + my $out_swish; + + my $prefix = ""; + if ($format =~ s/^([^\d\|]{1,3})//) { + $prefix = $1; + } + + my $display; + my $swish; + + while ($format && length($format) > 0) { +#print STDERR "\n#### $format #"; + if ($format =~ s/^\|(\d+)\|//) { +#print STDERR "--$1-> $format -[",length($format),"] "; + if ($data->{$1}) { + my $tmp = $data->{$1}; + if ($codepage) { + $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n"; + } + $display .= $prefix . $tmp; + $swish .= $tmp." "; +#print STDERR " == $tmp"; + } + $prefix = ""; + } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) { + $prefix .= $1 if ($display); + } else { + print STDERR "unparsed format: $format\n"; + $prefix .= $format; + $format = ""; + } +#print STDERR " display: $display swish: $swish [format: $format]"; + } + # add suffix + $display .= $prefix if ($display); + + return ($swish,$display); +} + +#------------------------------------------------------------- + 1;