--- trunk/parse_format.pm 2003/07/04 15:05:23 57 +++ trunk/parse_format.pm 2004/03/29 19:41:12 293 @@ -3,7 +3,6 @@ # parse_format(...) # - sub parse_format { my $type = shift || die "parset_format must be called with type!"; my $format = shift || die "parse_format must be called with format!"; @@ -11,73 +10,225 @@ my $i = shift || 0; # isis repeatable number my $codepage = shift || die "parse_format must be called with codepage!"; if ($type eq "isis") { - return parse_isis_format($format,$row,$i,$codepage); + return parse_iso_format($format,$row,$i,$codepage,'isis_sf'); } elsif ($type eq "excel") { return parse_excel_format($format,$row,$i,$codepage); + } elsif ($type eq "marc") { + return parse_iso_format($format,$row,$i,$codepage,'marc_sf'); + } elsif ($type eq "feed") { + return parse_feed_format($format,$row,$i,$codepage); } } #------------------------------------------------------------- -sub parse_isis_format { - use isis_sf; +sub parse_iso_format { + + my $format = shift; + my $row = shift; + my $i = shift; + my $codepage = shift; + + my $func = shift || die "need to know which sub-field function to use"; + + require $func.".pm"; + + my $out; + my $out_swish; + + my $display; + my $swish; + + sub cnv_cp { + my $codepage = shift; + my $tmp = shift || return; + if ($codepage) { + $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n"; + } + return $tmp; + } + + # if format doesn't exits, store it in cache + if (! defined($cache->{format}->{$format})) { +# print STDERR "parsing format for '$format'\n"; + my @fmt; + + my $f = $format; + + my $eval; + $eval = $1 if ($f =~ s/^eval{([^}]+)}//); + + if ($f =~ s/^([^\d]+)//) { + if ($f) { # there is more to parse + push @fmt,$1; + } else { + @fmt = ('',$1,undef,''); +#print STDERR "just one field: $1\n"; + } + } else { + push @fmt,''; + } + + while ($f) { +# print STDERR "\n#### $f"; + # this is EBSCO special to support numeric subfield in + # form of 856#3 + if ($f =~ s/^(\d\d\d)#*(\w?)//) { + push @fmt,$1; + if ($2) { + push @fmt,$2; + } else { + push @fmt,undef; + } + # this might be our local scpeciality -- fields 10 and 11 + # (as opposed to 010 and 011) so they are strictly listed + # here + } elsif ($f =~ s/^(1[01]\w?)//) { + push @fmt,$1; + push @fmt,undef; + } elsif ($f =~ s/^mfn//i) { + push @fmt,'mfn'; + push @fmt,''; + } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) { + # still prefix? + if ($#fmt == 0) { + $fmt[0] .= $1; + } else { + push @fmt,$1; + } + } elsif ($f =~ s/^([^\d]+\d{0,2})//) { + if ($#fmt == 0) { + $fmt[0] .= $1; + } else { + push @fmt,$1; + } + } elsif ($f =~ s/^(\d{1,2})//) { + if ($#fmt == 0) { + $fmt[0] .= $1; + } else { + push @fmt,$1; + } + } else { + print STDERR "unparsed format: $f\n"; + $f = ""; + } + } + push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix + + $cache->{format_eval}->{$format} = $eval; # store eval string (if any) + + $cache->{format}->{$format} = \@fmt; + +# print STDERR "storing format for '$format': [",join("|",@fmt),"]\n"; +# print STDERR "storing format for '$format':",Dumper(@fmt),"\n"; +# print STDERR Dumper($cache->{format}->{$format}); + } + + # now produce actual record + my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'"; + my @fmt = @{$tmp}; +# print STDERR "using format for '$format':",Dumper(@fmt),"\n"; +# print STDERR "tmp ",Dumper($tmp); +# print STDERR "cache: ",Dumper($cache->{format}->{$format}); + + # prefix + my $prefix = shift @fmt; + my $sufix; + while($#fmt > 1) { + my $f = shift @fmt || die "BUG: field name can't be empty!"; + my $sf = shift @fmt; + + if ($f eq 'mfn' && $i == 0) { + $display .= $sufix if ($display); + $display .= $row->{mfn}; + } else { + my $val = &$func($row,$f,$sf,$i); + if ($val) { +# print STDERR "val: $val\n"; + my $tmp = cnv_cp($codepage,$val); + if ($display) { + $display .= $sufix.$tmp; + } else { + $display = $tmp; + } + $swish .= $tmp." "; + } + } + $sufix = shift @fmt; + } + $display = $prefix.$display.$sufix if ($display); + + my $eval = $cache->{format_eval}->{$format}; + if ($eval) { + sub fld2str { + my ($func,$row,$f,$sf,$i) = @_; +#print STDERR "## in fld2str\n"; + my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || ''; + return "'$tmp'"; + } + + $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg; +#print STDERR "## eval: $eval\n"; + if (eval "$eval") { + return ($swish,$display); + } else { + return (undef,undef); + } + } + + if (@fmt) { + print STDERR "format left unused: [",join("|",@fmt),"]\n"; + print STDERR "format: [",join("|",@{$tmp}),"]\n"; + } + +# print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n"; + return ($swish,$display); +} + +#------------------------------------------------------------- + +sub parse_excel_format { my $format = shift; my $row = shift; my $i = shift; my $codepage = shift; + return if ($i > 0); # Excel doesn't support repeatable fields + my $out; my $out_swish; my $prefix = ""; - if ($format =~ s/^([^\d]+)//) { + if ($format =~ s/^([^A-Z\|]{1,3})//) { $prefix = $1; } my $display; my $swish; - while ($format) { -#print STDERR "\n#### $format"; - if ($format =~ s/^(\d\d\d)(\w?)//) { - my $tmp = isis_sf($row,$1,$2,$i); - if ($tmp) { + while ($format && length($format) > 0) { +#print STDERR "\n#### $format #"; + if ($format =~ s/^\|([A-Z]{1,2})\|//) { +#print STDERR "--$1-> $format -[",length($format),"] "; + if ($row->{$1}) { + my $tmp = $row->{$1}; if ($codepage) { - $tmp = $codepage->convert($tmp) || warn "row: ",$row->{mfn},", $1$2 '$tmp' can't convert"; + $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert"; } $display .= $prefix . $tmp; $swish .= $tmp." "; #print STDERR " == $tmp"; } $prefix = ""; - # this might be our local scpeciality -- fields 10 and 11 - # (as opposed to 010 and 011) so they are strictly listed - # here - } elsif ($format =~ s/^(1[01])//) { - my $tmp = isis_sf($row,$1,undef,$i); - if ($tmp) { - if ($codepage) { - $tmp = $codepage->convert($tmp) || warn "row: ",$row->{mfn},", $1$2 '$tmp' can't convert"; - } - $display .= $prefix . $tmp; - $swish .= $tmp." "; - } - $prefix = ""; - } elsif ($format =~ s/^mfn//i) { - $display .= $prefix . $row->{mfn}; - $prefix = ""; - } elsif ($format =~ s/^([^\d]+)(\d{0,3})/$2/) { - $prefix .= $1 if ($display); - } elsif ($format =~ s/^([^\d]+\d{0,2})//) { - $prefix .= $1 if ($display); - } elsif ($format =~ s/^(\d{1,2})//) { + } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) { $prefix .= $1 if ($display); } else { print STDERR "unparsed format: $format\n"; $prefix .= $format; $format = ""; } +#print STDERR " display: $display swish: $swish [format: $format]"; } # add suffix $display .= $prefix if ($display); @@ -87,17 +238,21 @@ #------------------------------------------------------------- -sub parse_excel_format { +sub parse_feed_format { my $format = shift; - my $row = shift; + my $data = shift; my $i = shift; my $codepage = shift; + # XXX feed doesn't support repeatable fields, but they really + # should, This is a bug. It should be fixed! + return if ($i > 0); + my $out; my $out_swish; my $prefix = ""; - if ($format =~ s/^([^A-Z\|]{1,3})//) { + if ($format =~ s/^([^\d\|]{1,3})//) { $prefix = $1; } @@ -106,19 +261,19 @@ while ($format && length($format) > 0) { #print STDERR "\n#### $format #"; - if ($format =~ s/^\|([A-Z]{1,2})\|//) { + if ($format =~ s/^\|(\d+)\|//) { #print STDERR "--$1-> $format -[",length($format),"] "; - if ($row->{$1}) { - my $tmp = $row->{$1}; + if ($data->{$1}) { + my $tmp = $data->{$1}; if ($codepage) { - $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert"; + $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n"; } $display .= $prefix . $tmp; $swish .= $tmp." "; #print STDERR " == $tmp"; } $prefix = ""; - } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) { + } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) { $prefix .= $1 if ($display); } else { print STDERR "unparsed format: $format\n"; @@ -133,4 +288,6 @@ return ($swish,$display); } +#------------------------------------------------------------- + 1;