#-------------------------------------------------------------
#
# parse_format(...)
#
# Turns a format specification plus one input record into a pair
# of strings: ($swish, $display).
#
#   $swish   - every extracted value followed by a single space
#   $display - the extracted values joined with the literal text
#              (prefix / separators / suffix) found in the format
#
#-------------------------------------------------------------

use strict;
use warnings;

# Package-global cache. Parsed ISIS/MARC formats are stored under
# $cache->{format}->{<format string>} so each format is tokenized once.
our $cache;

#-------------------------------------------------------------
#
# parse_format($type, $format, $row, $i, $codepage)
#
#   $type     - one of "isis", "marc", "excel" or "feed"
#   $format   - format specification string
#   $row      - record (hash reference)
#   $i        - repeatable field number, defaults to 0
#   $codepage - object with a convert() method
#
# Dies when type, format, row or codepage is missing (false).
# Returns ($swish, $display) from the type-specific parser, or an
# empty list (after a message on STDERR) for an unknown type.
#
sub parse_format {
    my $type     = shift || die "parse_format must be called with type!";
    my $format   = shift || die "parse_format must be called with format!";
    my $row      = shift || die "parse_format must be called with row!";
    my $i        = shift || 0;    # isis repeatable number
    my $codepage = shift || die "parse_format must be called with codepage!";

    if ($type eq "isis") {
        return parse_iso_format($format, $row, $i, $codepage, 'isis_sf');
    }
    elsif ($type eq "excel") {
        return parse_excel_format($format, $row, $i, $codepage);
    }
    elsif ($type eq "marc") {
        return parse_iso_format($format, $row, $i, $codepage, 'marc_sf');
    }
    elsif ($type eq "feed") {
        return parse_feed_format($format, $row, $i, $codepage);
    }

    # Previously an unknown type fell off the end of the if/elsif chain
    # and returned whatever the last comparison evaluated to.
    print STDERR "parse_format: unknown type '$type'\n";
    return;
}

#-------------------------------------------------------------
#
# cnv_cp($codepage, $text)
#
# Returns nothing when $text is false. Otherwise returns $text run
# through $codepage->convert() when $codepage is true. If the
# conversion yields nothing, a message is printed on STDERR and the
# unconverted text is returned (the text used to be replaced by the
# return value of print).
#
sub cnv_cp {
    my $codepage = shift;
    my $tmp = shift || return;

    return $tmp unless ($codepage);

    my $converted = $codepage->convert($tmp);
    return $converted if (defined($converted) && length($converted));

    print STDERR "iso: '$tmp' can't convert\n";
    return $tmp;
}

#-------------------------------------------------------------
#
# _parse_iso_format_spec($format)
#
# Tokenizes an ISIS/MARC format string. Returns an array reference
# laid out as
#
#   (prefix, field, subfield, suffix, field, subfield, suffix, ...)
#
# where subfield is undef when the format names none.
#
sub _parse_iso_format_spec {
    my $f = shift;
    my @fmt;

    if ($f =~ s/^([^\d]+)//) {
        my $lead = $1;
        if ($f) {    # there is more to parse
            push @fmt, $lead;
        }
        else {
            # the whole format is a single non-numeric field name
            @fmt = ('', $lead, undef, '');
        }
    }
    else {
        push @fmt, '';
    }

    while ($f) {
        # this is EBSCO special to support numeric subfield in
        # form of 856#3
        if ($f =~ s/^(\d\d\d)#*(\w?)//) {
            push @fmt, $1;
            push @fmt, ($2 ? $2 : undef);
        }
        # this might be our local speciality -- fields 10 and 11
        # (as opposed to 010 and 011) so they are strictly listed
        # here
        elsif ($f =~ s/^(1[01])//) {
            push @fmt, $1;
            push @fmt, undef;
        }
        elsif ($f =~ s/^mfn//i) {
            push @fmt, 'mfn';
            push @fmt, '';
        }
        # literal text; up to three following digits are left in place
        # for the next iteration
        elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
            push @fmt, $1;
        }
        elsif ($f =~ s/^(\d{1,2})//) {
            push @fmt, $1;
        }
        else {
            print STDERR "unparsed format: $f\n";
            $f = "";
        }
    }
    push @fmt, '' if ($#fmt % 3 != 0);    # add empty suffix

    return \@fmt;
}

#-------------------------------------------------------------
#
# parse_iso_format($format, $row, $i, $codepage, $func)
#
#   $func - name of the sub-field function; "$func.pm" is loaded with
#           require and the function is called as
#           $func($row, $field, $subfield, $i)
#
# Dies when $func is missing or a field name in the parsed format is
# empty. Returns ($swish, $display).
#
sub parse_iso_format {
    my $format   = shift;
    my $row      = shift;
    my $i        = shift;
    my $codepage = shift;

    my $func = shift || die "need to know which sub-field function to use";

    require $func . ".pm";

    # Resolve the function once; \&{"name"} is permitted under strict refs.
    my $sf_func = \&{$func};

    # if format doesn't exist, store it in cache
    if (!defined($cache->{format}->{$format})) {
        $cache->{format}->{$format} = _parse_iso_format_spec($format);
    }

    # now produce actual record
    my $parsed = $cache->{format}->{$format} || die "no format cache for '$format'";
    my @fmt = @{$parsed};    # work on a copy, the cached list is consumed below

    my $display;
    my $swish;

    my $prefix = shift @fmt;
    my $sufix;
    while ($#fmt > 1) {
        my $f  = shift(@fmt) || die "BUG: field name can't be empty!";
        my $sf = shift @fmt;

        if ($f eq 'mfn' && $i == 0) {
            $display .= (defined($sufix) ? $sufix : '') if ($display);
            $display .= $row->{mfn};
        }
        else {
            my $val = $sf_func->($row, $f, $sf, $i);
            if ($val) {
                my $tmp = cnv_cp($codepage, $val);
                if ($display) {
                    $display .= (defined($sufix) ? $sufix : '') . $tmp;
                }
                else {
                    $display = $tmp;
                }
                $swish .= $tmp . " ";
            }
        }
        # literal text that follows this field; emitted before the next
        # value or as the final suffix
        $sufix = shift @fmt;
    }
    $display = $prefix . $display . (defined($sufix) ? $sufix : '') if ($display);
    print STDERR "format left unused: [",
        join("|", map { defined($_) ? $_ : '' } @fmt), "]\n" if (@fmt);

    return ($swish, $display);
}

#-------------------------------------------------------------
#
# _parse_keyed_format($label, $key_re, $literal_re, $warn_eol,
#                     $format, $data, $i, $codepage)
#
# Shared implementation of parse_excel_format and parse_feed_format.
# The format consists of |KEY| place-holders separated by literal text.
#
#   $label      - name used in the "can't convert" warning
#   $key_re     - pattern matching a KEY between the bars
#   $literal_re - pattern matching ONE character of literal text
#   $warn_eol   - text appended to the warning ("" lets warn add the
#                 file and line, "\n" suppresses that)
#
# Returns nothing when $i > 0, otherwise ($swish, $display).
#
sub _parse_keyed_format {
    my ($label, $key_re, $literal_re, $warn_eol,
        $format, $data, $i, $codepage) = @_;

    return if (($i || 0) > 0);

    $format = '' unless (defined($format));

    # at most three leading literal characters become the prefix
    my $prefix = "";
    if ($format =~ s/^((?:$literal_re){1,3})//) {
        $prefix = $1;
    }

    my $display;
    my $swish;

    # loop on length so that a remaining literal "0" is not dropped
    while (length($format)) {
        if ($format =~ s/^\|($key_re)\|//) {
            my $key = $1;
            if ($data->{$key}) {
                my $tmp = $data->{$key};
                if ($codepage) {
                    my $converted = $codepage->convert($tmp);
                    if (defined($converted) && length($converted)) {
                        $tmp = $converted;
                    }
                    else {
                        # keep the unconverted text; it used to be
                        # replaced by the return value of warn
                        warn "$label: $key '$tmp' can't convert$warn_eol";
                    }
                }
                $display .= $prefix . $tmp;
                $swish .= $tmp . " ";
            }
            $prefix = "";
        }
        elsif ($format =~ s/^((?:$literal_re)+)(\|$key_re\|)/$2/) {
            # separators are kept only once something has been output
            $prefix .= $1 if ($display);
        }
        else {
            print STDERR "unparsed format: $format\n";
            $prefix .= $format;
            $format = "";
        }
    }
    # add suffix
    $display .= $prefix if ($display);

    return ($swish, $display);
}

#-------------------------------------------------------------
#
# parse_excel_format($format, $row, $i, $codepage)
#
# Place-holders are |A| .. |ZZ| (one or two capital letters) and are
# looked up as keys of $row. Returns nothing when $i > 0.
#
sub parse_excel_format {
    my $format   = shift;
    my $row      = shift;
    my $i        = shift;
    my $codepage = shift;

    # Excel doesn't support repeatable fields
    return _parse_keyed_format('excel', qr/[A-Z]{1,2}/, qr/[^A-Z\|]/, '',
        $format, $row, $i, $codepage);
}

#-------------------------------------------------------------
#
# parse_feed_format($format, $data, $i, $codepage)
#
# Place-holders are |<digits>| and are looked up as keys of $data.
# Returns nothing when $i > 0.
#
sub parse_feed_format {
    my $format   = shift;
    my $data     = shift;
    my $i        = shift;
    my $codepage = shift;

    # XXX feed doesn't support repeatable fields, but they really
    # should. This is a bug. It should be fixed!
    return _parse_keyed_format('feed', qr/\d+/, qr/[^\d\|]/, "\n",
        $format, $data, $i, $codepage);
}

#-------------------------------------------------------------

1;