#-------------------------------------------------------------
#
# parse_format(...)
#
sub parse_format {
my $type = shift || die "parset_format must be called with type!";
my $format = shift || die "parse_format must be called with format!";
my $row = shift || die "parse_format must be called with row!";
my $i = shift || 0; # isis repeatable number
my $codepage = shift || die "parse_format must be called with codepage!";
if ($type eq "isis") {
return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
} elsif ($type eq "excel") {
return parse_excel_format($format,$row,$i,$codepage);
} elsif ($type eq "marc") {
return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
} elsif ($type eq "feed") {
return parse_feed_format($format,$row,$i,$codepage);
} elsif ($type eq "dbf") {
return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
} else {
confess "FATAL: unknown type '$type'";
}
}
#-------------------------------------------------------------
sub parse_iso_format {
my $format = shift;
my $row = shift;
my $i = shift;
my $codepage = shift;
my $func = shift || die "need to know which sub-field function to use";
require $func.".pm";
my $out;
my $out_swish;
my $display;
my $swish;
sub cnv_cp {
my $codepage = shift;
my $tmp = shift || return;
if ($codepage) {
$tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
}
return $tmp;
}
# if format doesn't exits, store it in cache
if (! defined($cache->{format}->{$format})) {
# print STDERR "parsing format for '$format'\n";
my @fmt;
my $f = $format;
my $eval;
$eval = $1 if ($f =~ s/^eval{([^}]+?)}//);
if ($f =~ s/^([^\d]+)//) {
if ($f) { # there is more to parse
push @fmt,$1;
} else {
@fmt = ('',$1,undef,'');
#print STDERR "just one field: $1\n";
}
} else {
push @fmt,'';
}
while ($f) {
# print STDERR "\n#### $f";
# this is EBSCO special to support numeric subfield in
# form of 856#3
if ($f =~ s/^(\d\d\d)#*(\w?)//) {
push @fmt,$1;
if ($2) {
push @fmt,$2;
} else {
push @fmt,undef;
}
# this might be our local scpeciality -- fields 10 and 11
# (as opposed to 010 and 011) so they are strictly listed
# here
} elsif ($f =~ s/^(1[01]\w?)//) {
push @fmt,$1;
push @fmt,undef;
} elsif ($f =~ s/^mfn//i) {
push @fmt,'mfn';
push @fmt,'';
} elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
# still prefix?
if ($#fmt == 0) {
$fmt[0] .= $1;
} else {
push @fmt,$1;
}
} elsif ($f =~ s/^([^\d]+\d{0,2})//) {
if ($#fmt == 0) {
$fmt[0] .= $1;
} else {
push @fmt,$1;
}
} elsif ($f =~ s/^(\d{1,2})//) {
if ($#fmt == 0) {
$fmt[0] .= $1;
} else {
push @fmt,$1;
}
} else {
print STDERR "unparsed format: $f\n";
$f = "";
}
}
push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
$cache->{format_eval}->{$format} = $eval; # store eval string (if any)
$cache->{format}->{$format} = \@fmt;
# print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
# print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
# print STDERR Dumper($cache->{format}->{$format});
}
# now produce actual record
my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
my @fmt = @{$tmp};
# print STDERR "using format for '$format':",Dumper(@fmt),"\n";
# print STDERR "tmp ",Dumper($tmp);
# print STDERR "cache: ",Dumper($cache->{format}->{$format});
# prefix
my $prefix = shift @fmt;
my $sufix;
while($#fmt > 1) {
my $f = shift @fmt || die "BUG: field name can't be empty!";
my $sf = shift @fmt;
if ($f eq 'mfn' && $i == 0) {
$display .= $sufix if ($display);
$display .= $row->{mfn};
} else {
my $val = &$func($row,$f,$sf,$i);
if ($val) {
# print STDERR "val: $val\n";
my $tmp = cnv_cp($codepage,$val);
if ($display) {
$display .= $sufix.$tmp;
} else {
$display = $tmp;
}
$swish .= $tmp." ";
}
}
$sufix = shift @fmt;
}
$display = $prefix.$display.$sufix if ($display);
my $eval = $cache->{format_eval}->{$format};
if ($eval) {
sub fld2str {
my ($func,$row,$f,$sf,$i) = @_;
#print STDERR "## in fld2str\n";
my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || $codepage->convert(&$func($row,$f,$sf,0)) || '';
return "'$tmp'";
}
$eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
#print STDERR "## eval: $eval\n";
if (eval "$eval") {
die "eval error: eval{$eval}: $@" if ($@);
return ($swish,$display);
} else {
die "eval error: eval{$eval}: $@" if ($@);
return (undef,undef);
}
}
if (@fmt) {
print STDERR "format left unused: [",join("|",@fmt),"]\n";
print STDERR "format: [",join("|",@{$tmp}),"]\n";
}
# print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
return ($swish,$display);
}
#-------------------------------------------------------------
sub parse_excel_format {
my $format = shift;
my $row = shift;
my $i = shift;
#my $codepage = shift;
#
# data allready comes in utf-8 due to change in
# SpreadSheet::ParseExcel::FmtDefault line 69 from
# return pack('C*', unpack('n*', $sTxt));
# to following which returns utf-8:
# return pack('U*', unpack('n*', $sTxt));
#
return if ($i > 0); # Excel doesn't support repeatable fields
my $out;
my $out_swish;
my $prefix = "";
if ($format =~ s/^([^A-Z\|]{1,3})//) {
$prefix = $1;
}
my $display;
my $swish;
while ($format && length($format) > 0) {
#print STDERR "\n#### $format #";
if ($format =~ s/^\|([A-Z]{1,2})\|//) {
#print STDERR "--$1-> $format -[",length($format),"] ";
if ($row->{$1}) {
my $tmp = $row->{$1};
$display .= $prefix . $tmp;
$swish .= $tmp." ";
#print STDERR " == $tmp";
}
$prefix = "";
} elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
$prefix .= $1 if ($display);
} else {
#print STDERR "unparsed format: $format\n";
$prefix .= $format;
$format = "";
}
#print STDERR " display: $display swish: $swish [format: $format]";
}
# add suffix
$display .= $prefix if ($display);
return ($swish,$display);
}
#-------------------------------------------------------------
sub parse_feed_format {
my $format = shift;
my $data = shift;
my $i = shift;
my $codepage = shift;
# XXX feed doesn't support repeatable fields, but they really
# should, This is a bug. It should be fixed!
return if ($i > 0);
my $out;
my $out_swish;
my $prefix = "";
if ($format =~ s/^([^\d\|]{1,3})//) {
$prefix = $1;
}
my $display;
my $swish;
while ($format && length($format) > 0) {
#print STDERR "\n#### $format #";
if ($format =~ s/^\|(\d+)\|//) {
#print STDERR "--$1-> $format -[",length($format),"] ";
if ($data->{$1}) {
my $tmp = $data->{$1};
if ($codepage) {
$tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
}
$display .= $prefix . $tmp;
$swish .= $tmp." ";
#print STDERR " == $tmp";
}
$prefix = "";
} elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
$prefix .= $1 if ($display);
} else {
print STDERR "unparsed format: $format\n";
$prefix .= $format;
$format = "";
}
#print STDERR " display: $display swish: $swish [format: $format]";
}
# add suffix
$display .= $prefix if ($display);
return ($swish,$display);
}
#-------------------------------------------------------------
1;