Revision 684 (by dpavlin, 2005/02/28 10:43:38) updated branches to head
#-------------------------------------------------------------
#
# parse_format(...)
#

sub parse_format {
	my $type = shift || die "parset_format must be called with type!";
	my $format = shift || die "parse_format must be called with format!";
	my $row = shift || die "parse_format must be called with row!";
	my $i = shift || 0;	# isis repeatable number
	my $codepage = shift || die "parse_format must be called with codepage!";
	if ($type eq "isis") {
		return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
	} elsif ($type eq "excel") {
		return parse_excel_format($format,$row,$i,$codepage);
	} elsif ($type eq "marc") {
		return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
	} elsif ($type eq "feed") {
		return parse_feed_format($format,$row,$i,$codepage);
	} elsif ($type eq "dbf") {
		return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
	} else {
		confess "FATAL: unknown type '$type'";
	}
}

#-------------------------------------------------------------

sub parse_iso_format {

	my $format = shift;
	my $row = shift;
	my $i = shift;
	my $codepage = shift;

	my $func = shift || die "need to know which sub-field function to use";

	require $func.".pm";

	my $out;
	my $out_swish;

	my $display;
	my $swish;

	sub cnv_cp {
		my $codepage = shift;
		my $tmp = shift || return;
		if ($codepage) {
			$tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
		}
		return $tmp;
	}

	# if format doesn't exits, store it in cache
	if (! defined($cache->{format}->{$format})) {
#		print STDERR "parsing format for '$format'\n";
		my @fmt;

		my $f = $format;

		my $eval;
		$eval = $1 if ($f =~ s/^eval{([^}]+?)}//);

		if ($f =~ s/^([^\d]+)//) {
			if ($f) {	# there is more to parse
				push @fmt,$1;
			} else {
				@fmt = ('',$1,undef,'');
#print STDERR "just one field: $1\n";
			}
		} else {
			push @fmt,'';
		}

		while ($f) {
#	print STDERR "\n#### $f";
			# this is EBSCO special to support numeric subfield in
			# form of 856#3
			if ($f =~ s/^(\d\d\d*)#*(\w?)//) {
				push @fmt,$1;
				if ($2) {
					push @fmt,$2;
				} else {
					push @fmt,undef;
				}
			# this might be our local scpeciality -- fields 10 and 11
			# (as opposed to 010 and 011) so they are strictly listed
			# here
			} elsif ($f =~ s/^(1[01]\w?)//) {
				push @fmt,$1;
				push @fmt,undef;
			} elsif ($f =~ s/^mfn//i) {
				push @fmt,'mfn';
				push @fmt,'';
			} elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
				# still prefix?
				if ($#fmt == 0) {
					$fmt[0] .= $1;
				} else {
					push @fmt,$1;
				}
			} elsif ($f =~ s/^([^\d]+\d{0,2})//) {
				if ($#fmt == 0) {
					$fmt[0] .= $1;
				} else {
					push @fmt,$1;
				}
			} elsif ($f =~ s/^(\d{1,2})//) {
				if ($#fmt == 0) {
					$fmt[0] .= $1;
				} else {
					push @fmt,$1;
				}
			} else {
				print STDERR "unparsed format: $f\n";
				$f = "";
			}
		}
		push @fmt,'' if ($#fmt % 3 != 0);	# add empty suffix

		$cache->{format_eval}->{$format} = $eval; # store eval string (if any)

		$cache->{format}->{$format} = \@fmt;
		
#		print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
#		print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
#		print STDERR Dumper($cache->{format}->{$format});
	}

	# now produce actual record
	my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
	my @fmt = @{$tmp};
#	print STDERR "using format for '$format':",Dumper(@fmt),"\n";
#	print STDERR "tmp ",Dumper($tmp);
#	print STDERR "cache: ",Dumper($cache->{format}->{$format});

	# prefix
	my $prefix = shift @fmt;
	my $sufix;
	while($#fmt > 1) {
		my $f = shift @fmt || die "BUG: field name can't be empty!";
		my $sf = shift @fmt;

		if ($f eq 'mfn' && $i == 0) {
			$display .= $sufix if ($display);
			$display .= $row->{mfn};
		} else {
			my $val = &$func($row,$f,$sf,$i);
			if ($val) {
#				print STDERR "val: $val\n";
				my $tmp = cnv_cp($codepage,$val);
				if ($display) {
					$display .= $sufix.$tmp;
				} else {
					$display = $tmp;
				}
				$swish .= $tmp." ";
			}
		}
		$sufix = shift @fmt;
	}
	$display = $prefix.$display.$sufix if ($display);

	my $eval = $cache->{format_eval}->{$format};
	if ($eval) {
		sub fld2str {
			my ($func,$row,$f,$sf,$i) = @_;
#print STDERR "## in fld2str\n";
			my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) ||  $codepage->convert(&$func($row,$f,$sf,0)) || '';
			return "'$tmp'";
		}

		$eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
#print STDERR "## eval: $eval\n";
		if (eval "$eval") {
			die "eval error: eval{$eval}: $@" if ($@);
			return ($swish,$display);
		} else {
			die "eval error: eval{$eval}: $@" if ($@);
			return (undef,undef);
		}
	}

	if (@fmt) {
		print STDERR "format left unused: [",join("|",@fmt),"]\n";
		print STDERR "format: [",join("|",@{$tmp}),"]\n";
	}

#	print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";

	return ($swish,$display);
}

#-------------------------------------------------------------

sub parse_excel_format {
	my $format = shift;
	my $row = shift;
	my $i = shift;
	#my $codepage = shift;
	#
	# data allready comes in utf-8 due to change in
	# SpreadSheet::ParseExcel::FmtDefault line 69 from
	#	return pack('C*', unpack('n*', $sTxt));
	# to following which returns utf-8:
	#	return pack('U*', unpack('n*', $sTxt));
	#

	return if ($i > 0);	# Excel doesn't support repeatable fields

	my $out;
	my $out_swish;

	my $prefix = "";
	if ($format =~ s/^([^A-Z\|]{1,3})//) {
		$prefix = $1;
	}

	my $display;
	my $swish;

	while ($format && length($format) > 0) {
#print STDERR "\n#### $format #";
		if ($format =~ s/^\|([A-Z]{1,2})\|//) {
#print STDERR "--$1-> $format -[",length($format),"] ";
			if ($row->{$1}) {
				my $tmp = $row->{$1};
				$display .= $prefix . $tmp;
				$swish .= $tmp." ";
#print STDERR " == $tmp";
			}
			$prefix = "";
		} elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
			$prefix .= $1 if ($display);
		} else {
			#print STDERR "unparsed format: $format\n";
			$prefix .= $format;
			$format = "";
		}
#print STDERR " display: $display swish: $swish [format: $format]";
	}
	# add suffix
	$display .= $prefix if ($display);

	return ($swish,$display);
}

#-------------------------------------------------------------

sub parse_feed_format {
	my $format = shift;
	my $data = shift;
	my $i = shift;
	my $codepage = shift;

	# XXX feed doesn't support repeatable fields, but they really
	# should, This is a bug. It should be fixed!
	return if ($i > 0);

	my $out;
	my $out_swish;

	my $prefix = "";
	if ($format =~ s/^([^\d\|]{1,3})//) {
		$prefix = $1;
	}

	my $display;
	my $swish;

	while ($format && length($format) > 0) {
#print STDERR "\n#### $format #";
		if ($format =~ s/^\|(\d+)\|//) {
#print STDERR "--$1-> $format -[",length($format),"] ";
			if ($data->{$1}) {
				my $tmp = $data->{$1};
				if ($codepage) {
					$tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
				}
				$display .= $prefix . $tmp;
				$swish .= $tmp." ";
#print STDERR " == $tmp";
			}
			$prefix = "";
		} elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
			$prefix .= $1 if ($display);
		} else {
			print STDERR "unparsed format: $format\n";
			$prefix .= $format;
			$format = "";
		}
#print STDERR " display: $display swish: $swish [format: $format]";
	}
	# add suffix
	$display .= $prefix if ($display);

	return ($swish,$display);
}

#-------------------------------------------------------------

1;