Revision 337 (by dpavlin, 2004/06/10 19:22:40) new trunk for webpac v2
#!/usr/bin/perl -w

# This utility will convert some (or all, depending on the definition in
# the XML configuration file) fields and subfields, with remapping, from
# one or more CDS/ISIS files into a MARC file
#
# 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
#
#
# Run without parameters for usage instructions, or run without parameters
# and redirect STDOUT to a file to create an example configuration file like
# this:
#
# ./isis2marc.pl > config.xml
#
# If you want to create unique records, you need to define one or more
# fields as key (which will be used to produce just one record for one
# key)
#
# Keys are global for one run of script (that means for all ISIS databases
# used in one run), but you can write arbitrary values (as opposed to field
# names) inside key tag to produce unique key. For example,
#
# <key>author</key>
# <key>700$a</key>
#
# WARNING: When using the <key> tag you can enter a field with subfield
# (in format 700$a), just a field name (for fields which don't have
# subfields, like 005) or a literal value. Fields which don't exist in that
# record will be skipped, and if the key is empty no output record will be
# produced.
#
# So, the best way to produce just a few records in output is to specify for
# the key a field which doesn't exist at all in the ISIS database, or just
# one literal value!
#
#
# If ISIS databases are named the same as the directories in which they
# reside, you can specify just the directories (so that shell globbing works)
# like this:
# 
# ./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI
#

use strict;
use OpenIsis;
use MARC;
use XML::Simple;
use Data::Dumper;

# Fewer than three arguments (configuration file, MARC output file and at
# least one ISIS database) were given: print usage instructions to STDERR,
# dump an example configuration file to STDOUT (so that it can be
# redirected into a file) and exit with a non-zero status.
if (@ARGV < 3) {
	print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n";
	print STDERR <<'_END_OF_USAGE_';

	isis_db can be path to directory (if ISIS database is called
	same as directory) which will make shell globing work
	or full path to ISIS database (without any extension)

	Example configuration file will be dumped to standard output
	after this, so you can just re-direct output of this script
	to produce config file like this:

	$ ./isis2marc.pl > config.xml

_END_OF_USAGE_

	# the template goes to STDOUT (not STDERR) so that only it ends up
	# in the redirected file
	print <<'_END_OF_CONFIG_';

<?xml version="1.0" encoding="ISO-8859-2"?>
<!-- template configuration file -->
<mapping>
	<record>
		<key>700$a</key>
		<key>700$b</key>
		<field tag="700">
			<indicator1>0</indicator1>
			<indicator2>#</indicator2>
			<subfield id="a">700$a</subfield>
			<subfield id="b">700$b</subfield>
		</field>
		<field tag="009">
			<nosubfield>900</nosubfield>
		</field>
	</record>

</mapping>

_END_OF_CONFIG_

	exit 1;
}

# XML::Simple parser used to read the field mapping configuration
my $xml = XML::Simple->new();

# first command line argument: path to the XML configuration file
my $config_file = shift @ARGV || die "no config file?";

# Parse the configuration file:
# - KeyAttr folds <subfield id="x"> elements into a hash keyed by their id
# - ForceArray makes the listed elements array references even when they
#   occur only once; 'key' has to be listed because the record loop below
#   dereferences $cfg_rec->{key} as an array, and a single <key> element
#   would otherwise be parsed into a plain string
# - ContentKey with the '-' prefix asks XMLin to drop the intermediate
#   content key so that subfields map directly to their text
my $config = $xml->XMLin($config_file,
	KeyAttr => { subfield => 'id' },
	ForceArray => [ 'record', 'key', 'field', 'subfield', 'nosubfield' ],
	ContentKey => '-content',
	) || die "can't open configuration file '$config_file': $!";

# second command line argument: name of the MARC file to create
my $marc_file = shift @ARGV || die "no marc file?";

# in-memory MARC object which collects all output records
my $marc=MARC->new;

# it seems that I can't specify invalid template for 005 and prevent
# output from creating field 005
#$num->add_005s({record=>1});

# unbuffered STDOUT, so that progress dots show up immediately
select(STDOUT); $|=1;

# keys of records already created (shared by all databases in this run)
my %stored;
# number of MARC records created so far
my $total = 0;


# Process every ISIS database named on the command line: read all of its
# rows, unroll each row into a flat hash keyed by "field" or
# "field$subfield", and create output MARC records according to the
# <record> definitions from the configuration.
foreach my $db_file (@ARGV) {

	print "reading '$db_file'";

	# a directory was given: repeat its last path component (dropping
	# trailing slashes) as the database name, e.g. /mnt2/x/LIBRI
	# becomes /mnt2/x/LIBRI/LIBRI
	if (-d $db_file) {
		$db_file =~ s,([^/]+)/*$,$1/$1,;
	}

	# NOTE(review): the result of OpenIsis::open is not checked here --
	# confirm how failures are reported and handle them if needed
	my $db = OpenIsis::open( $db_file );
	# fall back to 1 so that the progress step and the percentage
	# computed below never divide by zero
	my $maxmfn = OpenIsis::maxRowid( $db ) || 1;

	print " [rows: $maxmfn]\n";

	# width of the progress bar in dots
	my $progress_len = 50;

	my $step = int($maxmfn/$progress_len);
	$step = 1 if ($step == 0);

	# number of MARC records created from this database
	my $new = 0;

	for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
		print "." if ($mfn % $step == 0);
		my $row = OpenIsis::read( $db, $mfn );

		# unroll this field to in-memory structure data
		my %data;

		# delete mfn from $row because it's literal value and
		# not array, so rest of code would croak
		delete($row->{mfn});

		foreach my $fld (keys %{$row}) {

			foreach my $rec_data (@{$row->{$fld}}) {

				# strip one ^x subfield at a time and store its value
				# under "field$x" (a repeated subfield overwrites the
				# previously stored value)
				while ($rec_data =~ s/\^(\w)([^\^]+)//) {
					$data{$fld.'$'.$1} = $2;

					# delete last subfield delimiter
					$rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/);
				}

				# record data still exist? it's field without
				# subfields, then...
				if ($rec_data) {
					$data{$fld} = $rec_data;
				}
			}
		}

		# now, create output MARC record(s)

		foreach my $cfg_rec (@{$config->{record}}) {

			# <key> may have been parsed as a list (several elements, or
			# forced into an array) or as a plain string (single
			# element); accept both instead of dying under strict refs
			my $cfg_key = $cfg_rec->{key};
			my @key_parts;
			if (ref($cfg_key) eq 'ARRAY') {
				@key_parts = @{$cfg_key};
			} elsif (defined($cfg_key)) {
				@key_parts = ($cfg_key);
			}

			# do we have unique key?
			# start from an empty string so that a record without usable
			# key parts compares cleanly (no "uninitialized" warning)
			my $key = "";
			foreach my $part (@key_parts) {
				if ($data{$part}) {
					# field (or field$subfield) which exists in this row
					$key .= $data{$part};
				} elsif ($part !~ m/^\d{3,4}(\$\w)*$/) {
					# doesn't look like a field name: literal value
					$key .= $part;
				}
				# a field name which is missing in this row adds nothing
			}

			# no key at all, or a key which was already seen: no output.
			# Plain string comparison (instead of a truth test) makes
			# sure that a key of "0" is de-duplicated like any other.
			next if ($key eq "" || $stored{$key});

			$stored{$key}++;


			# this will be new record (if needed)
			my $num;

			# with one or more fields
			foreach my $cfg_fld (@{$cfg_rec->{field}}) {

				my $new_fld = $cfg_fld->{tag};

				# Indicators default to a blank only when they are
				# missing or empty; a configured "0" is a value which
				# has to be kept (a plain || ' ' would discard it).
				my $i1 = $cfg_fld->{indicator1};
				$i1 = ' ' if (! defined($i1) || $i1 eq '');
				my $i2 = $cfg_fld->{indicator2};
				$i2 = ' ' if (! defined($i2) || $i2 eq '');

				#
				# first create fields without subfields
				#

				# one output field for each source field which has data
				foreach my $f (@{$cfg_fld->{nosubfield}}) {
					next if (! $data{$f});

					# create output record lazily, on first field
					if (! $num) {
						$num=$marc->createrecord();
						$new++;
					}
					$marc->addfield({record=>$num,
						field=>$new_fld,
						i1=>$i1,
						i2=>$i2,
						value=>$data{$f}
					});
				}

				#
				# then create fields with subfields
				#

				# this will hold (subfield id, value) pairs
				my @values;

				# with one or more subfields; sorted by subfield id so
				# that the output doesn't depend on hash ordering
				foreach my $new_sf (sort keys %{$cfg_fld->{subfield}}) {
					# field$subfield
					my $f = $cfg_fld->{subfield}->{$new_sf};
					if ($data{$f}) {
						push @values, $new_sf;
						push @values, $data{$f};
					}
				}
				next if (! @values);

				# create output record lazily, on first field
				if (! $num) {
					$num=$marc->createrecord();
					$new++;
				}
				$marc->addfield({record=>$num,
					field=>$new_fld,
					i1=>$i1,
					i2=>$i2,
					value=>\@values}
				);
			}

		}
	}
	$total += $new;
	# per-database summary: created records, their share of the rows in
	# this database and the running total
	printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
}

# Finally, write all collected records to the requested file in USMARC
# format (the "> " prefix is part of the file specification handed over
# to MARC's output method).
my %output_options = (
	file     => "> $marc_file",
	'format' => "usmarc",
);
$marc->output(\%output_options);