--- trunk/tools/isis2marc.pl 2004/02/23 23:21:43 229 +++ trunk/tools/isis2marc.pl 2004/02/25 10:49:32 230 @@ -1,25 +1,104 @@ #!/usr/bin/perl -w -# this utility will convert some (or all, defined by regex) -# fields into marc file from one or more CDS/ISIS files +# This utility will convert some (or all, depending on the definition in the +# XML configuration file) fields and subfields, with remapping, into a MARC +# file from one or more CDS/ISIS files # # 2004-02-23 Dobrica Pavlinusic # -# if ISIS databases are named same as directories in which they +# +# Run without parameters for usage instructions, or run without parameters +# and redirect STDOUT to a file to create an example configuration file like +# this: +# +# ./isis2marc.pl > config.xml +# +# If you want to create unique records, you need to define one or more +# fields as a key (which will be used to produce just one record for one +# key) +# +# Keys are global for one run of the script (that means for all ISIS databases +# used in one run), but you can write arbitrary values (as opposed to field +# names) inside the <key> tag to produce a unique key. For example, +# +# <key>author</key> +# <key>700$a</key> +# +# WARNING: When using the <key> tag you can enter a field with subfield +# (in format 700$a), just a field name (for fields which don't have subfields, +# like 005) or a literal value. Fields which don't exist in that record +# will be skipped, and if the key is empty no output record will be produced. +# +# So, the best way to produce just a few records in the output is to specify a field which +# doesn't exist at all in the ISIS database for the key, or just one literal value!! 
+# +# +# If ISIS databases are named same as directories in which they # reside, you can specify just directories (so that shell globing work) # like this: # -# ./isis2marc.pl all.marc /mnt2/*/LIBRI +# ./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI +# use strict; use OpenIsis; use MARC; +use XML::Simple; use Data::Dumper; -# to select all fields use something like /./ -my $field_filter = '^700$'; +if ($#ARGV < 2) { + print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n"; + print STDERR <<'_END_OF_USAGE_'; + + isis_db can be path to directory (if ISIS database is called + same as database) which will make shell globing work + or full path to ISIS database (without any extension) + + Example configuration file will be dumped to standard output + after this, so you can just re-direct output of this script + to produce config file like this: + + $ ./isis2marc.pl > config.xml + +_END_OF_USAGE_ + + print <<'_END_OF_CONFIG_'; + + + + + + 700$a + 700$b + + 0 + # + 700$a + 700$b + + + 900 + + + + + +_END_OF_CONFIG_ + + exit 1; +} + +my $xml = new XML::Simple(); + +my $config_file = shift @ARGV || die "no config file?"; + +my $config = $xml->XMLin($config_file, + KeyAttr => { subfield => 'id' }, + ForceArray => [ 'record', 'field', 'subfield', 'nosubfield' ], + ContentKey => '-content', + ) || die "can't open configuration file '$config_file': $!"; -my $marc_file = shift @ARGV || die "Usage: $0 [MARC file] [ISIS db]..."; +my $marc_file = shift @ARGV || die "no marc file?"; my $marc=MARC->new; @@ -32,6 +111,7 @@ my %stored; my $total = 0; + foreach my $db_file (@ARGV) { print "reading '$db_file'"; @@ -55,43 +135,114 @@ for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { print "." 
if ($mfn % $step == 0); my $row = OpenIsis::read( $db, $mfn ); - foreach my $fld (keys %{$row}) { - next if ($fld !~ m/$field_filter/); - my @values; - my $num; + # unroll this field to in-memory structure data + my %data; - foreach my $sf (@{$row->{$fld}}) { + # delete mfn from $row because it's literal value and + # not array, so rest of code would croak + delete($row->{mfn}); - $stored{$sf}++; + foreach my $fld (keys %{$row}) { - next if ($stored{$sf} > 1); + foreach my $rec_data (@{$row->{$fld}}) { - my %v; - while ($sf =~ s/\^(\w)([^\^]+)//) { - $v{$1} = $2; + while ($rec_data =~ s/\^(\w)([^\^]+)//) { + $data{$fld.'$'.$1} = $2; # delete last subfield delimiter - $sf = "" if ($sf =~ /\^\w*$/); + $rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/); } - if (%v) { - push @values, %v; - } elsif ($sf && $sf !~ /^(\^\w)*\s*$/) { - # regex above remove empty subfields - push @values, $sf; + + # record data still exist? it's field without + # subfields, then... + if ($rec_data) { + $data{$fld} = $rec_data; } + } + } + + # now, create output MARC record(s) + + foreach my $cfg_rec (@{$config->{record}}) { + + # do we have unique key? + my $key; + foreach (@{$cfg_rec->{key}}) { + if ($data{$_}) { + $key .= $data{$_}; + } elsif (! m/^\d{3,4}(\$\w)*$/) { + $key .= $_; + } else { + $key .= ""; + } + } + next if ($key && $stored{$key} || $key eq ""); + + $stored{$key}++ if ($key); + + + # this will be new record (if needed) + my $num; + + # with one or more fields + foreach my $cfg_fld (@{$cfg_rec->{field}}) { + + my $new_fld = $cfg_fld->{tag}; + + # + # first create fields without subfields + # + + # with one or more subfields + foreach my $f (@{$cfg_fld->{nosubfield}}) { + next if (! $data{$f}); + + if (! 
$num) { + $num=$marc->createrecord(); + $new++; + } + my $i1 = $cfg_fld->{indicator1} || ' '; + my $i2 = $cfg_fld->{indicator2} || ' '; + $marc->addfield({record=>$num, + field=>$new_fld, + i1=>$i1, + i2=>$i2, + value=>$data{$f} + }); + } + + # + # then create fields with subfields + # + + # this will hold subfield values + my @values; + + # with one or more subfields + foreach my $new_sf (keys %{$cfg_fld->{subfield}}) { + # field$subfield + my $f = $cfg_fld->{subfield}->{$new_sf}; + if ($data{$f}) { + push @values, $new_sf; + push @values, $data{$f}; + } + } next if (! @values); if (! $num) { $num=$marc->createrecord(); $new++; } + my $i1 = $cfg_fld->{indicator1} || ' '; + my $i2 = $cfg_fld->{indicator2} || ' '; $marc->addfield({record=>$num, - field=>$fld, - i1=>" ", i2=>" ", - value=>\@values}); - + field=>$new_fld, + i1=>$i1, + i2=>$i2, + value=>\@values} + ); } }