--- trunk/tools/phpmylib2marc.pl 2004/02/01 15:57:17 208 +++ trunk/tools/phpmylib2marc.pl 2004/02/01 22:18:05 216 @@ -2,7 +2,7 @@ # # This script will try (hard) to convert database from # PhpMyLibrary (http://phpmylibrary.sourceforge.net/) back -# to MARC format +# to MARC format (ISO 2709) # # 2003-01-31 Dobrica Pavlinusic # @@ -24,12 +24,14 @@ my $passwd = ""; my $usage = 0; +my $debug = 0; my $result = GetOptions( "database=s" => \$database, "host=s" => \$host, "user=s" => \$user, "password=s" => \$passwd, + "debug!" => \$debug, "help!" => \$usage, ); @@ -43,16 +45,25 @@ my $dbh = DBI->connect($dsn, $user, $passwd, {'RaiseError' => 1}); # UNIMARC leader format -my $leader_fmt = qq{%05diam0 22%05d 45 }; +#my $leader_fmt = qq{%05diam0 22%05d 45 }; +# MARC leader format +my $leader_fmt = qq{%05dcas 22%05d a 4500}; my $sth = $dbh->prepare("SELECT marc FROM tblbib"); $sth->execute(); + +my $count = 0; +my $rec_nr = 0; + while (my $row = $sth->fetchrow_hashref()) { my $marc = $row->{'marc'}; + $rec_nr++; my $real_len = length($marc); + my $skip = 0; # skip this record? + # fix PhpMyLibrary MARC (why do I have to do this? It's MARC, # for gaddem sake!!! @@ -83,9 +94,77 @@ # (xxxxx)nam 22(.....) 450 <--- $marc =~ m/^(.....)......(.....)polerioj/ || die "record: '$marc' unparsable!"; my ($reclen,$base_addr) = ($1,$2); - my $leader = sprintf($leader_fmt,length($marc)+1,$base_addr); - print $leader . substr($marc,24).chr(29); + my $directory = substr($marc,24,$base_addr-24); + my $fields = substr($marc,$base_addr-1); + + print STDERR "# $rec_nr fields: '$fields'\n" if ($debug); + print STDERR "# $rec_nr directory: [",length($directory),"]\n" if ($debug); + + # PhpMyLibrary MARC records don't have indicators, so we'll add them + + my $o = 0; # offset + my $new_dictionary; + my $new_fields; + + while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) { + my ($tag,$len,$addr) = ($1,$2,$3); + + print STDERR "tag/len/addr: $tag $len $addr\n" if ($debug); + + sub check_field($) { + my $f = shift; + my $del = substr($f,0,1); + + die "expected 0x1e, got '$del' (".ord($del)."): '$f'" if (ord($del) != 30); + } + + if (($addr+$len) > length($fields)) { + print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if ($debug); + $skip = 1; + next; + } + + # take field + my $f = substr($fields,$addr,$len); + print STDERR "data $tag [$len] $addr: '$f'\n" if ($debug); + + my $del = substr($fields,$addr+$len,1); + + # check field delimiters... + if ($del ne chr(30)) { + print STDERR "WARNING: skipping record $rec_nr, can't find delimiters got: '$del'\n" if ($debug); + $skip = 1; + next; + } + + check_field($f); + + if ($tag =~ m/^00/) { + # fields 001-008 doesn't have indicators + $new_dictionary .= sprintf("%03d%04d%05d",$tag,$len,$addr); + $new_fields.=$f; + } else { + $new_dictionary .= sprintf("%03d%04d%05d",$tag,($len+2),($addr+$o)); + $new_fields.=chr(30)." ".substr($f,1); + $o += 2; + } + } + + if (! $skip) { + my $new_leader = sprintf($leader_fmt,24+length($new_dictionary.$new_fields)+2,$base_addr); + my $new_marc = $new_leader . $new_dictionary . $new_fields . chr(30); + $new_marc .= chr(29); # end of record + + print STDERR "original and new marc: [$rec_nr]\n$marc\n$new_marc\n\n" if ($debug); + print "$new_marc"; + $count++; + } + +# last if ($count > 100); + } $sth->finish(); $dbh->disconnect(); + +print STDERR "$count records from database $database converted...\n";