--- trunk/IsisDB.pm 2004/12/30 22:40:53 18 +++ trunk/IsisDB.pm 2005/01/01 22:29:35 27 @@ -9,7 +9,7 @@ BEGIN { use Exporter (); use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); - $VERSION = 0.07; + $VERSION = 0.08; @ISA = qw (Exporter); #Give a hoot don't pollute, do not export more than needed by default @EXPORT = qw (); @@ -37,7 +37,8 @@ =head1 DESCRIPTION This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or -IsisMarc. It can be used as perl-only alternative to OpenIsis module. +IsisMarc. It can be used as perl-only alternative to OpenIsis module which +seems to depriciate it's old C bindings for perl. It can create hash values from data in ISIS database (using C), ASCII dump (using C) or just hash with field names and packed @@ -50,7 +51,7 @@ It also has support for identifiers (only if ISIS database is created by IsisMarc), see C. -This will module will always be slower than OpenIsis module which use C +This module will always be slower than OpenIsis module which use C library. However, since it's written in perl, it's platform independent (so you don't need C compiler), and can be easily modified. I hope that it creates data structures which are easier to use than ones created by @@ -148,7 +149,7 @@ push @must_exist, "fdt" if ($self->{read_fdt}); foreach my $ext (@must_exist) { - confess "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"}); + croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"}); } print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug}); @@ -214,17 +215,20 @@ =head2 read_cnt -This function is not really used by module, but can be useful to find info -about your index (if debugging it for example). +Read content of C<.CNT> file and return hash containing it. print Dumper($isis->read_cnt); +This function is not used by module (C<.CNT> files are not required for this +module to work), but it can be useful to examine your index (while debugging +for example). + =cut sub read_cnt { my $self = shift; - confess "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file}); + croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file}); # Get the index information from $db.CNT @@ -296,7 +300,7 @@ # is mfn allready in memory? my $old_mfn = $self->{'current_mfn'} || -1; - return if ($mfn == $old_mfn); + return $self->{record} if ($mfn == $old_mfn); print STDERR "## fetch: $mfn\n" if ($self->{debug}); @@ -308,18 +312,30 @@ my $buff; + # delete old record + delete $self->{record}; + # read XRFMFB abd XRFMFP read($self->{'fileXRF'}, $buff, 4); my $pointer=unpack("l",$buff) || carp "pointer is null"; + # check for logically deleted record + if ($pointer < 0) { + print STDERR "## record $mfn is logically deleted\n" if ($self->{debug}); + $self->{deleted} = $mfn; + + return unless $self->{include_deleted}; + + $pointer = abs($pointer); + } + my $XRFMFB = int($pointer/2048); my $XRFMFP = $pointer - ($XRFMFB*2048); - # (XRFMFB - 1) * 512 + XRFMFP # why do i have to do XRFMFP % 1024 ? - my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024); + my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512); print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'}); @@ -333,16 +349,15 @@ print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug}); if ($value!=$mfn) { - carp "Error: MFN ".$mfn." not found in MST(".$value.")"; - #return; # XXX deleted record? - } + if ($value == 0) { + print STDERR "## record $mfn is physically deleted\n" if ($self->{debug}); + $self->{deleted} = $mfn; + return; + } -# $MFRL=$self->Read16($fileMST); -# $MFBWB=$self->Read32($fileMST); -# $MFBWP=$self->Read16($fileMST); -# $BASE=$self->Read16($fileMST); -# $NVF=$self->Read16($fileMST); -# $STATUS=$self->Read16($fileMST); + carp "Error: MFN ".$mfn." not found in MST file, found $value"; + return; + } read($self->{'fileMST'}, $buff, 14); @@ -350,14 +365,7 @@ print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug}); - # delete old record - delete $self->{record}; - - ## FIXME this is a bug - if (! $self->{'include_deleted'} && $MFRL < 0) { - print "## logically deleted record $mfn, skipping...\n" if ($self->{debug}); - return; - } + warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0); warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF); @@ -373,10 +381,6 @@ for (my $i = 0 ; $i < $NVF ; $i++) { -# $TAG=$self->Read16($fileMST); -# $POS=$self->Read16($fileMST); -# $LEN=$self->Read16($fileMST); - my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6)); print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug}); @@ -413,18 +417,18 @@ $self->{'current_mfn'} = $mfn; - print Dumper($self),"\n" if ($self->{debug}); + print STDERR Dumper($self),"\n" if ($self->{debug}); return $self->{'record'}; } =head2 to_ascii -Dump ASCII output of record with specified MFN +Returns ASCII output of record with specified MFN print $isis->to_ascii(42); -It outputs something like this: +This outputs something like this: 210 ^aNew York^cNew York University press^dcop. 1988 990 2140 @@ -461,7 +465,7 @@ my $hash = $isis->to_hash($mfn); -It has ability to convert characters (using C from ISIS +It has ability to convert characters (using C) from ISIS database before creating structures enabling character re-mapping or quick fix-up of data. @@ -520,7 +524,7 @@ my $val; # has identifiers? - ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//); + ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/); # has subfields? if ($l =~ m/\^/) { @@ -557,7 +561,18 @@ =head1 BUGS -This module has been very lightly tested. Use with caution and report bugs. +Some parts of CDS/ISIS documentation are not detailed enough to exmplain +some variations in input databases which has been tested with this module. +When I was in doubt, I assumed that OpenIsis's implementation was right +(except for obvious bugs). + +However, every effort has been made to test this module with as much +databases (and programs that create them) as possible. + +I would be very greatful for success or failure reports about usage of this +module with databases from programs other than WinIsis and IsisMarc. I had +tested this against ouput of one C-based application, but I don't +know any details about it's version. =head1 AUTHOR