lib/Biblio/Isis.pm

package Biblio::Isis;
use strict;

use Carp;
use File::Glob qw(:globally :nocase);

use Data::Dumper;

BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
        $VERSION     = 0.12;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
        @EXPORT_OK   = qw ();
        %EXPORT_TAGS = ();

}

=head1 NAME

Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database

=head1 SYNOPSIS

  use Biblio::Isis;

  my $isis = new Biblio::Isis(
        isisdb => './cds/cds',
  );

  for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
        print $isis->to_ascii($mfn),"\n";
  }

=head1 DESCRIPTION

This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
IsisMarc. It can be used as perl-only alternative to OpenIsis module which
seems to depriciate it's old C<XS> bindings for perl.

It can create hash values from data in ISIS database (using C<to_hash>),
ASCII dump (using C<to_ascii>) or just hash with field names and packed
values (like C<^asomething^belse>).

Unique feature of this module is ability to C<include_deleted> records.
It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
fields which are zero sized will be filled with random junk from memory).

It also has support for identifiers (only if ISIS database is created by
IsisMarc), see C<to_hash>.

This module will always be slower than OpenIsis module which use C
library. However, since it's written in perl, it's platform independent (so
you don't need C compiler), and can be easily modified. I hope that it
creates data structures which are easier to use than ones created by
OpenIsis, so reduced time in other parts of the code should compensate for
slower performance of this module (speed of reading ISIS database is
rarely an issue).

=head1 METHODS

=cut

#  my $ORDN;            # Nodes Order
#  my $ORDF;            # Leafs Order
#  my $N;               # Number of Memory buffers for nodes
#  my $K;               # Number of buffers for first level index
#  my $LIV;             # Current number of Index Levels
#  my $POSRX;           # Pointer to Root Record in N0x
#  my $NMAXPOS;         # Next Available position in N0x
#  my $FMAXPOS;         # Next available position in L0x
#  my $ABNORMAL;        # Formal BTree normality indicator

#
# some binary reads
#

=head2 new

Open ISIS database

 my $isis = new Biblio::Isis(
        isisdb => './cds/cds',
        read_fdt => 1,
        include_deleted => 1,
        hash_filter => sub {
                my $v = shift;
                $v =~ s#foo#bar#g;
        },
        debug => 1,
 );

Options are described below:

=over 5

=item isisdb

This is full or relative path to ISIS database files which include
common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
C<read_fdt> option) files.

In this example it uses C<./cds/cds.MST> and related files.

=item read_fdt

Boolean flag to specify if field definition table should be read. It's off
by default.

=item include_deleted

Don't skip logically deleted records in ISIS.

=item hash_filter

Filter code ref which will be used before data is converted to hash.

=item debug

Dump a B<lot> of debugging output.

=back

=cut

sub new {
        my $class = shift;
        my $self = {};
        bless($self, $class);

        croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});

        foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
                $self->{$v} = {@_}->{$v};
        }

        my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));

        foreach my $f (@isis_files) {
                my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
                $self->{lc($ext)."_file"} = $f;
        }

        my @must_exist = qw(mst xrf);
        push @must_exist, "fdt" if ($self->{read_fdt});

        foreach my $ext (@must_exist) {
                unless ($self->{$ext."_file"}) {
                        carp "missing ",uc($ext)," file in ",$self->{isisdb};
                        return;
                }
        }

        print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});

        # if you want to read .FDT file use read_fdt argument when creating class!
        if ($self->{read_fdt} && -e $self->{fdt_file}) {

                # read the $db.FDT file for tags
                my $fieldzone=0;

                open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
                binmode($fileFDT);

                while (<$fileFDT>) {
                        chomp;
                        if ($fieldzone) {
                                my $name=substr($_,0,30);
                                my $tag=substr($_,50,3);

                                $name =~ s/\s+$//;
                                $tag =~ s/\s+$//;

                                $self->{'TagName'}->{$tag}=$name;  
                        }

                        if (/^\*\*\*/) {
                                $fieldzone=1;
                        }
                }
                
                close($fileFDT);
        }

        # Get the Maximum MFN from $db.MST

        open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
        binmode($self->{'fileMST'});

        # MST format:   (* = 32 bit signed)
        # CTLMFN*       always 0
        # NXTMFN*       MFN to be assigned to the next record created
        # NXTMFB*       last block allocated to master file
        # NXTMFP        offset to next available position in last block
        # MFTYPE        always 0 for user db file (1 for system)
        seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";

        my $buff;

        read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
        $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";

        print STDERR Dumper($self),"\n" if ($self->{debug});

        # open files for later
        open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
        binmode($self->{'fileXRF'});

        $self ? return $self : return undef;
}

=head2 count

Return number of records in database

  print $isis->count;

=cut

sub count {
        my $self = shift;
        return $self->{'NXTMFN'} - 1;
}

=head2 fetch

Read record with selected MFN

  my $rec = $isis->fetch(55);

Returns hash with keys which are field names and values are unpacked values
for that field like this:

  $rec = {
    '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
    '990' => [ '2140', '88', 'HAY' ],
  };

=cut

sub fetch {
        my $self = shift;

        my $mfn = shift || croak "fetch needs MFN as argument!";

        # is mfn allready in memory?
        my $old_mfn = $self->{'current_mfn'} || -1;
        return $self->{record} if ($mfn == $old_mfn);

        print STDERR "## fetch: $mfn\n" if ($self->{debug});

        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;

        print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
        seek($self->{'fileXRF'},$mfnpos,0);

        my $buff;

        # delete old record
        delete $self->{record};

        # read XRFMFB abd XRFMFP
        read($self->{'fileXRF'}, $buff, 4);
        my $pointer=unpack("V",$buff) || croak "pointer is null";

        # check for logically deleted record
        if ($pointer & 0x80000000) {
                print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
                $self->{deleted} = $mfn;

                return unless $self->{include_deleted};

                # abs
                $pointer = ($pointer ^ 0xffffffff) + 1;
        }

        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);

        # (XRFMFB - 1) * 512 + XRFMFP
        # why do i have to do XRFMFP % 1024 ?

        my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);

        print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});

        # Get Record Information

        seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";

        read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
        my $value=unpack("V",$buff);

        print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});

        if ($value!=$mfn) {
                if ($value == 0) {
                        print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
                        $self->{deleted} = $mfn;
                        return;
                }

                carp "Error: MFN ".$mfn." not found in MST file, found $value";    
                return;
        }

        read($self->{'fileMST'}, $buff, 14);

        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);

        print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});

        warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);

        warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);

        # Get Directory Format

        my @FieldPOS;
        my @FieldLEN;
        my @FieldTAG;

        read($self->{'fileMST'}, $buff, 6 * $NVF);

        my $rec_len = 0;

        for (my $i = 0 ; $i < $NVF ; $i++) {

                my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));

                print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});

                # The TAG does not exists in .FDT so we set it to 0.
                #
                # XXX This is removed from perl version; .FDT file is updated manually, so
                # you will often have fields in .MST file which aren't in .FDT. On the other
                # hand, IsisMarc doesn't use .FDT files at all!

                #if (! $self->{TagName}->{$TAG}) {
                #       $TAG=0;
                #}

                push @FieldTAG,$TAG;
                push @FieldPOS,$POS;
                push @FieldLEN,$LEN;

                $rec_len += $LEN;
        }

        # Get Variable Fields

        read($self->{'fileMST'},$buff,$rec_len);

        print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});

        for (my $i = 0 ; $i < $NVF ; $i++) {
                # skip zero-sized fields
                next if ($FieldLEN[$i] == 0);

                push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
        }

        $self->{'current_mfn'} = $mfn;

        print STDERR Dumper($self),"\n" if ($self->{debug});

        return $self->{'record'};
}

=head2 to_ascii

Returns ASCII output of record with specified MFN

  print $isis->to_ascii(42);

This outputs something like this:

  210   ^aNew York^cNew York University press^dcop. 1988
  990   2140
  990   88
  990   HAY

If C<read_fdt> is specified when calling C<new> it will display field names
from C<.FDT> file instead of numeric tags.

=cut

sub to_ascii {
        my $self = shift;

        my $mfn = shift || croak "need MFN";

        my $rec = $self->fetch($mfn);

        my $out = "0\t$mfn";

        foreach my $f (sort keys %{$rec}) {
                my $fn = $self->tag_name($f);
                $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
        }

        $out .= "\n";

        return $out;
}

=head2 to_hash

Read record with specified MFN and convert it to hash

  my $hash = $isis->to_hash($mfn);

It has ability to convert characters (using C<hash_filter>) from ISIS
database before creating structures enabling character re-mapping or quick
fix-up of data.

This function returns hash which is like this:

  $hash = {
    '210' => [
               {
                 'c' => 'New York University press',
                 'a' => 'New York',
                 'd' => 'cop. 1988'
               }
             ],
    '990' => [
               '2140',
               '88',
               'HAY'
             ],
  };

You can later use that hash to produce any output from ISIS data.

If database is created using IsisMarc, it will also have to special fields
which will be used for identifiers, C<i1> and C<i2> like this:

  '200' => [
             {
               'i1' => '1',
               'i2' => ' '
               'a' => 'Goa',
               'f' => 'Valdo D\'Arienzo',
               'e' => 'tipografie e tipografi nel XVI secolo',
             }
           ],

This method will also create additional field C<000> with MFN.

=cut

sub to_hash {
        my $self = shift;

        my $mfn = shift || confess "need mfn!";

        # init record to include MFN as field 000
        my $rec = { '000' => [ $mfn ] };

        my $row = $self->fetch($mfn);

        foreach my $k (keys %{$row}) {
                foreach my $l (@{$row->{$k}}) {

                        # filter output
                        $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});

                        my $val;

                        # has identifiers?
                        ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);

                        # has subfields?
                        if ($l =~ m/\^/) {
                                foreach my $t (split(/\^/,$l)) {
                                        next if (! $t);
                                        $val->{substr($t,0,1)} = substr($t,1);
                                }
                        } else {
                                $val = $l;
                        }

                        push @{$rec->{$k}}, $val;
                }
        }

        return $rec;
}

=head2 tag_name

Return name of selected tag

 print $isis->tag_name('200');

=cut

sub tag_name {
        my $self = shift;
        my $tag = shift || return;
        return $self->{'TagName'}->{$tag} || $tag;
}


=head2 read_cnt

Read content of C<.CNT> file and return hash containing it.

  print Dumper($isis->read_cnt);

This function is not used by module (C<.CNT> files are not required for this
module to work), but it can be useful to examine your index (while debugging
for example).

=cut

sub read_cnt  {
        my $self = shift;

        croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});

        # Get the index information from $db.CNT
   
        open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
        binmode($fileCNT);

        my $buff;

        read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
        $self->unpack_cnt($buff);

        read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
        $self->unpack_cnt($buff);

        close($fileCNT);

        return $self->{cnt};
}

=head2 unpack_cnt

Unpack one of two 26 bytes fixed length record in C<.CNT> file.

Here is definition of record:

 off key        description                             size
  0: IDTYPE     BTree type                              s
  2: ORDN       Nodes Order                             s
  4: ORDF       Leafs Order                             s
  6: N          Number of Memory buffers for nodes      s
  8: K          Number of buffers for first level index s
 10: LIV        Current number of Index Levels          s
 12: POSRX      Pointer to Root Record in N0x           l
 16: NMAXPOS    Next Available position in N0x          l
 20: FMAXPOS    Next available position in L0x          l
 24: ABNORMAL   Formal BTree normality indicator        s
 length: 26 bytes

This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.

=cut

sub unpack_cnt {
        my $self = shift;

        my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);

        my $buff = shift || return;
        my @arr = unpack("vvvvvvVVVv", $buff);

        print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});

        my $IDTYPE = shift @arr;
        foreach (@flds) {
                $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
        }
}

1;

=head1 BUGS

Some parts of CDS/ISIS documentation are not detailed enough to exmplain
some variations in input databases which has been tested with this module.
When I was in doubt, I assumed that OpenIsis's implementation was right
(except for obvious bugs).

However, every effort has been made to test this module with as much
databases (and programs that create them) as possible.

I would be very greatful for success or failure reports about usage of this
module with databases from programs other than WinIsis and IsisMarc. I had
tested this against ouput of one C<isis.dll>-based application, but I don't
know any details about it's version.

=head1 AUTHOR

        Dobrica Pavlinusic
        CPAN ID: DPAVLIN
        dpavlin@rot13.org
        http://www.rot13.org/~dpavlin/

This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

OpenIsis web site L<http://www.openisis.org>

perl4lib site L<http://perl4lib.perl.org>

1	package Biblio::Isis;
2	use strict;
3
4	use Carp;
5	use File::Glob qw(:globally :nocase);
6
7	use Data::Dumper;
8
9	BEGIN {
10	use Exporter ();
11	use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12	$VERSION = 0.12;
13	@ISA = qw (Exporter);
14	#Give a hoot don't pollute, do not export more than needed by default
15	@EXPORT = qw ();
16	@EXPORT_OK = qw ();
17	%EXPORT_TAGS = ();
18
19	}
20
21	=head1 NAME
22
23	Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
24
25	=head1 SYNOPSIS
26
27	use Biblio::Isis;
28
29	my $isis = new Biblio::Isis(
30	isisdb => './cds/cds',
31	);
32
33	for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34	print $isis->to_ascii($mfn),"\n";
35	}
36
37	=head1 DESCRIPTION
38
39	This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40	IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41	seems to depriciate it's old C<XS> bindings for perl.
42
43	It can create hash values from data in ISIS database (using C<to_hash>),
44	ASCII dump (using C<to_ascii>) or just hash with field names and packed
45	values (like C<^asomething^belse>).
46
47	Unique feature of this module is ability to C<include_deleted> records.
48	It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
49	fields which are zero sized will be filled with random junk from memory).
50
51	It also has support for identifiers (only if ISIS database is created by
52	IsisMarc), see C<to_hash>.
53
54	This module will always be slower than OpenIsis module which use C
55	library. However, since it's written in perl, it's platform independent (so
56	you don't need C compiler), and can be easily modified. I hope that it
57	creates data structures which are easier to use than ones created by
58	OpenIsis, so reduced time in other parts of the code should compensate for
59	slower performance of this module (speed of reading ISIS database is
60	rarely an issue).
61
62	=head1 METHODS
63
64	=cut
65
66	# my $ORDN; # Nodes Order
67	# my $ORDF; # Leafs Order
68	# my $N; # Number of Memory buffers for nodes
69	# my $K; # Number of buffers for first level index
70	# my $LIV; # Current number of Index Levels
71	# my $POSRX; # Pointer to Root Record in N0x
72	# my $NMAXPOS; # Next Available position in N0x
73	# my $FMAXPOS; # Next available position in L0x
74	# my $ABNORMAL; # Formal BTree normality indicator
75
76	#
77	# some binary reads
78	#
79
80	=head2 new
81
82	Open ISIS database
83
84	my $isis = new Biblio::Isis(
85	isisdb => './cds/cds',
86	read_fdt => 1,
87	include_deleted => 1,
88	hash_filter => sub {
89	my $v = shift;
90	$v =~ s#foo#bar#g;
91	},
92	debug => 1,
93	);
94
95	Options are described below:
96
97	=over 5
98
99	=item isisdb
100
101	This is full or relative path to ISIS database files which include
102	common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
103	C<read_fdt> option) files.
104
105	In this example it uses C<./cds/cds.MST> and related files.
106
107	=item read_fdt
108
109	Boolean flag to specify if field definition table should be read. It's off
110	by default.
111
112	=item include_deleted
113
114	Don't skip logically deleted records in ISIS.
115
116	=item hash_filter
117
118	Filter code ref which will be used before data is converted to hash.
119
120	=item debug
121
122	Dump a B<lot> of debugging output.
123
124	=back
125
126	=cut
127
128	sub new {
129	my $class = shift;
130	my $self = {};
131	bless($self, $class);
132
133	croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
134
135	foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
136	$self->{$v} = {@_}->{$v};
137	}
138
139	my @isis_files = grep(/\.(FDT\|MST\|XRF\|CNT)$/i,glob($self->{isisdb}."*"));
140
141	foreach my $f (@isis_files) {
142	my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
143	$self->{lc($ext)."_file"} = $f;
144	}
145
146	my @must_exist = qw(mst xrf);
147	push @must_exist, "fdt" if ($self->{read_fdt});
148
149	foreach my $ext (@must_exist) {
150	unless ($self->{$ext."_file"}) {
151	carp "missing ",uc($ext)," file in ",$self->{isisdb};
152	return;
153	}
154	}
155
156	print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
157
158	# if you want to read .FDT file use read_fdt argument when creating class!
159	if ($self->{read_fdt} && -e $self->{fdt_file}) {
160
161	# read the $db.FDT file for tags
162	my $fieldzone=0;
163
164	open(my $fileFDT, $self->{fdt_file}) \|\| croak "can't read '$self->{fdt_file}': $!";
165	binmode($fileFDT);
166
167	while (<$fileFDT>) {
168	chomp;
169	if ($fieldzone) {
170	my $name=substr($_,0,30);
171	my $tag=substr($_,50,3);
172
173	$name =~ s/\s+$//;
174	$tag =~ s/\s+$//;
175
176	$self->{'TagName'}->{$tag}=$name;
177	}
178
179	if (/^\\\*/) {
180	$fieldzone=1;
181	}
182	}
183
184	close($fileFDT);
185	}
186
187	# Get the Maximum MFN from $db.MST
188
189	open($self->{'fileMST'}, $self->{mst_file}) \|\| croak "can't open '$self->{mst_file}': $!";
190	binmode($self->{'fileMST'});
191
192	# MST format: (* = 32 bit signed)
193	# CTLMFN* always 0
194	# NXTMFN* MFN to be assigned to the next record created
195	# NXTMFB* last block allocated to master file
196	# NXTMFP offset to next available position in last block
197	# MFTYPE always 0 for user db file (1 for system)
198	seek($self->{'fileMST'},4,0) \|\| croak "can't seek to offset 0 in MST: $!";
199
200	my $buff;
201
202	read($self->{'fileMST'}, $buff, 4) \|\| croak "can't read NXTMFN from MST: $!";
203	$self->{'NXTMFN'}=unpack("V",$buff) \|\| croak "NXTNFN is zero";
204
205	print STDERR Dumper($self),"\n" if ($self->{debug});
206
207	# open files for later
208	open($self->{'fileXRF'}, $self->{xrf_file}) \|\| croak "can't open '$self->{xrf_file}': $!";
209	binmode($self->{'fileXRF'});
210
211	$self ? return $self : return undef;
212	}
213
214	=head2 count
215
216	Return number of records in database
217
218	print $isis->count;
219
220	=cut
221
222	sub count {
223	my $self = shift;
224	return $self->{'NXTMFN'} - 1;
225	}
226
227	=head2 fetch
228
229	Read record with selected MFN
230
231	my $rec = $isis->fetch(55);
232
233	Returns hash with keys which are field names and values are unpacked values
234	for that field like this:
235
236	$rec = {
237	'210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
238	'990' => [ '2140', '88', 'HAY' ],
239	};
240
241	=cut
242
243	sub fetch {
244	my $self = shift;
245
246	my $mfn = shift \|\| croak "fetch needs MFN as argument!";
247
248	# is mfn allready in memory?
249	my $old_mfn = $self->{'current_mfn'} \|\| -1;
250	return $self->{record} if ($mfn == $old_mfn);
251
252	print STDERR "## fetch: $mfn\n" if ($self->{debug});
253
254	# XXX check this?
255	my $mfnpos=($mfn+int(($mfn-1)/127))*4;
256
257	print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
258	seek($self->{'fileXRF'},$mfnpos,0);
259
260	my $buff;
261
262	# delete old record
263	delete $self->{record};
264
265	# read XRFMFB abd XRFMFP
266	read($self->{'fileXRF'}, $buff, 4);
267	my $pointer=unpack("V",$buff) \|\| croak "pointer is null";
268
269	# check for logically deleted record
270	if ($pointer & 0x80000000) {
271	print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
272	$self->{deleted} = $mfn;
273
274	return unless $self->{include_deleted};
275
276	# abs
277	$pointer = ($pointer ^ 0xffffffff) + 1;
278	}
279
280	my $XRFMFB = int($pointer/2048);
281	my $XRFMFP = $pointer - ($XRFMFB*2048);
282
283	# (XRFMFB - 1) * 512 + XRFMFP
284	# why do i have to do XRFMFP % 1024 ?
285
286	my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
287
288	print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
289
290	# Get Record Information
291
292	seek($self->{'fileMST'},$blk_off,0) \|\| croak "can't seek to $blk_off: $!";
293
294	read($self->{'fileMST'}, $buff, 4) \|\| croak "can't read 4 bytes at offset $blk_off from MST file: $!";
295	my $value=unpack("V",$buff);
296
297	print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
298
299	if ($value!=$mfn) {
300	if ($value == 0) {
301	print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
302	$self->{deleted} = $mfn;
303	return;
304	}
305
306	carp "Error: MFN ".$mfn." not found in MST file, found $value";
307	return;
308	}
309
310	read($self->{'fileMST'}, $buff, 14);
311
312	my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
313
314	print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
315
316	warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
317
318	warn "BASE is not 18+6NVF" unless ($BASE == 18 + 6 $NVF);
319
320	# Get Directory Format
321
322	my @FieldPOS;
323	my @FieldLEN;
324	my @FieldTAG;
325
326	read($self->{'fileMST'}, $buff, 6 * $NVF);
327
328	my $rec_len = 0;
329
330	for (my $i = 0 ; $i < $NVF ; $i++) {
331
332	my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
333
334	print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
335
336	# The TAG does not exists in .FDT so we set it to 0.
337	#
338	# XXX This is removed from perl version; .FDT file is updated manually, so
339	# you will often have fields in .MST file which aren't in .FDT. On the other
340	# hand, IsisMarc doesn't use .FDT files at all!
341
342	#if (! $self->{TagName}->{$TAG}) {
343	# $TAG=0;
344	#}
345
346	push @FieldTAG,$TAG;
347	push @FieldPOS,$POS;
348	push @FieldLEN,$LEN;
349
350	$rec_len += $LEN;
351	}
352
353	# Get Variable Fields
354
355	read($self->{'fileMST'},$buff,$rec_len);
356
357	print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
358
359	for (my $i = 0 ; $i < $NVF ; $i++) {
360	# skip zero-sized fields
361	next if ($FieldLEN[$i] == 0);
362
363	push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
364	}
365
366	$self->{'current_mfn'} = $mfn;
367
368	print STDERR Dumper($self),"\n" if ($self->{debug});
369
370	return $self->{'record'};
371	}
372
373	=head2 to_ascii
374
375	Returns ASCII output of record with specified MFN
376
377	print $isis->to_ascii(42);
378
379	This outputs something like this:
380
381	210 ^aNew York^cNew York University press^dcop. 1988
382	990 2140
383	990 88
384	990 HAY
385
386	If C<read_fdt> is specified when calling C<new> it will display field names
387	from C<.FDT> file instead of numeric tags.
388
389	=cut
390
391	sub to_ascii {
392	my $self = shift;
393
394	my $mfn = shift \|\| croak "need MFN";
395
396	my $rec = $self->fetch($mfn);
397
398	my $out = "0\t$mfn";
399
400	foreach my $f (sort keys %{$rec}) {
401	my $fn = $self->tag_name($f);
402	$out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
403	}
404
405	$out .= "\n";
406
407	return $out;
408	}
409
410	=head2 to_hash
411
412	Read record with specified MFN and convert it to hash
413
414	my $hash = $isis->to_hash($mfn);
415
416	It has ability to convert characters (using C<hash_filter>) from ISIS
417	database before creating structures enabling character re-mapping or quick
418	fix-up of data.
419
420	This function returns hash which is like this:
421
422	$hash = {
423	'210' => [
424	{
425	'c' => 'New York University press',
426	'a' => 'New York',
427	'd' => 'cop. 1988'
428	}
429	],
430	'990' => [
431	'2140',
432	'88',
433	'HAY'
434	],
435	};
436
437	You can later use that hash to produce any output from ISIS data.
438
439	If database is created using IsisMarc, it will also have to special fields
440	which will be used for identifiers, C<i1> and C<i2> like this:
441
442	'200' => [
443	{
444	'i1' => '1',
445	'i2' => ' '
446	'a' => 'Goa',
447	'f' => 'Valdo D\'Arienzo',
448	'e' => 'tipografie e tipografi nel XVI secolo',
449	}
450	],
451
452	This method will also create additional field C<000> with MFN.
453
454	=cut
455
456	sub to_hash {
457	my $self = shift;
458
459	my $mfn = shift \|\| confess "need mfn!";
460
461	# init record to include MFN as field 000
462	my $rec = { '000' => [ $mfn ] };
463
464	my $row = $self->fetch($mfn);
465
466	foreach my $k (keys %{$row}) {
467	foreach my $l (@{$row->{$k}}) {
468
469	# filter output
470	$l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
471
472	my $val;
473
474	# has identifiers?
475	($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
476
477	# has subfields?
478	if ($l =~ m/\^/) {
479	foreach my $t (split(/\^/,$l)) {
480	next if (! $t);
481	$val->{substr($t,0,1)} = substr($t,1);
482	}
483	} else {
484	$val = $l;
485	}
486
487	push @{$rec->{$k}}, $val;
488	}
489	}
490
491	return $rec;
492	}
493
494	=head2 tag_name
495
496	Return name of selected tag
497
498	print $isis->tag_name('200');
499
500	=cut
501
502	sub tag_name {
503	my $self = shift;
504	my $tag = shift \|\| return;
505	return $self->{'TagName'}->{$tag} \|\| $tag;
506	}
507
508
509	=head2 read_cnt
510
511	Read content of C<.CNT> file and return hash containing it.
512
513	print Dumper($isis->read_cnt);
514
515	This function is not used by module (C<.CNT> files are not required for this
516	module to work), but it can be useful to examine your index (while debugging
517	for example).
518
519	=cut
520
521	sub read_cnt {
522	my $self = shift;
523
524	croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
525
526	# Get the index information from $db.CNT
527
528	open(my $fileCNT, $self->{cnt_file}) \|\| croak "can't read '$self->{cnt_file}': $!";
529	binmode($fileCNT);
530
531	my $buff;
532
533	read($fileCNT, $buff, 26) \|\| croak "can't read first table from CNT: $!";
534	$self->unpack_cnt($buff);
535
536	read($fileCNT, $buff, 26) \|\| croak "can't read second table from CNT: $!";
537	$self->unpack_cnt($buff);
538
539	close($fileCNT);
540
541	return $self->{cnt};
542	}
543
544	=head2 unpack_cnt
545
546	Unpack one of two 26 bytes fixed length record in C<.CNT> file.
547
548	Here is definition of record:
549
550	off key description size
551	0: IDTYPE BTree type s
552	2: ORDN Nodes Order s
553	4: ORDF Leafs Order s
554	6: N Number of Memory buffers for nodes s
555	8: K Number of buffers for first level index s
556	10: LIV Current number of Index Levels s
557	12: POSRX Pointer to Root Record in N0x l
558	16: NMAXPOS Next Available position in N0x l
559	20: FMAXPOS Next available position in L0x l
560	24: ABNORMAL Formal BTree normality indicator s
561	length: 26 bytes
562
563	This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
564
565	=cut
566
567	sub unpack_cnt {
568	my $self = shift;
569
570	my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
571
572	my $buff = shift \|\| return;
573	my @arr = unpack("vvvvvvVVVv", $buff);
574
575	print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
576
577	my $IDTYPE = shift @arr;
578	foreach (@flds) {
579	$self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
580	}
581	}
582
583	1;
584
585	=head1 BUGS
586
587	Some parts of CDS/ISIS documentation are not detailed enough to exmplain
588	some variations in input databases which has been tested with this module.
589	When I was in doubt, I assumed that OpenIsis's implementation was right
590	(except for obvious bugs).
591
592	However, every effort has been made to test this module with as much
593	databases (and programs that create them) as possible.
594
595	I would be very greatful for success or failure reports about usage of this
596	module with databases from programs other than WinIsis and IsisMarc. I had
597	tested this against ouput of one C<isis.dll>-based application, but I don't
598	know any details about it's version.
599
600	=head1 AUTHOR
601
602	Dobrica Pavlinusic
603	CPAN ID: DPAVLIN
604	dpavlin@rot13.org
605	http://www.rot13.org/~dpavlin/
606
607	This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
608	written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
609
610	=head1 COPYRIGHT
611
612	This program is free software; you can redistribute
613	it and/or modify it under the same terms as Perl itself.
614
615	The full text of the license can be found in the
616	LICENSE file included with this module.
617
618
619	=head1 SEE ALSO
620
621	OpenIsis web site L<http://www.openisis.org>
622
623	perl4lib site L<http://perl4lib.perl.org>
624