Biblio-Isis/trunk/IsisDB.pm

package IsisDB;
use strict;

use Carp;
use File::Glob qw(:globally :nocase);

use Data::Dumper;

BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
        $VERSION     = 0.09;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
        @EXPORT_OK   = qw ();
        %EXPORT_TAGS = ();

}

=head1 NAME

IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database

=head1 SYNOPSIS

  use IsisDB;

  my $isis = new IsisDB(
        isisdb => './cds/cds',
  );

  for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
        print $isis->to_ascii($mfn),"\n";
  }

=head1 DESCRIPTION

This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
IsisMarc. It can be used as perl-only alternative to OpenIsis module which
seems to depriciate it's old C<XS> bindings for perl.

It can create hash values from data in ISIS database (using C<to_hash>),
ASCII dump (using C<to_ascii>) or just hash with field names and packed
values (like C<^asomething^belse>).

Unique feature of this module is ability to C<include_deleted> records.
It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
fields which are zero sized will be filled with random junk from memory).

It also has support for identifiers (only if ISIS database is created by
IsisMarc), see C<to_hash>.

This module will always be slower than OpenIsis module which use C
library. However, since it's written in perl, it's platform independent (so
you don't need C compiler), and can be easily modified. I hope that it
creates data structures which are easier to use than ones created by
OpenIsis, so reduced time in other parts of the code should compensate for
slower performance of this module (speed of reading ISIS database is
rarely an issue).

=head1 METHODS

=cut

#  my $ORDN;            # Nodes Order
#  my $ORDF;            # Leafs Order
#  my $N;               # Number of Memory buffers for nodes
#  my $K;               # Number of buffers for first level index
#  my $LIV;             # Current number of Index Levels
#  my $POSRX;           # Pointer to Root Record in N0x
#  my $NMAXPOS;         # Next Available position in N0x
#  my $FMAXPOS;         # Next available position in L0x
#  my $ABNORMAL;        # Formal BTree normality indicator

#
# some binary reads
#

=head2 new

Open ISIS database

 my $isis = new IsisDB(
        isisdb => './cds/cds',
        read_fdt => 1,
        include_deleted => 1,
        hash_filter => sub {
                my $v = shift;
                $v =~ s#foo#bar#g;
        },
        debug => 1,
 );

Options are described below:

=over 5

=item isisdb

This is full or relative path to ISIS database files which include
common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
C<read_fdt> option) files.

In this example it uses C<./cds/cds.MST> and related files.

=item read_fdt

Boolean flag to specify if field definition table should be read. It's off
by default.

=item include_deleted

Don't skip logically deleted records in ISIS.

=item hash_filter

Filter code ref which will be used before data is converted to hash.

=item debug

Dump a B<lot> of debugging output.

=back

=cut

sub new {
        my $class = shift;
        my $self = {};
        bless($self, $class);

        croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});

        foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
                $self->{$v} = {@_}->{$v};
        }

        my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));

        foreach my $f (@isis_files) {
                my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
                $self->{lc($ext)."_file"} = $f;
        }

        my @must_exist = qw(mst xrf);
        push @must_exist, "fdt" if ($self->{read_fdt});

        foreach my $ext (@must_exist) {
                croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
        }

        print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});

        # if you want to read .FDT file use read_fdt argument when creating class!
        if ($self->{read_fdt} && -e $self->{fdt_file}) {

                # read the $db.FDT file for tags
                my $fieldzone=0;

                open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";

                while (<fileFDT>) {
                        chomp;
                        if ($fieldzone) {
                                my $name=substr($_,0,30);
                                my $tag=substr($_,50,3);

                                $name =~ s/\s+$//;
                                $tag =~ s/\s+$//;

                                $self->{'TagName'}->{$tag}=$name;  
                        }

                        if (/^\*\*\*/) {
                                $fieldzone=1;
                        }
                }
                
                close(fileFDT);
        }

        # Get the Maximum MFN from $db.MST

        open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";

        # MST format:   (* = 32 bit signed)
        # CTLMFN*       always 0
        # NXTMFN*       MFN to be assigned to the next record created
        # NXTMFB*       last block allocated to master file
        # NXTMFP        offset to next available position in last block
        # MFTYPE        always 0 for user db file (1 for system)
        seek($self->{'fileMST'},4,0);

        my $buff;

        read($self->{'fileMST'}, $buff, 4);
        $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";


        print STDERR Dumper($self),"\n" if ($self->{debug});

        # open files for later
        open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";

        $self ? return $self : return undef;
}

=head2 count

Return number of records in database

  print $isis->count;

=cut

sub count {
        my $self = shift;
        return $self->{'NXTMFN'} - 1;
}

=head2 read_cnt

Read content of C<.CNT> file and return hash containing it.

  print Dumper($isis->read_cnt);

This function is not used by module (C<.CNT> files are not required for this
module to work), but it can be useful to examine your index (while debugging
for example).

=cut

sub read_cnt  {
        my $self = shift;

        croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});

        # Get the index information from $db.CNT
   
        open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";

        my $buff;

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        close(fileCNT);

        return $self->{cnt};
}

=head2 unpack_cnt

Unpack one of two 26 bytes fixed length record in C<.CNT> file.

Here is definition of record:

 off key        description                             size
  0: IDTYPE     BTree type                              s
  2: ORDN       Nodes Order                             s
  4: ORDF       Leafs Order                             s
  6: N          Number of Memory buffers for nodes      s
  8: K          Number of buffers for first level index s
 10: LIV        Current number of Index Levels          s
 12: POSRX      Pointer to Root Record in N0x           l
 16: NMAXPOS    Next Available position in N0x          l
 20: FMAXPOS    Next available position in L0x          l
 24: ABNORMAL   Formal BTree normality indicator        s
 length: 26 bytes

This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.

=cut

sub unpack_cnt {
        my $self = shift;

        my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);

        my $buff = shift || return;
        my @arr = unpack("ssssssllls", $buff);

        print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});

        my $IDTYPE = shift @arr;
        foreach (@flds) {
                $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
        }
}

=head2 fetch

Read record with selected MFN

  my $rec = $isis->fetch(55);

Returns hash with keys which are field names and values are unpacked values
for that field like this:

  $rec = {
    '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
    '990' => [ '2140', '88', 'HAY' ],
  };

=cut

sub fetch {
        my $self = shift;

        my $mfn = shift || croak "fetch needs MFN as argument!";

        # is mfn allready in memory?
        my $old_mfn = $self->{'current_mfn'} || -1;
        return $self->{record} if ($mfn == $old_mfn);

        print STDERR "## fetch: $mfn\n" if ($self->{debug});

        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;

        print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
        seek($self->{'fileXRF'},$mfnpos,0);

        my $buff;

        # delete old record
        delete $self->{record};

        # read XRFMFB abd XRFMFP
        read($self->{'fileXRF'}, $buff, 4);
        my $pointer=unpack("l",$buff) || carp "pointer is null";

        # check for logically deleted record
        if ($pointer < 0) {
                print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
                $self->{deleted} = $mfn;

                return unless $self->{include_deleted};

                $pointer = abs($pointer);
        }

        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);

        # (XRFMFB - 1) * 512 + XRFMFP
        # why do i have to do XRFMFP % 1024 ?

        my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);

        print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});

        # Get Record Information

        seek($self->{'fileMST'},$blk_off,0);

        read($self->{'fileMST'}, $buff, 4);
        my $value=unpack("l",$buff);

        print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});

        if ($value!=$mfn) {
                if ($value == 0) {
                        print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
                        $self->{deleted} = $mfn;
                        return;
                }

                carp "Error: MFN ".$mfn." not found in MST file, found $value";    
                return;
        }

        read($self->{'fileMST'}, $buff, 14);

        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);

        print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});

        warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);

        warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);

        # Get Directory Format

        my @FieldPOS;
        my @FieldLEN;
        my @FieldTAG;

        read($self->{'fileMST'}, $buff, 6 * $NVF);

        my $rec_len = 0;

        for (my $i = 0 ; $i < $NVF ; $i++) {

                my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));

                print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});

                # The TAG does not exists in .FDT so we set it to 0.
                #
                # XXX This is removed from perl version; .FDT file is updated manually, so
                # you will often have fields in .MST file which aren't in .FDT. On the other
                # hand, IsisMarc doesn't use .FDT files at all!

                #if (! $self->{TagName}->{$TAG}) {
                #       $TAG=0;
                #}

                push @FieldTAG,$TAG;
                push @FieldPOS,$POS;
                push @FieldLEN,$LEN;

                $rec_len += $LEN;
        }

        # Get Variable Fields

        read($self->{'fileMST'},$buff,$rec_len);

        print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});

        for (my $i = 0 ; $i < $NVF ; $i++) {
                # skip zero-sized fields
                next if ($FieldLEN[$i] == 0);

                push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
        }

        $self->{'current_mfn'} = $mfn;

        print STDERR Dumper($self),"\n" if ($self->{debug});

        return $self->{'record'};
}

=head2 to_ascii

Returns ASCII output of record with specified MFN

  print $isis->to_ascii(42);

This outputs something like this:

  210   ^aNew York^cNew York University press^dcop. 1988
  990   2140
  990   88
  990   HAY

If C<read_fdt> is specified when calling C<new> it will display field names
from C<.FDT> file instead of numeric tags.

=cut

sub to_ascii {
        my $self = shift;

        my $mfn = shift || croak "need MFN";

        my $rec = $self->fetch($mfn);

        my $out = "0\t$mfn";

        foreach my $f (sort keys %{$rec}) {
                my $fn = $self->tag_name($f);
                $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
        }

        $out .= "\n";

        return $out;
}

=head2 to_hash

Read record with specified MFN and convert it to hash

  my $hash = $isis->to_hash($mfn);

It has ability to convert characters (using C<hash_filter>) from ISIS
database before creating structures enabling character re-mapping or quick
fix-up of data.

This function returns hash which is like this:

  $hash = {
    '210' => [
               {
                 'c' => 'New York University press',
                 'a' => 'New York',
                 'd' => 'cop. 1988'
               }
             ],
    '990' => [
               '2140',
               '88',
               'HAY'
             ],
  };

You can later use that hash to produce any output from ISIS data.

If database is created using IsisMarc, it will also have to special fields
which will be used for identifiers, C<i1> and C<i2> like this:

  '200' => [
             {
               'i1' => '1',
               'i2' => ' '
               'a' => 'Goa',
               'f' => 'Valdo D\'Arienzo',
               'e' => 'tipografie e tipografi nel XVI secolo',
             }
           ],

This method will also create additional field C<000> with MFN.

=cut

sub to_hash {
        my $self = shift;

        my $mfn = shift || confess "need mfn!";

        # init record to include MFN as field 000
        my $rec = { '000' => [ $mfn ] };

        my $row = $self->fetch($mfn);

        foreach my $k (keys %{$row}) {
                foreach my $l (@{$row->{$k}}) {

                        # filter output
                        $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});

                        my $val;

                        # has identifiers?
                        ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);

                        # has subfields?
                        if ($l =~ m/\^/) {
                                foreach my $t (split(/\^/,$l)) {
                                        next if (! $t);
                                        $val->{substr($t,0,1)} = substr($t,1);
                                }
                        } else {
                                $val = $l;
                        }

                        push @{$rec->{$k}}, $val;
                }
        }

        return $rec;
}

=head2 tag_name

Return name of selected tag

 print $isis->tag_name('200');

=cut

sub tag_name {
        my $self = shift;
        my $tag = shift || return;
        return $self->{'TagName'}->{$tag} || $tag;
}

1;

=head1 BUGS

Some parts of CDS/ISIS documentation are not detailed enough to exmplain
some variations in input databases which has been tested with this module.
When I was in doubt, I assumed that OpenIsis's implementation was right
(except for obvious bugs).

However, every effort has been made to test this module with as much
databases (and programs that create them) as possible.

I would be very greatful for success or failure reports about usage of this
module with databases from programs other than WinIsis and IsisMarc. I had
tested this against ouput of one C<isis.dll>-based application, but I don't
know any details about it's version.

=head1 AUTHOR

        Dobrica Pavlinusic
        CPAN ID: DPAVLIN
        dpavlin@rot13.org
        http://www.rot13.org/~dpavlin/

This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

OpenIsis web site L<http://www.openisis.org>

perl4lib site L<http://perl4lib.perl.org>

1	package IsisDB;
2	use strict;
3
4	use Carp;
5	use File::Glob qw(:globally :nocase);
6
7	use Data::Dumper;
8
9	BEGIN {
10	use Exporter ();
11	use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12	$VERSION = 0.09;
13	@ISA = qw (Exporter);
14	#Give a hoot don't pollute, do not export more than needed by default
15	@EXPORT = qw ();
16	@EXPORT_OK = qw ();
17	%EXPORT_TAGS = ();
18
19	}
20
21	=head1 NAME
22
23	IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
24
25	=head1 SYNOPSIS
26
27	use IsisDB;
28
29	my $isis = new IsisDB(
30	isisdb => './cds/cds',
31	);
32
33	for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34	print $isis->to_ascii($mfn),"\n";
35	}
36
37	=head1 DESCRIPTION
38
39	This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40	IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41	seems to depriciate it's old C<XS> bindings for perl.
42
43	It can create hash values from data in ISIS database (using C<to_hash>),
44	ASCII dump (using C<to_ascii>) or just hash with field names and packed
45	values (like C<^asomething^belse>).
46
47	Unique feature of this module is ability to C<include_deleted> records.
48	It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
49	fields which are zero sized will be filled with random junk from memory).
50
51	It also has support for identifiers (only if ISIS database is created by
52	IsisMarc), see C<to_hash>.
53
54	This module will always be slower than OpenIsis module which use C
55	library. However, since it's written in perl, it's platform independent (so
56	you don't need C compiler), and can be easily modified. I hope that it
57	creates data structures which are easier to use than ones created by
58	OpenIsis, so reduced time in other parts of the code should compensate for
59	slower performance of this module (speed of reading ISIS database is
60	rarely an issue).
61
62	=head1 METHODS
63
64	=cut
65
66	# my $ORDN; # Nodes Order
67	# my $ORDF; # Leafs Order
68	# my $N; # Number of Memory buffers for nodes
69	# my $K; # Number of buffers for first level index
70	# my $LIV; # Current number of Index Levels
71	# my $POSRX; # Pointer to Root Record in N0x
72	# my $NMAXPOS; # Next Available position in N0x
73	# my $FMAXPOS; # Next available position in L0x
74	# my $ABNORMAL; # Formal BTree normality indicator
75
76	#
77	# some binary reads
78	#
79
80	=head2 new
81
82	Open ISIS database
83
84	my $isis = new IsisDB(
85	isisdb => './cds/cds',
86	read_fdt => 1,
87	include_deleted => 1,
88	hash_filter => sub {
89	my $v = shift;
90	$v =~ s#foo#bar#g;
91	},
92	debug => 1,
93	);
94
95	Options are described below:
96
97	=over 5
98
99	=item isisdb
100
101	This is full or relative path to ISIS database files which include
102	common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
103	C<read_fdt> option) files.
104
105	In this example it uses C<./cds/cds.MST> and related files.
106
107	=item read_fdt
108
109	Boolean flag to specify if field definition table should be read. It's off
110	by default.
111
112	=item include_deleted
113
114	Don't skip logically deleted records in ISIS.
115
116	=item hash_filter
117
118	Filter code ref which will be used before data is converted to hash.
119
120	=item debug
121
122	Dump a B<lot> of debugging output.
123
124	=back
125
126	=cut
127
128	sub new {
129	my $class = shift;
130	my $self = {};
131	bless($self, $class);
132
133	croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
134
135	foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
136	$self->{$v} = {@_}->{$v};
137	}
138
139	my @isis_files = grep(/\.(FDT\|MST\|XRF\|CNT)$/i,glob($self->{isisdb}."*"));
140
141	foreach my $f (@isis_files) {
142	my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
143	$self->{lc($ext)."_file"} = $f;
144	}
145
146	my @must_exist = qw(mst xrf);
147	push @must_exist, "fdt" if ($self->{read_fdt});
148
149	foreach my $ext (@must_exist) {
150	croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
151	}
152
153	print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
154
155	# if you want to read .FDT file use read_fdt argument when creating class!
156	if ($self->{read_fdt} && -e $self->{fdt_file}) {
157
158	# read the $db.FDT file for tags
159	my $fieldzone=0;
160
161	open(fileFDT, $self->{fdt_file}) \|\| croak "can't read '$self->{fdt_file}': $!";
162
163	while (<fileFDT>) {
164	chomp;
165	if ($fieldzone) {
166	my $name=substr($_,0,30);
167	my $tag=substr($_,50,3);
168
169	$name =~ s/\s+$//;
170	$tag =~ s/\s+$//;
171
172	$self->{'TagName'}->{$tag}=$name;
173	}
174
175	if (/^\\\*/) {
176	$fieldzone=1;
177	}
178	}
179
180	close(fileFDT);
181	}
182
183	# Get the Maximum MFN from $db.MST
184
185	open($self->{'fileMST'}, $self->{mst_file}) \|\| croak "can't open '$self->{mst_file}': $!";
186
187	# MST format: (* = 32 bit signed)
188	# CTLMFN* always 0
189	# NXTMFN* MFN to be assigned to the next record created
190	# NXTMFB* last block allocated to master file
191	# NXTMFP offset to next available position in last block
192	# MFTYPE always 0 for user db file (1 for system)
193	seek($self->{'fileMST'},4,0);
194
195	my $buff;
196
197	read($self->{'fileMST'}, $buff, 4);
198	$self->{'NXTMFN'}=unpack("l",$buff) \|\| carp "NXTNFN is zero";
199
200
201
202
203	print STDERR Dumper($self),"\n" if ($self->{debug});
204
205	# open files for later
206	open($self->{'fileXRF'}, $self->{xrf_file}) \|\| croak "can't open '$self->{xrf_file}': $!";
207
208	$self ? return $self : return undef;
209	}
210
211	=head2 count
212
213	Return number of records in database
214
215	print $isis->count;
216
217	=cut
218
219	sub count {
220	my $self = shift;
221	return $self->{'NXTMFN'} - 1;
222	}
223
224	=head2 read_cnt
225
226	Read content of C<.CNT> file and return hash containing it.
227
228	print Dumper($isis->read_cnt);
229
230	This function is not used by module (C<.CNT> files are not required for this
231	module to work), but it can be useful to examine your index (while debugging
232	for example).
233
234	=cut
235
236	sub read_cnt {
237	my $self = shift;
238
239	croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
240
241	# Get the index information from $db.CNT
242
243	open(fileCNT, $self->{cnt_file}) \|\| croak "can't read '$self->{cnt_file}': $!";
244
245	my $buff;
246
247	read(fileCNT, $buff, 26);
248	$self->unpack_cnt($buff);
249
250	read(fileCNT, $buff, 26);
251	$self->unpack_cnt($buff);
252
253	close(fileCNT);
254
255	return $self->{cnt};
256	}
257
258	=head2 unpack_cnt
259
260	Unpack one of two 26 bytes fixed length record in C<.CNT> file.
261
262	Here is definition of record:
263
264	off key description size
265	0: IDTYPE BTree type s
266	2: ORDN Nodes Order s
267	4: ORDF Leafs Order s
268	6: N Number of Memory buffers for nodes s
269	8: K Number of buffers for first level index s
270	10: LIV Current number of Index Levels s
271	12: POSRX Pointer to Root Record in N0x l
272	16: NMAXPOS Next Available position in N0x l
273	20: FMAXPOS Next available position in L0x l
274	24: ABNORMAL Formal BTree normality indicator s
275	length: 26 bytes
276
277	This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
278
279	=cut
280
281	sub unpack_cnt {
282	my $self = shift;
283
284	my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
285
286	my $buff = shift \|\| return;
287	my @arr = unpack("ssssssllls", $buff);
288
289	print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
290
291	my $IDTYPE = shift @arr;
292	foreach (@flds) {
293	$self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
294	}
295	}
296
297	=head2 fetch
298
299	Read record with selected MFN
300
301	my $rec = $isis->fetch(55);
302
303	Returns hash with keys which are field names and values are unpacked values
304	for that field like this:
305
306	$rec = {
307	'210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
308	'990' => [ '2140', '88', 'HAY' ],
309	};
310
311	=cut
312
313	sub fetch {
314	my $self = shift;
315
316	my $mfn = shift \|\| croak "fetch needs MFN as argument!";
317
318	# is mfn allready in memory?
319	my $old_mfn = $self->{'current_mfn'} \|\| -1;
320	return $self->{record} if ($mfn == $old_mfn);
321
322	print STDERR "## fetch: $mfn\n" if ($self->{debug});
323
324	# XXX check this?
325	my $mfnpos=($mfn+int(($mfn-1)/127))*4;
326
327	print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
328	seek($self->{'fileXRF'},$mfnpos,0);
329
330	my $buff;
331
332	# delete old record
333	delete $self->{record};
334
335	# read XRFMFB abd XRFMFP
336	read($self->{'fileXRF'}, $buff, 4);
337	my $pointer=unpack("l",$buff) \|\| carp "pointer is null";
338
339	# check for logically deleted record
340	if ($pointer < 0) {
341	print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
342	$self->{deleted} = $mfn;
343
344	return unless $self->{include_deleted};
345
346	$pointer = abs($pointer);
347	}
348
349	my $XRFMFB = int($pointer/2048);
350	my $XRFMFP = $pointer - ($XRFMFB*2048);
351
352	# (XRFMFB - 1) * 512 + XRFMFP
353	# why do i have to do XRFMFP % 1024 ?
354
355	my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
356
357	print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
358
359	# Get Record Information
360
361	seek($self->{'fileMST'},$blk_off,0);
362
363	read($self->{'fileMST'}, $buff, 4);
364	my $value=unpack("l",$buff);
365
366	print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
367
368	if ($value!=$mfn) {
369	if ($value == 0) {
370	print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
371	$self->{deleted} = $mfn;
372	return;
373	}
374
375	carp "Error: MFN ".$mfn." not found in MST file, found $value";
376	return;
377	}
378
379	read($self->{'fileMST'}, $buff, 14);
380
381	my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
382
383	print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
384
385	warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
386
387	warn "BASE is not 18+6NVF" unless ($BASE == 18 + 6 $NVF);
388
389	# Get Directory Format
390
391	my @FieldPOS;
392	my @FieldLEN;
393	my @FieldTAG;
394
395	read($self->{'fileMST'}, $buff, 6 * $NVF);
396
397	my $rec_len = 0;
398
399	for (my $i = 0 ; $i < $NVF ; $i++) {
400
401	my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
402
403	print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
404
405	# The TAG does not exists in .FDT so we set it to 0.
406	#
407	# XXX This is removed from perl version; .FDT file is updated manually, so
408	# you will often have fields in .MST file which aren't in .FDT. On the other
409	# hand, IsisMarc doesn't use .FDT files at all!
410
411	#if (! $self->{TagName}->{$TAG}) {
412	# $TAG=0;
413	#}
414
415	push @FieldTAG,$TAG;
416	push @FieldPOS,$POS;
417	push @FieldLEN,$LEN;
418
419	$rec_len += $LEN;
420	}
421
422	# Get Variable Fields
423
424	read($self->{'fileMST'},$buff,$rec_len);
425
426	print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
427
428	for (my $i = 0 ; $i < $NVF ; $i++) {
429	# skip zero-sized fields
430	next if ($FieldLEN[$i] == 0);
431
432	push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
433	}
434
435	$self->{'current_mfn'} = $mfn;
436
437	print STDERR Dumper($self),"\n" if ($self->{debug});
438
439	return $self->{'record'};
440	}
441
442	=head2 to_ascii
443
444	Returns ASCII output of record with specified MFN
445
446	print $isis->to_ascii(42);
447
448	This outputs something like this:
449
450	210 ^aNew York^cNew York University press^dcop. 1988
451	990 2140
452	990 88
453	990 HAY
454
455	If C<read_fdt> is specified when calling C<new> it will display field names
456	from C<.FDT> file instead of numeric tags.
457
458	=cut
459
460	sub to_ascii {
461	my $self = shift;
462
463	my $mfn = shift \|\| croak "need MFN";
464
465	my $rec = $self->fetch($mfn);
466
467	my $out = "0\t$mfn";
468
469	foreach my $f (sort keys %{$rec}) {
470	my $fn = $self->tag_name($f);
471	$out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
472	}
473
474	$out .= "\n";
475
476	return $out;
477	}
478
479	=head2 to_hash
480
481	Read record with specified MFN and convert it to hash
482
483	my $hash = $isis->to_hash($mfn);
484
485	It has ability to convert characters (using C<hash_filter>) from ISIS
486	database before creating structures enabling character re-mapping or quick
487	fix-up of data.
488
489	This function returns hash which is like this:
490
491	$hash = {
492	'210' => [
493	{
494	'c' => 'New York University press',
495	'a' => 'New York',
496	'd' => 'cop. 1988'
497	}
498	],
499	'990' => [
500	'2140',
501	'88',
502	'HAY'
503	],
504	};
505
506	You can later use that hash to produce any output from ISIS data.
507
508	If database is created using IsisMarc, it will also have to special fields
509	which will be used for identifiers, C<i1> and C<i2> like this:
510
511	'200' => [
512	{
513	'i1' => '1',
514	'i2' => ' '
515	'a' => 'Goa',
516	'f' => 'Valdo D\'Arienzo',
517	'e' => 'tipografie e tipografi nel XVI secolo',
518	}
519	],
520
521	This method will also create additional field C<000> with MFN.
522
523	=cut
524
525	sub to_hash {
526	my $self = shift;
527
528	my $mfn = shift \|\| confess "need mfn!";
529
530	# init record to include MFN as field 000
531	my $rec = { '000' => [ $mfn ] };
532
533	my $row = $self->fetch($mfn);
534
535	foreach my $k (keys %{$row}) {
536	foreach my $l (@{$row->{$k}}) {
537
538	# filter output
539	$l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
540
541	my $val;
542
543	# has identifiers?
544	($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
545
546	# has subfields?
547	if ($l =~ m/\^/) {
548	foreach my $t (split(/\^/,$l)) {
549	next if (! $t);
550	$val->{substr($t,0,1)} = substr($t,1);
551	}
552	} else {
553	$val = $l;
554	}
555
556	push @{$rec->{$k}}, $val;
557	}
558	}
559
560	return $rec;
561	}
562
563	=head2 tag_name
564
565	Return name of selected tag
566
567	print $isis->tag_name('200');
568
569	=cut
570
571	sub tag_name {
572	my $self = shift;
573	my $tag = shift \|\| return;
574	return $self->{'TagName'}->{$tag} \|\| $tag;
575	}
576
577	1;
578
579	=head1 BUGS
580
581	Some parts of CDS/ISIS documentation are not detailed enough to exmplain
582	some variations in input databases which has been tested with this module.
583	When I was in doubt, I assumed that OpenIsis's implementation was right
584	(except for obvious bugs).
585
586	However, every effort has been made to test this module with as much
587	databases (and programs that create them) as possible.
588
589	I would be very greatful for success or failure reports about usage of this
590	module with databases from programs other than WinIsis and IsisMarc. I had
591	tested this against ouput of one C<isis.dll>-based application, but I don't
592	know any details about it's version.
593
594	=head1 AUTHOR
595
596	Dobrica Pavlinusic
597	CPAN ID: DPAVLIN
598	dpavlin@rot13.org
599	http://www.rot13.org/~dpavlin/
600
601	This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
602	written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
603
604	=head1 COPYRIGHT
605
606	This program is free software; you can redistribute
607	it and/or modify it under the same terms as Perl itself.
608
609	The full text of the license can be found in the
610	LICENSE file included with this module.
611
612
613	=head1 SEE ALSO
614
615	OpenIsis web site L<http://www.openisis.org>
616
617	perl4lib site L<http://perl4lib.perl.org>
618