Biblio-Isis/trunk/IsisDB.pm

package IsisDB;
use strict;

use Carp;
use Data::Dumper;

BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
        $VERSION     = 0.06;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
        @EXPORT_OK   = qw ();
        %EXPORT_TAGS = ();

}

=head1 NAME

IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database

=head1 SYNOPSIS

  use IsisDB;

  my $isis = new IsisDB(
        isisdb => './cds/cds',
  );

  for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
        print $isis->to_ascii($mfn),"\n";
  }

=head1 DESCRIPTION

This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
IsisMarc. It can be used as perl-only alternative to OpenIsis module.

It can create hash values from data in ISIS database (using C<to_hash>),
ASCII dump (using C<to_ascii>) or just hash with field names and packed
values (like C<^asomething^belse>).

Unique feature of this module is ability to C<include_deleted> records.
It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
fields which are zero sized will be filled with random junk from memory).

It also has support for identifiers (only if ISIS database is created by
IsisMarc), see C<to_hash>.

This will module will always be slower than OpenIsis module which use C
library. However, since it's written in perl, it's platform independent (so
you don't need C compiler), and can be easily modified. I hope that it
creates data structures which are easier to use than ones created by
OpenIsis, so reduced time in other parts of the code should compensate for
slower performance of this module (speed of reading ISIS database is
rarely an issue).

=head1 METHODS

=cut

#  my $ORDN;            # Nodes Order
#  my $ORDF;            # Leafs Order
#  my $N;               # Number of Memory buffers for nodes
#  my $K;               # Number of buffers for first level index
#  my $LIV;             # Current number of Index Levels
#  my $POSRX;           # Pointer to Root Record in N0x
#  my $NMAXPOS;         # Next Available position in N0x
#  my $FMAXPOS;         # Next available position in L0x
#  my $ABNORMAL;        # Formal BTree normality indicator

#
# some binary reads
#

=head2 new

Open ISIS database

 my $isis = new IsisDB(
        isisdb => './cds/cds',
        read_fdt => 1,
        include_deleted => 1,
        hash_filter => sub {
                my $v = shift;
                $v =~ s#foo#bar#g;
        },
        debug => 1,
 );

Options are described below:

=over 5

=item isisdb

This is full or relative path to ISIS database files which include
common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.

In this example it uses C<./cds/cds.MST> and related files.

=item read_fdt

Boolean flag to specify if field definition table should be read. It's off
by default.

=item include_deleted

Don't skip logically deleted records in ISIS.

=item hash_filter

Filter code ref which will be used before data is converted to hash.

=item debug

Dump a B<lot> of debugging output.

=back

It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.

=cut

sub new {
        my $class = shift;
        my $self = {};
        bless($self, $class);

        croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});

        foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
                $self->{$v} = {@_}->{$v};
        }

        # if you want to read .FDT file use read_fdt argument when creating class!
        if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {

                # read the $db.FDT file for tags
                my $fieldzone=0;

                open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";

                while (<fileFDT>) {
                        chomp;
                        if ($fieldzone) {
                                my $name=substr($_,0,30);
                                my $tag=substr($_,50,3);

                                $name =~ s/\s+$//;
                                $tag =~ s/\s+$//;

                                $self->{'TagName'}->{$tag}=$name;  
                        }

                        if (/^\*\*\*/) {
                                $fieldzone=1;
                        }
                }
                
                close(fileFDT);
        }

        # Get the Maximum MFN from $db.MST

        open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";

        # MST format:   (* = 32 bit signed)
        # CTLMFN*       always 0
        # NXTMFN*       MFN to be assigned to the next record created
        # NXTMFB*       last block allocated to master file
        # NXTMFP        offset to next available position in last block
        # MFTYPE        always 0 for user db file (1 for system)
        seek(fileMST,4,0);

        my $buff;

        read(fileMST, $buff, 4);
        $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";

        # save maximum MFN
        $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;

        close(fileMST);

        # Get the index information from $db.CNT
   
        open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";

        # There is two 26 Bytes fixed lenght records

        #  0: IDTYPE    BTree type                              16
        #  2: ORDN      Nodes Order                             16
        #  4: ORDF      Leafs Order                             16
        #  6: N         Number of Memory buffers for nodes      16
        #  8: K         Number of buffers for first level index 16
        # 10: LIV       Current number of Index Levels          16
        # 12: POSRX*    Pointer to Root Record in N0x           32
        # 16: NMAXPOS*  Next Available position in N0x          32
        # 20: FMAXPOS*  Next available position in L0x          32
        # 24: ABNORMAL  Formal BTree normality indicator        16
        # length: 26 bytes

        sub unpack_cnt {
                my $self = shift;

                my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);

                my $buff = shift || return;
                my @arr = unpack("ssssssllls", $buff);

                print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});

                my $IDTYPE = shift @arr;
                foreach (@flds) {
                        $self->{$IDTYPE}->{$_} = abs(shift @arr);
                }
        }

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);


        close(fileCNT);

        print STDERR Dumper($self),"\n" if ($self->{debug});

        # open files for later
        open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";

        open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";

        $self ? return $self : return undef;
}

=head2 fetch

Read record with selected MFN

  my $rec = $isis->fetch(55);

Returns hash with keys which are field names and values are unpacked values
for that field like this:

  $rec = {
    '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
    '990' => [ '2140', '88', 'HAY' ],
  };

=cut

sub fetch {
        my $self = shift;

        my $mfn = shift || croak "fetch needs MFN as argument!";

        # is mfn allready in memory?
        my $old_mfn = $self->{'current_mfn'} || -1;
        return if ($mfn == $old_mfn);

        print STDERR "## fetch: $mfn\n" if ($self->{debug});

        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;

        print STDERR "## seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
        seek($self->{'fileXRF'},$mfnpos,0);

        my $buff;

        # read XRFMFB abd XRFMFP
        read($self->{'fileXRF'}, $buff, 4);
        my $pointer=unpack("l",$buff) || carp "pointer is null";

        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);


        # (XRFMFB - 1) * 512 + XRFMFP
        # why do i have to do XRFMFP % 1024 ?

        my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);

        print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});

        # Get Record Information

        seek($self->{'fileMST'},$blk_off,0);

        read($self->{'fileMST'}, $buff, 4);
        my $value=unpack("l",$buff);

        print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});

        if ($value!=$mfn) {
                carp "Error: MFN ".$mfn." not found in MST(".$value.")";    
                #return;                # XXX deleted record?
        }

#       $MFRL=$self->Read16($fileMST);
#       $MFBWB=$self->Read32($fileMST);
#       $MFBWP=$self->Read16($fileMST);
#       $BASE=$self->Read16($fileMST);
#       $NVF=$self->Read16($fileMST);
#       $STATUS=$self->Read16($fileMST);

        read($self->{'fileMST'}, $buff, 14);

        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);

        print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});

        # delete old record
        delete $self->{record};

        ## FIXME this is a bug
        if (! $self->{'include_deleted'} && $MFRL < 0) {
                print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
                return;
        }

        warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);

        # Get Directory Format

        my @FieldPOS;
        my @FieldLEN;
        my @FieldTAG;

        read($self->{'fileMST'}, $buff, 6 * $NVF);

        my $rec_len = 0;

        for (my $i = 0 ; $i < $NVF ; $i++) {

#               $TAG=$self->Read16($fileMST);
#               $POS=$self->Read16($fileMST);
#               $LEN=$self->Read16($fileMST);

                my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));

                print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});

                # The TAG does not exists in .FDT so we set it to 0.
                #
                # XXX This is removed from perl version; .FDT file is updated manually, so
                # you will often have fields in .MST file which aren't in .FDT. On the other
                # hand, IsisMarc doesn't use .FDT files at all!

                #if (! $self->{TagName}->{$TAG}) {
                #       $TAG=0;
                #}

                push @FieldTAG,$TAG;
                push @FieldPOS,$POS;
                push @FieldLEN,$LEN;

                $rec_len += $LEN;
        }

        # Get Variable Fields

        read($self->{'fileMST'},$buff,$rec_len);

        print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});

        for (my $i = 0 ; $i < $NVF ; $i++) {
                # skip zero-sized fields
                next if ($FieldLEN[$i] == 0);

                push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
        }
        close(fileMST);

        $self->{'current_mfn'} = $mfn;

        print Dumper($self),"\n" if ($self->{debug});

        return $self->{'record'};
}

=head2 to_ascii

Dump ASCII output of record with specified MFN

  print $isis->to_ascii(42);

It outputs something like this:

  210   ^aNew York^cNew York University press^dcop. 1988
  990   2140
  990   88
  990   HAY

If C<read_fdt> is specified when calling C<new> it will display field names
from C<.FDT> file instead of numeric tags.

=cut

sub to_ascii {
        my $self = shift;

        my $mfn = shift || croak "need MFN";

        my $rec = $self->fetch($mfn);

        my $out = "0\t$mfn";

        foreach my $f (sort keys %{$rec}) {
                my $fn = $self->tag_name($f);
                $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
        }

        $out .= "\n";

        return $out;
}

=head2 to_hash

Read record with specified MFN and convert it to hash

  my $hash = $isis->to_hash($mfn);

It has ability to convert characters (using C<hash_filter> from ISIS
database before creating structures enabling character re-mapping or quick
fix-up of data.

This function returns hash which is like this:

  $hash = {
    '210' => [
               {
                 'c' => 'New York University press',
                 'a' => 'New York',
                 'd' => 'cop. 1988'
               }
             ],
    '990' => [
               '2140',
               '88',
               'HAY'
             ],
  };

You can later use that hash to produce any output from ISIS data.

If database is created using IsisMarc, it will also have to special fields
which will be used for identifiers, C<i1> and C<i2> like this:

  '200' => [
             {
               'i1' => '1',
               'i2' => ' '
               'a' => 'Goa',
               'f' => 'Valdo D\'Arienzo',
               'e' => 'tipografie e tipografi nel XVI secolo',
             }
           ],

This method will also create additional field C<000> with MFN.

=cut

sub to_hash {
        my $self = shift;

        my $mfn = shift || confess "need mfn!";

        # init record to include MFN as field 000
        my $rec = { '000' => [ $mfn ] };

        my $row = $self->fetch($mfn);

        foreach my $k (keys %{$row}) {
                foreach my $l (@{$row->{$k}}) {

                        # filter output
                        $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});

                        my $val;

                        # has identifiers?
                        ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);

                        # has subfields?
                        if ($l =~ m/\^/) {
                                foreach my $t (split(/\^/,$l)) {
                                        next if (! $t);
                                        $val->{substr($t,0,1)} = substr($t,1);
                                }
                        } else {
                                $val = $l;
                        }

                        push @{$rec->{$k}}, $val;
                }
        }

        return $rec;
}

=head2 tag_name

Return name of selected tag

 print $isis->tag_name('200');

=cut

sub tag_name {
        my $self = shift;
        my $tag = shift || return;
        return $self->{'TagName'}->{$tag} || $tag;
}

1;

=head1 BUGS

This module has been very lightly tested. Use with caution and report bugs.

=head1 AUTHOR

        Dobrica Pavlinusic
        CPAN ID: DPAVLIN
        dpavlin@rot13.org
        http://www.rot13.org/~dpavlin/

This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

OpenIsis web site L<http://www.openisis.org>

perl4lib site L<http://perl4lib.perl.org>

1	package IsisDB;
2	use strict;
3
4	use Carp;
5	use Data::Dumper;
6
7	BEGIN {
8	use Exporter ();
9	use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10	$VERSION = 0.06;
11	@ISA = qw (Exporter);
12	#Give a hoot don't pollute, do not export more than needed by default
13	@EXPORT = qw ();
14	@EXPORT_OK = qw ();
15	%EXPORT_TAGS = ();
16
17	}
18
19	=head1 NAME
20
21	IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
22
23	=head1 SYNOPSIS
24
25	use IsisDB;
26
27	my $isis = new IsisDB(
28	isisdb => './cds/cds',
29	);
30
31	for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32	print $isis->to_ascii($mfn),"\n";
33	}
34
35	=head1 DESCRIPTION
36
37	This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38	IsisMarc. It can be used as perl-only alternative to OpenIsis module.
39
40	It can create hash values from data in ISIS database (using C<to_hash>),
41	ASCII dump (using C<to_ascii>) or just hash with field names and packed
42	values (like C<^asomething^belse>).
43
44	Unique feature of this module is ability to C<include_deleted> records.
45	It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46	fields which are zero sized will be filled with random junk from memory).
47
48	It also has support for identifiers (only if ISIS database is created by
49	IsisMarc), see C<to_hash>.
50
51	This will module will always be slower than OpenIsis module which use C
52	library. However, since it's written in perl, it's platform independent (so
53	you don't need C compiler), and can be easily modified. I hope that it
54	creates data structures which are easier to use than ones created by
55	OpenIsis, so reduced time in other parts of the code should compensate for
56	slower performance of this module (speed of reading ISIS database is
57	rarely an issue).
58
59	=head1 METHODS
60
61	=cut
62
63	# my $ORDN; # Nodes Order
64	# my $ORDF; # Leafs Order
65	# my $N; # Number of Memory buffers for nodes
66	# my $K; # Number of buffers for first level index
67	# my $LIV; # Current number of Index Levels
68	# my $POSRX; # Pointer to Root Record in N0x
69	# my $NMAXPOS; # Next Available position in N0x
70	# my $FMAXPOS; # Next available position in L0x
71	# my $ABNORMAL; # Formal BTree normality indicator
72
73	#
74	# some binary reads
75	#
76
77	=head2 new
78
79	Open ISIS database
80
81	my $isis = new IsisDB(
82	isisdb => './cds/cds',
83	read_fdt => 1,
84	include_deleted => 1,
85	hash_filter => sub {
86	my $v = shift;
87	$v =~ s#foo#bar#g;
88	},
89	debug => 1,
90	);
91
92	Options are described below:
93
94	=over 5
95
96	=item isisdb
97
98	This is full or relative path to ISIS database files which include
99	common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
100
101	In this example it uses C<./cds/cds.MST> and related files.
102
103	=item read_fdt
104
105	Boolean flag to specify if field definition table should be read. It's off
106	by default.
107
108	=item include_deleted
109
110	Don't skip logically deleted records in ISIS.
111
112	=item hash_filter
113
114	Filter code ref which will be used before data is converted to hash.
115
116	=item debug
117
118	Dump a B<lot> of debugging output.
119
120	=back
121
122	It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
123
124	=cut
125
126	sub new {
127	my $class = shift;
128	my $self = {};
129	bless($self, $class);
130
131	croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
132
133	foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
134	$self->{$v} = {@_}->{$v};
135	}
136
137	# if you want to read .FDT file use read_fdt argument when creating class!
138	if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
139
140	# read the $db.FDT file for tags
141	my $fieldzone=0;
142
143	open(fileFDT, $self->{isisdb}.".FDT") \|\| croak "can't read '$self->{isisdb}.FDT': $!";
144
145	while (<fileFDT>) {
146	chomp;
147	if ($fieldzone) {
148	my $name=substr($_,0,30);
149	my $tag=substr($_,50,3);
150
151	$name =~ s/\s+$//;
152	$tag =~ s/\s+$//;
153
154	$self->{'TagName'}->{$tag}=$name;
155	}
156
157	if (/^\\\*/) {
158	$fieldzone=1;
159	}
160	}
161
162	close(fileFDT);
163	}
164
165	# Get the Maximum MFN from $db.MST
166
167	open(fileMST,$self->{isisdb}.".MST") \|\| croak "can't read '$self->{isisdb}.MST': $!";
168
169	# MST format: (* = 32 bit signed)
170	# CTLMFN* always 0
171	# NXTMFN* MFN to be assigned to the next record created
172	# NXTMFB* last block allocated to master file
173	# NXTMFP offset to next available position in last block
174	# MFTYPE always 0 for user db file (1 for system)
175	seek(fileMST,4,0);
176
177	my $buff;
178
179	read(fileMST, $buff, 4);
180	$self->{'NXTMFN'}=unpack("l",$buff) \|\| carp "NXTNFN is zero";
181
182	# save maximum MFN
183	$self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
184
185	close(fileMST);
186
187	# Get the index information from $db.CNT
188
189	open(fileCNT, $self->{isisdb}.".CNT") \|\| croak "can't read '$self->{isisdb}.CNT': $!";
190
191	# There is two 26 Bytes fixed lenght records
192
193	# 0: IDTYPE BTree type 16
194	# 2: ORDN Nodes Order 16
195	# 4: ORDF Leafs Order 16
196	# 6: N Number of Memory buffers for nodes 16
197	# 8: K Number of buffers for first level index 16
198	# 10: LIV Current number of Index Levels 16
199	# 12: POSRX* Pointer to Root Record in N0x 32
200	# 16: NMAXPOS* Next Available position in N0x 32
201	# 20: FMAXPOS* Next available position in L0x 32
202	# 24: ABNORMAL Formal BTree normality indicator 16
203	# length: 26 bytes
204
205	sub unpack_cnt {
206	my $self = shift;
207
208	my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
209
210	my $buff = shift \|\| return;
211	my @arr = unpack("ssssssllls", $buff);
212
213	print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
214
215	my $IDTYPE = shift @arr;
216	foreach (@flds) {
217	$self->{$IDTYPE}->{$_} = abs(shift @arr);
218	}
219	}
220
221	read(fileCNT, $buff, 26);
222	$self->unpack_cnt($buff);
223
224	read(fileCNT, $buff, 26);
225	$self->unpack_cnt($buff);
226
227
228	close(fileCNT);
229
230	print STDERR Dumper($self),"\n" if ($self->{debug});
231
232	# open files for later
233	open($self->{'fileXRF'}, $self->{isisdb}.".XRF") \|\| croak "can't open '$self->{isisdb}.XRF': $!";
234
235	open($self->{'fileMST'}, $self->{isisdb}.".MST") \|\| croak "can't open '$self->{isisdb}.MST': $!";
236
237	$self ? return $self : return undef;
238	}
239
240	=head2 fetch
241
242	Read record with selected MFN
243
244	my $rec = $isis->fetch(55);
245
246	Returns hash with keys which are field names and values are unpacked values
247	for that field like this:
248
249	$rec = {
250	'210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
251	'990' => [ '2140', '88', 'HAY' ],
252	};
253
254	=cut
255
256	sub fetch {
257	my $self = shift;
258
259	my $mfn = shift \|\| croak "fetch needs MFN as argument!";
260
261	# is mfn allready in memory?
262	my $old_mfn = $self->{'current_mfn'} \|\| -1;
263	return if ($mfn == $old_mfn);
264
265	print STDERR "## fetch: $mfn\n" if ($self->{debug});
266
267	# XXX check this?
268	my $mfnpos=($mfn+int(($mfn-1)/127))*4;
269
270	print STDERR "## seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
271	seek($self->{'fileXRF'},$mfnpos,0);
272
273	my $buff;
274
275	# read XRFMFB abd XRFMFP
276	read($self->{'fileXRF'}, $buff, 4);
277	my $pointer=unpack("l",$buff) \|\| carp "pointer is null";
278
279	my $XRFMFB = int($pointer/2048);
280	my $XRFMFP = $pointer - ($XRFMFB*2048);
281
282
283	# (XRFMFB - 1) * 512 + XRFMFP
284	# why do i have to do XRFMFP % 1024 ?
285
286	my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);
287
288	print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
289
290	# Get Record Information
291
292	seek($self->{'fileMST'},$blk_off,0);
293
294	read($self->{'fileMST'}, $buff, 4);
295	my $value=unpack("l",$buff);
296
297	print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
298
299	if ($value!=$mfn) {
300	carp "Error: MFN ".$mfn." not found in MST(".$value.")";
301	#return; # XXX deleted record?
302	}
303
304	# $MFRL=$self->Read16($fileMST);
305	# $MFBWB=$self->Read32($fileMST);
306	# $MFBWP=$self->Read16($fileMST);
307	# $BASE=$self->Read16($fileMST);
308	# $NVF=$self->Read16($fileMST);
309	# $STATUS=$self->Read16($fileMST);
310
311	read($self->{'fileMST'}, $buff, 14);
312
313	my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
314
315	print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
316
317	# delete old record
318	delete $self->{record};
319
320	## FIXME this is a bug
321	if (! $self->{'include_deleted'} && $MFRL < 0) {
322	print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
323	return;
324	}
325
326	warn "BASE is not 18+6NVF" unless ($BASE == 18 + 6 $NVF);
327
328	# Get Directory Format
329
330	my @FieldPOS;
331	my @FieldLEN;
332	my @FieldTAG;
333
334	read($self->{'fileMST'}, $buff, 6 * $NVF);
335
336	my $rec_len = 0;
337
338	for (my $i = 0 ; $i < $NVF ; $i++) {
339
340	# $TAG=$self->Read16($fileMST);
341	# $POS=$self->Read16($fileMST);
342	# $LEN=$self->Read16($fileMST);
343
344	my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
345
346	print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
347
348	# The TAG does not exists in .FDT so we set it to 0.
349	#
350	# XXX This is removed from perl version; .FDT file is updated manually, so
351	# you will often have fields in .MST file which aren't in .FDT. On the other
352	# hand, IsisMarc doesn't use .FDT files at all!
353
354	#if (! $self->{TagName}->{$TAG}) {
355	# $TAG=0;
356	#}
357
358	push @FieldTAG,$TAG;
359	push @FieldPOS,$POS;
360	push @FieldLEN,$LEN;
361
362	$rec_len += $LEN;
363	}
364
365	# Get Variable Fields
366
367	read($self->{'fileMST'},$buff,$rec_len);
368
369	print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
370
371	for (my $i = 0 ; $i < $NVF ; $i++) {
372	# skip zero-sized fields
373	next if ($FieldLEN[$i] == 0);
374
375	push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
376	}
377	close(fileMST);
378
379	$self->{'current_mfn'} = $mfn;
380
381	print Dumper($self),"\n" if ($self->{debug});
382
383	return $self->{'record'};
384	}
385
386	=head2 to_ascii
387
388	Dump ASCII output of record with specified MFN
389
390	print $isis->to_ascii(42);
391
392	It outputs something like this:
393
394	210 ^aNew York^cNew York University press^dcop. 1988
395	990 2140
396	990 88
397	990 HAY
398
399	If C<read_fdt> is specified when calling C<new> it will display field names
400	from C<.FDT> file instead of numeric tags.
401
402	=cut
403
404	sub to_ascii {
405	my $self = shift;
406
407	my $mfn = shift \|\| croak "need MFN";
408
409	my $rec = $self->fetch($mfn);
410
411	my $out = "0\t$mfn";
412
413	foreach my $f (sort keys %{$rec}) {
414	my $fn = $self->tag_name($f);
415	$out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
416	}
417
418	$out .= "\n";
419
420	return $out;
421	}
422
423	=head2 to_hash
424
425	Read record with specified MFN and convert it to hash
426
427	my $hash = $isis->to_hash($mfn);
428
429	It has ability to convert characters (using C<hash_filter> from ISIS
430	database before creating structures enabling character re-mapping or quick
431	fix-up of data.
432
433	This function returns hash which is like this:
434
435	$hash = {
436	'210' => [
437	{
438	'c' => 'New York University press',
439	'a' => 'New York',
440	'd' => 'cop. 1988'
441	}
442	],
443	'990' => [
444	'2140',
445	'88',
446	'HAY'
447	],
448	};
449
450	You can later use that hash to produce any output from ISIS data.
451
452	If database is created using IsisMarc, it will also have to special fields
453	which will be used for identifiers, C<i1> and C<i2> like this:
454
455	'200' => [
456	{
457	'i1' => '1',
458	'i2' => ' '
459	'a' => 'Goa',
460	'f' => 'Valdo D\'Arienzo',
461	'e' => 'tipografie e tipografi nel XVI secolo',
462	}
463	],
464
465	This method will also create additional field C<000> with MFN.
466
467	=cut
468
469	sub to_hash {
470	my $self = shift;
471
472	my $mfn = shift \|\| confess "need mfn!";
473
474	# init record to include MFN as field 000
475	my $rec = { '000' => [ $mfn ] };
476
477	my $row = $self->fetch($mfn);
478
479	foreach my $k (keys %{$row}) {
480	foreach my $l (@{$row->{$k}}) {
481
482	# filter output
483	$l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
484
485	my $val;
486
487	# has identifiers?
488	($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);
489
490	# has subfields?
491	if ($l =~ m/\^/) {
492	foreach my $t (split(/\^/,$l)) {
493	next if (! $t);
494	$val->{substr($t,0,1)} = substr($t,1);
495	}
496	} else {
497	$val = $l;
498	}
499
500	push @{$rec->{$k}}, $val;
501	}
502	}
503
504	return $rec;
505	}
506
507	=head2 tag_name
508
509	Return name of selected tag
510
511	print $isis->tag_name('200');
512
513	=cut
514
515	sub tag_name {
516	my $self = shift;
517	my $tag = shift \|\| return;
518	return $self->{'TagName'}->{$tag} \|\| $tag;
519	}
520
521	1;
522
523	=head1 BUGS
524
525	This module has been very lightly tested. Use with caution and report bugs.
526
527	=head1 AUTHOR
528
529	Dobrica Pavlinusic
530	CPAN ID: DPAVLIN
531	dpavlin@rot13.org
532	http://www.rot13.org/~dpavlin/
533
534	This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
535	written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
536
537	=head1 COPYRIGHT
538
539	This program is free software; you can redistribute
540	it and/or modify it under the same terms as Perl itself.
541
542	The full text of the license can be found in the
543	LICENSE file included with this module.
544
545
546	=head1 SEE ALSO
547
548	OpenIsis web site L<http://www.openisis.org>
549
550	perl4lib site L<http://perl4lib.perl.org>
551