lib/Biblio/Isis.pm

package IsisDB;
use strict;

use Carp;
use File::Glob qw(:globally :nocase);

use Data::Dumper;

BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
        $VERSION     = 0.07;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
        @EXPORT_OK   = qw ();
        %EXPORT_TAGS = ();

}

=head1 NAME

IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database

=head1 SYNOPSIS

  use IsisDB;

  my $isis = new IsisDB(
        isisdb => './cds/cds',
  );

  for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
        print $isis->to_ascii($mfn),"\n";
  }

=head1 DESCRIPTION

This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
IsisMarc. It can be used as perl-only alternative to OpenIsis module.

It can create hash values from data in ISIS database (using C<to_hash>),
ASCII dump (using C<to_ascii>) or just hash with field names and packed
values (like C<^asomething^belse>).

Unique feature of this module is ability to C<include_deleted> records.
It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
fields which are zero sized will be filled with random junk from memory).

It also has support for identifiers (only if ISIS database is created by
IsisMarc), see C<to_hash>.

This will module will always be slower than OpenIsis module which use C
library. However, since it's written in perl, it's platform independent (so
you don't need C compiler), and can be easily modified. I hope that it
creates data structures which are easier to use than ones created by
OpenIsis, so reduced time in other parts of the code should compensate for
slower performance of this module (speed of reading ISIS database is
rarely an issue).

=head1 METHODS

=cut

#  my $ORDN;            # Nodes Order
#  my $ORDF;            # Leafs Order
#  my $N;               # Number of Memory buffers for nodes
#  my $K;               # Number of buffers for first level index
#  my $LIV;             # Current number of Index Levels
#  my $POSRX;           # Pointer to Root Record in N0x
#  my $NMAXPOS;         # Next Available position in N0x
#  my $FMAXPOS;         # Next available position in L0x
#  my $ABNORMAL;        # Formal BTree normality indicator

#
# some binary reads
#

=head2 new

Open ISIS database

 my $isis = new IsisDB(
        isisdb => './cds/cds',
        read_fdt => 1,
        include_deleted => 1,
        hash_filter => sub {
                my $v = shift;
                $v =~ s#foo#bar#g;
        },
        debug => 1,
 );

Options are described below:

=over 5

=item isisdb

This is full or relative path to ISIS database files which include
common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
C<read_fdt> option) files.

In this example it uses C<./cds/cds.MST> and related files.

=item read_fdt

Boolean flag to specify if field definition table should be read. It's off
by default.

=item include_deleted

Don't skip logically deleted records in ISIS.

=item hash_filter

Filter code ref which will be used before data is converted to hash.

=item debug

Dump a B<lot> of debugging output.

=back

It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.

=cut

sub new {
        my $class = shift;
        my $self = {};
        bless($self, $class);

        croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});

        foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
                $self->{$v} = {@_}->{$v};
        }

        my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));

        foreach my $f (@isis_files) {
                my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
                $self->{lc($ext)."_file"} = $f;
        }

        my @must_exist = qw(mst xrf);
        push @must_exist, "fdt" if ($self->{read_fdt});

        foreach my $ext (@must_exist) {
                croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
        }

        print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});

        # if you want to read .FDT file use read_fdt argument when creating class!
        if ($self->{read_fdt} && -e $self->{fdt_file}) {

                # read the $db.FDT file for tags
                my $fieldzone=0;

                open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";

                while (<fileFDT>) {
                        chomp;
                        if ($fieldzone) {
                                my $name=substr($_,0,30);
                                my $tag=substr($_,50,3);

                                $name =~ s/\s+$//;
                                $tag =~ s/\s+$//;

                                $self->{'TagName'}->{$tag}=$name;  
                        }

                        if (/^\*\*\*/) {
                                $fieldzone=1;
                        }
                }
                
                close(fileFDT);
        }

        # Get the Maximum MFN from $db.MST

        open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";

        # MST format:   (* = 32 bit signed)
        # CTLMFN*       always 0
        # NXTMFN*       MFN to be assigned to the next record created
        # NXTMFB*       last block allocated to master file
        # NXTMFP        offset to next available position in last block
        # MFTYPE        always 0 for user db file (1 for system)
        seek($self->{'fileMST'},4,0);

        my $buff;

        read($self->{'fileMST'}, $buff, 4);
        $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";

        # save maximum MFN
        $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;


        print STDERR Dumper($self),"\n" if ($self->{debug});

        # open files for later
        open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";

        $self ? return $self : return undef;
}

=head2 read_cnt

This function is not really used by module, but can be useful to find info
about your index (if debugging it for example).

  print Dumper($isis->read_cnt);

=cut

sub read_cnt  {
        my $self = shift;

        croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});

        # Get the index information from $db.CNT
   
        open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";

        # There is two 26 Bytes fixed lenght records

        #  0: IDTYPE    BTree type                              16
        #  2: ORDN      Nodes Order                             16
        #  4: ORDF      Leafs Order                             16
        #  6: N         Number of Memory buffers for nodes      16
        #  8: K         Number of buffers for first level index 16
        # 10: LIV       Current number of Index Levels          16
        # 12: POSRX*    Pointer to Root Record in N0x           32
        # 16: NMAXPOS*  Next Available position in N0x          32
        # 20: FMAXPOS*  Next available position in L0x          32
        # 24: ABNORMAL  Formal BTree normality indicator        16
        # length: 26 bytes

        sub unpack_cnt {
                my $self = shift;

                my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);

                my $buff = shift || return;
                my @arr = unpack("ssssssllls", $buff);

                print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});

                my $IDTYPE = shift @arr;
                foreach (@flds) {
                        $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
                }
        }

        my $buff;

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        close(fileCNT);

        return $self->{cnt};
}

=head2 fetch

Read record with selected MFN

  my $rec = $isis->fetch(55);

Returns hash with keys which are field names and values are unpacked values
for that field like this:

  $rec = {
    '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
    '990' => [ '2140', '88', 'HAY' ],
  };

=cut

sub fetch {
        my $self = shift;

        my $mfn = shift || croak "fetch needs MFN as argument!";

        # is mfn allready in memory?
        my $old_mfn = $self->{'current_mfn'} || -1;
        return if ($mfn == $old_mfn);

        print STDERR "## fetch: $mfn\n" if ($self->{debug});

        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;

        print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
        seek($self->{'fileXRF'},$mfnpos,0);

        my $buff;

        # read XRFMFB abd XRFMFP
        read($self->{'fileXRF'}, $buff, 4);
        my $pointer=unpack("l",$buff) || carp "pointer is null";

        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);


        # (XRFMFB - 1) * 512 + XRFMFP
        # why do i have to do XRFMFP % 1024 ?

        my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);

        print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});

        # Get Record Information

        seek($self->{'fileMST'},$blk_off,0);

        read($self->{'fileMST'}, $buff, 4);
        my $value=unpack("l",$buff);

        print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});

        if ($value!=$mfn) {
                carp "Error: MFN ".$mfn." not found in MST(".$value.")";    
                #return;                # XXX deleted record?
        }

#       $MFRL=$self->Read16($fileMST);
#       $MFBWB=$self->Read32($fileMST);
#       $MFBWP=$self->Read16($fileMST);
#       $BASE=$self->Read16($fileMST);
#       $NVF=$self->Read16($fileMST);
#       $STATUS=$self->Read16($fileMST);

        read($self->{'fileMST'}, $buff, 14);

        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);

        print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});

        # delete old record
        delete $self->{record};

        ## FIXME this is a bug
        if (! $self->{'include_deleted'} && $MFRL < 0) {
                print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
                return;
        }

        warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);

        # Get Directory Format

        my @FieldPOS;
        my @FieldLEN;
        my @FieldTAG;

        read($self->{'fileMST'}, $buff, 6 * $NVF);

        my $rec_len = 0;

        for (my $i = 0 ; $i < $NVF ; $i++) {

#               $TAG=$self->Read16($fileMST);
#               $POS=$self->Read16($fileMST);
#               $LEN=$self->Read16($fileMST);

                my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));

                print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});

                # The TAG does not exists in .FDT so we set it to 0.
                #
                # XXX This is removed from perl version; .FDT file is updated manually, so
                # you will often have fields in .MST file which aren't in .FDT. On the other
                # hand, IsisMarc doesn't use .FDT files at all!

                #if (! $self->{TagName}->{$TAG}) {
                #       $TAG=0;
                #}

                push @FieldTAG,$TAG;
                push @FieldPOS,$POS;
                push @FieldLEN,$LEN;

                $rec_len += $LEN;
        }

        # Get Variable Fields

        read($self->{'fileMST'},$buff,$rec_len);

        print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});

        for (my $i = 0 ; $i < $NVF ; $i++) {
                # skip zero-sized fields
                next if ($FieldLEN[$i] == 0);

                push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
        }

        $self->{'current_mfn'} = $mfn;

        print Dumper($self),"\n" if ($self->{debug});

        return $self->{'record'};
}

=head2 to_ascii

Dump ASCII output of record with specified MFN

  print $isis->to_ascii(42);

It outputs something like this:

  210   ^aNew York^cNew York University press^dcop. 1988
  990   2140
  990   88
  990   HAY

If C<read_fdt> is specified when calling C<new> it will display field names
from C<.FDT> file instead of numeric tags.

=cut

sub to_ascii {
        my $self = shift;

        my $mfn = shift || croak "need MFN";

        my $rec = $self->fetch($mfn);

        my $out = "0\t$mfn";

        foreach my $f (sort keys %{$rec}) {
                my $fn = $self->tag_name($f);
                $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
        }

        $out .= "\n";

        return $out;
}

=head2 to_hash

Read record with specified MFN and convert it to hash

  my $hash = $isis->to_hash($mfn);

It has ability to convert characters (using C<hash_filter> from ISIS
database before creating structures enabling character re-mapping or quick
fix-up of data.

This function returns hash which is like this:

  $hash = {
    '210' => [
               {
                 'c' => 'New York University press',
                 'a' => 'New York',
                 'd' => 'cop. 1988'
               }
             ],
    '990' => [
               '2140',
               '88',
               'HAY'
             ],
  };

You can later use that hash to produce any output from ISIS data.

If database is created using IsisMarc, it will also have to special fields
which will be used for identifiers, C<i1> and C<i2> like this:

  '200' => [
             {
               'i1' => '1',
               'i2' => ' '
               'a' => 'Goa',
               'f' => 'Valdo D\'Arienzo',
               'e' => 'tipografie e tipografi nel XVI secolo',
             }
           ],

This method will also create additional field C<000> with MFN.

=cut

sub to_hash {
        my $self = shift;

        my $mfn = shift || confess "need mfn!";

        # init record to include MFN as field 000
        my $rec = { '000' => [ $mfn ] };

        my $row = $self->fetch($mfn);

        foreach my $k (keys %{$row}) {
                foreach my $l (@{$row->{$k}}) {

                        # filter output
                        $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});

                        my $val;

                        # has identifiers?
                        ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);

                        # has subfields?
                        if ($l =~ m/\^/) {
                                foreach my $t (split(/\^/,$l)) {
                                        next if (! $t);
                                        $val->{substr($t,0,1)} = substr($t,1);
                                }
                        } else {
                                $val = $l;
                        }

                        push @{$rec->{$k}}, $val;
                }
        }

        return $rec;
}

=head2 tag_name

Return name of selected tag

 print $isis->tag_name('200');

=cut

sub tag_name {
        my $self = shift;
        my $tag = shift || return;
        return $self->{'TagName'}->{$tag} || $tag;
}

1;

=head1 BUGS

This module has been very lightly tested. Use with caution and report bugs.

=head1 AUTHOR

        Dobrica Pavlinusic
        CPAN ID: DPAVLIN
        dpavlin@rot13.org
        http://www.rot13.org/~dpavlin/

This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

OpenIsis web site L<http://www.openisis.org>

perl4lib site L<http://perl4lib.perl.org>

1	package IsisDB;
2	use strict;
3
4	use Carp;
5	use File::Glob qw(:globally :nocase);
6
7	use Data::Dumper;
8
9	BEGIN {
10	use Exporter ();
11	use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12	$VERSION = 0.07;
13	@ISA = qw (Exporter);
14	#Give a hoot don't pollute, do not export more than needed by default
15	@EXPORT = qw ();
16	@EXPORT_OK = qw ();
17	%EXPORT_TAGS = ();
18
19	}
20
21	=head1 NAME
22
23	IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
24
25	=head1 SYNOPSIS
26
27	use IsisDB;
28
29	my $isis = new IsisDB(
30	isisdb => './cds/cds',
31	);
32
33	for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
34	print $isis->to_ascii($mfn),"\n";
35	}
36
37	=head1 DESCRIPTION
38
39	This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40	IsisMarc. It can be used as perl-only alternative to OpenIsis module.
41
42	It can create hash values from data in ISIS database (using C<to_hash>),
43	ASCII dump (using C<to_ascii>) or just hash with field names and packed
44	values (like C<^asomething^belse>).
45
46	Unique feature of this module is ability to C<include_deleted> records.
47	It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
48	fields which are zero sized will be filled with random junk from memory).
49
50	It also has support for identifiers (only if ISIS database is created by
51	IsisMarc), see C<to_hash>.
52
53	This will module will always be slower than OpenIsis module which use C
54	library. However, since it's written in perl, it's platform independent (so
55	you don't need C compiler), and can be easily modified. I hope that it
56	creates data structures which are easier to use than ones created by
57	OpenIsis, so reduced time in other parts of the code should compensate for
58	slower performance of this module (speed of reading ISIS database is
59	rarely an issue).
60
61	=head1 METHODS
62
63	=cut
64
65	# my $ORDN; # Nodes Order
66	# my $ORDF; # Leafs Order
67	# my $N; # Number of Memory buffers for nodes
68	# my $K; # Number of buffers for first level index
69	# my $LIV; # Current number of Index Levels
70	# my $POSRX; # Pointer to Root Record in N0x
71	# my $NMAXPOS; # Next Available position in N0x
72	# my $FMAXPOS; # Next available position in L0x
73	# my $ABNORMAL; # Formal BTree normality indicator
74
75	#
76	# some binary reads
77	#
78
79	=head2 new
80
81	Open ISIS database
82
83	my $isis = new IsisDB(
84	isisdb => './cds/cds',
85	read_fdt => 1,
86	include_deleted => 1,
87	hash_filter => sub {
88	my $v = shift;
89	$v =~ s#foo#bar#g;
90	},
91	debug => 1,
92	);
93
94	Options are described below:
95
96	=over 5
97
98	=item isisdb
99
100	This is full or relative path to ISIS database files which include
101	common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102	C<read_fdt> option) files.
103
104	In this example it uses C<./cds/cds.MST> and related files.
105
106	=item read_fdt
107
108	Boolean flag to specify if field definition table should be read. It's off
109	by default.
110
111	=item include_deleted
112
113	Don't skip logically deleted records in ISIS.
114
115	=item hash_filter
116
117	Filter code ref which will be used before data is converted to hash.
118
119	=item debug
120
121	Dump a B<lot> of debugging output.
122
123	=back
124
125	It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
126
127	=cut
128
129	sub new {
130	my $class = shift;
131	my $self = {};
132	bless($self, $class);
133
134	croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
135
136	foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
137	$self->{$v} = {@_}->{$v};
138	}
139
140	my @isis_files = grep(/\.(FDT\|MST\|XRF\|CNT)$/i,glob($self->{isisdb}."*"));
141
142	foreach my $f (@isis_files) {
143	my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
144	$self->{lc($ext)."_file"} = $f;
145	}
146
147	my @must_exist = qw(mst xrf);
148	push @must_exist, "fdt" if ($self->{read_fdt});
149
150	foreach my $ext (@must_exist) {
151	croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
152	}
153
154	print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
155
156	# if you want to read .FDT file use read_fdt argument when creating class!
157	if ($self->{read_fdt} && -e $self->{fdt_file}) {
158
159	# read the $db.FDT file for tags
160	my $fieldzone=0;
161
162	open(fileFDT, $self->{fdt_file}) \|\| croak "can't read '$self->{fdt_file}': $!";
163
164	while (<fileFDT>) {
165	chomp;
166	if ($fieldzone) {
167	my $name=substr($_,0,30);
168	my $tag=substr($_,50,3);
169
170	$name =~ s/\s+$//;
171	$tag =~ s/\s+$//;
172
173	$self->{'TagName'}->{$tag}=$name;
174	}
175
176	if (/^\\\*/) {
177	$fieldzone=1;
178	}
179	}
180
181	close(fileFDT);
182	}
183
184	# Get the Maximum MFN from $db.MST
185
186	open($self->{'fileMST'}, $self->{mst_file}) \|\| croak "can't open '$self->{mst_file}': $!";
187
188	# MST format: (* = 32 bit signed)
189	# CTLMFN* always 0
190	# NXTMFN* MFN to be assigned to the next record created
191	# NXTMFB* last block allocated to master file
192	# NXTMFP offset to next available position in last block
193	# MFTYPE always 0 for user db file (1 for system)
194	seek($self->{'fileMST'},4,0);
195
196	my $buff;
197
198	read($self->{'fileMST'}, $buff, 4);
199	$self->{'NXTMFN'}=unpack("l",$buff) \|\| carp "NXTNFN is zero";
200
201	# save maximum MFN
202	$self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
203
204
205
206
207	print STDERR Dumper($self),"\n" if ($self->{debug});
208
209	# open files for later
210	open($self->{'fileXRF'}, $self->{xrf_file}) \|\| croak "can't open '$self->{xrf_file}': $!";
211
212	$self ? return $self : return undef;
213	}
214
215	=head2 read_cnt
216
217	This function is not really used by module, but can be useful to find info
218	about your index (if debugging it for example).
219
220	print Dumper($isis->read_cnt);
221
222	=cut
223
224	sub read_cnt {
225	my $self = shift;
226
227	croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
228
229	# Get the index information from $db.CNT
230
231	open(fileCNT, $self->{cnt_file}) \|\| croak "can't read '$self->{cnt_file}': $!";
232
233	# There is two 26 Bytes fixed lenght records
234
235	# 0: IDTYPE BTree type 16
236	# 2: ORDN Nodes Order 16
237	# 4: ORDF Leafs Order 16
238	# 6: N Number of Memory buffers for nodes 16
239	# 8: K Number of buffers for first level index 16
240	# 10: LIV Current number of Index Levels 16
241	# 12: POSRX* Pointer to Root Record in N0x 32
242	# 16: NMAXPOS* Next Available position in N0x 32
243	# 20: FMAXPOS* Next available position in L0x 32
244	# 24: ABNORMAL Formal BTree normality indicator 16
245	# length: 26 bytes
246
247	sub unpack_cnt {
248	my $self = shift;
249
250	my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
251
252	my $buff = shift \|\| return;
253	my @arr = unpack("ssssssllls", $buff);
254
255	print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
256
257	my $IDTYPE = shift @arr;
258	foreach (@flds) {
259	$self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
260	}
261	}
262
263	my $buff;
264
265	read(fileCNT, $buff, 26);
266	$self->unpack_cnt($buff);
267
268	read(fileCNT, $buff, 26);
269	$self->unpack_cnt($buff);
270
271	close(fileCNT);
272
273	return $self->{cnt};
274	}
275
276	=head2 fetch
277
278	Read record with selected MFN
279
280	my $rec = $isis->fetch(55);
281
282	Returns hash with keys which are field names and values are unpacked values
283	for that field like this:
284
285	$rec = {
286	'210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
287	'990' => [ '2140', '88', 'HAY' ],
288	};
289
290	=cut
291
292	sub fetch {
293	my $self = shift;
294
295	my $mfn = shift \|\| croak "fetch needs MFN as argument!";
296
297	# is mfn allready in memory?
298	my $old_mfn = $self->{'current_mfn'} \|\| -1;
299	return if ($mfn == $old_mfn);
300
301	print STDERR "## fetch: $mfn\n" if ($self->{debug});
302
303	# XXX check this?
304	my $mfnpos=($mfn+int(($mfn-1)/127))*4;
305
306	print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
307	seek($self->{'fileXRF'},$mfnpos,0);
308
309	my $buff;
310
311	# read XRFMFB abd XRFMFP
312	read($self->{'fileXRF'}, $buff, 4);
313	my $pointer=unpack("l",$buff) \|\| carp "pointer is null";
314
315	my $XRFMFB = int($pointer/2048);
316	my $XRFMFP = $pointer - ($XRFMFB*2048);
317
318
319	# (XRFMFB - 1) * 512 + XRFMFP
320	# why do i have to do XRFMFP % 1024 ?
321
322	my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);
323
324	print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
325
326	# Get Record Information
327
328	seek($self->{'fileMST'},$blk_off,0);
329
330	read($self->{'fileMST'}, $buff, 4);
331	my $value=unpack("l",$buff);
332
333	print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
334
335	if ($value!=$mfn) {
336	carp "Error: MFN ".$mfn." not found in MST(".$value.")";
337	#return; # XXX deleted record?
338	}
339
340	# $MFRL=$self->Read16($fileMST);
341	# $MFBWB=$self->Read32($fileMST);
342	# $MFBWP=$self->Read16($fileMST);
343	# $BASE=$self->Read16($fileMST);
344	# $NVF=$self->Read16($fileMST);
345	# $STATUS=$self->Read16($fileMST);
346
347	read($self->{'fileMST'}, $buff, 14);
348
349	my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
350
351	print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
352
353	# delete old record
354	delete $self->{record};
355
356	## FIXME this is a bug
357	if (! $self->{'include_deleted'} && $MFRL < 0) {
358	print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
359	return;
360	}
361
362	warn "BASE is not 18+6NVF" unless ($BASE == 18 + 6 $NVF);
363
364	# Get Directory Format
365
366	my @FieldPOS;
367	my @FieldLEN;
368	my @FieldTAG;
369
370	read($self->{'fileMST'}, $buff, 6 * $NVF);
371
372	my $rec_len = 0;
373
374	for (my $i = 0 ; $i < $NVF ; $i++) {
375
376	# $TAG=$self->Read16($fileMST);
377	# $POS=$self->Read16($fileMST);
378	# $LEN=$self->Read16($fileMST);
379
380	my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
381
382	print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
383
384	# The TAG does not exists in .FDT so we set it to 0.
385	#
386	# XXX This is removed from perl version; .FDT file is updated manually, so
387	# you will often have fields in .MST file which aren't in .FDT. On the other
388	# hand, IsisMarc doesn't use .FDT files at all!
389
390	#if (! $self->{TagName}->{$TAG}) {
391	# $TAG=0;
392	#}
393
394	push @FieldTAG,$TAG;
395	push @FieldPOS,$POS;
396	push @FieldLEN,$LEN;
397
398	$rec_len += $LEN;
399	}
400
401	# Get Variable Fields
402
403	read($self->{'fileMST'},$buff,$rec_len);
404
405	print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
406
407	for (my $i = 0 ; $i < $NVF ; $i++) {
408	# skip zero-sized fields
409	next if ($FieldLEN[$i] == 0);
410
411	push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
412	}
413
414	$self->{'current_mfn'} = $mfn;
415
416	print Dumper($self),"\n" if ($self->{debug});
417
418	return $self->{'record'};
419	}
420
421	=head2 to_ascii
422
423	Dump ASCII output of record with specified MFN
424
425	print $isis->to_ascii(42);
426
427	It outputs something like this:
428
429	210 ^aNew York^cNew York University press^dcop. 1988
430	990 2140
431	990 88
432	990 HAY
433
434	If C<read_fdt> is specified when calling C<new> it will display field names
435	from C<.FDT> file instead of numeric tags.
436
437	=cut
438
439	sub to_ascii {
440	my $self = shift;
441
442	my $mfn = shift \|\| croak "need MFN";
443
444	my $rec = $self->fetch($mfn);
445
446	my $out = "0\t$mfn";
447
448	foreach my $f (sort keys %{$rec}) {
449	my $fn = $self->tag_name($f);
450	$out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
451	}
452
453	$out .= "\n";
454
455	return $out;
456	}
457
458	=head2 to_hash
459
460	Read record with specified MFN and convert it to hash
461
462	my $hash = $isis->to_hash($mfn);
463
464	It has ability to convert characters (using C<hash_filter> from ISIS
465	database before creating structures enabling character re-mapping or quick
466	fix-up of data.
467
468	This function returns hash which is like this:
469
470	$hash = {
471	'210' => [
472	{
473	'c' => 'New York University press',
474	'a' => 'New York',
475	'd' => 'cop. 1988'
476	}
477	],
478	'990' => [
479	'2140',
480	'88',
481	'HAY'
482	],
483	};
484
485	You can later use that hash to produce any output from ISIS data.
486
487	If database is created using IsisMarc, it will also have to special fields
488	which will be used for identifiers, C<i1> and C<i2> like this:
489
490	'200' => [
491	{
492	'i1' => '1',
493	'i2' => ' '
494	'a' => 'Goa',
495	'f' => 'Valdo D\'Arienzo',
496	'e' => 'tipografie e tipografi nel XVI secolo',
497	}
498	],
499
500	This method will also create additional field C<000> with MFN.
501
502	=cut
503
504	sub to_hash {
505	my $self = shift;
506
507	my $mfn = shift \|\| confess "need mfn!";
508
509	# init record to include MFN as field 000
510	my $rec = { '000' => [ $mfn ] };
511
512	my $row = $self->fetch($mfn);
513
514	foreach my $k (keys %{$row}) {
515	foreach my $l (@{$row->{$k}}) {
516
517	# filter output
518	$l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
519
520	my $val;
521
522	# has identifiers?
523	($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
524
525	# has subfields?
526	if ($l =~ m/\^/) {
527	foreach my $t (split(/\^/,$l)) {
528	next if (! $t);
529	$val->{substr($t,0,1)} = substr($t,1);
530	}
531	} else {
532	$val = $l;
533	}
534
535	push @{$rec->{$k}}, $val;
536	}
537	}
538
539	return $rec;
540	}
541
542	=head2 tag_name
543
544	Return name of selected tag
545
546	print $isis->tag_name('200');
547
548	=cut
549
550	sub tag_name {
551	my $self = shift;
552	my $tag = shift \|\| return;
553	return $self->{'TagName'}->{$tag} \|\| $tag;
554	}
555
556	1;
557
558	=head1 BUGS
559
560	This module has been very lightly tested. Use with caution and report bugs.
561
562	=head1 AUTHOR
563
564	Dobrica Pavlinusic
565	CPAN ID: DPAVLIN
566	dpavlin@rot13.org
567	http://www.rot13.org/~dpavlin/
568
569	This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
570	written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
571
572	=head1 COPYRIGHT
573
574	This program is free software; you can redistribute
575	it and/or modify it under the same terms as Perl itself.
576
577	The full text of the license can be found in the
578	LICENSE file included with this module.
579
580
581	=head1 SEE ALSO
582
583	OpenIsis web site L<http://www.openisis.org>
584
585	perl4lib site L<http://perl4lib.perl.org>
586