Biblio-Isis/trunk/IsisDB.pm

package IsisDB;
use strict;

use Carp;
use Data::Dumper;

BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
        $VERSION     = 0.04;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
        @EXPORT_OK   = qw ();
        %EXPORT_TAGS = ();

}

=head1 NAME

IsisDB - Read CDS/ISIS database

=head1 SYNOPSIS

  use IsisDB;

  my $isis = new IsisDB(
        isisdb => './cds/cds',
  );

  for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
        print $isis->to_ascii($mfn),"\n";
  }

=head1 DESCRIPTION

This module will read CDS/ISIS databases and create hash values out of it.
It can be used as perl-only alternative to OpenIsis module.

This will module will always be slower that OpenIsis module which use C
library. However, since it's written in perl, it's platform independent (so
you don't need C compiler), and can be easily modified.

Unique feature of this module is ability to C<include_deleted> records.
It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
fields which are zero sized will be filled with random junk from memory).

=head1 METHODS

=cut

#  my $ORDN;            # Nodes Order
#  my $ORDF;            # Leafs Order
#  my $N;               # Number of Memory buffers for nodes
#  my $K;               # Number of buffers for first level index
#  my $LIV;             # Current number of Index Levels
#  my $POSRX;           # Pointer to Root Record in N0x
#  my $NMAXPOS;         # Next Available position in N0x
#  my $FMAXPOS;         # Next available position in L0x
#  my $ABNORMAL;        # Formal BTree normality indicator

#
# some binary reads
#

=head2 new

Open CDS/ISIS database

 my $isis = new IsisDB(
        isisdb => './cds/cds',
        read_fdt => 1,
        include_deleted => 1,
        hash_filter => sub {
                my $v = shift;
                $v =~ s#foo#bar#g;
        },
        debug => 1,
 );

Options are described below:

=over 5

=item isisdb

Prefix path to CDS/ISIS. It should contain full or relative path to database
and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.

=item read_fdt

Boolean flag to specify if field definition table should be read. It's off
by default.

=item include_deleted

Don't skip logically deleted records in ISIS.

=item hash_filter

Filter code ref which will be used before data is converted to hash.

=item debug

Dump a B<lot> of debugging output.

=back

It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.

=cut

sub new {
        my $class = shift;
        my $self = {};
        bless($self, $class);

        croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});

        foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
                $self->{$v} = {@_}->{$v};
        }

        # if you want to read .FDT file use read_fdt argument when creating class!
        if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {

                # read the $db.FDT file for tags
                my $fieldzone=0;

                open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";

                while (<fileFDT>) {
                        chomp;
                        if ($fieldzone) {
                                my $name=substr($_,0,30);
                                my $tag=substr($_,50,3);

                                $name =~ s/\s+$//;
                                $tag =~ s/\s+$//;

                                $self->{'TagName'}->{$tag}=$name;  
                        }

                        if (/^\*\*\*/) {
                                $fieldzone=1;
                        }
                }
                
                close(fileFDT);
        }

        # Get the Maximum MFN from $db.MST

        open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";

        # MST format:   (* = 32 bit signed)
        # CTLMFN*       always 0
        # NXTMFN*       MFN to be assigned to the next record created
        # NXTMFB*       last block allocated to master file
        # NXTMFP        offset to next available position in last block
        # MFTYPE        always 0 for user db file (1 for system)
        seek(fileMST,4,0);

        my $buff;

        read(fileMST, $buff, 4);
        $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";

        # save maximum MFN
        $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;

        close(fileMST);

        # Get the index information from $db.CNT
   
        open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";

        # There is two 26 Bytes fixed lenght records

        #  0: IDTYPE    BTree type                              16
        #  2: ORDN      Nodes Order                             16
        #  4: ORDF      Leafs Order                             16
        #  6: N         Number of Memory buffers for nodes      16
        #  8: K         Number of buffers for first level index 16
        # 10: LIV       Current number of Index Levels          16
        # 12: POSRX*    Pointer to Root Record in N0x           32
        # 16: NMAXPOS*  Next Available position in N0x          32
        # 20: FMAXPOS*  Next available position in L0x          32
        # 24: ABNORMAL  Formal BTree normality indicator        16
        # length: 26 bytes

        sub unpack_cnt {
                my $self = shift;

                my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);

                my $buff = shift || return;
                my @arr = unpack("ssssssllls", $buff);

                print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});

                my $IDTYPE = shift @arr;
                foreach (@flds) {
                        $self->{$IDTYPE}->{$_} = abs(shift @arr);
                }
        }

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);


        close(fileCNT);

        print Dumper($self) if ($self->{debug});

        # open files for later
        open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";

        open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";

        $self ? return $self : return undef;
}

=head2 fetch

Read record with selected MFN

  my $rec = $isis->fetch(55);

Returns hash with keys which are field names and values are unpacked values
for that field (like C<^asometing^bsomething else>)

=cut

sub fetch {
        my $self = shift;

        my $mfn = shift || croak "fetch needs MFN as argument!";

        print "fetch: $mfn\n" if ($self->{debug});

        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;

        print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
        seek($self->{'fileXRF'},$mfnpos,0);

        my $buff;

        # read XRFMFB abd XRFMFP
        read($self->{'fileXRF'}, $buff, 4);
        my $pointer=unpack("l",$buff) || carp "pointer is null";

        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);

        print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});

        # XXX fix this to be more readable!!
        # e.g. (XRFMFB - 1) * 512 + XRFMFP

        my $offset = $pointer;
        my $offset2=int($offset/2048)-1;
        my $offset22=int($offset/4096);
        my $offset3=$offset-($offset22*4096);
        if ($offset3>512) {
                $offset3=$offset3-2048;
        }
        my $offset4=($offset2*512)+$offset3;

        print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});

        # Get Record Information

        seek($self->{'fileMST'},$offset4,0);

        read($self->{'fileMST'}, $buff, 4);
        my $value=unpack("l",$buff);

        if ($value!=$mfn) {
print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");    
                return -1;      # XXX deleted record?
        }

#       $MFRL=$self->Read16($fileMST);
#       $MFBWB=$self->Read32($fileMST);
#       $MFBWP=$self->Read16($fileMST);
#       $BASE=$self->Read16($fileMST);
#       $NVF=$self->Read16($fileMST);
#       $STATUS=$self->Read16($fileMST);

        read($self->{'fileMST'}, $buff, 14);

        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);

        print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});

        # delete old record
        delete $self->{record};

        if (! $self->{'include_deleted'} && $MFRL < 0) {
                print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
                return;
        }

        # Get Directory Format

        my @FieldPOS;
        my @FieldLEN;
        my @FieldTAG;

        read($self->{'fileMST'}, $buff, 6 * $NVF);

        my $fld_len = 0;

        for (my $i = 0 ; $i < $NVF ; $i++) {

#               $TAG=$self->Read16($fileMST);
#               $POS=$self->Read16($fileMST);
#               $LEN=$self->Read16($fileMST);

                my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));

                print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});

                # The TAG does not exists in .FDT so we set it to 0.
                #
                # XXX This is removed from perl version; .FDT file is updated manually, so
                # you will often have fields in .MST file which aren't in .FDT. On the other
                # hand, IsisMarc doesn't use .FDT files at all!

                #if (! $self->{TagName}->{$TAG}) {
                #       $TAG=0;
                #}

                push @FieldTAG,$TAG;
                push @FieldPOS,$POS;
                push @FieldLEN,$LEN;

                $fld_len += $LEN;
        }

        # Get Variable Fields

        read($self->{'fileMST'},$buff,$fld_len);

        for (my $i = 0 ; $i < $NVF ; $i++) {
                # skip zero-sized fields
                next if ($FieldLEN[$i] == 0);

                push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
        }
        close(fileMST);

        print Dumper($self) if ($self->{debug});

        return $self->{'record'};
}

=head2 to_ascii

Dump ascii output of selected MFN

  print $isis->to_ascii(55);

=cut

sub to_ascii {
        my $self = shift;

        my $mfn = shift || croak "need MFN";

        my $rec = $self->fetch($mfn);

        my $out = "0\t$mfn";

        foreach my $f (sort keys %{$rec}) {
                $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
        }

        $out .= "\n";

        return $out;
}

=head2 to_hash

Read mfn and convert it to hash

  my $hash = $isis->to_hash($mfn);

It has ability to convert characters (using C<hash_filter> from ISIS
database before creating structures enabling character remapping or quick
fixup of data.

This function returns hash which is like this:

  $hash = {
    '210' => [
               {
                 'c' => 'New York University press',
                 'a' => 'New York',
                 'd' => 'cop. 1988'
               }
             ],
    '990' => [
               '2140',
               '88',
               'HAY'
             ],
  };

You can later use that has to produce any output from ISIS data.

=cut

sub to_hash {
        my $self = shift;

        my $mfn = shift || confess "need mfn!";

        my $rec;
        my $row = $self->fetch($mfn);

        foreach my $k (keys %{$row}) {
                foreach my $l (@{$row->{$k}}) {

                        # filter output
                        $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});

                        # has subfields?
                        my $val;
                        if ($l =~ m/\^/) {
                                foreach my $t (split(/\^/,$l)) {
                                        next if (! $t);
                                        $val->{substr($t,0,1)} = substr($t,1);
                                }
                        } else {
                                $val = $l;
                        }

                        push @{$rec->{$k}}, $val;
                }
        }

        return $rec;
}

#
# XXX porting from php left-over:
#
# do I *REALLY* need those methods, or should I use
# $self->{something} directly?
#
# Probably direct usage is better!
#

sub TagName {
        my $self = shift;
        return $self->{TagName};
}

sub NextMFN {
        my $self = shift;
        return $self->{NXTMFN};
}

1;

=head1 BUGS

This module has been very lightly tested. Use with caution and report bugs.

=head1 AUTHOR

        Dobrica Pavlinusic
        CPAN ID: DPAVLIN
        dpavlin@rot13.org
        http://www.rot13.org/~dpavlin/

This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

L<http://www.openisis.org|OpenIsis>, perl(1).

1	package IsisDB;
2	use strict;
3
4	use Carp;
5	use Data::Dumper;
6
7	BEGIN {
8	use Exporter ();
9	use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10	$VERSION = 0.04;
11	@ISA = qw (Exporter);
12	#Give a hoot don't pollute, do not export more than needed by default
13	@EXPORT = qw ();
14	@EXPORT_OK = qw ();
15	%EXPORT_TAGS = ();
16
17	}
18
19	=head1 NAME
20
21	IsisDB - Read CDS/ISIS database
22
23	=head1 SYNOPSIS
24
25	use IsisDB;
26
27	my $isis = new IsisDB(
28	isisdb => './cds/cds',
29	);
30
31	for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32	print $isis->to_ascii($mfn),"\n";
33	}
34
35	=head1 DESCRIPTION
36
37	This module will read CDS/ISIS databases and create hash values out of it.
38	It can be used as perl-only alternative to OpenIsis module.
39
40	This will module will always be slower that OpenIsis module which use C
41	library. However, since it's written in perl, it's platform independent (so
42	you don't need C compiler), and can be easily modified.
43
44	Unique feature of this module is ability to C<include_deleted> records.
45	It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46	fields which are zero sized will be filled with random junk from memory).
47
48	=head1 METHODS
49
50	=cut
51
52	# my $ORDN; # Nodes Order
53	# my $ORDF; # Leafs Order
54	# my $N; # Number of Memory buffers for nodes
55	# my $K; # Number of buffers for first level index
56	# my $LIV; # Current number of Index Levels
57	# my $POSRX; # Pointer to Root Record in N0x
58	# my $NMAXPOS; # Next Available position in N0x
59	# my $FMAXPOS; # Next available position in L0x
60	# my $ABNORMAL; # Formal BTree normality indicator
61
62	#
63	# some binary reads
64	#
65
66	=head2 new
67
68	Open CDS/ISIS database
69
70	my $isis = new IsisDB(
71	isisdb => './cds/cds',
72	read_fdt => 1,
73	include_deleted => 1,
74	hash_filter => sub {
75	my $v = shift;
76	$v =~ s#foo#bar#g;
77	},
78	debug => 1,
79	);
80
81	Options are described below:
82
83	=over 5
84
85	=item isisdb
86
87	Prefix path to CDS/ISIS. It should contain full or relative path to database
88	and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
89
90	=item read_fdt
91
92	Boolean flag to specify if field definition table should be read. It's off
93	by default.
94
95	=item include_deleted
96
97	Don't skip logically deleted records in ISIS.
98
99	=item hash_filter
100
101	Filter code ref which will be used before data is converted to hash.
102
103	=item debug
104
105	Dump a B<lot> of debugging output.
106
107	=back
108
109	It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
110
111	=cut
112
113	sub new {
114	my $class = shift;
115	my $self = {};
116	bless($self, $class);
117
118	croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
119
120	foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
121	$self->{$v} = {@_}->{$v};
122	}
123
124	# if you want to read .FDT file use read_fdt argument when creating class!
125	if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
126
127	# read the $db.FDT file for tags
128	my $fieldzone=0;
129
130	open(fileFDT, $self->{isisdb}.".FDT") \|\| croak "can't read '$self->{isisdb}.FDT': $!";
131
132	while (<fileFDT>) {
133	chomp;
134	if ($fieldzone) {
135	my $name=substr($_,0,30);
136	my $tag=substr($_,50,3);
137
138	$name =~ s/\s+$//;
139	$tag =~ s/\s+$//;
140
141	$self->{'TagName'}->{$tag}=$name;
142	}
143
144	if (/^\\\*/) {
145	$fieldzone=1;
146	}
147	}
148
149	close(fileFDT);
150	}
151
152	# Get the Maximum MFN from $db.MST
153
154	open(fileMST,$self->{isisdb}.".MST") \|\| croak "can't read '$self->{isisdb}.MST': $!";
155
156	# MST format: (* = 32 bit signed)
157	# CTLMFN* always 0
158	# NXTMFN* MFN to be assigned to the next record created
159	# NXTMFB* last block allocated to master file
160	# NXTMFP offset to next available position in last block
161	# MFTYPE always 0 for user db file (1 for system)
162	seek(fileMST,4,0);
163
164	my $buff;
165
166	read(fileMST, $buff, 4);
167	$self->{'NXTMFN'}=unpack("l",$buff) \|\| carp "NXTNFN is zero";
168
169	# save maximum MFN
170	$self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
171
172	close(fileMST);
173
174	# Get the index information from $db.CNT
175
176	open(fileCNT, $self->{isisdb}.".CNT") \|\| croak "can't read '$self->{isisdb}.CNT': $!";
177
178	# There is two 26 Bytes fixed lenght records
179
180	# 0: IDTYPE BTree type 16
181	# 2: ORDN Nodes Order 16
182	# 4: ORDF Leafs Order 16
183	# 6: N Number of Memory buffers for nodes 16
184	# 8: K Number of buffers for first level index 16
185	# 10: LIV Current number of Index Levels 16
186	# 12: POSRX* Pointer to Root Record in N0x 32
187	# 16: NMAXPOS* Next Available position in N0x 32
188	# 20: FMAXPOS* Next available position in L0x 32
189	# 24: ABNORMAL Formal BTree normality indicator 16
190	# length: 26 bytes
191
192	sub unpack_cnt {
193	my $self = shift;
194
195	my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
196
197	my $buff = shift \|\| return;
198	my @arr = unpack("ssssssllls", $buff);
199
200	print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
201
202	my $IDTYPE = shift @arr;
203	foreach (@flds) {
204	$self->{$IDTYPE}->{$_} = abs(shift @arr);
205	}
206	}
207
208	read(fileCNT, $buff, 26);
209	$self->unpack_cnt($buff);
210
211	read(fileCNT, $buff, 26);
212	$self->unpack_cnt($buff);
213
214
215	close(fileCNT);
216
217	print Dumper($self) if ($self->{debug});
218
219	# open files for later
220	open($self->{'fileXRF'}, $self->{isisdb}.".XRF") \|\| croak "can't open '$self->{isisdb}.XRF': $!";
221
222	open($self->{'fileMST'}, $self->{isisdb}.".MST") \|\| croak "can't open '$self->{isisdb}.MST': $!";
223
224	$self ? return $self : return undef;
225	}
226
227	=head2 fetch
228
229	Read record with selected MFN
230
231	my $rec = $isis->fetch(55);
232
233	Returns hash with keys which are field names and values are unpacked values
234	for that field (like C<^asometing^bsomething else>)
235
236	=cut
237
238	sub fetch {
239	my $self = shift;
240
241	my $mfn = shift \|\| croak "fetch needs MFN as argument!";
242
243	print "fetch: $mfn\n" if ($self->{debug});
244
245	# XXX check this?
246	my $mfnpos=($mfn+int(($mfn-1)/127))*4;
247
248	print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
249	seek($self->{'fileXRF'},$mfnpos,0);
250
251	my $buff;
252
253	# read XRFMFB abd XRFMFP
254	read($self->{'fileXRF'}, $buff, 4);
255	my $pointer=unpack("l",$buff) \|\| carp "pointer is null";
256
257	my $XRFMFB = int($pointer/2048);
258	my $XRFMFP = $pointer - ($XRFMFB*2048);
259
260	print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
261
262	# XXX fix this to be more readable!!
263	# e.g. (XRFMFB - 1) * 512 + XRFMFP
264
265	my $offset = $pointer;
266	my $offset2=int($offset/2048)-1;
267	my $offset22=int($offset/4096);
268	my $offset3=$offset-($offset22*4096);
269	if ($offset3>512) {
270	$offset3=$offset3-2048;
271	}
272	my $offset4=($offset2*512)+$offset3;
273
274	print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
275
276	# Get Record Information
277
278	seek($self->{'fileMST'},$offset4,0);
279
280	read($self->{'fileMST'}, $buff, 4);
281	my $value=unpack("l",$buff);
282
283	if ($value!=$mfn) {
284	print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
285	return -1; # XXX deleted record?
286	}
287
288	# $MFRL=$self->Read16($fileMST);
289	# $MFBWB=$self->Read32($fileMST);
290	# $MFBWP=$self->Read16($fileMST);
291	# $BASE=$self->Read16($fileMST);
292	# $NVF=$self->Read16($fileMST);
293	# $STATUS=$self->Read16($fileMST);
294
295	read($self->{'fileMST'}, $buff, 14);
296
297	my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
298
299	print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
300
301	# delete old record
302	delete $self->{record};
303
304	if (! $self->{'include_deleted'} && $MFRL < 0) {
305	print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
306	return;
307	}
308
309	# Get Directory Format
310
311	my @FieldPOS;
312	my @FieldLEN;
313	my @FieldTAG;
314
315	read($self->{'fileMST'}, $buff, 6 * $NVF);
316
317	my $fld_len = 0;
318
319	for (my $i = 0 ; $i < $NVF ; $i++) {
320
321	# $TAG=$self->Read16($fileMST);
322	# $POS=$self->Read16($fileMST);
323	# $LEN=$self->Read16($fileMST);
324
325	my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
326
327	print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
328
329	# The TAG does not exists in .FDT so we set it to 0.
330	#
331	# XXX This is removed from perl version; .FDT file is updated manually, so
332	# you will often have fields in .MST file which aren't in .FDT. On the other
333	# hand, IsisMarc doesn't use .FDT files at all!
334
335	#if (! $self->{TagName}->{$TAG}) {
336	# $TAG=0;
337	#}
338
339	push @FieldTAG,$TAG;
340	push @FieldPOS,$POS;
341	push @FieldLEN,$LEN;
342
343	$fld_len += $LEN;
344	}
345
346	# Get Variable Fields
347
348	read($self->{'fileMST'},$buff,$fld_len);
349
350	for (my $i = 0 ; $i < $NVF ; $i++) {
351	# skip zero-sized fields
352	next if ($FieldLEN[$i] == 0);
353
354	push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
355	}
356	close(fileMST);
357
358	print Dumper($self) if ($self->{debug});
359
360	return $self->{'record'};
361	}
362
363	=head2 to_ascii
364
365	Dump ascii output of selected MFN
366
367	print $isis->to_ascii(55);
368
369	=cut
370
371	sub to_ascii {
372	my $self = shift;
373
374	my $mfn = shift \|\| croak "need MFN";
375
376	my $rec = $self->fetch($mfn);
377
378	my $out = "0\t$mfn";
379
380	foreach my $f (sort keys %{$rec}) {
381	$out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
382	}
383
384	$out .= "\n";
385
386	return $out;
387	}
388
389	=head2 to_hash
390
391	Read mfn and convert it to hash
392
393	my $hash = $isis->to_hash($mfn);
394
395	It has ability to convert characters (using C<hash_filter> from ISIS
396	database before creating structures enabling character remapping or quick
397	fixup of data.
398
399	This function returns hash which is like this:
400
401	$hash = {
402	'210' => [
403	{
404	'c' => 'New York University press',
405	'a' => 'New York',
406	'd' => 'cop. 1988'
407	}
408	],
409	'990' => [
410	'2140',
411	'88',
412	'HAY'
413	],
414	};
415
416	You can later use that has to produce any output from ISIS data.
417
418	=cut
419
420	sub to_hash {
421	my $self = shift;
422
423	my $mfn = shift \|\| confess "need mfn!";
424
425	my $rec;
426	my $row = $self->fetch($mfn);
427
428	foreach my $k (keys %{$row}) {
429	foreach my $l (@{$row->{$k}}) {
430
431	# filter output
432	$l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
433
434	# has subfields?
435	my $val;
436	if ($l =~ m/\^/) {
437	foreach my $t (split(/\^/,$l)) {
438	next if (! $t);
439	$val->{substr($t,0,1)} = substr($t,1);
440	}
441	} else {
442	$val = $l;
443	}
444
445	push @{$rec->{$k}}, $val;
446	}
447	}
448
449	return $rec;
450	}
451
452	#
453	# XXX porting from php left-over:
454	#
455	# do I REALLY need those methods, or should I use
456	# $self->{something} directly?
457	#
458	# Probably direct usage is better!
459	#
460
461	sub TagName {
462	my $self = shift;
463	return $self->{TagName};
464	}
465
466	sub NextMFN {
467	my $self = shift;
468	return $self->{NXTMFN};
469	}
470
471	1;
472
473	=head1 BUGS
474
475	This module has been very lightly tested. Use with caution and report bugs.
476
477	=head1 AUTHOR
478
479	Dobrica Pavlinusic
480	CPAN ID: DPAVLIN
481	dpavlin@rot13.org
482	http://www.rot13.org/~dpavlin/
483
484	This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
485	written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
486
487	=head1 COPYRIGHT
488
489	This program is free software; you can redistribute
490	it and/or modify it under the same terms as Perl itself.
491
492	The full text of the license can be found in the
493	LICENSE file included with this module.
494
495
496	=head1 SEE ALSO
497
498	L<http://www.openisis.org\|OpenIsis>, perl(1).
499