/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 9 - (hide annotations)
Wed Dec 29 16:01:41 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 9067 byte(s)
logically deleted records are by default skipped, but can be included using
include_deleted option to new

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 9 $VERSION = 0.03;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25     use IsisDB
26     my $isis = new IsisDB(
27     isisdb => './cds/cds',
28     );
29    
30     =head1 DESCRIPTION
31    
32     This module will read CDS/ISIS databases and create hash values out of it.
33     It can be used as perl-only alternative to OpenIsis module.
34    
35     =head1 METHODS
36    
37     =cut
38    
39     # my $ORDN; # Nodes Order
40     # my $ORDF; # Leafs Order
41     # my $N; # Number of Memory buffers for nodes
42     # my $K; # Number of buffers for first level index
43     # my $LIV; # Current number of Index Levels
44     # my $POSRX; # Pointer to Root Record in N0x
45     # my $NMAXPOS; # Next Available position in N0x
46     # my $FMAXPOS; # Next available position in L0x
47     # my $ABNORMAL; # Formal BTree normality indicator
48    
49     #
50     # some binary reads
51     #
52    
53     sub Read32 {
54     my $self = shift;
55    
56     my $f = shift || die "Read32 needs file handle";
57     read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58     return unpack("l",$b);
59     }
60    
61     =head2 new
62    
63     Open CDS/ISIS database
64    
65     my $isis = new IsisDB(
66     isisdb => './cds/cds',
67     read_fdt => 1,
68     debug => 1,
69 dpavlin 9 include_deleted => 1,
70 dpavlin 1 );
71    
72 dpavlin 2 Options are described below:
73    
74     =over 5
75    
76 dpavlin 1 =item isisdb
77    
78     Prefix path to CDS/ISIS. It should contain full or relative path to database
79     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
80    
81     =item read_fdt
82    
83     Boolean flag to specify if field definition table should be read. It's off
84     by default.
85    
86     =item debug
87    
88     Dump a C<lot> of debugging output.
89    
90 dpavlin 9 =item include_deleted
91    
92     Don't skip logically deleted records.
93    
94 dpavlin 2 =back
95    
96     It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
97    
98 dpavlin 1 =cut
99    
100     sub new {
101     my $class = shift;
102     my $self = {};
103     bless($self, $class);
104    
105 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
106 dpavlin 1
107 dpavlin 9 foreach my $v (qw{isisdb debug include_deleted}) {
108     $self->{$v} = {@_}->{$v};
109     }
110 dpavlin 1
111     # if you want to read .FDT file use read_fdt argument when creating class!
112     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
113    
114     # read the $db.FDT file for tags
115     my $fieldzone=0;
116    
117     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
118    
119     while (<fileFDT>) {
120     chomp;
121     if ($fieldzone) {
122     my $name=substr($_,0,30);
123     my $tag=substr($_,50,3);
124    
125     $name =~ s/\s+$//;
126     $tag =~ s/\s+$//;
127    
128     $self->{'TagName'}->{$tag}=$name;
129     }
130    
131     if (/^\*\*\*/) {
132     $fieldzone=1;
133     }
134     }
135    
136     close(fileFDT);
137     }
138    
139     # Get the Maximum MFN from $db.MST
140    
141     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
142    
143     # MST format: (* = 32 bit signed)
144     # CTLMFN* always 0
145     # NXTMFN* MFN to be assigned to the next record created
146     # NXTMFB* last block allocated to master file
147     # NXTMFP offset to next available position in last block
148     # MFTYPE always 0 for user db file (1 for system)
149     seek(fileMST,4,0);
150     $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
151    
152 dpavlin 2 # save maximum MFN
153     $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
154    
155 dpavlin 1 close(fileMST);
156    
157     # Get the index information from $db.CNT
158    
159     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
160    
161     # There is two 26 Bytes fixed lenght records
162    
163     # 0: IDTYPE BTree type 16
164     # 2: ORDN Nodes Order 16
165     # 4: ORDF Leafs Order 16
166     # 6: N Number of Memory buffers for nodes 16
167     # 8: K Number of buffers for first level index 16
168     # 10: LIV Current number of Index Levels 16
169     # 12: POSRX* Pointer to Root Record in N0x 32
170     # 16: NMAXPOS* Next Available position in N0x 32
171     # 20: FMAXPOS* Next available position in L0x 32
172     # 24: ABNORMAL Formal BTree normality indicator 16
173     # length: 26 bytes
174    
175     sub unpack_cnt {
176     my $self = shift;
177    
178     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
179    
180     my $buff = shift || return;
181     my @arr = unpack("ssssssllls", $buff);
182    
183 dpavlin 9 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
184    
185 dpavlin 1 my $IDTYPE = shift @arr;
186     foreach (@flds) {
187     $self->{$IDTYPE}->{$_} = abs(shift @arr);
188     }
189     }
190    
191     my $buff;
192     read(fileCNT, $buff, 26);
193     $self->unpack_cnt($buff);
194    
195     read(fileCNT, $buff, 26);
196     $self->unpack_cnt($buff);
197    
198    
199     close(fileCNT);
200    
201     print Dumper($self) if ($self->{debug});
202    
203 dpavlin 7 # open files for later
204     open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
205    
206     open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
207    
208 dpavlin 1 $self ? return $self : return undef;
209     }
210    
211 dpavlin 7 =head2 fetch
212 dpavlin 1
213 dpavlin 2 Read record with selected MFN
214 dpavlin 1
215 dpavlin 7 my $rec = $isis->fetch(55);
216 dpavlin 2
217     Returns hash with keys which are field names and values are unpacked values
218     for that field.
219    
220     =cut
221    
222 dpavlin 7 sub fetch {
223 dpavlin 1 my $self = shift;
224    
225 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
226 dpavlin 1
227 dpavlin 7 print "fetch: $mfn\n" if ($self->{debug});
228 dpavlin 1
229     # XXX check this?
230     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
231    
232     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
233 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
234 dpavlin 1
235     # read XRFMFB abd XRFMFP
236 dpavlin 7 my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
237 dpavlin 1
238     my $XRFMFB = int($pointer/2048);
239     my $XRFMFP = $pointer - ($XRFMFB*2048);
240    
241     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
242    
243     # XXX fix this to be more readable!!
244     # e.g. (XRFMFB - 1) * 512 + XRFMFP
245    
246     my $offset = $pointer;
247     my $offset2=int($offset/2048)-1;
248     my $offset22=int($offset/4096);
249     my $offset3=$offset-($offset22*4096);
250     if ($offset3>512) {
251     $offset3=$offset3-2048;
252     }
253     my $offset4=($offset2*512)+$offset3;
254    
255     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
256    
257     # Get Record Information
258    
259 dpavlin 7 seek($self->{'fileMST'},$offset4,0);
260 dpavlin 1
261 dpavlin 7 my $value=$self->Read32(\*{$self->{'fileMST'}});
262 dpavlin 1
263     if ($value!=$mfn) {
264     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
265     return -1; # XXX deleted record?
266     }
267    
268     # $MFRL=$self->Read16($fileMST);
269     # $MFBWB=$self->Read32($fileMST);
270     # $MFBWP=$self->Read16($fileMST);
271     # $BASE=$self->Read16($fileMST);
272     # $NVF=$self->Read16($fileMST);
273     # $STATUS=$self->Read16($fileMST);
274    
275     my $buff;
276 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
277 dpavlin 1
278     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
279    
280     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
281    
282 dpavlin 9 # delete old record
283     delete $self->{record};
284    
285     if (! $self->{'include_deleted'} && $MFRL < 0) {
286     print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
287     return;
288     }
289    
290 dpavlin 1 # Get Directory Format
291    
292     my @FieldPOS;
293     my @FieldLEN;
294     my @FieldTAG;
295    
296 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
297    
298     my $fld_len = 0;
299    
300 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
301    
302     # $TAG=$self->Read16($fileMST);
303     # $POS=$self->Read16($fileMST);
304     # $LEN=$self->Read16($fileMST);
305    
306 dpavlin 8 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
307 dpavlin 1
308     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
309    
310     # The TAG does not exists in .FDT so we set it to 0.
311     #
312     # XXX This is removed from perl version; .FDT file is updated manually, so
313     # you will often have fields in .MST file which aren't in .FDT. On the other
314     # hand, IsisMarc doesn't use .FDT files at all!
315    
316     #if (! $self->{TagName}->{$TAG}) {
317     # $TAG=0;
318     #}
319    
320     push @FieldTAG,$TAG;
321     push @FieldPOS,$POS;
322     push @FieldLEN,$LEN;
323 dpavlin 8
324     $fld_len += $LEN;
325 dpavlin 1 }
326    
327     # Get Variable Fields
328    
329 dpavlin 8 read($self->{'fileMST'},$buff,$fld_len);
330    
331 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
332 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
333 dpavlin 1 }
334     close(fileMST);
335    
336     print Dumper($self) if ($self->{debug});
337    
338 dpavlin 2 return $self->{'record'};
339 dpavlin 1 }
340    
341 dpavlin 2 =head2 to_ascii
342    
343     Dump ascii output of selected MFN
344    
345     print $isis->to_ascii(55);
346    
347     =cut
348    
349     sub to_ascii {
350     my $self = shift;
351    
352     my $mfn = shift || croak "need MFN";
353    
354 dpavlin 7 my $rec = $self->fetch($mfn);
355 dpavlin 2
356     my $out = "0\t$mfn";
357    
358     foreach my $f (sort keys %{$rec}) {
359     $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
360     }
361    
362     $out .= "\n";
363    
364     return $out;
365     }
366    
367 dpavlin 1 #
368     # XXX porting from php left-over:
369     #
370     # do I *REALLY* need those methods, or should I use
371     # $self->{something} directly?
372     #
373     # Probably direct usage is better!
374     #
375    
376 dpavlin 7 sub TagName {
377 dpavlin 1 my $self = shift;
378     return $self->{TagName};
379     }
380    
381 dpavlin 7 sub NextMFN {
382 dpavlin 1 my $self = shift;
383     return $self->{NXTMFN};
384     }
385    
386     1;
387    
388     =head1 BUGS
389    
390     This module has been very lightly tested. Use with caution and report bugs.
391    
392     =head1 AUTHOR
393    
394     Dobrica Pavlinusic
395     CPAN ID: DPAVLIN
396     dpavlin@rot13.org
397     http://www.rot13.org/~dpavlin/
398    
399     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
400     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
401    
402     =head1 COPYRIGHT
403    
404     This program is free software; you can redistribute
405     it and/or modify it under the same terms as Perl itself.
406    
407     The full text of the license can be found in the
408     LICENSE file included with this module.
409    
410    
411     =head1 SEE ALSO
412    
413     L<http://www.openisis.org|OpenIsis>, perl(1).
414    

  ViewVC Help
Powered by ViewVC 1.1.26