/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 7 - (hide annotations)
Wed Dec 29 15:10:34 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 8690 byte(s)
added benchmarking script, some speedup (7029.54/s vs 5829.19/s),
removed left-overs from php porting (dictionaries are not supported by this module),
make dump_isis.pl arguments same as dump_openisis.pl,
renamed GetMFN to fetch

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 7 $VERSION = 0.02;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25     use IsisDB
26     my $isis = new IsisDB(
27     isisdb => './cds/cds',
28     );
29    
30     =head1 DESCRIPTION
31    
32     This module will read CDS/ISIS databases and create hash values out of it.
33     It can be used as perl-only alternative to OpenIsis module.
34    
35     =head1 METHODS
36    
37     =cut
38    
39     # my $ORDN; # Nodes Order
40     # my $ORDF; # Leafs Order
41     # my $N; # Number of Memory buffers for nodes
42     # my $K; # Number of buffers for first level index
43     # my $LIV; # Current number of Index Levels
44     # my $POSRX; # Pointer to Root Record in N0x
45     # my $NMAXPOS; # Next Available position in N0x
46     # my $FMAXPOS; # Next available position in L0x
47     # my $ABNORMAL; # Formal BTree normality indicator
48    
49     #
50     # some binary reads
51     #
52    
53     sub Read32 {
54     my $self = shift;
55    
56     my $f = shift || die "Read32 needs file handle";
57     read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58     return unpack("l",$b);
59     }
60    
61     =head2 new
62    
63     Open CDS/ISIS database
64    
65     my $isis = new IsisDB(
66     isisdb => './cds/cds',
67     read_fdt => 1,
68     debug => 1,
69     );
70    
71 dpavlin 2 Options are described below:
72    
73     =over 5
74    
75 dpavlin 1 =item isisdb
76    
77     Prefix path to CDS/ISIS. It should contain full or relative path to database
78     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
79    
80     =item read_fdt
81    
82     Boolean flag to specify if field definition table should be read. It's off
83     by default.
84    
85     =item debug
86    
87     Dump a C<lot> of debugging output.
88    
89 dpavlin 2 =back
90    
91     It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
92    
93 dpavlin 1 =cut
94    
95     sub new {
96     my $class = shift;
97     my $self = {};
98     bless($self, $class);
99    
100     $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
101    
102 dpavlin 2 $self->{debug} = {@_}->{debug};
103 dpavlin 1
104     # if you want to read .FDT file use read_fdt argument when creating class!
105     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
106    
107     # read the $db.FDT file for tags
108     my $fieldzone=0;
109    
110     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
111    
112     while (<fileFDT>) {
113     chomp;
114     if ($fieldzone) {
115     my $name=substr($_,0,30);
116     my $tag=substr($_,50,3);
117    
118     $name =~ s/\s+$//;
119     $tag =~ s/\s+$//;
120    
121     $self->{'TagName'}->{$tag}=$name;
122     }
123    
124     if (/^\*\*\*/) {
125     $fieldzone=1;
126     }
127     }
128    
129     close(fileFDT);
130     }
131    
132     # Get the Maximum MFN from $db.MST
133    
134     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
135    
136     # MST format: (* = 32 bit signed)
137     # CTLMFN* always 0
138     # NXTMFN* MFN to be assigned to the next record created
139     # NXTMFB* last block allocated to master file
140     # NXTMFP offset to next available position in last block
141     # MFTYPE always 0 for user db file (1 for system)
142     seek(fileMST,4,0);
143     $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
144    
145 dpavlin 2 # save maximum MFN
146     $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
147    
148 dpavlin 1 close(fileMST);
149    
150     # Get the index information from $db.CNT
151    
152     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
153    
154     # There is two 26 Bytes fixed lenght records
155    
156     # 0: IDTYPE BTree type 16
157     # 2: ORDN Nodes Order 16
158     # 4: ORDF Leafs Order 16
159     # 6: N Number of Memory buffers for nodes 16
160     # 8: K Number of buffers for first level index 16
161     # 10: LIV Current number of Index Levels 16
162     # 12: POSRX* Pointer to Root Record in N0x 32
163     # 16: NMAXPOS* Next Available position in N0x 32
164     # 20: FMAXPOS* Next available position in L0x 32
165     # 24: ABNORMAL Formal BTree normality indicator 16
166     # length: 26 bytes
167    
168     sub unpack_cnt {
169     my $self = shift;
170    
171     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
172    
173     my $buff = shift || return;
174     my @arr = unpack("ssssssllls", $buff);
175    
176     my $IDTYPE = shift @arr;
177     foreach (@flds) {
178     $self->{$IDTYPE}->{$_} = abs(shift @arr);
179     }
180     }
181    
182     my $buff;
183     read(fileCNT, $buff, 26);
184     $self->unpack_cnt($buff);
185    
186     read(fileCNT, $buff, 26);
187     $self->unpack_cnt($buff);
188    
189    
190     close(fileCNT);
191    
192     print Dumper($self) if ($self->{debug});
193    
194 dpavlin 7 # open files for later
195     open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
196    
197     open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
198    
199 dpavlin 1 $self ? return $self : return undef;
200     }
201    
202 dpavlin 7 =head2 fetch
203 dpavlin 1
204 dpavlin 2 Read record with selected MFN
205 dpavlin 1
206 dpavlin 7 my $rec = $isis->fetch(55);
207 dpavlin 2
208     Returns hash with keys which are field names and values are unpacked values
209     for that field.
210    
211     =cut
212    
213 dpavlin 7 sub fetch {
214 dpavlin 1 my $self = shift;
215    
216 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
217 dpavlin 1
218 dpavlin 7 print "fetch: $mfn\n" if ($self->{debug});
219 dpavlin 1
220     # XXX check this?
221     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
222    
223     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
224 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
225 dpavlin 1
226     # read XRFMFB abd XRFMFP
227 dpavlin 7 my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
228 dpavlin 1
229     my $XRFMFB = int($pointer/2048);
230     my $XRFMFP = $pointer - ($XRFMFB*2048);
231    
232     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
233    
234     # XXX fix this to be more readable!!
235     # e.g. (XRFMFB - 1) * 512 + XRFMFP
236    
237     my $offset = $pointer;
238     my $offset2=int($offset/2048)-1;
239     my $offset22=int($offset/4096);
240     my $offset3=$offset-($offset22*4096);
241     if ($offset3>512) {
242     $offset3=$offset3-2048;
243     }
244     my $offset4=($offset2*512)+$offset3;
245    
246     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
247    
248     # Get Record Information
249    
250 dpavlin 7 seek($self->{'fileMST'},$offset4,0);
251 dpavlin 1
252 dpavlin 7 my $value=$self->Read32(\*{$self->{'fileMST'}});
253 dpavlin 1
254     if ($value!=$mfn) {
255     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
256     return -1; # XXX deleted record?
257     }
258    
259     # $MFRL=$self->Read16($fileMST);
260     # $MFBWB=$self->Read32($fileMST);
261     # $MFBWP=$self->Read16($fileMST);
262     # $BASE=$self->Read16($fileMST);
263     # $NVF=$self->Read16($fileMST);
264     # $STATUS=$self->Read16($fileMST);
265    
266     my $buff;
267 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
268 dpavlin 1
269     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
270    
271     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
272    
273     # Get Directory Format
274    
275     my @FieldPOS;
276     my @FieldLEN;
277     my @FieldTAG;
278    
279     for (my $i = 0 ; $i < $NVF ; $i++) {
280    
281     # $TAG=$self->Read16($fileMST);
282     # $POS=$self->Read16($fileMST);
283     # $LEN=$self->Read16($fileMST);
284    
285 dpavlin 7 read($self->{'fileMST'}, $buff, 6);
286 dpavlin 1 my ($TAG,$POS,$LEN) = unpack("sss", $buff);
287    
288     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
289    
290     # The TAG does not exists in .FDT so we set it to 0.
291     #
292     # XXX This is removed from perl version; .FDT file is updated manually, so
293     # you will often have fields in .MST file which aren't in .FDT. On the other
294     # hand, IsisMarc doesn't use .FDT files at all!
295    
296     #if (! $self->{TagName}->{$TAG}) {
297     # $TAG=0;
298     #}
299    
300     push @FieldTAG,$TAG;
301     push @FieldPOS,$POS;
302     push @FieldLEN,$LEN;
303     }
304    
305     # Get Variable Fields
306    
307 dpavlin 2 delete $self->{record};
308    
309 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
310     my $rec;
311 dpavlin 7 read($self->{'fileMST'},$rec,$FieldLEN[$i]);
312 dpavlin 2 push @{$self->{record}->{$FieldTAG[$i]}}, $rec;
313 dpavlin 1 }
314     close(fileMST);
315    
316     # The record is marked for deletion
317     if ($STATUS==1) {
318     return -1;
319     }
320    
321     print Dumper($self) if ($self->{debug});
322    
323 dpavlin 2 return $self->{'record'};
324 dpavlin 1 }
325    
326 dpavlin 2 =head2 to_ascii
327    
328     Dump ascii output of selected MFN
329    
330     print $isis->to_ascii(55);
331    
332     =cut
333    
334     sub to_ascii {
335     my $self = shift;
336    
337     my $mfn = shift || croak "need MFN";
338    
339 dpavlin 7 my $rec = $self->fetch($mfn);
340 dpavlin 2
341     my $out = "0\t$mfn";
342    
343     foreach my $f (sort keys %{$rec}) {
344     $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
345     }
346    
347     $out .= "\n";
348    
349     return $out;
350     }
351    
352 dpavlin 1 #
353     # XXX porting from php left-over:
354     #
355     # do I *REALLY* need those methods, or should I use
356     # $self->{something} directly?
357     #
358     # Probably direct usage is better!
359     #
360    
361 dpavlin 7 sub TagName {
362 dpavlin 1 my $self = shift;
363     return $self->{TagName};
364     }
365    
366 dpavlin 7 sub NextMFN {
367 dpavlin 1 my $self = shift;
368     return $self->{NXTMFN};
369     }
370    
371     1;
372    
373     =head1 BUGS
374    
375     This module has been very lightly tested. Use with caution and report bugs.
376    
377     =head1 AUTHOR
378    
379     Dobrica Pavlinusic
380     CPAN ID: DPAVLIN
381     dpavlin@rot13.org
382     http://www.rot13.org/~dpavlin/
383    
384     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
385     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
386    
387     =head1 COPYRIGHT
388    
389     This program is free software; you can redistribute
390     it and/or modify it under the same terms as Perl itself.
391    
392     The full text of the license can be found in the
393     LICENSE file included with this module.
394    
395    
396     =head1 SEE ALSO
397    
398     L<http://www.openisis.org|OpenIsis>, perl(1).
399    

  ViewVC Help
Powered by ViewVC 1.1.26