/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 8 - (hide annotations)
Wed Dec 29 15:17:59 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 8777 byte(s)
another speedup (7845.71/s)

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 7 $VERSION = 0.02;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25     use IsisDB
26     my $isis = new IsisDB(
27     isisdb => './cds/cds',
28     );
29    
30     =head1 DESCRIPTION
31    
32     This module will read CDS/ISIS databases and create hash values out of it.
33     It can be used as perl-only alternative to OpenIsis module.
34    
35     =head1 METHODS
36    
37     =cut
38    
39     # my $ORDN; # Nodes Order
40     # my $ORDF; # Leafs Order
41     # my $N; # Number of Memory buffers for nodes
42     # my $K; # Number of buffers for first level index
43     # my $LIV; # Current number of Index Levels
44     # my $POSRX; # Pointer to Root Record in N0x
45     # my $NMAXPOS; # Next Available position in N0x
46     # my $FMAXPOS; # Next available position in L0x
47     # my $ABNORMAL; # Formal BTree normality indicator
48    
49     #
50     # some binary reads
51     #
52    
53     sub Read32 {
54     my $self = shift;
55    
56     my $f = shift || die "Read32 needs file handle";
57     read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58     return unpack("l",$b);
59     }
60    
61     =head2 new
62    
63     Open CDS/ISIS database
64    
65     my $isis = new IsisDB(
66     isisdb => './cds/cds',
67     read_fdt => 1,
68     debug => 1,
69     );
70    
71 dpavlin 2 Options are described below:
72    
73     =over 5
74    
75 dpavlin 1 =item isisdb
76    
77     Prefix path to CDS/ISIS. It should contain full or relative path to database
78     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
79    
80     =item read_fdt
81    
82     Boolean flag to specify if field definition table should be read. It's off
83     by default.
84    
85     =item debug
86    
87     Dump a C<lot> of debugging output.
88    
89 dpavlin 2 =back
90    
91     It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
92    
93 dpavlin 1 =cut
94    
95     sub new {
96     my $class = shift;
97     my $self = {};
98     bless($self, $class);
99    
100     $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
101    
102 dpavlin 2 $self->{debug} = {@_}->{debug};
103 dpavlin 1
104     # if you want to read .FDT file use read_fdt argument when creating class!
105     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
106    
107     # read the $db.FDT file for tags
108     my $fieldzone=0;
109    
110     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
111    
112     while (<fileFDT>) {
113     chomp;
114     if ($fieldzone) {
115     my $name=substr($_,0,30);
116     my $tag=substr($_,50,3);
117    
118     $name =~ s/\s+$//;
119     $tag =~ s/\s+$//;
120    
121     $self->{'TagName'}->{$tag}=$name;
122     }
123    
124     if (/^\*\*\*/) {
125     $fieldzone=1;
126     }
127     }
128    
129     close(fileFDT);
130     }
131    
132     # Get the Maximum MFN from $db.MST
133    
134     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
135    
136     # MST format: (* = 32 bit signed)
137     # CTLMFN* always 0
138     # NXTMFN* MFN to be assigned to the next record created
139     # NXTMFB* last block allocated to master file
140     # NXTMFP offset to next available position in last block
141     # MFTYPE always 0 for user db file (1 for system)
142     seek(fileMST,4,0);
143     $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
144    
145 dpavlin 2 # save maximum MFN
146     $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
147    
148 dpavlin 1 close(fileMST);
149    
150     # Get the index information from $db.CNT
151    
152     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
153    
154     # There is two 26 Bytes fixed lenght records
155    
156     # 0: IDTYPE BTree type 16
157     # 2: ORDN Nodes Order 16
158     # 4: ORDF Leafs Order 16
159     # 6: N Number of Memory buffers for nodes 16
160     # 8: K Number of buffers for first level index 16
161     # 10: LIV Current number of Index Levels 16
162     # 12: POSRX* Pointer to Root Record in N0x 32
163     # 16: NMAXPOS* Next Available position in N0x 32
164     # 20: FMAXPOS* Next available position in L0x 32
165     # 24: ABNORMAL Formal BTree normality indicator 16
166     # length: 26 bytes
167    
168     sub unpack_cnt {
169     my $self = shift;
170    
171     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
172    
173     my $buff = shift || return;
174     my @arr = unpack("ssssssllls", $buff);
175    
176     my $IDTYPE = shift @arr;
177     foreach (@flds) {
178     $self->{$IDTYPE}->{$_} = abs(shift @arr);
179     }
180     }
181    
182     my $buff;
183     read(fileCNT, $buff, 26);
184     $self->unpack_cnt($buff);
185    
186     read(fileCNT, $buff, 26);
187     $self->unpack_cnt($buff);
188    
189    
190     close(fileCNT);
191    
192     print Dumper($self) if ($self->{debug});
193    
194 dpavlin 7 # open files for later
195     open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
196    
197     open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
198    
199 dpavlin 1 $self ? return $self : return undef;
200     }
201    
202 dpavlin 7 =head2 fetch
203 dpavlin 1
204 dpavlin 2 Read record with selected MFN
205 dpavlin 1
206 dpavlin 7 my $rec = $isis->fetch(55);
207 dpavlin 2
208     Returns hash with keys which are field names and values are unpacked values
209     for that field.
210    
211     =cut
212    
213 dpavlin 7 sub fetch {
214 dpavlin 1 my $self = shift;
215    
216 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
217 dpavlin 1
218 dpavlin 7 print "fetch: $mfn\n" if ($self->{debug});
219 dpavlin 1
220     # XXX check this?
221     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
222    
223     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
224 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
225 dpavlin 1
226     # read XRFMFB abd XRFMFP
227 dpavlin 7 my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
228 dpavlin 1
229     my $XRFMFB = int($pointer/2048);
230     my $XRFMFP = $pointer - ($XRFMFB*2048);
231    
232     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
233    
234     # XXX fix this to be more readable!!
235     # e.g. (XRFMFB - 1) * 512 + XRFMFP
236    
237     my $offset = $pointer;
238     my $offset2=int($offset/2048)-1;
239     my $offset22=int($offset/4096);
240     my $offset3=$offset-($offset22*4096);
241     if ($offset3>512) {
242     $offset3=$offset3-2048;
243     }
244     my $offset4=($offset2*512)+$offset3;
245    
246     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
247    
248     # Get Record Information
249    
250 dpavlin 7 seek($self->{'fileMST'},$offset4,0);
251 dpavlin 1
252 dpavlin 7 my $value=$self->Read32(\*{$self->{'fileMST'}});
253 dpavlin 1
254     if ($value!=$mfn) {
255     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
256     return -1; # XXX deleted record?
257     }
258    
259     # $MFRL=$self->Read16($fileMST);
260     # $MFBWB=$self->Read32($fileMST);
261     # $MFBWP=$self->Read16($fileMST);
262     # $BASE=$self->Read16($fileMST);
263     # $NVF=$self->Read16($fileMST);
264     # $STATUS=$self->Read16($fileMST);
265    
266     my $buff;
267 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
268 dpavlin 1
269     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
270    
271     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
272    
273     # Get Directory Format
274    
275     my @FieldPOS;
276     my @FieldLEN;
277     my @FieldTAG;
278    
279 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
280    
281     my $fld_len = 0;
282    
283 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
284    
285     # $TAG=$self->Read16($fileMST);
286     # $POS=$self->Read16($fileMST);
287     # $LEN=$self->Read16($fileMST);
288    
289 dpavlin 8 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
290 dpavlin 1
291     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
292    
293     # The TAG does not exists in .FDT so we set it to 0.
294     #
295     # XXX This is removed from perl version; .FDT file is updated manually, so
296     # you will often have fields in .MST file which aren't in .FDT. On the other
297     # hand, IsisMarc doesn't use .FDT files at all!
298    
299     #if (! $self->{TagName}->{$TAG}) {
300     # $TAG=0;
301     #}
302    
303     push @FieldTAG,$TAG;
304     push @FieldPOS,$POS;
305     push @FieldLEN,$LEN;
306 dpavlin 8
307     $fld_len += $LEN;
308 dpavlin 1 }
309    
310     # Get Variable Fields
311    
312 dpavlin 2 delete $self->{record};
313    
314 dpavlin 8 read($self->{'fileMST'},$buff,$fld_len);
315    
316 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
317 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
318 dpavlin 1 }
319     close(fileMST);
320    
321     # The record is marked for deletion
322     if ($STATUS==1) {
323     return -1;
324     }
325    
326     print Dumper($self) if ($self->{debug});
327    
328 dpavlin 2 return $self->{'record'};
329 dpavlin 1 }
330    
331 dpavlin 2 =head2 to_ascii
332    
333     Dump ascii output of selected MFN
334    
335     print $isis->to_ascii(55);
336    
337     =cut
338    
339     sub to_ascii {
340     my $self = shift;
341    
342     my $mfn = shift || croak "need MFN";
343    
344 dpavlin 7 my $rec = $self->fetch($mfn);
345 dpavlin 2
346     my $out = "0\t$mfn";
347    
348     foreach my $f (sort keys %{$rec}) {
349     $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
350     }
351    
352     $out .= "\n";
353    
354     return $out;
355     }
356    
357 dpavlin 1 #
358     # XXX porting from php left-over:
359     #
360     # do I *REALLY* need those methods, or should I use
361     # $self->{something} directly?
362     #
363     # Probably direct usage is better!
364     #
365    
366 dpavlin 7 sub TagName {
367 dpavlin 1 my $self = shift;
368     return $self->{TagName};
369     }
370    
371 dpavlin 7 sub NextMFN {
372 dpavlin 1 my $self = shift;
373     return $self->{NXTMFN};
374     }
375    
376     1;
377    
378     =head1 BUGS
379    
380     This module has been very lightly tested. Use with caution and report bugs.
381    
382     =head1 AUTHOR
383    
384     Dobrica Pavlinusic
385     CPAN ID: DPAVLIN
386     dpavlin@rot13.org
387     http://www.rot13.org/~dpavlin/
388    
389     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
390     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
391    
392     =head1 COPYRIGHT
393    
394     This program is free software; you can redistribute
395     it and/or modify it under the same terms as Perl itself.
396    
397     The full text of the license can be found in the
398     LICENSE file included with this module.
399    
400    
401     =head1 SEE ALSO
402    
403     L<http://www.openisis.org|OpenIsis>, perl(1).
404    

  ViewVC Help
Powered by ViewVC 1.1.26