/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 9 - (show annotations)
Wed Dec 29 16:01:41 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 9067 byte(s)
logically deleted records are by default skipped, but can be included using
include_deleted option to new

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.03;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB
26 my $isis = new IsisDB(
27 isisdb => './cds/cds',
28 );
29
30 =head1 DESCRIPTION
31
32 This module will read CDS/ISIS databases and create hash values out of it.
33 It can be used as perl-only alternative to OpenIsis module.
34
35 =head1 METHODS
36
37 =cut
38
39 # my $ORDN; # Nodes Order
40 # my $ORDF; # Leafs Order
41 # my $N; # Number of Memory buffers for nodes
42 # my $K; # Number of buffers for first level index
43 # my $LIV; # Current number of Index Levels
44 # my $POSRX; # Pointer to Root Record in N0x
45 # my $NMAXPOS; # Next Available position in N0x
46 # my $FMAXPOS; # Next available position in L0x
47 # my $ABNORMAL; # Formal BTree normality indicator
48
49 #
50 # some binary reads
51 #
52
53 sub Read32 {
54 my $self = shift;
55
56 my $f = shift || die "Read32 needs file handle";
57 read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58 return unpack("l",$b);
59 }
60
61 =head2 new
62
63 Open CDS/ISIS database
64
65 my $isis = new IsisDB(
66 isisdb => './cds/cds',
67 read_fdt => 1,
68 debug => 1,
69 include_deleted => 1,
70 );
71
72 Options are described below:
73
74 =over 5
75
76 =item isisdb
77
78 Prefix path to CDS/ISIS. It should contain full or relative path to database
79 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
80
81 =item read_fdt
82
83 Boolean flag to specify if field definition table should be read. It's off
84 by default.
85
86 =item debug
87
88 Dump a C<lot> of debugging output.
89
90 =item include_deleted
91
92 Don't skip logically deleted records.
93
94 =back
95
96 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
97
98 =cut
99
100 sub new {
101 my $class = shift;
102 my $self = {};
103 bless($self, $class);
104
105 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
106
107 foreach my $v (qw{isisdb debug include_deleted}) {
108 $self->{$v} = {@_}->{$v};
109 }
110
111 # if you want to read .FDT file use read_fdt argument when creating class!
112 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
113
114 # read the $db.FDT file for tags
115 my $fieldzone=0;
116
117 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
118
119 while (<fileFDT>) {
120 chomp;
121 if ($fieldzone) {
122 my $name=substr($_,0,30);
123 my $tag=substr($_,50,3);
124
125 $name =~ s/\s+$//;
126 $tag =~ s/\s+$//;
127
128 $self->{'TagName'}->{$tag}=$name;
129 }
130
131 if (/^\*\*\*/) {
132 $fieldzone=1;
133 }
134 }
135
136 close(fileFDT);
137 }
138
139 # Get the Maximum MFN from $db.MST
140
141 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
142
143 # MST format: (* = 32 bit signed)
144 # CTLMFN* always 0
145 # NXTMFN* MFN to be assigned to the next record created
146 # NXTMFB* last block allocated to master file
147 # NXTMFP offset to next available position in last block
148 # MFTYPE always 0 for user db file (1 for system)
149 seek(fileMST,4,0);
150 $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
151
152 # save maximum MFN
153 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
154
155 close(fileMST);
156
157 # Get the index information from $db.CNT
158
159 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
160
161 # There is two 26 Bytes fixed lenght records
162
163 # 0: IDTYPE BTree type 16
164 # 2: ORDN Nodes Order 16
165 # 4: ORDF Leafs Order 16
166 # 6: N Number of Memory buffers for nodes 16
167 # 8: K Number of buffers for first level index 16
168 # 10: LIV Current number of Index Levels 16
169 # 12: POSRX* Pointer to Root Record in N0x 32
170 # 16: NMAXPOS* Next Available position in N0x 32
171 # 20: FMAXPOS* Next available position in L0x 32
172 # 24: ABNORMAL Formal BTree normality indicator 16
173 # length: 26 bytes
174
175 sub unpack_cnt {
176 my $self = shift;
177
178 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
179
180 my $buff = shift || return;
181 my @arr = unpack("ssssssllls", $buff);
182
183 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
184
185 my $IDTYPE = shift @arr;
186 foreach (@flds) {
187 $self->{$IDTYPE}->{$_} = abs(shift @arr);
188 }
189 }
190
191 my $buff;
192 read(fileCNT, $buff, 26);
193 $self->unpack_cnt($buff);
194
195 read(fileCNT, $buff, 26);
196 $self->unpack_cnt($buff);
197
198
199 close(fileCNT);
200
201 print Dumper($self) if ($self->{debug});
202
203 # open files for later
204 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
205
206 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
207
208 $self ? return $self : return undef;
209 }
210
211 =head2 fetch
212
213 Read record with selected MFN
214
215 my $rec = $isis->fetch(55);
216
217 Returns hash with keys which are field names and values are unpacked values
218 for that field.
219
220 =cut
221
222 sub fetch {
223 my $self = shift;
224
225 my $mfn = shift || croak "fetch needs MFN as argument!";
226
227 print "fetch: $mfn\n" if ($self->{debug});
228
229 # XXX check this?
230 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
231
232 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
233 seek($self->{'fileXRF'},$mfnpos,0);
234
235 # read XRFMFB abd XRFMFP
236 my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
237
238 my $XRFMFB = int($pointer/2048);
239 my $XRFMFP = $pointer - ($XRFMFB*2048);
240
241 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
242
243 # XXX fix this to be more readable!!
244 # e.g. (XRFMFB - 1) * 512 + XRFMFP
245
246 my $offset = $pointer;
247 my $offset2=int($offset/2048)-1;
248 my $offset22=int($offset/4096);
249 my $offset3=$offset-($offset22*4096);
250 if ($offset3>512) {
251 $offset3=$offset3-2048;
252 }
253 my $offset4=($offset2*512)+$offset3;
254
255 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
256
257 # Get Record Information
258
259 seek($self->{'fileMST'},$offset4,0);
260
261 my $value=$self->Read32(\*{$self->{'fileMST'}});
262
263 if ($value!=$mfn) {
264 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
265 return -1; # XXX deleted record?
266 }
267
268 # $MFRL=$self->Read16($fileMST);
269 # $MFBWB=$self->Read32($fileMST);
270 # $MFBWP=$self->Read16($fileMST);
271 # $BASE=$self->Read16($fileMST);
272 # $NVF=$self->Read16($fileMST);
273 # $STATUS=$self->Read16($fileMST);
274
275 my $buff;
276 read($self->{'fileMST'}, $buff, 14);
277
278 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
279
280 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
281
282 # delete old record
283 delete $self->{record};
284
285 if (! $self->{'include_deleted'} && $MFRL < 0) {
286 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
287 return;
288 }
289
290 # Get Directory Format
291
292 my @FieldPOS;
293 my @FieldLEN;
294 my @FieldTAG;
295
296 read($self->{'fileMST'}, $buff, 6 * $NVF);
297
298 my $fld_len = 0;
299
300 for (my $i = 0 ; $i < $NVF ; $i++) {
301
302 # $TAG=$self->Read16($fileMST);
303 # $POS=$self->Read16($fileMST);
304 # $LEN=$self->Read16($fileMST);
305
306 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
307
308 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
309
310 # The TAG does not exists in .FDT so we set it to 0.
311 #
312 # XXX This is removed from perl version; .FDT file is updated manually, so
313 # you will often have fields in .MST file which aren't in .FDT. On the other
314 # hand, IsisMarc doesn't use .FDT files at all!
315
316 #if (! $self->{TagName}->{$TAG}) {
317 # $TAG=0;
318 #}
319
320 push @FieldTAG,$TAG;
321 push @FieldPOS,$POS;
322 push @FieldLEN,$LEN;
323
324 $fld_len += $LEN;
325 }
326
327 # Get Variable Fields
328
329 read($self->{'fileMST'},$buff,$fld_len);
330
331 for (my $i = 0 ; $i < $NVF ; $i++) {
332 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
333 }
334 close(fileMST);
335
336 print Dumper($self) if ($self->{debug});
337
338 return $self->{'record'};
339 }
340
341 =head2 to_ascii
342
343 Dump ascii output of selected MFN
344
345 print $isis->to_ascii(55);
346
347 =cut
348
349 sub to_ascii {
350 my $self = shift;
351
352 my $mfn = shift || croak "need MFN";
353
354 my $rec = $self->fetch($mfn);
355
356 my $out = "0\t$mfn";
357
358 foreach my $f (sort keys %{$rec}) {
359 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
360 }
361
362 $out .= "\n";
363
364 return $out;
365 }
366
367 #
368 # XXX porting from php left-over:
369 #
370 # do I *REALLY* need those methods, or should I use
371 # $self->{something} directly?
372 #
373 # Probably direct usage is better!
374 #
375
376 sub TagName {
377 my $self = shift;
378 return $self->{TagName};
379 }
380
381 sub NextMFN {
382 my $self = shift;
383 return $self->{NXTMFN};
384 }
385
386 1;
387
388 =head1 BUGS
389
390 This module has been very lightly tested. Use with caution and report bugs.
391
392 =head1 AUTHOR
393
394 Dobrica Pavlinusic
395 CPAN ID: DPAVLIN
396 dpavlin@rot13.org
397 http://www.rot13.org/~dpavlin/
398
399 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
400 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
401
402 =head1 COPYRIGHT
403
404 This program is free software; you can redistribute
405 it and/or modify it under the same terms as Perl itself.
406
407 The full text of the license can be found in the
408 LICENSE file included with this module.
409
410
411 =head1 SEE ALSO
412
413 L<http://www.openisis.org|OpenIsis>, perl(1).
414

  ViewVC Help
Powered by ViewVC 1.1.26