/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 11 - (show annotations)
Wed Dec 29 17:03:52 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 9554 byte(s)
documentation and dependency improvements, inline Read32 to get some more
performance.

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.03;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB;
26
27 my $isis = new IsisDB(
28 isisdb => './cds/cds',
29 );
30
31 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32 print $isis->to_ascii($mfn),"\n";
33 }
34
35 =head1 DESCRIPTION
36
37 This module will read CDS/ISIS databases and create hash values out of it.
38 It can be used as perl-only alternative to OpenIsis module.
39
40 This will module will always be slower that OpenIsis module which use C
41 library. However, since it's written in perl, it's platform independent (so
42 you don't need C compiler), and can be easily modified.
43
44 Unique feature of this module is ability to C<include_deleted> records.
45 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46 fields which are zero sized will be filled with random junk from memory).
47
48 =head1 METHODS
49
50 =cut
51
52 # my $ORDN; # Nodes Order
53 # my $ORDF; # Leafs Order
54 # my $N; # Number of Memory buffers for nodes
55 # my $K; # Number of buffers for first level index
56 # my $LIV; # Current number of Index Levels
57 # my $POSRX; # Pointer to Root Record in N0x
58 # my $NMAXPOS; # Next Available position in N0x
59 # my $FMAXPOS; # Next available position in L0x
60 # my $ABNORMAL; # Formal BTree normality indicator
61
62 #
63 # some binary reads
64 #
65
66 =head2 new
67
68 Open CDS/ISIS database
69
70 my $isis = new IsisDB(
71 isisdb => './cds/cds',
72 read_fdt => 1,
73 debug => 1,
74 include_deleted => 1,
75 );
76
77 Options are described below:
78
79 =over 5
80
81 =item isisdb
82
83 Prefix path to CDS/ISIS. It should contain full or relative path to database
84 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
85
86 =item read_fdt
87
88 Boolean flag to specify if field definition table should be read. It's off
89 by default.
90
91 =item debug
92
93 Dump a C<lot> of debugging output.
94
95 =item include_deleted
96
97 Don't skip logically deleted records in ISIS.
98
99 =back
100
101 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
102
103 =cut
104
105 sub new {
106 my $class = shift;
107 my $self = {};
108 bless($self, $class);
109
110 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
111
112 foreach my $v (qw{isisdb debug include_deleted}) {
113 $self->{$v} = {@_}->{$v};
114 }
115
116 # if you want to read .FDT file use read_fdt argument when creating class!
117 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
118
119 # read the $db.FDT file for tags
120 my $fieldzone=0;
121
122 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
123
124 while (<fileFDT>) {
125 chomp;
126 if ($fieldzone) {
127 my $name=substr($_,0,30);
128 my $tag=substr($_,50,3);
129
130 $name =~ s/\s+$//;
131 $tag =~ s/\s+$//;
132
133 $self->{'TagName'}->{$tag}=$name;
134 }
135
136 if (/^\*\*\*/) {
137 $fieldzone=1;
138 }
139 }
140
141 close(fileFDT);
142 }
143
144 # Get the Maximum MFN from $db.MST
145
146 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
147
148 # MST format: (* = 32 bit signed)
149 # CTLMFN* always 0
150 # NXTMFN* MFN to be assigned to the next record created
151 # NXTMFB* last block allocated to master file
152 # NXTMFP offset to next available position in last block
153 # MFTYPE always 0 for user db file (1 for system)
154 seek(fileMST,4,0);
155
156 my $buff;
157
158 read(fileMST, $buff, 4);
159 $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
160
161 # save maximum MFN
162 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
163
164 close(fileMST);
165
166 # Get the index information from $db.CNT
167
168 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
169
170 # There is two 26 Bytes fixed lenght records
171
172 # 0: IDTYPE BTree type 16
173 # 2: ORDN Nodes Order 16
174 # 4: ORDF Leafs Order 16
175 # 6: N Number of Memory buffers for nodes 16
176 # 8: K Number of buffers for first level index 16
177 # 10: LIV Current number of Index Levels 16
178 # 12: POSRX* Pointer to Root Record in N0x 32
179 # 16: NMAXPOS* Next Available position in N0x 32
180 # 20: FMAXPOS* Next available position in L0x 32
181 # 24: ABNORMAL Formal BTree normality indicator 16
182 # length: 26 bytes
183
184 sub unpack_cnt {
185 my $self = shift;
186
187 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
188
189 my $buff = shift || return;
190 my @arr = unpack("ssssssllls", $buff);
191
192 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
193
194 my $IDTYPE = shift @arr;
195 foreach (@flds) {
196 $self->{$IDTYPE}->{$_} = abs(shift @arr);
197 }
198 }
199
200 read(fileCNT, $buff, 26);
201 $self->unpack_cnt($buff);
202
203 read(fileCNT, $buff, 26);
204 $self->unpack_cnt($buff);
205
206
207 close(fileCNT);
208
209 print Dumper($self) if ($self->{debug});
210
211 # open files for later
212 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
213
214 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
215
216 $self ? return $self : return undef;
217 }
218
219 =head2 fetch
220
221 Read record with selected MFN
222
223 my $rec = $isis->fetch(55);
224
225 Returns hash with keys which are field names and values are unpacked values
226 for that field.
227
228 =cut
229
230 sub fetch {
231 my $self = shift;
232
233 my $mfn = shift || croak "fetch needs MFN as argument!";
234
235 print "fetch: $mfn\n" if ($self->{debug});
236
237 # XXX check this?
238 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
239
240 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
241 seek($self->{'fileXRF'},$mfnpos,0);
242
243 my $buff;
244
245 # read XRFMFB abd XRFMFP
246 read($self->{'fileXRF'}, $buff, 4);
247 my $pointer=unpack("l",$buff) || carp "pointer is null";
248
249 my $XRFMFB = int($pointer/2048);
250 my $XRFMFP = $pointer - ($XRFMFB*2048);
251
252 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
253
254 # XXX fix this to be more readable!!
255 # e.g. (XRFMFB - 1) * 512 + XRFMFP
256
257 my $offset = $pointer;
258 my $offset2=int($offset/2048)-1;
259 my $offset22=int($offset/4096);
260 my $offset3=$offset-($offset22*4096);
261 if ($offset3>512) {
262 $offset3=$offset3-2048;
263 }
264 my $offset4=($offset2*512)+$offset3;
265
266 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
267
268 # Get Record Information
269
270 seek($self->{'fileMST'},$offset4,0);
271
272 read($self->{'fileMST'}, $buff, 4);
273 my $value=unpack("l",$buff);
274
275 if ($value!=$mfn) {
276 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
277 return -1; # XXX deleted record?
278 }
279
280 # $MFRL=$self->Read16($fileMST);
281 # $MFBWB=$self->Read32($fileMST);
282 # $MFBWP=$self->Read16($fileMST);
283 # $BASE=$self->Read16($fileMST);
284 # $NVF=$self->Read16($fileMST);
285 # $STATUS=$self->Read16($fileMST);
286
287 read($self->{'fileMST'}, $buff, 14);
288
289 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
290
291 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
292
293 # delete old record
294 delete $self->{record};
295
296 if (! $self->{'include_deleted'} && $MFRL < 0) {
297 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
298 return;
299 }
300
301 # Get Directory Format
302
303 my @FieldPOS;
304 my @FieldLEN;
305 my @FieldTAG;
306
307 read($self->{'fileMST'}, $buff, 6 * $NVF);
308
309 my $fld_len = 0;
310
311 for (my $i = 0 ; $i < $NVF ; $i++) {
312
313 # $TAG=$self->Read16($fileMST);
314 # $POS=$self->Read16($fileMST);
315 # $LEN=$self->Read16($fileMST);
316
317 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
318
319 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
320
321 # The TAG does not exists in .FDT so we set it to 0.
322 #
323 # XXX This is removed from perl version; .FDT file is updated manually, so
324 # you will often have fields in .MST file which aren't in .FDT. On the other
325 # hand, IsisMarc doesn't use .FDT files at all!
326
327 #if (! $self->{TagName}->{$TAG}) {
328 # $TAG=0;
329 #}
330
331 push @FieldTAG,$TAG;
332 push @FieldPOS,$POS;
333 push @FieldLEN,$LEN;
334
335 $fld_len += $LEN;
336 }
337
338 # Get Variable Fields
339
340 read($self->{'fileMST'},$buff,$fld_len);
341
342 for (my $i = 0 ; $i < $NVF ; $i++) {
343 # skip zero-sized fields
344 next if ($FieldLEN[$i] == 0);
345
346 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
347 }
348 close(fileMST);
349
350 print Dumper($self) if ($self->{debug});
351
352 return $self->{'record'};
353 }
354
355 =head2 to_ascii
356
357 Dump ascii output of selected MFN
358
359 print $isis->to_ascii(55);
360
361 =cut
362
363 sub to_ascii {
364 my $self = shift;
365
366 my $mfn = shift || croak "need MFN";
367
368 my $rec = $self->fetch($mfn);
369
370 my $out = "0\t$mfn";
371
372 foreach my $f (sort keys %{$rec}) {
373 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
374 }
375
376 $out .= "\n";
377
378 return $out;
379 }
380
381 #
382 # XXX porting from php left-over:
383 #
384 # do I *REALLY* need those methods, or should I use
385 # $self->{something} directly?
386 #
387 # Probably direct usage is better!
388 #
389
390 sub TagName {
391 my $self = shift;
392 return $self->{TagName};
393 }
394
395 sub NextMFN {
396 my $self = shift;
397 return $self->{NXTMFN};
398 }
399
400 1;
401
402 =head1 BUGS
403
404 This module has been very lightly tested. Use with caution and report bugs.
405
406 =head1 AUTHOR
407
408 Dobrica Pavlinusic
409 CPAN ID: DPAVLIN
410 dpavlin@rot13.org
411 http://www.rot13.org/~dpavlin/
412
413 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
414 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
415
416 =head1 COPYRIGHT
417
418 This program is free software; you can redistribute
419 it and/or modify it under the same terms as Perl itself.
420
421 The full text of the license can be found in the
422 LICENSE file included with this module.
423
424
425 =head1 SEE ALSO
426
427 L<http://www.openisis.org|OpenIsis>, perl(1).
428

  ViewVC Help
Powered by ViewVC 1.1.26