/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations)
Tue Dec 28 01:41:45 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 16080 byte(s)
first working version:
- add support for repeatable fields (so all hash values becomed arrays, even
  with single element)
- scripts to dump CDS/ISIS database using this module and OpenIsis
- to_ascii method which dumps ascii output of record

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.01;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB
26 my $isis = new IsisDB(
27 isisdb => './cds/cds',
28 );
29
30 =head1 DESCRIPTION
31
32 This module will read CDS/ISIS databases and create hash values out of it.
33 It can be used as perl-only alternative to OpenIsis module.
34
35 =head1 METHODS
36
37 =cut
38
39 # my $ORDN; # Nodes Order
40 # my $ORDF; # Leafs Order
41 # my $N; # Number of Memory buffers for nodes
42 # my $K; # Number of buffers for first level index
43 # my $LIV; # Current number of Index Levels
44 # my $POSRX; # Pointer to Root Record in N0x
45 # my $NMAXPOS; # Next Available position in N0x
46 # my $FMAXPOS; # Next available position in L0x
47 # my $ABNORMAL; # Formal BTree normality indicator
48
49 #
50 # some binary reads
51 #
52
53 sub Read32 {
54 my $self = shift;
55
56 my $f = shift || die "Read32 needs file handle";
57 read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58 return unpack("l",$b);
59 }
60
61 =head2 new
62
63 Open CDS/ISIS database
64
65 my $isis = new IsisDB(
66 isisdb => './cds/cds',
67 read_fdt => 1,
68 debug => 1,
69 );
70
71 Options are described below:
72
73 =over 5
74
75 =item isisdb
76
77 Prefix path to CDS/ISIS. It should contain full or relative path to database
78 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
79
80 =item read_fdt
81
82 Boolean flag to specify if field definition table should be read. It's off
83 by default.
84
85 =item debug
86
87 Dump a C<lot> of debugging output.
88
89 =back
90
91 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
92
93 =cut
94
95 sub new {
96 my $class = shift;
97 my $self = {};
98 bless($self, $class);
99
100 $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
101
102 $self->{debug} = {@_}->{debug};
103
104 # if you want to read .FDT file use read_fdt argument when creating class!
105 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
106
107 # read the $db.FDT file for tags
108 my $fieldzone=0;
109
110 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
111
112 while (<fileFDT>) {
113 chomp;
114 if ($fieldzone) {
115 my $name=substr($_,0,30);
116 my $tag=substr($_,50,3);
117
118 $name =~ s/\s+$//;
119 $tag =~ s/\s+$//;
120
121 $self->{'TagName'}->{$tag}=$name;
122 }
123
124 if (/^\*\*\*/) {
125 $fieldzone=1;
126 }
127 }
128
129 close(fileFDT);
130 }
131
132 # Get the Maximum MFN from $db.MST
133
134 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
135
136 # MST format: (* = 32 bit signed)
137 # CTLMFN* always 0
138 # NXTMFN* MFN to be assigned to the next record created
139 # NXTMFB* last block allocated to master file
140 # NXTMFP offset to next available position in last block
141 # MFTYPE always 0 for user db file (1 for system)
142 seek(fileMST,4,0);
143 $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
144
145 # save maximum MFN
146 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
147
148 close(fileMST);
149
150 # Get the index information from $db.CNT
151
152 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
153
154 # There is two 26 Bytes fixed lenght records
155
156 # 0: IDTYPE BTree type 16
157 # 2: ORDN Nodes Order 16
158 # 4: ORDF Leafs Order 16
159 # 6: N Number of Memory buffers for nodes 16
160 # 8: K Number of buffers for first level index 16
161 # 10: LIV Current number of Index Levels 16
162 # 12: POSRX* Pointer to Root Record in N0x 32
163 # 16: NMAXPOS* Next Available position in N0x 32
164 # 20: FMAXPOS* Next available position in L0x 32
165 # 24: ABNORMAL Formal BTree normality indicator 16
166 # length: 26 bytes
167
168 sub unpack_cnt {
169 my $self = shift;
170
171 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
172
173 my $buff = shift || return;
174 my @arr = unpack("ssssssllls", $buff);
175
176 my $IDTYPE = shift @arr;
177 foreach (@flds) {
178 $self->{$IDTYPE}->{$_} = abs(shift @arr);
179 }
180 }
181
182 my $buff;
183 read(fileCNT, $buff, 26);
184 $self->unpack_cnt($buff);
185
186 read(fileCNT, $buff, 26);
187 $self->unpack_cnt($buff);
188
189
190 close(fileCNT);
191
192 print Dumper($self) if ($self->{debug});
193
194 $self ? return $self : return undef;
195 }
196
197 =head2 GetMFN
198
199 Read record with selected MFN
200
201 my $rec = $isis->GetMFN(55);
202
203 Returns hash with keys which are field names and values are unpacked values
204 for that field.
205
206 =cut
207
208 sub GetMFN {
209 my $self = shift;
210
211 my $mfn = shift || croak "GetMFN needs MFN as argument!";
212
213 print "GetMFN: $mfn\n" if ($self->{debug});
214
215 open(fileXRF, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
216
217 # XXX check this?
218 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
219
220 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
221 seek(fileXRF,$mfnpos,0);
222
223 # read XRFMFB abd XRFMFP
224 my $pointer=$self->Read32(\*fileXRF);
225
226 my $XRFMFB = int($pointer/2048);
227 my $XRFMFP = $pointer - ($XRFMFB*2048);
228
229 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
230
231 # XXX fix this to be more readable!!
232 # e.g. (XRFMFB - 1) * 512 + XRFMFP
233
234 my $offset = $pointer;
235 my $offset2=int($offset/2048)-1;
236 my $offset22=int($offset/4096);
237 my $offset3=$offset-($offset22*4096);
238 if ($offset3>512) {
239 $offset3=$offset3-2048;
240 }
241 my $offset4=($offset2*512)+$offset3;
242
243 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
244
245 close(fileXRF);
246
247 # Get Record Information
248
249 open(fileMST, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
250
251 seek(fileMST,$offset4,0);
252
253 my $value=$self->Read32(\*fileMST);
254
255 if ($value!=$mfn) {
256 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
257 return -1; # XXX deleted record?
258 }
259
260 # $MFRL=$self->Read16($fileMST);
261 # $MFBWB=$self->Read32($fileMST);
262 # $MFBWP=$self->Read16($fileMST);
263 # $BASE=$self->Read16($fileMST);
264 # $NVF=$self->Read16($fileMST);
265 # $STATUS=$self->Read16($fileMST);
266
267 my $buff;
268 read(fileMST, $buff, 14);
269
270 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
271
272 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
273
274 # Get Directory Format
275
276 my @FieldPOS;
277 my @FieldLEN;
278 my @FieldTAG;
279
280 for (my $i = 0 ; $i < $NVF ; $i++) {
281
282 # $TAG=$self->Read16($fileMST);
283 # $POS=$self->Read16($fileMST);
284 # $LEN=$self->Read16($fileMST);
285
286 read(fileMST, $buff, 6);
287 my ($TAG,$POS,$LEN) = unpack("sss", $buff);
288
289 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
290
291 # The TAG does not exists in .FDT so we set it to 0.
292 #
293 # XXX This is removed from perl version; .FDT file is updated manually, so
294 # you will often have fields in .MST file which aren't in .FDT. On the other
295 # hand, IsisMarc doesn't use .FDT files at all!
296
297 #if (! $self->{TagName}->{$TAG}) {
298 # $TAG=0;
299 #}
300
301 push @FieldTAG,$TAG;
302 push @FieldPOS,$POS;
303 push @FieldLEN,$LEN;
304 }
305
306 # Get Variable Fields
307
308 delete $self->{record};
309
310 for (my $i = 0 ; $i < $NVF ; $i++) {
311 my $rec;
312 read(fileMST,$rec,$FieldLEN[$i]);
313 push @{$self->{record}->{$FieldTAG[$i]}}, $rec;
314 }
315 close(fileMST);
316
317 # The record is marked for deletion
318 if ($STATUS==1) {
319 return -1;
320 }
321
322 print Dumper($self) if ($self->{debug});
323
324 return $self->{'record'};
325 }
326
327 =head2 to_ascii
328
329 Dump ascii output of selected MFN
330
331 print $isis->to_ascii(55);
332
333 =cut
334
335 sub to_ascii {
336 my $self = shift;
337
338 my $mfn = shift || croak "need MFN";
339
340 my $rec = $self->GetMFN($mfn);
341
342 print STDERR Dumper($rec);
343
344 my $out = "0\t$mfn";
345
346 foreach my $f (sort keys %{$rec}) {
347 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
348 }
349
350 $out .= "\n";
351
352 return $out;
353 }
354
355 ################# old cruft which is not ported from php to perl
356
357 =begin php
358
359 # Load the dictionary from the $db.L0x files.
360 # Not usefull Yet
361
362 sub LoadDictionary()
363 {
364 $fileL01=fopen($self->{isisdb}.".L01","r");
365 rewind($fileL01);
366
367 do
368 {
369
370 $POS=$self->Read32($fileL01);
371 $OCK=$self->Read16($fileL01);
372 $IT=$self->Read16($fileL01);
373 $PS=$self->Read32($fileL01);
374 print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
375 for ($i=0;$i<$OCK;$i++)
376 {
377 $KEY=fread($fileL01,10);
378
379 print $KEY." ### ";
380
381 $INFO1=$self->Read32($fileL01);
382 $INFO2=$self->Read32($fileL01);
383
384 #L01Key->{$key}=array($INFO1,$INFO2);
385 }
386
387 rewind($fileL01);
388 $offset=($PS-1)*(12+$self->{ORDF}->{1}*18*2);
389 fseek($fileL01,$offset);
390
391 } While (!feof($fileL01));
392
393 fclose($fileL01);
394 }
395
396 # self function search through the tree and returns an array of pointers to IFP
397 # The function must be recursive
398
399 sub SearchTree($search,$fileNB,$PUNT)
400 {
401 $offset=(($PUNT-1)*(8+2*$self->{ORDN}->{1}*14));
402
403 rewind($fileNB1);
404
405 fseek($fileNB,$offset);
406
407 $POS=$self->Read32($fileNB);
408 $OCK=$self->Read16($fileNB);
409 $IT=$self->Read16($fileNB);
410
411 #print "<br>".$POS." - ".$OCK." - ".$IT;
412
413 $OLDPUNT=$POS;
414 $j=0;
415 for ($i=0;$i<$OCK;$i++)
416 {
417 $KEY=fread($fileNB,10);
418
419 $PUNT=$self->Read32($fileNB);
420
421 #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
422
423 If (strcmp($search,chop($KEY))<0)
424 {
425 break;
426 }
427 $OLDPUNT=$PUNT;
428 }
429 #print $OLDPUNT;
430 Return $OLDPUNT;
431 }
432
433 # Search ISIS for record containing search
434 # Return a sorted array of MFN
435
436 sub Search($search)
437 {
438
439 $search=strtoupper($search);
440 #print "Searching....".$search." - ".$self->{POSRX}->{1}."<br>";
441 # first search .x01
442
443
444 # Search in .N01
445
446
447 $fileN01=fopen($self->{isisdb}.".N01","r");
448 $offset=(($self->{POSRX}->{1}-1)*(8+2*$self->{ORDN}->{1}*14));
449
450 do
451 {
452 rewind($fileN01);
453
454 fseek($fileN01,$offset);
455
456 $POS=$self->Read32($fileN01);
457 $OCK=$self->Read16($fileN01);
458 $IT=$self->Read16($fileN01);
459
460 #print "<br>".$POS." - ".$OCK." - ".$IT;
461
462 $OLDPUNT=$POS;
463 for ($i=0;$i<$OCK;$i++)
464 {
465 $KEY=fread($fileN01,10);
466
467 $PUNT=$self->Read32($fileN01);
468
469 #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
470
471 If (strcmp($search,chop($KEY))<0)
472 {
473 break;
474 }
475 $OLDPUNT=$PUNT;
476 }
477 $offset=(($OLDPUNT-1)*(8+2*$self->{ORDN}->{1}*14));
478 } while ($OLDPUNT>0);
479 #print $OLDPUNT;
480
481
482 fclose($fileN01);
483
484 # Now look for records in .L01 file
485 $fileL01=fopen($self->{isisdb}.".L01","r");
486 rewind($fileL01);
487
488 $offset=(-$OLDPUNT-1)*(12+$self->{ORDF}->{1}*18*2);
489 fseek($fileL01,$offset);
490
491 $POS=$self->Read32($fileL01);
492 $OCK=$self->Read16($fileL01);
493 $IT=$self->Read16($fileL01);
494 $PS=$self->Read32($fileL01);
495 #print "<br>POS:".$POS." ".$self->{ORDF}->{1}." ";
496 for ($i=0;$i<$OCK;$i++)
497 {
498 $KEY=fread($fileL01,10);
499
500 #print $KEY." ### ";
501
502 $INFO1=$self->Read32($fileL01);
503 $INFO2=$self->Read32($fileL01);
504
505 If (strcmp($search,chop($KEY))==0)
506 {
507 break;
508 }
509 }
510
511 fclose($fileL01);
512
513 #print $INFO1."--".$INFO2;
514
515 # Now look in .IFP for the MFN
516 $fileIFP=fopen($self->{isisdb}.".IFP","r");
517 rewind($fileIFP);
518 $offset=($INFO1-1)*512+($INFO2*4);
519 fseek($fileIFP,$offset);
520
521 $IFPBLK=$self->Read32($fileIFP);
522
523 $IFPNXTB=$self->Read32($fileIFP);
524 $IFPNXTP=$self->Read32($fileIFP);
525 $IFPTOTP=$self->Read32($fileIFP);
526 $IFPSEGP=$self->Read32($fileIFP);
527 $IFPSEGC=$self->Read32($fileIFP);
528
529
530 #print "<br>IFP:".$IFPBLK." # ".$IFPNXTB." - ".$IFPNXTP." - ".$IFPTOTP." - ".$IFPSEGP." - ".$IFPSEGC;
531
532 rewind($fileIFP);
533 $offset=($INFO1-1)*512+24+($INFO2*4);
534 fseek($fileIFP,$offset);
535
536 $j=24+($INFO2*4);
537 $k=0;
538 $l=1;
539 $OLDPMFN="";
540 for ($i=0;$i<$IFPSEGP;$i++)
541 {
542 $B1=$self->Read8($fileIFP);
543 $B2=$self->Read8($fileIFP);
544 $B3=$self->Read8($fileIFP);
545 $B4=$self->Read8($fileIFP);
546 $B5=$self->Read8($fileIFP);
547 $B6=$self->Read8($fileIFP);
548 $B7=$self->Read8($fileIFP);
549 $B8=$self->Read8($fileIFP);
550
551 $PMFN=$B1*65536+$B2*256+$B3;
552 $PTAG=$B4*256+$B5;
553 $POCC=$B6;
554 $PCNT=$B7*256+$B8;
555
556 if ($OLDPMFN!=$PMFN)
557 {
558 if ($PMFN!=0)
559 {
560 $self->{MFNArray}->{$l}=$PMFN;
561 $OLDPMFN=$PMFN;
562 $l+=1;
563 }
564 }
565
566 $j=$j+8;
567 #print "<br>".$PMFN."-".$PTAG." - ".$POCC." - ".$PCNT;
568 #print "@@".$j."@@@@";
569 if ($j>=504)
570 {
571 if ($IFPNXTB==0 && $IFPNXTP==0)
572 {
573 $k=$k+1;
574 rewind($fileIFP);
575 $offset=($INFO1-1+$k)*512;
576 fseek($fileIFP,$offset);
577 $B=$self->Read32($fileIFP);
578 #print "<br>-".$B."-<br>";
579 $j=0;
580 } else
581 {
582 rewind($fileIFP);
583 $offset=($IFPNXTB-1)*512;
584 fseek($fileIFP,$offset);
585
586 $OLDIFPNXTB=$IFPNXTB;
587 $OLDIFPNXTP=$IFPNXTP;
588
589 $IFPBLK=$self->Read32($fileIFP);
590
591 $IFPNXTB=$self->Read32($fileIFP);
592 $IFPNXTP=$self->Read32($fileIFP);
593 $IFPTOTP=$self->Read32($fileIFP);
594 $IFPSEGP=$self->Read32($fileIFP);
595 $IFPSEGC=$self->Read32($fileIFP);
596
597 rewind($fileIFP);
598 $offset=($OLDIFPNXTB-1)*512+24+($OLDIFPNXTP*4);
599 fseek($fileIFP,$offset);
600
601 $j=24+($OLDIFPNXTP*4);
602 $k=0;
603 $j=0;
604 }
605 }
606
607 }
608 fclose($fileIFP);
609 return $l-1;
610 }
611
612 =cut
613
614 #
615 # XXX porting from php left-over:
616 #
617 # do I *REALLY* need those methods, or should I use
618 # $self->{something} directly?
619 #
620 # Probably direct usage is better!
621 #
622
623 sub GetFieldName {
624 my $self = shift;
625 return $self->{FieldName};
626 }
627
628 sub GetTagName {
629 my $self = shift;
630 return $self->{TagName};
631 }
632
633 sub GetFieldTag {
634 my $self = shift;
635 return $self->{FieldTAG};
636 }
637
638 sub GetNextMFN {
639 my $self = shift;
640 return $self->{NXTMFN};
641 }
642
643 sub GetMFNArray {
644 my $self = shift;
645 return $self->{MFNArray};
646 }
647 =begin php
648
649 sub Read32($fileNB)
650 {
651 $B1=ord(fread($fileNB,1));
652 $B2=ord(fread($fileNB,1));
653 $B3=ord(fread($fileNB,1));
654 $B4=ord(fread($fileNB,1));
655
656 if ($B4<=128)
657 {
658 $value=$B1+$B2*256+$B3*65536+$B4*16777216;
659 } else
660 {
661 $value=$self->Not8($B1)+$self->Not8($B2)*256+$self->Not8($B3)*65536+$self->Not8($B4)*16777216;
662 $value=-($value+1);
663 }
664 # print "(".$B1.",".$B2.",".$B3.",".$B4.":".$value.")";
665
666 return $value;
667 }
668
669 sub Read24($fileNB)
670 {
671 $B1=ord(fread($fileNB,1));
672 $B2=ord(fread($fileNB,1));
673 $B3=ord(fread($fileNB,1));
674
675 $value=$B1+$B2*256+$B3*65536;
676
677 # print "(".$B1.",".$B2.",".$B3.":".$value.")";
678
679 return $value;
680 }
681
682 sub Read16($fileNB)
683 {
684 $B1=ord(fread($fileNB,1));
685 $B2=ord(fread($fileNB,1));
686
687 $value=$B1+$B2*256;
688 # print "(".$B1.",".$B2.":".$value.")";
689
690 return $value;
691 }
692
693 sub Read8($fileNB)
694 {
695 $B1=ord(fread($fileNB,1));
696
697 $value=$B1;
698 # print "(".$value.")";
699
700 return $value;
701 }
702
703 sub Not8($value)
704 {
705 $value=decbin($value);
706 if (strlen($value)<8)
707 {
708 $buffer="";
709 for($i=0;$i<(8-strlen($value));$i++)
710 {
711 $buffer.="0";
712 }
713 $value=$buffer.$value;
714 }
715 $value=ereg_replace("0","3",$value);
716 $value=ereg_replace("1","0",$value);
717 $value=ereg_replace("3","1",$value);
718 $value=bindec($value);
719 return $value;
720 }
721 }
722
723 =cut
724
725 1;
726 __END__
727
728 =head1 BUGS
729
730 This module has been very lightly tested. Use with caution and report bugs.
731
732 =head1 AUTHOR
733
734 Dobrica Pavlinusic
735 CPAN ID: DPAVLIN
736 dpavlin@rot13.org
737 http://www.rot13.org/~dpavlin/
738
739 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
740 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
741
742 =head1 COPYRIGHT
743
744 This program is free software; you can redistribute
745 it and/or modify it under the same terms as Perl itself.
746
747 The full text of the license can be found in the
748 LICENSE file included with this module.
749
750
751 =head1 SEE ALSO
752
753 L<http://www.openisis.org|OpenIsis>, perl(1).
754

  ViewVC Help
Powered by ViewVC 1.1.26