/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Tue Dec 28 00:43:04 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 15421 byte(s)
Import of old code back from february to actually make it work.

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.01;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB
26 my $isis = new IsisDB(
27 isisdb => './cds/cds',
28 );
29
30 =head1 DESCRIPTION
31
32 This module will read CDS/ISIS databases and create hash values out of it.
33 It can be used as perl-only alternative to OpenIsis module.
34
35 =head1 METHODS
36
37 =cut
38
39 # my $ORDN; # Nodes Order
40 # my $ORDF; # Leafs Order
41 # my $N; # Number of Memory buffers for nodes
42 # my $K; # Number of buffers for first level index
43 # my $LIV; # Current number of Index Levels
44 # my $POSRX; # Pointer to Root Record in N0x
45 # my $NMAXPOS; # Next Available position in N0x
46 # my $FMAXPOS; # Next available position in L0x
47 # my $ABNORMAL; # Formal BTree normality indicator
48
49 #
50 # some binary reads
51 #
52
53 sub Read32 {
54 my $self = shift;
55
56 my $f = shift || die "Read32 needs file handle";
57 read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58 return unpack("l",$b);
59 }
60
61 =head2 new
62
63 Open CDS/ISIS database
64
65 my $isis = new IsisDB(
66 isisdb => './cds/cds',
67 read_fdt => 1,
68 debug => 1,
69 );
70
71 =item isisdb
72
73 Prefix path to CDS/ISIS. It should contain full or relative path to database
74 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
75
76 =item read_fdt
77
78 Boolean flag to specify if field definition table should be read. It's off
79 by default.
80
81 =item debug
82
83 Dump a C<lot> of debugging output.
84
85 =cut
86
87 sub new {
88 my $class = shift;
89 my $self = {};
90 bless($self, $class);
91
92 $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
93
94 $self->{debug} = {@_}->{debug} || 1; # XXX remove debug always!
95
96 # if you want to read .FDT file use read_fdt argument when creating class!
97 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
98
99 # read the $db.FDT file for tags
100 my $fieldzone=0;
101
102 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
103
104 while (<fileFDT>) {
105 chomp;
106 if ($fieldzone) {
107 my $name=substr($_,0,30);
108 my $tag=substr($_,50,3);
109
110 $name =~ s/\s+$//;
111 $tag =~ s/\s+$//;
112
113 $self->{'TagName'}->{$tag}=$name;
114 }
115
116 if (/^\*\*\*/) {
117 $fieldzone=1;
118 }
119 }
120
121 close(fileFDT);
122 }
123
124 # Get the Maximum MFN from $db.MST
125
126 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
127
128 # MST format: (* = 32 bit signed)
129 # CTLMFN* always 0
130 # NXTMFN* MFN to be assigned to the next record created
131 # NXTMFB* last block allocated to master file
132 # NXTMFP offset to next available position in last block
133 # MFTYPE always 0 for user db file (1 for system)
134 seek(fileMST,4,0);
135 $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
136
137 close(fileMST);
138
139 # Get the index information from $db.CNT
140
141 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
142
143 # There is two 26 Bytes fixed lenght records
144
145 # 0: IDTYPE BTree type 16
146 # 2: ORDN Nodes Order 16
147 # 4: ORDF Leafs Order 16
148 # 6: N Number of Memory buffers for nodes 16
149 # 8: K Number of buffers for first level index 16
150 # 10: LIV Current number of Index Levels 16
151 # 12: POSRX* Pointer to Root Record in N0x 32
152 # 16: NMAXPOS* Next Available position in N0x 32
153 # 20: FMAXPOS* Next available position in L0x 32
154 # 24: ABNORMAL Formal BTree normality indicator 16
155 # length: 26 bytes
156
157 sub unpack_cnt {
158 my $self = shift;
159
160 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
161
162 my $buff = shift || return;
163 my @arr = unpack("ssssssllls", $buff);
164
165 my $IDTYPE = shift @arr;
166 foreach (@flds) {
167 $self->{$IDTYPE}->{$_} = abs(shift @arr);
168 }
169 }
170
171 my $buff;
172 read(fileCNT, $buff, 26);
173 $self->unpack_cnt($buff);
174
175 read(fileCNT, $buff, 26);
176 $self->unpack_cnt($buff);
177
178
179 close(fileCNT);
180
181 print Dumper($self) if ($self->{debug});
182
183 $self ? return $self : return undef;
184 }
185
186
187 # Get a record from the MFN
188 # Return the number of fields in the record.
189 # Return -1 if the record is marked for deletion
190 # The record is then extracted with call to GETs
191
192 sub GetMFN {
193 my $self = shift;
194
195 my $mfn = shift || croak "GetMFN needs MFN as argument!";
196
197 print "GetMFN: $mfn\n" if ($self->{debug});
198
199 open(fileXRF, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
200
201 # XXX check this?
202 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
203
204 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
205 seek(fileXRF,$mfnpos,0);
206
207 # read XRFMFB abd XRFMFP
208 my $pointer=$self->Read32(\*fileXRF);
209
210 my $XRFMFB = int($pointer/2048);
211 my $XRFMFP = $pointer - ($XRFMFB*2048);
212
213 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
214
215 # XXX fix this to be more readable!!
216 # e.g. (XRFMFB - 1) * 512 + XRFMFP
217
218 my $offset = $pointer;
219 my $offset2=int($offset/2048)-1;
220 my $offset22=int($offset/4096);
221 my $offset3=$offset-($offset22*4096);
222 if ($offset3>512) {
223 $offset3=$offset3-2048;
224 }
225 my $offset4=($offset2*512)+$offset3;
226
227 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
228
229 close(fileXRF);
230
231 # Get Record Information
232
233 open(fileMST, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
234
235 seek(fileMST,$offset4,0);
236
237 my $value=$self->Read32(\*fileMST);
238
239 if ($value!=$mfn) {
240 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
241 return -1; # XXX deleted record?
242 }
243
244 # $MFRL=$self->Read16($fileMST);
245 # $MFBWB=$self->Read32($fileMST);
246 # $MFBWP=$self->Read16($fileMST);
247 # $BASE=$self->Read16($fileMST);
248 # $NVF=$self->Read16($fileMST);
249 # $STATUS=$self->Read16($fileMST);
250
251 my $buff;
252 read(fileMST, $buff, 14);
253
254 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
255
256 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
257
258 # Get Directory Format
259
260 my @FieldPOS;
261 my @FieldLEN;
262 my @FieldTAG;
263
264 for (my $i = 0 ; $i < $NVF ; $i++) {
265
266 # $TAG=$self->Read16($fileMST);
267 # $POS=$self->Read16($fileMST);
268 # $LEN=$self->Read16($fileMST);
269
270 read(fileMST, $buff, 6);
271 my ($TAG,$POS,$LEN) = unpack("sss", $buff);
272
273 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
274
275 # The TAG does not exists in .FDT so we set it to 0.
276 #
277 # XXX This is removed from perl version; .FDT file is updated manually, so
278 # you will often have fields in .MST file which aren't in .FDT. On the other
279 # hand, IsisMarc doesn't use .FDT files at all!
280
281 #if (! $self->{TagName}->{$TAG}) {
282 # $TAG=0;
283 #}
284
285 push @FieldTAG,$TAG;
286 push @FieldPOS,$POS;
287 push @FieldLEN,$LEN;
288 }
289
290 # Get Variable Fields
291
292 for (my $i = 0 ; $i < $NVF ; $i++) {
293 my $rec;
294 read(fileMST,$rec,$FieldLEN[$i]);
295 $self->{record}->{$FieldTAG[$i]} = $rec;
296 }
297 close(fileMST);
298
299 # The record is marked for deletion
300 if ($STATUS==1) {
301 return -1;
302 }
303
304 print Dumper($self) if ($self->{debug});
305
306 return $NVF;
307 }
308
309 =begin php
310
311 # Load the dictionary from the $db.L0x files.
312 # Not usefull Yet
313
314 sub LoadDictionary()
315 {
316 $fileL01=fopen($self->{isisdb}.".L01","r");
317 rewind($fileL01);
318
319 do
320 {
321
322 $POS=$self->Read32($fileL01);
323 $OCK=$self->Read16($fileL01);
324 $IT=$self->Read16($fileL01);
325 $PS=$self->Read32($fileL01);
326 print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
327 for ($i=0;$i<$OCK;$i++)
328 {
329 $KEY=fread($fileL01,10);
330
331 print $KEY." ### ";
332
333 $INFO1=$self->Read32($fileL01);
334 $INFO2=$self->Read32($fileL01);
335
336 #L01Key->{$key}=array($INFO1,$INFO2);
337 }
338
339 rewind($fileL01);
340 $offset=($PS-1)*(12+$self->{ORDF}->{1}*18*2);
341 fseek($fileL01,$offset);
342
343 } While (!feof($fileL01));
344
345 fclose($fileL01);
346 }
347
348 # self function search through the tree and returns an array of pointers to IFP
349 # The function must be recursive
350
351 sub SearchTree($search,$fileNB,$PUNT)
352 {
353 $offset=(($PUNT-1)*(8+2*$self->{ORDN}->{1}*14));
354
355 rewind($fileNB1);
356
357 fseek($fileNB,$offset);
358
359 $POS=$self->Read32($fileNB);
360 $OCK=$self->Read16($fileNB);
361 $IT=$self->Read16($fileNB);
362
363 #print "<br>".$POS." - ".$OCK." - ".$IT;
364
365 $OLDPUNT=$POS;
366 $j=0;
367 for ($i=0;$i<$OCK;$i++)
368 {
369 $KEY=fread($fileNB,10);
370
371 $PUNT=$self->Read32($fileNB);
372
373 #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
374
375 If (strcmp($search,chop($KEY))<0)
376 {
377 break;
378 }
379 $OLDPUNT=$PUNT;
380 }
381 #print $OLDPUNT;
382 Return $OLDPUNT;
383 }
384
385 # Search ISIS for record containing search
386 # Return a sorted array of MFN
387
388 sub Search($search)
389 {
390
391 $search=strtoupper($search);
392 #print "Searching....".$search." - ".$self->{POSRX}->{1}."<br>";
393 # first search .x01
394
395
396 # Search in .N01
397
398
399 $fileN01=fopen($self->{isisdb}.".N01","r");
400 $offset=(($self->{POSRX}->{1}-1)*(8+2*$self->{ORDN}->{1}*14));
401
402 do
403 {
404 rewind($fileN01);
405
406 fseek($fileN01,$offset);
407
408 $POS=$self->Read32($fileN01);
409 $OCK=$self->Read16($fileN01);
410 $IT=$self->Read16($fileN01);
411
412 #print "<br>".$POS." - ".$OCK." - ".$IT;
413
414 $OLDPUNT=$POS;
415 for ($i=0;$i<$OCK;$i++)
416 {
417 $KEY=fread($fileN01,10);
418
419 $PUNT=$self->Read32($fileN01);
420
421 #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
422
423 If (strcmp($search,chop($KEY))<0)
424 {
425 break;
426 }
427 $OLDPUNT=$PUNT;
428 }
429 $offset=(($OLDPUNT-1)*(8+2*$self->{ORDN}->{1}*14));
430 } while ($OLDPUNT>0);
431 #print $OLDPUNT;
432
433
434 fclose($fileN01);
435
436 # Now look for records in .L01 file
437 $fileL01=fopen($self->{isisdb}.".L01","r");
438 rewind($fileL01);
439
440 $offset=(-$OLDPUNT-1)*(12+$self->{ORDF}->{1}*18*2);
441 fseek($fileL01,$offset);
442
443 $POS=$self->Read32($fileL01);
444 $OCK=$self->Read16($fileL01);
445 $IT=$self->Read16($fileL01);
446 $PS=$self->Read32($fileL01);
447 #print "<br>POS:".$POS." ".$self->{ORDF}->{1}." ";
448 for ($i=0;$i<$OCK;$i++)
449 {
450 $KEY=fread($fileL01,10);
451
452 #print $KEY." ### ";
453
454 $INFO1=$self->Read32($fileL01);
455 $INFO2=$self->Read32($fileL01);
456
457 If (strcmp($search,chop($KEY))==0)
458 {
459 break;
460 }
461 }
462
463 fclose($fileL01);
464
465 #print $INFO1."--".$INFO2;
466
467 # Now look in .IFP for the MFN
468 $fileIFP=fopen($self->{isisdb}.".IFP","r");
469 rewind($fileIFP);
470 $offset=($INFO1-1)*512+($INFO2*4);
471 fseek($fileIFP,$offset);
472
473 $IFPBLK=$self->Read32($fileIFP);
474
475 $IFPNXTB=$self->Read32($fileIFP);
476 $IFPNXTP=$self->Read32($fileIFP);
477 $IFPTOTP=$self->Read32($fileIFP);
478 $IFPSEGP=$self->Read32($fileIFP);
479 $IFPSEGC=$self->Read32($fileIFP);
480
481
482 #print "<br>IFP:".$IFPBLK." # ".$IFPNXTB." - ".$IFPNXTP." - ".$IFPTOTP." - ".$IFPSEGP." - ".$IFPSEGC;
483
484 rewind($fileIFP);
485 $offset=($INFO1-1)*512+24+($INFO2*4);
486 fseek($fileIFP,$offset);
487
488 $j=24+($INFO2*4);
489 $k=0;
490 $l=1;
491 $OLDPMFN="";
492 for ($i=0;$i<$IFPSEGP;$i++)
493 {
494 $B1=$self->Read8($fileIFP);
495 $B2=$self->Read8($fileIFP);
496 $B3=$self->Read8($fileIFP);
497 $B4=$self->Read8($fileIFP);
498 $B5=$self->Read8($fileIFP);
499 $B6=$self->Read8($fileIFP);
500 $B7=$self->Read8($fileIFP);
501 $B8=$self->Read8($fileIFP);
502
503 $PMFN=$B1*65536+$B2*256+$B3;
504 $PTAG=$B4*256+$B5;
505 $POCC=$B6;
506 $PCNT=$B7*256+$B8;
507
508 if ($OLDPMFN!=$PMFN)
509 {
510 if ($PMFN!=0)
511 {
512 $self->{MFNArray}->{$l}=$PMFN;
513 $OLDPMFN=$PMFN;
514 $l+=1;
515 }
516 }
517
518 $j=$j+8;
519 #print "<br>".$PMFN."-".$PTAG." - ".$POCC." - ".$PCNT;
520 #print "@@".$j."@@@@";
521 if ($j>=504)
522 {
523 if ($IFPNXTB==0 && $IFPNXTP==0)
524 {
525 $k=$k+1;
526 rewind($fileIFP);
527 $offset=($INFO1-1+$k)*512;
528 fseek($fileIFP,$offset);
529 $B=$self->Read32($fileIFP);
530 #print "<br>-".$B."-<br>";
531 $j=0;
532 } else
533 {
534 rewind($fileIFP);
535 $offset=($IFPNXTB-1)*512;
536 fseek($fileIFP,$offset);
537
538 $OLDIFPNXTB=$IFPNXTB;
539 $OLDIFPNXTP=$IFPNXTP;
540
541 $IFPBLK=$self->Read32($fileIFP);
542
543 $IFPNXTB=$self->Read32($fileIFP);
544 $IFPNXTP=$self->Read32($fileIFP);
545 $IFPTOTP=$self->Read32($fileIFP);
546 $IFPSEGP=$self->Read32($fileIFP);
547 $IFPSEGC=$self->Read32($fileIFP);
548
549 rewind($fileIFP);
550 $offset=($OLDIFPNXTB-1)*512+24+($OLDIFPNXTP*4);
551 fseek($fileIFP,$offset);
552
553 $j=24+($OLDIFPNXTP*4);
554 $k=0;
555 $j=0;
556 }
557 }
558
559 }
560 fclose($fileIFP);
561 return $l-1;
562 }
563
564 =cut
565
566 #
567 # XXX porting from php left-over:
568 #
569 # do I *REALLY* need those methods, or should I use
570 # $self->{something} directly?
571 #
572 # Probably direct usage is better!
573 #
574
575 sub GetFieldName {
576 my $self = shift;
577 return $self->{FieldName};
578 }
579
580 sub GetTagName {
581 my $self = shift;
582 return $self->{TagName};
583 }
584
585 sub GetFieldTag {
586 my $self = shift;
587 return $self->{FieldTAG};
588 }
589
590 sub GetNextMFN {
591 my $self = shift;
592 return $self->{NXTMFN};
593 }
594
595 sub GetMFNArray {
596 my $self = shift;
597 return $self->{MFNArray};
598 }
599 =begin php
600
601 sub Read32($fileNB)
602 {
603 $B1=ord(fread($fileNB,1));
604 $B2=ord(fread($fileNB,1));
605 $B3=ord(fread($fileNB,1));
606 $B4=ord(fread($fileNB,1));
607
608 if ($B4<=128)
609 {
610 $value=$B1+$B2*256+$B3*65536+$B4*16777216;
611 } else
612 {
613 $value=$self->Not8($B1)+$self->Not8($B2)*256+$self->Not8($B3)*65536+$self->Not8($B4)*16777216;
614 $value=-($value+1);
615 }
616 # print "(".$B1.",".$B2.",".$B3.",".$B4.":".$value.")";
617
618 return $value;
619 }
620
621 sub Read24($fileNB)
622 {
623 $B1=ord(fread($fileNB,1));
624 $B2=ord(fread($fileNB,1));
625 $B3=ord(fread($fileNB,1));
626
627 $value=$B1+$B2*256+$B3*65536;
628
629 # print "(".$B1.",".$B2.",".$B3.":".$value.")";
630
631 return $value;
632 }
633
634 sub Read16($fileNB)
635 {
636 $B1=ord(fread($fileNB,1));
637 $B2=ord(fread($fileNB,1));
638
639 $value=$B1+$B2*256;
640 # print "(".$B1.",".$B2.":".$value.")";
641
642 return $value;
643 }
644
645 sub Read8($fileNB)
646 {
647 $B1=ord(fread($fileNB,1));
648
649 $value=$B1;
650 # print "(".$value.")";
651
652 return $value;
653 }
654
655 sub Not8($value)
656 {
657 $value=decbin($value);
658 if (strlen($value)<8)
659 {
660 $buffer="";
661 for($i=0;$i<(8-strlen($value));$i++)
662 {
663 $buffer.="0";
664 }
665 $value=$buffer.$value;
666 }
667 $value=ereg_replace("0","3",$value);
668 $value=ereg_replace("1","0",$value);
669 $value=ereg_replace("3","1",$value);
670 $value=bindec($value);
671 return $value;
672 }
673 }
674
675 =cut
676
677 1;
678 __END__
679
680 =head1 BUGS
681
682 This module has been very lightly tested. Use with caution and report bugs.
683
684 =head1 AUTHOR
685
686 Dobrica Pavlinusic
687 CPAN ID: DPAVLIN
688 dpavlin@rot13.org
689 http://www.rot13.org/~dpavlin/
690
691 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
692 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
693
694 =head1 COPYRIGHT
695
696 This program is free software; you can redistribute
697 it and/or modify it under the same terms as Perl itself.
698
699 The full text of the license can be found in the
700 LICENSE file included with this module.
701
702
703 =head1 SEE ALSO
704
705 L<http://www.openisis.org|OpenIsis>, perl(1).
706

  ViewVC Help
Powered by ViewVC 1.1.26