/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Tue Dec 28 00:43:04 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 15421 byte(s)
Import of old code back from february to actually make it work.

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10     $VERSION = 0.01;
11     @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25     use IsisDB
26     my $isis = new IsisDB(
27     isisdb => './cds/cds',
28     );
29    
30     =head1 DESCRIPTION
31    
32     This module will read CDS/ISIS databases and create hash values out of it.
33     It can be used as perl-only alternative to OpenIsis module.
34    
35     =head1 METHODS
36    
37     =cut
38    
39     # my $ORDN; # Nodes Order
40     # my $ORDF; # Leafs Order
41     # my $N; # Number of Memory buffers for nodes
42     # my $K; # Number of buffers for first level index
43     # my $LIV; # Current number of Index Levels
44     # my $POSRX; # Pointer to Root Record in N0x
45     # my $NMAXPOS; # Next Available position in N0x
46     # my $FMAXPOS; # Next available position in L0x
47     # my $ABNORMAL; # Formal BTree normality indicator
48    
49     #
50     # some binary reads
51     #
52    
53     sub Read32 {
54     my $self = shift;
55    
56     my $f = shift || die "Read32 needs file handle";
57     read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58     return unpack("l",$b);
59     }
60    
61     =head2 new
62    
63     Open CDS/ISIS database
64    
65     my $isis = new IsisDB(
66     isisdb => './cds/cds',
67     read_fdt => 1,
68     debug => 1,
69     );
70    
71     =item isisdb
72    
73     Prefix path to CDS/ISIS. It should contain full or relative path to database
74     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
75    
76     =item read_fdt
77    
78     Boolean flag to specify if field definition table should be read. It's off
79     by default.
80    
81     =item debug
82    
83     Dump a C<lot> of debugging output.
84    
85     =cut
86    
87     sub new {
88     my $class = shift;
89     my $self = {};
90     bless($self, $class);
91    
92     $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
93    
94     $self->{debug} = {@_}->{debug} || 1; # XXX remove debug always!
95    
96     # if you want to read .FDT file use read_fdt argument when creating class!
97     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
98    
99     # read the $db.FDT file for tags
100     my $fieldzone=0;
101    
102     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
103    
104     while (<fileFDT>) {
105     chomp;
106     if ($fieldzone) {
107     my $name=substr($_,0,30);
108     my $tag=substr($_,50,3);
109    
110     $name =~ s/\s+$//;
111     $tag =~ s/\s+$//;
112    
113     $self->{'TagName'}->{$tag}=$name;
114     }
115    
116     if (/^\*\*\*/) {
117     $fieldzone=1;
118     }
119     }
120    
121     close(fileFDT);
122     }
123    
124     # Get the Maximum MFN from $db.MST
125    
126     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
127    
128     # MST format: (* = 32 bit signed)
129     # CTLMFN* always 0
130     # NXTMFN* MFN to be assigned to the next record created
131     # NXTMFB* last block allocated to master file
132     # NXTMFP offset to next available position in last block
133     # MFTYPE always 0 for user db file (1 for system)
134     seek(fileMST,4,0);
135     $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
136    
137     close(fileMST);
138    
139     # Get the index information from $db.CNT
140    
141     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
142    
143     # There is two 26 Bytes fixed lenght records
144    
145     # 0: IDTYPE BTree type 16
146     # 2: ORDN Nodes Order 16
147     # 4: ORDF Leafs Order 16
148     # 6: N Number of Memory buffers for nodes 16
149     # 8: K Number of buffers for first level index 16
150     # 10: LIV Current number of Index Levels 16
151     # 12: POSRX* Pointer to Root Record in N0x 32
152     # 16: NMAXPOS* Next Available position in N0x 32
153     # 20: FMAXPOS* Next available position in L0x 32
154     # 24: ABNORMAL Formal BTree normality indicator 16
155     # length: 26 bytes
156    
157     sub unpack_cnt {
158     my $self = shift;
159    
160     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
161    
162     my $buff = shift || return;
163     my @arr = unpack("ssssssllls", $buff);
164    
165     my $IDTYPE = shift @arr;
166     foreach (@flds) {
167     $self->{$IDTYPE}->{$_} = abs(shift @arr);
168     }
169     }
170    
171     my $buff;
172     read(fileCNT, $buff, 26);
173     $self->unpack_cnt($buff);
174    
175     read(fileCNT, $buff, 26);
176     $self->unpack_cnt($buff);
177    
178    
179     close(fileCNT);
180    
181     print Dumper($self) if ($self->{debug});
182    
183     $self ? return $self : return undef;
184     }
185    
186    
187     # Get a record from the MFN
188     # Return the number of fields in the record.
189     # Return -1 if the record is marked for deletion
190     # The record is then extracted with call to GETs
191    
192     sub GetMFN {
193     my $self = shift;
194    
195     my $mfn = shift || croak "GetMFN needs MFN as argument!";
196    
197     print "GetMFN: $mfn\n" if ($self->{debug});
198    
199     open(fileXRF, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
200    
201     # XXX check this?
202     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
203    
204     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
205     seek(fileXRF,$mfnpos,0);
206    
207     # read XRFMFB abd XRFMFP
208     my $pointer=$self->Read32(\*fileXRF);
209    
210     my $XRFMFB = int($pointer/2048);
211     my $XRFMFP = $pointer - ($XRFMFB*2048);
212    
213     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
214    
215     # XXX fix this to be more readable!!
216     # e.g. (XRFMFB - 1) * 512 + XRFMFP
217    
218     my $offset = $pointer;
219     my $offset2=int($offset/2048)-1;
220     my $offset22=int($offset/4096);
221     my $offset3=$offset-($offset22*4096);
222     if ($offset3>512) {
223     $offset3=$offset3-2048;
224     }
225     my $offset4=($offset2*512)+$offset3;
226    
227     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
228    
229     close(fileXRF);
230    
231     # Get Record Information
232    
233     open(fileMST, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
234    
235     seek(fileMST,$offset4,0);
236    
237     my $value=$self->Read32(\*fileMST);
238    
239     if ($value!=$mfn) {
240     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
241     return -1; # XXX deleted record?
242     }
243    
244     # $MFRL=$self->Read16($fileMST);
245     # $MFBWB=$self->Read32($fileMST);
246     # $MFBWP=$self->Read16($fileMST);
247     # $BASE=$self->Read16($fileMST);
248     # $NVF=$self->Read16($fileMST);
249     # $STATUS=$self->Read16($fileMST);
250    
251     my $buff;
252     read(fileMST, $buff, 14);
253    
254     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
255    
256     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
257    
258     # Get Directory Format
259    
260     my @FieldPOS;
261     my @FieldLEN;
262     my @FieldTAG;
263    
264     for (my $i = 0 ; $i < $NVF ; $i++) {
265    
266     # $TAG=$self->Read16($fileMST);
267     # $POS=$self->Read16($fileMST);
268     # $LEN=$self->Read16($fileMST);
269    
270     read(fileMST, $buff, 6);
271     my ($TAG,$POS,$LEN) = unpack("sss", $buff);
272    
273     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
274    
275     # The TAG does not exists in .FDT so we set it to 0.
276     #
277     # XXX This is removed from perl version; .FDT file is updated manually, so
278     # you will often have fields in .MST file which aren't in .FDT. On the other
279     # hand, IsisMarc doesn't use .FDT files at all!
280    
281     #if (! $self->{TagName}->{$TAG}) {
282     # $TAG=0;
283     #}
284    
285     push @FieldTAG,$TAG;
286     push @FieldPOS,$POS;
287     push @FieldLEN,$LEN;
288     }
289    
290     # Get Variable Fields
291    
292     for (my $i = 0 ; $i < $NVF ; $i++) {
293     my $rec;
294     read(fileMST,$rec,$FieldLEN[$i]);
295     $self->{record}->{$FieldTAG[$i]} = $rec;
296     }
297     close(fileMST);
298    
299     # The record is marked for deletion
300     if ($STATUS==1) {
301     return -1;
302     }
303    
304     print Dumper($self) if ($self->{debug});
305    
306     return $NVF;
307     }
308    
309     =begin php
310    
311     # Load the dictionary from the $db.L0x files.
312     # Not usefull Yet
313    
314     sub LoadDictionary()
315     {
316     $fileL01=fopen($self->{isisdb}.".L01","r");
317     rewind($fileL01);
318    
319     do
320     {
321    
322     $POS=$self->Read32($fileL01);
323     $OCK=$self->Read16($fileL01);
324     $IT=$self->Read16($fileL01);
325     $PS=$self->Read32($fileL01);
326     print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
327     for ($i=0;$i<$OCK;$i++)
328     {
329     $KEY=fread($fileL01,10);
330    
331     print $KEY." ### ";
332    
333     $INFO1=$self->Read32($fileL01);
334     $INFO2=$self->Read32($fileL01);
335    
336     #L01Key->{$key}=array($INFO1,$INFO2);
337     }
338    
339     rewind($fileL01);
340     $offset=($PS-1)*(12+$self->{ORDF}->{1}*18*2);
341     fseek($fileL01,$offset);
342    
343     } While (!feof($fileL01));
344    
345     fclose($fileL01);
346     }
347    
348     # self function search through the tree and returns an array of pointers to IFP
349     # The function must be recursive
350    
351     sub SearchTree($search,$fileNB,$PUNT)
352     {
353     $offset=(($PUNT-1)*(8+2*$self->{ORDN}->{1}*14));
354    
355     rewind($fileNB1);
356    
357     fseek($fileNB,$offset);
358    
359     $POS=$self->Read32($fileNB);
360     $OCK=$self->Read16($fileNB);
361     $IT=$self->Read16($fileNB);
362    
363     #print "<br>".$POS." - ".$OCK." - ".$IT;
364    
365     $OLDPUNT=$POS;
366     $j=0;
367     for ($i=0;$i<$OCK;$i++)
368     {
369     $KEY=fread($fileNB,10);
370    
371     $PUNT=$self->Read32($fileNB);
372    
373     #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
374    
375     If (strcmp($search,chop($KEY))<0)
376     {
377     break;
378     }
379     $OLDPUNT=$PUNT;
380     }
381     #print $OLDPUNT;
382     Return $OLDPUNT;
383     }
384    
385     # Search ISIS for record containing search
386     # Return a sorted array of MFN
387    
388     sub Search($search)
389     {
390    
391     $search=strtoupper($search);
392     #print "Searching....".$search." - ".$self->{POSRX}->{1}."<br>";
393     # first search .x01
394    
395    
396     # Search in .N01
397    
398    
399     $fileN01=fopen($self->{isisdb}.".N01","r");
400     $offset=(($self->{POSRX}->{1}-1)*(8+2*$self->{ORDN}->{1}*14));
401    
402     do
403     {
404     rewind($fileN01);
405    
406     fseek($fileN01,$offset);
407    
408     $POS=$self->Read32($fileN01);
409     $OCK=$self->Read16($fileN01);
410     $IT=$self->Read16($fileN01);
411    
412     #print "<br>".$POS." - ".$OCK." - ".$IT;
413    
414     $OLDPUNT=$POS;
415     for ($i=0;$i<$OCK;$i++)
416     {
417     $KEY=fread($fileN01,10);
418    
419     $PUNT=$self->Read32($fileN01);
420    
421     #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
422    
423     If (strcmp($search,chop($KEY))<0)
424     {
425     break;
426     }
427     $OLDPUNT=$PUNT;
428     }
429     $offset=(($OLDPUNT-1)*(8+2*$self->{ORDN}->{1}*14));
430     } while ($OLDPUNT>0);
431     #print $OLDPUNT;
432    
433    
434     fclose($fileN01);
435    
436     # Now look for records in .L01 file
437     $fileL01=fopen($self->{isisdb}.".L01","r");
438     rewind($fileL01);
439    
440     $offset=(-$OLDPUNT-1)*(12+$self->{ORDF}->{1}*18*2);
441     fseek($fileL01,$offset);
442    
443     $POS=$self->Read32($fileL01);
444     $OCK=$self->Read16($fileL01);
445     $IT=$self->Read16($fileL01);
446     $PS=$self->Read32($fileL01);
447     #print "<br>POS:".$POS." ".$self->{ORDF}->{1}." ";
448     for ($i=0;$i<$OCK;$i++)
449     {
450     $KEY=fread($fileL01,10);
451    
452     #print $KEY." ### ";
453    
454     $INFO1=$self->Read32($fileL01);
455     $INFO2=$self->Read32($fileL01);
456    
457     If (strcmp($search,chop($KEY))==0)
458     {
459     break;
460     }
461     }
462    
463     fclose($fileL01);
464    
465     #print $INFO1."--".$INFO2;
466    
467     # Now look in .IFP for the MFN
468     $fileIFP=fopen($self->{isisdb}.".IFP","r");
469     rewind($fileIFP);
470     $offset=($INFO1-1)*512+($INFO2*4);
471     fseek($fileIFP,$offset);
472    
473     $IFPBLK=$self->Read32($fileIFP);
474    
475     $IFPNXTB=$self->Read32($fileIFP);
476     $IFPNXTP=$self->Read32($fileIFP);
477     $IFPTOTP=$self->Read32($fileIFP);
478     $IFPSEGP=$self->Read32($fileIFP);
479     $IFPSEGC=$self->Read32($fileIFP);
480    
481    
482     #print "<br>IFP:".$IFPBLK." # ".$IFPNXTB." - ".$IFPNXTP." - ".$IFPTOTP." - ".$IFPSEGP." - ".$IFPSEGC;
483    
484     rewind($fileIFP);
485     $offset=($INFO1-1)*512+24+($INFO2*4);
486     fseek($fileIFP,$offset);
487    
488     $j=24+($INFO2*4);
489     $k=0;
490     $l=1;
491     $OLDPMFN="";
492     for ($i=0;$i<$IFPSEGP;$i++)
493     {
494     $B1=$self->Read8($fileIFP);
495     $B2=$self->Read8($fileIFP);
496     $B3=$self->Read8($fileIFP);
497     $B4=$self->Read8($fileIFP);
498     $B5=$self->Read8($fileIFP);
499     $B6=$self->Read8($fileIFP);
500     $B7=$self->Read8($fileIFP);
501     $B8=$self->Read8($fileIFP);
502    
503     $PMFN=$B1*65536+$B2*256+$B3;
504     $PTAG=$B4*256+$B5;
505     $POCC=$B6;
506     $PCNT=$B7*256+$B8;
507    
508     if ($OLDPMFN!=$PMFN)
509     {
510     if ($PMFN!=0)
511     {
512     $self->{MFNArray}->{$l}=$PMFN;
513     $OLDPMFN=$PMFN;
514     $l+=1;
515     }
516     }
517    
518     $j=$j+8;
519     #print "<br>".$PMFN."-".$PTAG." - ".$POCC." - ".$PCNT;
520     #print "@@".$j."@@@@";
521     if ($j>=504)
522     {
523     if ($IFPNXTB==0 && $IFPNXTP==0)
524     {
525     $k=$k+1;
526     rewind($fileIFP);
527     $offset=($INFO1-1+$k)*512;
528     fseek($fileIFP,$offset);
529     $B=$self->Read32($fileIFP);
530     #print "<br>-".$B."-<br>";
531     $j=0;
532     } else
533     {
534     rewind($fileIFP);
535     $offset=($IFPNXTB-1)*512;
536     fseek($fileIFP,$offset);
537    
538     $OLDIFPNXTB=$IFPNXTB;
539     $OLDIFPNXTP=$IFPNXTP;
540    
541     $IFPBLK=$self->Read32($fileIFP);
542    
543     $IFPNXTB=$self->Read32($fileIFP);
544     $IFPNXTP=$self->Read32($fileIFP);
545     $IFPTOTP=$self->Read32($fileIFP);
546     $IFPSEGP=$self->Read32($fileIFP);
547     $IFPSEGC=$self->Read32($fileIFP);
548    
549     rewind($fileIFP);
550     $offset=($OLDIFPNXTB-1)*512+24+($OLDIFPNXTP*4);
551     fseek($fileIFP,$offset);
552    
553     $j=24+($OLDIFPNXTP*4);
554     $k=0;
555     $j=0;
556     }
557     }
558    
559     }
560     fclose($fileIFP);
561     return $l-1;
562     }
563    
564     =cut
565    
566     #
567     # XXX porting from php left-over:
568     #
569     # do I *REALLY* need those methods, or should I use
570     # $self->{something} directly?
571     #
572     # Probably direct usage is better!
573     #
574    
575     sub GetFieldName {
576     my $self = shift;
577     return $self->{FieldName};
578     }
579    
580     sub GetTagName {
581     my $self = shift;
582     return $self->{TagName};
583     }
584    
585     sub GetFieldTag {
586     my $self = shift;
587     return $self->{FieldTAG};
588     }
589    
590     sub GetNextMFN {
591     my $self = shift;
592     return $self->{NXTMFN};
593     }
594    
595     sub GetMFNArray {
596     my $self = shift;
597     return $self->{MFNArray};
598     }
599     =begin php
600    
601     sub Read32($fileNB)
602     {
603     $B1=ord(fread($fileNB,1));
604     $B2=ord(fread($fileNB,1));
605     $B3=ord(fread($fileNB,1));
606     $B4=ord(fread($fileNB,1));
607    
608     if ($B4<=128)
609     {
610     $value=$B1+$B2*256+$B3*65536+$B4*16777216;
611     } else
612     {
613     $value=$self->Not8($B1)+$self->Not8($B2)*256+$self->Not8($B3)*65536+$self->Not8($B4)*16777216;
614     $value=-($value+1);
615     }
616     # print "(".$B1.",".$B2.",".$B3.",".$B4.":".$value.")";
617    
618     return $value;
619     }
620    
621     sub Read24($fileNB)
622     {
623     $B1=ord(fread($fileNB,1));
624     $B2=ord(fread($fileNB,1));
625     $B3=ord(fread($fileNB,1));
626    
627     $value=$B1+$B2*256+$B3*65536;
628    
629     # print "(".$B1.",".$B2.",".$B3.":".$value.")";
630    
631     return $value;
632     }
633    
634     sub Read16($fileNB)
635     {
636     $B1=ord(fread($fileNB,1));
637     $B2=ord(fread($fileNB,1));
638    
639     $value=$B1+$B2*256;
640     # print "(".$B1.",".$B2.":".$value.")";
641    
642     return $value;
643     }
644    
645     sub Read8($fileNB)
646     {
647     $B1=ord(fread($fileNB,1));
648    
649     $value=$B1;
650     # print "(".$value.")";
651    
652     return $value;
653     }
654    
655     sub Not8($value)
656     {
657     $value=decbin($value);
658     if (strlen($value)<8)
659     {
660     $buffer="";
661     for($i=0;$i<(8-strlen($value));$i++)
662     {
663     $buffer.="0";
664     }
665     $value=$buffer.$value;
666     }
667     $value=ereg_replace("0","3",$value);
668     $value=ereg_replace("1","0",$value);
669     $value=ereg_replace("3","1",$value);
670     $value=bindec($value);
671     return $value;
672     }
673     }
674    
675     =cut
676    
677     1;
678     __END__
679    
680     =head1 BUGS
681    
682     This module has been very lightly tested. Use with caution and report bugs.
683    
684     =head1 AUTHOR
685    
686     Dobrica Pavlinusic
687     CPAN ID: DPAVLIN
688     dpavlin@rot13.org
689     http://www.rot13.org/~dpavlin/
690    
691     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
692     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
693    
694     =head1 COPYRIGHT
695    
696     This program is free software; you can redistribute
697     it and/or modify it under the same terms as Perl itself.
698    
699     The full text of the license can be found in the
700     LICENSE file included with this module.
701    
702    
703     =head1 SEE ALSO
704    
705     L<http://www.openisis.org|OpenIsis>, perl(1).
706    

  ViewVC Help
Powered by ViewVC 1.1.26