/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory | Revision Log | View Patch

trunk/IsisDB.pm revision 27 by dpavlin, Sat Jan 1 22:29:35 2005 UTC trunk/lib/Biblio/Isis.pm revision 39 by dpavlin, Thu Jan 27 22:01:17 2005 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
# Line 9  use Data::Dumper; Line 9  use Data::Dumper;
9  BEGIN {  BEGIN {
10          use Exporter ();          use Exporter ();
11          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12          $VERSION     = 0.08;          $VERSION     = 0.12;
13          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
14          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
15          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 20  BEGIN {
20    
21  =head1 NAME  =head1 NAME
22    
23  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
24    
25  =head1 SYNOPSIS  =head1 SYNOPSIS
26    
27    use IsisDB;    use Biblio::Isis;
28    
29    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
30          isisdb => './cds/cds',          isisdb => './cds/cds',
31    );    );
32    
33    for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {    for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34          print $isis->to_ascii($mfn),"\n";          print $isis->to_ascii($mfn),"\n";
35    }    }
36    
# Line 81  rarely an issue). Line 81  rarely an issue).
81    
82  Open ISIS database  Open ISIS database
83    
84   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
85          isisdb => './cds/cds',          isisdb => './cds/cds',
86          read_fdt => 1,          read_fdt => 1,
87          include_deleted => 1,          include_deleted => 1,
# Line 123  Dump a B<lot> of debugging output. Line 123  Dump a B<lot> of debugging output.
123    
124  =back  =back
125    
 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.  
   
126  =cut  =cut
127    
128  sub new {  sub new {
# Line 149  sub new { Line 147  sub new {
147          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
148    
149          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
150                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  unless ($self->{$ext."_file"}) {
151                            carp "missing ",uc($ext)," file in ",$self->{isisdb};
152                            return;
153                    }
154          }          }
155    
156          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
# Line 160  sub new { Line 161  sub new {
161                  # read the $db.FDT file for tags                  # read the $db.FDT file for tags
162                  my $fieldzone=0;                  my $fieldzone=0;
163    
164                  open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";                  open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
165                    binmode($fileFDT);
166    
167                  while (<fileFDT>) {                  while (<$fileFDT>) {
168                          chomp;                          chomp;
169                          if ($fieldzone) {                          if ($fieldzone) {
170                                  my $name=substr($_,0,30);                                  my $name=substr($_,0,30);
# Line 179  sub new { Line 181  sub new {
181                          }                          }
182                  }                  }
183                                    
184                  close(fileFDT);                  close($fileFDT);
185          }          }
186    
187          # Get the Maximum MFN from $db.MST          # Get the Maximum MFN from $db.MST
188    
189          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
190            binmode($self->{'fileMST'});
191    
192          # MST format:   (* = 32 bit signed)          # MST format:   (* = 32 bit signed)
193          # CTLMFN*       always 0          # CTLMFN*       always 0
# Line 192  sub new { Line 195  sub new {
195          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
196          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
197          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
198          seek($self->{'fileMST'},4,0);          seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
199    
200          my $buff;          my $buff;
201    
202          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
203          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
   
         # save maximum MFN  
         $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;  
   
   
   
204    
205          print STDERR Dumper($self),"\n" if ($self->{debug});          print STDERR Dumper($self),"\n" if ($self->{debug});
206    
207          # open files for later          # open files for later
208          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
209            binmode($self->{'fileXRF'});
210    
211          $self ? return $self : return undef;          $self ? return $self : return undef;
212  }  }
213    
214  =head2 read_cnt  =head2 count
   
 Read content of C<.CNT> file and return hash containing it.  
215    
216    print Dumper($isis->read_cnt);  Return number of records in database
217    
218  This function is not used by module (C<.CNT> files are not required for this    print $isis->count;
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
219    
220  =cut  =cut
221    
222  sub read_cnt  {  sub count {
223          my $self = shift;          my $self = shift;
224            return $self->{'NXTMFN'} - 1;
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";  
   
         # There is two 26 Bytes fixed lenght records  
   
         #  0: IDTYPE    BTree type                              16  
         #  2: ORDN      Nodes Order                             16  
         #  4: ORDF      Leafs Order                             16  
         #  6: N         Number of Memory buffers for nodes      16  
         #  8: K         Number of buffers for first level index 16  
         # 10: LIV       Current number of Index Levels          16  
         # 12: POSRX*    Pointer to Root Record in N0x           32  
         # 16: NMAXPOS*  Next Available position in N0x          32  
         # 20: FMAXPOS*  Next available position in L0x          32  
         # 24: ABNORMAL  Formal BTree normality indicator        16  
         # length: 26 bytes  
   
         sub unpack_cnt {  
                 my $self = shift;  
   
                 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
                 my $buff = shift || return;  
                 my @arr = unpack("ssssssllls", $buff);  
   
                 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
                 my $IDTYPE = shift @arr;  
                 foreach (@flds) {  
                         $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
                 }  
         }  
   
         my $buff;  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         close(fileCNT);  
   
         return $self->{cnt};  
225  }  }
226    
227  =head2 fetch  =head2 fetch
# Line 317  sub fetch { Line 264  sub fetch {
264    
265          # read XRFMFB and XRFMFP          # read XRFMFB and XRFMFP
266          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
267          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff) || croak "pointer is null";
268    
269          # check for logically deleted record          # check for logically deleted record
270          if ($pointer < 0) {          if ($pointer & 0x80000000) {
271                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
272                  $self->{deleted} = $mfn;                  $self->{deleted} = $mfn;
273    
274                  return unless $self->{include_deleted};                  return unless $self->{include_deleted};
275    
276                  $pointer = abs($pointer);                  # abs
277                    $pointer = ($pointer ^ 0xffffffff) + 1;
278          }          }
279    
280          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
# Line 341  sub fetch { Line 289  sub fetch {
289    
290          # Get Record Information          # Get Record Information
291    
292          seek($self->{'fileMST'},$blk_off,0);          seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
293    
294          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
295          my $value=unpack("l",$buff);          my $value=unpack("V",$buff);
296    
297          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
298    
# Line 361  sub fetch { Line 309  sub fetch {
309    
310          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
311    
312          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
313    
314          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
315    
# Line 381  sub fetch { Line 329  sub fetch {
329    
330          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
331    
332                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));                  my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
333    
334                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
335    
# Line 557  sub tag_name { Line 505  sub tag_name {
505          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
506  }  }
507    
508    
509    =head2 read_cnt
510    
511    Read the content of the C<.CNT> file and return a hash reference containing it.
512    
513      print Dumper($isis->read_cnt);
514    
515    This function is not used by the module (C<.CNT> files are not required for this
516    module to work), but it can be useful for examining your index (while debugging,
517    for example).
518    
519    =cut
520    
521    sub read_cnt  {
522            my $self = shift;
523    
524            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
525    
526            # Get the index information from $db.CNT
527      
528            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
529            binmode($fileCNT);
530    
531            my $buff;
532    
533            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
534            $self->unpack_cnt($buff);
535    
536            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
537            $self->unpack_cnt($buff);
538    
539            close($fileCNT);
540    
541            return $self->{cnt};
542    }
543    
544    =head2 unpack_cnt
545    
546    Unpack one of the two 26-byte fixed-length records in the C<.CNT> file.
547    
548    Here is definition of record:
549    
550     off key        description                             size
551      0: IDTYPE     BTree type                              s
552      2: ORDN       Nodes Order                             s
553      4: ORDF       Leafs Order                             s
554      6: N          Number of Memory buffers for nodes      s
555      8: K          Number of buffers for first level index s
556     10: LIV        Current number of Index Levels          s
557     12: POSRX      Pointer to Root Record in N0x           l
558     16: NMAXPOS    Next Available position in N0x          l
559     20: FMAXPOS    Next available position in L0x          l
560     24: ABNORMAL   Formal BTree normality indicator        s
561     length: 26 bytes
562    
563    This fills the C<cnt> key of the C<$self> object with a hash. It is used by C<read_cnt>.
564    
565    =cut
566    
567    sub unpack_cnt {
568            my $self = shift;
569    
570            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
571    
572            my $buff = shift || return;
573            my @arr = unpack("vvvvvvVVVv", $buff);
574    
575            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
576    
577            my $IDTYPE = shift @arr;
578            foreach (@flds) {
579                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
580            }
581    }
582    
583  1;  1;
584    
585  =head1 BUGS  =head1 BUGS

Legend:
Removed from v.27  
changed lines
  Added in v.39

  ViewVC Help
Powered by ViewVC 1.1.26