/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is a repository of my old source code, which isn't updated anymore. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory | Revision Log | View Patch

trunk/IsisDB.pm revision 32 by dpavlin, Wed Jan 5 15:46:26 2005 UTC | trunk/lib/Biblio/Isis.pm revision 45 by dpavlin, Thu Jul 6 20:31:46 2006 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
5  use File::Glob qw(:globally :nocase);  use File::Glob qw(:globally :nocase);
6    
 use Data::Dumper;  
   
7  BEGIN {  BEGIN {
8          use Exporter ();          use Exporter ();
9          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10          $VERSION     = 0.09;          $VERSION     = 0.14;
11          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
12          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
13          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 18  BEGIN {
18    
19  =head1 NAME  =head1 NAME
20    
21  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use IsisDB;    use Biblio::Isis;
26    
27    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
28          isisdb => './cds/cds',          isisdb => './cds/cds',
29    );    );
30    
# Line 81  rarely an issue). Line 79  rarely an issue).
79    
80  Open ISIS database  Open ISIS database
81    
82   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
83          isisdb => './cds/cds',          isisdb => './cds/cds',
84          read_fdt => 1,          read_fdt => 1,
85          include_deleted => 1,          include_deleted => 1,
# Line 147  sub new { Line 145  sub new {
145          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
146    
147          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
148                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  unless ($self->{$ext."_file"}) {
149                            carp "missing ",uc($ext)," file in ",$self->{isisdb};
150                            return;
151                    }
152          }          }
153    
154          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          if ($self->{debug}) {
155                    print STDERR "## using files: ",join(" ",@isis_files),"\n";
156                    eval "use Data::Dump";
157    
158                    if (! $@) {
159                            *Dumper = *Data::Dump::dump;
160                    } else {
161                            use Data::Dumper;
162                    }
163            }
164    
165          # if you want to read .FDT file use read_fdt argument when creating class!          # if you want to read .FDT file use read_fdt argument when creating class!
166          if ($self->{read_fdt} && -e $self->{fdt_file}) {          if ($self->{read_fdt} && -e $self->{fdt_file}) {
# Line 158  sub new { Line 168  sub new {
168                  # read the $db.FDT file for tags                  # read the $db.FDT file for tags
169                  my $fieldzone=0;                  my $fieldzone=0;
170    
171                  open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";                  open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
172                    binmode($fileFDT);
173    
174                  while (<fileFDT>) {                  while (<$fileFDT>) {
175                          chomp;                          chomp;
176                          if ($fieldzone) {                          if ($fieldzone) {
177                                  my $name=substr($_,0,30);                                  my $name=substr($_,0,30);
# Line 177  sub new { Line 188  sub new {
188                          }                          }
189                  }                  }
190                                    
191                  close(fileFDT);                  close($fileFDT);
192          }          }
193    
194          # Get the Maximum MFN from $db.MST          # Get the Maximum MFN from $db.MST
195    
196          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
197            binmode($self->{'fileMST'});
198    
199          # MST format:   (* = 32 bit signed)          # MST format:   (* = 32 bit signed)
200          # CTLMFN*       always 0          # CTLMFN*       always 0
# Line 190  sub new { Line 202  sub new {
202          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
203          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
204          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
205          seek($self->{'fileMST'},4,0);          seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
206    
207          my $buff;          my $buff;
208    
209          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
210          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
211    
212            print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
   
   
         print STDERR Dumper($self),"\n" if ($self->{debug});  
213    
214          # open files for later          # open files for later
215          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
216            binmode($self->{'fileXRF'});
217    
218          $self ? return $self : return undef;          $self ? return $self : return undef;
219  }  }
# Line 221  sub count { Line 231  sub count {
231          return $self->{'NXTMFN'} - 1;          return $self->{'NXTMFN'} - 1;
232  }  }
233    
 =head2 read_cnt  
   
 Read content of C<.CNT> file and return hash containing it.  
   
   print Dumper($isis->read_cnt);  
   
 This function is not used by module (C<.CNT> files are not required for this  
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
   
 =cut  
   
 sub read_cnt  {  
         my $self = shift;  
   
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";  
   
         my $buff;  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         close(fileCNT);  
   
         return $self->{cnt};  
 }  
   
 =head2 unpack_cnt  
   
 Unpack one of two 26 bytes fixed length record in C<.CNT> file.  
   
 Here is definition of record:  
   
  off key        description                             size  
   0: IDTYPE     BTree type                              s  
   2: ORDN       Nodes Order                             s  
   4: ORDF       Leafs Order                             s  
   6: N          Number of Memory buffers for nodes      s  
   8: K          Number of buffers for first level index s  
  10: LIV        Current number of Index Levels          s  
  12: POSRX      Pointer to Root Record in N0x           l  
  16: NMAXPOS    Next Available position in N0x          l  
  20: FMAXPOS    Next available position in L0x          l  
  24: ABNORMAL   Formal BTree normality indicator        s  
  length: 26 bytes  
   
 This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.  
   
 =cut  
   
 sub unpack_cnt {  
         my $self = shift;  
   
         my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
         my $buff = shift || return;  
         my @arr = unpack("ssssssllls", $buff);  
   
         print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
         my $IDTYPE = shift @arr;  
         foreach (@flds) {  
                 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
         }  
 }  
   
234  =head2 fetch  =head2 fetch
235    
236  Read record with selected MFN  Read record with selected MFN
# Line 334  sub fetch { Line 271  sub fetch {
271    
272          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
273          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
274          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff);
275            if (! $pointer) {
276                    if ($self->{include_deleted}) {
277                            return;
278                    } else {
279                            warn "pointer for MFN $mfn is null\n";
280                            return;
281                    }
282            }
283    
284          # check for logically deleted record          # check for logically deleted record
285          if ($pointer < 0) {          if ($pointer & 0x80000000) {
286                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
287                  $self->{deleted} = $mfn;                  $self->{deleted} = $mfn;
288    
289                  return unless $self->{include_deleted};                  return unless $self->{include_deleted};
290    
291                  $pointer = abs($pointer);                  # abs
292                    $pointer = ($pointer ^ 0xffffffff) + 1;
293          }          }
294    
295          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
# Line 358  sub fetch { Line 304  sub fetch {
304    
305          # Get Record Information          # Get Record Information
306    
307          seek($self->{'fileMST'},$blk_off,0);          seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
308    
309          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
310          my $value=unpack("l",$buff);          my $value=unpack("V",$buff);
311    
312          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
313    
# Line 378  sub fetch { Line 324  sub fetch {
324    
325          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
326    
327          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
328    
329          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
330    
# Line 398  sub fetch { Line 344  sub fetch {
344    
345          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
346    
347                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));                  my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
348    
349                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
350    
# Line 462  sub to_ascii { Line 408  sub to_ascii {
408    
409          my $mfn = shift || croak "need MFN";          my $mfn = shift || croak "need MFN";
410    
411          my $rec = $self->fetch($mfn);          my $rec = $self->fetch($mfn) || return;
412    
413          my $out = "0\t$mfn";          my $out = "0\t$mfn";
414    
# Line 530  sub to_hash { Line 476  sub to_hash {
476          # init record to include MFN as field 000          # init record to include MFN as field 000
477          my $rec = { '000' => [ $mfn ] };          my $rec = { '000' => [ $mfn ] };
478    
479          my $row = $self->fetch($mfn);          my $row = $self->fetch($mfn) || return;
480    
481          foreach my $k (keys %{$row}) {          foreach my $k (keys %{$row}) {
482                  foreach my $l (@{$row->{$k}}) {                  foreach my $l (@{$row->{$k}}) {
483    
484                          # filter output                          # filter output
485                          $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});                          if ($self->{'hash_filter'}) {
486                                    $l = $self->{'hash_filter'}->($l);
487                                    next unless defined($l);
488                            }
489    
490                          my $val;                          my $val;
491    
# Line 574  sub tag_name { Line 523  sub tag_name {
523          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
524  }  }
525    
526    
527    =head2 read_cnt
528    
529    Read content of C<.CNT> file and return hash containing it.
530    
531      print Dumper($isis->read_cnt);
532    
533    This function is not used by module (C<.CNT> files are not required for this
534    module to work), but it can be useful to examine your index (while debugging
535    for example).
536    
537    =cut
538    
539    sub read_cnt  {
540            my $self = shift;
541    
542            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
543    
544            # Get the index information from $db.CNT
545      
546            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
547            binmode($fileCNT);
548    
549            my $buff;
550    
551            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
552            $self->unpack_cnt($buff);
553    
554            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
555            $self->unpack_cnt($buff);
556    
557            close($fileCNT);
558    
559            return $self->{cnt};
560    }
561    
562    =head2 unpack_cnt
563    
564    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
565    
566    Here is definition of record:
567    
568     off key        description                             size
569      0: IDTYPE     BTree type                              s
570      2: ORDN       Nodes Order                             s
571      4: ORDF       Leafs Order                             s
572      6: N          Number of Memory buffers for nodes      s
573      8: K          Number of buffers for first level index s
574     10: LIV        Current number of Index Levels          s
575     12: POSRX      Pointer to Root Record in N0x           l
576     16: NMAXPOS    Next Available position in N0x          l
577     20: FMAXPOS    Next available position in L0x          l
578     24: ABNORMAL   Formal BTree normality indicator        s
579     length: 26 bytes
580    
581    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
582    
583    =cut
584    
585    sub unpack_cnt {
586            my $self = shift;
587    
588            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
589    
590            my $buff = shift || return;
591            my @arr = unpack("vvvvvvVVVv", $buff);
592    
593            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
594    
595            my $IDTYPE = shift @arr;
596            foreach (@flds) {
597                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
598            }
599    }
600    
601  1;  1;
602    
603  =head1 BUGS  =head1 BUGS

Legend:
Removed from v.32  
changed lines
  Added in v.45

  ViewVC Help
Powered by ViewVC 1.1.26