/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/IsisDB.pm revision 33 by dpavlin, Wed Jan 5 21:23:04 2005 UTC trunk/lib/Biblio/Isis.pm revision 56 by dpavlin, Sat Jul 8 16:03:52 2006 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
5  use File::Glob qw(:globally :nocase);  use File::Glob qw(:globally :nocase);
6    
 use Data::Dumper;  
   
7  BEGIN {  BEGIN {
8          use Exporter ();          use Exporter ();
9          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10          $VERSION     = 0.09;          $VERSION     = 0.20;
11          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
12          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
13          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 18  BEGIN {
18    
19  =head1 NAME  =head1 NAME
20    
21  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use IsisDB;    use Biblio::Isis;
26    
27    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
28          isisdb => './cds/cds',          isisdb => './cds/cds',
29    );    );
30    
# Line 81  rarely an issue). Line 79  rarely an issue).
79    
80  Open ISIS database  Open ISIS database
81    
82   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
83          isisdb => './cds/cds',          isisdb => './cds/cds',
84          read_fdt => 1,          read_fdt => 1,
85          include_deleted => 1,          include_deleted => 1,
# Line 119  Filter code ref which will be used befor Line 117  Filter code ref which will be used befor
117    
118  =item debug  =item debug
119    
120  Dump a B<lot> of debugging output.  Dump a B<lot> of debugging output even at level 1. Increase the level for even more.
121    
122  =back  =back
123    
# Line 147  sub new { Line 145  sub new {
145          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
146    
147          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
148                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  unless ($self->{$ext."_file"}) {
149                            carp "missing ",uc($ext)," file in ",$self->{isisdb};
150                            return;
151                    }
152          }          }
153    
154          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          if ($self->{debug}) {
155                    print STDERR "## using files: ",join(" ",@isis_files),"\n";
156                    eval "use Data::Dump";
157    
158                    if (! $@) {
159                            *Dumper = *Data::Dump::dump;
160                    } else {
161                            use Data::Dumper;
162                    }
163            }
164    
165          # if you want to read .FDT file use read_fdt argument when creating class!          # if you want to read .FDT file use read_fdt argument when creating class!
166          if ($self->{read_fdt} && -e $self->{fdt_file}) {          if ($self->{read_fdt} && -e $self->{fdt_file}) {
# Line 192  sub new { Line 202  sub new {
202          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
203          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
204          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
205          seek($self->{'fileMST'},4,0) || carp "can't seek to offset 0 in MST: $!";          seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
206    
207          my $buff;          my $buff;
208    
209          read($self->{'fileMST'}, $buff, 4) || carp "can't read NXTMFN from MST: $!";          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
210          $self->{'NXTMFN'}=unpack("V",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
211    
212          print STDERR Dumper($self),"\n" if ($self->{debug});          print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
213    
214          # open files for later          # open files for later
215          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
# Line 221  sub count { Line 231  sub count {
231          return $self->{'NXTMFN'} - 1;          return $self->{'NXTMFN'} - 1;
232  }  }
233    
 =head2 read_cnt  
   
 Read content of C<.CNT> file and return hash containing it.  
   
   print Dumper($isis->read_cnt);  
   
 This function is not used by module (C<.CNT> files are not required for this  
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
   
 =cut  
   
 sub read_cnt  {  
         my $self = shift;  
   
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(my $fileCNT, $self->{cnt_file}) || carp "can't read '$self->{cnt_file}': $!";  
         binmode($fileCNT);  
   
         my $buff;  
   
         read($fileCNT, $buff, 26) || carp "can't read first table from CNT: $!";  
         $self->unpack_cnt($buff);  
   
         read($fileCNT, $buff, 26) || carp "can't read second table from CNT: $!";  
         $self->unpack_cnt($buff);  
   
         close($fileCNT);  
   
         return $self->{cnt};  
 }  
   
 =head2 unpack_cnt  
   
 Unpack one of two 26 bytes fixed length record in C<.CNT> file.  
   
 Here is definition of record:  
   
  off key        description                             size  
   0: IDTYPE     BTree type                              s  
   2: ORDN       Nodes Order                             s  
   4: ORDF       Leafs Order                             s  
   6: N          Number of Memory buffers for nodes      s  
   8: K          Number of buffers for first level index s  
  10: LIV        Current number of Index Levels          s  
  12: POSRX      Pointer to Root Record in N0x           l  
  16: NMAXPOS    Next Available position in N0x          l  
  20: FMAXPOS    Next available position in L0x          l  
  24: ABNORMAL   Formal BTree normality indicator        s  
  length: 26 bytes  
   
 This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.  
   
 =cut  
   
 sub unpack_cnt {  
         my $self = shift;  
   
         my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
         my $buff = shift || return;  
         my @arr = unpack("vvvvvvVVVv", $buff);  
   
         print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
         my $IDTYPE = shift @arr;  
         foreach (@flds) {  
                 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
         }  
 }  
   
234  =head2 fetch  =head2 fetch
235    
236  Read record with selected MFN  Read record with selected MFN
# Line 335  sub fetch { Line 271  sub fetch {
271    
272          # read XRFMFB and XRFMFP          # read XRFMFB and XRFMFP
273          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
274          my $pointer=unpack("V",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff);
275            if (! $pointer) {
276                    if ($self->{include_deleted}) {
277                            return;
278                    } else {
279                            warn "pointer for MFN $mfn is null\n";
280                            return;
281                    }
282            }
283    
284          # check for logically deleted record          # check for logically deleted record
285          if ($pointer & 0x80000000) {          if ($pointer & 0x80000000) {
# Line 441  sub fetch { Line 385  sub fetch {
385          return $self->{'record'};          return $self->{'record'};
386  }  }
387    
388    =head2 mfn
389    
390    Returns current MFN position
391    
392      my $mfn = $isis->mfn;
393    
394    =cut
395    
396    # This function should be simple return $self->{current_mfn},
397    # but if new is called with _hack_mfn it becomes setter.
398    # It's useful in tests when setting $isis->{record} directly
399    
400    sub mfn {
401            my $self = shift;
402            return $self->{current_mfn};
403    };
404    
405    
406  =head2 to_ascii  =head2 to_ascii
407    
408  Returns ASCII output of record with specified MFN  Returns ASCII output of record with specified MFN
# Line 464  sub to_ascii { Line 426  sub to_ascii {
426    
427          my $mfn = shift || croak "need MFN";          my $mfn = shift || croak "need MFN";
428    
429          my $rec = $self->fetch($mfn);          my $rec = $self->fetch($mfn) || return;
430    
431          my $out = "0\t$mfn";          my $out = "0\t$mfn";
432    
# Line 520  which will be used for identifiers, C<i1 Line 482  which will be used for identifiers, C<i1
482               }               }
483             ],             ],
484    
485    If there are repeatable subfields in the record, this will create
486    the following structure:
487    
488      '900' => [ {
489            'a' => [ 'foo', 'bar', 'baz' ],
490      }]
491    
492  This method will also create additional field C<000> with MFN.  This method will also create additional field C<000> with MFN.
493    
494    There is also a more elaborate way to call C<to_hash>, like this:
495    
496      my $hash = $isis->to_hash({
497            mfn => 42,
498            include_empty_subfields => 1,
499      });
500    
501  =cut  =cut
502    
503  sub to_hash {  sub to_hash {
504          my $self = shift;          my $self = shift;
505    
506    
507          my $mfn = shift || confess "need mfn!";          my $mfn = shift || confess "need mfn!";
508            my $arg;
509    
510            if (ref($mfn) eq 'HASH') {
511                    $arg = $mfn;
512                    $mfn = $arg->{mfn} || confess "need mfn in arguments";
513            }
514    
515          # init record to include MFN as field 000          # init record to include MFN as field 000
516          my $rec = { '000' => [ $mfn ] };          my $rec = { '000' => [ $mfn ] };
517    
518          my $row = $self->fetch($mfn);          my $row = $self->fetch($mfn) || return;
519    
520          foreach my $k (keys %{$row}) {          foreach my $k (keys %{$row}) {
521                  foreach my $l (@{$row->{$k}}) {                  foreach my $l (@{$row->{$k}}) {
522    
523                          # filter output                          # filter output
524                          $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});                          if ($self->{'hash_filter'}) {
525                                    $l = $self->{'hash_filter'}->($l);
526                                    next unless defined($l);
527                            }
528    
529                          my $val;                          my $val;
530    
# Line 549  sub to_hash { Line 535  sub to_hash {
535                          if ($l =~ m/\^/) {                          if ($l =~ m/\^/) {
536                                  foreach my $t (split(/\^/,$l)) {                                  foreach my $t (split(/\^/,$l)) {
537                                          next if (! $t);                                          next if (! $t);
538                                          $val->{substr($t,0,1)} = substr($t,1);                                          my ($sf,$v) = (substr($t,0,1), substr($t,1));
539                                            # FIXME make this option !
540                                            next unless ($v);
541    #                                       warn "### $k^$sf:$v",$/ if ($self->{debug} > 1);
542    
543                                            # FIXME array return optional, by default unroll to ' ; '
544                                            if (ref( $val->{$sf} ) eq 'ARRAY') {
545    
546                                                    push @{ $val->{$sf} }, $v;
547                                            } elsif (defined( $val->{$sf} )) {
548                                                    # convert scalar field to array
549                                                    $val->{$sf} = [ $val->{$sf}, $v ];
550                                            } else {
551                                                    $val->{$sf} = $v;
552                                            }
553                                  }                                  }
554                          } else {                          } else {
555                                  $val = $l;                                  $val = $l;
# Line 576  sub tag_name { Line 576  sub tag_name {
576          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
577  }  }
578    
579    
580    =head2 read_cnt
581    
582    Read content of C<.CNT> file and return hash containing it.
583    
584      print Dumper($isis->read_cnt);
585    
586    This function is not used by module (C<.CNT> files are not required for this
587    module to work), but it can be useful to examine your index (while debugging
588    for example).
589    
590    =cut
591    
592    sub read_cnt  {
593            my $self = shift;
594    
595            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
596    
597            # Get the index information from $db.CNT
598      
599            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
600            binmode($fileCNT);
601    
602            my $buff;
603    
604            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
605            $self->unpack_cnt($buff);
606    
607            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
608            $self->unpack_cnt($buff);
609    
610            close($fileCNT);
611    
612            return $self->{cnt};
613    }
614    
615    =head2 unpack_cnt
616    
617    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
618    
619    Here is definition of record:
620    
621     off key        description                             size
622      0: IDTYPE     BTree type                              s
623      2: ORDN       Nodes Order                             s
624      4: ORDF       Leafs Order                             s
625      6: N          Number of Memory buffers for nodes      s
626      8: K          Number of buffers for first level index s
627     10: LIV        Current number of Index Levels          s
628     12: POSRX      Pointer to Root Record in N0x           l
629     16: NMAXPOS    Next Available position in N0x          l
630     20: FMAXPOS    Next available position in L0x          l
631     24: ABNORMAL   Formal BTree normality indicator        s
632     length: 26 bytes
633    
634    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
635    
636    =cut
637    
638    sub unpack_cnt {
639            my $self = shift;
640    
641            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
642    
643            my $buff = shift || return;
644            my @arr = unpack("vvvvvvVVVv", $buff);
645    
646            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
647    
648            my $IDTYPE = shift @arr;
649            foreach (@flds) {
650                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
651            }
652    }
653    
654  1;  1;
655    
656  =head1 BUGS  =head1 BUGS
# Line 593  module with databases from programs othe Line 668  module with databases from programs othe
668  tested this against output of one C<isis.dll>-based application, but I don't  tested this against output of one C<isis.dll>-based application, but I don't
669  know any details about its version.  know any details about its version.
670    
671    =head1 VERSIONS
672    
673    You can find version dependencies documented here
674    
675    =over 8
676    
677    =item 0.20
678    
679    Added C<< $isis->mfn >>, support for repeatable subfields and
680    C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
681    
682    =back
683    
684  =head1 AUTHOR  =head1 AUTHOR
685    
686          Dobrica Pavlinusic          Dobrica Pavlinusic

Legend:
Removed from v.33  
changed lines
  Added in v.56

  ViewVC Help
Powered by ViewVC 1.1.26