/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 7 by dpavlin, Wed Dec 29 15:10:34 2004 UTC revision 12 by dpavlin, Wed Dec 29 20:10:11 2004 UTC
# Line 7  use Data::Dumper; Line 7  use Data::Dumper;
7  BEGIN {  BEGIN {
8          use Exporter ();          use Exporter ();
9          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10          $VERSION     = 0.02;          $VERSION     = 0.04;
11          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
12          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
13          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 22  IsisDB - Read CDS/ISIS database Line 22  IsisDB - Read CDS/ISIS database
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use IsisDB    use IsisDB;
26    
27    my $isis = new IsisDB(    my $isis = new IsisDB(
28          isisdb => './cds/cds',          isisdb => './cds/cds',
29    );    );
30    
31      for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32            print $isis->to_ascii($mfn),"\n";
33      }
34    
35  =head1 DESCRIPTION  =head1 DESCRIPTION
36    
37  This module will read CDS/ISIS databases and create hash values out of it.  This module will read CDS/ISIS databases and create hash values out of it.
38  It can be used as perl-only alternative to OpenIsis module.  It can be used as perl-only alternative to OpenIsis module.
39    
40    This will module will always be slower that OpenIsis module which use C
41    library. However, since it's written in perl, it's platform independent (so
42    you don't need C compiler), and can be easily modified.
43    
44    Unique feature of this module is ability to C<include_deleted> records.
45    It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46    fields which are zero sized will be filled with random junk from memory).
47    
48  =head1 METHODS  =head1 METHODS
49    
50  =cut  =cut
# Line 50  It can be used as perl-only alternative Line 63  It can be used as perl-only alternative
63  # some binary reads  # some binary reads
64  #  #
65    
 sub Read32 {  
         my $self = shift;  
   
         my $f = shift || die "Read32 needs file handle";  
         read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);  
         return unpack("l",$b);  
 }  
   
66  =head2 new  =head2 new
67    
68  Open CDS/ISIS database  Open CDS/ISIS database
# Line 65  Open CDS/ISIS database Line 70  Open CDS/ISIS database
70   my $isis = new IsisDB(   my $isis = new IsisDB(
71          isisdb => './cds/cds',          isisdb => './cds/cds',
72          read_fdt => 1,          read_fdt => 1,
73            include_deleted => 1,
74            hash_filter => sub {
75                    my $v = shift;
76                    $v =~ s#foo#bar#g;
77            },
78          debug => 1,          debug => 1,
79   );   );
80    
# Line 82  and common prefix of C<.FDT>, C<.MST>, C Line 92  and common prefix of C<.FDT>, C<.MST>, C
92  Boolean flag to specify if field definition table should be read. It's off  Boolean flag to specify if field definition table should be read. It's off
93  by default.  by default.
94    
95    =item include_deleted
96    
97    Don't skip logically deleted records in ISIS.
98    
99    =item hash_filter
100    
101    Filter code ref which will be used before data is converted to hash.
102    
103  =item debug  =item debug
104    
105  Dump a C<lot> of debugging output.  Dump a B<lot> of debugging output.
106    
107  =back  =back
108    
# Line 97  sub new { Line 115  sub new {
115          my $self = {};          my $self = {};
116          bless($self, $class);          bless($self, $class);
117    
118          $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";          croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
119    
120          $self->{debug} = {@_}->{debug};          foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
121                    $self->{$v} = {@_}->{$v};
122            }
123    
124          # if you want to read .FDT file use read_fdt argument when creating class!          # if you want to read .FDT file use read_fdt argument when creating class!
125          if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {          if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
# Line 140  sub new { Line 160  sub new {
160          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
161          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
162          seek(fileMST,4,0);          seek(fileMST,4,0);
163          $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";  
164            my $buff;
165    
166            read(fileMST, $buff, 4);
167            $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
168    
169          # save maximum MFN          # save maximum MFN
170          $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;          $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
# Line 173  sub new { Line 197  sub new {
197                  my $buff = shift || return;                  my $buff = shift || return;
198                  my @arr = unpack("ssssssllls", $buff);                  my @arr = unpack("ssssssllls", $buff);
199    
200                    print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
201    
202                  my $IDTYPE = shift @arr;                  my $IDTYPE = shift @arr;
203                  foreach (@flds) {                  foreach (@flds) {
204                          $self->{$IDTYPE}->{$_} = abs(shift @arr);                          $self->{$IDTYPE}->{$_} = abs(shift @arr);
205                  }                  }
206          }          }
207    
         my $buff;  
208          read(fileCNT, $buff, 26);          read(fileCNT, $buff, 26);
209          $self->unpack_cnt($buff);          $self->unpack_cnt($buff);
210    
# Line 206  Read record with selected MFN Line 231  Read record with selected MFN
231    my $rec = $isis->fetch(55);    my $rec = $isis->fetch(55);
232    
233  Returns hash with keys which are field names and values are unpacked values  Returns hash with keys which are field names and values are unpacked values
234  for that field.  for that field (like C<^asometing^bsomething else>)
235    
236  =cut  =cut
237    
# Line 223  sub fetch { Line 248  sub fetch {
248          print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});          print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
249          seek($self->{'fileXRF'},$mfnpos,0);          seek($self->{'fileXRF'},$mfnpos,0);
250    
251            my $buff;
252    
253          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
254          my $pointer=$self->Read32(\*{$self->{'fileXRF'}});          read($self->{'fileXRF'}, $buff, 4);
255            my $pointer=unpack("l",$buff) || carp "pointer is null";
256    
257          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
258          my $XRFMFP = $pointer - ($XRFMFB*2048);          my $XRFMFP = $pointer - ($XRFMFB*2048);
# Line 249  sub fetch { Line 277  sub fetch {
277    
278          seek($self->{'fileMST'},$offset4,0);          seek($self->{'fileMST'},$offset4,0);
279    
280          my $value=$self->Read32(\*{$self->{'fileMST'}});          read($self->{'fileMST'}, $buff, 4);
281            my $value=unpack("l",$buff);
282    
283          if ($value!=$mfn) {          if ($value!=$mfn) {
284  print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");      print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");    
# Line 263  print ("Error: The MFN:".$mfn." is not f Line 292  print ("Error: The MFN:".$mfn." is not f
292  #       $NVF=$self->Read16($fileMST);  #       $NVF=$self->Read16($fileMST);
293  #       $STATUS=$self->Read16($fileMST);  #       $STATUS=$self->Read16($fileMST);
294    
         my $buff;  
295          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
296    
297          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
298    
299          print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
300    
301            # delete old record
302            delete $self->{record};
303    
304            if (! $self->{'include_deleted'} && $MFRL < 0) {
305                    print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
306                    return;
307            }
308    
309          # Get Directory Format          # Get Directory Format
310    
311          my @FieldPOS;          my @FieldPOS;
312          my @FieldLEN;          my @FieldLEN;
313          my @FieldTAG;          my @FieldTAG;
314    
315            read($self->{'fileMST'}, $buff, 6 * $NVF);
316    
317            my $fld_len = 0;
318    
319          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
320    
321  #               $TAG=$self->Read16($fileMST);  #               $TAG=$self->Read16($fileMST);
322  #               $POS=$self->Read16($fileMST);  #               $POS=$self->Read16($fileMST);
323  #               $LEN=$self->Read16($fileMST);  #               $LEN=$self->Read16($fileMST);
324    
325                  read($self->{'fileMST'}, $buff, 6);                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
                 my ($TAG,$POS,$LEN) = unpack("sss", $buff);  
326    
327                  print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
328    
# Line 300  print ("Error: The MFN:".$mfn." is not f Line 339  print ("Error: The MFN:".$mfn." is not f
339                  push @FieldTAG,$TAG;                  push @FieldTAG,$TAG;
340                  push @FieldPOS,$POS;                  push @FieldPOS,$POS;
341                  push @FieldLEN,$LEN;                  push @FieldLEN,$LEN;
342    
343                    $fld_len += $LEN;
344          }          }
345    
346          # Get Variable Fields          # Get Variable Fields
347    
348          delete $self->{record};          read($self->{'fileMST'},$buff,$fld_len);
349    
350          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
351                  my $rec;                  # skip zero-sized fields
352                  read($self->{'fileMST'},$rec,$FieldLEN[$i]);                  next if ($FieldLEN[$i] == 0);
                 push @{$self->{record}->{$FieldTAG[$i]}}, $rec;  
         }  
         close(fileMST);  
353    
354          # The record is marked for deletion                  push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
         if ($STATUS==1) {  
                 return -1;  
355          }          }
356            close(fileMST);
357    
358          print Dumper($self) if ($self->{debug});          print Dumper($self) if ($self->{debug});
359    
# Line 349  sub to_ascii { Line 386  sub to_ascii {
386          return $out;          return $out;
387  }  }
388    
389    =head2 to_hash
390    
391    Read mfn and convert it to hash
392    
393      my $hash = $isis->to_hash($mfn);
394    
395    It has ability to convert characters (using C<hash_filter> from ISIS
396    database before creating structures enabling character remapping or quick
397    fixup of data.
398    
399    This function returns hash which is like this:
400    
401      $hash = {
402        '210' => [
403                   {
404                     'c' => 'New York University press',
405                     'a' => 'New York',
406                     'd' => 'cop. 1988'
407                   }
408                 ],
409        '990' => [
410                   '2140',
411                   '88',
412                   'HAY'
413                 ],
414      };
415    
416    You can later use that has to produce any output from ISIS data.
417    
418    =cut
419    
420    sub to_hash {
421            my $self = shift;
422    
423            my $mfn = shift || confess "need mfn!";
424    
425            my $rec;
426            my $row = $self->fetch($mfn);
427    
428            foreach my $k (keys %{$row}) {
429                    foreach my $l (@{$row->{$k}}) {
430    
431                            # filter output
432                            $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
433    
434                            # has subfields?
435                            my $val;
436                            if ($l =~ m/\^/) {
437                                    foreach my $t (split(/\^/,$l)) {
438                                            next if (! $t);
439                                            $val->{substr($t,0,1)} = substr($t,1);
440                                    }
441                            } else {
442                                    $val = $l;
443                            }
444    
445                            push @{$rec->{$k}}, $val;
446                    }
447            }
448    
449            return $rec;
450    }
451    
452  #  #
453  # XXX porting from php left-over:  # XXX porting from php left-over:
454  #  #

Legend:
Removed from v.7  
changed lines
  Added in v.12

  ViewVC Help
Powered by ViewVC 1.1.26