/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 65 - (hide annotations)
Thu Jul 13 13:34:30 2006 UTC (17 years, 8 months ago) by dpavlin
File size: 19030 byte(s)
documented that hash filter gets also a field number [0.22]
1 dpavlin 36 package Biblio::Isis;
2 dpavlin 1 use strict;
3    
4     use Carp;
5 dpavlin 18 use File::Glob qw(:globally :nocase);
6    
7 dpavlin 1 BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 65 $VERSION = 0.22;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21 dpavlin 36 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25 dpavlin 36 use Biblio::Isis;
26 dpavlin 11
27 dpavlin 36 my $isis = new Biblio::Isis(
28 dpavlin 1 isisdb => './cds/cds',
29     );
30    
31 dpavlin 32 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32 dpavlin 11 print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37 dpavlin 15 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 dpavlin 27 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
39     seems to depriciate it's old C<XS> bindings for perl.
40 dpavlin 1
41 dpavlin 15 It can create hash values from data in ISIS database (using C<to_hash>),
42     ASCII dump (using C<to_ascii>) or just hash with field names and packed
43     values (like C<^asomething^belse>).
44 dpavlin 11
45     Unique feature of this module is ability to C<include_deleted> records.
46     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
47     fields which are zero sized will be filled with random junk from memory).
48    
49 dpavlin 15 It also has support for identifiers (only if ISIS database is created by
50     IsisMarc), see C<to_hash>.
51    
52 dpavlin 27 This module will always be slower than OpenIsis module which use C
53 dpavlin 15 library. However, since it's written in perl, it's platform independent (so
54     you don't need C compiler), and can be easily modified. I hope that it
55     creates data structures which are easier to use than ones created by
56     OpenIsis, so reduced time in other parts of the code should compensate for
57     slower performance of this module (speed of reading ISIS database is
58     rarely an issue).
59    
60 dpavlin 1 =head1 METHODS
61    
62     =cut
63    
64     # my $ORDN; # Nodes Order
65     # my $ORDF; # Leafs Order
66     # my $N; # Number of Memory buffers for nodes
67     # my $K; # Number of buffers for first level index
68     # my $LIV; # Current number of Index Levels
69     # my $POSRX; # Pointer to Root Record in N0x
70     # my $NMAXPOS; # Next Available position in N0x
71     # my $FMAXPOS; # Next available position in L0x
72     # my $ABNORMAL; # Formal BTree normality indicator
73    
74     #
75     # some binary reads
76     #
77    
78     =head2 new
79    
80 dpavlin 15 Open ISIS database
81 dpavlin 1
82 dpavlin 36 my $isis = new Biblio::Isis(
83 dpavlin 1 isisdb => './cds/cds',
84     read_fdt => 1,
85 dpavlin 12 include_deleted => 1,
86     hash_filter => sub {
87 dpavlin 64 my ($v,$field_number) = @_;
88 dpavlin 12 $v =~ s#foo#bar#g;
89     },
90 dpavlin 1 debug => 1,
91 dpavlin 57 join_subfields_with => ' ; ',
92 dpavlin 1 );
93    
94 dpavlin 2 Options are described below:
95    
96     =over 5
97    
98 dpavlin 1 =item isisdb
99    
100 dpavlin 15 This is full or relative path to ISIS database files which include
101 dpavlin 18 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102     C<read_fdt> option) files.
103 dpavlin 1
104 dpavlin 15 In this example it uses C<./cds/cds.MST> and related files.
105    
106 dpavlin 1 =item read_fdt
107    
108     Boolean flag to specify if field definition table should be read. It's off
109     by default.
110    
111 dpavlin 9 =item include_deleted
112    
113 dpavlin 11 Don't skip logically deleted records in ISIS.
114 dpavlin 9
115 dpavlin 12 =item hash_filter
116    
117 dpavlin 65 Filter code ref which will be used before data is converted to hash. It will
118     receive two arguments, whole line from current field (in C<< $_[0] >>) and
119     field number (in C<< $_[1] >>).
120 dpavlin 12
121     =item debug
122    
123 dpavlin 54 Dump a B<lot> of debugging output even at level 1. For even more increase level.
124 dpavlin 12
125 dpavlin 57 =item join_subfields_with
126    
127     Define delimiter which will be used to join repeatable subfields. This
128     option is included to support lagacy application written against version
129     older than 0.21 of this module. By default, it disabled. See L</to_hash>.
130    
131 dpavlin 2 =back
132    
133 dpavlin 1 =cut
134    
135     sub new {
136     my $class = shift;
137     my $self = {};
138     bless($self, $class);
139    
140 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
141 dpavlin 1
142 dpavlin 12 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
143 dpavlin 9 $self->{$v} = {@_}->{$v};
144     }
145 dpavlin 1
146 dpavlin 18 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
147    
148     foreach my $f (@isis_files) {
149     my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
150     $self->{lc($ext)."_file"} = $f;
151     }
152    
153     my @must_exist = qw(mst xrf);
154     push @must_exist, "fdt" if ($self->{read_fdt});
155    
156     foreach my $ext (@must_exist) {
157 dpavlin 39 unless ($self->{$ext."_file"}) {
158     carp "missing ",uc($ext)," file in ",$self->{isisdb};
159     return;
160     }
161 dpavlin 18 }
162    
163 dpavlin 45 if ($self->{debug}) {
164     print STDERR "## using files: ",join(" ",@isis_files),"\n";
165     eval "use Data::Dump";
166 dpavlin 18
167 dpavlin 45 if (! $@) {
168     *Dumper = *Data::Dump::dump;
169     } else {
170     use Data::Dumper;
171     }
172     }
173    
174 dpavlin 1 # if you want to read .FDT file use read_fdt argument when creating class!
175 dpavlin 18 if ($self->{read_fdt} && -e $self->{fdt_file}) {
176 dpavlin 1
177     # read the $db.FDT file for tags
178     my $fieldzone=0;
179    
180 dpavlin 33 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
181     binmode($fileFDT);
182 dpavlin 1
183 dpavlin 33 while (<$fileFDT>) {
184 dpavlin 1 chomp;
185     if ($fieldzone) {
186     my $name=substr($_,0,30);
187     my $tag=substr($_,50,3);
188    
189     $name =~ s/\s+$//;
190     $tag =~ s/\s+$//;
191    
192     $self->{'TagName'}->{$tag}=$name;
193     }
194    
195     if (/^\*\*\*/) {
196     $fieldzone=1;
197     }
198     }
199    
200 dpavlin 33 close($fileFDT);
201 dpavlin 1 }
202    
203     # Get the Maximum MFN from $db.MST
204    
205 dpavlin 18 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
206 dpavlin 33 binmode($self->{'fileMST'});
207 dpavlin 1
208     # MST format: (* = 32 bit signed)
209     # CTLMFN* always 0
210     # NXTMFN* MFN to be assigned to the next record created
211     # NXTMFB* last block allocated to master file
212     # NXTMFP offset to next available position in last block
213     # MFTYPE always 0 for user db file (1 for system)
214 dpavlin 34 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
215 dpavlin 1
216 dpavlin 11 my $buff;
217    
218 dpavlin 34 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
219     $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
220 dpavlin 11
221 dpavlin 45 print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
222 dpavlin 18
223     # open files for later
224     open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
225 dpavlin 33 binmode($self->{'fileXRF'});
226 dpavlin 18
227     $self ? return $self : return undef;
228     }
229    
230 dpavlin 32 =head2 count
231    
232     Return number of records in database
233    
234     print $isis->count;
235    
236     =cut
237    
238     sub count {
239     my $self = shift;
240     return $self->{'NXTMFN'} - 1;
241     }
242    
243 dpavlin 7 =head2 fetch
244 dpavlin 1
245 dpavlin 2 Read record with selected MFN
246 dpavlin 1
247 dpavlin 7 my $rec = $isis->fetch(55);
248 dpavlin 2
249     Returns hash with keys which are field names and values are unpacked values
250 dpavlin 15 for that field like this:
251 dpavlin 2
252 dpavlin 15 $rec = {
253     '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
254     '990' => [ '2140', '88', 'HAY' ],
255     };
256    
257 dpavlin 2 =cut
258    
259 dpavlin 7 sub fetch {
260 dpavlin 1 my $self = shift;
261    
262 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
263 dpavlin 1
264 dpavlin 16 # is mfn allready in memory?
265     my $old_mfn = $self->{'current_mfn'} || -1;
266 dpavlin 25 return $self->{record} if ($mfn == $old_mfn);
267 dpavlin 1
268 dpavlin 16 print STDERR "## fetch: $mfn\n" if ($self->{debug});
269    
270 dpavlin 1 # XXX check this?
271     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
272    
273 dpavlin 18 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
274 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
275 dpavlin 1
276 dpavlin 11 my $buff;
277    
278 dpavlin 25 # delete old record
279     delete $self->{record};
280    
281 dpavlin 1 # read XRFMFB abd XRFMFP
282 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
283 dpavlin 41 my $pointer=unpack("V",$buff);
284     if (! $pointer) {
285     if ($self->{include_deleted}) {
286     return;
287     } else {
288     warn "pointer for MFN $mfn is null\n";
289     return;
290     }
291     }
292 dpavlin 1
293 dpavlin 25 # check for logically deleted record
294 dpavlin 33 if ($pointer & 0x80000000) {
295 dpavlin 25 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
296     $self->{deleted} = $mfn;
297    
298     return unless $self->{include_deleted};
299    
300 dpavlin 33 # abs
301     $pointer = ($pointer ^ 0xffffffff) + 1;
302 dpavlin 25 }
303    
304 dpavlin 1 my $XRFMFB = int($pointer/2048);
305     my $XRFMFP = $pointer - ($XRFMFB*2048);
306    
307 dpavlin 16 # (XRFMFB - 1) * 512 + XRFMFP
308     # why do i have to do XRFMFP % 1024 ?
309 dpavlin 1
310 dpavlin 26 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
311 dpavlin 1
312 dpavlin 16 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
313 dpavlin 1
314     # Get Record Information
315    
316 dpavlin 33 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
317 dpavlin 1
318 dpavlin 33 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
319     my $value=unpack("V",$buff);
320 dpavlin 1
321 dpavlin 16 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
322    
323 dpavlin 1 if ($value!=$mfn) {
324 dpavlin 26 if ($value == 0) {
325     print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
326     $self->{deleted} = $mfn;
327     return;
328     }
329    
330     carp "Error: MFN ".$mfn." not found in MST file, found $value";
331     return;
332 dpavlin 1 }
333    
334 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
335 dpavlin 1
336 dpavlin 33 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
337 dpavlin 1
338 dpavlin 16 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
339 dpavlin 1
340 dpavlin 25 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
341 dpavlin 9
342 dpavlin 16 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
343    
344 dpavlin 1 # Get Directory Format
345    
346     my @FieldPOS;
347     my @FieldLEN;
348     my @FieldTAG;
349    
350 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
351    
352 dpavlin 16 my $rec_len = 0;
353 dpavlin 8
354 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
355    
356 dpavlin 33 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
357 dpavlin 1
358 dpavlin 16 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
359 dpavlin 1
360     # The TAG does not exists in .FDT so we set it to 0.
361     #
362     # XXX This is removed from perl version; .FDT file is updated manually, so
363     # you will often have fields in .MST file which aren't in .FDT. On the other
364     # hand, IsisMarc doesn't use .FDT files at all!
365    
366     #if (! $self->{TagName}->{$TAG}) {
367     # $TAG=0;
368     #}
369    
370     push @FieldTAG,$TAG;
371     push @FieldPOS,$POS;
372     push @FieldLEN,$LEN;
373 dpavlin 8
374 dpavlin 16 $rec_len += $LEN;
375 dpavlin 1 }
376    
377     # Get Variable Fields
378    
379 dpavlin 16 read($self->{'fileMST'},$buff,$rec_len);
380 dpavlin 8
381 dpavlin 16 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
382    
383 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
384 dpavlin 10 # skip zero-sized fields
385     next if ($FieldLEN[$i] == 0);
386    
387 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
388 dpavlin 1 }
389    
390 dpavlin 16 $self->{'current_mfn'} = $mfn;
391    
392 dpavlin 25 print STDERR Dumper($self),"\n" if ($self->{debug});
393 dpavlin 1
394 dpavlin 2 return $self->{'record'};
395 dpavlin 1 }
396    
397 dpavlin 54 =head2 mfn
398    
399     Returns current MFN position
400    
401     my $mfn = $isis->mfn;
402    
403     =cut
404    
405     # This function should be simple return $self->{current_mfn},
406     # but if new is called with _hack_mfn it becomes setter.
407     # It's useful in tests when setting $isis->{record} directly
408    
409     sub mfn {
410     my $self = shift;
411     return $self->{current_mfn};
412     };
413    
414    
415 dpavlin 2 =head2 to_ascii
416    
417 dpavlin 27 Returns ASCII output of record with specified MFN
418 dpavlin 2
419 dpavlin 15 print $isis->to_ascii(42);
420 dpavlin 2
421 dpavlin 27 This outputs something like this:
422 dpavlin 15
423     210 ^aNew York^cNew York University press^dcop. 1988
424     990 2140
425     990 88
426     990 HAY
427    
428     If C<read_fdt> is specified when calling C<new> it will display field names
429     from C<.FDT> file instead of numeric tags.
430    
431 dpavlin 2 =cut
432    
433     sub to_ascii {
434     my $self = shift;
435    
436     my $mfn = shift || croak "need MFN";
437    
438 dpavlin 41 my $rec = $self->fetch($mfn) || return;
439 dpavlin 2
440     my $out = "0\t$mfn";
441    
442     foreach my $f (sort keys %{$rec}) {
443 dpavlin 15 my $fn = $self->tag_name($f);
444     $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
445 dpavlin 2 }
446    
447     $out .= "\n";
448    
449     return $out;
450     }
451    
452 dpavlin 12 =head2 to_hash
453    
454 dpavlin 15 Read record with specified MFN and convert it to hash
455 dpavlin 12
456     my $hash = $isis->to_hash($mfn);
457    
458 dpavlin 27 It has ability to convert characters (using C<hash_filter>) from ISIS
459 dpavlin 15 database before creating structures enabling character re-mapping or quick
460     fix-up of data.
461 dpavlin 12
462     This function returns hash which is like this:
463    
464     $hash = {
465     '210' => [
466     {
467     'c' => 'New York University press',
468     'a' => 'New York',
469     'd' => 'cop. 1988'
470     }
471     ],
472     '990' => [
473     '2140',
474     '88',
475     'HAY'
476     ],
477     };
478    
479 dpavlin 15 You can later use that hash to produce any output from ISIS data.
480 dpavlin 12
481 dpavlin 15 If database is created using IsisMarc, it will also have to special fields
482     which will be used for identifiers, C<i1> and C<i2> like this:
483    
484     '200' => [
485     {
486     'i1' => '1',
487     'i2' => ' '
488     'a' => 'Goa',
489     'f' => 'Valdo D\'Arienzo',
490     'e' => 'tipografie e tipografi nel XVI secolo',
491     }
492     ],
493    
494 dpavlin 50 In case there are repeatable subfields in record, this will create
495     following structure:
496    
497     '900' => [ {
498     'a' => [ 'foo', 'bar', 'baz' ],
499     }]
500    
501 dpavlin 57 Or in more complex example of
502    
503     902 ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
504    
505     it will create
506    
507     902 => [
508     { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
509     ],
510    
511     This behaviour can be changed using C<join_subfields_with> option to L</new>,
512     in which case C<to_hash> will always create single value for each subfield.
513     This will change result to:
514    
515    
516    
517 dpavlin 15 This method will also create additional field C<000> with MFN.
518    
519 dpavlin 56 There is also more elaborative way to call C<to_hash> like this:
520    
521     my $hash = $isis->to_hash({
522     mfn => 42,
523 dpavlin 57 include_subfields => 1,
524 dpavlin 56 });
525    
526 dpavlin 57 Each option controll creation of hash:
527    
528     =over 4
529    
530     =item mfn
531    
532     Specify MFN number of record
533    
534     =item include_subfields
535    
536     This option will create additional key in hash called C<subfields> which will
537     have original record subfield order and index to that subfield like this:
538    
539     902 => [ {
540     a => ["a1", "a2", "a3", "a4", "a5"],
541     b => ["b1", "b2"],
542     c => "c1",
543     subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
544     } ],
545    
546     =item join_subfields_with
547    
548     Define delimiter which will be used to join repeatable subfields. You can
549 dpavlin 58 specify option here instead in L</new> if you want to have per-record control.
550 dpavlin 57
551     =back
552    
553 dpavlin 12 =cut
554    
555     sub to_hash {
556     my $self = shift;
557    
558 dpavlin 56
559 dpavlin 12 my $mfn = shift || confess "need mfn!";
560 dpavlin 56 my $arg;
561 dpavlin 12
562 dpavlin 56 if (ref($mfn) eq 'HASH') {
563     $arg = $mfn;
564     $mfn = $arg->{mfn} || confess "need mfn in arguments";
565     }
566    
567 dpavlin 15 # init record to include MFN as field 000
568 dpavlin 16 my $rec = { '000' => [ $mfn ] };
569 dpavlin 15
570 dpavlin 41 my $row = $self->fetch($mfn) || return;
571 dpavlin 12
572 dpavlin 58 my $j_rs = $arg->{join_subfields_with};
573     $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
574 dpavlin 57 my $i_sf = $arg->{include_subfields};
575 dpavlin 12
576 dpavlin 57 foreach my $f_nr (keys %{$row}) {
577     foreach my $l (@{$row->{$f_nr}}) {
578    
579 dpavlin 12 # filter output
580 dpavlin 44 if ($self->{'hash_filter'}) {
581 dpavlin 64 $l = $self->{'hash_filter'}->($l, $f_nr);
582 dpavlin 44 next unless defined($l);
583     }
584 dpavlin 12
585 dpavlin 15 my $val;
586 dpavlin 57 my $r_sf; # repeatable subfields in this record
587 dpavlin 15
588     # has identifiers?
589 dpavlin 23 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
590 dpavlin 15
591 dpavlin 12 # has subfields?
592     if ($l =~ m/\^/) {
593     foreach my $t (split(/\^/,$l)) {
594     next if (! $t);
595 dpavlin 50 my ($sf,$v) = (substr($t,0,1), substr($t,1));
596 dpavlin 57 # XXX this might be option, but why?
597 dpavlin 54 next unless ($v);
598 dpavlin 57 # warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
599 dpavlin 54
600 dpavlin 50 if (ref( $val->{$sf} ) eq 'ARRAY') {
601 dpavlin 54
602 dpavlin 50 push @{ $val->{$sf} }, $v;
603 dpavlin 57
604     # record repeatable subfield it it's offset
605     push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
606     $r_sf->{$sf}++;
607    
608 dpavlin 50 } elsif (defined( $val->{$sf} )) {
609 dpavlin 57
610 dpavlin 50 # convert scalar field to array
611     $val->{$sf} = [ $val->{$sf}, $v ];
612 dpavlin 57
613     push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
614     $r_sf->{$sf}++;
615    
616 dpavlin 50 } else {
617     $val->{$sf} = $v;
618 dpavlin 57 push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
619 dpavlin 50 }
620 dpavlin 12 }
621     } else {
622     $val = $l;
623     }
624    
625 dpavlin 57 if ($j_rs) {
626     map {
627     $val->{$_} = join($j_rs, @{ $val->{$_} });
628     } keys %$r_sf
629     }
630    
631     push @{$rec->{$f_nr}}, $val;
632 dpavlin 12 }
633     }
634    
635     return $rec;
636     }
637    
638 dpavlin 15 =head2 tag_name
639 dpavlin 1
640 dpavlin 15 Return name of selected tag
641 dpavlin 1
642 dpavlin 15 print $isis->tag_name('200');
643    
644     =cut
645    
646     sub tag_name {
647 dpavlin 1 my $self = shift;
648 dpavlin 15 my $tag = shift || return;
649     return $self->{'TagName'}->{$tag} || $tag;
650 dpavlin 1 }
651    
652 dpavlin 35
653     =head2 read_cnt
654    
655     Read content of C<.CNT> file and return hash containing it.
656    
657     print Dumper($isis->read_cnt);
658    
659     This function is not used by module (C<.CNT> files are not required for this
660     module to work), but it can be useful to examine your index (while debugging
661     for example).
662    
663     =cut
664    
665     sub read_cnt {
666     my $self = shift;
667    
668     croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
669    
670     # Get the index information from $db.CNT
671    
672     open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
673     binmode($fileCNT);
674    
675     my $buff;
676    
677     read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
678     $self->unpack_cnt($buff);
679    
680     read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
681     $self->unpack_cnt($buff);
682    
683     close($fileCNT);
684    
685     return $self->{cnt};
686     }
687    
688     =head2 unpack_cnt
689    
690     Unpack one of two 26 bytes fixed length record in C<.CNT> file.
691    
692     Here is definition of record:
693    
694     off key description size
695     0: IDTYPE BTree type s
696     2: ORDN Nodes Order s
697     4: ORDF Leafs Order s
698     6: N Number of Memory buffers for nodes s
699     8: K Number of buffers for first level index s
700     10: LIV Current number of Index Levels s
701     12: POSRX Pointer to Root Record in N0x l
702     16: NMAXPOS Next Available position in N0x l
703     20: FMAXPOS Next available position in L0x l
704     24: ABNORMAL Formal BTree normality indicator s
705     length: 26 bytes
706    
707     This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
708    
709     =cut
710    
711     sub unpack_cnt {
712     my $self = shift;
713    
714     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
715    
716     my $buff = shift || return;
717     my @arr = unpack("vvvvvvVVVv", $buff);
718    
719     print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
720    
721     my $IDTYPE = shift @arr;
722     foreach (@flds) {
723     $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
724     }
725     }
726    
727 dpavlin 1 1;
728    
729     =head1 BUGS
730    
731 dpavlin 27 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
732     some variations in input databases which has been tested with this module.
733     When I was in doubt, I assumed that OpenIsis's implementation was right
734     (except for obvious bugs).
735 dpavlin 1
736 dpavlin 27 However, every effort has been made to test this module with as much
737     databases (and programs that create them) as possible.
738    
739     I would be very greatful for success or failure reports about usage of this
740     module with databases from programs other than WinIsis and IsisMarc. I had
741     tested this against ouput of one C<isis.dll>-based application, but I don't
742     know any details about it's version.
743    
744 dpavlin 54 =head1 VERSIONS
745    
746 dpavlin 57 As this is young module, new features are added in subsequent version. It's
747     a good idea to specify version when using this module like this:
748 dpavlin 54
749 dpavlin 57 use Biblio::Isis 0.21
750    
751     Below is list of changes in specific version of module (so you can target
752     older versions if you really have to):
753    
754 dpavlin 54 =over 8
755    
756 dpavlin 65 =item 0.22
757    
758     Added field number when calling C<hash_filter>
759    
760 dpavlin 57 =item 0.21
761    
762     Added C<join_subfields_with> to L</new> and L</to_hash>.
763    
764     Added C<include_subfields> to L</to_hash>.
765    
766 dpavlin 54 =item 0.20
767    
768 dpavlin 56 Added C<< $isis->mfn >>, support for repeatable subfields and
769     C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
770 dpavlin 54
771     =back
772    
773 dpavlin 1 =head1 AUTHOR
774    
775     Dobrica Pavlinusic
776     CPAN ID: DPAVLIN
777     dpavlin@rot13.org
778     http://www.rot13.org/~dpavlin/
779    
780 dpavlin 15 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
781     written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
782 dpavlin 1
783     =head1 COPYRIGHT
784    
785     This program is free software; you can redistribute
786     it and/or modify it under the same terms as Perl itself.
787    
788     The full text of the license can be found in the
789     LICENSE file included with this module.
790    
791    
792     =head1 SEE ALSO
793    
794 dpavlin 59 L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
795    
796 dpavlin 15 OpenIsis web site L<http://www.openisis.org>
797 dpavlin 1
798 dpavlin 15 perl4lib site L<http://perl4lib.perl.org>
799    

  ViewVC Help
Powered by ViewVC 1.1.26