/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 72 - (hide annotations)
Fri May 18 21:16:52 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 19735 byte(s)
push version to 0.24
1 dpavlin 36 package Biblio::Isis;
2 dpavlin 1 use strict;
3    
4     use Carp;
5 dpavlin 18 use File::Glob qw(:globally :nocase);
6    
7 dpavlin 1 BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 72 $VERSION = 0.24;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21 dpavlin 36 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25 dpavlin 36 use Biblio::Isis;
26 dpavlin 11
27 dpavlin 36 my $isis = new Biblio::Isis(
28 dpavlin 1 isisdb => './cds/cds',
29     );
30    
31 dpavlin 32 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32 dpavlin 11 print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37 dpavlin 15 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 dpavlin 27 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
39     seems to depriciate it's old C<XS> bindings for perl.
40 dpavlin 1
41 dpavlin 15 It can create hash values from data in ISIS database (using C<to_hash>),
42     ASCII dump (using C<to_ascii>) or just hash with field names and packed
43     values (like C<^asomething^belse>).
44 dpavlin 11
45     Unique feature of this module is ability to C<include_deleted> records.
46     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
47     fields which are zero sized will be filled with random junk from memory).
48    
49 dpavlin 15 It also has support for identifiers (only if ISIS database is created by
50     IsisMarc), see C<to_hash>.
51    
52 dpavlin 27 This module will always be slower than OpenIsis module which use C
53 dpavlin 15 library. However, since it's written in perl, it's platform independent (so
54     you don't need C compiler), and can be easily modified. I hope that it
55     creates data structures which are easier to use than ones created by
56     OpenIsis, so reduced time in other parts of the code should compensate for
57     slower performance of this module (speed of reading ISIS database is
58     rarely an issue).
59    
60 dpavlin 1 =head1 METHODS
61    
62     =cut
63    
64     # my $ORDN; # Nodes Order
65     # my $ORDF; # Leafs Order
66     # my $N; # Number of Memory buffers for nodes
67     # my $K; # Number of buffers for first level index
68     # my $LIV; # Current number of Index Levels
69     # my $POSRX; # Pointer to Root Record in N0x
70     # my $NMAXPOS; # Next Available position in N0x
71     # my $FMAXPOS; # Next available position in L0x
72     # my $ABNORMAL; # Formal BTree normality indicator
73    
74     #
75     # some binary reads
76     #
77    
78     =head2 new
79    
80 dpavlin 15 Open ISIS database
81 dpavlin 1
82 dpavlin 36 my $isis = new Biblio::Isis(
83 dpavlin 1 isisdb => './cds/cds',
84     read_fdt => 1,
85 dpavlin 12 include_deleted => 1,
86     hash_filter => sub {
87 dpavlin 64 my ($v,$field_number) = @_;
88 dpavlin 12 $v =~ s#foo#bar#g;
89     },
90 dpavlin 1 debug => 1,
91 dpavlin 57 join_subfields_with => ' ; ',
92 dpavlin 1 );
93    
94 dpavlin 2 Options are described below:
95    
96     =over 5
97    
98 dpavlin 1 =item isisdb
99    
100 dpavlin 15 This is full or relative path to ISIS database files which include
101 dpavlin 18 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102     C<read_fdt> option) files.
103 dpavlin 1
104 dpavlin 15 In this example it uses C<./cds/cds.MST> and related files.
105    
106 dpavlin 1 =item read_fdt
107    
108     Boolean flag to specify if field definition table should be read. It's off
109     by default.
110    
111 dpavlin 9 =item include_deleted
112    
113 dpavlin 11 Don't skip logically deleted records in ISIS.
114 dpavlin 9
115 dpavlin 12 =item hash_filter
116    
117 dpavlin 65 Filter code ref which will be used before data is converted to hash. It will
118     receive two arguments, whole line from current field (in C<< $_[0] >>) and
119     field number (in C<< $_[1] >>).
120 dpavlin 12
121     =item debug
122    
123 dpavlin 54 Dump a B<lot> of debugging output even at level 1. For even more increase level.
124 dpavlin 12
125 dpavlin 57 =item join_subfields_with
126    
127     Define delimiter which will be used to join repeatable subfields. This
128     option is included to support lagacy application written against version
129     older than 0.21 of this module. By default, it disabled. See L</to_hash>.
130    
131 dpavlin 70 =item ignore_empty_subfields
132    
133     Remove all empty subfields while reading from ISIS file.
134    
135 dpavlin 2 =back
136    
137 dpavlin 1 =cut
138    
139     sub new {
140     my $class = shift;
141     my $self = {};
142     bless($self, $class);
143    
144 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
145 dpavlin 1
146 dpavlin 70 foreach my $v (qw{isisdb debug include_deleted hash_filter join_subfields_with ignore_empty_subfields}) {
147 dpavlin 69 $self->{$v} = {@_}->{$v} if defined({@_}->{$v});
148 dpavlin 9 }
149 dpavlin 1
150 dpavlin 18 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
151    
152     foreach my $f (@isis_files) {
153     my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
154     $self->{lc($ext)."_file"} = $f;
155     }
156    
157     my @must_exist = qw(mst xrf);
158     push @must_exist, "fdt" if ($self->{read_fdt});
159    
160     foreach my $ext (@must_exist) {
161 dpavlin 39 unless ($self->{$ext."_file"}) {
162     carp "missing ",uc($ext)," file in ",$self->{isisdb};
163     return;
164     }
165 dpavlin 18 }
166    
167 dpavlin 45 if ($self->{debug}) {
168     print STDERR "## using files: ",join(" ",@isis_files),"\n";
169     eval "use Data::Dump";
170 dpavlin 18
171 dpavlin 45 if (! $@) {
172     *Dumper = *Data::Dump::dump;
173     } else {
174     use Data::Dumper;
175     }
176     }
177    
178 dpavlin 1 # if you want to read .FDT file use read_fdt argument when creating class!
179 dpavlin 18 if ($self->{read_fdt} && -e $self->{fdt_file}) {
180 dpavlin 1
181     # read the $db.FDT file for tags
182     my $fieldzone=0;
183    
184 dpavlin 33 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
185     binmode($fileFDT);
186 dpavlin 1
187 dpavlin 33 while (<$fileFDT>) {
188 dpavlin 1 chomp;
189     if ($fieldzone) {
190     my $name=substr($_,0,30);
191     my $tag=substr($_,50,3);
192    
193     $name =~ s/\s+$//;
194     $tag =~ s/\s+$//;
195    
196     $self->{'TagName'}->{$tag}=$name;
197     }
198    
199     if (/^\*\*\*/) {
200     $fieldzone=1;
201     }
202     }
203    
204 dpavlin 33 close($fileFDT);
205 dpavlin 1 }
206    
207     # Get the Maximum MFN from $db.MST
208    
209 dpavlin 18 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
210 dpavlin 33 binmode($self->{'fileMST'});
211 dpavlin 1
212     # MST format: (* = 32 bit signed)
213     # CTLMFN* always 0
214     # NXTMFN* MFN to be assigned to the next record created
215     # NXTMFB* last block allocated to master file
216     # NXTMFP offset to next available position in last block
217     # MFTYPE always 0 for user db file (1 for system)
218 dpavlin 34 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
219 dpavlin 1
220 dpavlin 11 my $buff;
221    
222 dpavlin 34 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
223     $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
224 dpavlin 11
225 dpavlin 45 print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
226 dpavlin 18
227     # open files for later
228     open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
229 dpavlin 33 binmode($self->{'fileXRF'});
230 dpavlin 18
231     $self ? return $self : return undef;
232     }
233    
234 dpavlin 32 =head2 count
235    
236     Return number of records in database
237    
238     print $isis->count;
239    
240     =cut
241    
242     sub count {
243     my $self = shift;
244     return $self->{'NXTMFN'} - 1;
245     }
246    
247 dpavlin 7 =head2 fetch
248 dpavlin 1
249 dpavlin 2 Read record with selected MFN
250 dpavlin 1
251 dpavlin 7 my $rec = $isis->fetch(55);
252 dpavlin 2
253     Returns hash with keys which are field names and values are unpacked values
254 dpavlin 15 for that field like this:
255 dpavlin 2
256 dpavlin 15 $rec = {
257     '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
258     '990' => [ '2140', '88', 'HAY' ],
259     };
260    
261 dpavlin 2 =cut
262    
263 dpavlin 7 sub fetch {
264 dpavlin 1 my $self = shift;
265    
266 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
267 dpavlin 1
268 dpavlin 16 # is mfn allready in memory?
269     my $old_mfn = $self->{'current_mfn'} || -1;
270 dpavlin 25 return $self->{record} if ($mfn == $old_mfn);
271 dpavlin 1
272 dpavlin 16 print STDERR "## fetch: $mfn\n" if ($self->{debug});
273    
274 dpavlin 1 # XXX check this?
275     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
276    
277 dpavlin 18 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
278 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
279 dpavlin 1
280 dpavlin 11 my $buff;
281    
282 dpavlin 25 # delete old record
283     delete $self->{record};
284    
285 dpavlin 1 # read XRFMFB abd XRFMFP
286 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
287 dpavlin 41 my $pointer=unpack("V",$buff);
288     if (! $pointer) {
289     if ($self->{include_deleted}) {
290     return;
291     } else {
292     warn "pointer for MFN $mfn is null\n";
293     return;
294     }
295     }
296 dpavlin 1
297 dpavlin 25 # check for logically deleted record
298 dpavlin 33 if ($pointer & 0x80000000) {
299 dpavlin 25 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
300     $self->{deleted} = $mfn;
301    
302     return unless $self->{include_deleted};
303    
304 dpavlin 33 # abs
305     $pointer = ($pointer ^ 0xffffffff) + 1;
306 dpavlin 25 }
307    
308 dpavlin 1 my $XRFMFB = int($pointer/2048);
309     my $XRFMFP = $pointer - ($XRFMFB*2048);
310    
311 dpavlin 16 # (XRFMFB - 1) * 512 + XRFMFP
312     # why do i have to do XRFMFP % 1024 ?
313 dpavlin 1
314 dpavlin 26 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
315 dpavlin 1
316 dpavlin 16 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
317 dpavlin 1
318     # Get Record Information
319    
320 dpavlin 33 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
321 dpavlin 1
322 dpavlin 33 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
323     my $value=unpack("V",$buff);
324 dpavlin 1
325 dpavlin 16 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
326    
327 dpavlin 1 if ($value!=$mfn) {
328 dpavlin 26 if ($value == 0) {
329     print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
330     $self->{deleted} = $mfn;
331     return;
332     }
333    
334     carp "Error: MFN ".$mfn." not found in MST file, found $value";
335     return;
336 dpavlin 1 }
337    
338 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
339 dpavlin 1
340 dpavlin 33 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
341 dpavlin 1
342 dpavlin 16 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
343 dpavlin 1
344 dpavlin 25 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
345 dpavlin 9
346 dpavlin 16 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
347    
348 dpavlin 1 # Get Directory Format
349    
350     my @FieldPOS;
351     my @FieldLEN;
352     my @FieldTAG;
353    
354 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
355    
356 dpavlin 16 my $rec_len = 0;
357 dpavlin 8
358 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
359    
360 dpavlin 33 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
361 dpavlin 1
362 dpavlin 16 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
363 dpavlin 1
364     # The TAG does not exists in .FDT so we set it to 0.
365     #
366     # XXX This is removed from perl version; .FDT file is updated manually, so
367     # you will often have fields in .MST file which aren't in .FDT. On the other
368     # hand, IsisMarc doesn't use .FDT files at all!
369    
370     #if (! $self->{TagName}->{$TAG}) {
371     # $TAG=0;
372     #}
373    
374     push @FieldTAG,$TAG;
375     push @FieldPOS,$POS;
376     push @FieldLEN,$LEN;
377 dpavlin 8
378 dpavlin 16 $rec_len += $LEN;
379 dpavlin 1 }
380    
381     # Get Variable Fields
382    
383 dpavlin 16 read($self->{'fileMST'},$buff,$rec_len);
384 dpavlin 8
385 dpavlin 16 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
386    
387 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
388 dpavlin 10 # skip zero-sized fields
389     next if ($FieldLEN[$i] == 0);
390    
391 dpavlin 70 my $v = substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
392    
393     if ( $self->{ignore_empty_subfields} ) {
394     $v =~ s/(\^\w)+(\^\w)/$2/g;
395     $v =~ s/\^\w$//; # last on line?
396     next if ($v eq '');
397     }
398    
399     push @{$self->{record}->{$FieldTAG[$i]}}, $v;
400 dpavlin 1 }
401    
402 dpavlin 16 $self->{'current_mfn'} = $mfn;
403    
404 dpavlin 25 print STDERR Dumper($self),"\n" if ($self->{debug});
405 dpavlin 1
406 dpavlin 2 return $self->{'record'};
407 dpavlin 1 }
408    
409 dpavlin 54 =head2 mfn
410    
411     Returns current MFN position
412    
413     my $mfn = $isis->mfn;
414    
415     =cut
416    
417     # This function should be simple return $self->{current_mfn},
418     # but if new is called with _hack_mfn it becomes setter.
419     # It's useful in tests when setting $isis->{record} directly
420    
421     sub mfn {
422     my $self = shift;
423     return $self->{current_mfn};
424     };
425    
426    
427 dpavlin 2 =head2 to_ascii
428    
429 dpavlin 27 Returns ASCII output of record with specified MFN
430 dpavlin 2
431 dpavlin 15 print $isis->to_ascii(42);
432 dpavlin 2
433 dpavlin 27 This outputs something like this:
434 dpavlin 15
435     210 ^aNew York^cNew York University press^dcop. 1988
436     990 2140
437     990 88
438     990 HAY
439    
440     If C<read_fdt> is specified when calling C<new> it will display field names
441     from C<.FDT> file instead of numeric tags.
442    
443 dpavlin 2 =cut
444    
445     sub to_ascii {
446     my $self = shift;
447    
448     my $mfn = shift || croak "need MFN";
449    
450 dpavlin 41 my $rec = $self->fetch($mfn) || return;
451 dpavlin 2
452     my $out = "0\t$mfn";
453    
454     foreach my $f (sort keys %{$rec}) {
455 dpavlin 15 my $fn = $self->tag_name($f);
456     $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
457 dpavlin 2 }
458    
459     $out .= "\n";
460    
461     return $out;
462     }
463    
464 dpavlin 12 =head2 to_hash
465    
466 dpavlin 15 Read record with specified MFN and convert it to hash
467 dpavlin 12
468     my $hash = $isis->to_hash($mfn);
469    
470 dpavlin 27 It has ability to convert characters (using C<hash_filter>) from ISIS
471 dpavlin 15 database before creating structures enabling character re-mapping or quick
472     fix-up of data.
473 dpavlin 12
474     This function returns hash which is like this:
475    
476     $hash = {
477     '210' => [
478     {
479     'c' => 'New York University press',
480     'a' => 'New York',
481     'd' => 'cop. 1988'
482     }
483     ],
484     '990' => [
485     '2140',
486     '88',
487     'HAY'
488     ],
489     };
490    
491 dpavlin 15 You can later use that hash to produce any output from ISIS data.
492 dpavlin 12
493 dpavlin 15 If database is created using IsisMarc, it will also have to special fields
494     which will be used for identifiers, C<i1> and C<i2> like this:
495    
496     '200' => [
497     {
498     'i1' => '1',
499     'i2' => ' '
500     'a' => 'Goa',
501     'f' => 'Valdo D\'Arienzo',
502     'e' => 'tipografie e tipografi nel XVI secolo',
503     }
504     ],
505    
506 dpavlin 50 In case there are repeatable subfields in record, this will create
507     following structure:
508    
509     '900' => [ {
510     'a' => [ 'foo', 'bar', 'baz' ],
511     }]
512    
513 dpavlin 57 Or in more complex example of
514    
515     902 ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
516    
517     it will create
518    
519     902 => [
520     { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
521     ],
522    
523     This behaviour can be changed using C<join_subfields_with> option to L</new>,
524     in which case C<to_hash> will always create single value for each subfield.
525     This will change result to:
526    
527    
528    
529 dpavlin 15 This method will also create additional field C<000> with MFN.
530    
531 dpavlin 56 There is also more elaborative way to call C<to_hash> like this:
532    
533     my $hash = $isis->to_hash({
534     mfn => 42,
535 dpavlin 57 include_subfields => 1,
536 dpavlin 56 });
537    
538 dpavlin 57 Each option controll creation of hash:
539    
540     =over 4
541    
542     =item mfn
543    
544     Specify MFN number of record
545    
546     =item include_subfields
547    
548     This option will create additional key in hash called C<subfields> which will
549     have original record subfield order and index to that subfield like this:
550    
551     902 => [ {
552     a => ["a1", "a2", "a3", "a4", "a5"],
553     b => ["b1", "b2"],
554     c => "c1",
555     subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
556     } ],
557    
558     =item join_subfields_with
559    
560     Define delimiter which will be used to join repeatable subfields. You can
561 dpavlin 58 specify option here instead in L</new> if you want to have per-record control.
562 dpavlin 57
563 dpavlin 66 =item hash_filter
564    
565     You can override C<hash_filter> defined in L</new> using this option.
566    
567 dpavlin 57 =back
568    
569 dpavlin 12 =cut
570    
571     sub to_hash {
572     my $self = shift;
573    
574 dpavlin 56
575 dpavlin 12 my $mfn = shift || confess "need mfn!";
576 dpavlin 56 my $arg;
577 dpavlin 12
578 dpavlin 66 my $hash_filter = $self->{hash_filter};
579    
580 dpavlin 56 if (ref($mfn) eq 'HASH') {
581     $arg = $mfn;
582     $mfn = $arg->{mfn} || confess "need mfn in arguments";
583 dpavlin 66 $hash_filter = $arg->{hash_filter} if ($arg->{hash_filter});
584 dpavlin 56 }
585    
586 dpavlin 15 # init record to include MFN as field 000
587 dpavlin 16 my $rec = { '000' => [ $mfn ] };
588 dpavlin 15
589 dpavlin 41 my $row = $self->fetch($mfn) || return;
590 dpavlin 12
591 dpavlin 69 my $j_rs = $arg->{join_subfields_with} || $self->{join_subfields_with};
592 dpavlin 58 $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
593 dpavlin 57 my $i_sf = $arg->{include_subfields};
594 dpavlin 12
595 dpavlin 57 foreach my $f_nr (keys %{$row}) {
596     foreach my $l (@{$row->{$f_nr}}) {
597    
598 dpavlin 12 # filter output
599 dpavlin 66 $l = $hash_filter->($l, $f_nr) if ($hash_filter);
600     next unless defined($l);
601 dpavlin 12
602 dpavlin 15 my $val;
603 dpavlin 57 my $r_sf; # repeatable subfields in this record
604 dpavlin 15
605     # has identifiers?
606 dpavlin 23 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
607 dpavlin 15
608 dpavlin 12 # has subfields?
609     if ($l =~ m/\^/) {
610     foreach my $t (split(/\^/,$l)) {
611     next if (! $t);
612 dpavlin 50 my ($sf,$v) = (substr($t,0,1), substr($t,1));
613 dpavlin 57 # XXX this might be option, but why?
614 dpavlin 67 next unless (defined($v) && $v ne '');
615 dpavlin 57 # warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
616 dpavlin 54
617 dpavlin 50 if (ref( $val->{$sf} ) eq 'ARRAY') {
618 dpavlin 54
619 dpavlin 50 push @{ $val->{$sf} }, $v;
620 dpavlin 57
621     # record repeatable subfield it it's offset
622     push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
623     $r_sf->{$sf}++;
624    
625 dpavlin 50 } elsif (defined( $val->{$sf} )) {
626 dpavlin 57
627 dpavlin 50 # convert scalar field to array
628     $val->{$sf} = [ $val->{$sf}, $v ];
629 dpavlin 57
630     push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
631     $r_sf->{$sf}++;
632    
633 dpavlin 50 } else {
634     $val->{$sf} = $v;
635 dpavlin 57 push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
636 dpavlin 50 }
637 dpavlin 12 }
638     } else {
639     $val = $l;
640     }
641    
642 dpavlin 57 if ($j_rs) {
643     map {
644     $val->{$_} = join($j_rs, @{ $val->{$_} });
645     } keys %$r_sf
646     }
647    
648     push @{$rec->{$f_nr}}, $val;
649 dpavlin 12 }
650     }
651    
652     return $rec;
653     }
654    
655 dpavlin 15 =head2 tag_name
656 dpavlin 1
657 dpavlin 15 Return name of selected tag
658 dpavlin 1
659 dpavlin 15 print $isis->tag_name('200');
660    
661     =cut
662    
663     sub tag_name {
664 dpavlin 1 my $self = shift;
665 dpavlin 15 my $tag = shift || return;
666     return $self->{'TagName'}->{$tag} || $tag;
667 dpavlin 1 }
668    
669 dpavlin 35
670     =head2 read_cnt
671    
672     Read content of C<.CNT> file and return hash containing it.
673    
674     print Dumper($isis->read_cnt);
675    
676     This function is not used by module (C<.CNT> files are not required for this
677     module to work), but it can be useful to examine your index (while debugging
678     for example).
679    
680     =cut
681    
682     sub read_cnt {
683     my $self = shift;
684    
685     croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
686    
687     # Get the index information from $db.CNT
688    
689     open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
690     binmode($fileCNT);
691    
692     my $buff;
693    
694     read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
695     $self->unpack_cnt($buff);
696    
697     read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
698     $self->unpack_cnt($buff);
699    
700     close($fileCNT);
701    
702     return $self->{cnt};
703     }
704    
705     =head2 unpack_cnt
706    
707     Unpack one of two 26 bytes fixed length record in C<.CNT> file.
708    
709     Here is definition of record:
710    
711     off key description size
712     0: IDTYPE BTree type s
713     2: ORDN Nodes Order s
714     4: ORDF Leafs Order s
715     6: N Number of Memory buffers for nodes s
716     8: K Number of buffers for first level index s
717     10: LIV Current number of Index Levels s
718     12: POSRX Pointer to Root Record in N0x l
719     16: NMAXPOS Next Available position in N0x l
720     20: FMAXPOS Next available position in L0x l
721     24: ABNORMAL Formal BTree normality indicator s
722     length: 26 bytes
723    
724     This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
725    
726     =cut
727    
728     sub unpack_cnt {
729     my $self = shift;
730    
731     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
732    
733     my $buff = shift || return;
734     my @arr = unpack("vvvvvvVVVv", $buff);
735    
736     print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
737    
738     my $IDTYPE = shift @arr;
739     foreach (@flds) {
740     $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
741     }
742     }
743    
744 dpavlin 1 1;
745    
746     =head1 BUGS
747    
748 dpavlin 27 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
749     some variations in input databases which has been tested with this module.
750     When I was in doubt, I assumed that OpenIsis's implementation was right
751     (except for obvious bugs).
752 dpavlin 1
753 dpavlin 27 However, every effort has been made to test this module with as much
754     databases (and programs that create them) as possible.
755    
756     I would be very greatful for success or failure reports about usage of this
757     module with databases from programs other than WinIsis and IsisMarc. I had
758     tested this against ouput of one C<isis.dll>-based application, but I don't
759     know any details about it's version.
760    
761 dpavlin 54 =head1 VERSIONS
762    
763 dpavlin 57 As this is young module, new features are added in subsequent version. It's
764     a good idea to specify version when using this module like this:
765 dpavlin 54
766 dpavlin 66 use Biblio::Isis 0.23
767 dpavlin 57
768     Below is list of changes in specific version of module (so you can target
769     older versions if you really have to):
770    
771 dpavlin 54 =over 8
772    
773 dpavlin 70 =item 0.24
774    
775     Added C<ignore_empty_subfields>
776    
777 dpavlin 66 =item 0.23
778    
779     Added C<hash_filter> to L</to_hash>
780    
781 dpavlin 69 Fixed bug with documented C<join_subfields_with> in L</new> which wasn't
782     implemented
783    
784 dpavlin 65 =item 0.22
785    
786     Added field number when calling C<hash_filter>
787    
788 dpavlin 57 =item 0.21
789    
790     Added C<join_subfields_with> to L</new> and L</to_hash>.
791    
792     Added C<include_subfields> to L</to_hash>.
793    
794 dpavlin 54 =item 0.20
795    
796 dpavlin 56 Added C<< $isis->mfn >>, support for repeatable subfields and
797     C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
798 dpavlin 54
799     =back
800    
801 dpavlin 1 =head1 AUTHOR
802    
803     Dobrica Pavlinusic
804     CPAN ID: DPAVLIN
805     dpavlin@rot13.org
806     http://www.rot13.org/~dpavlin/
807    
808 dpavlin 15 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
809     written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
810 dpavlin 1
811     =head1 COPYRIGHT
812    
813     This program is free software; you can redistribute
814     it and/or modify it under the same terms as Perl itself.
815    
816     The full text of the license can be found in the
817     LICENSE file included with this module.
818    
819    
820     =head1 SEE ALSO
821    
822 dpavlin 59 L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
823    
824 dpavlin 15 OpenIsis web site L<http://www.openisis.org>
825 dpavlin 1
826 dpavlin 15 perl4lib site L<http://perl4lib.perl.org>
827    

  ViewVC Help
Powered by ViewVC 1.1.26