/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 61 - (hide annotations)
Sun Jul 9 21:36:33 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 19342 byte(s)
initial implementation of split_on_delimiters functionality needed for
Webpac. It adds just regexpes hash to to_hash, but I'm still not quite
satisfied with it.
1 dpavlin 36 package Biblio::Isis;
2 dpavlin 1 use strict;
3    
4     use Carp;
5 dpavlin 18 use File::Glob qw(:globally :nocase);
6    
7 dpavlin 1 BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 61 $VERSION = 0.22_1;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21 dpavlin 36 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25 dpavlin 36 use Biblio::Isis;
26 dpavlin 11
27 dpavlin 36 my $isis = new Biblio::Isis(
28 dpavlin 1 isisdb => './cds/cds',
29     );
30    
31 dpavlin 32 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32 dpavlin 11 print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37 dpavlin 15 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 dpavlin 27 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
39     seems to depriciate it's old C<XS> bindings for perl.
40 dpavlin 1
41 dpavlin 15 It can create hash values from data in ISIS database (using C<to_hash>),
42     ASCII dump (using C<to_ascii>) or just hash with field names and packed
43     values (like C<^asomething^belse>).
44 dpavlin 11
45     Unique feature of this module is ability to C<include_deleted> records.
46     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
47     fields which are zero sized will be filled with random junk from memory).
48    
49 dpavlin 15 It also has support for identifiers (only if ISIS database is created by
50     IsisMarc), see C<to_hash>.
51    
52 dpavlin 27 This module will always be slower than OpenIsis module which use C
53 dpavlin 15 library. However, since it's written in perl, it's platform independent (so
54     you don't need C compiler), and can be easily modified. I hope that it
55     creates data structures which are easier to use than ones created by
56     OpenIsis, so reduced time in other parts of the code should compensate for
57     slower performance of this module (speed of reading ISIS database is
58     rarely an issue).
59    
60 dpavlin 1 =head1 METHODS
61    
62     =cut
63    
64     # my $ORDN; # Nodes Order
65     # my $ORDF; # Leafs Order
66     # my $N; # Number of Memory buffers for nodes
67     # my $K; # Number of buffers for first level index
68     # my $LIV; # Current number of Index Levels
69     # my $POSRX; # Pointer to Root Record in N0x
70     # my $NMAXPOS; # Next Available position in N0x
71     # my $FMAXPOS; # Next available position in L0x
72     # my $ABNORMAL; # Formal BTree normality indicator
73    
74     #
75     # some binary reads
76     #
77    
78     =head2 new
79    
80 dpavlin 15 Open ISIS database
81 dpavlin 1
82 dpavlin 36 my $isis = new Biblio::Isis(
83 dpavlin 1 isisdb => './cds/cds',
84     read_fdt => 1,
85 dpavlin 12 include_deleted => 1,
86     hash_filter => sub {
87     my $v = shift;
88     $v =~ s#foo#bar#g;
89     },
90 dpavlin 1 debug => 1,
91 dpavlin 57 join_subfields_with => ' ; ',
92 dpavlin 1 );
93    
94 dpavlin 2 Options are described below:
95    
96     =over 5
97    
98 dpavlin 1 =item isisdb
99    
100 dpavlin 15 This is full or relative path to ISIS database files which include
101 dpavlin 18 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102     C<read_fdt> option) files.
103 dpavlin 1
104 dpavlin 15 In this example it uses C<./cds/cds.MST> and related files.
105    
106 dpavlin 1 =item read_fdt
107    
108     Boolean flag to specify if field definition table should be read. It's off
109     by default.
110    
111 dpavlin 9 =item include_deleted
112    
113 dpavlin 11 Don't skip logically deleted records in ISIS.
114 dpavlin 9
115 dpavlin 12 =item hash_filter
116    
117     Filter code ref which will be used before data is converted to hash.
118    
119     =item debug
120    
121 dpavlin 54 Dump a B<lot> of debugging output even at level 1. For even more increase level.
122 dpavlin 12
123 dpavlin 57 =item join_subfields_with
124    
125     Define delimiter which will be used to join repeatable subfields. This
126     option is included to support lagacy application written against version
127     older than 0.21 of this module. By default, it disabled. See L</to_hash>.
128    
129 dpavlin 2 =back
130    
131 dpavlin 1 =cut
132    
133     sub new {
134     my $class = shift;
135     my $self = {};
136     bless($self, $class);
137    
138 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
139 dpavlin 1
140 dpavlin 12 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
141 dpavlin 9 $self->{$v} = {@_}->{$v};
142     }
143 dpavlin 1
144 dpavlin 18 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
145    
146     foreach my $f (@isis_files) {
147     my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
148     $self->{lc($ext)."_file"} = $f;
149     }
150    
151     my @must_exist = qw(mst xrf);
152     push @must_exist, "fdt" if ($self->{read_fdt});
153    
154     foreach my $ext (@must_exist) {
155 dpavlin 39 unless ($self->{$ext."_file"}) {
156     carp "missing ",uc($ext)," file in ",$self->{isisdb};
157     return;
158     }
159 dpavlin 18 }
160    
161 dpavlin 45 if ($self->{debug}) {
162     print STDERR "## using files: ",join(" ",@isis_files),"\n";
163     eval "use Data::Dump";
164 dpavlin 18
165 dpavlin 45 if (! $@) {
166     *Dumper = *Data::Dump::dump;
167     } else {
168     use Data::Dumper;
169     }
170     }
171    
172 dpavlin 1 # if you want to read .FDT file use read_fdt argument when creating class!
173 dpavlin 18 if ($self->{read_fdt} && -e $self->{fdt_file}) {
174 dpavlin 1
175     # read the $db.FDT file for tags
176     my $fieldzone=0;
177    
178 dpavlin 33 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
179     binmode($fileFDT);
180 dpavlin 1
181 dpavlin 33 while (<$fileFDT>) {
182 dpavlin 1 chomp;
183     if ($fieldzone) {
184     my $name=substr($_,0,30);
185     my $tag=substr($_,50,3);
186    
187     $name =~ s/\s+$//;
188     $tag =~ s/\s+$//;
189    
190     $self->{'TagName'}->{$tag}=$name;
191     }
192    
193     if (/^\*\*\*/) {
194     $fieldzone=1;
195     }
196     }
197    
198 dpavlin 33 close($fileFDT);
199 dpavlin 1 }
200    
201     # Get the Maximum MFN from $db.MST
202    
203 dpavlin 18 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
204 dpavlin 33 binmode($self->{'fileMST'});
205 dpavlin 1
206     # MST format: (* = 32 bit signed)
207     # CTLMFN* always 0
208     # NXTMFN* MFN to be assigned to the next record created
209     # NXTMFB* last block allocated to master file
210     # NXTMFP offset to next available position in last block
211     # MFTYPE always 0 for user db file (1 for system)
212 dpavlin 34 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
213 dpavlin 1
214 dpavlin 11 my $buff;
215    
216 dpavlin 34 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
217     $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
218 dpavlin 11
219 dpavlin 45 print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
220 dpavlin 18
221     # open files for later
222     open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
223 dpavlin 33 binmode($self->{'fileXRF'});
224 dpavlin 18
225     $self ? return $self : return undef;
226     }
227    
228 dpavlin 32 =head2 count
229    
230     Return number of records in database
231    
232     print $isis->count;
233    
234     =cut
235    
236     sub count {
237     my $self = shift;
238     return $self->{'NXTMFN'} - 1;
239     }
240    
241 dpavlin 7 =head2 fetch
242 dpavlin 1
243 dpavlin 2 Read record with selected MFN
244 dpavlin 1
245 dpavlin 7 my $rec = $isis->fetch(55);
246 dpavlin 2
247     Returns hash with keys which are field names and values are unpacked values
248 dpavlin 15 for that field like this:
249 dpavlin 2
250 dpavlin 15 $rec = {
251     '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
252     '990' => [ '2140', '88', 'HAY' ],
253     };
254    
255 dpavlin 2 =cut
256    
257 dpavlin 7 sub fetch {
258 dpavlin 1 my $self = shift;
259    
260 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
261 dpavlin 1
262 dpavlin 16 # is mfn allready in memory?
263     my $old_mfn = $self->{'current_mfn'} || -1;
264 dpavlin 25 return $self->{record} if ($mfn == $old_mfn);
265 dpavlin 1
266 dpavlin 16 print STDERR "## fetch: $mfn\n" if ($self->{debug});
267    
268 dpavlin 1 # XXX check this?
269     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
270    
271 dpavlin 18 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
272 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
273 dpavlin 1
274 dpavlin 11 my $buff;
275    
276 dpavlin 25 # delete old record
277     delete $self->{record};
278    
279 dpavlin 1 # read XRFMFB abd XRFMFP
280 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
281 dpavlin 41 my $pointer=unpack("V",$buff);
282     if (! $pointer) {
283     if ($self->{include_deleted}) {
284     return;
285     } else {
286     warn "pointer for MFN $mfn is null\n";
287     return;
288     }
289     }
290 dpavlin 1
291 dpavlin 25 # check for logically deleted record
292 dpavlin 33 if ($pointer & 0x80000000) {
293 dpavlin 25 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
294     $self->{deleted} = $mfn;
295    
296     return unless $self->{include_deleted};
297    
298 dpavlin 33 # abs
299     $pointer = ($pointer ^ 0xffffffff) + 1;
300 dpavlin 25 }
301    
302 dpavlin 1 my $XRFMFB = int($pointer/2048);
303     my $XRFMFP = $pointer - ($XRFMFB*2048);
304    
305 dpavlin 16 # (XRFMFB - 1) * 512 + XRFMFP
306     # why do i have to do XRFMFP % 1024 ?
307 dpavlin 1
308 dpavlin 26 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
309 dpavlin 1
310 dpavlin 16 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
311 dpavlin 1
312     # Get Record Information
313    
314 dpavlin 33 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
315 dpavlin 1
316 dpavlin 33 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
317     my $value=unpack("V",$buff);
318 dpavlin 1
319 dpavlin 16 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
320    
321 dpavlin 1 if ($value!=$mfn) {
322 dpavlin 26 if ($value == 0) {
323     print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
324     $self->{deleted} = $mfn;
325     return;
326     }
327    
328     carp "Error: MFN ".$mfn." not found in MST file, found $value";
329     return;
330 dpavlin 1 }
331    
332 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
333 dpavlin 1
334 dpavlin 33 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
335 dpavlin 1
336 dpavlin 16 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
337 dpavlin 1
338 dpavlin 25 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
339 dpavlin 9
340 dpavlin 16 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
341    
342 dpavlin 1 # Get Directory Format
343    
344     my @FieldPOS;
345     my @FieldLEN;
346     my @FieldTAG;
347    
348 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
349    
350 dpavlin 16 my $rec_len = 0;
351 dpavlin 8
352 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
353    
354 dpavlin 33 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
355 dpavlin 1
356 dpavlin 16 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
357 dpavlin 1
358     # The TAG does not exists in .FDT so we set it to 0.
359     #
360     # XXX This is removed from perl version; .FDT file is updated manually, so
361     # you will often have fields in .MST file which aren't in .FDT. On the other
362     # hand, IsisMarc doesn't use .FDT files at all!
363    
364     #if (! $self->{TagName}->{$TAG}) {
365     # $TAG=0;
366     #}
367    
368     push @FieldTAG,$TAG;
369     push @FieldPOS,$POS;
370     push @FieldLEN,$LEN;
371 dpavlin 8
372 dpavlin 16 $rec_len += $LEN;
373 dpavlin 1 }
374    
375     # Get Variable Fields
376    
377 dpavlin 16 read($self->{'fileMST'},$buff,$rec_len);
378 dpavlin 8
379 dpavlin 16 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
380    
381 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
382 dpavlin 10 # skip zero-sized fields
383     next if ($FieldLEN[$i] == 0);
384    
385 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
386 dpavlin 1 }
387    
388 dpavlin 16 $self->{'current_mfn'} = $mfn;
389    
390 dpavlin 25 print STDERR Dumper($self),"\n" if ($self->{debug});
391 dpavlin 1
392 dpavlin 2 return $self->{'record'};
393 dpavlin 1 }
394    
395 dpavlin 54 =head2 mfn
396    
397     Returns current MFN position
398    
399     my $mfn = $isis->mfn;
400    
401     =cut
402    
403     # This function should be simple return $self->{current_mfn},
404     # but if new is called with _hack_mfn it becomes setter.
405     # It's useful in tests when setting $isis->{record} directly
406    
407     sub mfn {
408     my $self = shift;
409     return $self->{current_mfn};
410     };
411    
412    
413 dpavlin 2 =head2 to_ascii
414    
415 dpavlin 27 Returns ASCII output of record with specified MFN
416 dpavlin 2
417 dpavlin 15 print $isis->to_ascii(42);
418 dpavlin 2
419 dpavlin 27 This outputs something like this:
420 dpavlin 15
421     210 ^aNew York^cNew York University press^dcop. 1988
422     990 2140
423     990 88
424     990 HAY
425    
426     If C<read_fdt> is specified when calling C<new> it will display field names
427     from C<.FDT> file instead of numeric tags.
428    
429 dpavlin 2 =cut
430    
431     sub to_ascii {
432     my $self = shift;
433    
434     my $mfn = shift || croak "need MFN";
435    
436 dpavlin 41 my $rec = $self->fetch($mfn) || return;
437 dpavlin 2
438     my $out = "0\t$mfn";
439    
440     foreach my $f (sort keys %{$rec}) {
441 dpavlin 15 my $fn = $self->tag_name($f);
442     $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
443 dpavlin 2 }
444    
445     $out .= "\n";
446    
447     return $out;
448     }
449    
450 dpavlin 12 =head2 to_hash
451    
452 dpavlin 15 Read record with specified MFN and convert it to hash
453 dpavlin 12
454     my $hash = $isis->to_hash($mfn);
455    
456 dpavlin 27 It has ability to convert characters (using C<hash_filter>) from ISIS
457 dpavlin 15 database before creating structures enabling character re-mapping or quick
458     fix-up of data.
459 dpavlin 12
460     This function returns hash which is like this:
461    
462     $hash = {
463     '210' => [
464     {
465     'c' => 'New York University press',
466     'a' => 'New York',
467     'd' => 'cop. 1988'
468     }
469     ],
470     '990' => [
471     '2140',
472     '88',
473     'HAY'
474     ],
475     };
476    
477 dpavlin 15 You can later use that hash to produce any output from ISIS data.
478 dpavlin 12
479 dpavlin 15 If database is created using IsisMarc, it will also have to special fields
480     which will be used for identifiers, C<i1> and C<i2> like this:
481    
482     '200' => [
483     {
484     'i1' => '1',
485     'i2' => ' '
486     'a' => 'Goa',
487     'f' => 'Valdo D\'Arienzo',
488     'e' => 'tipografie e tipografi nel XVI secolo',
489     }
490     ],
491    
492 dpavlin 50 In case there are repeatable subfields in record, this will create
493     following structure:
494    
495     '900' => [ {
496     'a' => [ 'foo', 'bar', 'baz' ],
497     }]
498    
499 dpavlin 57 Or in more complex example of
500    
501     902 ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
502    
503     it will create
504    
505     902 => [
506     { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
507     ],
508    
509     This behaviour can be changed using C<join_subfields_with> option to L</new>,
510     in which case C<to_hash> will always create single value for each subfield.
511     This will change result to:
512    
513    
514    
515 dpavlin 15 This method will also create additional field C<000> with MFN.
516    
517 dpavlin 56 There is also more elaborative way to call C<to_hash> like this:
518    
519     my $hash = $isis->to_hash({
520     mfn => 42,
521 dpavlin 57 include_subfields => 1,
522 dpavlin 61 regexps => [
523     's/something/else/g',
524     ],
525 dpavlin 56 });
526    
527 dpavlin 57 Each option controll creation of hash:
528    
529     =over 4
530    
531     =item mfn
532    
533     Specify MFN number of record
534    
535     =item include_subfields
536    
537     This option will create additional key in hash called C<subfields> which will
538     have original record subfield order and index to that subfield like this:
539    
540     902 => [ {
541     a => ["a1", "a2", "a3", "a4", "a5"],
542     b => ["b1", "b2"],
543     c => "c1",
544     subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
545     } ],
546    
547     =item join_subfields_with
548    
549     Define delimiter which will be used to join repeatable subfields. You can
550 dpavlin 58 specify option here instead in L</new> if you want to have per-record control.
551 dpavlin 57
552     =back
553    
554 dpavlin 12 =cut
555    
556     sub to_hash {
557     my $self = shift;
558    
559 dpavlin 56
560 dpavlin 12 my $mfn = shift || confess "need mfn!";
561 dpavlin 56 my $arg;
562 dpavlin 12
563 dpavlin 56 if (ref($mfn) eq 'HASH') {
564     $arg = $mfn;
565     $mfn = $arg->{mfn} || confess "need mfn in arguments";
566     }
567    
568 dpavlin 61 confess "regexps must be HASH" if ($arg->{regexps} && ref($arg->{regexps}) ne 'HASH');
569    
570 dpavlin 15 # init record to include MFN as field 000
571 dpavlin 16 my $rec = { '000' => [ $mfn ] };
572 dpavlin 15
573 dpavlin 41 my $row = $self->fetch($mfn) || return;
574 dpavlin 12
575 dpavlin 58 my $j_rs = $arg->{join_subfields_with};
576     $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
577 dpavlin 57 my $i_sf = $arg->{include_subfields};
578 dpavlin 12
579 dpavlin 57 foreach my $f_nr (keys %{$row}) {
580     foreach my $l (@{$row->{$f_nr}}) {
581    
582 dpavlin 12 # filter output
583 dpavlin 44 if ($self->{'hash_filter'}) {
584     $l = $self->{'hash_filter'}->($l);
585     next unless defined($l);
586     }
587 dpavlin 12
588 dpavlin 61 # apply regexps
589     if ($arg->{regexps} && defined($arg->{regexps}->{$f_nr})) {
590     confess "regexps->{$f_nr} must be ARRAY" if (ref($arg->{regexps}->{$f_nr}) ne 'ARRAY');
591     my $c = 0;
592     foreach my $r (@{ $arg->{regexps}->{$f_nr} }) {
593     while ( eval '$l =~ ' . $r ) { $c++ };
594     }
595     warn "## field $f_nr triggered $c regexpes\n" if ($c && $self->{debug});
596     }
597    
598 dpavlin 15 my $val;
599 dpavlin 57 my $r_sf; # repeatable subfields in this record
600 dpavlin 15
601     # has identifiers?
602 dpavlin 23 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
603 dpavlin 15
604 dpavlin 12 # has subfields?
605     if ($l =~ m/\^/) {
606     foreach my $t (split(/\^/,$l)) {
607     next if (! $t);
608 dpavlin 50 my ($sf,$v) = (substr($t,0,1), substr($t,1));
609 dpavlin 57 # XXX this might be option, but why?
610 dpavlin 54 next unless ($v);
611 dpavlin 57 # warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
612 dpavlin 54
613 dpavlin 50 if (ref( $val->{$sf} ) eq 'ARRAY') {
614 dpavlin 54
615 dpavlin 50 push @{ $val->{$sf} }, $v;
616 dpavlin 57
617     # record repeatable subfield it it's offset
618     push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
619     $r_sf->{$sf}++;
620    
621 dpavlin 50 } elsif (defined( $val->{$sf} )) {
622 dpavlin 57
623 dpavlin 50 # convert scalar field to array
624     $val->{$sf} = [ $val->{$sf}, $v ];
625 dpavlin 57
626     push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
627     $r_sf->{$sf}++;
628    
629 dpavlin 50 } else {
630     $val->{$sf} = $v;
631 dpavlin 57 push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
632 dpavlin 50 }
633 dpavlin 12 }
634     } else {
635     $val = $l;
636     }
637    
638 dpavlin 57 if ($j_rs) {
639     map {
640     $val->{$_} = join($j_rs, @{ $val->{$_} });
641     } keys %$r_sf
642     }
643    
644     push @{$rec->{$f_nr}}, $val;
645 dpavlin 12 }
646     }
647    
648     return $rec;
649     }
650    
651 dpavlin 15 =head2 tag_name
652 dpavlin 1
653 dpavlin 15 Return name of selected tag
654 dpavlin 1
655 dpavlin 15 print $isis->tag_name('200');
656    
657     =cut
658    
659     sub tag_name {
660 dpavlin 1 my $self = shift;
661 dpavlin 15 my $tag = shift || return;
662     return $self->{'TagName'}->{$tag} || $tag;
663 dpavlin 1 }
664    
665 dpavlin 35
666     =head2 read_cnt
667    
668     Read content of C<.CNT> file and return hash containing it.
669    
670     print Dumper($isis->read_cnt);
671    
672     This function is not used by module (C<.CNT> files are not required for this
673     module to work), but it can be useful to examine your index (while debugging
674     for example).
675    
676     =cut
677    
678     sub read_cnt {
679     my $self = shift;
680    
681     croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
682    
683     # Get the index information from $db.CNT
684    
685     open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
686     binmode($fileCNT);
687    
688     my $buff;
689    
690     read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
691     $self->unpack_cnt($buff);
692    
693     read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
694     $self->unpack_cnt($buff);
695    
696     close($fileCNT);
697    
698     return $self->{cnt};
699     }
700    
701     =head2 unpack_cnt
702    
703     Unpack one of two 26 bytes fixed length record in C<.CNT> file.
704    
705     Here is definition of record:
706    
707     off key description size
708     0: IDTYPE BTree type s
709     2: ORDN Nodes Order s
710     4: ORDF Leafs Order s
711     6: N Number of Memory buffers for nodes s
712     8: K Number of buffers for first level index s
713     10: LIV Current number of Index Levels s
714     12: POSRX Pointer to Root Record in N0x l
715     16: NMAXPOS Next Available position in N0x l
716     20: FMAXPOS Next available position in L0x l
717     24: ABNORMAL Formal BTree normality indicator s
718     length: 26 bytes
719    
720     This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
721    
722     =cut
723    
724     sub unpack_cnt {
725     my $self = shift;
726    
727     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
728    
729     my $buff = shift || return;
730     my @arr = unpack("vvvvvvVVVv", $buff);
731    
732     print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
733    
734     my $IDTYPE = shift @arr;
735     foreach (@flds) {
736     $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
737     }
738     }
739    
740 dpavlin 1 1;
741    
742     =head1 BUGS
743    
744 dpavlin 27 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
745     some variations in input databases which has been tested with this module.
746     When I was in doubt, I assumed that OpenIsis's implementation was right
747     (except for obvious bugs).
748 dpavlin 1
749 dpavlin 27 However, every effort has been made to test this module with as much
750     databases (and programs that create them) as possible.
751    
752     I would be very greatful for success or failure reports about usage of this
753     module with databases from programs other than WinIsis and IsisMarc. I had
754     tested this against ouput of one C<isis.dll>-based application, but I don't
755     know any details about it's version.
756    
757 dpavlin 54 =head1 VERSIONS
758    
759 dpavlin 57 As this is young module, new features are added in subsequent version. It's
760     a good idea to specify version when using this module like this:
761 dpavlin 54
762 dpavlin 57 use Biblio::Isis 0.21
763    
764     Below is list of changes in specific version of module (so you can target
765     older versions if you really have to):
766    
767 dpavlin 54 =over 8
768    
769 dpavlin 57 =item 0.21
770    
771     Added C<join_subfields_with> to L</new> and L</to_hash>.
772    
773     Added C<include_subfields> to L</to_hash>.
774    
775 dpavlin 54 =item 0.20
776    
777 dpavlin 56 Added C<< $isis->mfn >>, support for repeatable subfields and
778     C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
779 dpavlin 54
780     =back
781    
782 dpavlin 1 =head1 AUTHOR
783    
784     Dobrica Pavlinusic
785     CPAN ID: DPAVLIN
786     dpavlin@rot13.org
787     http://www.rot13.org/~dpavlin/
788    
789 dpavlin 15 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
790     written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
791 dpavlin 1
792     =head1 COPYRIGHT
793    
794     This program is free software; you can redistribute
795     it and/or modify it under the same terms as Perl itself.
796    
797     The full text of the license can be found in the
798     LICENSE file included with this module.
799    
800    
801     =head1 SEE ALSO
802    
803 dpavlin 59 L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
804    
805 dpavlin 15 OpenIsis web site L<http://www.openisis.org>
806 dpavlin 1
807 dpavlin 15 perl4lib site L<http://perl4lib.perl.org>
808    

  ViewVC Help
Powered by ViewVC 1.1.26