/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 62 - (hide annotations)
Mon Jul 10 12:01:04 2006 UTC (17 years, 8 months ago) by dpavlin
File size: 19701 byte(s)
added regexpes to new as option
1 dpavlin 36 package Biblio::Isis;
2 dpavlin 1 use strict;
3    
4     use Carp;
5 dpavlin 18 use File::Glob qw(:globally :nocase);
6    
7 dpavlin 1 BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 62 $VERSION = 0.22_2;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21 dpavlin 36 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25 dpavlin 36 use Biblio::Isis;
26 dpavlin 11
27 dpavlin 36 my $isis = new Biblio::Isis(
28 dpavlin 1 isisdb => './cds/cds',
29     );
30    
31 dpavlin 32 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32 dpavlin 11 print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37 dpavlin 15 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 dpavlin 27 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
39     seems to depriciate it's old C<XS> bindings for perl.
40 dpavlin 1
41 dpavlin 15 It can create hash values from data in ISIS database (using C<to_hash>),
42     ASCII dump (using C<to_ascii>) or just hash with field names and packed
43     values (like C<^asomething^belse>).
44 dpavlin 11
45     Unique feature of this module is ability to C<include_deleted> records.
46     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
47     fields which are zero sized will be filled with random junk from memory).
48    
49 dpavlin 15 It also has support for identifiers (only if ISIS database is created by
50     IsisMarc), see C<to_hash>.
51    
52 dpavlin 27 This module will always be slower than OpenIsis module which use C
53 dpavlin 15 library. However, since it's written in perl, it's platform independent (so
54     you don't need C compiler), and can be easily modified. I hope that it
55     creates data structures which are easier to use than ones created by
56     OpenIsis, so reduced time in other parts of the code should compensate for
57     slower performance of this module (speed of reading ISIS database is
58     rarely an issue).
59    
60 dpavlin 1 =head1 METHODS
61    
62     =cut
63    
64     # my $ORDN; # Nodes Order
65     # my $ORDF; # Leafs Order
66     # my $N; # Number of Memory buffers for nodes
67     # my $K; # Number of buffers for first level index
68     # my $LIV; # Current number of Index Levels
69     # my $POSRX; # Pointer to Root Record in N0x
70     # my $NMAXPOS; # Next Available position in N0x
71     # my $FMAXPOS; # Next available position in L0x
72     # my $ABNORMAL; # Formal BTree normality indicator
73    
74     #
75     # some binary reads
76     #
77    
78     =head2 new
79    
80 dpavlin 15 Open ISIS database
81 dpavlin 1
82 dpavlin 36 my $isis = new Biblio::Isis(
83 dpavlin 1 isisdb => './cds/cds',
84     read_fdt => 1,
85 dpavlin 12 include_deleted => 1,
86     hash_filter => sub {
87     my $v = shift;
88     $v =~ s#foo#bar#g;
89     },
90 dpavlin 1 debug => 1,
91 dpavlin 57 join_subfields_with => ' ; ',
92 dpavlin 62 regexps => [
93     's/something/else/g',
94     ],
95 dpavlin 1 );
96    
97 dpavlin 2 Options are described below:
98    
99     =over 5
100    
101 dpavlin 1 =item isisdb
102    
103 dpavlin 15 This is full or relative path to ISIS database files which include
104 dpavlin 18 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
105     C<read_fdt> option) files.
106 dpavlin 1
107 dpavlin 15 In this example it uses C<./cds/cds.MST> and related files.
108    
109 dpavlin 1 =item read_fdt
110    
111     Boolean flag to specify if field definition table should be read. It's off
112     by default.
113    
114 dpavlin 9 =item include_deleted
115    
116 dpavlin 11 Don't skip logically deleted records in ISIS.
117 dpavlin 9
118 dpavlin 12 =item hash_filter
119    
120     Filter code ref which will be used before data is converted to hash.
121    
122     =item debug
123    
124 dpavlin 54 Dump a B<lot> of debugging output even at level 1. For even more increase level.
125 dpavlin 12
126 dpavlin 57 =item join_subfields_with
127    
128     Define delimiter which will be used to join repeatable subfields. This
129     option is included to support lagacy application written against version
130     older than 0.21 of this module. By default, it disabled. See L</to_hash>.
131    
132 dpavlin 62 =item regexpes
133    
134     Define (any number) of regexpes to apply at field values before they are
135     splitted into subfield. This is great place to split subfields in input to
136     mulitple subfields if needed or rename subfields.
137    
138 dpavlin 2 =back
139    
140 dpavlin 1 =cut
141    
142     sub new {
143     my $class = shift;
144     my $self = {};
145     bless($self, $class);
146    
147 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
148 dpavlin 1
149 dpavlin 12 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
150 dpavlin 9 $self->{$v} = {@_}->{$v};
151     }
152 dpavlin 1
153 dpavlin 18 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
154    
155     foreach my $f (@isis_files) {
156     my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
157     $self->{lc($ext)."_file"} = $f;
158     }
159    
160     my @must_exist = qw(mst xrf);
161     push @must_exist, "fdt" if ($self->{read_fdt});
162    
163     foreach my $ext (@must_exist) {
164 dpavlin 39 unless ($self->{$ext."_file"}) {
165     carp "missing ",uc($ext)," file in ",$self->{isisdb};
166     return;
167     }
168 dpavlin 18 }
169    
170 dpavlin 45 if ($self->{debug}) {
171     print STDERR "## using files: ",join(" ",@isis_files),"\n";
172     eval "use Data::Dump";
173 dpavlin 18
174 dpavlin 45 if (! $@) {
175     *Dumper = *Data::Dump::dump;
176     } else {
177     use Data::Dumper;
178     }
179     }
180    
181 dpavlin 1 # if you want to read .FDT file use read_fdt argument when creating class!
182 dpavlin 18 if ($self->{read_fdt} && -e $self->{fdt_file}) {
183 dpavlin 1
184     # read the $db.FDT file for tags
185     my $fieldzone=0;
186    
187 dpavlin 33 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
188     binmode($fileFDT);
189 dpavlin 1
190 dpavlin 33 while (<$fileFDT>) {
191 dpavlin 1 chomp;
192     if ($fieldzone) {
193     my $name=substr($_,0,30);
194     my $tag=substr($_,50,3);
195    
196     $name =~ s/\s+$//;
197     $tag =~ s/\s+$//;
198    
199     $self->{'TagName'}->{$tag}=$name;
200     }
201    
202     if (/^\*\*\*/) {
203     $fieldzone=1;
204     }
205     }
206    
207 dpavlin 33 close($fileFDT);
208 dpavlin 1 }
209    
210     # Get the Maximum MFN from $db.MST
211    
212 dpavlin 18 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
213 dpavlin 33 binmode($self->{'fileMST'});
214 dpavlin 1
215     # MST format: (* = 32 bit signed)
216     # CTLMFN* always 0
217     # NXTMFN* MFN to be assigned to the next record created
218     # NXTMFB* last block allocated to master file
219     # NXTMFP offset to next available position in last block
220     # MFTYPE always 0 for user db file (1 for system)
221 dpavlin 34 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
222 dpavlin 1
223 dpavlin 11 my $buff;
224    
225 dpavlin 34 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
226     $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
227 dpavlin 11
228 dpavlin 45 print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
229 dpavlin 18
230     # open files for later
231     open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
232 dpavlin 33 binmode($self->{'fileXRF'});
233 dpavlin 18
234     $self ? return $self : return undef;
235     }
236    
237 dpavlin 32 =head2 count
238    
239     Return number of records in database
240    
241     print $isis->count;
242    
243     =cut
244    
245     sub count {
246     my $self = shift;
247     return $self->{'NXTMFN'} - 1;
248     }
249    
250 dpavlin 7 =head2 fetch
251 dpavlin 1
252 dpavlin 2 Read record with selected MFN
253 dpavlin 1
254 dpavlin 7 my $rec = $isis->fetch(55);
255 dpavlin 2
256     Returns hash with keys which are field names and values are unpacked values
257 dpavlin 15 for that field like this:
258 dpavlin 2
259 dpavlin 15 $rec = {
260     '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
261     '990' => [ '2140', '88', 'HAY' ],
262     };
263    
264 dpavlin 2 =cut
265    
266 dpavlin 7 sub fetch {
267 dpavlin 1 my $self = shift;
268    
269 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
270 dpavlin 1
271 dpavlin 16 # is mfn allready in memory?
272     my $old_mfn = $self->{'current_mfn'} || -1;
273 dpavlin 25 return $self->{record} if ($mfn == $old_mfn);
274 dpavlin 1
275 dpavlin 16 print STDERR "## fetch: $mfn\n" if ($self->{debug});
276    
277 dpavlin 1 # XXX check this?
278     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
279    
280 dpavlin 18 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
281 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
282 dpavlin 1
283 dpavlin 11 my $buff;
284    
285 dpavlin 25 # delete old record
286     delete $self->{record};
287    
288 dpavlin 1 # read XRFMFB abd XRFMFP
289 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
290 dpavlin 41 my $pointer=unpack("V",$buff);
291     if (! $pointer) {
292     if ($self->{include_deleted}) {
293     return;
294     } else {
295     warn "pointer for MFN $mfn is null\n";
296     return;
297     }
298     }
299 dpavlin 1
300 dpavlin 25 # check for logically deleted record
301 dpavlin 33 if ($pointer & 0x80000000) {
302 dpavlin 25 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
303     $self->{deleted} = $mfn;
304    
305     return unless $self->{include_deleted};
306    
307 dpavlin 33 # abs
308     $pointer = ($pointer ^ 0xffffffff) + 1;
309 dpavlin 25 }
310    
311 dpavlin 1 my $XRFMFB = int($pointer/2048);
312     my $XRFMFP = $pointer - ($XRFMFB*2048);
313    
314 dpavlin 16 # (XRFMFB - 1) * 512 + XRFMFP
315     # why do i have to do XRFMFP % 1024 ?
316 dpavlin 1
317 dpavlin 26 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
318 dpavlin 1
319 dpavlin 16 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
320 dpavlin 1
321     # Get Record Information
322    
323 dpavlin 33 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
324 dpavlin 1
325 dpavlin 33 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
326     my $value=unpack("V",$buff);
327 dpavlin 1
328 dpavlin 16 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
329    
330 dpavlin 1 if ($value!=$mfn) {
331 dpavlin 26 if ($value == 0) {
332     print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
333     $self->{deleted} = $mfn;
334     return;
335     }
336    
337     carp "Error: MFN ".$mfn." not found in MST file, found $value";
338     return;
339 dpavlin 1 }
340    
341 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
342 dpavlin 1
343 dpavlin 33 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
344 dpavlin 1
345 dpavlin 16 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
346 dpavlin 1
347 dpavlin 25 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
348 dpavlin 9
349 dpavlin 16 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
350    
351 dpavlin 1 # Get Directory Format
352    
353     my @FieldPOS;
354     my @FieldLEN;
355     my @FieldTAG;
356    
357 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
358    
359 dpavlin 16 my $rec_len = 0;
360 dpavlin 8
361 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
362    
363 dpavlin 33 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
364 dpavlin 1
365 dpavlin 16 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
366 dpavlin 1
367     # The TAG does not exists in .FDT so we set it to 0.
368     #
369     # XXX This is removed from perl version; .FDT file is updated manually, so
370     # you will often have fields in .MST file which aren't in .FDT. On the other
371     # hand, IsisMarc doesn't use .FDT files at all!
372    
373     #if (! $self->{TagName}->{$TAG}) {
374     # $TAG=0;
375     #}
376    
377     push @FieldTAG,$TAG;
378     push @FieldPOS,$POS;
379     push @FieldLEN,$LEN;
380 dpavlin 8
381 dpavlin 16 $rec_len += $LEN;
382 dpavlin 1 }
383    
384     # Get Variable Fields
385    
386 dpavlin 16 read($self->{'fileMST'},$buff,$rec_len);
387 dpavlin 8
388 dpavlin 16 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
389    
390 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
391 dpavlin 10 # skip zero-sized fields
392     next if ($FieldLEN[$i] == 0);
393    
394 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
395 dpavlin 1 }
396    
397 dpavlin 16 $self->{'current_mfn'} = $mfn;
398    
399 dpavlin 25 print STDERR Dumper($self),"\n" if ($self->{debug});
400 dpavlin 1
401 dpavlin 2 return $self->{'record'};
402 dpavlin 1 }
403    
404 dpavlin 54 =head2 mfn
405    
406     Returns current MFN position
407    
408     my $mfn = $isis->mfn;
409    
410     =cut
411    
412     # This function should be simple return $self->{current_mfn},
413     # but if new is called with _hack_mfn it becomes setter.
414     # It's useful in tests when setting $isis->{record} directly
415    
416     sub mfn {
417     my $self = shift;
418     return $self->{current_mfn};
419     };
420    
421    
422 dpavlin 2 =head2 to_ascii
423    
424 dpavlin 27 Returns ASCII output of record with specified MFN
425 dpavlin 2
426 dpavlin 15 print $isis->to_ascii(42);
427 dpavlin 2
428 dpavlin 27 This outputs something like this:
429 dpavlin 15
430     210 ^aNew York^cNew York University press^dcop. 1988
431     990 2140
432     990 88
433     990 HAY
434    
435     If C<read_fdt> is specified when calling C<new> it will display field names
436     from C<.FDT> file instead of numeric tags.
437    
438 dpavlin 2 =cut
439    
440     sub to_ascii {
441     my $self = shift;
442    
443     my $mfn = shift || croak "need MFN";
444    
445 dpavlin 41 my $rec = $self->fetch($mfn) || return;
446 dpavlin 2
447     my $out = "0\t$mfn";
448    
449     foreach my $f (sort keys %{$rec}) {
450 dpavlin 15 my $fn = $self->tag_name($f);
451     $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
452 dpavlin 2 }
453    
454     $out .= "\n";
455    
456     return $out;
457     }
458    
459 dpavlin 12 =head2 to_hash
460    
461 dpavlin 15 Read record with specified MFN and convert it to hash
462 dpavlin 12
463     my $hash = $isis->to_hash($mfn);
464    
465 dpavlin 27 It has ability to convert characters (using C<hash_filter>) from ISIS
466 dpavlin 15 database before creating structures enabling character re-mapping or quick
467     fix-up of data.
468 dpavlin 12
469     This function returns hash which is like this:
470    
471     $hash = {
472     '210' => [
473     {
474     'c' => 'New York University press',
475     'a' => 'New York',
476     'd' => 'cop. 1988'
477     }
478     ],
479     '990' => [
480     '2140',
481     '88',
482     'HAY'
483     ],
484     };
485    
486 dpavlin 15 You can later use that hash to produce any output from ISIS data.
487 dpavlin 12
488 dpavlin 15 If database is created using IsisMarc, it will also have to special fields
489     which will be used for identifiers, C<i1> and C<i2> like this:
490    
491     '200' => [
492     {
493     'i1' => '1',
494     'i2' => ' '
495     'a' => 'Goa',
496     'f' => 'Valdo D\'Arienzo',
497     'e' => 'tipografie e tipografi nel XVI secolo',
498     }
499     ],
500    
501 dpavlin 50 In case there are repeatable subfields in record, this will create
502     following structure:
503    
504     '900' => [ {
505     'a' => [ 'foo', 'bar', 'baz' ],
506     }]
507    
508 dpavlin 57 Or in more complex example of
509    
510     902 ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
511    
512     it will create
513    
514     902 => [
515     { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
516     ],
517    
518     This behaviour can be changed using C<join_subfields_with> option to L</new>,
519     in which case C<to_hash> will always create single value for each subfield.
520     This will change result to:
521    
522    
523    
524 dpavlin 15 This method will also create additional field C<000> with MFN.
525    
526 dpavlin 56 There is also more elaborative way to call C<to_hash> like this:
527    
528     my $hash = $isis->to_hash({
529     mfn => 42,
530 dpavlin 57 include_subfields => 1,
531 dpavlin 61 regexps => [
532     's/something/else/g',
533     ],
534 dpavlin 56 });
535    
536 dpavlin 57 Each option controll creation of hash:
537    
538     =over 4
539    
540     =item mfn
541    
542     Specify MFN number of record
543    
544     =item include_subfields
545    
546     This option will create additional key in hash called C<subfields> which will
547     have original record subfield order and index to that subfield like this:
548    
549     902 => [ {
550     a => ["a1", "a2", "a3", "a4", "a5"],
551     b => ["b1", "b2"],
552     c => "c1",
553     subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
554     } ],
555    
556     =item join_subfields_with
557    
558     Define delimiter which will be used to join repeatable subfields. You can
559 dpavlin 58 specify option here instead in L</new> if you want to have per-record control.
560 dpavlin 57
561 dpavlin 62 =item regexpes
562    
563     Override C<regexpes> specified in L</new>.
564    
565 dpavlin 57 =back
566    
567 dpavlin 12 =cut
568    
569     sub to_hash {
570     my $self = shift;
571    
572 dpavlin 56
573 dpavlin 12 my $mfn = shift || confess "need mfn!";
574 dpavlin 56 my $arg;
575 dpavlin 12
576 dpavlin 56 if (ref($mfn) eq 'HASH') {
577     $arg = $mfn;
578     $mfn = $arg->{mfn} || confess "need mfn in arguments";
579     }
580    
581 dpavlin 62 $arg->{regexpes} ||= $self->{regexpes};
582    
583 dpavlin 61 confess "regexps must be HASH" if ($arg->{regexps} && ref($arg->{regexps}) ne 'HASH');
584    
585 dpavlin 15 # init record to include MFN as field 000
586 dpavlin 16 my $rec = { '000' => [ $mfn ] };
587 dpavlin 15
588 dpavlin 41 my $row = $self->fetch($mfn) || return;
589 dpavlin 12
590 dpavlin 58 my $j_rs = $arg->{join_subfields_with};
591     $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
592 dpavlin 57 my $i_sf = $arg->{include_subfields};
593 dpavlin 12
594 dpavlin 57 foreach my $f_nr (keys %{$row}) {
595     foreach my $l (@{$row->{$f_nr}}) {
596    
597 dpavlin 12 # filter output
598 dpavlin 44 if ($self->{'hash_filter'}) {
599     $l = $self->{'hash_filter'}->($l);
600     next unless defined($l);
601     }
602 dpavlin 12
603 dpavlin 61 # apply regexps
604     if ($arg->{regexps} && defined($arg->{regexps}->{$f_nr})) {
605     confess "regexps->{$f_nr} must be ARRAY" if (ref($arg->{regexps}->{$f_nr}) ne 'ARRAY');
606     my $c = 0;
607     foreach my $r (@{ $arg->{regexps}->{$f_nr} }) {
608     while ( eval '$l =~ ' . $r ) { $c++ };
609     }
610     warn "## field $f_nr triggered $c regexpes\n" if ($c && $self->{debug});
611     }
612    
613 dpavlin 15 my $val;
614 dpavlin 57 my $r_sf; # repeatable subfields in this record
615 dpavlin 15
616     # has identifiers?
617 dpavlin 23 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
618 dpavlin 15
619 dpavlin 12 # has subfields?
620     if ($l =~ m/\^/) {
621     foreach my $t (split(/\^/,$l)) {
622     next if (! $t);
623 dpavlin 50 my ($sf,$v) = (substr($t,0,1), substr($t,1));
624 dpavlin 57 # XXX this might be option, but why?
625 dpavlin 54 next unless ($v);
626 dpavlin 57 # warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
627 dpavlin 54
628 dpavlin 50 if (ref( $val->{$sf} ) eq 'ARRAY') {
629 dpavlin 54
630 dpavlin 50 push @{ $val->{$sf} }, $v;
631 dpavlin 57
632     # record repeatable subfield it it's offset
633     push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
634     $r_sf->{$sf}++;
635    
636 dpavlin 50 } elsif (defined( $val->{$sf} )) {
637 dpavlin 57
638 dpavlin 50 # convert scalar field to array
639     $val->{$sf} = [ $val->{$sf}, $v ];
640 dpavlin 57
641     push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
642     $r_sf->{$sf}++;
643    
644 dpavlin 50 } else {
645     $val->{$sf} = $v;
646 dpavlin 57 push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
647 dpavlin 50 }
648 dpavlin 12 }
649     } else {
650     $val = $l;
651     }
652    
653 dpavlin 57 if ($j_rs) {
654     map {
655     $val->{$_} = join($j_rs, @{ $val->{$_} });
656     } keys %$r_sf
657     }
658    
659     push @{$rec->{$f_nr}}, $val;
660 dpavlin 12 }
661     }
662    
663     return $rec;
664     }
665    
666 dpavlin 15 =head2 tag_name
667 dpavlin 1
668 dpavlin 15 Return name of selected tag
669 dpavlin 1
670 dpavlin 15 print $isis->tag_name('200');
671    
672     =cut
673    
674     sub tag_name {
675 dpavlin 1 my $self = shift;
676 dpavlin 15 my $tag = shift || return;
677     return $self->{'TagName'}->{$tag} || $tag;
678 dpavlin 1 }
679    
680 dpavlin 35
681     =head2 read_cnt
682    
683     Read content of C<.CNT> file and return hash containing it.
684    
685     print Dumper($isis->read_cnt);
686    
687     This function is not used by module (C<.CNT> files are not required for this
688     module to work), but it can be useful to examine your index (while debugging
689     for example).
690    
691     =cut
692    
693     sub read_cnt {
694     my $self = shift;
695    
696     croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
697    
698     # Get the index information from $db.CNT
699    
700     open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
701     binmode($fileCNT);
702    
703     my $buff;
704    
705     read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
706     $self->unpack_cnt($buff);
707    
708     read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
709     $self->unpack_cnt($buff);
710    
711     close($fileCNT);
712    
713     return $self->{cnt};
714     }
715    
716     =head2 unpack_cnt
717    
718     Unpack one of two 26 bytes fixed length record in C<.CNT> file.
719    
720     Here is definition of record:
721    
722     off key description size
723     0: IDTYPE BTree type s
724     2: ORDN Nodes Order s
725     4: ORDF Leafs Order s
726     6: N Number of Memory buffers for nodes s
727     8: K Number of buffers for first level index s
728     10: LIV Current number of Index Levels s
729     12: POSRX Pointer to Root Record in N0x l
730     16: NMAXPOS Next Available position in N0x l
731     20: FMAXPOS Next available position in L0x l
732     24: ABNORMAL Formal BTree normality indicator s
733     length: 26 bytes
734    
735     This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
736    
737     =cut
738    
739     sub unpack_cnt {
740     my $self = shift;
741    
742     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
743    
744     my $buff = shift || return;
745     my @arr = unpack("vvvvvvVVVv", $buff);
746    
747     print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
748    
749     my $IDTYPE = shift @arr;
750     foreach (@flds) {
751     $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
752     }
753     }
754    
755 dpavlin 1 1;
756    
757     =head1 BUGS
758    
759 dpavlin 27 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
760     some variations in input databases which has been tested with this module.
761     When I was in doubt, I assumed that OpenIsis's implementation was right
762     (except for obvious bugs).
763 dpavlin 1
764 dpavlin 27 However, every effort has been made to test this module with as much
765     databases (and programs that create them) as possible.
766    
767     I would be very greatful for success or failure reports about usage of this
768     module with databases from programs other than WinIsis and IsisMarc. I had
769     tested this against ouput of one C<isis.dll>-based application, but I don't
770     know any details about it's version.
771    
772 dpavlin 54 =head1 VERSIONS
773    
774 dpavlin 57 As this is young module, new features are added in subsequent version. It's
775     a good idea to specify version when using this module like this:
776 dpavlin 54
777 dpavlin 57 use Biblio::Isis 0.21
778    
779     Below is list of changes in specific version of module (so you can target
780     older versions if you really have to):
781    
782 dpavlin 54 =over 8
783    
784 dpavlin 57 =item 0.21
785    
786     Added C<join_subfields_with> to L</new> and L</to_hash>.
787    
788     Added C<include_subfields> to L</to_hash>.
789    
790 dpavlin 54 =item 0.20
791    
792 dpavlin 56 Added C<< $isis->mfn >>, support for repeatable subfields and
793     C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
794 dpavlin 54
795     =back
796    
797 dpavlin 1 =head1 AUTHOR
798    
799     Dobrica Pavlinusic
800     CPAN ID: DPAVLIN
801     dpavlin@rot13.org
802     http://www.rot13.org/~dpavlin/
803    
804 dpavlin 15 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
805     written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
806 dpavlin 1
807     =head1 COPYRIGHT
808    
809     This program is free software; you can redistribute
810     it and/or modify it under the same terms as Perl itself.
811    
812     The full text of the license can be found in the
813     LICENSE file included with this module.
814    
815    
816     =head1 SEE ALSO
817    
818 dpavlin 59 L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
819    
820 dpavlin 15 OpenIsis web site L<http://www.openisis.org>
821 dpavlin 1
822 dpavlin 15 perl4lib site L<http://perl4lib.perl.org>
823    

  ViewVC Help
Powered by ViewVC 1.1.26