/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 12 - (show annotations)
Wed Dec 29 20:10:11 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 10943 byte(s)
added to_hash method and hash_filter coderef to new constructor to filter
data prior to unpacking ISIS data into hash.

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.04;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB;
26
27 my $isis = new IsisDB(
28 isisdb => './cds/cds',
29 );
30
31 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32 print $isis->to_ascii($mfn),"\n";
33 }
34
35 =head1 DESCRIPTION
36
37 This module will read CDS/ISIS databases and create hash values out of it.
38 It can be used as perl-only alternative to OpenIsis module.
39
40 This will module will always be slower that OpenIsis module which use C
41 library. However, since it's written in perl, it's platform independent (so
42 you don't need C compiler), and can be easily modified.
43
44 Unique feature of this module is ability to C<include_deleted> records.
45 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46 fields which are zero sized will be filled with random junk from memory).
47
48 =head1 METHODS
49
50 =cut
51
52 # my $ORDN; # Nodes Order
53 # my $ORDF; # Leafs Order
54 # my $N; # Number of Memory buffers for nodes
55 # my $K; # Number of buffers for first level index
56 # my $LIV; # Current number of Index Levels
57 # my $POSRX; # Pointer to Root Record in N0x
58 # my $NMAXPOS; # Next Available position in N0x
59 # my $FMAXPOS; # Next available position in L0x
60 # my $ABNORMAL; # Formal BTree normality indicator
61
62 #
63 # some binary reads
64 #
65
66 =head2 new
67
68 Open CDS/ISIS database
69
70 my $isis = new IsisDB(
71 isisdb => './cds/cds',
72 read_fdt => 1,
73 include_deleted => 1,
74 hash_filter => sub {
75 my $v = shift;
76 $v =~ s#foo#bar#g;
77 },
78 debug => 1,
79 );
80
81 Options are described below:
82
83 =over 5
84
85 =item isisdb
86
87 Prefix path to CDS/ISIS. It should contain full or relative path to database
88 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
89
90 =item read_fdt
91
92 Boolean flag to specify if field definition table should be read. It's off
93 by default.
94
95 =item include_deleted
96
97 Don't skip logically deleted records in ISIS.
98
99 =item hash_filter
100
101 Filter code ref which will be used before data is converted to hash.
102
103 =item debug
104
105 Dump a B<lot> of debugging output.
106
107 =back
108
109 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
110
111 =cut
112
113 sub new {
114 my $class = shift;
115 my $self = {};
116 bless($self, $class);
117
118 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
119
120 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
121 $self->{$v} = {@_}->{$v};
122 }
123
124 # if you want to read .FDT file use read_fdt argument when creating class!
125 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
126
127 # read the $db.FDT file for tags
128 my $fieldzone=0;
129
130 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
131
132 while (<fileFDT>) {
133 chomp;
134 if ($fieldzone) {
135 my $name=substr($_,0,30);
136 my $tag=substr($_,50,3);
137
138 $name =~ s/\s+$//;
139 $tag =~ s/\s+$//;
140
141 $self->{'TagName'}->{$tag}=$name;
142 }
143
144 if (/^\*\*\*/) {
145 $fieldzone=1;
146 }
147 }
148
149 close(fileFDT);
150 }
151
152 # Get the Maximum MFN from $db.MST
153
154 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
155
156 # MST format: (* = 32 bit signed)
157 # CTLMFN* always 0
158 # NXTMFN* MFN to be assigned to the next record created
159 # NXTMFB* last block allocated to master file
160 # NXTMFP offset to next available position in last block
161 # MFTYPE always 0 for user db file (1 for system)
162 seek(fileMST,4,0);
163
164 my $buff;
165
166 read(fileMST, $buff, 4);
167 $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
168
169 # save maximum MFN
170 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
171
172 close(fileMST);
173
174 # Get the index information from $db.CNT
175
176 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
177
178 # There is two 26 Bytes fixed lenght records
179
180 # 0: IDTYPE BTree type 16
181 # 2: ORDN Nodes Order 16
182 # 4: ORDF Leafs Order 16
183 # 6: N Number of Memory buffers for nodes 16
184 # 8: K Number of buffers for first level index 16
185 # 10: LIV Current number of Index Levels 16
186 # 12: POSRX* Pointer to Root Record in N0x 32
187 # 16: NMAXPOS* Next Available position in N0x 32
188 # 20: FMAXPOS* Next available position in L0x 32
189 # 24: ABNORMAL Formal BTree normality indicator 16
190 # length: 26 bytes
191
192 sub unpack_cnt {
193 my $self = shift;
194
195 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
196
197 my $buff = shift || return;
198 my @arr = unpack("ssssssllls", $buff);
199
200 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
201
202 my $IDTYPE = shift @arr;
203 foreach (@flds) {
204 $self->{$IDTYPE}->{$_} = abs(shift @arr);
205 }
206 }
207
208 read(fileCNT, $buff, 26);
209 $self->unpack_cnt($buff);
210
211 read(fileCNT, $buff, 26);
212 $self->unpack_cnt($buff);
213
214
215 close(fileCNT);
216
217 print Dumper($self) if ($self->{debug});
218
219 # open files for later
220 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
221
222 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
223
224 $self ? return $self : return undef;
225 }
226
227 =head2 fetch
228
229 Read record with selected MFN
230
231 my $rec = $isis->fetch(55);
232
233 Returns hash with keys which are field names and values are unpacked values
234 for that field (like C<^asometing^bsomething else>)
235
236 =cut
237
238 sub fetch {
239 my $self = shift;
240
241 my $mfn = shift || croak "fetch needs MFN as argument!";
242
243 print "fetch: $mfn\n" if ($self->{debug});
244
245 # XXX check this?
246 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
247
248 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
249 seek($self->{'fileXRF'},$mfnpos,0);
250
251 my $buff;
252
253 # read XRFMFB abd XRFMFP
254 read($self->{'fileXRF'}, $buff, 4);
255 my $pointer=unpack("l",$buff) || carp "pointer is null";
256
257 my $XRFMFB = int($pointer/2048);
258 my $XRFMFP = $pointer - ($XRFMFB*2048);
259
260 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
261
262 # XXX fix this to be more readable!!
263 # e.g. (XRFMFB - 1) * 512 + XRFMFP
264
265 my $offset = $pointer;
266 my $offset2=int($offset/2048)-1;
267 my $offset22=int($offset/4096);
268 my $offset3=$offset-($offset22*4096);
269 if ($offset3>512) {
270 $offset3=$offset3-2048;
271 }
272 my $offset4=($offset2*512)+$offset3;
273
274 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
275
276 # Get Record Information
277
278 seek($self->{'fileMST'},$offset4,0);
279
280 read($self->{'fileMST'}, $buff, 4);
281 my $value=unpack("l",$buff);
282
283 if ($value!=$mfn) {
284 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
285 return -1; # XXX deleted record?
286 }
287
288 # $MFRL=$self->Read16($fileMST);
289 # $MFBWB=$self->Read32($fileMST);
290 # $MFBWP=$self->Read16($fileMST);
291 # $BASE=$self->Read16($fileMST);
292 # $NVF=$self->Read16($fileMST);
293 # $STATUS=$self->Read16($fileMST);
294
295 read($self->{'fileMST'}, $buff, 14);
296
297 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
298
299 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
300
301 # delete old record
302 delete $self->{record};
303
304 if (! $self->{'include_deleted'} && $MFRL < 0) {
305 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
306 return;
307 }
308
309 # Get Directory Format
310
311 my @FieldPOS;
312 my @FieldLEN;
313 my @FieldTAG;
314
315 read($self->{'fileMST'}, $buff, 6 * $NVF);
316
317 my $fld_len = 0;
318
319 for (my $i = 0 ; $i < $NVF ; $i++) {
320
321 # $TAG=$self->Read16($fileMST);
322 # $POS=$self->Read16($fileMST);
323 # $LEN=$self->Read16($fileMST);
324
325 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
326
327 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
328
329 # The TAG does not exists in .FDT so we set it to 0.
330 #
331 # XXX This is removed from perl version; .FDT file is updated manually, so
332 # you will often have fields in .MST file which aren't in .FDT. On the other
333 # hand, IsisMarc doesn't use .FDT files at all!
334
335 #if (! $self->{TagName}->{$TAG}) {
336 # $TAG=0;
337 #}
338
339 push @FieldTAG,$TAG;
340 push @FieldPOS,$POS;
341 push @FieldLEN,$LEN;
342
343 $fld_len += $LEN;
344 }
345
346 # Get Variable Fields
347
348 read($self->{'fileMST'},$buff,$fld_len);
349
350 for (my $i = 0 ; $i < $NVF ; $i++) {
351 # skip zero-sized fields
352 next if ($FieldLEN[$i] == 0);
353
354 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
355 }
356 close(fileMST);
357
358 print Dumper($self) if ($self->{debug});
359
360 return $self->{'record'};
361 }
362
363 =head2 to_ascii
364
365 Dump ascii output of selected MFN
366
367 print $isis->to_ascii(55);
368
369 =cut
370
371 sub to_ascii {
372 my $self = shift;
373
374 my $mfn = shift || croak "need MFN";
375
376 my $rec = $self->fetch($mfn);
377
378 my $out = "0\t$mfn";
379
380 foreach my $f (sort keys %{$rec}) {
381 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
382 }
383
384 $out .= "\n";
385
386 return $out;
387 }
388
389 =head2 to_hash
390
391 Read mfn and convert it to hash
392
393 my $hash = $isis->to_hash($mfn);
394
395 It has ability to convert characters (using C<hash_filter> from ISIS
396 database before creating structures enabling character remapping or quick
397 fixup of data.
398
399 This function returns hash which is like this:
400
401 $hash = {
402 '210' => [
403 {
404 'c' => 'New York University press',
405 'a' => 'New York',
406 'd' => 'cop. 1988'
407 }
408 ],
409 '990' => [
410 '2140',
411 '88',
412 'HAY'
413 ],
414 };
415
416 You can later use that has to produce any output from ISIS data.
417
418 =cut
419
420 sub to_hash {
421 my $self = shift;
422
423 my $mfn = shift || confess "need mfn!";
424
425 my $rec;
426 my $row = $self->fetch($mfn);
427
428 foreach my $k (keys %{$row}) {
429 foreach my $l (@{$row->{$k}}) {
430
431 # filter output
432 $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
433
434 # has subfields?
435 my $val;
436 if ($l =~ m/\^/) {
437 foreach my $t (split(/\^/,$l)) {
438 next if (! $t);
439 $val->{substr($t,0,1)} = substr($t,1);
440 }
441 } else {
442 $val = $l;
443 }
444
445 push @{$rec->{$k}}, $val;
446 }
447 }
448
449 return $rec;
450 }
451
452 #
453 # XXX porting from php left-over:
454 #
455 # do I *REALLY* need those methods, or should I use
456 # $self->{something} directly?
457 #
458 # Probably direct usage is better!
459 #
460
461 sub TagName {
462 my $self = shift;
463 return $self->{TagName};
464 }
465
466 sub NextMFN {
467 my $self = shift;
468 return $self->{NXTMFN};
469 }
470
471 1;
472
473 =head1 BUGS
474
475 This module has been very lightly tested. Use with caution and report bugs.
476
477 =head1 AUTHOR
478
479 Dobrica Pavlinusic
480 CPAN ID: DPAVLIN
481 dpavlin@rot13.org
482 http://www.rot13.org/~dpavlin/
483
484 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
485 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
486
487 =head1 COPYRIGHT
488
489 This program is free software; you can redistribute
490 it and/or modify it under the same terms as Perl itself.
491
492 The full text of the license can be found in the
493 LICENSE file included with this module.
494
495
496 =head1 SEE ALSO
497
498 L<http://www.openisis.org|OpenIsis>, perl(1).
499

  ViewVC Help
Powered by ViewVC 1.1.26