--- isis2stream.pl 2002/06/14 18:24:05 1.2
+++ isis2stream.pl 2002/06/16 18:11:14 1.7
@@ -3,51 +3,30 @@
use strict;
use OpenIsis;
use Getopt::Std;
-#use Data::Dumper;
-
-my $install_dir="/local/index";
-my $mpsindex="/local/mps-5.3/bin/mpsindex -l 9 -b";
-my $isis_data="/var/autofs/misc/isis_data/";
-#my $isis_data="/mnt/20020606/Isis/Data/"; # doma
+use Data::Dumper;
+use common;
my %opts;
-getopt('dD', \%opts);
+getopt('dm', \%opts);
-die "usage: $0 -d [database_dir] " if (! $opts{d});
+die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts);
my $db_dir = $opts{d};
-mkdir "$install_dir/$db_dir" if (!-e "$install_dir/$db_dir");
-mkdir "$install_dir/$db_dir/data" if (!-e "$install_dir/$db_dir/data");
+mkdir "$common::install_dir/$db_dir" if (!-e "$common::install_dir/$db_dir");
+mkdir "$common::install_dir/$db_dir/data" if (!-e "$common::install_dir/$db_dir/data");
-my $dir="$install_dir/$db_dir/data";
+my $dir="$common::install_dir/$db_dir/data";
-open(S,"> $dir/stream") || die "can't open output $dir/stram: $!";
+open(S,"> $dir/stream") || die "can't open output $dir/stream: $!";
open(R,"> $dir/bib") || die "can't open output $dir/bib: $!";
-open(MPS,"| $mpsindex -d $install_dir/$db_dir -autokey") || die "can't start MPS indexer $mpsindex: $!";
+open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";
-my $s="V 5 3
-L hr-HR
-F 700+ 1 Autor
-F 200+ 2 Naslov
-F 210 3 Izdavanje
-F 225 4 Nakladnička cjelina
-F 300+ 5 Napomene
-F 330 6 Sadržaj
-F 464 7 Analitički radovi
-F 610 8 Ključne riječi
-F 675 9 UDK
-F 686 10 CC
-F 990 11 Signatura
-F 991 12 Inventarni broj
-F 10 13 ISBN
-";
-
-print S $s;
-print MPS $s;
+print S $common::mps_header;
+print MPS $common::mps_header;
#
# expand(nr,"space separated string");
@@ -177,27 +156,6 @@
}
#--------------------------------------------------------------------
-#
-# mps_expand(nr,"space separated string");
-#
-
-sub mps_expand {
- my $nr = shift @_;
- my $out = "";
- while (my $fld = shift @_) {
- if ($fld =~ s/\s*[,;\.!?'"<>\[\]]*\s+/ /g) {
- foreach my $w (split(/\s+/,$fld)) {
- # FIX: this should be replaced by stemmer!
- $out .= "W $w $nr\n";
- }
- } else {
- $out .= "W $fld $nr\n";
- }
- }
- return c_852_czs($out);
-}
-
-#--------------------------------------------------------------------
# $mps .= sf_to_mps($subfiled_hash,"subfields",mps_id);
#
@@ -349,7 +307,7 @@
sub isis_sf {
my $row = shift @_;
my $isis_id = shift @_;
- my $subfield = shift @_ || 'a';
+ my $subfield = shift @_;
my $prefix = shift @_ || '';
my $postfix = shift @_ || '';
@@ -357,7 +315,10 @@
if ($row->{$isis_id}->[0]) {
my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);
- if (length($subfield) == 1) {
+ if (! defined $subfield || length($subfield) == 0) {
+ # no subfield requested (undef or empty string): return the whole raw field value wrapped in prefix/postfix
+ return $prefix . $row->{$isis_id}->[0] . $postfix;
+ } elsif (length($subfield) == 1) {
if ($sf->{$subfield}) {
return $prefix . $sf->{$subfield} . $postfix;
} else {
@@ -386,18 +347,39 @@
my $last_tell=0;
-my $db = OpenIsis::open( "$isis_data/$db_dir/LIBRI/LIBRI" );
+my @isis_dirs = ( '.' ); # use dirname as database name
-my $max_rowid = OpenIsis::maxRowid( $db );
+if ($opts{m}) {
+ @isis_dirs = split(/,/,$opts{m});
+}
-my $last_pcnt = 0;
+my @isis_dbs;
-for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
+foreach (@isis_dirs) {
+ if (-e "$common::isis_data/$db_dir/$_/LIBRI") {
+ push @isis_dbs,"$common::isis_data/$db_dir/$_/LIBRI/LIBRI";
+ }
+ if (-e "$common::isis_data/$db_dir/$_/PERI") {
+ push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI";
+ }
+}
+
+foreach my $isis_db (@isis_dbs) {
+
+ print MPS "M reading ISIS from '$isis_db'...\n";
+
+ my $db = OpenIsis::open( "$isis_db" );
+
+ my $max_rowid = OpenIsis::maxRowid( $db );
+
+ my $last_pcnt = 0;
+
+ for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
my $row = OpenIsis::read( $db, $row_id );
if (my $tmp = $row->{'200'}->[0]) {
my $bib = "%MFN $row->{mfn}\n";
- my $mps;
+ my $mps = "W $row->{mfn} 14\n";
my $pcnt = int($row->{mfn} * 100 / $max_rowid);
if ($pcnt != $last_pcnt) {
@@ -422,6 +404,7 @@
$mps .= isis_to_mps($row,'711',1);
$mps .= isis_to_mps($row,'503',1);
$mps .= isis_to_mps($row,'702',1);
+ $mps .= isis_to_mps($row,'200',1,"fg");
$bib .= isis_to_bib($row,'205','%205');
@@ -443,11 +426,51 @@
$mps .= isis_to_mps($row,'532',2);
$mps .= isis_to_mps($row,'424',2);
+ $mps .= isis_to_mps($row,'230',2,"ae");
+ $mps .= isis_to_mps($row,'231',2,"ae");
+ $mps .= isis_to_mps($row,'232',2,"ae");
+ $mps .= isis_to_mps($row,'233',2,"ae");
+
+
+ $bib .= "%sv ".isis_sf($row,'230','v').
+ isis_sf($row,'230','a',' : ').
+ isis_sf($row,'250',undef,'. - ').
+ isis_sf($row,'260',undef,'. - ').
+ isis_sf($row,'290',undef,'
ISBN ').
+ "\n";
+ $bib .= "%sv ".isis_sf($row,'231','v','
').
+ isis_sf($row,'231','a',' : ').
+ isis_sf($row,'251',undef,'. - ').
+ isis_sf($row,'261',undef,'. - ').
+ isis_sf($row,'291',undef,'
ISBN ').
+ "\n";
+ $bib .= "%sv ".isis_sf($row,'232','v','
').
+ isis_sf($row,'232','a',' : ').
+ isis_sf($row,'252',undef,'. - ').
+ isis_sf($row,'262',undef,'. - ').
+ isis_sf($row,'292',undef,'
ISBN ').
+ "\n";
+ $bib .= "%sv ".isis_sf($row,'233','v','
').
+ isis_sf($row,'233','a',' : ').
+ isis_sf($row,'253',undef,'. - ').
+ isis_sf($row,'263',undef,'. - ').
+ isis_sf($row,'293',undef,'
ISBN ').
+ "\n";
+
+ $mps .= isis_to_mps($row,'270',2);
+ $mps .= isis_to_mps($row,'271',2);
+ $mps .= isis_to_mps($row,'272',2);
+ $mps .= isis_to_mps($row,'273',2);
+
$headline .= isis_sf($row,'700','b'," ");
$headline .= isis_sf($row,'700','a'," ");
# izdavac
$mps .= isis_to_mps($row,'210',3);
+ $mps .= isis_to_mps($row,'250',3);
+ $mps .= isis_to_mps($row,'251',3);
+ $mps .= isis_to_mps($row,'252',3);
+ $mps .= isis_to_mps($row,'253',3);
# if (my $sf = OpenIsis::subfields($row->{'210'}->[0])) {
# my $tmp;
# $tmp .= $sf->{a} if ($sf->{a});
@@ -455,7 +478,7 @@
# $tmp .= ", ".$sf->{d} if ($sf->{d});
# $bib .= "%210 $tmp\n" if ($tmp);
# }
- $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";
+ $bib .= "%210+ ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";
if (my $year = isis_sf($row,'210','d')) {
$year =~ s/^\s*cop\.*\s*//i;
@@ -464,7 +487,12 @@
$headline .= " ($year)";
}
- $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');
+ $mps .= isis_to_mps($row,'215',15);
+ $mps .= isis_to_mps($row,'260',15);
+ $mps .= isis_to_mps($row,'261',15);
+ $mps .= isis_to_mps($row,'262',15);
+ $mps .= isis_to_mps($row,'263',15);
+ $bib .= isis_to_bib($row,'215','%215+', '*', undef, undef, ', ');
# $bib .= isis_to_bib($row,'225','%225', 'aehivw');
$bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
@@ -476,6 +504,10 @@
$mps .= isis_to_mps($row,'300',5);
$mps .= isis_to_mps($row,'320',5);
$mps .= isis_to_mps($row,'327',5);
+ $mps .= isis_to_mps($row,'280',5);
+ $mps .= isis_to_mps($row,'281',5);
+ $mps .= isis_to_mps($row,'282',5);
+ $mps .= isis_to_mps($row,'283',5);
$bib .= isis_to_bib($row,'330','%330');
$mps .= isis_to_mps($row,'330',6);
@@ -497,15 +529,28 @@
$bib .= isis_to_bib($row,'991','%991');
$mps .= isis_to_mps($row,'991',12);
- # ISBN
- if (my $isbn = $row->{10}->[0]) {
- $isbn =~ s/ +//g; # remove spaces
- $mps .= "W $isbn 13\n";
- $bib .= "%ISBN $isbn\n";
- $isbn =~ s/-//g;
- $mps .= "W $isbn 13\n";
+	sub store_isn {
+		# store_isn($isn, $nr, $tag): returns (MPS "W" lines, bib line) for an ISBN/ISSN, or two empty strings when $isn is empty.
+		# Returns the text instead of appending to $mps/$bib: a named sub binds only the FIRST loop iteration's "my" variables.
+		my ($isn, $nr, $tag) = @_;
+		return ('', '') unless $isn;
+		$isn =~ s/ +//g;	# remove spaces
+		my $bib_line = "$tag $isn\n";	# bib keeps the hyphenated form
+		my $words = "W $isn $nr\n";
+		# also index the hyphen-less form, but only when it differs
+		$words .= "W $isn $nr\n" if ($isn =~ s/-//g);
+		return ($words, $bib_line);
 }
- $mps .= isis_to_mps($row,'10',12);
+
+	# ISBN (append the returned text to this record's own $mps/$bib)
+	{ my ($words, $bib_line) = store_isn($row->{10}->[0],13,'%ISBN'); $mps .= $words; $bib .= $bib_line; }
+	$mps .= isis_to_mps($row,'290',13);
+	$mps .= isis_to_mps($row,'291',13);
+	$mps .= isis_to_mps($row,'292',13);
+	$mps .= isis_to_mps($row,'293',13);
+
+	# ISSN (disabled; NOTE: MPS field 14 already carries the MFN, so choose another field number before enabling)
+	#{ my ($words, $bib_line) = store_isn($row->{11}->[0],14,'%ISSN'); $mps .= $words; $bib .= $bib_line; }
$mps .= isis_to_mps($row,'532',1);
@@ -528,6 +573,7 @@
#}
print R c_852_iso($bib);
+ print R "%perl ".Dumper($row)."\n";
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
$last_tell=tell(R);
@@ -540,6 +586,7 @@
print S $mps;
print MPS $mps;
}
+ }
}
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";