--- isis2stream.pl 2002/06/14 18:24:05 1.2 +++ isis2stream.pl 2002/06/16 18:11:14 1.7 @@ -3,51 +3,30 @@ use strict; use OpenIsis; use Getopt::Std; -#use Data::Dumper; - -my $install_dir="/local/index"; -my $mpsindex="/local/mps-5.3/bin/mpsindex -l 9 -b"; -my $isis_data="/var/autofs/misc/isis_data/"; -#my $isis_data="/mnt/20020606/Isis/Data/"; # doma +use Data::Dumper; +use common; my %opts; -getopt('dD', \%opts); +getopt('dm', \%opts); -die "usage: $0 -d [database_dir] " if (! $opts{d}); +die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); my $db_dir = $opts{d}; -mkdir "$install_dir/$db_dir" if (!-e "$install_dir/$db_dir"); -mkdir "$install_dir/$db_dir/data" if (!-e "$install_dir/$db_dir/data"); +mkdir "$common::install_dir/$db_dir" if (!-e "$common::install_dir/$db_dir"); +mkdir "$common::install_dir/$db_dir/data" if (!-e "$common::install_dir/$db_dir/data"); -my $dir="$install_dir/$db_dir/data"; +my $dir="$common::install_dir/$db_dir/data"; -open(S,"> $dir/stream") || die "can't open output $dir/stram: $!"; +open(S,"> $dir/stream") || die "can't open output $dir/stream: $!"; open(R,"> $dir/bib") || die "can't open output $dir/bib: $!"; -open(MPS,"| $mpsindex -d $install_dir/$db_dir -autokey") || die "can't start MPS indexer $mpsindex: $!"; +open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!"; #open(MPS,"> /tmp/mpsindex") || die "mps: $!"; -my $s="V 5 3 -L hr-HR -F 700+ 1 Autor -F 200+ 2 Naslov -F 210 3 Izdavanje -F 225 4 Nakladnička cjelina -F 300+ 5 Napomene -F 330 6 Sadržaj -F 464 7 Analitički radovi -F 610 8 Ključne riječi -F 675 9 UDK -F 686 10 CC -F 990 11 Signatura -F 991 12 Inventarni broj -F 10 13 ISBN -"; - -print S $s; -print MPS $s; +print S $common::mps_header; +print MPS $common::mps_header; # # expand(nr,"space separated string"); @@ -177,27 +156,6 @@ } #-------------------------------------------------------------------- -# -# mps_expand(nr,"space separated string"); -# - -sub mps_expand { - my $nr = shift @_; - my $out = ""; - while (my $fld = shift @_) { - if ($fld =~ s/\s*[,;\.!?'"<>\[\]]*\s+/ /g) { - foreach my $w (split(/\s+/,$fld)) { - # FIX: this should be replaced by stemmer! - $out .= "W $w $nr\n"; - } - } else { - $out .= "W $fld $nr\n"; - } - } - return c_852_czs($out); -} - -#-------------------------------------------------------------------- # $mps .= sf_to_mps($subfiled_hash,"subfields",mps_id); # @@ -349,7 +307,7 @@ sub isis_sf { my $row = shift @_; my $isis_id = shift @_; - my $subfield = shift @_ || 'a'; + my $subfield = shift @_; my $prefix = shift @_ || ''; my $postfix = shift @_ || ''; @@ -357,7 +315,10 @@ if ($row->{$isis_id}->[0]) { my $sf = OpenIsis::subfields($row->{$isis_id}->[0]); - if (length($subfield) == 1) { + if (! defined $subfield || length($subfield) == 0) { + # subfield list undef, empty or no defined subfields for this record + return $prefix . $row->{$isis_id}->[0] . $postfix; + } elsif (length($subfield) == 1) { if ($sf->{$subfield}) { return $prefix . $sf->{$subfield} . $postfix; } else { @@ -386,18 +347,39 @@ my $last_tell=0; -my $db = OpenIsis::open( "$isis_data/$db_dir/LIBRI/LIBRI" ); +my @isis_dirs = ( '.' ); # use dirname as database name -my $max_rowid = OpenIsis::maxRowid( $db ); +if ($opts{m}) { + @isis_dirs = split(/,/,$opts{m}); +} -my $last_pcnt = 0; +my @isis_dbs; -for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) { +foreach (@isis_dirs) { + if (-e "$common::isis_data/$db_dir/$_/LIBRI") { + push @isis_dbs,"$common::isis_data/$db_dir/$_/LIBRI/LIBRI"; + } + if (-e "$common::isis_data/$db_dir/$_/PERI") { + push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; + } +} + +foreach my $isis_db (@isis_dbs) { + + print MPS "M reading ISIS from '$isis_db'...\n"; + + my $db = OpenIsis::open( "$isis_db" ); + + my $max_rowid = OpenIsis::maxRowid( $db ); + + my $last_pcnt = 0; + + for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) { my $row = OpenIsis::read( $db, $row_id ); if (my $tmp = $row->{'200'}->[0]) { my $bib = "%MFN $row->{mfn}\n"; - my $mps; + my $mps = "W $row->{mfn} 14\n"; my $pcnt = int($row->{mfn} * 100 / $max_rowid); if ($pcnt != $last_pcnt) { @@ -422,6 +404,7 @@ $mps .= isis_to_mps($row,'711',1); $mps .= isis_to_mps($row,'503',1); $mps .= isis_to_mps($row,'702',1); + $mps .= isis_to_mps($row,'200',1,"fg"); $bib .= isis_to_bib($row,'205','%205'); @@ -443,11 +426,51 @@ $mps .= isis_to_mps($row,'532',2); $mps .= isis_to_mps($row,'424',2); + $mps .= isis_to_mps($row,'230',2,"ae"); + $mps .= isis_to_mps($row,'231',2,"ae"); + $mps .= isis_to_mps($row,'232',2,"ae"); + $mps .= isis_to_mps($row,'233',2,"ae"); + + + $bib .= "%sv ".isis_sf($row,'230','v'). + isis_sf($row,'230','a',' : '). + isis_sf($row,'250',undef,'. - '). + isis_sf($row,'260',undef,'. - '). + isis_sf($row,'290',undef,'
ISBN '). + "\n"; + $bib .= "%sv ".isis_sf($row,'231','v','
'). + isis_sf($row,'231','a',' : '). + isis_sf($row,'251',undef,'. - '). + isis_sf($row,'261',undef,'. - '). + isis_sf($row,'291',undef,'
ISBN '). + "\n"; + $bib .= "%sv ".isis_sf($row,'232','v','
'). + isis_sf($row,'232','a',' : '). + isis_sf($row,'252',undef,'. - '). + isis_sf($row,'262',undef,'. - '). + isis_sf($row,'292',undef,'
ISBN '). + "\n"; + $bib .= "%sv ".isis_sf($row,'233','v','
'). + isis_sf($row,'233','a',' : '). + isis_sf($row,'253',undef,'. - '). + isis_sf($row,'263',undef,'. - '). + isis_sf($row,'293',undef,'
ISBN '). + "\n"; + + $mps .= isis_to_mps($row,'270',2); + $mps .= isis_to_mps($row,'271',2); + $mps .= isis_to_mps($row,'272',2); + $mps .= isis_to_mps($row,'273',2); + $headline .= isis_sf($row,'700','b'," "); $headline .= isis_sf($row,'700','a'," "); # izdavac $mps .= isis_to_mps($row,'210',3); + $mps .= isis_to_mps($row,'250',3); + $mps .= isis_to_mps($row,'251',3); + $mps .= isis_to_mps($row,'252',3); + $mps .= isis_to_mps($row,'253',3); # if (my $sf = OpenIsis::subfields($row->{'210'}->[0])) { # my $tmp; # $tmp .= $sf->{a} if ($sf->{a}); @@ -455,7 +478,7 @@ # $tmp .= ", ".$sf->{d} if ($sf->{d}); # $bib .= "%210 $tmp\n" if ($tmp); # } - $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n"; + $bib .= "%210+ ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n"; if (my $year = isis_sf($row,'210','d')) { $year =~ s/^\s*cop\.*\s*//i; @@ -464,7 +487,12 @@ $headline .= " ($year)"; } - $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', '); + $mps .= isis_to_mps($row,'215',15); + $mps .= isis_to_mps($row,'260',15); + $mps .= isis_to_mps($row,'261',15); + $mps .= isis_to_mps($row,'262',15); + $mps .= isis_to_mps($row,'263',15); + $bib .= isis_to_bib($row,'215','%215+', '*', undef, undef, ', '); # $bib .= isis_to_bib($row,'225','%225', 'aehivw'); $bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n"; @@ -476,6 +504,10 @@ $mps .= isis_to_mps($row,'300',5); $mps .= isis_to_mps($row,'320',5); $mps .= isis_to_mps($row,'327',5); + $mps .= isis_to_mps($row,'280',5); + $mps .= isis_to_mps($row,'281',5); + $mps .= isis_to_mps($row,'282',5); + $mps .= isis_to_mps($row,'283',5); $bib .= isis_to_bib($row,'330','%330'); $mps .= isis_to_mps($row,'330',6); @@ -497,15 +529,28 @@ $bib .= isis_to_bib($row,'991','%991'); $mps .= isis_to_mps($row,'991',12); - # ISBN - if (my $isbn = $row->{10}->[0]) { - $isbn =~ s/ +//g; # remove spaces - $mps .= "W $isbn 13\n"; - $bib .= "%ISBN $isbn\n"; - $isbn =~ s/-//g; - $mps .= "W $isbn 13\n"; + sub store_isn { + if (my $isn = shift @_) { + my $nr = shift @_; + my $tag = shift @_; + $isn =~ s/ +//g; # remove spaces + $mps .= "W $isn $nr\n"; + $bib .= "$tag $isn\n"; + if ($isn =~ s/-//g) { + $mps .= "W $isn $nr\n"; + } + } } - $mps .= isis_to_mps($row,'10',12); + + # ISBN + store_isn($row->{10}->[0],13,'%ISBN'); + $mps .= isis_to_mps($row,'290',13); + $mps .= isis_to_mps($row,'291',13); + $mps .= isis_to_mps($row,'292',13); + $mps .= isis_to_mps($row,'293',13); + + # ISSN + #store_isn($row->{11}->[0],14,'%ISSN'); $mps .= isis_to_mps($row,'532',1); @@ -528,6 +573,7 @@ #} print R c_852_iso($bib); + print R "%perl ".Dumper($row)."\n"; $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n"; $last_tell=tell(R); @@ -540,6 +586,7 @@ print S $mps; print MPS $mps; } + } } print S "M over and out\nX\n"; print MPS "M over and out\nX\n";