--- isis2stream.pl 2002/06/16 19:55:19 1.9 +++ isis2stream.pl 2002/10/24 16:32:00 1.22 @@ -8,25 +8,13 @@ my %opts; -getopt('dm', \%opts); +getopts('d:m:q', \%opts); die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); my $db_dir = $opts{d}; -mkdir "$common::install_dir/$db_dir" if (!-e "$common::install_dir/$db_dir"); -mkdir "$common::install_dir/$db_dir/data" if (!-e "$common::install_dir/$db_dir/data"); - -my $dir="$common::install_dir/$db_dir/data"; - - -open(S,"> $dir/stream") || die "can't open output $dir/stream: $!"; -open(R,"> $dir/bib") || die "can't open output $dir/bib: $!"; -open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!"; -#open(MPS,"> /tmp/mpsindex") || die "mps: $!"; - -print S $common::mps_header; -print MPS $common::mps_header; +my $dir = open_data_files($db_dir); #-------------------------------------------------------------------- # init array in_mps_header for config checks later @@ -41,6 +29,22 @@ require "./search/config.pm"; #-------------------------------------------------------------------- +# read database configuration, store database names +open(CF,$common::database_cf) || die "$common::database_cf: $!"; +my %DatabaseDescriptions; +while() { + chomp; + if (/^database-name:([^=]+)=(.*)$/) { + my ($db_name,$db_desc) = ($1,$2); + $db_desc=~s/^##\w+##//g; + # c_iso_852 is a cludge so that output format would be + # correct 8859-2 again... + $DatabaseDescriptions{$db_name}=c_iso_852($db_desc); + } +} +close(CF); + +#-------------------------------------------------------------------- # # expand(nr,"space separated string"); # @@ -54,7 +58,7 @@ foreach my $w (@words) { # FIX: this should be replaced by stemmer! #$w =~ tr/ƾ/sSdDcCcCzZ/; - $w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; + $w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; $w =~ s//ss/g; $out .= "W $w $nr\n"; } @@ -70,10 +74,16 @@ return $tmp; } +sub c_iso_852 { + my $tmp = $_[0]; + $tmp =~ tr/ܫꔼȺ̪㍐슂ٝ// if ($tmp); + return $tmp; +} + sub c_852_czs { my $tmp = $_[0]; $tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; - $tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; + $tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; $tmp =~ s//ss/g; return $tmp; } @@ -116,7 +126,7 @@ my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); foreach (@sf_arr) { - $out.=mps_expand($mps_id,$sf_hash->{$_}); + $out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); } return $out; } @@ -142,15 +152,15 @@ if (scalar keys %{$sf_hash} > 0) { if ($subfields) { foreach (split(//,$subfields)) { - $out.=mps_expand($mps_id,$sf_hash->{$_}); + $out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); } } else { foreach (keys %{$sf_hash}) { - $out.=mps_expand($mps_id,$sf_hash->{$_}); + $out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); } } } else { - $out.=mps_expand($mps_id,$row->{$isis_id}->[$i]); + $out.=mps_expand($mps_id,c_852_iso($row->{$isis_id}->[$i])); } $i++; } @@ -289,6 +299,12 @@ if (-e "$common::isis_data/$db_dir/$_/PERI") { push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; } + if (-e "$common::isis_data/$db_dir/$_/AMS") { + push @isis_dbs,"$common::isis_data/$db_dir/$_/AMS/AMS"; + } + if (-e "$common::isis_data/$db_dir/$_/ARTI") { +# push @isis_dbs,"$common::isis_data/$db_dir/$_/ARTI/ARTI"; + } } foreach my $isis_db (@isis_dbs) { @@ -297,6 +313,20 @@ my $db = OpenIsis::open( "$isis_db" ); + if (! defined $db) { + die "can't open '$isis_db'"; + } + + my $tip = $isis_db; $tip =~ s/^.+?\/([^\/]+)$/$1/; + if (defined $default::tip{$tip}) { + $tip=$default::tip{$tip}; + } elsif ($tip eq "AMS") { + $tip=$default::tip{'LIBRI'}; + } else { + die "can't find tip for database '$isis_db'"; + } + $tip = c_iso_852($tip); + my $max_rowid = OpenIsis::maxRowid( $db ); my $last_pcnt = 0; @@ -305,25 +335,45 @@ my $row = OpenIsis::read( $db, $row_id ); if (my $tmp = $row->{'200'}->[0]) { - my $bib = "%MFN $row->{mfn}\n"; + my $bib; my $mps = "W $row->{mfn} 14\n"; + # tip gradje + $mps .= "W ".c_852_czs($tip)." 17\n"; + $bib .= "%tip $tip\n"; + my $pcnt = int($row->{mfn} * 100 / $max_rowid); if ($pcnt != $last_pcnt) { - printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt); + printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt) if (! $opts{q}); $last_pcnt = $pcnt; } my $headline; - $headline .= isis_sf($row,'200','a',"'"); - $headline .= isis_sf($row,'200','e'," : ","'"); + $headline .= isis_sf($row,'200','a'); + $headline .= isis_sf($row,'200','e'," : "); + $headline .= isis_sf($row,'200','f'," / "); + $headline .= isis_sf($row,'210','d'," , "); + + # remove newlines, compress spaces + $headline =~ s/[\n\r]//g; + $headline =~ s/^\s+//g; + $headline =~ s/\s+$//g; # author - $bib .= isis_to_bib($row,'700','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'701','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'710','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'711','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'503','%700+','*',2,'<'); + $bib .= isis_to_bib($row,'700','%700+','ab',undef,'>',', '); + $bib .= isis_to_bib($row,'701','%700+','ab',undef,'>',', '); + $bib .= isis_to_bib($row,'701','%700+','cd',undef,'>',', '); + + my $tmp; + $tmp = isis_sf($row,'710','abc', '', '', (' : ',', ')); + $tmp .= isis_sf($row,'710','dfe', ' (', ')', ('', ' ; ',' ; ')); + $bib .= "%700+ $tmp\n" if ($tmp); + + $tmp = isis_sf($row,'711','abc', '', '', (' : ',', ')); + $tmp .= isis_sf($row,'711','dfe', ' (', ')', ('', ' ; ',' ; ')); + $bib .= "%700+ $tmp\n" if ($tmp); + + $bib .= isis_to_bib($row,'503','%700+','ab',undef,'>',', '); $mps .= isis_to_mps($row,'700',1); $mps .= isis_to_mps($row,'701',1); @@ -359,7 +409,6 @@ $mps .= isis_to_mps($row,'233',2,"ae"); - my $tmp; $tmp = isis_sf($row,'230','v'). isis_sf($row,'230','a',' : '). isis_sf($row,'250',undef,'. - '). @@ -392,9 +441,6 @@ $mps .= isis_to_mps($row,'272',2); $mps .= isis_to_mps($row,'273',2); - $headline .= isis_sf($row,'700','b'," "); - $headline .= isis_sf($row,'700','a'," "); - # izdavac $mps .= isis_to_mps($row,'210',3); $mps .= isis_to_mps($row,'250',3); @@ -413,8 +459,8 @@ if (my $year = isis_sf($row,'210','d')) { $year =~ s/^\s*cop\.*\s*//i; $year =~ s/[\[\]]*//g; + $year =~ s/[\n\r]//g; # remove cr $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); - $headline .= " ($year)"; } $mps .= isis_to_mps($row,'215',15); @@ -448,9 +494,9 @@ $bib .= isis_to_bib($row,'610','%610'); $mps .= isis_to_mps($row,'610',8); - $bib .= isis_to_bib($row,'675','%675+'); + $bib .= isis_to_bib($row,'675','%675+','a'); $mps .= isis_to_mps($row,'675',9); - $bib .= isis_to_bib($row,'686','%675+'); + $bib .= isis_to_bib($row,'686','%675+','a'); $mps .= isis_to_mps($row,'686',10); $bib .= isis_to_bib($row,'990','%990'); @@ -459,6 +505,12 @@ $bib .= isis_to_bib($row,'991','%991'); $mps .= isis_to_mps($row,'991',12); + # Jezik + $bib .= isis_to_bib($row,'101','%101'); + $mps .= isis_to_mps($row,'101',16); + # Pismo + $bib .= isis_to_bib($row,'998','%101', 'a'); + sub isis_isn_to_mps { my $row = shift @_ || die; my $isis_id = shift @_ || die; @@ -467,6 +519,7 @@ my $mps=''; while (my $isn=$row->{$isis_id}->[$i]) { $isn =~ s/ +//g; # remove spaces + $isn =~ s/[\n\r]//g; # remove cr $mps .= "W $isn $nr\n"; if ($isn =~ s/-//g) { $mps .= "W $isn $nr\n"; @@ -485,15 +538,27 @@ $mps .= isis_isn_to_mps($row,'293',13); # ISSN - #store_isn($row->{11}->[0],14,'%ISSN'); + $bib .= isis_to_bib($row,'11','%ISSN'); + $mps .= isis_isn_to_mps($row,'11',13); $mps .= isis_to_mps($row,'532',1); - $bib .= isis_to_bib($row,'994','%994a','a'); + # Casopisi + $tmp = isis_to_bib($row,'326','%326'); + $tmp =~ s/g1/godinjak/; + $tmp =~ s/g6/dvomjesenik/; + $tmp =~ s/10/godinje 10 brojeva/; + $tmp =~ s/m1/mjesenik/; + $tmp =~ s/m2/polumjesenik/; + $tmp =~ s/nr/neredovito/; + $tmp =~ s/g4/etiri puta godinje/; + $bib .= c_iso_852($tmp); + $bib .= isis_to_bib($row,'992','%992'); + $bib .= '%knjiz '.$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}."\n"; # headline if ($headline) { - $headline .= " [".$row->{mfn}."]"; ## debug MFN! + $headline .= " (".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")"; ## debug MFN! $headline =~ s/&/∧/g; $headline =~ s//>/g; @@ -528,7 +593,6 @@ $mps .= "E\n"; - print S $mps; print MPS $mps; }