--- isis2stream.pl 2002/06/16 19:39:42 1.8 +++ isis2stream.pl 2002/09/23 18:46:21 1.21 @@ -8,7 +8,7 @@ my %opts; -getopt('dm', \%opts); +getopts('d:m:q', \%opts); die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); @@ -41,6 +41,22 @@ require "./search/config.pm"; #-------------------------------------------------------------------- +# read database configuration, store database names +open(CF,$common::database_cf) || die "$common::database_cf: $!"; +my %DatabaseDescriptions; +while() { + chomp; + if (/^database-name:([^=]+)=(.*)$/) { + my ($db_name,$db_desc) = ($1,$2); + $db_desc=~s/^##\w+##//g; + # c_iso_852 is a cludge so that output format would be + # correct 8859-2 again... + $DatabaseDescriptions{$db_name}=c_iso_852($db_desc); + } +} +close(CF); + +#-------------------------------------------------------------------- # # expand(nr,"space separated string"); # @@ -54,7 +70,7 @@ foreach my $w (@words) { # FIX: this should be replaced by stemmer! #$w =~ tr/ƾ/sSdDcCcCzZ/; - $w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; + $w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; $w =~ s//ss/g; $out .= "W $w $nr\n"; } @@ -70,10 +86,16 @@ return $tmp; } +sub c_iso_852 { + my $tmp = $_[0]; + $tmp =~ tr/ܫꔼȺ̪㍐슂ٝ// if ($tmp); + return $tmp; +} + sub c_852_czs { my $tmp = $_[0]; $tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; - $tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; + $tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; $tmp =~ s//ss/g; return $tmp; } @@ -289,6 +311,12 @@ if (-e "$common::isis_data/$db_dir/$_/PERI") { push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; } + if (-e "$common::isis_data/$db_dir/$_/AMS") { + push @isis_dbs,"$common::isis_data/$db_dir/$_/AMS/AMS"; + } + if (-e "$common::isis_data/$db_dir/$_/ARTI") { +# push @isis_dbs,"$common::isis_data/$db_dir/$_/ARTI/ARTI"; + } } foreach my $isis_db (@isis_dbs) { @@ -297,6 +325,20 @@ my $db = OpenIsis::open( "$isis_db" ); + if (! defined $db) { + die "can't open '$isis_db'"; + } + + my $tip = $isis_db; $tip =~ s/^.+?\/([^\/]+)$/$1/; + if (defined $default::tip{$tip}) { + $tip=$default::tip{$tip}; + } elsif ($tip eq "AMS") { + $tip=$default::tip{'LIBRI'}; + } else { + die "can't find tip for database '$isis_db'"; + } + $tip = c_iso_852($tip); + my $max_rowid = OpenIsis::maxRowid( $db ); my $last_pcnt = 0; @@ -305,25 +347,45 @@ my $row = OpenIsis::read( $db, $row_id ); if (my $tmp = $row->{'200'}->[0]) { - my $bib = "%MFN $row->{mfn}\n"; + my $bib; my $mps = "W $row->{mfn} 14\n"; + # tip gradje + $mps .= "W ".c_852_czs($tip)." 17\n"; + $bib .= "%tip $tip\n"; + my $pcnt = int($row->{mfn} * 100 / $max_rowid); if ($pcnt != $last_pcnt) { - printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt); + printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt) if (! $opts{q}); $last_pcnt = $pcnt; } my $headline; - $headline .= isis_sf($row,'200','a',"'"); - $headline .= isis_sf($row,'200','e'," : ","'"); + $headline .= isis_sf($row,'200','a'); + $headline .= isis_sf($row,'200','e'," : "); + $headline .= isis_sf($row,'200','f'," / "); + $headline .= isis_sf($row,'210','d'," , "); + + # remove newlines, compress spaces + $headline =~ s/[\n\r]//g; + $headline =~ s/^\s+//g; + $headline =~ s/\s+$//g; # author - $bib .= isis_to_bib($row,'700','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'701','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'710','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'711','%700+','*',2,'<'); - $bib .= isis_to_bib($row,'503','%700+','*',2,'<'); + $bib .= isis_to_bib($row,'700','%700+','ab',undef,'>',', '); + $bib .= isis_to_bib($row,'701','%700+','ab',undef,'>',', '); + $bib .= isis_to_bib($row,'701','%700+','cd',undef,'>',', '); + + my $tmp; + $tmp = isis_sf($row,'710','abc', '', '', (' : ',', ')); + $tmp .= isis_sf($row,'710','dfe', ' (', ')', ('', ' ; ',' ; ')); + $bib .= "%700+ $tmp\n" if ($tmp); + + $tmp = isis_sf($row,'711','abc', '', '', (' : ',', ')); + $tmp .= isis_sf($row,'711','dfe', ' (', ')', ('', ' ; ',' ; ')); + $bib .= "%700+ $tmp\n" if ($tmp); + + $bib .= isis_to_bib($row,'503','%700+','ab',undef,'>',', '); $mps .= isis_to_mps($row,'700',1); $mps .= isis_to_mps($row,'701',1); @@ -359,7 +421,6 @@ $mps .= isis_to_mps($row,'233',2,"ae"); - my $tmp; $tmp = isis_sf($row,'230','v'). isis_sf($row,'230','a',' : '). isis_sf($row,'250',undef,'. - '). @@ -392,9 +453,6 @@ $mps .= isis_to_mps($row,'272',2); $mps .= isis_to_mps($row,'273',2); - $headline .= isis_sf($row,'700','b'," "); - $headline .= isis_sf($row,'700','a'," "); - # izdavac $mps .= isis_to_mps($row,'210',3); $mps .= isis_to_mps($row,'250',3); @@ -413,8 +471,8 @@ if (my $year = isis_sf($row,'210','d')) { $year =~ s/^\s*cop\.*\s*//i; $year =~ s/[\[\]]*//g; + $year =~ s/[\n\r]//g; # remove cr $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); - $headline .= " ($year)"; } $mps .= isis_to_mps($row,'215',15); @@ -448,9 +506,9 @@ $bib .= isis_to_bib($row,'610','%610'); $mps .= isis_to_mps($row,'610',8); - $bib .= isis_to_bib($row,'675','%675+'); + $bib .= isis_to_bib($row,'675','%675+','a'); $mps .= isis_to_mps($row,'675',9); - $bib .= isis_to_bib($row,'686','%675+'); + $bib .= isis_to_bib($row,'686','%675+','a'); $mps .= isis_to_mps($row,'686',10); $bib .= isis_to_bib($row,'990','%990'); @@ -459,36 +517,60 @@ $bib .= isis_to_bib($row,'991','%991'); $mps .= isis_to_mps($row,'991',12); - sub store_isn { - if (my $isn = shift @_) { - my $nr = shift @_; - my $tag = shift @_; + # Jezik + $bib .= isis_to_bib($row,'101','%101'); + $mps .= isis_to_mps($row,'101',16); + # Pismo + $bib .= isis_to_bib($row,'998','%101', 'a'); + + sub isis_isn_to_mps { + my $row = shift @_ || die; + my $isis_id = shift @_ || die; + my $nr = shift @_ || die; + my $i=0; + my $mps=''; + while (my $isn=$row->{$isis_id}->[$i]) { $isn =~ s/ +//g; # remove spaces + $isn =~ s/[\n\r]//g; # remove cr $mps .= "W $isn $nr\n"; - $bib .= "$tag $isn\n"; if ($isn =~ s/-//g) { $mps .= "W $isn $nr\n"; } + $i++; } + return $mps; } # ISBN - store_isn($row->{10}->[0],13,'%ISBN'); - $mps .= isis_to_mps($row,'290',13); - $mps .= isis_to_mps($row,'291',13); - $mps .= isis_to_mps($row,'292',13); - $mps .= isis_to_mps($row,'293',13); + $bib .= isis_to_bib($row,'10','%ISBN'); + $mps .= isis_isn_to_mps($row,'10',13); + $mps .= isis_isn_to_mps($row,'290',13); + $mps .= isis_isn_to_mps($row,'291',13); + $mps .= isis_isn_to_mps($row,'292',13); + $mps .= isis_isn_to_mps($row,'293',13); # ISSN - #store_isn($row->{11}->[0],14,'%ISSN'); + $bib .= isis_to_bib($row,'11','%ISSN'); + $mps .= isis_isn_to_mps($row,'11',13); $mps .= isis_to_mps($row,'532',1); - $bib .= isis_to_bib($row,'994','%994a','a'); + # Casopisi + $tmp = isis_to_bib($row,'326','%326'); + $tmp =~ s/g1/godinjak/; + $tmp =~ s/g6/dvomjesenik/; + $tmp =~ s/10/godinje 10 brojeva/; + $tmp =~ s/m1/mjesenik/; + $tmp =~ s/m2/polumjesenik/; + $tmp =~ s/nr/neredovito/; + $tmp =~ s/g4/etiri puta godinje/; + $bib .= c_iso_852($tmp); + $bib .= isis_to_bib($row,'992','%992'); + $bib .= '%knjiz '.$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}."\n"; # headline if ($headline) { - $headline .= " [".$row->{mfn}."]"; ## debug MFN! + $headline .= " (".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")"; ## debug MFN! $headline =~ s/&/∧/g; $headline =~ s//>/g;