8 |
|
|
9 |
my %opts; |
my %opts; |
10 |
|
|
11 |
getopt('dm', \%opts); |
getopts('d:m:q', \%opts); |
12 |
|
|
13 |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
14 |
|
|
15 |
my $db_dir = $opts{d}; |
my $db_dir = $opts{d}; |
16 |
|
|
17 |
mkdir "$common::install_dir/$db_dir" if (!-e "$common::install_dir/$db_dir"); |
my $dir = open_data_files($db_dir); |
|
mkdir "$common::install_dir/$db_dir/data" if (!-e "$common::install_dir/$db_dir/data"); |
|
|
|
|
|
my $dir="$common::install_dir/$db_dir/data"; |
|
|
|
|
|
|
|
|
open(S,"> $dir/stream") || die "can't open output $dir/stream: $!"; |
|
|
open(R,"> $dir/bib") || die "can't open output $dir/bib: $!"; |
|
|
open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!"; |
|
|
#open(MPS,"> /tmp/mpsindex") || die "mps: $!"; |
|
|
|
|
|
print S $common::mps_header; |
|
|
print MPS $common::mps_header; |
|
18 |
|
|
19 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
20 |
# init array in_mps_header for config checks later |
# init array in_mps_header for config checks later |
37 |
if (/^database-name:([^=]+)=(.*)$/) { |
if (/^database-name:([^=]+)=(.*)$/) { |
38 |
my ($db_name,$db_desc) = ($1,$2); |
my ($db_name,$db_desc) = ($1,$2); |
39 |
$db_desc=~s/^##\w+##//g; |
$db_desc=~s/^##\w+##//g; |
40 |
$DatabaseDescriptions{$db_name}=$db_desc; |
# c_iso_852 is a cludge so that output format would be |
41 |
|
# correct 8859-2 again... |
42 |
|
$DatabaseDescriptions{$db_name}=c_iso_852($db_desc); |
43 |
} |
} |
44 |
} |
} |
45 |
close(CF); |
close(CF); |
58 |
foreach my $w (@words) { |
foreach my $w (@words) { |
59 |
# FIX: this should be replaced by stemmer! |
# FIX: this should be replaced by stemmer! |
60 |
#$w =~ tr/ƾ/sSdDcCcCzZ/; |
#$w =~ tr/ƾ/sSdDcCcCzZ/; |
61 |
$w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; |
$w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; |
62 |
$w =~ s//ss/g; |
$w =~ s//ss/g; |
63 |
$out .= "W $w $nr\n"; |
$out .= "W $w $nr\n"; |
64 |
} |
} |
74 |
return $tmp; |
return $tmp; |
75 |
} |
} |
76 |
|
|
77 |
|
sub c_iso_852 { |
78 |
|
my $tmp = $_[0]; |
79 |
|
$tmp =~ tr/ܫꔼȺ̪㍐슂ٝ// if ($tmp); |
80 |
|
return $tmp; |
81 |
|
} |
82 |
|
|
83 |
sub c_852_czs { |
sub c_852_czs { |
84 |
my $tmp = $_[0]; |
my $tmp = $_[0]; |
85 |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
86 |
$tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; |
$tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; |
87 |
$tmp =~ s//ss/g; |
$tmp =~ s//ss/g; |
88 |
return $tmp; |
return $tmp; |
89 |
} |
} |
126 |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
127 |
|
|
128 |
foreach (@sf_arr) { |
foreach (@sf_arr) { |
129 |
$out.=mps_expand($mps_id,$sf_hash->{$_}); |
$out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); |
130 |
} |
} |
131 |
return $out; |
return $out; |
132 |
} |
} |
152 |
if (scalar keys %{$sf_hash} > 0) { |
if (scalar keys %{$sf_hash} > 0) { |
153 |
if ($subfields) { |
if ($subfields) { |
154 |
foreach (split(//,$subfields)) { |
foreach (split(//,$subfields)) { |
155 |
$out.=mps_expand($mps_id,$sf_hash->{$_}); |
$out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); |
156 |
} |
} |
157 |
} else { |
} else { |
158 |
foreach (keys %{$sf_hash}) { |
foreach (keys %{$sf_hash}) { |
159 |
$out.=mps_expand($mps_id,$sf_hash->{$_}); |
$out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); |
160 |
} |
} |
161 |
} |
} |
162 |
} else { |
} else { |
163 |
$out.=mps_expand($mps_id,$row->{$isis_id}->[$i]); |
$out.=mps_expand($mps_id,c_852_iso($row->{$isis_id}->[$i])); |
164 |
} |
} |
165 |
$i++; |
$i++; |
166 |
} |
} |
299 |
if (-e "$common::isis_data/$db_dir/$_/PERI") { |
if (-e "$common::isis_data/$db_dir/$_/PERI") { |
300 |
push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; |
push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; |
301 |
} |
} |
302 |
|
if (-e "$common::isis_data/$db_dir/$_/AMS") { |
303 |
|
push @isis_dbs,"$common::isis_data/$db_dir/$_/AMS/AMS"; |
304 |
|
} |
305 |
|
if (-e "$common::isis_data/$db_dir/$_/ARTI") { |
306 |
|
# push @isis_dbs,"$common::isis_data/$db_dir/$_/ARTI/ARTI"; |
307 |
|
} |
308 |
} |
} |
309 |
|
|
310 |
foreach my $isis_db (@isis_dbs) { |
foreach my $isis_db (@isis_dbs) { |
313 |
|
|
314 |
my $db = OpenIsis::open( "$isis_db" ); |
my $db = OpenIsis::open( "$isis_db" ); |
315 |
|
|
316 |
|
if (! defined $db) { |
317 |
|
die "can't open '$isis_db'"; |
318 |
|
} |
319 |
|
|
320 |
|
my $tip = $isis_db; $tip =~ s/^.+?\/([^\/]+)$/$1/; |
321 |
|
if (defined $default::tip{$tip}) { |
322 |
|
$tip=$default::tip{$tip}; |
323 |
|
} elsif ($tip eq "AMS") { |
324 |
|
$tip=$default::tip{'LIBRI'}; |
325 |
|
} else { |
326 |
|
die "can't find tip for database '$isis_db'"; |
327 |
|
} |
328 |
|
$tip = c_iso_852($tip); |
329 |
|
|
330 |
my $max_rowid = OpenIsis::maxRowid( $db ); |
my $max_rowid = OpenIsis::maxRowid( $db ); |
331 |
|
|
332 |
my $last_pcnt = 0; |
my $last_pcnt = 0; |
335 |
my $row = OpenIsis::read( $db, $row_id ); |
my $row = OpenIsis::read( $db, $row_id ); |
336 |
if (my $tmp = $row->{'200'}->[0]) { |
if (my $tmp = $row->{'200'}->[0]) { |
337 |
|
|
338 |
my $bib = "%MFN $row->{mfn}\n"; |
my $bib; |
339 |
my $mps = "W $row->{mfn} 14\n"; |
my $mps = "W $row->{mfn} 14\n"; |
340 |
|
|
341 |
|
# tip gradje |
342 |
|
$mps .= "W ".c_852_czs($tip)." 17\n"; |
343 |
|
$bib .= "%tip $tip\n"; |
344 |
|
|
345 |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
346 |
if ($pcnt != $last_pcnt) { |
if ($pcnt != $last_pcnt) { |
347 |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt); |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt) if (! $opts{q}); |
348 |
$last_pcnt = $pcnt; |
$last_pcnt = $pcnt; |
349 |
} |
} |
350 |
|
|
354 |
$headline .= isis_sf($row,'200','f'," / "); |
$headline .= isis_sf($row,'200','f'," / "); |
355 |
$headline .= isis_sf($row,'210','d'," , "); |
$headline .= isis_sf($row,'210','d'," , "); |
356 |
|
|
357 |
|
# remove newlines, compress spaces |
358 |
|
$headline =~ s/[\n\r]//g; |
359 |
|
$headline =~ s/^\s+//g; |
360 |
|
$headline =~ s/\s+$//g; |
361 |
|
|
362 |
# author |
# author |
363 |
$bib .= isis_to_bib($row,'700','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'700','%700+','ab',undef,'>',', '); |
364 |
$bib .= isis_to_bib($row,'701','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','ab',undef,'>',', '); |
365 |
$bib .= isis_to_bib($row,'710','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','cd',undef,'>',', '); |
366 |
$bib .= isis_to_bib($row,'711','%700+','*',2,'<'); |
|
367 |
$bib .= isis_to_bib($row,'503','%700+','*',2,'<'); |
my $tmp; |
368 |
|
$tmp = isis_sf($row,'710','abc', '', '', (' : ',', ')); |
369 |
|
$tmp .= isis_sf($row,'710','dfe', ' (', ')', ('', ' ; ',' ; ')); |
370 |
|
$bib .= "%700+ $tmp\n" if ($tmp); |
371 |
|
|
372 |
|
$tmp = isis_sf($row,'711','abc', '', '', (' : ',', ')); |
373 |
|
$tmp .= isis_sf($row,'711','dfe', ' (', ')', ('', ' ; ',' ; ')); |
374 |
|
$bib .= "%700+ $tmp\n" if ($tmp); |
375 |
|
|
376 |
|
$bib .= isis_to_bib($row,'503','%700+','ab',undef,'>',', '); |
377 |
|
|
378 |
$mps .= isis_to_mps($row,'700',1); |
$mps .= isis_to_mps($row,'700',1); |
379 |
$mps .= isis_to_mps($row,'701',1); |
$mps .= isis_to_mps($row,'701',1); |
409 |
$mps .= isis_to_mps($row,'233',2,"ae"); |
$mps .= isis_to_mps($row,'233',2,"ae"); |
410 |
|
|
411 |
|
|
|
my $tmp; |
|
412 |
$tmp = isis_sf($row,'230','v'). |
$tmp = isis_sf($row,'230','v'). |
413 |
isis_sf($row,'230','a',' : '). |
isis_sf($row,'230','a',' : '). |
414 |
isis_sf($row,'250',undef,'. - '). |
isis_sf($row,'250',undef,'. - '). |
459 |
if (my $year = isis_sf($row,'210','d')) { |
if (my $year = isis_sf($row,'210','d')) { |
460 |
$year =~ s/^\s*cop\.*\s*//i; |
$year =~ s/^\s*cop\.*\s*//i; |
461 |
$year =~ s/[\[\]]*//g; |
$year =~ s/[\[\]]*//g; |
462 |
|
$year =~ s/[\n\r]//g; # remove cr |
463 |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
464 |
} |
} |
465 |
|
|
494 |
$bib .= isis_to_bib($row,'610','%610'); |
$bib .= isis_to_bib($row,'610','%610'); |
495 |
$mps .= isis_to_mps($row,'610',8); |
$mps .= isis_to_mps($row,'610',8); |
496 |
|
|
497 |
$bib .= isis_to_bib($row,'675','%675+'); |
$bib .= isis_to_bib($row,'675','%675+','a'); |
498 |
$mps .= isis_to_mps($row,'675',9); |
$mps .= isis_to_mps($row,'675',9); |
499 |
$bib .= isis_to_bib($row,'686','%675+'); |
$bib .= isis_to_bib($row,'686','%675+','a'); |
500 |
$mps .= isis_to_mps($row,'686',10); |
$mps .= isis_to_mps($row,'686',10); |
501 |
|
|
502 |
$bib .= isis_to_bib($row,'990','%990'); |
$bib .= isis_to_bib($row,'990','%990'); |
505 |
$bib .= isis_to_bib($row,'991','%991'); |
$bib .= isis_to_bib($row,'991','%991'); |
506 |
$mps .= isis_to_mps($row,'991',12); |
$mps .= isis_to_mps($row,'991',12); |
507 |
|
|
508 |
|
# Jezik |
509 |
|
$bib .= isis_to_bib($row,'101','%101'); |
510 |
|
$mps .= isis_to_mps($row,'101',16); |
511 |
|
# Pismo |
512 |
|
$bib .= isis_to_bib($row,'998','%101', 'a'); |
513 |
|
|
514 |
sub isis_isn_to_mps { |
sub isis_isn_to_mps { |
515 |
my $row = shift @_ || die; |
my $row = shift @_ || die; |
516 |
my $isis_id = shift @_ || die; |
my $isis_id = shift @_ || die; |
519 |
my $mps=''; |
my $mps=''; |
520 |
while (my $isn=$row->{$isis_id}->[$i]) { |
while (my $isn=$row->{$isis_id}->[$i]) { |
521 |
$isn =~ s/ +//g; # remove spaces |
$isn =~ s/ +//g; # remove spaces |
522 |
|
$isn =~ s/[\n\r]//g; # remove cr |
523 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
524 |
if ($isn =~ s/-//g) { |
if ($isn =~ s/-//g) { |
525 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
538 |
$mps .= isis_isn_to_mps($row,'293',13); |
$mps .= isis_isn_to_mps($row,'293',13); |
539 |
|
|
540 |
# ISSN |
# ISSN |
541 |
#store_isn($row->{11}->[0],14,'%ISSN'); |
$bib .= isis_to_bib($row,'11','%ISSN'); |
542 |
|
$mps .= isis_isn_to_mps($row,'11',13); |
543 |
|
|
544 |
$mps .= isis_to_mps($row,'532',1); |
$mps .= isis_to_mps($row,'532',1); |
545 |
|
|
546 |
$bib .= isis_to_bib($row,'994','%994a','a'); |
# Casopisi |
547 |
|
$tmp = isis_to_bib($row,'326','%326'); |
548 |
|
$tmp =~ s/g1/godinjak/; |
549 |
|
$tmp =~ s/g6/dvomjesenik/; |
550 |
|
$tmp =~ s/10/godinje 10 brojeva/; |
551 |
|
$tmp =~ s/m1/mjesenik/; |
552 |
|
$tmp =~ s/m2/polumjesenik/; |
553 |
|
$tmp =~ s/nr/neredovito/; |
554 |
|
$tmp =~ s/g4/etiri puta godinje/; |
555 |
|
$bib .= c_iso_852($tmp); |
556 |
|
$bib .= isis_to_bib($row,'992','%992'); |
557 |
|
|
558 |
|
$bib .= '%knjiz '.$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}."\n"; |
559 |
# headline |
# headline |
560 |
if ($headline) { |
if ($headline) { |
561 |
$headline .= " <i>(".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")</i>"; ## debug MFN! |
$headline .= " <i>(".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")</i>"; ## debug MFN! |
562 |
$headline =~ s/&/∧/g; |
$headline =~ s/&/∧/g; |
563 |
$headline =~ s/</</g; |
$headline =~ s/</</g; |
564 |
$headline =~ s/>/>/g; |
$headline =~ s/>/>/g; |
|
$headline =~ s/<(\/?[bi])>/<$1>/g; |
|
565 |
$mps .= "H ".c_852_iso($headline)."\n"; |
$mps .= "H ".c_852_iso($headline)."\n"; |
566 |
} else { |
} else { |
567 |
$mps .= "H nepoznato\n"; |
$mps .= "H nepoznato\n"; |
593 |
|
|
594 |
$mps .= "E\n"; |
$mps .= "E\n"; |
595 |
|
|
|
|
|
596 |
print S $mps; |
print S $mps; |
597 |
print MPS $mps; |
print MPS $mps; |
598 |
} |
} |