8 |
|
|
9 |
my %opts; |
my %opts; |
10 |
|
|
11 |
getopt('dm', \%opts); |
getopts('d:m:q', \%opts); |
12 |
|
|
13 |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
14 |
|
|
28 |
print S $common::mps_header; |
print S $common::mps_header; |
29 |
print MPS $common::mps_header; |
print MPS $common::mps_header; |
30 |
|
|
31 |
|
#-------------------------------------------------------------------- |
32 |
|
# init array in_mps_header for config checks later |
33 |
|
|
34 |
|
# Lookup table built from the "F " lines of $common::mps_header:
# the third space-separated token of each such line is the key and the
# second token is the value.  Used later to validate field numbers.
my %in_mps_header;
for my $header_line (grep { /^F / } split(/\n/, $common::mps_header)) {
    # at most four pieces: marker, value, key, remainder of the line
    my @pieces = split(/ /, $header_line, 4);
    $in_mps_header{ $pieces[2] } = $pieces[1];
}
41 |
|
require "./search/config.pm"; |
42 |
|
|
43 |
|
#-------------------------------------------------------------------- |
44 |
|
# read database configuration, store database names |
45 |
|
# Read the database configuration file named by $common::database_cf and
# remember the description of every database, keyed by database name.
# Only lines of the form "database-name:<name>=<description>" are used.
# Dies with the file name and OS error if the file cannot be opened.
#
# Three-argument open with a lexical handle: the file name can no longer be
# interpreted as an open mode, and no global bareword handle is left behind.
open(my $cf_fh, '<', $common::database_cf) or die "$common::database_cf: $!";
my %DatabaseDescriptions;
while (my $cf_line = <$cf_fh>) {
    chomp $cf_line;
    next unless $cf_line =~ /^database-name:([^=]+)=(.*)$/;
    my ($db_name, $db_desc) = ($1, $2);
    # strip a leading ##word## marker from the description
    $db_desc =~ s/^##\w+##//g;
    # c_iso_852 is a kludge so that output format would be
    # correct 8859-2 again...
    $DatabaseDescriptions{$db_name} = c_iso_852($db_desc);
}
close($cf_fh);
58 |
|
|
59 |
|
#-------------------------------------------------------------------- |
60 |
# |
# |
61 |
# expand(nr,"space separated string"); |
# expand(nr,"space separated string"); |
62 |
# |
# |
63 |
|
|
64 |
sub expand { |
sub expand { |
65 |
my $nr = shift @_; |
my $nr = shift @_; |
66 |
|
die "$nr is not in mps_header" if (!$in_mps_header{$nr}); |
67 |
my $out = ""; |
my $out = ""; |
68 |
while (my $fld = c_852_iso(shift @_)) { |
while (my $fld = c_852_iso(shift @_)) { |
69 |
my @words=split(/\s+/,$fld); |
my @words=split(/\s+/,$fld); |
80 |
|
|
81 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
82 |
|
|
|
################### ERASE###############3 |
|
|
|
|
|
# expand sub-fields from ISIS field |
|
|
# (^a.....^b....) |
|
|
my %data; # FIX |
|
|
# Split one ISIS field value into its sub-fields and store them in the
# file-level hash %data (which is emptied first on every call).
#
#   $_[0] - raw field value, e.g. "^aFirst^bSecond"
#
# A value that starts with "^" is split on "^"; the first character of each
# piece is the sub-field code (hash key) and the rest is its text.  Pieces
# that are empty or have no text after the code are ignored.  A value
# without sub-fields is stored whole under the key "all" with a marker
# suffix appended.  Empty or false input leaves %data empty.
#
# The return value is not meaningful; results are read from %data.
# No character-set conversion is applied here (the previous version built a
# converted copy of the value but never used it).
sub ex_sf {
    my ($in) = @_;

    %data = ();
    return unless $in;

    if ($in =~ m/^\^/) {
        foreach my $fld (split(/\^/, $in)) {
            # exactly ONE code character: a greedy \w+ would swallow the
            # beginning of the text ("aTitle" -> key "aTitl", value "e")
            $data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
        }
    } else {
        $data{all} = $in."<-- iz polja bez podpolja";
    }
    return;
}
|
|
|
|
|
# dump all sub-fields |
|
|
# Run expand() on every sub-field currently held in %data, visiting the
# keys in ascending string order, and return the concatenated results.
#   $nr - passed through to expand() as its first argument
sub all_sf {
    my ($nr) = @_;

    my @pieces;
    foreach my $key (sort keys %data) {
        # scalar context, exactly as a string append would impose
        push @pieces, scalar expand($nr, $data{$key});
    }
    return join('', @pieces);
}
|
|
|
|
|
# Same as all_sf(), but the keys of %data are visited in descending string
# order: run expand() on every sub-field and return the concatenation.
#   $nr - passed through to expand() as its first argument
sub all_sf_r {
    my ($nr) = @_;

    my @pieces;
    foreach my $key (reverse sort keys %data) {
        # scalar context, exactly as a string append would impose
        push @pieces, scalar expand($nr, $data{$key});
    }
    return join('', @pieces);
}
|
|
|
|
|
# Shared implementation of all_sf2bib() and all_sf2bib_r().
#
#   $keys        - array ref with the keys of %data in output order
#   $nr          - tag written at the start of every output line
#   $max_in_line - start a new output line after this many sub-fields;
#                  0 or omitted puts everything on a single line
#   $sep         - text placed between sub-fields on one line
#                  (a false or omitted value means a single space)
#
# Returns a string made of "$nr value<sep>value...\n" lines, or "" when
# there is nothing to output.
sub _sf2bib_lines {
    my ($keys, $nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my $bib   = "";
    my $line  = "";
    my $count = 0;

    foreach my $k (@$keys) {
        my $value = $data{$k};
        $value = "" unless defined $value;

        # Test the length, not the truth, of what has been collected so far:
        # a sub-field whose value is "0" must neither be overwritten by the
        # next sub-field nor dropped from the output.
        $line = length($line) ? $line.$sep.$value : $value;

        $count++;
        if ($count == $max_in_line) {
            $bib .= $nr." ".$line."\n" if (length($line));
            $count = 0;
            $line  = "";
        }
    }
    # flush the last, possibly incomplete, line
    $bib .= $nr." ".$line."\n" if (length($line));

    return $bib;
}

# Format all sub-fields in %data as bib lines, keys in ascending order.
# Arguments: ($nr, $max_in_line, $sep) - see _sf2bib_lines().
sub all_sf2bib {
    return _sf2bib_lines([ sort keys %data ], @_);
}

# Format all sub-fields in %data as bib lines, keys in descending order.
# Arguments: ($nr, $max_in_line, $sep) - see _sf2bib_lines().
sub all_sf2bib_r {
    return _sf2bib_lines([ sort { $b cmp $a } keys %data ], @_);
}
|
|
|
|
|
#-------------------------------------------------------------------- |
|
|
|
|
83 |
sub c_852_iso { |
sub c_852_iso { |
84 |
my $tmp = $_[0]; |
my $tmp = $_[0]; |
85 |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/ if ($tmp); |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/ if ($tmp); |
86 |
return $tmp; |
return $tmp; |
87 |
} |
} |
88 |
|
|
89 |
|
sub c_iso_852 { |
90 |
|
my $tmp = $_[0]; |
91 |
|
$tmp =~ tr/ܫꔼȺ̪㍐슂ٝ// if ($tmp); |
92 |
|
return $tmp; |
93 |
|
} |
94 |
|
|
95 |
sub c_852_czs { |
sub c_852_czs { |
96 |
my $tmp = $_[0]; |
my $tmp = $_[0]; |
97 |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
102 |
|
|
103 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
104 |
|
|
105 |
# $mps .= sf_to_mps($subfiled_hash,"subfields",mps_id); |
# $mps .= sf_to_mps("subfields",$subfiled_hash) |
106 |
# |
# |
107 |
# subfields options: |
# subfields options: |
108 |
# * - all (no sort) |
# * - all (no sort) |
133 |
|
|
134 |
sub sf_to_mps { |
sub sf_to_mps { |
135 |
my ($sf_hash,$subfields,$mps_id) = @_; |
my ($sf_hash,$subfields,$mps_id) = @_; |
136 |
|
die "$mps_id is not in mps_header" if (!$in_mps_header{$mps_id}); |
137 |
my $out=""; |
my $out=""; |
138 |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
139 |
|
|
153 |
my $mps_id = shift @_ || die; |
my $mps_id = shift @_ || die; |
154 |
my $subfields = shift @_; |
my $subfields = shift @_; |
155 |
|
|
156 |
|
die "$mps_id is not in mps_header" if (!$in_mps_header{$mps_id}); |
157 |
|
|
158 |
my $i=0; |
my $i=0; |
159 |
my $out = ""; |
my $out = ""; |
160 |
|
|
198 |
my $group_sort = shift @_ || ''; |
my $group_sort = shift @_ || ''; |
199 |
my $sep = shift @_ || ' '; |
my $sep = shift @_ || ' '; |
200 |
|
|
201 |
|
my $fld = $bib_id; $fld =~ s/^%//; |
202 |
|
die "$fld is not in FieldNames" if (!$default::FieldNames{$fld}); |
203 |
|
|
204 |
my $i=0; |
my $i=0; |
205 |
|
|
206 |
my $bib=""; |
my $bib=""; |
293 |
} |
} |
294 |
|
|
295 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
|
#-------------------------------------------------------------------- |
|
|
|
|
296 |
|
|
297 |
my $last_tell=0; |
my $last_tell=0; |
298 |
|
|
327 |
my $row = OpenIsis::read( $db, $row_id ); |
my $row = OpenIsis::read( $db, $row_id ); |
328 |
if (my $tmp = $row->{'200'}->[0]) { |
if (my $tmp = $row->{'200'}->[0]) { |
329 |
|
|
330 |
my $bib = "%MFN $row->{mfn}\n"; |
my $bib; |
331 |
my $mps = "W $row->{mfn} 14\n"; |
my $mps = "W $row->{mfn} 14\n"; |
332 |
|
|
333 |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
334 |
if ($pcnt != $last_pcnt) { |
if ($pcnt != $last_pcnt) { |
335 |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt); |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt) if (! $opts{q}); |
336 |
$last_pcnt = $pcnt; |
$last_pcnt = $pcnt; |
337 |
} |
} |
338 |
|
|
339 |
my $headline; |
my $headline; |
340 |
$headline .= isis_sf($row,'200','a',"'"); |
$headline .= isis_sf($row,'200','a'); |
341 |
$headline .= isis_sf($row,'200','e'," : ","'"); |
$headline .= isis_sf($row,'200','e'," : "); |
342 |
|
$headline .= isis_sf($row,'200','f'," / "); |
343 |
|
$headline .= isis_sf($row,'210','d'," , "); |
344 |
|
|
345 |
|
# remove newlines, compress spaces |
346 |
|
$headline =~ s/[\n\r]//g; |
347 |
|
$headline =~ s/^\s+//g; |
348 |
|
$headline =~ s/\s+$//g; |
349 |
|
|
350 |
# author |
# author |
351 |
$bib .= isis_to_bib($row,'700','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'700','%700+','ab',undef,'>',', '); |
352 |
$bib .= isis_to_bib($row,'701','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','ab',undef,'>',', '); |
353 |
$bib .= isis_to_bib($row,'710','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','cd',undef,'>',', '); |
354 |
$bib .= isis_to_bib($row,'711','%700+','*',2,'<'); |
|
355 |
$bib .= isis_to_bib($row,'503','%700+','*',2,'<'); |
my $tmp; |
356 |
|
$tmp = isis_sf($row,'710','abc', '', '', (' : ',', ')); |
357 |
|
$tmp .= isis_sf($row,'710','dfe', ' (', ')', ('', ' ; ',' ; ')); |
358 |
|
$bib .= "%700+ $tmp\n" if ($tmp); |
359 |
|
|
360 |
|
$tmp = isis_sf($row,'711','abc', '', '', (' : ',', ')); |
361 |
|
$tmp .= isis_sf($row,'711','dfe', ' (', ')', ('', ' ; ',' ; ')); |
362 |
|
$bib .= "%700+ $tmp\n" if ($tmp); |
363 |
|
|
364 |
|
$bib .= isis_to_bib($row,'503','%700+','ab',undef,'>',', '); |
365 |
|
|
366 |
$mps .= isis_to_mps($row,'700',1); |
$mps .= isis_to_mps($row,'700',1); |
367 |
$mps .= isis_to_mps($row,'701',1); |
$mps .= isis_to_mps($row,'701',1); |
385 |
$book .= ". ".$sf->{c} if ($sf->{c}); |
$book .= ". ".$sf->{c} if ($sf->{c}); |
386 |
$book .= " / ".$sf->{x} if ($sf->{x}); |
$book .= " / ".$sf->{x} if ($sf->{x}); |
387 |
$book .= " ; ".$sf->{y} if ($sf->{y}); |
$book .= " ; ".$sf->{y} if ($sf->{y}); |
388 |
$bib .= "%200 $book\n" if ($book); |
$bib .= "%200+ $book\n" if ($book); |
389 |
|
|
390 |
$mps .= isis_to_mps($row,'200',2,"akcde"); |
$mps .= isis_to_mps($row,'200',2,"akcde"); |
391 |
$mps .= isis_to_mps($row,'532',2); |
$mps .= isis_to_mps($row,'532',2); |
397 |
$mps .= isis_to_mps($row,'233',2,"ae"); |
$mps .= isis_to_mps($row,'233',2,"ae"); |
398 |
|
|
399 |
|
|
400 |
$bib .= "%sv ".isis_sf($row,'230','v'). |
$tmp = isis_sf($row,'230','v'). |
401 |
isis_sf($row,'230','a',' : '). |
isis_sf($row,'230','a',' : '). |
402 |
isis_sf($row,'250',undef,'. - '). |
isis_sf($row,'250',undef,'. - '). |
403 |
isis_sf($row,'260',undef,'. - '). |
isis_sf($row,'260',undef,'. - '). |
404 |
isis_sf($row,'290',undef,'<br>ISBN '). |
isis_sf($row,'290',undef,'<br>ISBN '); |
405 |
"\n"; |
$bib .= "%sv $tmp\n" if ($tmp); |
406 |
$bib .= "%sv ".isis_sf($row,'231','v','<br>'). |
|
407 |
|
$tmp = isis_sf($row,'231','v','<br>'). |
408 |
isis_sf($row,'231','a',' : '). |
isis_sf($row,'231','a',' : '). |
409 |
isis_sf($row,'251',undef,'. - '). |
isis_sf($row,'251',undef,'. - '). |
410 |
isis_sf($row,'261',undef,'. - '). |
isis_sf($row,'261',undef,'. - '). |
411 |
isis_sf($row,'291',undef,'<br>ISBN '). |
isis_sf($row,'291',undef,'<br>ISBN '); |
412 |
"\n"; |
$bib .= "%sv $tmp\n" if ($tmp); |
413 |
$bib .= "%sv ".isis_sf($row,'232','v','<br>'). |
|
414 |
|
$tmp = isis_sf($row,'232','v','<br>'). |
415 |
isis_sf($row,'232','a',' : '). |
isis_sf($row,'232','a',' : '). |
416 |
isis_sf($row,'252',undef,'. - '). |
isis_sf($row,'252',undef,'. - '). |
417 |
isis_sf($row,'262',undef,'. - '). |
isis_sf($row,'262',undef,'. - '). |
418 |
isis_sf($row,'292',undef,'<br>ISBN '). |
isis_sf($row,'292',undef,'<br>ISBN '); |
419 |
"\n"; |
$bib .= "%sv $tmp\n" if ($tmp); |
420 |
$bib .= "%sv ".isis_sf($row,'233','v','<br>'). |
|
421 |
|
$tmp = isis_sf($row,'233','v','<br>'). |
422 |
isis_sf($row,'233','a',' : '). |
isis_sf($row,'233','a',' : '). |
423 |
isis_sf($row,'253',undef,'. - '). |
isis_sf($row,'253',undef,'. - '). |
424 |
isis_sf($row,'263',undef,'. - '). |
isis_sf($row,'263',undef,'. - '). |
425 |
isis_sf($row,'293',undef,'<br>ISBN '). |
isis_sf($row,'293',undef,'<br>ISBN '); |
|
"\n"; |
|
426 |
|
|
427 |
$mps .= isis_to_mps($row,'270',2); |
$mps .= isis_to_mps($row,'270',2); |
428 |
$mps .= isis_to_mps($row,'271',2); |
$mps .= isis_to_mps($row,'271',2); |
429 |
$mps .= isis_to_mps($row,'272',2); |
$mps .= isis_to_mps($row,'272',2); |
430 |
$mps .= isis_to_mps($row,'273',2); |
$mps .= isis_to_mps($row,'273',2); |
431 |
|
|
|
$headline .= isis_sf($row,'700','b'," "); |
|
|
$headline .= isis_sf($row,'700','a'," "); |
|
|
|
|
432 |
# izdavac |
# izdavac |
433 |
$mps .= isis_to_mps($row,'210',3); |
$mps .= isis_to_mps($row,'210',3); |
434 |
$mps .= isis_to_mps($row,'250',3); |
$mps .= isis_to_mps($row,'250',3); |
447 |
if (my $year = isis_sf($row,'210','d')) { |
if (my $year = isis_sf($row,'210','d')) { |
448 |
$year =~ s/^\s*cop\.*\s*//i; |
$year =~ s/^\s*cop\.*\s*//i; |
449 |
$year =~ s/[\[\]]*//g; |
$year =~ s/[\[\]]*//g; |
450 |
|
$year =~ s/[\n\r]//g; # remove cr |
451 |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
|
$headline .= " ($year)"; |
|
452 |
} |
} |
453 |
|
|
454 |
$mps .= isis_to_mps($row,'215',15); |
$mps .= isis_to_mps($row,'215',15); |
482 |
$bib .= isis_to_bib($row,'610','%610'); |
$bib .= isis_to_bib($row,'610','%610'); |
483 |
$mps .= isis_to_mps($row,'610',8); |
$mps .= isis_to_mps($row,'610',8); |
484 |
|
|
485 |
$bib .= isis_to_bib($row,'675','%675+'); |
$bib .= isis_to_bib($row,'675','%675+','a'); |
486 |
$mps .= isis_to_mps($row,'675',9); |
$mps .= isis_to_mps($row,'675',9); |
487 |
$bib .= isis_to_bib($row,'686','%675+'); |
$bib .= isis_to_bib($row,'686','%675+','a'); |
488 |
$mps .= isis_to_mps($row,'686',10); |
$mps .= isis_to_mps($row,'686',10); |
489 |
|
|
490 |
$bib .= isis_to_bib($row,'990','%990'); |
$bib .= isis_to_bib($row,'990','%990'); |
493 |
$bib .= isis_to_bib($row,'991','%991'); |
$bib .= isis_to_bib($row,'991','%991'); |
494 |
$mps .= isis_to_mps($row,'991',12); |
$mps .= isis_to_mps($row,'991',12); |
495 |
|
|
496 |
sub store_isn { |
# Jezik |
497 |
if (my $isn = shift @_) { |
$bib .= isis_to_bib($row,'101','%101'); |
498 |
my $nr = shift @_; |
$mps .= isis_to_mps($row,'101',16); |
499 |
my $tag = shift @_; |
# Pismo |
500 |
|
$bib .= isis_to_bib($row,'998','%101', 'a'); |
501 |
|
|
502 |
|
sub isis_isn_to_mps { |
503 |
|
my $row = shift @_ || die; |
504 |
|
my $isis_id = shift @_ || die; |
505 |
|
my $nr = shift @_ || die; |
506 |
|
my $i=0; |
507 |
|
my $mps=''; |
508 |
|
while (my $isn=$row->{$isis_id}->[$i]) { |
509 |
$isn =~ s/ +//g; # remove spaces |
$isn =~ s/ +//g; # remove spaces |
510 |
|
$isn =~ s/[\n\r]//g; # remove cr |
511 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
|
$bib .= "$tag $isn\n"; |
|
512 |
if ($isn =~ s/-//g) { |
if ($isn =~ s/-//g) { |
513 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
514 |
} |
} |
515 |
|
$i++; |
516 |
} |
} |
517 |
|
return $mps; |
518 |
} |
} |
519 |
|
|
520 |
# ISBN |
# ISBN |
521 |
store_isn($row->{10}->[0],13,'%ISBN'); |
$bib .= isis_to_bib($row,'10','%ISBN'); |
522 |
$mps .= isis_to_mps($row,'290',13); |
$mps .= isis_isn_to_mps($row,'10',13); |
523 |
$mps .= isis_to_mps($row,'291',13); |
$mps .= isis_isn_to_mps($row,'290',13); |
524 |
$mps .= isis_to_mps($row,'292',13); |
$mps .= isis_isn_to_mps($row,'291',13); |
525 |
$mps .= isis_to_mps($row,'293',13); |
$mps .= isis_isn_to_mps($row,'292',13); |
526 |
|
$mps .= isis_isn_to_mps($row,'293',13); |
527 |
|
|
528 |
# ISSN |
# ISSN |
529 |
#store_isn($row->{11}->[0],14,'%ISSN'); |
#store_isn($row->{11}->[0],14,'%ISSN'); |
530 |
|
|
531 |
$mps .= isis_to_mps($row,'532',1); |
$mps .= isis_to_mps($row,'532',1); |
532 |
|
|
533 |
$bib .= isis_to_bib($row,'994','%994a','a'); |
$bib .= '%knjiz '.$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}."\n"; |
|
|
|
534 |
# headline |
# headline |
535 |
if ($headline) { |
if ($headline) { |
536 |
$headline .= " [".$row->{mfn}."]"; ## debug MFN! |
$headline .= " <i>(".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")</i>"; ## debug MFN! |
537 |
|
$headline =~ s/&/∧/g; |
538 |
|
$headline =~ s/</</g; |
539 |
|
$headline =~ s/>/>/g; |
540 |
$mps .= "H ".c_852_iso($headline)."\n"; |
$mps .= "H ".c_852_iso($headline)."\n"; |
541 |
} else { |
} else { |
542 |
$mps .= "H nepoznato\n"; |
$mps .= "H nepoznato\n"; |
551 |
#} |
#} |
552 |
|
|
553 |
print R c_852_iso($bib); |
print R c_852_iso($bib); |
554 |
print R "%perl ".Dumper($row)."\n"; |
|
555 |
|
# check if all fields are defined |
556 |
|
foreach (split(/\n/,$bib)) { |
557 |
|
if (/^%(\w+)\s/ && !$default::FieldNames{$1}) { |
558 |
|
die "field $1 used but not in FieldNames"; |
559 |
|
} |
560 |
|
} |
561 |
|
|
562 |
|
# print R "%perl ".Dumper($row)."\n"; |
563 |
|
|
564 |
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n"; |
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n"; |
565 |
$last_tell=tell(R); |
$last_tell=tell(R); |