8 |
|
|
9 |
my %opts; |
my %opts; |
10 |
|
|
11 |
getopt('dm', \%opts); |
getopts('d:m:q', \%opts); |
12 |
|
|
13 |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
14 |
|
|
28 |
print S $common::mps_header; |
print S $common::mps_header; |
29 |
print MPS $common::mps_header; |
print MPS $common::mps_header; |
30 |
|
|
31 |
|
#-------------------------------------------------------------------- |
32 |
|
# init array in_mps_header for config checks later |
33 |
|
|
34 |
|
my %in_mps_header; |
35 |
|
foreach (split(/\n/,$common::mps_header)) { |
36 |
|
if (/^F /) { |
37 |
|
my (undef,$isis,$mps,undef) = split(/ /,$_,4); |
38 |
|
$in_mps_header{$mps}=$isis; |
39 |
|
} |
40 |
|
} |
41 |
|
require "./search/config.pm"; |
42 |
|
|
43 |
|
#-------------------------------------------------------------------- |
44 |
|
# read database configuration, store database names |
45 |
|
open(CF,$common::database_cf) || die "$common::database_cf: $!"; |
46 |
|
my %DatabaseDescriptions; |
47 |
|
while(<CF>) { |
48 |
|
chomp; |
49 |
|
if (/^database-name:([^=]+)=(.*)$/) { |
50 |
|
my ($db_name,$db_desc) = ($1,$2); |
51 |
|
$db_desc=~s/^##\w+##//g; |
52 |
|
$DatabaseDescriptions{$db_name}=$db_desc; |
53 |
|
} |
54 |
|
} |
55 |
|
close(CF); |
56 |
|
|
57 |
|
#-------------------------------------------------------------------- |
58 |
# |
# |
59 |
# expand(nr,"space separated string"); |
# expand(nr,"space separated string"); |
60 |
# |
# |
61 |
|
|
62 |
sub expand { |
sub expand { |
63 |
my $nr = shift @_; |
my $nr = shift @_; |
64 |
|
die "$nr is not in mps_header" if (!$in_mps_header{$nr}); |
65 |
my $out = ""; |
my $out = ""; |
66 |
while (my $fld = c_852_iso(shift @_)) { |
while (my $fld = c_852_iso(shift @_)) { |
67 |
my @words=split(/\s+/,$fld); |
my @words=split(/\s+/,$fld); |
78 |
|
|
79 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
80 |
|
|
|
################### ERASE###############3 |
|
|
|
|
|
# Expand the sub-fields of one ISIS field into %data.
#
# A field with sub-fields looks like "^aFirst^bSecond": every sub-field
# starts with "^", followed by a one-character code and then its text.
#
#   ex_sf($field)
#
#   $field - raw contents of the field
#
# Side effect: %data is emptied and then refilled, keyed by sub-field
# code (a repeated code keeps only its last occurrence). A field that
# does not start with "^" is stored whole under the key "all", with a
# marker suffix appended. A false $field leaves %data empty.
my %data; # FIX

sub ex_sf {
    my ($in) = @_;

    %data = ();
    return unless $in;

    # No leading "^": the field carries no sub-fields at all.
    if ($in !~ m/^\^/) {
        $data{all} = $in."<-- iz polja bez podpolja";
        return;
    }

    foreach my $fld (split(/\^/, $in)) {
        # The code is exactly one character. A greedy (\w+) would pull
        # the beginning of the text into the hash key ("aTitle" would be
        # stored under "aTitl" with the value "e").
        $data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
    }

    return;
}
|
|
|
|
|
# dump all sub-fields |
|
|
# Run expand() over every value currently held in %data, visiting the
# keys in ascending string order, and return the results concatenated.
#
#   $nr - passed through unchanged as the first argument of expand()
sub all_sf {
    my ($nr) = @_;

    # scalar() keeps expand() in the same call context as a ".=" would
    return join('', map { scalar expand($nr, $data{$_}) } sort keys %data);
}
|
|
|
|
|
# Same as walking %data through expand(), but with the keys visited in
# descending string order; returns the concatenated results.
#
#   $nr - passed through unchanged as the first argument of expand()
sub all_sf_r {
    my ($nr) = @_;

    my $result = "";
    # hash keys are unique, so reversing the ascending sort gives the
    # same order as sorting with {$b cmp $a}
    for my $code (reverse sort keys %data) {
        $result .= expand($nr, $data{$code});
    }
    return $result;
}
|
|
|
|
|
# Format the values held in %data as output lines of the form
# "$nr value<sep>value...\n", taking the keys in ascending string order.
#
#   $nr          - prefix written at the start of every line
#   $max_in_line - number of values per line; with 0 (the default) the
#                  counter never matches, so all values share one line
#   $sep         - separator between values (default: a single space)
#
# Returns the lines concatenated into one string ("" if nothing was
# produced).
sub all_sf2bib {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my @lines;
    my $line;
    my $count = 0;

    for my $code (sort keys %data) {
        $line = $line ? $line.$sep.$data{$code} : $data{$code};

        # keep collecting until the line holds $max_in_line values
        next unless ++$count == $max_in_line;

        push @lines, $nr." ".$line."\n" if ($line);
        $count = 0;
        $line  = "";
    }

    # whatever is left over forms the last (shorter) line
    push @lines, $nr." ".$line."\n" if ($line);

    return join('', @lines);
}
|
|
|
|
|
# Format the values held in %data as output lines of the form
# "$nr value<sep>value...\n", taking the keys in descending string
# order.
#
#   $nr          - prefix written at the start of every line
#   $max_in_line - number of values per line; with 0 (the default) the
#                  counter never matches, so all values share one line
#   $sep         - separator between values (default: a single space)
#
# Returns the lines concatenated into one string ("" if nothing was
# produced).
sub all_sf2bib_r {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my @lines;
    my $line;
    my $count = 0;

    # hash keys are unique, so reversing the ascending sort gives the
    # same order as sorting with {$b cmp $a}
    for my $code (reverse sort keys %data) {
        $line = $line ? $line.$sep.$data{$code} : $data{$code};

        # keep collecting until the line holds $max_in_line values
        next unless ++$count == $max_in_line;

        push @lines, $nr." ".$line."\n" if ($line);
        $count = 0;
        $line  = "";
    }

    # whatever is left over forms the last (shorter) line
    push @lines, $nr." ".$line."\n" if ($line);

    return join('', @lines);
}
|
|
|
|
|
#-------------------------------------------------------------------- |
|
|
|
|
81 |
sub c_852_iso { |
sub c_852_iso { |
82 |
my $tmp = $_[0]; |
my $tmp = $_[0]; |
83 |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/ if ($tmp); |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/ if ($tmp); |
84 |
return $tmp; |
return $tmp; |
85 |
} |
} |
86 |
|
|
87 |
|
sub c_iso_852 { |
88 |
|
my $tmp = $_[0]; |
89 |
|
$tmp =~ tr/ܫꔼȺ̪㍐슂ٝ// if ($tmp); |
90 |
|
return $tmp; |
91 |
|
} |
92 |
|
|
93 |
sub c_852_czs { |
sub c_852_czs { |
94 |
my $tmp = $_[0]; |
my $tmp = $_[0]; |
95 |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
100 |
|
|
101 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
102 |
|
|
103 |
# $mps .= sf_to_mps($subfiled_hash,"subfields",mps_id); |
# $mps .= sf_to_mps("subfields",$subfiled_hash) |
104 |
# |
# |
105 |
# subfields options: |
# subfields options: |
106 |
# * - all (no sort) |
# * - all (no sort) |
131 |
|
|
132 |
sub sf_to_mps { |
sub sf_to_mps { |
133 |
my ($sf_hash,$subfields,$mps_id) = @_; |
my ($sf_hash,$subfields,$mps_id) = @_; |
134 |
|
die "$mps_id is not in mps_header" if (!$in_mps_header{$mps_id}); |
135 |
my $out=""; |
my $out=""; |
136 |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
137 |
|
|
151 |
my $mps_id = shift @_ || die; |
my $mps_id = shift @_ || die; |
152 |
my $subfields = shift @_; |
my $subfields = shift @_; |
153 |
|
|
154 |
|
die "$mps_id is not in mps_header" if (!$in_mps_header{$mps_id}); |
155 |
|
|
156 |
my $i=0; |
my $i=0; |
157 |
my $out = ""; |
my $out = ""; |
158 |
|
|
196 |
my $group_sort = shift @_ || ''; |
my $group_sort = shift @_ || ''; |
197 |
my $sep = shift @_ || ' '; |
my $sep = shift @_ || ' '; |
198 |
|
|
199 |
|
my $fld = $bib_id; $fld =~ s/^%//; |
200 |
|
die "$fld is not in FieldNames" if (!$default::FieldNames{$fld}); |
201 |
|
|
202 |
my $i=0; |
my $i=0; |
203 |
|
|
204 |
my $bib=""; |
my $bib=""; |
291 |
} |
} |
292 |
|
|
293 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
|
#-------------------------------------------------------------------- |
|
|
|
|
294 |
|
|
295 |
my $last_tell=0; |
my $last_tell=0; |
296 |
|
|
330 |
|
|
331 |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
332 |
if ($pcnt != $last_pcnt) { |
if ($pcnt != $last_pcnt) { |
333 |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt); |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt) if (! $opts{q}); |
334 |
$last_pcnt = $pcnt; |
$last_pcnt = $pcnt; |
335 |
} |
} |
336 |
|
|
337 |
my $headline; |
my $headline; |
338 |
$headline .= isis_sf($row,'200','a',"'"); |
$headline .= isis_sf($row,'200','a'); |
339 |
$headline .= isis_sf($row,'200','e'," : ","'"); |
$headline .= isis_sf($row,'200','e'," : "); |
340 |
|
$headline .= isis_sf($row,'200','f'," / "); |
341 |
|
$headline .= isis_sf($row,'210','d'," , "); |
342 |
|
|
343 |
|
# remove newlines, compress spaces |
344 |
|
$headline =~ s/[\n\r]//g; |
345 |
|
$headline =~ s/^\s+//g; |
346 |
|
$headline =~ s/\s+$//g; |
347 |
|
|
348 |
# author |
# author |
349 |
$bib .= isis_to_bib($row,'700','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'700','%700+','*',2,'>',', '); |
350 |
$bib .= isis_to_bib($row,'701','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','*',2,'>',', '); |
351 |
$bib .= isis_to_bib($row,'710','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'710','%700+','*',2,'>',', '); |
352 |
$bib .= isis_to_bib($row,'711','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'711','%700+','*',2,'>',', '); |
353 |
$bib .= isis_to_bib($row,'503','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'503','%700+','*',2,'>',', '); |
354 |
|
|
355 |
$mps .= isis_to_mps($row,'700',1); |
$mps .= isis_to_mps($row,'700',1); |
356 |
$mps .= isis_to_mps($row,'701',1); |
$mps .= isis_to_mps($row,'701',1); |
374 |
$book .= ". ".$sf->{c} if ($sf->{c}); |
$book .= ". ".$sf->{c} if ($sf->{c}); |
375 |
$book .= " / ".$sf->{x} if ($sf->{x}); |
$book .= " / ".$sf->{x} if ($sf->{x}); |
376 |
$book .= " ; ".$sf->{y} if ($sf->{y}); |
$book .= " ; ".$sf->{y} if ($sf->{y}); |
377 |
$bib .= "%200 $book\n" if ($book); |
$bib .= "%200+ $book\n" if ($book); |
378 |
|
|
379 |
$mps .= isis_to_mps($row,'200',2,"akcde"); |
$mps .= isis_to_mps($row,'200',2,"akcde"); |
380 |
$mps .= isis_to_mps($row,'532',2); |
$mps .= isis_to_mps($row,'532',2); |
386 |
$mps .= isis_to_mps($row,'233',2,"ae"); |
$mps .= isis_to_mps($row,'233',2,"ae"); |
387 |
|
|
388 |
|
|
389 |
$bib .= "%sv ".isis_sf($row,'230','v'). |
my $tmp; |
390 |
|
$tmp = isis_sf($row,'230','v'). |
391 |
isis_sf($row,'230','a',' : '). |
isis_sf($row,'230','a',' : '). |
392 |
isis_sf($row,'250',undef,'. - '). |
isis_sf($row,'250',undef,'. - '). |
393 |
isis_sf($row,'260',undef,'. - '). |
isis_sf($row,'260',undef,'. - '). |
394 |
isis_sf($row,'290',undef,'<br>ISBN '). |
isis_sf($row,'290',undef,'<br>ISBN '); |
395 |
"\n"; |
$bib .= "%sv $tmp\n" if ($tmp); |
396 |
$bib .= "%sv ".isis_sf($row,'231','v','<br>'). |
|
397 |
|
$tmp = isis_sf($row,'231','v','<br>'). |
398 |
isis_sf($row,'231','a',' : '). |
isis_sf($row,'231','a',' : '). |
399 |
isis_sf($row,'251',undef,'. - '). |
isis_sf($row,'251',undef,'. - '). |
400 |
isis_sf($row,'261',undef,'. - '). |
isis_sf($row,'261',undef,'. - '). |
401 |
isis_sf($row,'291',undef,'<br>ISBN '). |
isis_sf($row,'291',undef,'<br>ISBN '); |
402 |
"\n"; |
$bib .= "%sv $tmp\n" if ($tmp); |
403 |
$bib .= "%sv ".isis_sf($row,'232','v','<br>'). |
|
404 |
|
$tmp = isis_sf($row,'232','v','<br>'). |
405 |
isis_sf($row,'232','a',' : '). |
isis_sf($row,'232','a',' : '). |
406 |
isis_sf($row,'252',undef,'. - '). |
isis_sf($row,'252',undef,'. - '). |
407 |
isis_sf($row,'262',undef,'. - '). |
isis_sf($row,'262',undef,'. - '). |
408 |
isis_sf($row,'292',undef,'<br>ISBN '). |
isis_sf($row,'292',undef,'<br>ISBN '); |
409 |
"\n"; |
$bib .= "%sv $tmp\n" if ($tmp); |
410 |
$bib .= "%sv ".isis_sf($row,'233','v','<br>'). |
|
411 |
|
$tmp = isis_sf($row,'233','v','<br>'). |
412 |
isis_sf($row,'233','a',' : '). |
isis_sf($row,'233','a',' : '). |
413 |
isis_sf($row,'253',undef,'. - '). |
isis_sf($row,'253',undef,'. - '). |
414 |
isis_sf($row,'263',undef,'. - '). |
isis_sf($row,'263',undef,'. - '). |
415 |
isis_sf($row,'293',undef,'<br>ISBN '). |
isis_sf($row,'293',undef,'<br>ISBN '); |
|
"\n"; |
|
416 |
|
|
417 |
$mps .= isis_to_mps($row,'270',2); |
$mps .= isis_to_mps($row,'270',2); |
418 |
$mps .= isis_to_mps($row,'271',2); |
$mps .= isis_to_mps($row,'271',2); |
419 |
$mps .= isis_to_mps($row,'272',2); |
$mps .= isis_to_mps($row,'272',2); |
420 |
$mps .= isis_to_mps($row,'273',2); |
$mps .= isis_to_mps($row,'273',2); |
421 |
|
|
|
$headline .= isis_sf($row,'700','b'," "); |
|
|
$headline .= isis_sf($row,'700','a'," "); |
|
|
|
|
422 |
# izdavac |
# izdavac |
423 |
$mps .= isis_to_mps($row,'210',3); |
$mps .= isis_to_mps($row,'210',3); |
424 |
$mps .= isis_to_mps($row,'250',3); |
$mps .= isis_to_mps($row,'250',3); |
437 |
if (my $year = isis_sf($row,'210','d')) { |
if (my $year = isis_sf($row,'210','d')) { |
438 |
$year =~ s/^\s*cop\.*\s*//i; |
$year =~ s/^\s*cop\.*\s*//i; |
439 |
$year =~ s/[\[\]]*//g; |
$year =~ s/[\[\]]*//g; |
440 |
|
$year =~ s/[\n\r]//g; # remove cr |
441 |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
|
$headline .= " ($year)"; |
|
442 |
} |
} |
443 |
|
|
444 |
$mps .= isis_to_mps($row,'215',15); |
$mps .= isis_to_mps($row,'215',15); |
483 |
$bib .= isis_to_bib($row,'991','%991'); |
$bib .= isis_to_bib($row,'991','%991'); |
484 |
$mps .= isis_to_mps($row,'991',12); |
$mps .= isis_to_mps($row,'991',12); |
485 |
|
|
486 |
sub store_isn { |
sub isis_isn_to_mps { |
487 |
if (my $isn = shift @_) { |
my $row = shift @_ || die; |
488 |
my $nr = shift @_; |
my $isis_id = shift @_ || die; |
489 |
my $tag = shift @_; |
my $nr = shift @_ || die; |
490 |
|
my $i=0; |
491 |
|
my $mps=''; |
492 |
|
while (my $isn=$row->{$isis_id}->[$i]) { |
493 |
$isn =~ s/ +//g; # remove spaces |
$isn =~ s/ +//g; # remove spaces |
494 |
|
$isn =~ s/[\n\r]//g; # remove cr |
495 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
|
$bib .= "$tag $isn\n"; |
|
496 |
if ($isn =~ s/-//g) { |
if ($isn =~ s/-//g) { |
497 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
498 |
} |
} |
499 |
|
$i++; |
500 |
} |
} |
501 |
|
return $mps; |
502 |
} |
} |
503 |
|
|
504 |
# ISBN |
# ISBN |
505 |
store_isn($row->{10}->[0],13,'%ISBN'); |
$bib .= isis_to_bib($row,'10','%ISBN'); |
506 |
$mps .= isis_to_mps($row,'290',13); |
$mps .= isis_isn_to_mps($row,'10',13); |
507 |
$mps .= isis_to_mps($row,'291',13); |
$mps .= isis_isn_to_mps($row,'290',13); |
508 |
$mps .= isis_to_mps($row,'292',13); |
$mps .= isis_isn_to_mps($row,'291',13); |
509 |
$mps .= isis_to_mps($row,'293',13); |
$mps .= isis_isn_to_mps($row,'292',13); |
510 |
|
$mps .= isis_isn_to_mps($row,'293',13); |
511 |
|
|
512 |
# ISSN |
# ISSN |
513 |
#store_isn($row->{11}->[0],14,'%ISSN'); |
#store_isn($row->{11}->[0],14,'%ISSN'); |
518 |
|
|
519 |
# headline |
# headline |
520 |
if ($headline) { |
if ($headline) { |
521 |
$headline .= " [".$row->{mfn}."]"; ## debug MFN! |
$headline .= " <i>(".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")</i>"; ## debug MFN! |
522 |
|
$headline =~ s/&/∧/g; |
523 |
|
$headline =~ s/</</g; |
524 |
|
$headline =~ s/>/>/g; |
525 |
$mps .= "H ".c_852_iso($headline)."\n"; |
$mps .= "H ".c_852_iso($headline)."\n"; |
526 |
} else { |
} else { |
527 |
$mps .= "H nepoznato\n"; |
$mps .= "H nepoznato\n"; |
536 |
#} |
#} |
537 |
|
|
538 |
print R c_852_iso($bib); |
print R c_852_iso($bib); |
539 |
print R "%perl ".Dumper($row)."\n"; |
|
540 |
|
# check if all fields are defined |
541 |
|
foreach (split(/\n/,$bib)) { |
542 |
|
if (/^%(\w+)\s/ && !$default::FieldNames{$1}) { |
543 |
|
die "field $1 used but not in FieldNames"; |
544 |
|
} |
545 |
|
} |
546 |
|
|
547 |
|
# print R "%perl ".Dumper($row)."\n"; |
548 |
|
|
549 |
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n"; |
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n"; |
550 |
$last_tell=tell(R); |
$last_tell=tell(R); |