/[webpac-proto]/isis2stream.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /isis2stream.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations)
Sun Jun 16 14:37:43 2002 UTC (17 years, 3 months ago) by dpavlin
Branch: MAIN
Changes since 1.2: +1 -21 lines
File MIME type: text/plain
moved common functions to common.pl

1 #!/usr/bin/perl -w
2
3 use strict;
4 use OpenIsis;
5 use Getopt::Std;
6 #use Data::Dumper;
7 require "common.pl";
8
# --- configuration ---------------------------------------------------

# Root directory under which one index directory per database is created.
my $install_dir="/local/index";
# Command line of the MPS indexer; the generated stream is piped into it.
my $mpsindex="/local/mps-5.3/bin/mpsindex -l 9 -b";
# Directory that holds the ISIS databases (one sub-directory per database).
my $isis_data="/var/autofs/misc/isis_data/";
#my $isis_data="/mnt/20020606/Isis/Data/";	# at home

my %opts;

getopt('dD', \%opts);

die "usage: $0 -d [database_dir] " if (! $opts{d});

# NOTE(review): $db_dir comes straight from the command line and is
# interpolated into file names and into the shell command used to start
# the indexer below -- only run this with trusted arguments.
my $db_dir = $opts{d};

# Create the per-database directories on first use; fail right away with
# the offending path instead of failing later in open().
foreach my $new_dir ("$install_dir/$db_dir", "$install_dir/$db_dir/data") {
	next if (-e $new_dir);
	mkdir($new_dir, 0777) || die "can't create directory $new_dir: $!";
}

my $dir="$install_dir/$db_dir/data";


# S   - copy of the stream written to disk
# R   - bibliographic records; byte offsets into this file are written
#       into the stream by the main loop
# MPS - pipe to the indexer; $mpsindex carries its own arguments, so the
#       shell form of open is required here
open(S, '>', "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, '>', "$dir/bib") || die "can't open output $dir/bib: $!";
open(MPS,"| $mpsindex -d $install_dir/$db_dir -autokey") || die "can't start MPS indexer $mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";

# Stream header, sent verbatim to both outputs.  The third column of the
# "F" lines is the numeric id that the main loop passes to isis_to_mps().
# NOTE(review): presumably version / language / field declarations of the
# MPS stream format -- confirm against the MPS documentation.
my $s="V 5 3
L hr-HR
F 700+ 1 Autor
F 200+ 2 Naslov
F 210 3 Izdavanje
F 225 4 Nakladnièka cjelina
F 300+ 5 Napomene
F 330 6 Sadr¾aj
F 464 7 Analitièki radovi
F 610 8 Kljuène rijeèi
F 675 9 UDK
F 686 10 CC
F 990 11 Signatura
F 991 12 Inventarni broj
F 10 13 ISBN
";

print S $s;
print MPS $s;
52
#
# expand(nr, "space separated string", ...);
#
# Returns one "W <word> <nr>\n" line for every whitespace separated word
# of every string argument.  Each string is first folded to plain ASCII by
# c_852_czs() (cp852 -> ISO-8859-2 -> ASCII, sharp s -> "ss"), which is the
# same conversion this sub used to carry as a private copy of the tables.
#
# Undefined and empty arguments are skipped; a value such as "0" no longer
# ends processing of the remaining arguments.
#
sub expand {
	my $nr = shift @_;
	my $out = "";
	foreach my $fld (@_) {
		next if (! defined $fld || $fld eq '');
		# FIX: this should be replaced by stemmer!
		# split ' ' ignores leading whitespace, so no empty word is emitted
		foreach my $w (split(' ', c_852_czs($fld))) {
			$out .= "W $w $nr\n";
		}
	}
	return $out;
}
72
73 #--------------------------------------------------------------------
74
################### ERASE ###############

# Expand sub-fields from an ISIS field
# (^a.....^b....)
#
# ex_sf($field) clears the file-level hash %data and refills it:
#   - a field that starts with "^" is split on "^"; every piece becomes
#     $data{<first character>} = <rest of the piece>
#   - any other non-empty field is stored whole under the key "all",
#     followed by a marker text
#   - a false field (undef, "", "0") leaves %data empty
# Returns a reference to %data.
my %data;	# FIX
sub ex_sf {
	%data = ();
	my $in = $_[0];
	if ($in) {
		if ($in =~ m/^\^/) {
			foreach my $fld (split(/\^/,$in)) {
				# the sub-field code is exactly one character; a greedy
				# \w+ here would swallow the start of the value
				$data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
			}
		} else {
			# marker text means "from a field without sub-fields"
			$data{all} = $in."<-- iz polja bez podpolja";
		}
	}
	return \%data;
}
96
# Dump all sub-fields: runs expand() over every value currently stored in
# %data, visiting the keys in ascending order, and returns the
# concatenated result.
sub all_sf {
	my ($nr) = @_;
	my @chunks;
	for my $code (sort keys %data) {
		push @chunks, scalar(expand($nr, $data{$code}));
	}
	return join("", @chunks);
}
106
# Same as all_sf(), but the keys of %data are visited in descending order.
sub all_sf_r {
	my ($nr) = @_;
	my $result = "";
	for my $code (reverse sort keys %data) {
		$result .= expand($nr, $data{$code});
	}
	return $result;
}
115
# Shared worker for all_sf2bib() and all_sf2bib_r().
#
#   _sf2bib_lines($nr, $max_in_line, $sep, @keys)
#
# Walks @keys in the given order, collects $data{key} values and emits one
# "<nr> <values joined by sep>\n" line for every $max_in_line keys (all
# keys on a single line when $max_in_line is 0).  A group that joins to an
# empty string produces no line.
sub _sf2bib_lines {
	my ($nr, $max_in_line, $sep, @keys) = @_;
	$max_in_line ||= 0;
	$sep = ' ' if (! defined $sep || $sep eq '');

	my $bib = "";
	my @group;
	my $in_group = 0;

	# append the pending group as one line and start a new one
	my $flush = sub {
		my $line = join($sep, @group);
		$bib .= $nr." ".$line."\n" if ($line ne '');
		@group = ();
		$in_group = 0;
	};

	foreach my $k (@keys) {
		# a value of "0" is kept: only undefined and empty values are left out
		push @group, $data{$k} if (defined $data{$k} && $data{$k} ne '');
		$in_group++;
		$flush->() if ($in_group == $max_in_line);
	}
	$flush->();
	return $bib;
}

# all_sf2bib(nr [,max_in_line [,separator]])
#
# Formats the values in %data, keys in ascending order, as lines that
# start with nr.  max_in_line defaults to 0 (everything on one line),
# separator defaults to a single space.
sub all_sf2bib {
	my ($nr, $max_in_line, $sep) = @_;
	return _sf2bib_lines($nr, $max_in_line, $sep, sort keys %data);
}

# Same as all_sf2bib(), but with the keys in descending order.
sub all_sf2bib_r {
	my ($nr, $max_in_line, $sep) = @_;
	return _sf2bib_lines($nr, $max_in_line, $sep, sort {$b cmp $a} keys %data);
}
163
164 #--------------------------------------------------------------------
165
# c_852_iso($octets)
#
# Converts a byte string from code page 852 to ISO-8859-2 and returns the
# converted copy; the argument itself is not modified.  False values
# (undef, "", "0") are returned unchanged.
#
# The conversion is done by the core Encode module instead of a literal
# 128-character tr/// table, so it no longer depends on the encoding in
# which this source file happens to be stored.  cp852 characters that do
# not exist in ISO-8859-2 (for example the box drawing characters) come
# out as Encode's substitution character.
sub c_852_iso {
	my $tmp = $_[0];
	if ($tmp) {
		require Encode;
		Encode::from_to($tmp, 'cp852', 'iso-8859-2');
	}
	return $tmp;
}
171
# c_852_czs($octets)
#
# Converts a cp852 byte string to plain ASCII in three steps and returns
# the converted copy:
#   1. cp852 -> ISO-8859-2 (core Encode module)
#   2. ISO-8859-2 letters with diacritics and a few accent/punctuation
#      characters -> their ASCII look-alikes (table below)
#   3. sharp s (0xDF) -> "ss"
# An undefined argument is returned as undef.
#
# The table is written with \x escapes so that it does not depend on the
# encoding of this source file, and the "-" in the replacement list is
# escaped: unescaped it sits between "'" and '"' and is read by tr/// as
# a (descending, hence invalid) character range.
# NOTE(review): 0xD4 (capital O with circumflex) maps to lower case "o" in
# the original table; kept as is -- confirm whether "O" was intended.
sub c_852_czs {
	my $tmp = $_[0];
	return $tmp if (! defined $tmp);

	require Encode;
	Encode::from_to($tmp, 'cp852', 'iso-8859-2');

	$tmp =~ tr{\xC7\xFC\xE9\xE2\xE4\xF9\xE6\xE7\xB3\xEB\xD5\xF5\xEE\xAC\xC4\xC6\xC9\xC5\xE5\xF4\xF6\xA5\xB5\xA6\xB6\xD6\xDC\xAB\xBB\xA3\xE8\xE1\xED\xF3\xFA\xA1\xB1\xAE\xBE\xCA\xEA\xBC\xC8\xBA\xC1\xC2\xCC\xAA\xAF\xBF\xC3\xE3\xF0\xD0\xCF\xCB\xEF\xD2\xCD\xCE\xEC\xDE\xD9\xD3\xD4\xD1\xF1\xF2\xA9\xB9\xC0\xDA\xE0\xDB\xFD\xDD\xFE\xB4\xAD\xBD\xB2\xB7\xA2\xB8\xA8\xFF\xD8\xF8}
	          {CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'\-".'',"'Rr};
	$tmp =~ s/\xDF/ss/g;
	return $tmp;
}
179
180 #--------------------------------------------------------------------
181
# @codes = subfields_str_2_arr("subfields" [, $subfield_hash]);
#
# Turns a sub-field specification into a list of sub-field codes.
#
# subfields options:
#	*	- all keys of $subfield_hash (no sort)
#	>	- all keys, sorted ascending
#	<	- all keys, sorted descending
#	abc>	- the listed codes (first ">" removed), sorted ascending
#	abc<	- the listed codes (first "<" removed), sorted descending
#	abc	- the listed codes in the given order
#
# Returns an empty list when the specification is undefined or empty.
# The specification "0" is a valid single sub-field code.
sub subfields_str_2_arr {
	my ($subfields, $sf_hash) = @_;
	return if (! defined $subfields || $subfields eq '');

	my @sf_arr;

	if ($subfields eq "*") {
		@sf_arr = keys %{$sf_hash};
	} elsif ($subfields eq ">") {
		@sf_arr = sort keys %{$sf_hash};
	} elsif ($subfields eq "<") {
		@sf_arr = sort {$b cmp $a} keys %{$sf_hash};
	} elsif ($subfields =~ s/>//) {
		# s/// works on the local copy only
		@sf_arr = sort split(//,$subfields);
	} elsif ($subfields =~ s/<//) {
		@sf_arr = sort {$b cmp $a} split(//,$subfields);
	} else {
		@sf_arr = split(//,$subfields);
	}
	return @sf_arr;
}
209
210
# $mps .= sf_to_mps($subfield_hash,"subfields",mps_id);
#
# Resolves "subfields" with subfields_str_2_arr() and concatenates what
# mps_expand() returns for the value of each resulting sub-field code.
sub sf_to_mps {
	my $sf_hash = shift @_;
	my $subfields = shift @_;
	my $mps_id = shift @_;

	my $out = "";
	for my $code (subfields_str_2_arr($subfields,$sf_hash)) {
		$out .= mps_expand($mps_id, $sf_hash->{$code});
	}
	return $out;
}
221
222 #--------------------------------------------------------------------
223
# $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
#
# Walks the occurrences of field isis_id in $row, stopping at the first
# false one, and concatenates the output of mps_expand() for:
#   - the sub-fields named in "subfields" (one character per code), or
#   - every sub-field of the occurrence when "subfields" is not given, or
#   - the whole occurrence when OpenIsis::subfields() finds no sub-fields.
# Dies when row, isis_id or mps_id is missing or false.
sub isis_to_mps {
	my $row = shift @_ || die;
	my $isis_id = shift @_ || die;
	my $mps_id = shift @_ || die;
	my $subfields = shift @_;

	my $out = "";

	for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
		my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

		# occurrence without sub-fields: index the raw value
		if (scalar(keys %{$sf_hash}) == 0) {
			$out .= mps_expand($mps_id, $row->{$isis_id}->[$i]);
			next;
		}

		my @codes;
		if ($subfields) {
			@codes = split(//,$subfields);
		} else {
			@codes = keys %{$sf_hash};
		}
		for my $code (@codes) {
			$out .= mps_expand($mps_id, $sf_hash->{$code});
		}
	}
	return $out;
}
255
256 #--------------------------------------------------------------------
257
# bib_grp($subfield_hash, bib_id, separator, 'a','b','c', ...)
#
# Joins the values of the listed sub-fields with separator and returns
# "<bib_id> <joined values>\n".  Sub-fields that are missing or empty are
# left out (a value of "0" is kept); returns "" when nothing is left or
# when no hash is given.
sub bib_grp {
	my $sf_hash = shift @_ || return "";
	my $bib_id = shift @_;
	my $sep = shift @_;

	my @values;
	foreach my $code (@_) {
		my $value = $sf_hash->{$code};
		push @values, $value if (defined $value && $value ne '');
	}
	return "" if (! @values);
	return "$bib_id ".join($sep, @values)."\n";
}

# $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
#
# fields:
#	*	- all (no sort)
#	>	- all, sort ascending
#	<	- all, sort descending
#
# Produces one or more "<bib_id> <text>\n" lines for every occurrence of
# field isis_id in $row (occurrences are read until the first false one):
#   - occurrence without sub-fields: the raw value is used
#   - group size 0 (default): all selected sub-fields on one line
#   - group size N: the selected sub-field codes are taken N at a time,
#     each group ordered according to "group sort" ('', '>' or '<') and
#     written on its own line
# Defaults: subfields '*', group size 0, group sort '', separator ' '.
# Dies when row, isis_id or bib_id is missing or false.
sub isis_to_bib {
	my $row = shift @_ || die;
	my $isis_id = shift @_ || die;
	my $bib_id = shift @_ || die;
	my $subfields = shift @_ || '*';
	my $group_size = shift @_ || 0;
	my $group_sort = shift @_ || '';
	my $sep = shift @_ || ' ';

	my $i=0;

	my $bib="";

	while ($row->{$isis_id}->[$i]) {
		my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

		if (scalar keys %{$sf_hash} > 0) {
			if ($group_size > 0) {
				my $codes = join("",subfields_str_2_arr($subfields,$sf_hash));
				# step through the code string by position, so a remaining
				# code of "0" is not mistaken for "nothing left"
				for (my $pos = 0; $pos < length($codes); $pos += $group_size) {
					my $grp = substr($codes,$pos,$group_size);
					$bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($grp.$group_sort) );
				}
			} else {
				$bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($subfields,$sf_hash) );
			}
		} else {
			# no subfields, use just value!
			$bib .= "$bib_id ".$row->{$isis_id}->[$i]."\n";
		}
		$i++;
	}
	return $bib;
}
326
327 #--------------------------------------------------------------------
328
# isis_sf($row, isis_id [,"subfields" [,prefix [,postfix [,separators...]]]])
#
# Formats sub-fields of the FIRST occurrence of field isis_id in $row.
#   - one sub-field code (default 'a'): returns prefix . value . postfix,
#     or '' when the value is false
#   - several codes: the true values are joined in the given order; one
#     separator is consumed from the separator list for every code,
#     present or not (default ' '), and is used only when something was
#     already collected.  Returns prefix . joined . postfix, or '' when
#     nothing was collected.
# Returns '' when the field has no (true) first occurrence.
sub isis_sf {
	my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
	$subfield ||= 'a';
	$prefix ||= '';
	$postfix ||= '';

	return '' unless ($row->{$isis_id}->[0]);

	my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);

	# single sub-field requested
	if (length($subfield) == 1) {
		return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
	}

	# several sub-fields requested
	my $joined;
	foreach my $code (split(//,$subfield)) {
		my $glue = shift @sep || ' ';
		next unless ($sf->{$code});
		$joined = $joined ? $joined . $glue . $sf->{$code} : $sf->{$code};
	}
	return $joined ? $prefix . $joined . $postfix : '';
}
362
363 #--------------------------------------------------------------------
364 #--------------------------------------------------------------------
365
366
# --- main conversion loop --------------------------------------------
#
# Reads every row of the LIBRI database and, for each row that has a
# field 200, writes
#   - a bibliographic record (lines "%<tag> <text>") to R ($dir/bib)
#   - a block of stream lines ($mps) to both S and MPS; the block ends
#     with a "T" line that carries the byte offsets of the record in
#     $dir/bib, followed by "E"
# Text in $bib and in the headline goes through c_852_iso() on output.

# value of tell(R) taken right after the previous record was written
my $last_tell=0;

# NOTE(review): the result of OpenIsis::open is not checked -- confirm
# how OpenIsis reports a database that cannot be opened.
my $db = OpenIsis::open( "$isis_data/$db_dir/LIBRI/LIBRI" );

my $max_rowid = OpenIsis::maxRowid( $db );

my $last_pcnt = 0;

for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
	my $row = OpenIsis::read( $db, $row_id );

	# rows without a first occurrence of field 200 are skipped
	next if (! $row->{'200'}->[0]);

	my $bib = "%MFN $row->{mfn}\n";
	my $mps = "";

	# progress message for the indexer, once per whole percent
	my $pcnt = int($row->{mfn} * 100 / $max_rowid);
	if ($pcnt != $last_pcnt) {
		printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt);
		$last_pcnt = $pcnt;
	}

	my $headline = "";
	$headline .= isis_sf($row,'200','a',"'");
	$headline .= isis_sf($row,'200','e'," : ","'");

	# author
	$bib .= isis_to_bib($row,'700','%700+','*',2,'<');
	$bib .= isis_to_bib($row,'701','%700+','*',2,'<');
	$bib .= isis_to_bib($row,'710','%700+','*',2,'<');
	$bib .= isis_to_bib($row,'711','%700+','*',2,'<');
	$bib .= isis_to_bib($row,'503','%700+','*',2,'<');

	$mps .= isis_to_mps($row,'700',1);
	$mps .= isis_to_mps($row,'701',1);
	$mps .= isis_to_mps($row,'710',1);
	$mps .= isis_to_mps($row,'711',1);
	$mps .= isis_to_mps($row,'503',1);
	$mps .= isis_to_mps($row,'702',1);

	$bib .= isis_to_bib($row,'205','%205');

	# title
	my $sf = OpenIsis::subfields($row->{'200'}->[0]);
	my $book = "";
	$book .= $sf->{a} if ($sf->{a});
	$book .= " ; ".$sf->{k} if ($sf->{k});
	$book .= " = ".$sf->{d} if ($sf->{d});
	$book .= " : ".$sf->{e} if ($sf->{e});
	$book .= " / ".$sf->{f} if ($sf->{f});
	$book .= " ; ".$sf->{g} if ($sf->{g});
	$book .= ". ".$sf->{c} if ($sf->{c});
	$book .= " / ".$sf->{x} if ($sf->{x});
	$book .= " ; ".$sf->{y} if ($sf->{y});
	$bib .= "%200 $book\n" if ($book);

	$mps .= isis_to_mps($row,'200',2,"akcde");
	$mps .= isis_to_mps($row,'532',2);
	$mps .= isis_to_mps($row,'424',2);

	$headline .= isis_sf($row,'700','b'," ");
	$headline .= isis_sf($row,'700','a'," ");

	# publisher
	$mps .= isis_to_mps($row,'210',3);
	# the %210 line is written only when there is something to put on it
	my $publisher = isis_sf($row,'210','acd', '','', ('',' : ',', ') );
	$bib .= "%210 $publisher\n" if ($publisher);

	if (my $year = isis_sf($row,'210','d')) {
		$year =~ s/^\s*cop\.*\s*//i;
		$year =~ s/[\[\]]*//g;
		# a "D" line is emitted only for a year without "?" that contains four digits
		$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
		$headline .= " ($year)";
	}

	$bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');

#	$bib .= isis_to_bib($row,'225','%225', 'aehivw');
	# as with %210: no empty %225 line
	my $series = isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '));
	$bib .= "%225 $series\n" if ($series);
	$mps .= isis_to_mps($row,'225',4);

	$bib .= isis_to_bib($row,'300','%300+');
	$bib .= isis_to_bib($row,'320','%300+');
	$bib .= isis_to_bib($row,'327','%300+');
	$mps .= isis_to_mps($row,'300',5);
	$mps .= isis_to_mps($row,'320',5);
	$mps .= isis_to_mps($row,'327',5);

	$bib .= isis_to_bib($row,'330','%330');
	$mps .= isis_to_mps($row,'330',6);

	$bib .= isis_to_bib($row,'423','%423');
	$bib .= isis_to_bib($row,'464','%464');
	$mps .= isis_to_mps($row,'464',7);
	$bib .= isis_to_bib($row,'610','%610');
	$mps .= isis_to_mps($row,'610',8);

	$bib .= isis_to_bib($row,'675','%675+');
	$mps .= isis_to_mps($row,'675',9);
	$bib .= isis_to_bib($row,'686','%675+');
	$mps .= isis_to_mps($row,'686',10);

	$bib .= isis_to_bib($row,'990','%990');
	$mps .= isis_to_mps($row,'990',11);

	$bib .= isis_to_bib($row,'991','%991');
	$mps .= isis_to_mps($row,'991',12);

	# ISBN: indexed with and without dashes
	if (my $isbn = $row->{10}->[0]) {
		$isbn =~ s/ +//g;	# remove spaces
		$mps .= "W $isbn 13\n";
		$bib .= "%ISBN $isbn\n";
		$isbn =~ s/-//g;
		$mps .= "W $isbn 13\n";
	}
	# field 10 belongs to id 13 ("F 10 13 ISBN" in the stream header);
	# id 12 is the inventory number (field 991)
	$mps .= isis_to_mps($row,'10',13);

	$mps .= isis_to_mps($row,'532',1);

	$bib .= isis_to_bib($row,'994','%994a','a');

	# headline
	if ($headline) {
		$headline .= " [".$row->{mfn}."]";	## debug MFN!
		$mps .= "H ".c_852_iso($headline)."\n";
	} else {
		$mps .= "H nepoznato\n";
	}


	#if ($db_dir eq "sf") {
	#	print "MFN: $row->{mfn} ROW ID: $row_id\n";
	#	if ($row->{mfn} >= 146) {
	#		print Dumper($row);
	#	}
	#}

	print R c_852_iso($bib);

	# the offsets are taken before the separating blank line is written
	$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
	$last_tell=tell(R);

	print R "\n";

	$mps .= "E\n";


	print S $mps;
	print MPS $mps;
}
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

# buffered write errors only show up at close time
close(S) || die "can't close $dir/stream: $!";
close(R) || die "can't close $dir/bib: $!";
# closing a pipe waits for the indexer and reports its exit status in $?
close(MPS) || warn "MPS indexer $mpsindex did not finish cleanly (status $?): $!";

  ViewVC Help
Powered by ViewVC 1.1.26