/[webpac-proto]/isis2stream.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /isis2stream.pl

Parent Directory | Revision Log


Revision 1.4 - (show annotations)
Sun Jun 16 15:26:13 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
Changes since 1.3: +11 -33 lines
File MIME type: text/plain
finished move to common.pm

1 #!/usr/bin/perl -w
2
3 use strict;
4 use OpenIsis;
5 use Getopt::Std;
6 #use Data::Dumper;
7 use common;
8
# Command-line handling and output set-up.
#
#   -d  database directory name; required, every path below is built from it
#   -m  comma separated database list (parsed here, not referenced in this
#       part of the script)
my %opts;

getopt('dm', \%opts);

# -d itself must be present: checking only for "some option" would let
# "-m foo" through and build paths from an undefined directory name.
die "usage: $0 -d [database_dir] -m [database1,database2] "
    if (!defined $opts{d} || $opts{d} eq '');

my $db_dir = $opts{d};

# make sure <install_dir>/<db_dir>/data exists, failing early with the
# real reason instead of a later "can't open output" error
foreach my $path ("$common::install_dir/$db_dir", "$common::install_dir/$db_dir/data") {
    next if (-e $path);
    mkdir($path) || die "can't create directory $path: $!";
}

my $dir="$common::install_dir/$db_dir/data";


# S   - copy of everything sent to the indexer
# R   - bibliographic records; byte offsets into it are written to the stream
# MPS - pipe to the MPS indexer
# Three-argument open keeps the mode separate from the (interpolated) path.
open(S, '>', "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, '>', "$dir/bib") || die "can't open output $dir/bib: $!";
open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";

print S $common::mps_header;
print MPS $common::mps_header;
30
31 #
32 # expand(nr,"space separated string");
33 #
34
# expand($nr, @strings)
#
# Turns every whitespace separated word of each string into one
# "W <word> <nr>" line and returns all lines as a single string.
# Processing stops at the first false (undef, "" or "0") string.
#
# Words are folded to plain ASCII by c_852_czs() instead of a private
# copy of its transliteration table, so there is only one table to
# maintain.  Folding the whole string before splitting gives the same
# words as folding each word afterwards: neither the table nor the
# ss-substitution adds or removes whitespace.
#
# FIX: this should be replaced by stemmer!
sub expand {
    my $nr = shift @_;
    my $out = "";
    foreach my $raw (@_) {
        last unless ($raw);
        foreach my $w (split(/\s+/, c_852_czs($raw))) {
            $out .= "W $w $nr\n";
        }
    }
    return $out;
}
50
51 #--------------------------------------------------------------------
52
53 ################### ERASE###############3
54
55 # expand sub-fields from ISIS field
56 # (^a.....^b....)
57 my %data; # FIX
# ex_sf($field)
#
# Splits one ISIS field of the form "^a.....^b...." into the file-level
# %data hash (subfield code => value).  %data is always emptied first.
# A field that does not start with "^" is stored whole under the key
# "all", followed by the marker text "<-- iz polja bez podpolja"
# (Croatian: "from a field without subfields").
# A false field (undef, "" or "0") leaves %data empty.
sub ex_sf {
    %data = ();
    my $in = $_[0];
    return unless ($in);

    if ($in =~ m/^\^/) {
        foreach my $fld (split(/\^/, $in)) {
            # the subfield code is exactly one character; a greedy \w+
            # would take the start of the value as part of the code
            $data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
        }
    } else {
        $data{all} = $in."<-- iz polja bez podpolja";
    }
    return;
}
74
75 # dump all sub-fields
# all_sf($nr)
#
# Runs expand() over every value currently held in %data, visiting the
# subfield codes in ascending order, and returns the concatenated result.
sub all_sf {
    my ($nr) = @_;
    return join("", map { scalar(expand($nr, $data{$_})) } sort keys %data);
}
84
# all_sf_r($nr)
#
# Same as all_sf(), but the subfield codes of %data are visited in
# descending order.
sub all_sf_r {
    my ($nr) = @_;
    my $words = "";
    for my $code (reverse sort keys %data) {
        $words .= expand($nr, $data{$code});
    }
    return $words;
}
93
# all_sf2bib($nr [, $max_in_line [, $sep]])
#
# Formats the values in %data (codes in ascending order) as lines of the
# form "<nr> <value><sep><value>...\n".  A new line is started after
# every $max_in_line values; 0 (the default) puts everything on a single
# line.  $sep defaults to a single space.  Returns the lines as one string.
sub all_sf2bib {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my $bib   = "";
    my $count = 0;
    my $line;
    for my $code (sort keys %data) {
        $line = $line ? $line . $sep . $data{$code} : $data{$code};
        next unless (++$count == $max_in_line);

        # group is full: flush it and start collecting the next one
        $bib .= "$nr $line\n" if ($line);
        $count = 0;
        $line  = "";
    }
    # whatever is left over from the last, incomplete group
    $bib .= "$nr $line\n" if ($line);
    return $bib;
}
117
# all_sf2bib_r($nr [, $max_in_line [, $sep]])
#
# Same as all_sf2bib(), but the subfield codes of %data are taken in
# descending order.
sub all_sf2bib_r {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my $bib   = "";
    my $count = 0;
    my $line;
    for my $code (reverse sort keys %data) {
        if ($line) {
            $line .= $sep . $data{$code};
        } else {
            $line = $data{$code};
        }
        $count++;
        next if ($count != $max_in_line);

        # group is full: flush it and start collecting the next one
        $bib .= $nr . " " . $line . "\n" if ($line);
        ($count, $line) = (0, "");
    }
    # whatever is left over from the last, incomplete group
    $bib .= $nr . " " . $line . "\n" if ($line);
    return $bib;
}
141
142 #--------------------------------------------------------------------
143
144 sub c_852_iso {
145 my $tmp = $_[0];
146 $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ / if ($tmp);
147 return $tmp;
148 }
149
150 sub c_852_czs {
151 my $tmp = $_[0];
152 $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ /;
153 $tmp =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
154 $tmp =~ s/ß/ss/g;
155 return $tmp;
156 }
157
158 #--------------------------------------------------------------------
159
160 # $mps .= sf_to_mps($subfield_hash,"subfields",mps_id);
161 #
162 # subfields options:
163 # * - all (no sort)
164 # > - all, sort ascending
165 # < - all, sort descending
166
# subfields_str_2_arr($subfields [, $sf_hash])
#
# Converts a subfield selector string into a list of subfield codes.
#
#   "*"           all keys of $sf_hash, in hash order
#   ">"           all keys of $sf_hash, ascending
#   "<"           all keys of $sf_hash, descending
#   "abc>"        the listed codes, ascending (first ">" is removed)
#   "abc<"        the listed codes, descending (first "<" is removed)
#   "abc"         the listed codes, in the order given
#
# Returns nothing when $subfields is false.
sub subfields_str_2_arr {
    my ($subfields, $sf_hash) = @_;
    return unless ($subfields);

    my @codes;
    my $order = '';

    if ($subfields eq "*" || $subfields eq ">" || $subfields eq "<") {
        # selector refers to the whole hash
        @codes = keys %{$sf_hash};
        $order = $subfields unless ($subfields eq "*");
    } else {
        # explicit list of codes; ">" is looked for before "<"
        if ($subfields =~ s/>//) {
            $order = ">";
        } elsif ($subfields =~ s/<//) {
            $order = "<";
        }
        @codes = split(//, $subfields);
    }

    @codes = sort @codes if ($order eq ">");
    @codes = sort { $b cmp $a } @codes if ($order eq "<");
    return @codes;
}
187
188
# sf_to_mps($sf_hash, $subfields, $mps_id)
#
# Passes the selected subfield values of $sf_hash (a hash reference,
# subfield code => value) to mps_expand() together with $mps_id and
# returns the concatenated results.  $subfields is a selector as
# understood by subfields_str_2_arr().
sub sf_to_mps {
    my $sf_hash   = shift @_;
    my $subfields = shift @_;
    my $mps_id    = shift @_;

    my $mps = "";
    for my $code (subfields_str_2_arr($subfields, $sf_hash)) {
        $mps .= mps_expand($mps_id, $sf_hash->{$code});
    }
    return $mps;
}
199
200 #--------------------------------------------------------------------
201
202 # $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
203
# isis_to_mps($row, $isis_id, $mps_id [, $subfields])
#
# Walks all occurrences of field $isis_id in $row (stopping at the first
# false one) and feeds their content to mps_expand() under $mps_id:
#
#   - occurrence with subfields and $subfields given: the listed subfield
#     codes, one character each, in the order given
#   - occurrence with subfields, no $subfields: every subfield, hash order
#   - occurrence without subfields: the raw field value
#
# Dies when $row, $isis_id or $mps_id is false.  Returns the
# concatenated mps_expand() output ("" when the field is absent).
sub isis_to_mps {
    my ($row, $isis_id, $mps_id, $subfields) = @_;
    die unless ($row);
    die unless ($isis_id);
    die unless ($mps_id);

    my $out = "";

    for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

        unless (scalar(keys %{$sf_hash}) > 0) {
            # plain field, index the value as it is
            $out .= mps_expand($mps_id, $row->{$isis_id}->[$i]);
            next;
        }

        my @codes = $subfields ? split(//, $subfields) : keys %{$sf_hash};
        foreach my $code (@codes) {
            $out .= mps_expand($mps_id, $sf_hash->{$code});
        }
    }
    return $out;
}
233
234 #--------------------------------------------------------------------
235
236 # $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
237 #
238 # fields:
239 # * - all (no sort)
240 # > - all, sort ascending
241 # < - all, sort descending
242 #
243
# bib_grp($sf_hash, $bib_id, $sep, @codes)
#
# Helper of isis_to_bib(): joins the defined values of the given subfield
# codes with $sep and returns "<bib_id> <joined values>\n", or "" when
# $sf_hash is false or nothing was collected.
sub bib_grp {
    my ($sf_hash, $bib_id, $sep, @codes) = @_;
    return "" unless ($sf_hash);

    my $joined;
    foreach my $code (@codes) {
        my $value = $sf_hash->{$code};
        next unless (defined $value);
        $joined = $joined ? $joined . $sep . $value : $value;
    }
    return $joined ? "$bib_id $joined\n" : "";
}

# isis_to_bib($row, $isis_id, $bib_id [, $subfields [, $group_size
#             [, $group_sort [, $sep]]]])
#
# Produces the bib lines for all occurrences of field $isis_id in $row
# (stopping at the first false occurrence).
#
#   $subfields   selector for subfields_str_2_arr(), default "*"
#   $group_size  when non-zero, the selected codes are emitted in groups
#                of that many, one bib line per group; default 0 = one
#                line per occurrence
#   $group_sort  appended to each group's codes before they are handed to
#                subfields_str_2_arr() (e.g. "<" or ">"), default ""
#   $sep         separator between values on a line, default " "
#
# An occurrence without subfields is written as "<bib_id> <value>\n".
# Dies when $row, $isis_id or $bib_id is false.
sub isis_to_bib {
    my ($row, $isis_id, $bib_id, $subfields, $group_size, $group_sort, $sep) = @_;
    die unless ($row);
    die unless ($isis_id);
    die unless ($bib_id);
    $subfields  ||= '*';
    $group_size ||= 0;
    $group_sort ||= '';
    $sep        ||= ' ';

    my $bib = "";

    for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

        unless (scalar(keys %{$sf_hash}) > 0) {
            # no subfields, use just value!
            $bib .= "$bib_id " . $row->{$isis_id}->[$i] . "\n";
            next;
        }

        unless ($group_size) {
            $bib .= bib_grp($sf_hash, $bib_id, $sep, subfields_str_2_arr($subfields, $sf_hash));
            next;
        }

        # cut the string of selected codes into chunks of $group_size
        my $pending = join("", subfields_str_2_arr($subfields, $sf_hash));
        while ($pending) {
            my $chunk = substr($pending, 0, $group_size);
            $bib .= bib_grp($sf_hash, $bib_id, $sep, subfields_str_2_arr($chunk . $group_sort));
            last if (length($pending) <= $group_size);
            $pending = substr($pending, $group_size);
        }
    }
    return $bib;
}
304
305 #--------------------------------------------------------------------
306
# isis_sf($row, $isis_id [, $subfield [, $prefix [, $postfix [, @sep]]]])
#
# Formats subfields of the FIRST occurrence of field $isis_id in $row.
#
#   $subfield  one code (default 'a'): returns prefix . value . postfix
#              several codes ("acd"): the non-empty values are joined and
#              wrapped in prefix/postfix
#   @sep       separators for the multi-code form; one is consumed per
#              listed code (also for codes that have no value) and placed
#              in front of that code's value.  A missing or false
#              separator becomes a single space.
#
# Returns '' when the field is absent or nothing was selected.
sub isis_sf {
    my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
    $subfield ||= 'a';
    $prefix   ||= '';
    $postfix  ||= '';

    return '' unless ($row->{$isis_id}->[0]);

    my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);

    if (length($subfield) == 1) {
        return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
    }

    if (length($subfield) > 1) {
        my $out;
        foreach my $code (split(//, $subfield)) {
            # the separator is taken even if this code turns out empty
            my $sep = shift(@sep) || ' ';
            my $value = $sf->{$code};
            next unless ($value);
            $out = $out ? $out . $sep . $value : $value;
        }
        return $prefix . $out . $postfix if ($out);
    }
    return '';
}
340
341 #--------------------------------------------------------------------
342 #--------------------------------------------------------------------
343
344
# Main conversion loop.
#
# Reads every row of the LIBRI database and, for rows that have a title
# (field 200), builds three strings:
#
#   $bib       the bibliographic record, appended to R
#   $mps       the indexer commands for that record, written to S and MPS
#   $headline  one line description used for the "H" command
#
# The order in which $bib and $mps are extended is significant and is
# kept exactly as it was.

my $last_tell=0;    # offset in R at which the previous record ended

my $db = OpenIsis::open( "$common::isis_data/$db_dir/LIBRI/LIBRI" );

my $max_rowid = OpenIsis::maxRowid( $db );

my $last_pcnt = 0;  # last percentage reported to the indexer

foreach my $row_id (1 .. $max_rowid) {
    my $row = OpenIsis::read( $db, $row_id );

    # rows without a title are skipped completely
    next unless ($row->{'200'}->[0]);

    my $mfn = $row->{mfn};

    my $bib = "%MFN $mfn\n";
    my $mps = "";

    # progress message, sent whenever the integer percentage changes
    my $pcnt = int($mfn * 100 / $max_rowid);
    if ($pcnt != $last_pcnt) {
        printf MPS ("M %5d / %5d -- %-2d %%\n", $mfn, $max_rowid, $pcnt);
        $last_pcnt = $pcnt;
    }

    my $headline = "";
    $headline .= isis_sf($row,'200','a',"'");
    $headline .= isis_sf($row,'200','e'," : ","'");

    # author
    foreach my $tag (qw(700 701 710 711 503)) {
        $bib .= isis_to_bib($row,$tag,'%700+','*',2,'<');
    }
    foreach my $tag (qw(700 701 710 711 503 702)) {
        $mps .= isis_to_mps($row,$tag,1);
    }

    $bib .= isis_to_bib($row,'205','%205');

    # title: subfields of field 200, each introduced by its own punctuation
    my $sf = OpenIsis::subfields($row->{'200'}->[0]);
    my $book = "";
    foreach my $part (
        [ 'a', ''    ], [ 'k', ' ; ' ], [ 'd', ' = ' ],
        [ 'e', ' : ' ], [ 'f', ' / ' ], [ 'g', ' ; ' ],
        [ 'c', '. '  ], [ 'x', ' / ' ], [ 'y', ' ; ' ],
    ) {
        my ($code, $lead) = @{$part};
        $book .= $lead . $sf->{$code} if ($sf->{$code});
    }
    $bib .= "%200 $book\n" if ($book);

    $mps .= isis_to_mps($row,'200',2,"akcde");
    $mps .= isis_to_mps($row,'532',2);
    $mps .= isis_to_mps($row,'424',2);

    $headline .= isis_sf($row,'700','b'," ");
    $headline .= isis_sf($row,'700','a'," ");

    # publisher
    # NOTE: the %210 line is written even when isis_sf() returns ''
    $mps .= isis_to_mps($row,'210',3);
    $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";

    # year: strip a leading "cop." and square brackets; it is indexed as a
    # date only if it has no "?" and contains four consecutive digits
    my $year = isis_sf($row,'210','d');
    if ($year) {
        $year =~ s/^\s*cop\.*\s*//i;
        $year =~ s/[\[\]]*//g;
        $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
        $headline .= " ($year)";
    }

    $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');

    # NOTE: as with %210, the %225 line is written even when empty
    $bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
    $mps .= isis_to_mps($row,'225',4);

    foreach my $tag (qw(300 320 327)) {
        $bib .= isis_to_bib($row,$tag,'%300+');
    }
    foreach my $tag (qw(300 320 327)) {
        $mps .= isis_to_mps($row,$tag,5);
    }

    # remaining fields: [ ISIS field, bib tag, MPS field id (undef = not indexed) ]
    foreach my $map (
        [ '330', '%330',  6     ],
        [ '423', '%423',  undef ],
        [ '464', '%464',  7     ],
        [ '610', '%610',  8     ],
        [ '675', '%675+', 9     ],
        [ '686', '%675+', 10    ],
        [ '990', '%990',  11    ],
        [ '991', '%991',  12    ],
    ) {
        my ($tag, $bib_id, $mps_id) = @{$map};
        $bib .= isis_to_bib($row,$tag,$bib_id);
        $mps .= isis_to_mps($row,$tag,$mps_id) if (defined $mps_id);
    }

    # ISBN: indexed both as written (spaces removed) and without hyphens
    my $isbn = $row->{10}->[0];
    if ($isbn) {
        $isbn =~ s/ +//g;
        (my $plain = $isbn) =~ s/-//g;
        $mps .= "W $isbn 13\n";
        $bib .= "%ISBN $isbn\n";
        $mps .= "W $plain 13\n";
    }
    $mps .= isis_to_mps($row,'10',12);

    $mps .= isis_to_mps($row,'532',1);

    $bib .= isis_to_bib($row,'994','%994a','a');

    # headline, with the MFN appended for debugging
    $mps .= $headline
        ? "H ".c_852_iso($headline." [".$mfn."]")."\n"
        : "H nepoznato\n";

    print R c_852_iso($bib);

    # the "T" command gives the length of the record and its start and end
    # offsets in the bib file; the end offset is remembered before the
    # blank separator line is written
    my $bib_end = tell(R);
    $mps .= "T document text/plain ".($bib_end - $last_tell)." $dir/bib $last_tell $bib_end\n";
    $last_tell = $bib_end;

    print R "\n";

    $mps .= "E\n";

    print S $mps;
    print MPS $mps;
}
# Terminate both copies of the indexer stream.
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

# Close every output explicitly: buffered write errors only show up at
# close time, and closing the pipe is the only place where a failed
# indexer run can be noticed.
close(S) || die "can't close $dir/stream: $!";
close(R) || die "can't close $dir/bib: $!";
close(MPS) || die($!
    ? "error closing pipe to MPS indexer $common::mpsindex: $!"
    : "MPS indexer $common::mpsindex exited with status ".($? >> 8));

  ViewVC Help
Powered by ViewVC 1.1.26