/[webpac-proto]/isis2stream.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /isis2stream.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations)
Sun Jun 16 16:06:55 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
Changes since 1.5: +16 -5 lines
File MIME type: text/plain
support for PERI dbs

1 #!/usr/bin/perl -w
2
3 use strict;
4 use OpenIsis;
5 use Getopt::Std;
6 #use Data::Dumper;
7 use common;
8
# Command-line options:
#   -d database_dir          (required) directory name below the install dir
#   -m database1,database2   (optional) sub-directories holding the ISIS dbs
my %opts;

getopt('dm', \%opts);

# Every path below is built from -d, so insist on it rather than only
# checking that *some* option was given.
die "usage: $0 -d [database_dir] -m [database1,database2] "
    if (! defined $opts{d} || $opts{d} eq '');

my $db_dir = $opts{d};

# Create <install_dir>/<db_dir> and <install_dir>/<db_dir>/data when they
# do not exist yet; stop right away if that fails instead of failing later
# in open() with a less helpful message.
foreach my $path ("$common::install_dir/$db_dir", "$common::install_dir/$db_dir/data") {
    next if (-e $path);
    mkdir($path) || die "can't create directory $path: $!";
}

my $dir="$common::install_dir/$db_dir/data";

# Output handles (barewords on purpose: the rest of the script prints to
# S, R and MPS by name):
#   S   - file copy of everything sent to the indexer
#   R   - the "bib" file whose byte offsets are recorded with tell(R)
#   MPS - pipe into the MPS indexer
# NOTE(review): the indexer command line still goes through the shell with
# $db_dir interpolated, exactly as before; only the open() form changed.
open(S, '>', "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, '>', "$dir/bib") || die "can't open output $dir/bib: $!";
open(MPS, '|-', "$common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";

print S $common::mps_header;
print MPS $common::mps_header;
30
#
# expand(nr,"space separated string"[,...]);
#
# Returns one line "W <word> <nr>\n" for every whitespace-separated word of
# the given value(s), all concatenated into a single string.  Each value is
# passed through c_852_czs() first, so the words are emitted in the folded
# form that sub produces.
#
# FIX: this should be replaced by stemmer!
#
sub expand {
    my $nr = shift @_;
    my $out = "";
    foreach my $raw (@_) {
        # Skip missing/empty values.  The loop used to end on the first
        # false value, which also dropped a literal "0" and anything
        # passed after it.
        next if (! defined $raw || $raw eq '');

        # c_852_czs() is the single home of the character tables; the
        # private copy that used to live here contained '-" in its
        # replacement list, which tr/// reads as a (descending) range.
        # split(' ') ignores leading whitespace, so no empty word is
        # emitted for values that start with a blank.
        foreach my $w (split(' ', c_852_czs($raw))) {
            $out .= "W $w $nr\n";
        }
    }
    return $out;
}
50
51 #--------------------------------------------------------------------
52
53 ################### ERASE###############3
54
55 # expand sub-fields from ISIS field
56 # (^a.....^b....)
57 my %data; # FIX
# ex_sf($value)
#
# Parses one raw ISIS field value into the file-level %data hash, which is
# emptied first.
#   - A value starting with "^" is split on "^"; each piece contributes
#     <first character> => <rest>, e.g. "^aFoo^bBar" gives a => "Foo",
#     b => "Bar".  Pieces without any text after the code are ignored.
#   - Any other true value is stored under the key "all" with a marker text
#     appended (the marker is Croatian for "from a field without subfields").
#   - False values (undef, "", "0") leave %data empty.
# Nothing useful is returned; callers read %data.
sub ex_sf {
    %data = ();
    my $in = $_[0];
    return unless ($in);

    # The former character conversion of a scratch copy was removed: its
    # result was never used, the parsing always worked on $in.
    if ($in =~ m/^\^/) {
        foreach my $fld (split(/\^/,$in)) {
            # Exactly one character is the subfield code.  The old (\w+)
            # was greedy and swallowed all but the last character of the
            # value into the key.
            $data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
        }
    } else {
        $data{all} = $in."<-- iz polja bez podpolja";
    }
    return;
}
74
75 # dump all sub-fields
# all_sf($nr)
#
# Runs expand($nr, ...) over every value currently held in %data, visiting
# the keys in ascending string order, and returns the concatenated result.
sub all_sf {
    my ($nr) = @_;
    return join("", map { expand($nr, $data{$_}) } sort keys %data);
}
84
# all_sf_r($nr)
#
# Same as all_sf(), but the keys of %data are visited in descending string
# order.
sub all_sf_r {
    my ($nr) = @_;
    return join("", map { expand($nr, $data{$_}) } reverse sort keys %data);
}
93
# _sf2bib_lines($nr, $max_in_line, $sep, @keys)
#
# Shared worker for all_sf2bib() and all_sf2bib_r().  Joins the %data values
# of @keys (in the order given) with $sep and returns them as lines of the
# form "<nr> <joined values>\n".  After every $max_in_line values a new line
# is started; with $max_in_line == 0 the counter never matches and all
# values end up on a single line.  Lines whose text is false are not
# emitted.
sub _sf2bib_lines {
    my ($nr, $max_in_line, $sep, @keys) = @_;
    my $bib = "";
    my $line;
    my $count = 0;
    foreach my $k (@keys) {
        # the separator is only used once the line already has content
        $line = $line ? $line . $sep . $data{$k} : $data{$k};
        $count++;
        if ($count == $max_in_line) {
            $bib .= $nr." ".$line."\n" if ($line);
            $count = 0;
            $line = "";
        }
    }
    $bib .= $nr." ".$line."\n" if ($line);
    return $bib;
}

# all_sf2bib($nr [, $max_in_line [, $sep]])
#
# Returns the values of %data, keys in ascending string order, as bib lines
# prefixed with $nr.  $max_in_line defaults to 0 (everything on one line),
# $sep defaults to a single space.
sub all_sf2bib {
    my $nr = shift @_;
    my $max_in_line=shift @_ || 0;
    my $sep = shift @_ || ' ';
    return _sf2bib_lines($nr, $max_in_line, $sep, sort keys %data);
}

# all_sf2bib_r($nr [, $max_in_line [, $sep]])
#
# Same as all_sf2bib(), with the keys in descending string order.
sub all_sf2bib_r {
    my $nr = shift @_;
    my $max_in_line=shift @_ || 0;
    my $sep = shift @_ || ' ';
    return _sf2bib_lines($nr, $max_in_line, $sep, sort {$b cmp $a} keys %data);
}
141
142 #--------------------------------------------------------------------
143
# c_852_iso($text)
#
# Returns a copy of $text with every character of the first tr/// list
# replaced by the character at the same position in the second list.
# False input (undef, "", "0") is returned unchanged without touching it.
# NOTE(review): judging by the sub name this is a code page 852 to ISO
# conversion table - confirm the encoding this file is stored in before
# editing the table.
sub c_852_iso {
    my ($text) = @_;
    return $text unless ($text);
    $text =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ /;
    return $text;
}
149
# c_852_czs($text)
#
# Converts $text with c_852_iso() and then folds the converted characters to
# plain ASCII: every character of the tr/// search list below becomes the
# ASCII character at the same position in the replacement list, and "ß"
# becomes "ss".  Returns the folded copy; undef is returned as undef.
sub c_852_czs {
    # First stage is delegated to c_852_iso() so that the big conversion
    # table exists in one place only.
    my $tmp = c_852_iso($_[0]);
    return $tmp if (! defined $tmp);

    # The hyphen in the replacement list is escaped: written as '-" it is
    # parsed by tr/// as a range from ' to ", whose end points are in
    # descending order.
    $tmp =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'\-".'',"'Rr/;
    $tmp =~ s/ß/ss/g;
    return $tmp;
}
157
158 #--------------------------------------------------------------------
159
160 # $mps .= sf_to_mps($subfield_hash,"subfields",mps_id);
161 #
162 # subfields options:
163 # * - all (no sort)
164 # > - all, sort ascending
165 # < - all, sort descending
166
# subfields_str_2_arr($subfields [, $sf_hash])
#
# Turns a subfield specification into the list of subfield codes to use.
#   "*"   all keys of %$sf_hash, in hash order
#   ">"   all keys of %$sf_hash, sorted ascending
#   "<"   all keys of %$sf_hash, sorted descending
#   other the characters of the string itself; if it contains ">" (checked
#         first) or "<", the first such marker is removed and the remaining
#         characters are sorted ascending or descending respectively.
# A false specification returns nothing.
sub subfields_str_2_arr {
    my ($subfields, $sf_hash) = @_;
    return unless ($subfields);

    my @sf_arr;
    if ($subfields eq "*" || $subfields eq ">" || $subfields eq "<") {
        # the whole hash was asked for; only the ordering differs
        @sf_arr = keys %{$sf_hash};
        if ($subfields eq ">") {
            @sf_arr = sort @sf_arr;
        } elsif ($subfields eq "<") {
            @sf_arr = reverse sort @sf_arr;
        }
    } else {
        # explicit list of codes, with an optional sort marker inside
        my $order = '';
        if ($subfields =~ s/>//) {
            $order = '>';
        } elsif ($subfields =~ s/<//) {
            $order = '<';
        }
        @sf_arr = split(//,$subfields);
        if ($order eq '>') {
            @sf_arr = sort @sf_arr;
        } elsif ($order eq '<') {
            @sf_arr = reverse sort @sf_arr;
        }
    }
    return @sf_arr;
}
187
188
# sf_to_mps($sf_hash, $subfields, $mps_id)
#
# Calls mps_expand($mps_id, <value>) for every subfield of %$sf_hash chosen
# by the $subfields specification (see subfields_str_2_arr) and returns the
# results concatenated.
sub sf_to_mps {
    my ($sf_hash,$subfields,$mps_id) = @_;
    return join("",
        map { scalar(mps_expand($mps_id,$sf_hash->{$_})) }
        subfields_str_2_arr($subfields,$sf_hash)
    );
}
199
200 #--------------------------------------------------------------------
201
202 # $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
203
# $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
#
# Walks the occurrences of field isis_id in $row until the first false one
# and returns the concatenated mps_expand(mps_id, ...) output for them.
# An occurrence that OpenIsis::subfields() splits into subfields contributes
# either the subfields named one character each in "subfields" or, without
# that argument, all of its subfields; an occurrence without subfields
# contributes its whole value.  Dies when one of the first three arguments
# is false.
sub isis_to_mps {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $mps_id = shift @_ || die;
    my $subfields = shift @_;

    my $out = "";

    for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

        if (scalar keys %{$sf_hash} > 0) {
            # requested codes, or every subfield the occurrence has
            my @codes = $subfields ? split(//,$subfields) : keys %{$sf_hash};
            foreach (@codes) {
                $out.=mps_expand($mps_id,$sf_hash->{$_});
            }
        } else {
            $out.=mps_expand($mps_id,$row->{$isis_id}->[$i]);
        }
    }
    return $out;
}
233
234 #--------------------------------------------------------------------
235
236 # $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
237 #
238 # fields:
239 # * - all (no sort)
240 # > - all, sort ascending
241 # < - all, sort descending
242 #
243
# bib_grp($sf_hash, $bib_id, $sep, @codes)
#
# Joins the defined values $sf_hash->{code} for @codes (in the order given)
# with $sep and returns "<bib_id> <joined>\n", or "" when $sf_hash is false
# or the joined text is false.
#
# This used to be declared inside isis_to_bib().  A named sub is installed
# in the package no matter where it is written, so it lives at file level
# now, under the same name.
sub bib_grp {
    my $sf_hash = shift @_ || return "";
    my $bib_id = shift @_;
    my $sep = shift @_;
    my $bib_grp;
    foreach (@_) {
        next if (! defined $sf_hash->{$_});
        if ($bib_grp) {
            $bib_grp.= $sep . $sf_hash->{$_};
        } else {
            $bib_grp = $sf_hash->{$_};
        }
    }
    return $bib_grp ? "$bib_id $bib_grp\n" : "";
}

# $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
#
# Walks the occurrences of field isis_id in $row until the first false one
# and returns bib lines of the form "<bib_id> <text>\n" for them.
#   subfields   specification as understood by subfields_str_2_arr
#               (default "*")
#   group size  when non-zero, the selected subfield codes are cut into
#               groups of that many codes and every group gets its own line
#               (default 0: one line per occurrence)
#   group sort  appended to each group before it is handed to
#               subfields_str_2_arr, e.g. "<" or ">" (default "")
#   separator   placed between the values on one line (default " ")
# An occurrence without subfields produces one line with its whole value.
# Dies when one of the first three arguments is false.
sub isis_to_bib {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $bib_id = shift @_ || die;
    my $subfields = shift @_ || '*';
    my $group_size = shift @_ || 0;
    my $group_sort = shift @_ || '';
    my $sep = shift @_ || ' ';

    my $bib="";

    for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

        if (scalar keys %{$sf_hash} > 0) {
            if ($group_size) {
                # codes still to be emitted, one character per subfield
                my $tmp_flds = join("",subfields_str_2_arr($subfields,$sf_hash));
                while ($tmp_flds) {
                    my $tmp_fld_grp = substr($tmp_flds,0,$group_size);
                    $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($tmp_fld_grp.$group_sort) );
                    last if (length($tmp_flds) <= $group_size);
                    $tmp_flds = substr($tmp_flds,$group_size);
                }
            } else {
                $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($subfields,$sf_hash) );
            }
        } else {
            # no subfields, use just value!
            $bib .= "$bib_id ".$row->{$isis_id}->[$i]."\n";
        }
    }
    return $bib;
}
304
305 #--------------------------------------------------------------------
306
# isis_sf($row, isis_id [, "subfields" [, prefix [, postfix [, separators...]]]])
#
# Formats subfields of the FIRST occurrence of field isis_id in $row.
#   - "subfields" defaults to "a"; prefix and postfix default to "".
#   - With a single subfield code, returns prefix . value . postfix when the
#     value is true.
#   - With several codes, the true values are joined in the order of the
#     codes.  One separator is taken from the remaining arguments for every
#     code (whether or not that subfield exists), falling back to a single
#     space; it is only used when some text has already been collected.
#     The joined text is wrapped in prefix/postfix.
#   - Returns "" when the field is missing or nothing was collected.
sub isis_sf {
    my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
    $subfield ||= 'a';
    $prefix   ||= '';
    $postfix  ||= '';

    return '' unless ($row->{$isis_id}->[0]);

    my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);

    if (length($subfield) == 1) {
        return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
    }

    my $out;
    foreach my $code (split(//,$subfield)) {
        # consume this code's separator even when the subfield is absent
        my $sep = shift @sep || ' ';
        next unless ($sf->{$code});
        $out = $out ? $out . $sep . $sf->{$code} : $sf->{$code};
    }
    return $out ? $prefix . $out . $postfix : '';
}
340
341 #--------------------------------------------------------------------
342 #--------------------------------------------------------------------
343
344
# Value of tell(R) taken after the previous record was printed; used by the
# main loop as the start offset of the next record in the bib file.
my $last_tell=0;

# Sub-directories (below <isis_data>/<db_dir>) to look into.  Without -m
# only '.' is used, i.e. the database directory itself.
my @isis_dirs = $opts{m} ? split(/,/,$opts{m}) : ( '.' );

# Collect the databases that exist: for every sub-directory first LIBRI,
# then PERI, each addressed as <dir>/<NAME>/<NAME>.
my @isis_dbs;

foreach my $sub_dir (@isis_dirs) {
    foreach my $db_name ('LIBRI', 'PERI') {
        my $db_path = "$common::isis_data/$db_dir/$sub_dir/$db_name";
        push(@isis_dbs, "$db_path/$db_name") if (-e $db_path);
    }
}
363
# Convert every database found above.  Each record that has a 200 field
# yields
#   - one block of "%<tag> ..." lines in the bib file (handle R), and
#   - one indexer document (lines built in $mps) that is written to both
#     the stream file S and the indexer pipe MPS.
# The order in which text is appended to $bib, $mps and $headline is
# significant and must be kept.
foreach my $isis_db (@isis_dbs) {

    print MPS "M reading ISIS from '$isis_db'...\n";

    my $db = OpenIsis::open( "$isis_db" );
    my $max_rowid = OpenIsis::maxRowid( $db );
    my $last_pcnt = 0;

    for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
        my $row = OpenIsis::read( $db, $row_id );

        # records without a 200 field are skipped completely
        next unless ($row->{'200'}->[0]);

        my $bib = "%MFN $row->{mfn}\n";
        my $mps;

        # progress message, sent whenever the whole-percent value changes
        my $pcnt = int($row->{mfn} * 100 / $max_rowid);
        if ($pcnt != $last_pcnt) {
            printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt);
            $last_pcnt = $pcnt;
        }

        my $headline = isis_sf($row,'200','a',"'") . isis_sf($row,'200','e'," : ","'");

        # author
        foreach my $tag (qw(700 701 710 711 503)) {
            $bib .= isis_to_bib($row,$tag,'%700+','*',2,'<');
        }
        foreach my $tag (qw(700 701 710 711 503 702)) {
            $mps .= isis_to_mps($row,$tag,1);
        }

        $bib .= isis_to_bib($row,'205','%205');

        # title ("naslov"): subfields of 200 in fixed order, each with the
        # punctuation that introduces it
        my $sf = OpenIsis::subfields($row->{'200'}->[0]);
        my $book;
        foreach my $part (
            [ 'a', ''    ], [ 'k', " ; " ], [ 'd', " = " ],
            [ 'e', " : " ], [ 'f', " / " ], [ 'g', " ; " ],
            [ 'c', ". "  ], [ 'x', " / " ], [ 'y', " ; " ],
        ) {
            my ($code, $lead) = @{$part};
            $book .= $lead.$sf->{$code} if ($sf->{$code});
        }
        $bib .= "%200 $book\n" if ($book);

        $mps .= isis_to_mps($row,'200',2,"akcde");
        $mps .= isis_to_mps($row,'532',2);
        $mps .= isis_to_mps($row,'424',2);

        $headline .= isis_sf($row,'700','b'," ");
        $headline .= isis_sf($row,'700','a'," ");

        # publisher ("izdavac"); the %210 line is written even when empty
        $mps .= isis_to_mps($row,'210',3);
        $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";

        # year: strip a leading "cop." and square brackets; it is indexed
        # only when it holds four digits in a row and no question mark
        if (my $year = isis_sf($row,'210','d')) {
            $year =~ s/^\s*cop\.*\s*//i;
            $year =~ s/[\[\]]*//g;
            $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
            $headline .= " ($year)";
        }

        $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');

        # the %225 line is written even when empty
        $bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
        $mps .= isis_to_mps($row,'225',4);

        foreach my $tag (qw(300 320 327)) {
            $bib .= isis_to_bib($row,$tag,'%300+');
        }
        foreach my $tag (qw(300 320 327)) {
            $mps .= isis_to_mps($row,$tag,5);
        }

        # remaining plain fields: [ ISIS tag, bib tag, MPS id ]; an
        # undefined MPS id means the field is not indexed
        foreach my $map (
            [ '330', '%330',  6     ],
            [ '423', '%423',  undef ],
            [ '464', '%464',  7     ],
            [ '610', '%610',  8     ],
            [ '675', '%675+', 9     ],
            [ '686', '%675+', 10    ],
            [ '990', '%990',  11    ],
            [ '991', '%991',  12    ],
        ) {
            my ($tag, $bib_tag, $mps_tag) = @{$map};
            $bib .= isis_to_bib($row,$tag,$bib_tag);
            $mps .= isis_to_mps($row,$tag,$mps_tag) if (defined $mps_tag);
        }

        # ISBN: indexed twice, with and without dashes
        if (my $isbn = $row->{'10'}->[0]) {
            $isbn =~ s/ +//g;   # remove spaces
            $mps .= "W $isbn 13\n";
            $bib .= "%ISBN $isbn\n";
            $isbn =~ s/-//g;
            $mps .= "W $isbn 13\n";
        }
        # NOTE(review): field 10 is indexed under id 12 here while the ISBN
        # words above use 13 - confirm which one is intended.
        $mps .= isis_to_mps($row,'10',12);

        $mps .= isis_to_mps($row,'532',1);

        $bib .= isis_to_bib($row,'994','%994a','a');

        # headline
        if ($headline) {
            $headline .= " [".$row->{mfn}."]";  ## debug MFN!
            $mps .= "H ".c_852_iso($headline)."\n";
        } else {
            $mps .= "H nepoznato\n";
        }

        print R c_852_iso($bib);

        # Describe the record to the indexer: length, file, start offset
        # and end offset.  $last_tell is updated BEFORE the separating
        # newline is written.
        my $bib_end = tell(R);
        $mps .= "T document text/plain ".($bib_end - $last_tell)." $dir/bib $last_tell ".$bib_end."\n";
        $last_tell = $bib_end;

        print R "\n";

        $mps .= "E\n";

        print S $mps;
        print MPS $mps;
    }
}
# Final lines of the stream, written to both the stream file and the
# indexer.
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

# Close all three outputs explicitly: buffered write errors only show up at
# close(), and closing the pipe waits for the indexer and reports how it
# exited.  Problems are reported as warnings so that the script ends the
# same way it did before.
close(S) || warn "error closing $dir/stream: $!";
close(R) || warn "error closing $dir/bib: $!";
close(MPS) || warn( $! ? "error closing pipe to MPS indexer: $!"
                       : "MPS indexer exited with status $?" );

  ViewVC Help
Powered by ViewVC 1.1.26