/[webpac-proto]/isis2stream.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /isis2stream.pl

Parent Directory | Revision Log


Revision 1.9 - (show annotations)
Sun Jun 16 19:55:19 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
Changes since 1.8: +15 -10 lines
File MIME type: text/plain
isbn fix

1 #!/usr/bin/perl -w
2
3 use strict;
4 use OpenIsis;
5 use Getopt::Std;
6 use Data::Dumper;
7 use common;
8
# Command line options:
#   -d database_dir          (mandatory) name of the database directory
#   -m database1,database2   (optional) comma separated sub-directories
my %opts;

getopt('dm', \%opts);

# -d is mandatory: every path below is built from it, so "-m" alone is
# not enough.
die "usage: $0 -d [database_dir] -m [database1,database2] "
    if (!defined $opts{d} || $opts{d} eq '');

my $db_dir = $opts{d};

# Create the output tree, stopping early if it can't be created instead of
# failing later in open() with a less helpful message.
foreach my $path ("$common::install_dir/$db_dir", "$common::install_dir/$db_dir/data") {
    next if (-e $path);
    mkdir($path) || die "can't create directory $path: $!";
}

my $dir="$common::install_dir/$db_dir/data";

# S   - copy of the stream sent to the indexer
# R   - bib records; the "T" lines of the stream point into this file
# MPS - pipe to the MPS indexer
# The three-argument form keeps the mode out of the file name. The indexer
# command is still a single string, so it is parsed like the original
# two-argument pipe open.
open(S, '>', "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, '>', "$dir/bib") || die "can't open output $dir/bib: $!";
open(MPS, '|-', "$common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";

print S $common::mps_header;
print MPS $common::mps_header;

#--------------------------------------------------------------------
# init hash in_mps_header for config checks later
#
# Every "F <isis> <mps> ..." line of the header registers MPS field <mps>
# (key) together with its ISIS field <isis> (value).

my %in_mps_header;
foreach my $line (split(/\n/,$common::mps_header)) {
    next if ($line !~ /^F /);
    my (undef,$isis,$mps,undef) = split(/ /,$line,4);
    $in_mps_header{$mps}=$isis;
}
require "./search/config.pm";
42
43 #--------------------------------------------------------------------
44 #
45 # expand(nr,"space separated string");
46 #
47
# Build MPS "W" (word) lines for one MPS field.
#
#   $out = expand($nr, @strings);
#
# $nr      - MPS field number; must be present in %in_mps_header
# @strings - strings of whitespace separated words
#
# Every string is passed through c_852_iso() and split on whitespace; each
# word then has its accented characters replaced by ASCII ones and is
# emitted as "W <word> <nr>\n". Undefined and empty strings are skipped
# (they no longer end the loop, so later strings are still processed),
# and empty words produced by leading whitespace are dropped.
# Dies if $nr is not in the MPS header.
sub expand {
    my ($nr, @strings) = @_;
    die "$nr is not in mps_header" if (!$in_mps_header{$nr});
    my $out = "";
    foreach my $str (@strings) {
        next if (!defined $str || $str eq '');
        foreach my $w (split(/\s+/, c_852_iso($str))) {
            next if ($w eq '');
            # FIX: this should be replaced by stemmer!
            # The "-" in the replacement list is escaped: unescaped it
            # would form the reversed range '-" which tr/// rejects.
            $w =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'\-".'',"'Rr/;
            $w =~ s/ß/ss/g;
            $out .= "W $w $nr\n";
        }
    }
    return $out;
}
64
65 #--------------------------------------------------------------------
66
# c_852_iso($string)
#
# Returns a copy of $string with its high-bit characters translated through
# the fixed table below.
# NOTE(review): judging by the function name this is presumably a
# CP852 -> ISO-8859-2 conversion — confirm against the original bytes of
# the table.
67 sub c_852_iso {
68 my $tmp = $_[0];
# the "if ($tmp)" guard skips the translation for undef, "" and "0", so
# those values are returned unchanged (and undef causes no warning)
69 $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ / if ($tmp);
70 return $tmp;
71 }
72
# c_852_czs($string)
#
# Returns a copy of $string converted with c_852_iso() and then stripped of
# accents: every character of the search list below is replaced by the
# ASCII character at the same position of the replacement list, and "ß"
# becomes "ss". An undefined argument is returned as undef.
#
# The first step used to be a second, inline copy of the c_852_iso() table;
# calling c_852_iso() keeps a single copy to maintain.
# NOTE(review): the two copies read identically in the listing — confirm
# before relying on it.
sub c_852_czs {
    my $tmp = c_852_iso($_[0]);
    return $tmp if (!defined $tmp);
    # The "-" in the replacement list is escaped: unescaped it would form
    # the reversed range '-" which tr/// rejects.
    $tmp =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'\-".'',"'Rr/;
    $tmp =~ s/ß/ss/g;
    return $tmp;
}
80
81 #--------------------------------------------------------------------
82
83 # $mps .= sf_to_mps($subfield_hash,"subfields",$mps_id)
84 #
85 # subfields options:
86 # * - all (no sort)
87 # > - all, sort ascending
88 # < - all, sort descending
89
# Turn a subfield specification into a list of subfield codes.
#
#   @codes = subfields_str_2_arr($subfields, $sf_hash);
#
# $subfields - specification string:
#                "*"     all keys of $sf_hash, unsorted
#                ">"     all keys of $sf_hash, sorted ascending
#                "<"     all keys of $sf_hash, sorted descending
#                "abc>"  the listed codes, sorted ascending
#                "abc<"  the listed codes, sorted descending
#                "abc"   the listed codes in the given order
# $sf_hash   - hash reference keyed by subfield code; only consulted for
#              the "*", ">" and "<" forms and may be omitted otherwise
#
# Returns an empty list when $subfields is undefined or empty. The test is
# for emptiness rather than truth, so the single code "0" is accepted.
sub subfields_str_2_arr {
    my ($subfields, $sf_hash) = @_;
    return if (!defined $subfields || $subfields eq '');

    my @sf_arr;

    if ($subfields eq "*" || $subfields eq ">" || $subfields eq "<") {
        # whole-hash forms; tolerate a missing hash
        @sf_arr = keys %{ $sf_hash || {} };
        @sf_arr = sort @sf_arr               if ($subfields eq ">");
        @sf_arr = sort {$b cmp $a} @sf_arr   if ($subfields eq "<");
    } elsif ($subfields =~ s/>//) {
        # only the first ">" is a sort marker and is removed
        my @codes = split(//,$subfields);
        @sf_arr = sort @codes;
    } elsif ($subfields =~ s/<//) {
        my @codes = split(//,$subfields);
        @sf_arr = sort {$b cmp $a} @codes;
    } else {
        @sf_arr = split(//,$subfields);
    }
    return @sf_arr;
}
110
111
# Pass selected subfields of one field occurrence to mps_expand().
#
#   $mps .= sf_to_mps($sf_hash, $subfields, $mps_id);
#
# $sf_hash   - hash reference keyed by subfield code
# $subfields - specification understood by subfields_str_2_arr()
# $mps_id    - MPS field number; must be present in %in_mps_header
#
# Returns the concatenated mps_expand() results. Dies if $mps_id is not in
# the MPS header.
# NOTE(review): mps_expand() is not defined in the visible part of this
# file — confirm where it comes from.
sub sf_to_mps {
    my $sf_hash   = shift @_;
    my $subfields = shift @_;
    my $mps_id    = shift @_;

    $in_mps_header{$mps_id} || die "$mps_id is not in mps_header";

    my $mps_lines = "";
    my @codes = subfields_str_2_arr($subfields,$sf_hash);
    foreach my $code (@codes) {
        $mps_lines .= mps_expand($mps_id,$sf_hash->{$code});
    }
    return $mps_lines;
}
123
124 #--------------------------------------------------------------------
125
126 # $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
127
# Pass every occurrence of an ISIS field to mps_expand().
#
#   $mps .= isis_to_mps($row, $isis_id, $mps_id [, "subfields"]);
#
# $row       - record, a hash reference whose fields are array references
#              of occurrences
# $isis_id   - ISIS field number
# $mps_id    - MPS field number; must be present in %in_mps_header
# $subfields - optional string of subfield codes; when omitted every
#              subfield of an occurrence is used
#
# Occurrences are read until the first false one. An occurrence for which
# OpenIsis::subfields() yields no keys is passed to mps_expand() as a whole.
# Dies on a false $row, $isis_id or $mps_id and on an unknown $mps_id.
sub isis_to_mps {
    my ($row, $isis_id, $mps_id, $subfields) = @_;
    $row     || die;
    $isis_id || die;
    $mps_id  || die;

    $in_mps_header{$mps_id} || die "$mps_id is not in mps_header";

    my $mps_lines = "";

    for (my $n = 0; $row->{$isis_id}->[$n]; $n++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$n]);
        my @present = keys %{$sf_hash};

        if (!@present) {
            # no subfields: index the raw value
            $mps_lines .= mps_expand($mps_id,$row->{$isis_id}->[$n]);
            next;
        }

        my @codes = $subfields ? split(//,$subfields) : @present;
        foreach my $code (@codes) {
            $mps_lines .= mps_expand($mps_id,$sf_hash->{$code});
        }
    }
    return $mps_lines;
}
159
160 #--------------------------------------------------------------------
161
162 # $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
163 #
164 # fields:
165 # * - all (no sort)
166 # > - all, sort ascending
167 # < - all, sort descending
168 #
169
# Join the values of the listed subfields into one bib line.
#
#   $line = bib_grp($sf_hash, $bib_id, $sep, @subfield_codes);
#
# Returns "$bib_id value<sep>value...\n", or "" when $sf_hash is false or
# none of the listed subfields has a value. Undefined and empty values are
# skipped; a literal "0" is kept.
#
# This used to be a named sub nested inside isis_to_bib(). Named subs are
# package-wide wherever they are written, so it is declared here at file
# level under the same name.
sub bib_grp {
    my ($sf_hash, $bib_id, $sep, @codes) = @_;
    return "" if (!$sf_hash);

    my @values;
    foreach my $code (@codes) {
        my $val = $sf_hash->{$code};
        push(@values, $val) if (defined $val && $val ne '');
    }
    return "" if (!@values);
    return "$bib_id " . join($sep, @values) . "\n";
}

# Build the bib lines for every occurrence of an ISIS field.
#
#   $bib .= isis_to_bib($row, $isis_id, $bib_id [, "subfields"
#                       [, group size [, "group sort" [, "separator"]]]]);
#
# $row        - record, a hash reference whose fields are array references
#               of occurrences
# $isis_id    - ISIS field number
# $bib_id     - tag written in front of each line; without its leading "%"
#               it must be a key of %default::FieldNames
# $subfields  - specification understood by subfields_str_2_arr(),
#               default "*"
# $group_size - when non-zero, subfields are emitted $group_size at a time,
#               one line per group; default 0 (one line per occurrence)
# $group_sort - ">" or "<", appended to each group before it is expanded;
#               default ''
# $sep        - separator between subfield values, default ' '
#
# Occurrences are read until the first false one. An occurrence without
# subfields is written as "$bib_id value\n". $row is not modified when the
# field is missing.
# Dies on a false $row, $isis_id or $bib_id and on an unknown field name.
sub isis_to_bib {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $bib_id = shift @_ || die;
    my $subfields = shift @_ || '*';
    my $group_size = shift @_ || 0;
    my $group_sort = shift @_ || '';
    my $sep = shift @_ || ' ';

    my $fld = $bib_id; $fld =~ s/^%//;
    die "$fld is not in FieldNames" if (!$default::FieldNames{$fld});

    # look the field up once; falling back to an empty list keeps the
    # caller's record free of autovivified fields
    my $occurrences = $row->{$isis_id} || [];

    my $bib="";

    for (my $i = 0; $occurrences->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($occurrences->[$i]);

        if (scalar(keys %{$sf_hash}) == 0) {
            # no subfields, use just value!
            $bib .= "$bib_id ".$occurrences->[$i]."\n";
            next;
        }

        if (!$group_size) {
            $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($subfields,$sf_hash) );
            next;
        }

        # one line per group of $group_size subfield codes; test the
        # length so that a remaining "0" code is not mistaken for the end
        my $tmp_flds = join("",subfields_str_2_arr($subfields,$sf_hash));
        while (length($tmp_flds) > 0) {
            # four-argument substr cuts the group off the front
            my $tmp_fld_grp = substr($tmp_flds,0,$group_size,'');
            $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($tmp_fld_grp.$group_sort) );
        }
    }
    return $bib;
}
233
234 #--------------------------------------------------------------------
235
# Format the first occurrence of an ISIS field, or some of its subfields.
#
#   $str = isis_sf($row, $isis_id, $subfield [, $prefix [, $postfix
#                  [, @separators]]]);
#
# $row        - record, a hash reference whose fields are array references
#               of occurrences
# $isis_id    - ISIS field number
# $subfield   - undef or '' for the whole value, one code for a single
#               subfield, several codes for a joined list
# $prefix     - text put in front of a non-empty result, default ''
# $postfix    - text put behind a non-empty result, default ''
# @separators - one separator per listed code (default ' '); a separator is
#               consumed for every code, present or not, and is written in
#               front of the value once something has been output
#
# Returns '' when the field or all requested subfields are missing.
sub isis_sf {
    my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
    $prefix  ||= '';
    $postfix ||= '';

    return '' if (!$row->{$isis_id}->[0]);

    my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);
    my $wanted = defined $subfield ? length($subfield) : 0;

    # no subfield requested: whole value
    if ($wanted == 0) {
        return $prefix . $row->{$isis_id}->[0] . $postfix;
    }

    # exactly one subfield
    if ($wanted == 1) {
        return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
    }

    # several subfields joined by their separators
    my $joined;
    foreach my $code (split(//,$subfield)) {
        my $glue = shift @sep || ' ';
        my $part = $sf->{$code};
        next if (!$part);
        $joined = $joined ? $joined . $glue . $part : $part;
    }
    return $joined ? $prefix . $joined . $postfix : '';
}
272
273 #--------------------------------------------------------------------
274
# offset in the bib file up to which the "T" lines have been written
my $last_tell=0;

# sub-directories to scan: the -m list, or the database directory itself
my @isis_dirs = $opts{m} ? split(/,/,$opts{m}) : ( '.' ); # use dirname as database name

# a database is used when its LIBRI and/or PERI directory exists; the
# database path is that directory plus the same name again
my @isis_dbs;

foreach my $subdir (@isis_dirs) {
    foreach my $name ('LIBRI', 'PERI') {
        my $path = "$common::isis_data/$db_dir/$subdir/$name";
        push(@isis_dbs, "$path/$name") if (-e $path);
    }
}
293
# Build MPS "W" lines for every occurrence of an ISBN-style field.
#
#   $mps .= isis_isn_to_mps($row, $isis_id, $nr);
#
# Each value is written with its spaces removed and, when it contains
# hyphens, a second time without them. Values that end up empty are
# skipped. Occurrences are read until the first false one.
# Dies on a false $row, $isis_id or $nr.
#
# This used to be declared in the middle of the record loop; named subs
# are package-wide, so it lives at file level now.
sub isis_isn_to_mps {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $nr = shift @_ || die;
    my $i=0;
    my $mps='';
    while (my $isn=$row->{$isis_id}->[$i]) {
        $i++;
        $isn =~ s/ +//g;	# remove spaces
        next if ($isn eq '');
        $mps .= "W $isn $nr\n";
        if ($isn =~ s/-//g && $isn ne '') {
            $mps .= "W $isn $nr\n";
        }
    }
    return $mps;
}

# Main loop: for every record with a title (field 200) of every database
#   - append the record's bib lines to R,
#   - write the matching MPS document to S and to the indexer (MPS).
foreach my $isis_db (@isis_dbs) {

    print MPS "M reading ISIS from '$isis_db'...\n";

    my $db = OpenIsis::open( "$isis_db" );

    my $max_rowid = OpenIsis::maxRowid( $db );

    my $last_pcnt = 0;

    for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
        my $row = OpenIsis::read( $db, $row_id );

        # records without a title are not indexed
        next if (!$row->{'200'}->[0]);

        my $bib = "%MFN $row->{mfn}\n";
        my $mps = "W $row->{mfn} 14\n";

        # progress message, once per percent
        my $pcnt = int($row->{mfn} * 100 / $max_rowid);
        if ($pcnt != $last_pcnt) {
            printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt);
            $last_pcnt = $pcnt;
        }

        # NOTE(review): the closing quote is only written together with
        # subfield e, so a title without it keeps an unbalanced quote.
        my $headline;
        $headline .= isis_sf($row,'200','a',"'");
        $headline .= isis_sf($row,'200','e'," : ","'");

        # author
        foreach my $id ('700','701','710','711','503') {
            $bib .= isis_to_bib($row,$id,'%700+','*',2,'<');
        }
        foreach my $id ('700','701','710','711','503','702') {
            $mps .= isis_to_mps($row,$id,1);
        }
        $mps .= isis_to_mps($row,'200',1,"fg");

        $bib .= isis_to_bib($row,'205','%205');

        # title
        my $sf = OpenIsis::subfields($row->{'200'}->[0]);
        my $book;
        $book .= $sf->{a} if ($sf->{a});
        $book .= " ; ".$sf->{k} if ($sf->{k});
        $book .= " = ".$sf->{d} if ($sf->{d});
        $book .= " : ".$sf->{e} if ($sf->{e});
        $book .= " / ".$sf->{f} if ($sf->{f});
        $book .= " ; ".$sf->{g} if ($sf->{g});
        $book .= ". ".$sf->{c} if ($sf->{c});
        $book .= " / ".$sf->{x} if ($sf->{x});
        $book .= " ; ".$sf->{y} if ($sf->{y});
        $bib .= "%200+ $book\n" if ($book);

        $mps .= isis_to_mps($row,'200',2,"akcde");
        $mps .= isis_to_mps($row,'532',2);
        $mps .= isis_to_mps($row,'424',2);

        foreach my $id ('230','231','232','233') {
            $mps .= isis_to_mps($row,$id,2,"ae");
        }

        # %sv lines built from the field groups 230/250/260/290 up to
        # 233/253/263/293. The line for the last group (233) used to be
        # computed and then dropped; it is written like the others now.
        foreach my $n (0 .. 3) {
            my $tmp = isis_sf($row,"23$n",'v',($n ? '<br>' : '')).
                isis_sf($row,"23$n",'a',' : ').
                isis_sf($row,"25$n",undef,'. - ').
                isis_sf($row,"26$n",undef,'. - ').
                isis_sf($row,"29$n",undef,'<br>ISBN ');
            $bib .= "%sv $tmp\n" if ($tmp);
        }

        foreach my $id ('270','271','272','273') {
            $mps .= isis_to_mps($row,$id,2);
        }

        $headline .= isis_sf($row,'700','b'," ");
        $headline .= isis_sf($row,'700','a'," ");

        # publisher
        foreach my $id ('210','250','251','252','253') {
            $mps .= isis_to_mps($row,$id,3);
        }
        $bib .= "%210+ ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";

        if (my $year = isis_sf($row,'210','d')) {
            $year =~ s/^\s*cop\.*\s*//i;
            $year =~ s/[\[\]]*//g;
            $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
            # nothing may be left after the cleanup; don't add "()" then
            $headline .= " ($year)" if ($year ne '');
        }

        foreach my $id ('215','260','261','262','263') {
            $mps .= isis_to_mps($row,$id,15);
        }
        $bib .= isis_to_bib($row,'215','%215+', '*', undef, undef, ', ');

        $bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
        $mps .= isis_to_mps($row,'225',4);

        foreach my $id ('300','320','327') {
            $bib .= isis_to_bib($row,$id,'%300+');
        }
        foreach my $id ('300','320','327','280','281','282','283') {
            $mps .= isis_to_mps($row,$id,5);
        }

        $bib .= isis_to_bib($row,'330','%330');
        $mps .= isis_to_mps($row,'330',6);

        $bib .= isis_to_bib($row,'423','%423');
        $bib .= isis_to_bib($row,'464','%464');
        $mps .= isis_to_mps($row,'464',7);
        $bib .= isis_to_bib($row,'610','%610');
        $mps .= isis_to_mps($row,'610',8);

        $bib .= isis_to_bib($row,'675','%675+');
        $mps .= isis_to_mps($row,'675',9);
        $bib .= isis_to_bib($row,'686','%675+');
        $mps .= isis_to_mps($row,'686',10);

        $bib .= isis_to_bib($row,'990','%990');
        $mps .= isis_to_mps($row,'990',11);

        $bib .= isis_to_bib($row,'991','%991');
        $mps .= isis_to_mps($row,'991',12);

        # ISBN
        $bib .= isis_to_bib($row,'10','%ISBN');
        foreach my $id ('10','290','291','292','293') {
            $mps .= isis_isn_to_mps($row,$id,13);
        }

        # ISSN
        #store_isn($row->{11}->[0],14,'%ISSN');

        $mps .= isis_to_mps($row,'532',1);

        $bib .= isis_to_bib($row,'994','%994a','a');

        # headline
        if ($headline) {
            $headline .= " [".$row->{mfn}."]"; ## debug MFN!
            # "&amp;" is the entity for "&"; the "&and;" written here
            # before is the entity of the logical-and sign
            $headline =~ s/&/&amp;/g;
            $headline =~ s/</&lt;/g;
            $headline =~ s/>/&gt;/g;
            $mps .= "H ".c_852_iso($headline)."\n";
        } else {
            $mps .= "H nepoznato\n";
        }

        print R c_852_iso($bib);

        # check if all fields are defined
        foreach (split(/\n/,$bib)) {
            if (/^%(\w+)\s/ && !$default::FieldNames{$1}) {
                die "field $1 used but not in FieldNames";
            }
        }

        # "T" line: length of this record in the bib file, the file name
        # and the start and end offsets of the record
        $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
        $last_tell=tell(R);

        print R "\n";

        $mps .= "E\n";

        print S $mps;
        print MPS $mps;
    }
}
# End of stream marker for the stream copy and for the indexer.
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

# Buffered write errors only show up at close(), so both output files are
# closed explicitly and checked.
close(S) || die "can't close $dir/stream: $!";
close(R) || die "can't close $dir/bib: $!";

# close() on a pipe waits for the child and returns false both on I/O
# errors and on a non-zero exit status; in the latter case $! is 0.
if (!close(MPS)) {
    die "can't close pipe to MPS indexer $common::mpsindex: $!" if ($!);
    warn "MPS indexer $common::mpsindex exited with status ".($? >> 8)."\n";
}

  ViewVC Help
Powered by ViewVC 1.1.26