/[webpac-proto]/isis2stream.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /isis2stream.pl

Parent Directory | Revision Log


Revision 1.5 - (show annotations)
Sun Jun 16 15:45:44 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
Changes since 1.4: +15 -4 lines
File MIME type: text/plain
support for multiple ISIS files in one database (-m flag)

1 #!/usr/bin/perl -w
2
3 use strict;
4 use OpenIsis;
5 use Getopt::Std;
6 #use Data::Dumper;
7 use common;
8
# Command-line options filled in by getopt():
#   -d database_dir          directory name used under $common::install_dir
#                            and $common::isis_data (required)
#   -m database1,database2   optional comma-separated list of ISIS databases
my %opts;

getopt('dm', \%opts);

# -d is mandatory: every path below is built from it, so giving only -m
# must not slip through with an undefined directory name.
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! defined $opts{d} || $opts{d} eq '');

my $db_dir = $opts{d};

# Create <install_dir>/<db_dir> and its data/ sub-directory when missing,
# and stop right away if that fails instead of failing later in open().
foreach my $new_dir ("$common::install_dir/$db_dir", "$common::install_dir/$db_dir/data") {
    next if (-e $new_dir);
    mkdir($new_dir) || die "can't create directory $new_dir: $!";
}

my $dir="$common::install_dir/$db_dir/data";


# S   - copy of everything sent to the indexer
# R   - bibliographic records, referenced by byte offset from the T lines
# MPS - pipe into the MPS indexer
# The handle names are used by the rest of the script, so they stay as they
# are; only the open mode is now passed separately from the file name.
open(S, '>', "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, '>', "$dir/bib") || die "can't open output $dir/bib: $!";
open(MPS, '|-', "$common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";

print S $common::mps_header;
print MPS $common::mps_header;
31 #
32 # expand(nr,"space separated string");
33 #
34
35 sub expand {
36 my $nr = shift @_;
37 my $out = "";
38 while (my $fld = c_852_iso(shift @_)) {
39 my @words=split(/\s+/,$fld);
40 foreach my $w (@words) {
41 # FIX: this should be replaced by stemmer!
42 #$w =~ tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
43 $w =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
44 $w =~ s/ß/ss/g;
45 $out .= "W $w $nr\n";
46 }
47 }
48 return $out;
49 }
50
51 #--------------------------------------------------------------------
52
53 ################### ERASE###############3
54
55 # expand sub-fields from ISIS field
56 # (^a.....^b....)
# Sub-fields of the field most recently parsed by ex_sf(), keyed by
# sub-field code; a field without sub-fields is stored under the key 'all'.
my %data; # FIX

# ex_sf($field)
#
# Parse one ISIS field value of the form "^a.....^b...." into %data.
# %data is always emptied first; a false $field (undef, '' or '0') leaves it
# empty.
#
# The sub-field code is the single character that follows '^'.  The earlier
# pattern used \w+, which swallowed the leading word characters of the value
# into the key ("^aFoo bar" was stored under "aFoo").  A character
# translation that was applied to a scratch copy and then thrown away has
# been removed; values are stored exactly as they were passed in.
sub ex_sf {
    %data = ();
    my $in = $_[0];
    if ($in) {
        if ($in =~ m/^\^/) {
            foreach my $fld (split(/\^/,$in)) {
                # sub-fields with an empty value do not match and are skipped
                $data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
            }
        } else {
            $data{all} = $in."<-- iz polja bez podpolja";
        }
    }
}
74
75 # dump all sub-fields
# all_sf($nr)
#
# Run expand($nr, value) over every sub-field currently held in %data,
# taking the sub-field codes in ascending order, and return the results
# concatenated into one string ('' when %data is empty).
sub all_sf {
    my ($nr) = @_;
    return join('', map { expand($nr, $data{$_}) } sort keys %data);
}
84
# all_sf_r($nr)
#
# Same as all_sf(), but the sub-field codes are visited in descending order.
# Returns the concatenated expand() output ('' when %data is empty).
sub all_sf_r {
    my ($nr) = @_;
    # hash keys are unique, so reversing the ascending sort gives exactly the
    # descending order
    return join('', map { expand($nr, $data{$_}) } reverse sort keys %data);
}
93
# all_sf2bib($nr [, $max_in_line [, $sep]])
#
# Build "$nr value<sep>value...\n" lines from the sub-fields in %data, taken
# in ascending order of sub-field code.
#
#   $nr           prefix written at the start of every line
#   $max_in_line  number of sub-fields per line; 0 (the default) puts all of
#                 them on a single line
#   $sep          text placed between values; a false value means ' '
#
# Returns the generated lines as one string ('' when there is nothing to
# write).
sub all_sf2bib {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my $bib     = "";
    my $line    = "";
    my $on_line = 0;    # sub-fields consumed since the last flush

    for my $code (sort keys %data) {
        $line = $line ? $line . $sep . $data{$code} : $data{$code};
        if (++$on_line == $max_in_line) {
            $bib .= "$nr $line\n" if ($line);
            $on_line = 0;
            $line    = "";
        }
    }

    # whatever is left over after the last full line
    $bib .= "$nr $line\n" if ($line);
    return $bib;
}
117
# all_sf2bib_r($nr [, $max_in_line [, $sep]])
#
# Same as all_sf2bib(), but the sub-fields in %data are taken in descending
# order of sub-field code.
#
#   $nr           prefix written at the start of every line
#   $max_in_line  number of sub-fields per line; 0 (the default) puts all of
#                 them on a single line
#   $sep          text placed between values; a false value means ' '
#
# Returns the generated lines as one string ('' when there is nothing to
# write).
sub all_sf2bib_r {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep         ||= ' ';

    my $bib     = "";
    my $line    = "";
    my $on_line = 0;    # sub-fields consumed since the last flush

    # hash keys are unique, so reversing the ascending sort gives exactly the
    # descending order
    for my $code (reverse sort keys %data) {
        $line = $line ? $line . $sep . $data{$code} : $data{$code};
        if (++$on_line == $max_in_line) {
            $bib .= "$nr $line\n" if ($line);
            $on_line = 0;
            $line    = "";
        }
    }

    # whatever is left over after the last full line
    $bib .= "$nr $line\n" if ($line);
    return $bib;
}
141
142 #--------------------------------------------------------------------
143
144 sub c_852_iso {
145 my $tmp = $_[0];
146 $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ / if ($tmp);
147 return $tmp;
148 }
149
150 sub c_852_czs {
151 my $tmp = $_[0];
152 $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ /;
153 $tmp =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
154 $tmp =~ s/ß/ss/g;
155 return $tmp;
156 }
157
158 #--------------------------------------------------------------------
159
160 # $mps .= sf_to_mps($subfield_hash,"subfields",mps_id);
161 #
162 # subfields options:
163 # * - all (no sort)
164 # > - all, sort ascending
165 # < - all, sort descending
166
# subfields_str_2_arr($subfields [, $sf_hash])
#
# Turn a sub-field specification into the list of sub-field codes to use.
#
#   '*'      every key of %$sf_hash, in hash order (unsorted)
#   '>'      every key of %$sf_hash, sorted ascending
#   '<'      every key of %$sf_hash, sorted descending
#   'abc'    the characters of the string, in the order given
#   'abc>'   the characters (first '>' removed), sorted ascending
#   'abc<'   the characters (first '<' removed), sorted descending
#
# $sf_hash is only looked at for the three one-character forms.  A false
# specification (undef, '' or '0') returns nothing.
sub subfields_str_2_arr {
    my ($subfields, $sf_hash) = @_;
    return unless ($subfields);

    my @codes;
    if ($subfields eq '*' || $subfields eq '>' || $subfields eq '<') {
        @codes = keys %{$sf_hash};
        @codes = sort @codes               if ($subfields eq '>');
        @codes = sort { $b cmp $a } @codes if ($subfields eq '<');
    } else {
        # '>' is looked for first; '<' is only considered when no '>' was
        # found, so a string holding both is sorted ascending and keeps '<'
        my $ascending  = ($subfields =~ s/>//);
        my $descending = (! $ascending && $subfields =~ s/<//);
        @codes = split(//, $subfields);
        @codes = sort @codes               if ($ascending);
        @codes = sort { $b cmp $a } @codes if ($descending);
    }
    return @codes;
}
187
188
# sf_to_mps($sf_hash, $subfields, $mps_id)
#
# Feed the selected sub-fields of one field to mps_expand() and return the
# concatenated result.
#
#   $sf_hash    hash reference: sub-field code => value
#   $subfields  selection string as understood by subfields_str_2_arr()
#   $mps_id     first argument handed to mps_expand() for every value
#
# mps_expand() is not defined in this file; presumably it comes from the
# 'common' module - verify.
sub sf_to_mps {
    my ($sf_hash,$subfields,$mps_id) = @_;
    my $out = "";

    $out .= mps_expand($mps_id, $sf_hash->{$_})
        for subfields_str_2_arr($subfields, $sf_hash);

    return $out;
}
199
200 #--------------------------------------------------------------------
201
202 # $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
203
# isis_to_mps($row, $isis_id, $mps_id [, $subfields])
#
# Pass every occurrence of ISIS field $isis_id in $row to mps_expand() and
# return the concatenated result.
#
#   $row        record: hash reference, field tag => array of occurrences
#   $isis_id    tag of the field to read
#   $mps_id     first argument handed to mps_expand()
#   $subfields  optional string of sub-field codes; when given, only these
#               sub-fields are used, in this order, otherwise every
#               sub-field found is used (hash order)
#
# Dies when any of the three mandatory arguments is false.  Occurrences are
# read until the first false one.  An occurrence for which
# OpenIsis::subfields() reports no sub-fields is passed on whole.
#
# mps_expand() is not defined in this file; presumably it comes from the
# 'common' module - verify.
sub isis_to_mps {
    my ($row, $isis_id, $mps_id, $subfields) = @_;
    die unless ($row);
    die unless ($isis_id);
    die unless ($mps_id);

    my $out = "";

    for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

        if (scalar(keys %{$sf_hash}) > 0) {
            my @codes = $subfields ? split(//,$subfields) : keys %{$sf_hash};
            $out .= mps_expand($mps_id,$sf_hash->{$_}) for @codes;
        } else {
            $out .= mps_expand($mps_id,$row->{$isis_id}->[$i]);
        }
    }
    return $out;
}
233
234 #--------------------------------------------------------------------
235
236 # $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
237 #
238 # fields:
239 # * - all (no sort)
240 # > - all, sort ascending
241 # < - all, sort descending
242 #
243
# bib_grp($sf_hash, $bib_id, $sep, @codes)
#
# Helper for isis_to_bib(): join the values of the sub-fields listed in
# @codes (sub-fields missing from %$sf_hash are skipped) with $sep and
# return them as one "$bib_id values\n" line, or '' when nothing was
# collected.
#
# This used to be declared inside isis_to_bib().  A named sub is
# package-wide wherever it is written, so it now sits next to its caller
# under the same name.
sub bib_grp {
    my $sf_hash = shift @_ || return "";
    my $bib_id = shift @_;
    my $sep = shift @_;

    my $bib_grp;
    foreach my $code (@_) {
        next if (! defined $sf_hash->{$code});
        if ($bib_grp) {
            $bib_grp .= $sep . $sf_hash->{$code};
        } else {
            $bib_grp = $sf_hash->{$code};
        }
    }
    return $bib_grp ? "$bib_id $bib_grp\n" : "";
}

# isis_to_bib($row, $isis_id, $bib_id [, $subfields [, $group_size
#             [, $group_sort [, $sep]]]])
#
# Format every occurrence of ISIS field $isis_id in $row as bib lines and
# return them as one string.
#
#   $row         record: hash reference, field tag => array of occurrences
#   $isis_id     tag of the field to read
#   $bib_id      text written at the start of every line
#   $subfields   selection as understood by subfields_str_2_arr()
#                (default '*': all sub-fields, unsorted)
#   $group_size  sub-fields per output line; 0 (default) keeps them all on
#                one line
#   $group_sort  '>' or '<' to sort the sub-fields inside each group
#                ascending or descending (default: keep their order)
#   $sep         text placed between values (default ' ')
#
# Dies when any of the three mandatory arguments is false.  Occurrences are
# read until the first false one.  An occurrence for which
# OpenIsis::subfields() reports no sub-fields is written out whole.
sub isis_to_bib {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $bib_id = shift @_ || die;
    my $subfields = shift @_ || '*';
    my $group_size = shift @_ || 0;
    my $group_sort = shift @_ || '';
    my $sep = shift @_ || ' ';

    my $bib="";

    for (my $i = 0; $row->{$isis_id}->[$i]; $i++) {
        my $sf_hash = OpenIsis::subfields($row->{$isis_id}->[$i]);

        if (scalar(keys %{$sf_hash}) == 0) {
            # no subfields, use just value!
            $bib .= "$bib_id ".$row->{$isis_id}->[$i]."\n";
            next;
        }

        my @codes = subfields_str_2_arr($subfields,$sf_hash);

        # A group size that is not positive means "no grouping"; a negative
        # size used to make the grouping loop spin forever.
        if ($group_size > 0) {
            # Take the codes group by group straight from the list.  The
            # earlier version glued them into one string and looped while
            # that string was true, which stopped early when the only code
            # left was "0".
            while (@codes) {
                my @group = splice(@codes, 0, $group_size);
                if ($group_sort =~ m/>/) {
                    @group = sort @group;
                } elsif ($group_sort =~ m/</) {
                    @group = sort {$b cmp $a} @group;
                }
                $bib .= bib_grp( $sf_hash, $bib_id, $sep, @group );
            }
        } else {
            $bib .= bib_grp( $sf_hash, $bib_id, $sep, @codes );
        }
    }
    return $bib;
}
304
305 #--------------------------------------------------------------------
306
# isis_sf($row, $isis_id [, $subfield [, $prefix [, $postfix [, @sep]]]])
#
# Return sub-field text from the FIRST occurrence of ISIS field $isis_id.
#
#   $row       record: hash reference, field tag => array of occurrences
#   $isis_id   tag of the field to read
#   $subfield  one sub-field code (default 'a'), or a string of several
#              codes whose values are joined in the order given
#   $prefix    text put in front of a non-empty result (default '')
#   $postfix   text put behind a non-empty result (default '')
#   @sep       one separator per code in $subfield, by position; a missing
#              or false separator means ' '
#
# Returns '' when the field is absent or none of the requested sub-fields
# has a true value.
sub isis_sf {
    my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
    $subfield ||= 'a';
    $prefix   ||= '';
    $postfix  ||= '';

    return '' unless ($row->{$isis_id}->[0]);

    my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);

    # single sub-field: no separators involved
    if (length($subfield) == 1) {
        return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
    }

    my $joined;
    foreach my $code (split(//,$subfield)) {
        # a separator is used up for every code, present or not, so the
        # separators stay lined up with the codes
        my $glue = shift @sep || ' ';
        next unless ($sf->{$code});
        $joined = $joined ? $joined . $glue . $sf->{$code} : $sf->{$code};
    }
    return $joined ? $prefix . $joined . $postfix : '';
}
340
341 #--------------------------------------------------------------------
342 #--------------------------------------------------------------------
343
344
# Position in the bib file (R) saved after the previous record was written;
# the main loop uses it as the start offset of the next record.
my $last_tell = 0;

# ISIS databases to read: the comma-separated list given with -m, or just
# '.' (use dirname as database name) when -m was not given.
my @isis_dbs = $opts{m} ? split(/,/,$opts{m}) : ( '.' );
352
# Main export loop.  For every ISIS database, every record that has a title
# field (200) is turned into
#   - bib lines ("%tag value"), written to R, and
#   - one block of indexer commands, written to both S and MPS: the text
#     returned by isis_to_mps(), "W"/"D" lines built here, one "H" headline
#     line, one "T" line pointing at the record inside the bib file, and a
#     closing "E".
foreach my $db_name (@isis_dbs) {

    print MPS "M reading ISIS from $db_dir/$db_name/LIBRI...\n";

    # NOTE(review): the value returned by OpenIsis::open() is used without a
    # check - confirm how the binding reports a database it cannot open.
    my $db = OpenIsis::open( "$common::isis_data/$db_dir/$db_name/LIBRI/LIBRI" );

    my $max_rowid = OpenIsis::maxRowid( $db );

    my $last_pcnt = 0;

    for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
        my $row = OpenIsis::read( $db, $row_id );

        # records without a title are not exported
        next if (! $row->{'200'}->[0]);

        my $bib = "%MFN $row->{mfn}\n";
        my $mps;

        # progress message, sent whenever the whole-number percentage changes
        my $pcnt = int($row->{mfn} * 100 / $max_rowid);
        if ($pcnt != $last_pcnt) {
            printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt);
            $last_pcnt = $pcnt;
        }

        my $headline;
        $headline .= isis_sf($row,'200','a',"'");
        $headline .= isis_sf($row,'200','e'," : ","'");

        # author
        $bib .= isis_to_bib($row,'700','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'701','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'710','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'711','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'503','%700+','*',2,'<');

        $mps .= isis_to_mps($row,'700',1);
        $mps .= isis_to_mps($row,'701',1);
        $mps .= isis_to_mps($row,'710',1);
        $mps .= isis_to_mps($row,'711',1);
        $mps .= isis_to_mps($row,'503',1);
        $mps .= isis_to_mps($row,'702',1);

        $bib .= isis_to_bib($row,'205','%205');

        # title
        my $sf = OpenIsis::subfields($row->{'200'}->[0]);
        my $book;
        $book .= $sf->{a} if ($sf->{a});
        $book .= " ; ".$sf->{k} if ($sf->{k});
        $book .= " = ".$sf->{d} if ($sf->{d});
        $book .= " : ".$sf->{e} if ($sf->{e});
        $book .= " / ".$sf->{f} if ($sf->{f});
        $book .= " ; ".$sf->{g} if ($sf->{g});
        $book .= ". ".$sf->{c} if ($sf->{c});
        $book .= " / ".$sf->{x} if ($sf->{x});
        $book .= " ; ".$sf->{y} if ($sf->{y});
        $bib .= "%200 $book\n" if ($book);

        $mps .= isis_to_mps($row,'200',2,"akcde");
        $mps .= isis_to_mps($row,'532',2);
        $mps .= isis_to_mps($row,'424',2);

        $headline .= isis_sf($row,'700','b'," ");
        $headline .= isis_sf($row,'700','a'," ");

        # publisher
        $mps .= isis_to_mps($row,'210',3);
        # Only write the %210 line when there is something to put on it, as
        # is done for every other bib line; an empty "%210 " line used to be
        # written for records without field 210.
        if (my $publisher = isis_sf($row,'210','acd', '','', ('',' : ',', ') )) {
            $bib .= "%210 $publisher\n";
        }

        if (my $year = isis_sf($row,'210','d')) {
            $year =~ s/^\s*cop\.*\s*//i;
            $year =~ s/[\[\]]*//g;
            # a D line is only written for years without '?' that contain
            # four digits in a row
            $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
            $headline .= " ($year)";
        }

        $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');

        # $bib .= isis_to_bib($row,'225','%225', 'aehivw');
        # same rule as for %210: no line at all when the field is empty
        if (my $series = isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))) {
            $bib .= "%225 $series\n";
        }
        $mps .= isis_to_mps($row,'225',4);

        $bib .= isis_to_bib($row,'300','%300+');
        $bib .= isis_to_bib($row,'320','%300+');
        $bib .= isis_to_bib($row,'327','%300+');
        $mps .= isis_to_mps($row,'300',5);
        $mps .= isis_to_mps($row,'320',5);
        $mps .= isis_to_mps($row,'327',5);

        $bib .= isis_to_bib($row,'330','%330');
        $mps .= isis_to_mps($row,'330',6);

        $bib .= isis_to_bib($row,'423','%423');
        $bib .= isis_to_bib($row,'464','%464');
        $mps .= isis_to_mps($row,'464',7);
        $bib .= isis_to_bib($row,'610','%610');
        $mps .= isis_to_mps($row,'610',8);

        $bib .= isis_to_bib($row,'675','%675+');
        $mps .= isis_to_mps($row,'675',9);
        $bib .= isis_to_bib($row,'686','%675+');
        $mps .= isis_to_mps($row,'686',10);

        $bib .= isis_to_bib($row,'990','%990');
        $mps .= isis_to_mps($row,'990',11);

        $bib .= isis_to_bib($row,'991','%991');
        $mps .= isis_to_mps($row,'991',12);

        # ISBN: written once without spaces and once more without dashes
        if (my $isbn = $row->{10}->[0]) {
            $isbn =~ s/ +//g; # remove spaces
            $mps .= "W $isbn 13\n";
            $bib .= "%ISBN $isbn\n";
            $isbn =~ s/-//g;
            $mps .= "W $isbn 13\n";
        }
        # NOTE(review): field 10 goes in under 12 here while the W lines
        # above use 13 - confirm this is intended.
        $mps .= isis_to_mps($row,'10',12);

        $mps .= isis_to_mps($row,'532',1);

        $bib .= isis_to_bib($row,'994','%994a','a');

        # headline
        if ($headline) {
            $headline .= " [".$row->{mfn}."]"; ## debug MFN!
            $mps .= "H ".c_852_iso($headline)."\n";
        } else {
            $mps .= "H nepoznato\n";
        }


        #if ($db_dir eq "sf") {
        #    print "MFN: $row->{mfn} ROW ID: $row_id\n";
        #    if ($row->{mfn} >= 146) {
        #        print Dumper($row);
        #    }
        #}

        print R c_852_iso($bib);

        # T line: distance from the saved position to the current one, the
        # bib file name, the saved position and the current position
        $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
        $last_tell=tell(R);

        # blank line between records; written after the position was saved
        print R "\n";

        $mps .= "E\n";


        print S $mps;
        print MPS $mps;
    }
}
# Tell the indexer that the input is complete, then close every output.
# Buffered write errors only show up at close time, and closing the pipe is
# the only point at which a failed indexer run can be noticed, so none of
# the results is ignored.
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

close(S) || die "can't close $dir/stream: $!";
close(R) || die "can't close $dir/bib: $!";

# For a piped open, close() is also false when the program exited with a
# non-zero status; in that case $! is 0 and the status is in $?.
close(MPS) || die($! ? "can't close pipe to MPS indexer $common::mpsindex: $!"
                     : "MPS indexer $common::mpsindex exited with status $?");

  ViewVC Help
Powered by ViewVC 1.1.26