/[webpac-proto]/isis2stream.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /isis2stream.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations)
Sun Jun 16 16:06:55 2002 UTC (17 years, 5 months ago) by dpavlin
Branch: MAIN
Changes since 1.5: +16 -5 lines
File MIME type: text/plain
support for PERI dbs

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     use strict;
4     use OpenIsis;
5     use Getopt::Std;
6     #use Data::Dumper;
7 dpavlin 1.4 use common;
8 dpavlin 1.1
# Command-line options:
#   -d database_dir          (required) sub-directory used for input and output
#   -m database1,database2   (optional) comma separated list of databases
my %opts;

getopt('dm', \%opts);

# -d is mandatory: every path below is derived from it, so refuse to run
# with only -m (or nothing) instead of building paths from an undefined value.
die "usage: $0 -d [database_dir] -m [database1,database2] "
    if (!%opts || !defined $opts{d} || $opts{d} eq '');

my $db_dir = $opts{d};

# Create the output tree; report a failed mkdir here rather than as a
# confusing "can't open output" error further down.
foreach my $path ("$common::install_dir/$db_dir", "$common::install_dir/$db_dir/data") {
    next if (-e $path);
    mkdir($path) || die "can't create directory $path: $!";
}

my $dir = "$common::install_dir/$db_dir/data";

# S   - file copy of everything that is sent to the indexer
# R   - file that receives the bibliographic records
# MPS - pipe into the MPS indexer
open(S, '>', "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, '>', "$dir/bib") || die "can't open output $dir/bib: $!";
# NOTE(review): the command line goes through the shell with $db_dir
# interpolated; kept as a single string because $common::mpsindex may itself
# carry arguments -- confirm before switching to the list form of open.
open(MPS, "| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";

print S $common::mps_header;
print MPS $common::mps_header;
30 dpavlin 1.1
#
# expand(nr, "space separated string", ...);
#
# Converts every given string with c_852_iso(), splits it into words, folds
# accented letters to plain ASCII and returns one "W <word> <nr>\n" line per
# word (an empty string when there is nothing to index).
#

sub expand {
    my $nr = shift @_;
    my $out = "";
    foreach my $raw (@_) {
        # Skip missing or empty strings but keep going: a false value such as
        # "0" must not hide the arguments that follow it.
        next if (!defined $raw || $raw eq '');
        my $fld = c_852_iso($raw);
        # split ' ' ignores leading whitespace, so no empty word is produced.
        foreach my $w (split(' ', $fld)) {
            # FIX: this should be replaced by stemmer!
            #$w =~ tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
            $w =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
            $w =~ s/ß/ss/g;
            $out .= "W $w $nr\n";
        }
    }
    return $out;
}
50    
51     #--------------------------------------------------------------------
52    
53     ################### ERASE###############3
54    
# Expand the sub-fields of one ISIS field ("^a.....^b....") into %data,
# keyed by the one-character sub-field code. A field without sub-fields is
# stored under the key "all" with a marker text appended.
# %data is cleared on every call; an empty or false argument leaves it empty.
my %data; # FIX
sub ex_sf {
    %data = ();
    my $in = $_[0];
    if ($in) {
        if ($in =~ m/^\^/) {
            foreach my $fld (split(/\^/, $in)) {
                # The code is exactly one character; a greedy \w+ would
                # swallow most of the value into the hash key.
                $data{$1} = $2 if ($fld =~ m/^(\w)(.+)$/);
            }
        } else {
            $data{all} = $in."<-- iz polja bez podpolja";
        }
    }
}
74    
# Dump all sub-fields currently held in %data through expand(), visiting the
# sub-field codes in ascending order. Returns the concatenated result.
sub all_sf {
    my ($nr) = @_;
    return join("", map { scalar expand($nr, $data{$_}) } sort keys %data);
}
84    
# Same as all_sf(), but the sub-field codes are visited in descending order.
sub all_sf_r {
    my ($nr) = @_;
    my @codes = sort { $b cmp $a } keys %data;
    return join("", map { scalar expand($nr, $data{$_}) } @codes);
}
93    
# all_sf2bib(nr [, max_in_line [, separator]])
#
# Joins the values in %data (codes ascending) with the separator (default
# one space) and returns them as "<nr> <values>\n" lines. When max_in_line
# is non-zero a new line is started after that many values.
sub all_sf2bib {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep ||= ' ';

    my $bib = "";
    my $line;
    my $count = 0;
    foreach my $code (sort keys %data) {
        $line = $line ? $line.$sep.$data{$code} : $data{$code};
        # flush the line once it holds max_in_line values
        if (++$count == $max_in_line) {
            $bib .= "$nr $line\n" if ($line);
            $count = 0;
            $line = "";
        }
    }
    # whatever is left over after the last full line
    $bib .= "$nr $line\n" if ($line);
    return $bib;
}
117    
# all_sf2bib_r(nr [, max_in_line [, separator]])
#
# Same as all_sf2bib(), but the sub-field codes are visited in descending
# order.
sub all_sf2bib_r {
    my ($nr, $max_in_line, $sep) = @_;
    $max_in_line ||= 0;
    $sep ||= ' ';

    my $bib = "";
    my $line;
    my $count = 0;
    foreach my $code (sort { $b cmp $a } keys %data) {
        $line = $line ? $line.$sep.$data{$code} : $data{$code};
        # flush the line once it holds max_in_line values
        if (++$count == $max_in_line) {
            $bib .= "$nr $line\n" if ($line);
            $count = 0;
            $line = "";
        }
    }
    # whatever is left over after the last full line
    $bib .= "$nr $line\n" if ($line);
    return $bib;
}
141    
142     #--------------------------------------------------------------------
143    
# Returns a copy of the argument with every high-bit character replaced
# according to the table below; false values (undef, "", "0") are returned
# untouched.
# NOTE(review): presumably code page 852 -> ISO-8859-2, judging by the name;
# the table is byte-sensitive, so keep the file encoding intact when editing.
sub c_852_iso {
    my ($text) = @_;
    return $text if (!$text);
    $text =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ /;
    return $text;
}
149    
# Converts the argument with c_852_iso() and then folds the accented letters
# to plain ASCII ("ß" becomes "ss"). Returns the converted copy; undef or an
# empty string is returned as is.
sub c_852_czs {
    # Reuse c_852_iso() instead of carrying a second copy of its table.
    my $tmp = c_852_iso($_[0]);
    return $tmp if (!defined $tmp || $tmp eq '');
    $tmp =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
    $tmp =~ s/ß/ss/g;
    return $tmp;
}
157    
158     #--------------------------------------------------------------------
159    
# $mps .= sf_to_mps($subfield_hash,"subfields",mps_id);
# @codes = subfields_str_2_arr("subfields",$subfield_hash);
#
# subfields options:
# *    - all codes found in the hash (no sort)
# >    - all, sort ascending
# <    - all, sort descending
# abc  - exactly the listed codes, in the given order
# abc> - the listed codes, sort ascending (only the first ">" is removed)
# abc< - the listed codes, sort descending (only the first "<" is removed)
#
# Returns nothing when the subfields string is missing or false.

sub subfields_str_2_arr {
    my ($subfields, $sf_hash) = @_;
    return if (!$subfields);

    my @sf_arr;
    if ($subfields eq "*" || $subfields eq ">" || $subfields eq "<") {
        # codes come from the hash itself
        @sf_arr = keys %{$sf_hash};
        @sf_arr = sort @sf_arr if ($subfields eq ">");
        @sf_arr = sort { $b cmp $a } @sf_arr if ($subfields eq "<");
    } else {
        # codes are spelled out; a sort marker is stripped before splitting
        my $ascending = ($subfields =~ s/>//);
        my $descending = (!$ascending && $subfields =~ s/<//);
        @sf_arr = split(//, $subfields);
        if ($ascending) {
            @sf_arr = sort @sf_arr;
        } elsif ($descending) {
            @sf_arr = sort { $b cmp $a } @sf_arr;
        }
    }
    return @sf_arr;
}
187    
188    
# sf_to_mps($sf_hash, "subfields", mps_id)
#
# Runs mps_expand() on every sub-field selected by the subfields string (see
# subfields_str_2_arr() for its syntax) and returns the concatenated output.
sub sf_to_mps {
    my ($sf_hash, $subfields, $mps_id) = @_;
    my @pieces = map { scalar mps_expand($mps_id, $sf_hash->{$_}) }
        subfields_str_2_arr($subfields, $sf_hash);
    return join("", @pieces);
}
199    
200     #--------------------------------------------------------------------
201    
# $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
#
# Feeds every occurrence of ISIS field isis_id in $row to mps_expand() under
# index mps_id and returns the concatenated result ("" when the field is
# absent). With a "subfields" string only those one-character sub-field codes
# are used, in the given order; without it all sub-fields of the occurrence
# are used. An occurrence without sub-fields is passed as a whole.
# Dies when row, isis_id or mps_id is missing.

sub isis_to_mps {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $mps_id = shift @_ || die;
    my $subfields = shift @_;

    my $out = "";

    # Plain read: does not create the field in $row when it is absent.
    my $occurrences = $row->{$isis_id};
    return $out if (!$occurrences);

    foreach my $value (@{$occurrences}) {
        # Skip holes, but do not stop at a false value such as "0":
        # later occurrences must still be indexed.
        next if (!defined $value || $value eq '');

        my $sf_hash = OpenIsis::subfields($value);

        if (keys(%{$sf_hash}) > 0) {
            my @codes = $subfields ? split(//, $subfields) : keys %{$sf_hash};
            foreach my $code (@codes) {
                $out .= mps_expand($mps_id, $sf_hash->{$code});
            }
        } else {
            $out .= mps_expand($mps_id, $value);
        }
    }
    return $out;
}
233    
234     #--------------------------------------------------------------------
235    
# bib_grp($sf_hash, bib_id, separator, code, code, ...)
#
# Joins the defined values of the listed sub-field codes with the separator
# and returns "<bib_id> <values>\n", or "" when nothing was collected.
# Defined at file level: a named sub inside another sub is compiled only
# once and is global anyway, so nesting it only obscured that.
sub bib_grp {
    my $bib_grp;
    my $sf_hash = shift @_ || return "";
    my $bib_id = shift @_;
    my $sep = shift @_;
    foreach my $code (@_) {
        next if (! defined $sf_hash->{$code});
        if ($bib_grp) {
            $bib_grp .= $sep . $sf_hash->{$code};
        } else {
            $bib_grp = $sf_hash->{$code};
        }
    }
    return $bib_grp ? "$bib_id $bib_grp\n" : "";
}

# $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
#
# Returns one or more "<bib_id> <text>\n" lines for every occurrence of ISIS
# field isis_id in $row ("" when the field is absent).
#
# subfields (default "*"):
# * - all (no sort)
# > - all, sort ascending
# < - all, sort descending
# or an explicit list of sub-field codes
#
# group size - when non-zero, the selected codes are cut into groups of that
#              many codes and every group becomes a line of its own
# group sort - ">" or "<", appended to every group to sort inside the group
# separator  - placed between sub-field values (default one space)
#
# Dies when row, isis_id or bib_id is missing.

sub isis_to_bib {
    my $row = shift @_ || die;
    my $isis_id = shift @_ || die;
    my $bib_id = shift @_ || die;
    my $subfields = shift @_ || '*';
    my $group_size = shift @_ || 0;
    my $group_sort = shift @_ || '';
    my $sep = shift @_ || ' ';

    my $bib = "";

    # Plain read: does not create the field in $row when it is absent.
    my $occurrences = $row->{$isis_id};
    return $bib if (!$occurrences);

    foreach my $value (@{$occurrences}) {
        # Skip holes, but do not stop at a false value such as "0":
        # later occurrences must still be written.
        next if (!defined $value || $value eq '');

        my $sf_hash = OpenIsis::subfields($value);

        if (keys(%{$sf_hash}) > 0) {
            if ($group_size) {
                my $tmp_flds = join("", subfields_str_2_arr($subfields, $sf_hash));
                # test the length, not the truth value: a remaining "0" code
                # must not end the loop early
                while (length($tmp_flds)) {
                    my $tmp_fld_grp = substr($tmp_flds, 0, $group_size);
                    $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($tmp_fld_grp.$group_sort) );
                    last if (length($tmp_flds) <= $group_size);
                    $tmp_flds = substr($tmp_flds, $group_size);
                }
            } else {
                $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($subfields, $sf_hash) );
            }
        } else {
            # no subfields, use just value!
            $bib .= "$bib_id ".$value."\n";
        }
    }
    return $bib;
}
304    
305     #--------------------------------------------------------------------
306    
# isis_sf($row, isis_id [, "subfields" [, prefix [, postfix [, separators...]]]])
#
# Formats sub-fields of the FIRST occurrence of ISIS field isis_id.
# With one sub-field code (default "a") the result is prefix.value.postfix.
# With several codes the present values are joined; the separators are
# consumed one per requested code (also for missing ones), a missing or
# false separator counts as one space, and the separator belonging to the
# first collected value is not used.
# Returns '' when the field or all requested sub-fields are missing or false.
sub isis_sf {
    my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
    $subfield ||= 'a';
    $prefix ||= '';
    $postfix ||= '';

    return '' if (!$row->{$isis_id}->[0]);
    my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);

    # single sub-field: no separators involved
    if (length($subfield) == 1) {
        return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
    }

    my $joined;
    foreach my $code (split(//, $subfield)) {
        my $glue = shift @sep || ' ';
        my $value = $sf->{$code};
        next if (!$value);
        $joined = $joined ? $joined . $glue . $value : $value;
    }
    return $joined ? $prefix . $joined . $postfix : '';
}
340    
341     #--------------------------------------------------------------------
342     #--------------------------------------------------------------------
343    
344    
# offset in the bib file (R) where the previous record ended
my $last_tell = 0;

# directories to scan below $common::isis_data/$db_dir;
# without -m only '.' is scanned (use dirname as database name)
my @isis_dirs = $opts{m} ? split(/,/, $opts{m}) : ('.');

# collect every LIBRI and PERI database that exists in those directories
my @isis_dbs;

foreach my $isis_dir (@isis_dirs) {
    foreach my $db_name ('LIBRI', 'PERI') {
        my $db_path = "$common::isis_data/$db_dir/$isis_dir/$db_name";
        push(@isis_dbs, "$db_path/$db_name") if (-e $db_path);
    }
}
363    
# Walk through every record of every database found above. For each record
# that has a title (field 200) a bibliographic entry is written to R and an
# indexer entry ($mps) is written to both S and MPS.
foreach my $isis_db (@isis_dbs) {

    print MPS "M reading ISIS from '$isis_db'...\n";

    my $db = OpenIsis::open( "$isis_db" );

    my $max_rowid = OpenIsis::maxRowid( $db );

    my $last_pcnt = 0;

    foreach my $row_id (1 .. $max_rowid) {
        my $row = OpenIsis::read( $db, $row_id );

        # records without a title are not exported
        next if (!$row->{'200'}->[0]);

        my $mfn = $row->{mfn};
        my $bib = "%MFN $mfn\n";
        my $mps;

        # progress message, once per percent
        my $pcnt = int($mfn * 100 / $max_rowid);
        if ($pcnt != $last_pcnt) {
            printf MPS ("M %5d / %5d -- %-2d %%\n", $mfn, $max_rowid, $pcnt);
            $last_pcnt = $pcnt;
        }

        my $headline = isis_sf($row,'200','a',"'") . isis_sf($row,'200','e'," : ","'");

        # author
        foreach my $tag (qw(700 701 710 711 503)) {
            $bib .= isis_to_bib($row,$tag,'%700+','*',2,'<');
        }
        foreach my $tag (qw(700 701 710 711 503 702)) {
            $mps .= isis_to_mps($row,$tag,1);
        }

        $bib .= isis_to_bib($row,'205','%205');

        # title: sub-fields of field 200, each with its leading punctuation
        my $sf = OpenIsis::subfields($row->{'200'}->[0]);
        my $book;
        foreach my $part (['a',''], ['k'," ; "], ['d'," = "], ['e'," : "], ['f'," / "],
                          ['g'," ; "], ['c',". "], ['x'," / "], ['y'," ; "]) {
            my ($code, $lead) = @{$part};
            $book .= $lead.$sf->{$code} if ($sf->{$code});
        }
        $bib .= "%200 $book\n" if ($book);

        $mps .= isis_to_mps($row,'200',2,"akcde");
        $mps .= isis_to_mps($row,'532',2);
        $mps .= isis_to_mps($row,'424',2);

        $headline .= isis_sf($row,'700','b'," ");
        $headline .= isis_sf($row,'700','a'," ");

        # publisher
        $mps .= isis_to_mps($row,'210',3);
        # if (my $sf = OpenIsis::subfields($row->{'210'}->[0])) {
        # my $tmp;
        # $tmp .= $sf->{a} if ($sf->{a});
        # $tmp .= " : ".$sf->{c} if ($sf->{c});
        # $tmp .= ", ".$sf->{d} if ($sf->{d});
        # $bib .= "%210 $tmp\n" if ($tmp);
        # }
        $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";

        # year: strip "cop." and square brackets; only a certain four digit
        # year is indexed, but the headline gets whatever is left
        if (my $year = isis_sf($row,'210','d')) {
            $year =~ s/^\s*cop\.*\s*//i;
            $year =~ s/[\[\]]*//g;
            $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
            $headline .= " ($year)";
        }

        $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');

        # $bib .= isis_to_bib($row,'225','%225', 'aehivw');
        $bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
        $mps .= isis_to_mps($row,'225',4);

        foreach my $tag (qw(300 320 327)) {
            $bib .= isis_to_bib($row,$tag,'%300+');
        }
        foreach my $tag (qw(300 320 327)) {
            $mps .= isis_to_mps($row,$tag,5);
        }

        # [ isis field, bib tag, mps index (undef = not indexed) ]
        foreach my $map (['330','%330',6],   ['423','%423',undef], ['464','%464',7],
                         ['610','%610',8],   ['675','%675+',9],    ['686','%675+',10],
                         ['990','%990',11],  ['991','%991',12]) {
            my ($tag, $bib_id, $mps_id) = @{$map};
            $bib .= isis_to_bib($row,$tag,$bib_id);
            $mps .= isis_to_mps($row,$tag,$mps_id) if (defined $mps_id);
        }

        # ISBN: indexed twice, with and without dashes
        if (my $isbn = $row->{10}->[0]) {
            $isbn =~ s/ +//g; # remove spaces
            $mps .= "W $isbn 13\n";
            $bib .= "%ISBN $isbn\n";
            $isbn =~ s/-//g;
            $mps .= "W $isbn 13\n";
        }
        $mps .= isis_to_mps($row,'10',12);

        $mps .= isis_to_mps($row,'532',1);

        $bib .= isis_to_bib($row,'994','%994a','a');

        # headline
        if ($headline) {
            ## debug MFN!
            $mps .= "H ".c_852_iso($headline." [".$row->{mfn}."]")."\n";
        } else {
            $mps .= "H nepoznato\n";
        }


        #if ($db_dir eq "sf") {
        # print "MFN: $row->{mfn} ROW ID: $row_id\n";
        # if ($row->{mfn} >= 146) {
        # print Dumper($row);
        # }
        #}

        print R c_852_iso($bib);

        # tell the indexer which byte range of the bib file holds this record
        my $bib_end = tell(R);
        $mps .= "T document text/plain ".($bib_end - $last_tell)." $dir/bib $last_tell ".$bib_end."\n";
        $last_tell = $bib_end;

        print R "\n";

        $mps .= "E\n";


        print S $mps;
        print MPS $mps;
    }
}
# Terminate both copies of the indexer stream.
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

# Buffered write errors only show up when the handle is closed, so check it.
close(S) || die "can't close $dir/stream: $!";
close(R) || die "can't close $dir/bib: $!";

# Closing a pipe waits for the child process; a false result means either a
# system error ($! set) or a non-zero exit status of the indexer ($?).
close(MPS) || die($! ? "error closing pipe to MPS indexer: $!"
                     : "MPS indexer exited with status $?");

  ViewVC Help
Powered by ViewVC 1.1.26