/[webpac-proto]/isis2stream.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /isis2stream.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations)
Sun Jun 16 14:37:43 2002 UTC (16 years, 11 months ago) by dpavlin
Branch: MAIN
Changes since 1.2: +1 -21 lines
File MIME type: text/plain
moved common functions to common.pl

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     use strict;
4     use OpenIsis;
5     use Getopt::Std;
6     #use Data::Dumper;
7 dpavlin 1.3 require "common.pl";
8 dpavlin 1.1
# Installation layout and external tools.
#   $install_dir - root under which one directory per database is created
#   $mpsindex    - indexer command line; the stream is piped to it below
#   $isis_data   - root of the ISIS databases that are read
my $install_dir="/local/index";
my $mpsindex="/local/mps-5.3/bin/mpsindex -l 9 -b";
my $isis_data="/var/autofs/misc/isis_data/";
#my $isis_data="/mnt/20020606/Isis/Data/"; # at home

my %opts;

# Both -d and -D are declared as switches that take a value; only -d is
# used in this file.
getopt('dD', \%opts);

die "usage: $0 -d [database_dir] " if (! $opts{d});

my $db_dir = $opts{d};

# Create the per-database output tree one level at a time and stop with a
# clear message if a directory cannot be created (previously a failed
# mkdir only surfaced later as a confusing "can't open output" error).
foreach my $path ("$install_dir/$db_dir", "$install_dir/$db_dir/data") {
    next if (-e $path);
    mkdir($path) || die "can't create directory $path: $!";
}

my $dir="$install_dir/$db_dir/data";

# S   - copy of the stream that is sent to the indexer
# R   - bibliographic records; the stream refers to byte offsets in it
# MPS - pipe into the indexer itself
# Three-argument open keeps the mode separate from the (user supplied)
# path.  The pipe stays a shell command because $mpsindex carries its own
# arguments.
open(S, ">", "$dir/stream") || die "can't open output $dir/stream: $!";
open(R, ">", "$dir/bib") || die "can't open output $dir/bib: $!";
open(MPS,"| $mpsindex -d $install_dir/$db_dir -autokey") || die "can't start MPS indexer $mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";
32    
# Preamble sent once, before any record, to both the stream copy (S) and
# the indexer pipe (MPS).  Each "F" line carries a field name, a numeric
# id and a label; the numeric ids are presumably the ones passed as
# mps_id to isis_to_mps() further down -- confirm against the MPS docs.
my $s = join("", map { "$_\n" } (
    'V 5 3',
    'L hr-HR',
    'F 700+ 1 Autor',
    'F 200+ 2 Naslov',
    'F 210 3 Izdavanje',
    'F 225 4 Nakladnièka cjelina',
    'F 300+ 5 Napomene',
    'F 330 6 Sadr¾aj',
    'F 464 7 Analitièki radovi',
    'F 610 8 Kljuène rijeèi',
    'F 675 9 UDK',
    'F 686 10 CC',
    'F 990 11 Signatura',
    'F 991 12 Inventarni broj',
    'F 10 13 ISBN',
));

# Same text to both outputs, stream copy first.
foreach my $fh (\*S, \*MPS) {
    print {$fh} $s;
}
52    
53     #
54     # expand(nr,"space separated string");
55     #
56    
57     sub expand {
58     my $nr = shift @_;
59     my $out = "";
60     while (my $fld = c_852_iso(shift @_)) {
61     my @words=split(/\s+/,$fld);
62     foreach my $w (@words) {
63     # FIX: this should be replaced by stemmer!
64     #$w =~ tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
65     $w =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
66     $w =~ s/ß/ss/g;
67     $out .= "W $w $nr\n";
68     }
69     }
70     return $out;
71     }
72    
73     #--------------------------------------------------------------------
74    
75     ################### ERASE###############3
76    
77     # expand sub-fileds from ISIS field
78     # (^a.....^b....)
79     my %data; # FIX
80     sub ex_sf {
81     %data = ();
82     my $in = $_[0];
83     if (my $tmp = $in) {
84     # $tmp =~ tr/çæÐџ¬†§¦/¹©ðÐèÈæƾ®/; # ISIS -> iso-8859-2
85     $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ /;
86     if ($in =~ m/^\^/) {
87     my @sub = split(/\^/,$in);
88     foreach my $fld (@sub) {
89     $data{$1} = $2 if ($fld =~ m/^(\w+)(.+)$/)
90     }
91     } else {
92     $data{all} = $in."<-- iz polja bez podpolja";
93     }
94     }
95     }
96    
97     # dump all sub-fields
# all_sf($nr)
#
# Runs expand($nr, ...) over every value currently stored in the
# file-level %data hash, visiting the keys in ascending string order,
# and returns the concatenated result ("" when %data is empty).
sub all_sf {
    my ($nr) = @_;
    my @chunks = map { scalar(expand($nr, $data{$_})) } sort keys %data;
    return join("", @chunks);
}
106    
# all_sf_r($nr)
#
# Same as all_sf() but walks the keys of the file-level %data hash in
# descending string order: each value goes through expand($nr, ...) and
# the pieces are concatenated ("" when %data is empty).
sub all_sf_r {
    my ($nr) = @_;
    my @chunks = map { scalar(expand($nr, $data{$_})) }
                 sort { $b cmp $a } keys %data;
    return join("", @chunks);
}
115    
# _sf2bib_lines($nr, $max_in_line, $sep, @keys)
#
# Shared worker for all_sf2bib() and all_sf2bib_r().  Takes the values of
# the file-level %data hash for @keys, in the order given, joins them
# with $sep and returns them as "$nr <values>\n" lines.  A new line is
# started after every $max_in_line keys; with $max_in_line == 0 the
# counter never matches and everything ends up on a single line.
sub _sf2bib_lines {
    my ($nr, $max_in_line, $sep, @keys) = @_;

    my $bib = "";
    my $out = "";
    my $i = 0;

    foreach my $k (@keys) {
        # Test for content with length(), not truth: a value of "0" is
        # real data and used to be overwritten or dropped.
        if (defined $out && length($out)) {
            $out .= $sep.$data{$k};
        } else {
            $out = $data{$k};
        }
        $i++;
        if ($i == $max_in_line) {
            $bib .= $nr." ".$out."\n" if (defined $out && length($out));
            $i = 0;
            $out = "";
        }
    }
    # flush the last, possibly incomplete, group
    $bib .= $nr." ".$out."\n" if (defined $out && length($out));
    return $bib;
}

# all_sf2bib($nr [, $max_in_line [, $sep]])
#
# Formats all values in %data, keys in ascending order, as bib lines
# prefixed with $nr.  $max_in_line defaults to 0 (one line), $sep to a
# single space (an empty or "0" separator also falls back to a space).
sub all_sf2bib {
    my $nr = shift @_;
    my $max_in_line = shift @_ || 0;
    my $sep = shift @_ || ' ';
    return _sf2bib_lines($nr, $max_in_line, $sep, sort keys %data);
}

# all_sf2bib_r($nr [, $max_in_line [, $sep]])
#
# Same as all_sf2bib() but with the keys in descending order.
sub all_sf2bib_r {
    my $nr = shift @_;
    my $max_in_line = shift @_ || 0;
    my $sep = shift @_ || ' ';
    return _sf2bib_lines($nr, $max_in_line, $sep, sort {$b cmp $a} keys %data);
}
163    
164     #--------------------------------------------------------------------
165    
166     sub c_852_iso {
167     my $tmp = $_[0];
168     $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ / if ($tmp);
169     return $tmp;
170     }
171    
172     sub c_852_czs {
173     my $tmp = $_[0];
174     $tmp =~ tr/€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúüýþÿ/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£×èáíóú¡±®¾ÊꔼȺ—„•’ˆƒÁÂ̪‡€œ›¯¿‹Ÿ–˜Ž‘Ã㍐…†“™ž¤ðÐÏËïÒÍÎ슂‰šÞٝÓßÔÑñò©¹ÀÚàÛýÝþ´­½²·¢§÷¸°¨ÿØøŒ /;
175     $tmp =~ tr/Çüéâäùæç³ëÕõî¬ÄÆÉÅåôö¥µ¦¶ÖÜ«»£èáíóú¡±®¾Êê¼ÈºÁÂ̪¯¿ÃãðÐÏËïÒÍÎìÞÙÓÔÑñò©¹ÀÚàÛýÝþ´­½²·¢¸¨ÿØø/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/;
176     $tmp =~ s/ß/ss/g;
177     return $tmp;
178     }
179    
180     #--------------------------------------------------------------------
181    
182     # $mps .= sf_to_mps($subfiled_hash,"subfields",mps_id);
183     #
184     # subfields options:
185     # * - all (no sort)
186     # > - all, sort ascending
187     # < - all, sort descending
188    
# subfields_str_2_arr($subfields [, $sf_hash])
#
# Turns a subfield selector string into a list of subfield names.
#   "*"        every key of %$sf_hash, in hash order
#   ">"        every key of %$sf_hash, ascending
#   "<"        every key of %$sf_hash, descending
#   "abc>"     the listed characters, ascending (first ">" is removed)
#   "abc<"     the listed characters, descending (first "<" is removed)
#   "abc"      the listed characters in the order written
# Returns an empty list when the selector is missing or false.
sub subfields_str_2_arr {
    my ($subfields, $sf_hash) = @_;
    return unless ($subfields);

    my @names;

    if ($subfields eq "*" || $subfields eq ">" || $subfields eq "<") {
        # whole-hash selectors
        @names = keys %{$sf_hash};
        if ($subfields eq ">") {
            @names = sort @names;
        } elsif ($subfields eq "<") {
            @names = sort {$b cmp $a} @names;
        }
    } elsif ($subfields =~ s/>//) {
        # explicit list with an ascending marker somewhere in it
        @names = sort split(//,$subfields);
    } elsif ($subfields =~ s/<//) {
        # explicit list with a descending marker somewhere in it
        @names = sort {$b cmp $a} split(//,$subfields);
    } else {
        @names = split(//,$subfields);
    }

    return @names;
}
209    
210    
# sf_to_mps($sf_hash, $subfields, $mps_id)
#
# Resolves the selector $subfields against $sf_hash with
# subfields_str_2_arr(), feeds each selected value through
# mps_expand($mps_id, ...) and returns the concatenation of the results.
# mps_expand() is not defined in this file; presumably it comes from
# common.pl.
sub sf_to_mps {
    my ($sf_hash,$subfields,$mps_id) = @_;

    my @chunks;
    foreach my $name (subfields_str_2_arr($subfields,$sf_hash)) {
        push @chunks, scalar(mps_expand($mps_id,$sf_hash->{$name}));
    }
    return join("", @chunks);
}
221    
222     #--------------------------------------------------------------------
223    
224     # $mps .= isis_to_mps ($row,isis_id,mps_id[,"subfields"])
225    
# isis_to_mps($row, $isis_id, $mps_id [, $subfields])
#
# Builds the MPS index lines for every occurrence of ISIS field $isis_id
# in $row.  Each occurrence is split with OpenIsis::subfields(); when it
# has subfields, either the ones named (one character each) in
# $subfields or all of them are passed to mps_expand($mps_id, ...);
# otherwise the whole occurrence is passed.  Returns the concatenated
# result, "" when the field is absent.
#
# Dies when $row, $isis_id or $mps_id is missing or false.
sub isis_to_mps {
    my $row = shift @_ || die "isis_to_mps: missing row";
    my $isis_id = shift @_ || die "isis_to_mps: missing ISIS field id";
    my $mps_id = shift @_ || die "isis_to_mps: missing MPS field id";
    my $subfields = shift @_;

    my $out = "";

    # Look before dereferencing: $row->{$isis_id}->[0] on a missing field
    # would create an empty array inside the caller's row.
    my $occurrences = exists $row->{$isis_id} ? $row->{$isis_id} : undef;
    return $out unless (ref $occurrences);

    foreach my $value (@{$occurrences}) {
        # Skip empty slots but keep going; an occurrence whose value is
        # "0" used to end the loop and hide all later occurrences.
        next unless (defined $value && length($value));

        my $sf_hash = OpenIsis::subfields($value);

        if (scalar keys %{$sf_hash} > 0) {
            my @names = $subfields ? split(//,$subfields) : keys %{$sf_hash};
            foreach my $name (@names) {
                $out.=mps_expand($mps_id,$sf_hash->{$name});
            }
        } else {
            # no subfields, index the value as it is
            $out.=mps_expand($mps_id,$value);
        }
    }
    return $out;
}
255    
256     #--------------------------------------------------------------------
257    
258     # $bib .= isis_to_bib ($row,isis_id,bib_id,"subfields",[,group size][,"group sort"][,"separator"])
259     #
260     # fields:
261     # * - all (no sort)
262     # > - all, sort ascending
263     # < - all, sort descending
264     #
265    
# bib_grp($sf_hash, $bib_id, $sep, @names)
#
# Joins the defined values of the subfields @names from $sf_hash with
# $sep and returns them as one "$bib_id <values>\n" line, or "" when
# $sf_hash is missing or none of the subfields has content.
# (This used to be declared inside isis_to_bib(); a named sub is
# package-level either way, so it now simply lives next to it.)
sub bib_grp {
    my $sf_hash = shift @_ || return "";
    my $bib_id = shift @_;
    my $sep = shift @_;

    my $bib_grp;
    foreach my $name (@_) {
        next if (! defined $sf_hash->{$name});
        # length(), not truth, so that a value of "0" is not lost
        if (defined $bib_grp && length($bib_grp)) {
            $bib_grp.= $sep . $sf_hash->{$name};
        } else {
            $bib_grp = $sf_hash->{$name};
        }
    }
    if (defined $bib_grp && length($bib_grp)) {
        return "$bib_id $bib_grp\n"
    } else {
        return "";
    }
}

# isis_to_bib($row, $isis_id, $bib_id [, $subfields [, $group_size
#             [, $group_sort [, $sep]]]])
#
# Builds the bib lines for every occurrence of ISIS field $isis_id in
# $row, each line prefixed with $bib_id.
#   $subfields   selector understood by subfields_str_2_arr(), default "*"
#   $group_size  when non-zero, the selected subfields are emitted in
#                groups of this many, one line per group
#   $group_sort  ">" or "<", appended to each group's selector so that
#                subfields_str_2_arr() orders the group
#   $sep         separator between values on a line, default " "
# An occurrence without subfields is emitted unchanged on its own line.
# Returns "" when the field is absent.
#
# Dies when $row, $isis_id or $bib_id is missing or false.
sub isis_to_bib {
    my $row = shift @_ || die "isis_to_bib: missing row";
    my $isis_id = shift @_ || die "isis_to_bib: missing ISIS field id";
    my $bib_id = shift @_ || die "isis_to_bib: missing bib field id";
    my $subfields = shift @_ || '*';
    my $group_size = shift @_ || 0;
    my $group_sort = shift @_ || '';
    my $sep = shift @_ || ' ';

    my $bib="";

    # Look before dereferencing: $row->{$isis_id}->[0] on a missing field
    # would create an empty array inside the caller's row.
    my $occurrences = exists $row->{$isis_id} ? $row->{$isis_id} : undef;
    return $bib unless (ref $occurrences);

    foreach my $value (@{$occurrences}) {
        # Skip empty slots but keep going; an occurrence whose value is
        # "0" used to end the loop and hide all later occurrences.
        next unless (defined $value && length($value));

        my $sf_hash = OpenIsis::subfields($value);

        if (scalar keys %{$sf_hash} > 0) {
            if ($group_size) {
                # One character per subfield name: cut the name string
                # into pieces of $group_size characters.
                my $tmp_flds = join("",subfields_str_2_arr($subfields,$sf_hash));
                while (length($tmp_flds)) {
                    my $tmp_fld_grp = substr($tmp_flds,0,$group_size);
                    $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($tmp_fld_grp.$group_sort) );
                    last if (length($tmp_flds) <= $group_size);
                    $tmp_flds=substr($tmp_flds,$group_size);
                }
            } else {
                $bib .= bib_grp( $sf_hash, $bib_id, $sep, subfields_str_2_arr($subfields,$sf_hash) );
            }
        } else {
            # no subfields, use just value!
            $bib .= "$bib_id ".$value."\n";
        }
    }
    return $bib;
}
326    
327     #--------------------------------------------------------------------
328    
# isis_sf($row, $isis_id [, $subfield [, $prefix [, $postfix [, @sep]]]])
#
# Formats subfields of the FIRST occurrence of ISIS field $isis_id.
#   $subfield  one subfield name (default 'a'), or several names written
#              as one string, one character each
#   $prefix    put in front of a non-empty result (default '')
#   $postfix   put behind a non-empty result (default '')
#   @sep       for the multi-name form: separator number N belongs to
#              name number N and is used when that value is appended to
#              earlier output; missing or false separators become ' '
# Returns '' when the field or the requested subfields have no (true)
# value.
sub isis_sf {
    my ($row, $isis_id, $subfield, $prefix, $postfix, @sep) = @_;
    $subfield ||= 'a';
    $prefix   ||= '';
    $postfix  ||= '';

    return '' unless ($row->{$isis_id}->[0]);

    my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);
    my @wanted = split(//,$subfield);

    # single subfield: no separators involved
    if (@wanted == 1) {
        return $sf->{$subfield} ? $prefix . $sf->{$subfield} . $postfix : '';
    }

    my $joined;
    foreach my $idx (0 .. $#wanted) {
        # separators are positional, so one is consumed per name even
        # when that name contributes nothing
        my $glue = $sep[$idx] || ' ';
        my $val = $sf->{$wanted[$idx]};
        next unless ($val);
        $joined = $joined ? $joined . $glue . $val : $val;
    }

    return $joined ? $prefix . $joined . $postfix : '';
}
362    
363     #--------------------------------------------------------------------
364     #--------------------------------------------------------------------
365    
366    
# Main conversion loop.
#
# Reads every row of the LIBRI database and, for each row that has a
# field 200, writes
#   * one record to R (the bib file), followed by an empty line, and
#   * the matching index entry to both S (stream copy) and MPS (indexer):
#     the index lines collected in $mps, an "H" headline, a "T" line that
#     points at the record's byte range in the bib file, and "E".

# offset in R where the previous record ended
my $last_tell=0;

# NOTE(review): the result of OpenIsis::open() is not checked here; how
# a failure is reported is not visible in this file.
my $db = OpenIsis::open( "$isis_data/$db_dir/LIBRI/LIBRI" );

my $max_rowid = OpenIsis::maxRowid( $db );

my $last_pcnt = 0;

for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
    my $row = OpenIsis::read( $db, $row_id );
    # rows without field 200 are skipped entirely
    if ($row->{'200'}->[0]) {

        my $bib = "%MFN $row->{mfn}\n";
        my $mps;

        # progress message, sent only when the whole percent changes
        my $pcnt = int($row->{mfn} * 100 / $max_rowid);
        if ($pcnt != $last_pcnt) {
            printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt);
            $last_pcnt = $pcnt;
        }

        my $headline;
        $headline .= isis_sf($row,'200','a',"'");
        $headline .= isis_sf($row,'200','e'," : ","'");

        # author
        $bib .= isis_to_bib($row,'700','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'701','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'710','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'711','%700+','*',2,'<');
        $bib .= isis_to_bib($row,'503','%700+','*',2,'<');

        $mps .= isis_to_mps($row,'700',1);
        $mps .= isis_to_mps($row,'701',1);
        $mps .= isis_to_mps($row,'710',1);
        $mps .= isis_to_mps($row,'711',1);
        $mps .= isis_to_mps($row,'503',1);
        $mps .= isis_to_mps($row,'702',1);

        $bib .= isis_to_bib($row,'205','%205');

        # title
        my $sf = OpenIsis::subfields($row->{'200'}->[0]);
        my $book;
        $book .= $sf->{a} if ($sf->{a});
        $book .= " ; ".$sf->{k} if ($sf->{k});
        $book .= " = ".$sf->{d} if ($sf->{d});
        $book .= " : ".$sf->{e} if ($sf->{e});
        $book .= " / ".$sf->{f} if ($sf->{f});
        $book .= " ; ".$sf->{g} if ($sf->{g});
        $book .= ". ".$sf->{c} if ($sf->{c});
        $book .= " / ".$sf->{x} if ($sf->{x});
        $book .= " ; ".$sf->{y} if ($sf->{y});
        $bib .= "%200 $book\n" if ($book);

        $mps .= isis_to_mps($row,'200',2,"akcde");
        $mps .= isis_to_mps($row,'532',2);
        $mps .= isis_to_mps($row,'424',2);

        $headline .= isis_sf($row,'700','b'," ");
        $headline .= isis_sf($row,'700','a'," ");

        # publisher
        $mps .= isis_to_mps($row,'210',3);
#       if (my $sf = OpenIsis::subfields($row->{'210'}->[0])) {
#           my $tmp;
#           $tmp .= $sf->{a} if ($sf->{a});
#           $tmp .= " : ".$sf->{c} if ($sf->{c});
#           $tmp .= ", ".$sf->{d} if ($sf->{d});
#           $bib .= "%210 $tmp\n" if ($tmp);
#       }
        $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";

        # year: strip a leading "cop." and square brackets; index it as a
        # date only when it has four digits in a row and no "?"
        if (my $year = isis_sf($row,'210','d')) {
            $year =~ s/^\s*cop\.*\s*//i;
            $year =~ s/[\[\]]*//g;
            $mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/);
            $headline .= " ($year)";
        }

        $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');

#       $bib .= isis_to_bib($row,'225','%225', 'aehivw');
        $bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
        $mps .= isis_to_mps($row,'225',4);

        $bib .= isis_to_bib($row,'300','%300+');
        $bib .= isis_to_bib($row,'320','%300+');
        $bib .= isis_to_bib($row,'327','%300+');
        $mps .= isis_to_mps($row,'300',5);
        $mps .= isis_to_mps($row,'320',5);
        $mps .= isis_to_mps($row,'327',5);

        $bib .= isis_to_bib($row,'330','%330');
        $mps .= isis_to_mps($row,'330',6);

        $bib .= isis_to_bib($row,'423','%423');
        $bib .= isis_to_bib($row,'464','%464');
        $mps .= isis_to_mps($row,'464',7);
        $bib .= isis_to_bib($row,'610','%610');
        $mps .= isis_to_mps($row,'610',8);

        $bib .= isis_to_bib($row,'675','%675+');
        $mps .= isis_to_mps($row,'675',9);
        $bib .= isis_to_bib($row,'686','%675+');
        $mps .= isis_to_mps($row,'686',10);

        $bib .= isis_to_bib($row,'990','%990');
        $mps .= isis_to_mps($row,'990',11);

        $bib .= isis_to_bib($row,'991','%991');
        $mps .= isis_to_mps($row,'991',12);

        # ISBN: indexed once as written (without spaces) and once more
        # without the hyphens
        if (my $isbn = $row->{10}->[0]) {
            $isbn =~ s/ +//g;   # remove spaces
            $mps .= "W $isbn 13\n";
            $bib .= "%ISBN $isbn\n";
            $isbn =~ s/-//g;
            $mps .= "W $isbn 13\n";
        }
        # Field 10 is announced in the header as "F 10 13 ISBN" and the
        # W lines above use 13; this call used 12 (Inventarni broj),
        # which filed ISBN words under the wrong index field.
        $mps .= isis_to_mps($row,'10',13);

        # NOTE(review): field 532 is indexed under id 2 above and under
        # id 1 here -- confirm that both are intended.
        $mps .= isis_to_mps($row,'532',1);

        $bib .= isis_to_bib($row,'994','%994a','a');

        # headline
        if ($headline) {
            $headline .= " [".$row->{mfn}."]";  ## debug MFN!
            $mps .= "H ".c_852_iso($headline)."\n";
        } else {
            $mps .= "H nepoznato\n";
        }


        #if ($db_dir eq "sf") {
        #   print "MFN: $row->{mfn} ROW ID: $row_id\n";
        #   if ($row->{mfn} >= 146) {
        #       print Dumper($row);
        #   }
        #}

        print R c_852_iso($bib);

        # T line: length, file, start offset and end offset of the record
        # just written.  $last_tell is taken before the separating empty
        # line is written.
        $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
        $last_tell=tell(R);

        print R "\n";

        $mps .= "E\n";


        print S $mps;
        print MPS $mps;
    }
}
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";

# Close the plain files first so buffered write errors are reported and
# the bib file is complete on disk, then wait for the indexer; for a pipe
# close() also fails when the child exits with a non-zero status.
close(S) || die "can't close $dir/stream: $!";
close(R) || die "can't close $dir/bib: $!";
close(MPS) || die "MPS indexer $mpsindex failed: ".($! ? $! : "exit status $?");

  ViewVC Help
Powered by ViewVC 1.1.26