1 |
dpavlin |
1 |
package COBISS; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use WWW::Mechanize; |
7 |
dpavlin |
2 |
use MARC::Record; |
8 |
dpavlin |
1 |
|
9 |
dpavlin |
2 |
binmode STDOUT, ':utf8'; |
10 |
|
|
|
11 |
|
|
my $cobiss_marc21 = { |
12 |
|
|
'010' => { a => [ '020', 'a' ] }, |
13 |
|
|
200 => { |
14 |
|
|
a => [ 245 , 'a' ], |
15 |
|
|
f => [ 245 , 'f' ], |
16 |
|
|
}, |
17 |
|
|
205 => { a => [ 250 , 'a' ] }, |
18 |
|
|
210 => { |
19 |
|
|
a => [ 250 , 'a' ], |
20 |
|
|
c => [ 260 , 'b' ], |
21 |
|
|
d => [ 260 , 'c' ], |
22 |
|
|
}, |
23 |
|
|
215 => { |
24 |
|
|
a => [ 300 , 'a' ], |
25 |
|
|
c => [ 300 , 'b' ], |
26 |
|
|
d => [ 300 , 'c' ], |
27 |
|
|
}, |
28 |
|
|
700 => { |
29 |
|
|
a => [ 100 , 'a' ], |
30 |
|
|
}, |
31 |
|
|
}; |
32 |
|
|
|
33 |
dpavlin |
1 |
sub search { |
34 |
|
|
|
35 |
|
|
my $url = 'http://cobiss.izum.si/scripts/cobiss?ukaz=GETID&lani=en'; |
36 |
|
|
|
37 |
|
|
warn "# get $url\n"; |
38 |
|
|
|
39 |
|
|
my $mech = WWW::Mechanize->new(); |
40 |
|
|
$mech->get( $url ); |
41 |
|
|
|
42 |
|
|
warn "# got session\n"; |
43 |
|
|
|
44 |
|
|
$mech->follow_link( text_regex => qr/union/ ); |
45 |
|
|
|
46 |
|
|
warn "# submit search\n"; |
47 |
|
|
|
48 |
|
|
$mech->submit_form( |
49 |
|
|
fields => { |
50 |
|
|
'SS1' => 'Krleza', |
51 |
|
|
}, |
52 |
|
|
); |
53 |
|
|
|
54 |
|
|
my $hits = 1; |
55 |
|
|
if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) { |
56 |
|
|
$hits = $1; |
57 |
|
|
} else { |
58 |
|
|
warn "get't find results in ", $mech->content; |
59 |
|
|
} |
60 |
|
|
|
61 |
|
|
warn "# got $hits results, get first one\n"; |
62 |
|
|
|
63 |
|
|
$mech->follow_link( url_regex => qr/ukaz=DISP/ ); |
64 |
|
|
|
65 |
|
|
warn "# in COMARC format\n"; |
66 |
|
|
|
67 |
|
|
$mech->follow_link( url_regex => qr/fmt=13/ ); |
68 |
|
|
|
69 |
|
|
my $comarc; |
70 |
|
|
|
71 |
|
|
if ( $mech->content =~ m{<pre>\s*(.+1\..+?)\s*</pre>}s ) { |
72 |
|
|
my $comarc = $1; |
73 |
|
|
$comarc =~ s{</?b>}{}gs; |
74 |
dpavlin |
2 |
$comarc =~ s{<font[^>]*>}{<s>}gs; |
75 |
|
|
$comarc =~ s{</font>}{<e>}gs; |
76 |
dpavlin |
1 |
|
77 |
|
|
print $comarc; |
78 |
|
|
|
79 |
dpavlin |
2 |
my $marc = MARC::Record->new; |
80 |
|
|
|
81 |
|
|
foreach my $line ( split(/[\r\n]+/, $comarc) ) { |
82 |
|
|
our @f; |
83 |
|
|
|
84 |
|
|
if ( $line !~ s{(\d\d\d)([01 ])([01 ])}{} ) { |
85 |
|
|
warn "SKIP: $line\n"; |
86 |
|
|
} else { |
87 |
|
|
$line .= "<eol>"; |
88 |
|
|
|
89 |
|
|
@f = ( $1, $2, $3 ); |
90 |
|
|
sub sf { warn "sf",@_,"|",@f; push @f, @_; } |
91 |
|
|
$line =~ s{<s>(\w)<e>([^<]+)\s*}{sf($1, $2)}ges; |
92 |
|
|
warn "# f:", join(' ', @f), " left:|$line|\n"; |
93 |
|
|
$marc->add_fields( @f ); |
94 |
|
|
} |
95 |
|
|
} |
96 |
|
|
|
97 |
|
|
open(my $out, '>:utf8', 'out.marc'); |
98 |
|
|
print $out $marc->as_usmarc; |
99 |
|
|
close($out); |
100 |
|
|
|
101 |
|
|
warn $marc->as_formatted; |
102 |
|
|
|
103 |
dpavlin |
1 |
return $comarc; |
104 |
|
|
} else { |
105 |
|
|
die "can't fetch COMARC format from ", $mech->content; |
106 |
|
|
} |
107 |
|
|
|
108 |
|
|
} |
109 |
|
|
|
110 |
|
|
1; |