1 |
package COBISS; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use WWW::Mechanize; |
7 |
use MARC::Record; |
8 |
|
9 |
binmode STDOUT, ':utf8'; |
10 |
|
11 |
my $cobiss_marc21 = { |
12 |
'010' => { a => [ '020', 'a' ] }, |
13 |
200 => { |
14 |
a => [ 245 , 'a' ], |
15 |
f => [ 245 , 'f' ], |
16 |
}, |
17 |
205 => { a => [ 250 , 'a' ] }, |
18 |
210 => { |
19 |
a => [ 250 , 'a' ], |
20 |
c => [ 260 , 'b' ], |
21 |
d => [ 260 , 'c' ], |
22 |
}, |
23 |
215 => { |
24 |
a => [ 300 , 'a' ], |
25 |
c => [ 300 , 'b' ], |
26 |
d => [ 300 , 'c' ], |
27 |
}, |
28 |
700 => { |
29 |
a => [ 100 , 'a' ], |
30 |
}, |
31 |
}; |
32 |
|
33 |
sub search { |
34 |
|
35 |
my $url = 'http://cobiss.izum.si/scripts/cobiss?ukaz=GETID&lani=en'; |
36 |
|
37 |
warn "# get $url\n"; |
38 |
|
39 |
my $mech = WWW::Mechanize->new(); |
40 |
$mech->get( $url ); |
41 |
|
42 |
warn "# got session\n"; |
43 |
|
44 |
$mech->follow_link( text_regex => qr/union/ ); |
45 |
|
46 |
warn "# submit search\n"; |
47 |
|
48 |
$mech->submit_form( |
49 |
fields => { |
50 |
'SS1' => 'Krleza', |
51 |
}, |
52 |
); |
53 |
|
54 |
my $hits = 1; |
55 |
if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) { |
56 |
$hits = $1; |
57 |
} else { |
58 |
warn "get't find results in ", $mech->content; |
59 |
} |
60 |
|
61 |
warn "# got $hits results, get first one\n"; |
62 |
|
63 |
$mech->follow_link( url_regex => qr/ukaz=DISP/ ); |
64 |
|
65 |
warn "# in COMARC format\n"; |
66 |
|
67 |
$mech->follow_link( url_regex => qr/fmt=13/ ); |
68 |
|
69 |
my $comarc; |
70 |
|
71 |
if ( $mech->content =~ m{<pre>\s*(.+1\..+?)\s*</pre>}s ) { |
72 |
my $comarc = $1; |
73 |
$comarc =~ s{</?b>}{}gs; |
74 |
$comarc =~ s{<font[^>]*>}{<s>}gs; |
75 |
$comarc =~ s{</font>}{<e>}gs; |
76 |
|
77 |
print $comarc; |
78 |
|
79 |
my $marc = MARC::Record->new; |
80 |
|
81 |
foreach my $line ( split(/[\r\n]+/, $comarc) ) { |
82 |
our @f; |
83 |
|
84 |
if ( $line !~ s{(\d\d\d)([01 ])([01 ])}{} ) { |
85 |
warn "SKIP: $line\n"; |
86 |
} else { |
87 |
$line .= "<eol>"; |
88 |
|
89 |
@f = ( $1, $2, $3 ); |
90 |
sub sf { warn "sf",@_,"|",@f; push @f, @_; } |
91 |
$line =~ s{<s>(\w)<e>([^<]+)\s*}{sf($1, $2)}ges; |
92 |
warn "# f:", join(' ', @f), " left:|$line|\n"; |
93 |
$marc->add_fields( @f ); |
94 |
} |
95 |
} |
96 |
|
97 |
open(my $out, '>:utf8', 'out.marc'); |
98 |
print $out $marc->as_usmarc; |
99 |
close($out); |
100 |
|
101 |
warn $marc->as_formatted; |
102 |
|
103 |
return $comarc; |
104 |
} else { |
105 |
die "can't fetch COMARC format from ", $mech->content; |
106 |
} |
107 |
|
108 |
} |
109 |
|
110 |
1; |