1 |
dpavlin |
11 |
package Aleph; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use WWW::Mechanize; |
7 |
|
|
use MARC::Record; |
8 |
|
|
use Data::Dump qw/dump/; |
9 |
|
|
|
10 |
|
|
binmode STDOUT, ':utf8'; |
11 |
|
|
|
12 |
|
|
our $mech = WWW::Mechanize->new(); |
13 |
|
|
our $hits; |
14 |
|
|
|
15 |
|
|
sub diag { |
16 |
|
|
print "# ", @_, $/; |
17 |
|
|
} |
18 |
|
|
|
19 |
|
|
# Koha Z39.50 query: |
20 |
|
|
# |
21 |
|
|
# Bib-1 @and @and @and @and @and @and @and @or |
22 |
|
|
# @attr 1=8 isbn-issn |
23 |
|
|
# @attr 1=7 isbn-issn |
24 |
|
|
# @attr 1=4 title |
25 |
|
|
# @attr 1=1003 author |
26 |
|
|
# @attr 1=16 dewey |
27 |
|
|
# @attr 1=21 subject-holding |
28 |
|
|
# @attr 1=12 control-no |
29 |
|
|
# @attr 1=1007 standard-id |
30 |
|
|
# @attr 1=1016 any |
31 |
|
|
|
32 |
|
|
# LCC - Klasifikacija Kongresne knjižnice |
33 |
|
|
# LCN - Signatura Kongresne knjižnice |
34 |
|
|
# DDC - Deweyjeva klasifikacija |
35 |
|
|
# TIT - Naslovi |
36 |
|
|
# AUT - Autori |
37 |
|
|
# IMP - Impresum |
38 |
|
|
# SUB - Predmetnice |
39 |
|
|
# SRS - Nakladnička cjelina |
40 |
|
|
# LOC - Lokacija |
41 |
|
|
# WRD - Riječi |
42 |
|
|
# WTI - Riječi u polju naslova |
43 |
|
|
# WAU - Riječi u polju autora |
44 |
|
|
# WPE - Riječi u polju individualnog autora |
45 |
|
|
# WCO - Riječi u polju korporativnog autora |
46 |
|
|
# WME - Riječi u polju sastanka |
47 |
|
|
# WUT - Riječi u polju jedinstvenog naslova |
48 |
|
|
# WPL - Riječi u polju mjesta izdavanja |
49 |
|
|
# WPU - Riječi u polju nakladnika |
50 |
|
|
# WSU - Riječi u polju predmetnica |
51 |
|
|
# WSM - Riječi u predmetnicama MeSH-a |
52 |
|
|
# WST - Riječi u polju status |
53 |
|
|
# WGA - Riječi u geografskim odrednicama |
54 |
|
|
# WYR - Godina izdavanja |
55 |
|
|
|
56 |
|
|
our $usemap = { |
57 |
|
|
# 8 => '', |
58 |
|
|
# 7 => '', |
59 |
|
|
4 => 'WTI', |
60 |
|
|
1003 => 'WTI', |
61 |
|
|
16 => 'CU', |
62 |
|
|
21 => 'SU', |
63 |
|
|
# 12 => '', |
64 |
|
|
# 1007 => '', |
65 |
|
|
# 1016 => '', |
66 |
|
|
|
67 |
|
|
}; |
68 |
|
|
|
69 |
|
|
sub usemap { |
70 |
|
|
my $f = shift || die; |
71 |
|
|
$usemap->{$f}; |
72 |
|
|
} |
73 |
|
|
|
74 |
|
|
sub search { |
75 |
|
|
my ( $self, $query ) = @_; |
76 |
|
|
|
77 |
|
|
die "need query" unless defined $query; |
78 |
|
|
|
79 |
|
|
my $url = 'http://161.53.240.197:8991/F?RN=' . rand(1000000000); |
80 |
|
|
# fake JavaScript code on page which creates random session |
81 |
|
|
|
82 |
|
|
diag "get $url"; |
83 |
|
|
|
84 |
|
|
$mech->get( $url ); |
85 |
|
|
|
86 |
|
|
diag "advanced search"; |
87 |
|
|
|
88 |
|
|
$mech->follow_link( url_regex => qr/find-c/ ); |
89 |
|
|
|
90 |
|
|
diag "submit search $query"; |
91 |
|
|
|
92 |
|
|
$mech->submit_form( |
93 |
|
|
fields => { |
94 |
|
|
'ccl_term' => $query, |
95 |
|
|
}, |
96 |
|
|
); |
97 |
|
|
|
98 |
|
|
$hits = 0; |
99 |
|
|
if ( $mech->content =~ m{ukupno\s+(\d+).*(do\s+(\d+))}s ) { |
100 |
|
|
$hits = $1; |
101 |
|
|
$hits = $2 if $2 && $2 < $1; # correct for max. results |
102 |
|
|
} else { |
103 |
|
|
diag "get't find results in ", $mech->content; |
104 |
|
|
return; |
105 |
|
|
} |
106 |
|
|
|
107 |
|
|
diag "got $hits results, get first one"; |
108 |
|
|
|
109 |
|
|
$mech->follow_link( url_regex => qr/set_entry=000001/ ); |
110 |
|
|
|
111 |
|
|
diag "in MARC format"; |
112 |
|
|
|
113 |
|
|
$mech->follow_link( url_regex => qr/format=001/ ); |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
|
117 |
|
|
sub next_marc { |
118 |
|
|
my ($self,$format) = @_; |
119 |
|
|
|
120 |
|
|
print $mech->content; |
121 |
|
|
|
122 |
|
|
if ( $mech->content =~ m{Zapis\s+(\d+)}s ) { |
123 |
|
|
|
124 |
|
|
my $nr = $1; |
125 |
|
|
|
126 |
|
|
diag "parse $nr"; |
127 |
|
|
|
128 |
|
|
my $html = $mech->content; |
129 |
|
|
my $hash; |
130 |
|
|
$html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|$hash->{$1} = "$2";|ges; |
131 |
|
|
diag dump($hash); |
132 |
|
|
|
133 |
|
|
my $id = $hash->{SYS} || die "no SYS"; |
134 |
|
|
|
135 |
|
|
die; |
136 |
|
|
|
137 |
|
|
my $marc = MARC::Record->new; |
138 |
|
|
|
139 |
|
|
# $marc->add_fields( $f, $i1, $i2, @{ $out->{$f} } ); |
140 |
|
|
|
141 |
|
|
my $path = "marc/$id.$format"; |
142 |
|
|
|
143 |
|
|
open(my $out, '>:utf8', $path); |
144 |
|
|
print $out $marc->as_usmarc; |
145 |
|
|
close($out); |
146 |
|
|
|
147 |
|
|
diag "created $path ", -s $path, " bytes"; |
148 |
|
|
|
149 |
|
|
diag $marc->as_formatted; |
150 |
|
|
|
151 |
|
|
$nr++; |
152 |
|
|
|
153 |
|
|
die if $nr == 3; # FIXME |
154 |
|
|
|
155 |
|
|
$mech->follow_link( url_regex => qr/set_entry=0*$nr/ ); |
156 |
|
|
|
157 |
|
|
return $marc->as_usmarc; |
158 |
|
|
} else { |
159 |
|
|
die "can't fetch COMARC format from ", $mech->content; |
160 |
|
|
} |
161 |
|
|
|
162 |
|
|
} |
163 |
|
|
|
164 |
|
|
1; |