1 |
package Aleph; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use WWW::Mechanize; |
7 |
use MARC::Record; |
8 |
use Data::Dump qw/dump/; |
9 |
|
10 |
binmode STDOUT, ':utf8'; |
11 |
|
12 |
our $mech = WWW::Mechanize->new(); |
13 |
our $hits; |
14 |
|
15 |
sub diag { |
16 |
print "# ", @_, $/; |
17 |
} |
18 |
|
19 |
# Koha Z39.50 query: |
20 |
# |
21 |
# Bib-1 @and @and @and @and @and @and @and @or |
22 |
# @attr 1=8 isbn-issn |
23 |
# @attr 1=7 isbn-issn |
24 |
# @attr 1=4 title |
25 |
# @attr 1=1003 author |
26 |
# @attr 1=16 dewey |
27 |
# @attr 1=21 subject-holding |
28 |
# @attr 1=12 control-no |
29 |
# @attr 1=1007 standard-id |
30 |
# @attr 1=1016 any |
31 |
|
32 |
# LCC - Klasifikacija Kongresne knjižnice |
33 |
# LCN - Signatura Kongresne knjižnice |
34 |
# DDC - Deweyjeva klasifikacija |
35 |
# TIT - Naslovi |
36 |
# AUT - Autori |
37 |
# IMP - Impresum |
38 |
# SUB - Predmetnice |
39 |
# SRS - Nakladnička cjelina |
40 |
# LOC - Lokacija |
41 |
# WRD - Riječi |
42 |
# WTI - Riječi u polju naslova |
43 |
# WAU - Riječi u polju autora |
44 |
# WPE - Riječi u polju individualnog autora |
45 |
# WCO - Riječi u polju korporativnog autora |
46 |
# WME - Riječi u polju sastanka |
47 |
# WUT - Riječi u polju jedinstvenog naslova |
48 |
# WPL - Riječi u polju mjesta izdavanja |
49 |
# WPU - Riječi u polju nakladnika |
50 |
# WSU - Riječi u polju predmetnica |
51 |
# WSM - Riječi u predmetnicama MeSH-a |
52 |
# WST - Riječi u polju status |
53 |
# WGA - Riječi u geografskim odrednicama |
54 |
# WYR - Godina izdavanja |
55 |
|
56 |
our $usemap = { |
57 |
# 8 => '', |
58 |
# 7 => '', |
59 |
4 => 'WTI', |
60 |
1003 => 'WTI', |
61 |
16 => 'CU', |
62 |
21 => 'SU', |
63 |
# 12 => '', |
64 |
# 1007 => '', |
65 |
# 1016 => '', |
66 |
|
67 |
}; |
68 |
|
69 |
sub usemap { |
70 |
my $f = shift || die; |
71 |
$usemap->{$f}; |
72 |
} |
73 |
|
74 |
sub search { |
75 |
my ( $self, $query ) = @_; |
76 |
|
77 |
die "need query" unless defined $query; |
78 |
|
79 |
my $url = 'http://161.53.240.197:8991/F?RN=' . rand(1000000000); |
80 |
# fake JavaScript code on page which creates random session |
81 |
|
82 |
diag "get $url"; |
83 |
|
84 |
$mech->get( $url ); |
85 |
|
86 |
diag "advanced search"; |
87 |
|
88 |
$mech->follow_link( url_regex => qr/find-c/ ); |
89 |
|
90 |
diag "submit search $query"; |
91 |
|
92 |
$mech->submit_form( |
93 |
fields => { |
94 |
'ccl_term' => $query, |
95 |
}, |
96 |
); |
97 |
|
98 |
$hits = 0; |
99 |
if ( $mech->content =~ m{ukupno\s+(\d+).*(do\s+(\d+))}s ) { |
100 |
$hits = $1; |
101 |
$hits = $2 if $2 && $2 < $1; # correct for max. results |
102 |
} else { |
103 |
diag "get't find results in ", $mech->content; |
104 |
return; |
105 |
} |
106 |
|
107 |
diag "got $hits results, get first one"; |
108 |
|
109 |
$mech->follow_link( url_regex => qr/set_entry=000001/ ); |
110 |
|
111 |
diag "in MARC format"; |
112 |
|
113 |
$mech->follow_link( url_regex => qr/format=001/ ); |
114 |
} |
115 |
|
116 |
|
117 |
sub next_marc { |
118 |
my ($self,$format) = @_; |
119 |
|
120 |
print $mech->content; |
121 |
|
122 |
if ( $mech->content =~ m{Zapis\s+(\d+)}s ) { |
123 |
|
124 |
my $nr = $1; |
125 |
|
126 |
diag "parse $nr"; |
127 |
|
128 |
my $html = $mech->content; |
129 |
my $hash; |
130 |
$html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|$hash->{$1} = "$2";|ges; |
131 |
diag dump($hash); |
132 |
|
133 |
my $id = $hash->{SYS} || die "no SYS"; |
134 |
|
135 |
die; |
136 |
|
137 |
my $marc = MARC::Record->new; |
138 |
|
139 |
# $marc->add_fields( $f, $i1, $i2, @{ $out->{$f} } ); |
140 |
|
141 |
my $path = "marc/$id.$format"; |
142 |
|
143 |
open(my $out, '>:utf8', $path); |
144 |
print $out $marc->as_usmarc; |
145 |
close($out); |
146 |
|
147 |
diag "created $path ", -s $path, " bytes"; |
148 |
|
149 |
diag $marc->as_formatted; |
150 |
|
151 |
$nr++; |
152 |
|
153 |
die if $nr == 3; # FIXME |
154 |
|
155 |
$mech->follow_link( url_regex => qr/set_entry=0*$nr/ ); |
156 |
|
157 |
return $marc->as_usmarc; |
158 |
} else { |
159 |
die "can't fetch COMARC format from ", $mech->content; |
160 |
} |
161 |
|
162 |
} |
163 |
|
164 |
1; |