/[Z3950-HTML-Scraper]/COBISS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /COBISS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Fri Jun 19 17:50:34 2009 UTC (14 years, 10 months ago) by dpavlin
File size: 1000 byte(s)
scrape COBISS

1 package COBISS;
2
3 use warnings;
4 use strict;
5
6 use WWW::Mechanize;
7
8 sub search {
9
10 my $url = 'http://cobiss.izum.si/scripts/cobiss?ukaz=GETID&lani=en';
11
12 warn "# get $url\n";
13
14 my $mech = WWW::Mechanize->new();
15 $mech->get( $url );
16
17 warn "# got session\n";
18
19 $mech->follow_link( text_regex => qr/union/ );
20
21 warn "# submit search\n";
22
23 $mech->submit_form(
24 fields => {
25 'SS1' => 'Krleza',
26 },
27 );
28
29 my $hits = 1;
30 if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) {
31 $hits = $1;
32 } else {
33 warn "get't find results in ", $mech->content;
34 }
35
36 warn "# got $hits results, get first one\n";
37
38 $mech->follow_link( url_regex => qr/ukaz=DISP/ );
39
40 warn "# in COMARC format\n";
41
42 $mech->follow_link( url_regex => qr/fmt=13/ );
43
44 my $comarc;
45
46 if ( $mech->content =~ m{<pre>\s*(.+1\..+?)\s*</pre>}s ) {
47 my $comarc = $1;
48 $comarc =~ s{</?b>}{}gs;
49 $comarc =~ s{<(/?font)[^>]*>}{<sf>}gs;
50
51 print $comarc;
52
53 return $comarc;
54 } else {
55 die "can't fetch COMARC format from ", $mech->content;
56 }
57
58 }
59
60 1;

  ViewVC Help
Powered by ViewVC 1.1.26