/[Z3950-HTML-Scraper]/COBISS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /COBISS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Fri Jun 19 17:50:34 2009 UTC (14 years, 10 months ago) by dpavlin
File size: 1000 byte(s)
scrape COBISS

1 dpavlin 1 package COBISS;
2    
3     use warnings;
4     use strict;
5    
6     use WWW::Mechanize;
7    
8     sub search {
9    
10     my $url = 'http://cobiss.izum.si/scripts/cobiss?ukaz=GETID&lani=en';
11    
12     warn "# get $url\n";
13    
14     my $mech = WWW::Mechanize->new();
15     $mech->get( $url );
16    
17     warn "# got session\n";
18    
19     $mech->follow_link( text_regex => qr/union/ );
20    
21     warn "# submit search\n";
22    
23     $mech->submit_form(
24     fields => {
25     'SS1' => 'Krleza',
26     },
27     );
28    
29     my $hits = 1;
30     if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) {
31     $hits = $1;
32     } else {
33     warn "get't find results in ", $mech->content;
34     }
35    
36     warn "# got $hits results, get first one\n";
37    
38     $mech->follow_link( url_regex => qr/ukaz=DISP/ );
39    
40     warn "# in COMARC format\n";
41    
42     $mech->follow_link( url_regex => qr/fmt=13/ );
43    
44     my $comarc;
45    
46     if ( $mech->content =~ m{<pre>\s*(.+1\..+?)\s*</pre>}s ) {
47     my $comarc = $1;
48     $comarc =~ s{</?b>}{}gs;
49     $comarc =~ s{<(/?font)[^>]*>}{<sf>}gs;
50    
51     print $comarc;
52    
53     return $comarc;
54     } else {
55     die "can't fetch COMARC format from ", $mech->content;
56     }
57    
58     }
59    
60     1;

  ViewVC Help
Powered by ViewVC 1.1.26