/[webpac]/trunk/feeds/sciencedirect.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/feeds/sciencedirect.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 74 - (hide annotations)
Sat Jul 5 22:37:30 2003 UTC (20 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 1412 byte(s)
support for new feed format which have decimal number of field, semicolumn
and space at beginning of each line (like: 0: data)

1 dpavlin 67 #!/usr/bin/perl -w
2    
3     # This script will fatch list of articles on which you have access
4     # (using IP authorisation) from ScienceDirect
5    
6     use LWP::UserAgent;
7     use HTML::TreeBuilder;
8     use strict;
9    
10     my $debug=1;
11    
12     my $base_url = 'http://www.sciencedirect.com';
13     my $url = $base_url . '/science?_ob=JournalListURL&_type=subscribed&_stype=title&subjColl=all&_auth=y&_update=y&_frameSeg=M&_title=all&_acct=C000050661&_version=1&_urlVersion=0&_userid=1034703&md5=6d4b6e263318a1d7d2a3b523d861f920';
14    
15     $debug++ if (lc($ARGV[0]) eq "-d");
16    
17     my $ua = new LWP::UserAgent;
18     $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
19     $ua->timeout(60);
20     #$ua->env_proxy();
21     #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
22    
23     print STDERR "getting '$url'...\n" if ($debug);
24     my $req = HTTP::Request->new(GET => $url);
25    
26     my @out;
27    
28     my $res = $ua->request($req);
29     if ($res->is_success) {
30     print STDERR "parsing html...\n" if ($debug);
31     my $tree = HTML::TreeBuilder->new;
32     # $tree->parse_file("list.html"); # !
33     $tree->parse($res->content);
34    
35     foreach my $tr ($tree->look_down('_tag', 'tr')) {
36     my $link;
37     if ($link = $tr->look_down('_tag','a')) {
38     if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
39 dpavlin 74 print "0: ",$link->as_text."\n";
40     print "7: ",$base_url.$link->attr('href')."\n";
41 dpavlin 67 print "\n";
42     }
43     }
44     }
45    
46     $tree->delete; # clear memory!
47    
48     } else {
49     warn "can't fetch web page from '$url'";
50     }

Properties

Name Value
cvs2svn:cvs-rev 1.2
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26