#!/usr/bin/perl -w

# This script will fetch the list of articles to which you have access
# (using IP authorisation) from ScienceDirect.
#
# Output: for each subscribed journal, three lines on STDOUT —
# the absolute journal URL, the journal title, and a blank separator.
# Progress messages go to STDERR when $debug is set.
#
# Usage: script.pl [-d]   (-d increases debug verbosity)

use strict;
use LWP::UserAgent;
use HTML::TreeBuilder;

my $debug = 1;

my $base_url = 'http://www.sciencedirect.com';
my $url = $base_url . '/science?_ob=JournalListURL&_type=subscribed&_stype=title&subjColl=all&_auth=y&_update=y&_frameSeg=M&_title=all&_acct=C000050661&_version=1&_urlVersion=0&_userid=1034703&md5=6d4b6e263318a1d7d2a3b523d861f920';

# Guard with @ARGV: lc(undef) would emit an "uninitialized value"
# warning under -w when the script is run without arguments.
$debug++ if (@ARGV and lc($ARGV[0]) eq "-d");

my $ua = LWP::UserAgent->new;
$ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
$ua->timeout(60);
#$ua->env_proxy();
#$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');

print STDERR "getting '$url'...\n" if ($debug);
my $req = HTTP::Request->new(GET => $url);

my @out;

my $res = $ua->request($req);
if ($res->is_success) {
	print STDERR "parsing html...\n" if ($debug);
	my $tree = HTML::TreeBuilder->new;
	# $tree->parse_file("list.html"); # ! (offline testing hook)

	# decoded_content (not content): handles Content-Encoding
	# (gzip) and charset conversion before the HTML parser sees it.
	$tree->parse($res->decoded_content);

	# Each journal row is a <tr> whose first <a> links to the
	# journal page; filter on the JournalURL handler to skip
	# navigation links.
	foreach my $tr ($tree->look_down('_tag', 'tr')) {
		my $link = $tr->look_down('_tag', 'a');
		next unless $link;

		# Anchors without an href attribute would warn under -w.
		my $href = $link->attr('href');
		next unless defined $href;

		if ($href =~ m{/science\?_ob=JournalURL}) {
			print $base_url . $href . "\n";
			print $link->as_text . "\n";
			print "\n";
		}
	}

	$tree->delete; # clear memory!

} else {
	warn "can't fetch web page from '$url'";
}