drustvene/feeds/sciencedirect.pl

#!/usr/bin/perl

# Fetch the ScienceDirect "subscribed journals" listing (access is granted
# by IP authorisation) and print one record per journal link on STDOUT:
#
#   0: <link text>
#   7: <absolute URL of the journal page>
#   <blank line>
#
# Diagnostics go to STDERR.  Exits with status 1 if the listing page
# cannot be fetched.
#
# Usage: sciencedirect.pl [-d]

use strict;
use warnings;

use LWP::UserAgent;
use HTML::TreeBuilder;

# NOTE(review): debugging output is on by default, so -d only raises the
# level from 1 to 2 and has no visible effect today.  Left as-is so the
# STDERR output stays unchanged.
my $debug = 1;

my $base_url = 'http://www.sciencedirect.com';
my $url = $base_url . '/science?_ob=JournalListURL&_type=subscribed&_stype=title&subjColl=all&_auth=y&_update=y&_frameSeg=M&_title=all&_acct=C000050661&_version=1&_urlVersion=0&_userid=1034703&md5=6d4b6e263318a1d7d2a3b523d861f920';

# Check that an argument exists first: lc(undef) warns when the script is
# run without arguments.
$debug++ if (@ARGV && lc($ARGV[0]) eq "-d");

my $ua = LWP::UserAgent->new;
$ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
$ua->timeout(60);
#$ua->env_proxy();
#$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');

print STDERR "getting '$url'...\n" if ($debug);
my $res = $ua->get($url);

if ($res->is_success) {
        print STDERR "parsing html...\n" if ($debug);
        my $tree = HTML::TreeBuilder->new;
#       $tree->parse_file("list.html");   # !
        $tree->parse($res->content);
        # parse() only feeds the parser; eof() must be called to signal the
        # end of the document and finish building the tree.
        $tree->eof;

        foreach my $tr ($tree->look_down('_tag', 'tr')) {
                # Scalar context: only the first <a> in each row is examined.
                my $link = $tr->look_down('_tag', 'a') or next;

                # Anchors without an href (e.g. <a name="...">) return undef.
                my $href = $link->attr('href');
                next unless (defined $href && $href =~ m{/science\?_ob=JournalURL});

                # NOTE(review): assumes the href is site-relative (starts
                # with '/'); an absolute href would get a doubled host.
                print "0: ", $link->as_text, "\n";
                print "7: ", $base_url, $href, "\n";
                print "\n";
        }

        $tree->delete; # clear memory!

} else {
    # Report why the fetch failed and signal failure to the caller.
    warn "can't fetch web page from '$url': " . $res->status_line . "\n";
    exit 1;
}
1	#!/usr/bin/perl -w
2
3	# This script will fetch the list of articles to which you have access
4	# (using IP authorisation) from ScienceDirect
5
6	use LWP::UserAgent;
7	use HTML::TreeBuilder;
8	use strict;
9
10	my $debug=1;
11
12	my $base_url = 'http://www.sciencedirect.com';
13	my $url = $base_url . '/science?_ob=JournalListURL&_type=subscribed&_stype=title&subjColl=all&_auth=y&_update=y&_frameSeg=M&_title=all&_acct=C000050661&_version=1&_urlVersion=0&_userid=1034703&md5=6d4b6e263318a1d7d2a3b523d861f920';
14
15	$debug++ if (lc($ARGV[0]) eq "-d");
16
17	my $ua = new LWP::UserAgent;
18	$ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
19	$ua->timeout(60);
20	#$ua->env_proxy();
21	#$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
22
23	print STDERR "getting '$url'...\n" if ($debug);
24	my $req = HTTP::Request->new(GET => $url);
25
26	my @out;
27
28	my $res = $ua->request($req);
29	if ($res->is_success) {
30	print STDERR "parsing html...\n" if ($debug);
31	my $tree = HTML::TreeBuilder->new;
32	# $tree->parse_file("list.html"); # !
33	$tree->parse($res->content);
34
35	foreach my $tr ($tree->look_down('_tag', 'tr')) {
36	my $link;
37	if ($link = $tr->look_down('_tag','a')) {
38	if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
39	print "0: ",$link->as_text."\n";
40	print "7: ",$base_url.$link->attr('href')."\n";
41	print "\n";
42	}
43	}
44	}
45
46	$tree->delete; # clear memory!
47
48	} else {
49	warn "can't fetch web page from '$url'";
50	}