/[webpac-proto]/casopisi/sciencedirect.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /casopisi/sciencedirect.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations)
Fri Oct 25 11:20:56 2002 UTC (21 years, 5 months ago) by dpavlin
Branch: MAIN
Changes since 1.3: +6 -8 lines
File MIME type: text/plain
fetch from on-line site

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     use LWP::UserAgent;
4     use HTML::TreeBuilder;
5     use strict;
6     require "../common.pm";
7    
8     my $debug=1;
9    
10     my $dir = open_data_files("sciencedirect");
11     my $last_tell=0;
12    
13     print MPS "M working...\n";
14    
15 dpavlin 1.2 my $base_url = 'http://www.sciencedirect.com';
16 dpavlin 1.1 my $url = $base_url . '/science?_ob=JournalListURL&_type=subscribed&_stype=title&subjColl=all&_auth=y&_update=y&_frameSeg=M&_title=all&_acct=C000050661&_version=1&_urlVersion=0&_userid=1034703&md5=6d4b6e263318a1d7d2a3b523d861f920';
17    
18     $debug++ if (lc($ARGV[0]) eq "-d");
19    
20     sub print_debug {
21     return if (! $debug);
22     open(DEBUG,">> debug") || warn "can't open debug file!";
23     print DEBUG "###",@_;
24     print @_;
25     close(DEBUG);
26     }
27    
28     print_debug("debug level $debug");
29    
30     my $ua = new LWP::UserAgent;
31     $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
32     $ua->timeout(60);
33     #$ua->env_proxy();
34     #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
35    
36 dpavlin 1.4 my $req = HTTP::Request->new(GET => $url);
37 dpavlin 1.1
38 dpavlin 1.4 my $res = $ua->request($req);
39     if ($res->is_success) {
40 dpavlin 1.1 my $tree = HTML::TreeBuilder->new;
41 dpavlin 1.4 # $tree->parse_file("list.html"); # !
42     $tree->parse($res->content);
43 dpavlin 1.1
44     foreach my $tr ($tree->look_down('_tag', 'tr')) {
45     my $link;
46     if ($link = $tr->look_down('_tag','a')) {
47     if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
48     my $bib = "";
49     my $mps = "";
50    
51     $bib .= "%tip Èasopis\n";
52 dpavlin 1.2 $bib .= "%tip on-line\n";
53 dpavlin 1.1 # url
54     $bib .= "%856 $base_url".$link->attr('href')."\n";
55     # naslov
56 dpavlin 1.2 $bib .= "%200+ ".$link->as_text."\n";
57 dpavlin 1.1 $mps .= mps_expand(2,$link->as_text);
58    
59 dpavlin 1.3 # tip
60     $mps .= mps_expand(17,"on-line");
61    
62 dpavlin 1.4 $mps .= "H ".$link->as_text." <i>(on-line, ScrienceDirect)</i>\n";
63 dpavlin 1.1
64 dpavlin 1.2 print R $bib."\n";
65 dpavlin 1.1 $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
66     $last_tell=tell(R);
67    
68 dpavlin 1.2 print R "\n";
69 dpavlin 1.1
70     $mps .= "E\n";
71    
72     print S $mps;
73     print MPS $mps;
74     }
75     }
76     }
77    
78     $tree->delete; # clear memory!
79    
80     } else {
81     warn "can't fetch web page from '$url'";
82     }
83    
84     print S "M over and out\nX\n";
85     print MPS "M over and out\nX\n";
86     close(MPS);

  ViewVC Help
Powered by ViewVC 1.1.26