/[webpac-proto]/casopisi/sciencedirect.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /casopisi/sciencedirect.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations)
Sun Oct 27 21:20:39 2002 UTC (16 years, 9 months ago) by dpavlin
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +2 -0 lines
File MIME type: text/plain
added debug

1 #!/usr/bin/perl -w
2
3 use LWP::UserAgent;
4 use HTML::TreeBuilder;
5 use strict;
6 require "../common.pm";
7
8 my $debug=1;
9
10 my $dir = open_data_files("sciencedirect");
11 my $last_tell=0;
12
13 print MPS "M working...\n";
14
15 my $base_url = 'http://www.sciencedirect.com';
16 my $url = $base_url . '/science?_ob=JournalListURL&_type=subscribed&_stype=title&subjColl=all&_auth=y&_update=y&_frameSeg=M&_title=all&_acct=C000050661&_version=1&_urlVersion=0&_userid=1034703&md5=6d4b6e263318a1d7d2a3b523d861f920';
17
18 $debug++ if (lc($ARGV[0]) eq "-d");
19
20 sub print_debug {
21 return if (! $debug);
22 open(DEBUG,">> debug") || warn "can't open debug file!";
23 print DEBUG "###",@_;
24 print @_;
25 close(DEBUG);
26 }
27
28 print_debug("debug level $debug");
29
30 my $ua = new LWP::UserAgent;
31 $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
32 $ua->timeout(60);
33 #$ua->env_proxy();
34 #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
35
36 print "getting '$url'...\n";
37 my $req = HTTP::Request->new(GET => $url);
38
39 my $res = $ua->request($req);
40 if ($res->is_success) {
41 print "parsing html...\n";
42 my $tree = HTML::TreeBuilder->new;
43 # $tree->parse_file("list.html"); # !
44 $tree->parse($res->content);
45
46 foreach my $tr ($tree->look_down('_tag', 'tr')) {
47 my $link;
48 if ($link = $tr->look_down('_tag','a')) {
49 if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
50 my $bib = "";
51 my $mps = "";
52
53 $bib .= "%tip ├łasopis\n";
54 $bib .= "%tip on-line\n";
55 # url
56 $bib .= "%856 $base_url".$link->attr('href')."\n";
57 # naslov
58 $bib .= "%200+ ".$link->as_text."\n";
59 $mps .= mps_expand(2,$link->as_text);
60
61 # tip
62 $mps .= mps_expand(17,"on-line casopis");
63
64 $mps .= "H ".$link->as_text." <i>(on-line, ScrienceDirect)</i>\n";
65
66 print R $bib."\n";
67 $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
68 $last_tell=tell(R);
69
70 print R "\n";
71
72 $mps .= "E\n";
73
74 print S $mps;
75 print MPS $mps;
76 }
77 }
78 }
79
80 $tree->delete; # clear memory!
81
82 } else {
83 warn "can't fetch web page from '$url'";
84 }
85
86 print S "M over and out\nX\n";
87 print MPS "M over and out\nX\n";
88 close(MPS);

  ViewVC Help
Powered by ViewVC 1.1.26