/[webpac]/branches/cpi/feeds/sciencedirect2.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /branches/cpi/feeds/sciencedirect2.pl

Parent Directory | Revision Log | View Patch

revision 331 by dpavlin, Tue May 18 17:41:14 2004 UTC | revision 339 by dpavlin, Thu Jun 10 22:07:06 2004 UTC
# Line 14  use strict; Line 14  use strict;
14    
15  my $debug=1;  my $debug=1;
16    
17    my $file;
18    
19    # uncomment following line if you want to use file instead of http connection
20    #$file="list.html";
21    
22  # configure ScienceDirect CSV files location  # configure ScienceDirect CSV files location
23  my $csv_dir="/data/isis_data/sciencedirect";  my $csv_dir="/data/isis_data/sciencedirect";
24  my $j_holdings="sd_JournalHoldingsRpt.txt";  my $j_holdings="sd_JournalHoldingsRpt.txt";
# Line 81  print STDERR "$c_nr categories assigned, Line 86  print STDERR "$c_nr categories assigned,
86    
87  $debug++ if (lc($ARGV[0]) eq "-d");  $debug++ if (lc($ARGV[0]) eq "-d");
88    
 my $ua = new LWP::UserAgent;  
 $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");  
 $ua->timeout(60);  
 #$ua->env_proxy();  
 #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');  
   
 print STDERR "getting '$url'...\n" if ($debug);  
 my $req = HTTP::Request->new(GET => $url);  
89    
90  my @out;  my $res;
91    if (! $file) {
92            my $ua = new LWP::UserAgent;
93            $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
94            $ua->timeout(60);
95            #$ua->env_proxy();
96            #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
97    
98            print STDERR "getting '$url'...\n" if ($debug);
99            my $req = HTTP::Request->new(GET => $url);
100    
101            $res = $ua->request($req);
102    } elsif (! -e $file) {
103            die "can't find feed file '$file'";
104    }
105    
106  my $res = $ua->request($req);  if ($file || $res->is_success) {
 if ($res->is_success) {  
107          print STDERR "parsing html...\n" if ($debug);          print STDERR "parsing html...\n" if ($debug);
108          my $tree = HTML::TreeBuilder->new;          my $tree = HTML::TreeBuilder->new;
109  #       $tree->parse_file("list.html");   # !          if ($file) {
110          $tree->parse($res->content);                  $tree->parse_file("list.html");
111            } else {
112                    $tree->parse($res->content);
113            }
114    
115          foreach my $tr ($tree->look_down('_tag', 'tr')) {          foreach my $tr ($tree->look_down('_tag', 'tr')) {
116                  my $link;                  my $link;
117                  if ($link = $tr->look_down('_tag','a')) {                  foreach my $link ($tr->look_down('_tag','a')) {
118                          if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {                          if ($link->attr('href') =~ m{/science\?_ob=JournalURL}) {
119                                  my $j=nuc($link->as_text);                                  my $j=nuc($link->as_text);
120                                  if ($journal->{$j}) {                                  if ($journal->{$j}) {

Legend:
Removed from v.331  
changed lines
  Added in v.339

  ViewVC Help
Powered by ViewVC 1.1.26