/[webpac-proto]/casopisi/ebsco.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /casopisi/ebsco.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations)
Sat Oct 26 20:10:28 2002 UTC (21 years, 6 months ago) by dpavlin
Branch: MAIN
File MIME type: text/plain
getch EBSCO data over http and create stream

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     use LWP::UserAgent;
4     use HTML::TreeBuilder;
5     use URI::Escape;
6     use strict;
7     require "../common.pm";
8    
9     my $debug=1;
10    
11     my $dir = open_data_files("ebsco");
12     my $last_tell=0;
13    
14     print MPS "M working...\n";
15    
16     my %dbs = (
17     'http://www.epnet.com/TitleLists/html/ap_ft_h1.htm' =>
18     'Academic Search Premier',
19     'http://www.epnet.com/TitleLists/html/bu_ft_h1.htm' =>
20     'Business Source Premier',
21     'http://www.epnet.com/TitleLists/html/hc_h1.htm' =>
22     'Health Source: Nursing/Academic Edition',
23     'http://www.epnet.com/images/html.gif' =>
24     'MasterFILE Premier',
25     'http://www.epnet.com/TitleLists/html/bw_h1.htm' =>
26     'Regional Business News
27     )
28    
29     my $url = 'http://www.epnet.com/TitleLists/html/ap_ft_h1.htm';
30     my $base_url = 'http://web13.epnet.com/HJAFDetail.asp?tb=1&_ug=dbs+0+ln+en%2Dus+sid+956028C9%2DC274%2D45E9%2DB9C3%2DE2EEDB566264%40Sessionmgr6+46E0&_uh=btn+Y+fst+Zoologica++Scripta+idb+aphish+jdb+aphjnh+lst+Zygon%3A++Journal++of++Religion++%26++Science+md+B+op+scan+shn+1+B7D1&_us=db+0+dstb+ES+fh+0+hd+0+hs+0+or+Date+sm+ES+ss+SO+BC5A&vw=D&rn=&st=';
31    
32     $debug++ if (lc($ARGV[0]) eq "-d");
33    
34     sub print_debug {
35     return if (! $debug);
36     open(DEBUG,">> debug") || warn "can't open debug file!";
37     print DEBUG "###",@_;
38     print @_;
39     close(DEBUG);
40     }
41    
42     print_debug("debug level $debug");
43    
44     my $ua = new LWP::UserAgent;
45     $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
46     $ua->timeout(60);
47     #$ua->env_proxy();
48     #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
49    
50     my $req = HTTP::Request->new(GET => $url);
51    
52     my $tree = HTML::TreeBuilder->new;
53     my $res = $ua->request($req);
54     if ($res->is_success) {
55     $tree->parse($res->content);
56     # print_debug($res->content);
57    
58     #if (1) {
59     # $tree->parse_file("list.html"); # !
60    
61     print "parse...\n";
62    
63     foreach my $tr ($tree->look_down('_tag', 'tr')) {
64     my $link;
65     if (1) { # oh, i'm lazy to fix indent...
66     my @arr;
67     foreach ($tr->look_down('_tag','td')) {
68     push @arr,$_->as_text;
69     }
70     my ($issn,$title,$publisher) = @arr;
71     print "#### $issn # $title # $publisher\n";
72     print "##",join("|",@arr),"\n";
73     if ($issn =~ m/^\d{4}\-\d{3}[\dX]/) {
74     my $bib = "";
75     my $mps = "";
76    
77     $bib .= "%tip Èasopis\n";
78     $bib .= "%tip on-line\n";
79     # url
80     $bib .= "%856 $base_url".uri_escape($title)."\n";
81    
82     # naslov
83     $bib .= "%200+ $title\n";
84     $mps .= mps_expand(2,$title);
85    
86     # izdavanje
87     $bib .= "%210+ $publisher\n";
88     $mps .= mps_expand(3,$publisher);
89    
90     # issn
91     $bib .= "%ISSN $issn\n";
92     $mps .= mps_expand(2,$issn);
93    
94     # tip
95     $mps .= mps_expand(17,"on-line");
96    
97     $mps .= "H $title <i>(on-line, EBSCO Academic Search Premier)</i>\n";
98    
99     print R $bib."\n";
100     $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
101     $last_tell=tell(R);
102    
103     print R "\n";
104    
105     $mps .= "E\n";
106    
107     print S $mps;
108     print MPS $mps;
109     }
110     }
111     }
112    
113     $tree->delete; # clear memory!
114    
115     } else {
116     warn "can't fetch web page from '$url'";
117     }
118    
119     print S "M over and out\nX\n";
120     print MPS "M over and out\nX\n";
121     close(MPS);

  ViewVC Help
Powered by ViewVC 1.1.26