/[webpac-proto]/casopisi/ebsco.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /casopisi/ebsco.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations)
Sat Oct 26 20:44:30 2002 UTC (16 years, 7 months ago) by dpavlin
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +0 -13 lines
File MIME type: text/plain
never used

1 #!/usr/bin/perl -w
2
3 use LWP::UserAgent;
4 use HTML::TreeBuilder;
5 use URI::Escape;
6 use strict;
7 require "../common.pm";
8
9 my $debug=1;
10
11 my $dir = open_data_files("ebsco");
12 my $last_tell=0;
13
14 print MPS "M working...\n";
15
16 my $url = 'http://www.epnet.com/TitleLists/html/ap_ft_h1.htm';
17 my $base_url = 'http://web13.epnet.com/HJAFDetail.asp?tb=1&_ug=dbs+0+ln+en%2Dus+sid+956028C9%2DC274%2D45E9%2DB9C3%2DE2EEDB566264%40Sessionmgr6+46E0&_uh=btn+Y+fst+Zoologica++Scripta+idb+aphish+jdb+aphjnh+lst+Zygon%3A++Journal++of++Religion++%26++Science+md+B+op+scan+shn+1+B7D1&_us=db+0+dstb+ES+fh+0+hd+0+hs+0+or+Date+sm+ES+ss+SO+BC5A&vw=D&rn=&st=';
18
19 $debug++ if (lc($ARGV[0]) eq "-d");
20
21 sub print_debug {
22 return if (! $debug);
23 open(DEBUG,">> debug") || warn "can't open debug file!";
24 print DEBUG "###",@_;
25 print @_;
26 close(DEBUG);
27 }
28
29 print_debug("debug level $debug");
30
31 my $ua = new LWP::UserAgent;
32 $ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0");
33 $ua->timeout(60);
34 #$ua->env_proxy();
35 #$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/');
36
37 my $req = HTTP::Request->new(GET => $url);
38
39 my $tree = HTML::TreeBuilder->new;
40 my $res = $ua->request($req);
41 if ($res->is_success) {
42 $tree->parse($res->content);
43 # print_debug($res->content);
44
45 #if (1) {
46 # $tree->parse_file("list.html"); # !
47
48 print "parse...\n";
49
50 foreach my $tr ($tree->look_down('_tag', 'tr')) {
51 my $link;
52 if (1) { # oh, i'm lazy to fix indent...
53 my @arr;
54 foreach ($tr->look_down('_tag','td')) {
55 push @arr,$_->as_text;
56 }
57 my ($issn,$title,$publisher) = @arr;
58 print "#### $issn # $title # $publisher\n";
59 print "##",join("|",@arr),"\n";
60 if ($issn =~ m/^\d{4}\-\d{3}[\dX]/) {
61 my $bib = "";
62 my $mps = "";
63
64 $bib .= "%tip ├łasopis\n";
65 $bib .= "%tip on-line\n";
66 # url
67 $bib .= "%856 $base_url".uri_escape($title)."\n";
68
69 # naslov
70 $bib .= "%200+ $title\n";
71 $mps .= mps_expand(2,$title);
72
73 # izdavanje
74 $bib .= "%210+ $publisher\n";
75 $mps .= mps_expand(3,$publisher);
76
77 # issn
78 $bib .= "%ISSN $issn\n";
79 $mps .= mps_expand(2,$issn);
80
81 # tip
82 $mps .= mps_expand(17,"on-line");
83
84 $mps .= "H $title <i>(on-line, EBSCO Academic Search Premier)</i>\n";
85
86 print R $bib."\n";
87 $mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
88 $last_tell=tell(R);
89
90 print R "\n";
91
92 $mps .= "E\n";
93
94 print S $mps;
95 print MPS $mps;
96 }
97 }
98 }
99
100 $tree->delete; # clear memory!
101
102 } else {
103 warn "can't fetch web page from '$url'";
104 }
105
106 print S "M over and out\nX\n";
107 print MPS "M over and out\nX\n";
108 close(MPS);

  ViewVC Help
Powered by ViewVC 1.1.26