1 |
#!/usr/local/bin/perl -w |
2 |
|
3 |
use LWP::UserAgent; |
4 |
use strict; |
5 |
use DBI; |
6 |
use HTML::TreeBuilder; |
7 |
|
8 |
my $debug=0; |
9 |
|
10 |
my $url = 'http://custom.marketwatch.com/custom/alliance/ftmw/invrel.asp?siteid=!plivadd-0773-4F6D-DHID-QN1112NFTD0X&symb=PLVD'; |
11 |
|
12 |
$debug++ if (lc($ARGV[0]) eq "-d"); |
13 |
|
14 |
my %val; |
15 |
|
16 |
sub print_debug { |
17 |
return if (! $debug); |
18 |
open(DEBUG,">> debug") || warn "can't open debug file!"; |
19 |
print DEBUG "###",@_,"\n"; |
20 |
print @_,"\n"; |
21 |
close(DEBUG); |
22 |
} |
23 |
|
24 |
sub print_val { |
25 |
foreach (keys %val) { |
26 |
print "$_: $val{$_}\n"; |
27 |
} |
28 |
} |
29 |
|
30 |
print_debug("debug level $debug"); |
31 |
|
32 |
my $dbh = DBI->connect("DBI:Pg:dbname=corp","","") || die $DBI::errstr; |
33 |
|
34 |
my $ua = new LWP::UserAgent; |
35 |
$ua->agent("pliva_harvester 0.0"); |
36 |
$ua->timeout(60); |
37 |
$ua->env_proxy(); |
38 |
$ua->proxy(['http', 'ftp'], 'http://proxy.pliva.hr:8080/'); |
39 |
|
40 |
|
41 |
my $close_time='21:21:21 CET'; |
42 |
|
43 |
sub insert { |
44 |
# fix UNCH to 0 |
45 |
foreach (keys %val) { |
46 |
$val{$_} =~ s/^\s+//; |
47 |
$val{$_} =~ s/\s+$//; |
48 |
$val{$_} = 0 if ($val{$_} eq "UNCH"); |
49 |
} |
50 |
|
51 |
my $sth = $dbh->prepare("select count(date) from stocks where date='".$val{date}."'"); |
52 |
$sth->execute(); |
53 |
my ($nr) = $sth->fetchrow_array; |
54 |
if ($nr == 0) { |
55 |
my $sql="insert into stocks values ('$val{date}','LSE',$val{curr},$val{change},$val{high},$val{low},$val{open})"; |
56 |
$sql=~s/,\+(\d)/,$1/g; # nuke + which pgsql doesn't like |
57 |
if ($sql =~ m,n/a,i) { |
58 |
print_debug("undefined values found. not inserting in db [$sql]"); |
59 |
#print_val(); |
60 |
} else { |
61 |
$dbh->do("$sql") || die "$sql\n".$DBI::errstr; |
62 |
} |
63 |
print_debug("sql: $sql\n"); |
64 |
} else { |
65 |
print_debug("skip: $val{date}\n"); |
66 |
} |
67 |
} |
68 |
|
69 |
my $req = HTTP::Request->new(GET => $url); |
70 |
my $tree = HTML::TreeBuilder->new; |
71 |
|
72 |
my $res = $ua->request($req); |
73 |
if ($res->is_success) { |
74 |
print_debug("html: ".$res->content."\n"); |
75 |
$tree->parse($res->content) || die "can't parse html file!"; |
76 |
|
77 |
# $tree->parse_file("out-without_proxy") || die "can't parse html file!"; |
78 |
|
79 |
# find date |
80 |
my $t = $tree; |
81 |
# $t = $tree->look_down('_tag', 'td'); |
82 |
# print "##td: ",$t->as_text,"\n"; |
83 |
if ($t = $tree->look_down('class', 'ft-quotedate')) { |
84 |
if ($t->as_text =~ m,(\d+):(\d+) (\d+)/(\d+)/(\d+),) { |
85 |
my ($h,$m,$dd,$mm,$yy) = ($1,$2,$3,$4,$5); |
86 |
my $date=($yy+2000)."-$mm-$dd"; |
87 |
my (undef,undef,$local_h) = localtime(time); |
88 |
$h += 12 if ($local_h - $h > 12); |
89 |
my $time="$h:$m"; |
90 |
print_debug("time: $time date: $date"); |
91 |
$val{date}="$date $time"; |
92 |
} else { |
93 |
die "can't recognise date format ".$t->as_text; |
94 |
} |
95 |
} else { |
96 |
die "can't find date in html"; |
97 |
} |
98 |
|
99 |
# last, change |
100 |
if (my @q = $tree->look_down('class', 'ft-quoteLG')) { |
101 |
if ($#q+1 == 2) { |
102 |
$val{curr} = $q[0]->as_text; |
103 |
$val{change} = $q[1]->as_text; |
104 |
} else { |
105 |
die "can't find 2 classes ft-quoteLG"; |
106 |
} |
107 |
} else { |
108 |
die "can't find class ft-quoteLG (last value and change)"; |
109 |
} |
110 |
|
111 |
# high, low, open |
112 |
if (my @q = $tree->look_down('class', 'ft-quoteMd')) { |
113 |
if ($#q+1 >= 5) { |
114 |
$val{high} = $q[1]->as_text; |
115 |
$val{low} = $q[2]->as_text; |
116 |
$val{open} = $q[3]->as_text; |
117 |
} else { |
118 |
die "can't find 11 classes ft-quoteMd (just ".($#q+1)." found)"; |
119 |
} |
120 |
} else { |
121 |
die "can't find class ft-quoteMd"; |
122 |
} |
123 |
|
124 |
insert(); |
125 |
} else { |
126 |
warn "can't fetch stock data"; |
127 |
} |
128 |
|
129 |
print_val() if ($debug); |
130 |
|
131 |
$dbh->disconnect; |
132 |
|