1 |
#!/usr/bin/perl |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use XML::FeedPP; |
7 |
use DateTime; |
8 |
use Data::Dump qw(dump); |
9 |
|
10 |
# Reference instant, captured once at start-up.  scrape() prints an entry
# only when the entry that follows it starts after this instant.
# NOTE(review): DateTime->now() yields a UTC object, while scrape() builds its
# DateTime objects without a time zone -- confirm that comparing the two is
# what is intended for this feed.
my $now = DateTime->now();
11 |
|
12 |
# Fetch one RSS feed and print the programme schedule found in its items.
#
# Parameter:
#   $url - feed location, passed unchanged to XML::FeedPP->new.
#
# Output:
#   STDOUT - the feed title and pubDate, then one line per schedule entry:
#            "[ ] <channel> <yyyy-mm-dd> <hh:mm:ss> <seconds>\t<text>[\t<link>]"
#   STDERR - one debug line per entry (via warn), plus a warning for every
#            item that had to be skipped.
#
# Each item title is expected to look like "<channel> - <dd.mm.yyyy>" and the
# description to hold entries marked up as "<strong>HH:MM</strong> text".
# The length of an entry is only known once the next entry has been read, so
# every entry is printed one step late and the last entry of an item is never
# printed.  An entry is printed only if its successor starts after the
# file-level $now.
#
# Anything thrown by XML::FeedPP or DateTime is left to propagate.
sub scrape {
    my $url = shift;

    my $feed = XML::FeedPP->new($url);
    print "Title: ", $feed->title(),   "\n";
    print "Date: ",  $feed->pubDate(), "\n";

    foreach my $item ( $feed->get_item() ) {
        my $title = $item->title();
        $title = '' unless defined $title;

        my ( $channel, $date ) = split /\s*-\s*/, $title, 2;
        my ( $dd, $mm, $yyyy ) = defined $date ? split( /\./, $date, 3 ) : ();

        # Without a complete dd.mm.yyyy date DateTime->new below would die
        # and abort the whole run; skip just this item instead.
        unless ( defined $yyyy ) {
            warn "# skipping item with unparsable title '$title'\n";
            next;
        }

        my $program = $item->description();
        $program = '' unless defined $program;

        # Put every <div> on a line of its own so entries can be split apart.
        $program =~ s{<div[^>]*>}{\n}gs;
        $program =~ s{</div>}{\n}gs;

        # ( start time, text, link ) of the entry still waiting for its
        # end time; empty until the first entry has been seen.
        my @previous;

        foreach my $line ( split /\n+/, $program ) {

            # Only lines that begin an entry carry a <strong>HH:MM</strong>
            # marker; the marker is removed from the text.
            next unless $line =~ s{<strong>(\d+):(\d+)</strong>\s*}{};
            my ( $hour, $minute ) = ( $1, $2 );

            # Replace an anchor by its text and remember the target.  The
            # declaration is kept unconditional on purpose: "my ... if ..."
            # has undefined behaviour according to perlsyn.
            my $link;
            if ( $line =~ s{<a.+href="(.+?)">([^<]+)</a>}{$2} ) {
                $link = $1;
            }

            my $start = DateTime->new(
                year => $yyyy, month  => $mm, day => $dd,
                hour => $hour, minute => $minute,
            );

            if (@previous) {
                my ( $prev_start, $prev_text, $prev_link ) = @previous;

                # Times only carry HH:MM; an entry that appears to start
                # before its predecessor belongs to the following day.
                $start->add( days => 1 ) if $start < $prev_start;

                my $duration = $start - $prev_start;
                my $sec = $duration->hours * 60 * 60
                        + $duration->minutes * 60
                        + $duration->seconds;
                warn "# $start $prev_start = $sec\n";

                if ( $start > $now ) {
                    printf "[ ] %4s %s %s %4d\t%s%s\n",
                        $channel,
                        $prev_start->ymd('-'), $prev_start->hms(':'), $sec,
                        $prev_text,
                        $prev_link ? "\t<$prev_link>" : '';
                }
            }

            @previous = ( $start, $line, $link );
        }
    }
}
52 |
|
53 |
# Scrape the feeds for ids 1 through 4, one after another.
scrape("http://mojtv.hr/rss/rsschannel.ashx?id=$_") for 1 .. 4;