1 |
dpavlin |
1 |
# $Id: Feed.pm 1958 2006-08-14 05:31:27Z btrott $ |
2 |
|
|
|
3 |
|
|
package XML::Feed; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use base qw( Class::ErrorHandler ); |
7 |
|
|
use Feed::Find; |
8 |
|
|
use URI::Fetch; |
9 |
|
|
use Carp; |
10 |
|
|
|
11 |
|
|
our $VERSION = '0.12'; |
12 |
|
|
|
13 |
|
|
## Create a new, empty feed object of the requested format.
##
## Arguments:
##   $format - name of the format subclass to instantiate; defaults to
##             'Atom' when omitted or false.
##
## Returns the new object blessed into XML::Feed::$format. If the
## object's init_empty method returns false, returns whatever
## $class->error(...) returns, with the feed's errstr as the message.
## Croaks when the format name is malformed or its class cannot be loaded.
sub new {
    my $class = shift;
    my($format) = @_;
    $format ||= 'Atom';

    ## $format ends up inside a string eval below, so refuse anything that
    ## is not a plain package-name fragment before it can be executed.
    Carp::croak("Unsupported format $format: invalid format name")
        unless $format =~ /\A\w+(?:::\w+)*\z/;

    my $format_class = 'XML::Feed::' . $format;
    eval "use $format_class";
    Carp::croak("Unsupported format $format: $@") if $@;

    my $feed = bless {}, $format_class;
    $feed->init_empty or return $class->error($feed->errstr);
    $feed;
}
24 |
|
|
|
25 |
|
|
## Default initialisation hook invoked by new() on a freshly blessed,
## empty feed. This base version does nothing and reports success.
sub init_empty {
    return 1;
}
26 |
|
|
|
27 |
|
|
## Parse a syndication feed and return an object of the detected format.
##
## $stream selects where the XML comes from:
##   * an object that isa 'URI' - fetched through URI::Fetch;
##   * a SCALAR reference       - dereferenced to obtain the XML text;
##   * any other reference      - read with read() in 8192-byte chunks
##                                (i.e. treated as an open filehandle);
##   * a plain string           - treated as the name of a file to read.
##
## On success the feed object is reblessed into XML::Feed::<format>, where
## <format> is whatever identify_format reports, and init_string is called
## on it with a reference to the XML. On any failure the method returns
## the result of $class->error(...) with a descriptive message.
sub parse {
    my $class = shift;
    my($stream) = @_;
    return $class->error("Stream parameter is required") unless $stream;

    my $feed = bless {}, $class;
    my $xml = '';
    if (UNIVERSAL::isa($stream, 'URI')) {
        my $res = URI::Fetch->fetch($stream)
            or return $class->error(URI::Fetch->errstr);
        return $class->error("This feed has been permanently removed")
            if $res->status == URI::Fetch::URI_GONE();
        $xml = $res->content;
    } elsif (ref($stream) eq 'SCALAR') {
        $xml = $$stream;
    } elsif (ref($stream)) {
        while (read($stream, my($chunk), 8192)) {
            $xml .= $chunk;
        }
    } else {
        ## Three-argument open with an explicit read mode: the file name is
        ## caller-supplied, and the two-argument form would let a name such
        ## as ">file" or "cmd |" choose the mode or spawn a process.
        open my $fh, '<', $stream
            or return $class->error("Can't open $stream: $!");
        while (read $fh, my($chunk), 8192) {
            $xml .= $chunk;
        }
        close $fh;
    }
    return $class->error("Can't get feed XML content from $stream")
        unless $xml;

    my $format = $feed->identify_format(\$xml)
        or return $class->error($feed->errstr);
    my $format_class = join '::', __PACKAGE__, $format;
    eval "use $format_class";
    return $class->error("Unsupported format $format: $@") if $@;

    ## Switch the placeholder object over to the concrete format class
    ## before handing it the XML to initialise from.
    bless $feed, $format_class;
    $feed->init_string(\$xml) or return $class->error($feed->errstr);
    $feed;
}
64 |
|
|
|
65 |
|
|
## Guess the feed format from the name of the first real element.
##
## Arguments:
##   $xml - reference to a string holding the feed XML.
##
## Returns 'RSS' when the first element is <rss> or <RDF> (any namespace
## prefix is ignored), 'Atom' when it is <feed>, and otherwise the result
## of $feed->error(...).
##
## Auto-detection based on the first element is prone to breakage, but
## then again we don't want to parse the whole feed ourselves.
sub identify_format {
    my $feed = shift;
    my($xml) = @_;

    my $tag;

    ## The scan below uses //g in scalar context, which records its
    ## position on the caller's string. Start from the beginning no matter
    ## what an earlier match left behind.
    pos($$xml) = undef;
    while ($$xml =~ /<(\S+)/sg) {
        ## Keep only characters that can belong to a tag name or to the
        ## '?' / '!' markers of declarations, comments and doctypes.
        (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
        my $first = substr $t, 0, 1;

        ## Skip <?xml ...?>, <!DOCTYPE ...> and <!-- ... --> openers.
        next if $first eq '?' || $first eq '!';

        $tag = $t;
        last;
    }
    ## Leaving the loop early keeps pos() set; clear it so that a later
    ## //g match on the same string (including a repeated call to this
    ## method) does not silently resume from the middle of the document.
    pos($$xml) = undef;

    return $feed->error("Cannot find first element") unless $tag;

    ## Drop any namespace prefix, e.g. "rdf:RDF" becomes "RDF".
    $tag =~ s/^.*://;
    if ($tag eq 'rss' || $tag eq 'RDF') {
        return 'RSS';
    } elsif ($tag eq 'feed') {
        return 'Atom';
    } else {
        return $feed->error("Cannot detect feed type");
    }
}
87 |
|
|
|
88 |
|
|
## Run feed auto-discovery for $uri through Feed::Find.
##
## Returns the list produced by Feed::Find->find. When that list is empty,
## returns the result of $class->error(...) carrying Feed::Find's errstr.
sub find_feeds {
    my($class, $uri) = @_;

    my @found = Feed::Find->find($uri);
    return $class->error(Feed::Find->errstr) unless @found;

    return @found;
}
95 |
|
|
|
96 |
|
|
## Build a copy of this feed in another format.
##
## Arguments:
##   $format - format name handed to new() for the target feed.
##
## Copies every defined feed-level field (title, link, description,
## language, author, copyright, modified, generator) to the new feed, then
## adds each entry after converting it with $entry->convert($format).
##
## Returns the new feed object. If new() does not return an object, the
## failure is passed on through $feed->error(...) instead of dying on a
## method call against an undefined value.
sub convert {
    my $feed = shift;
    my($format) = @_;

    ## new() reports an init_empty failure through the class-level error
    ## slot and hands back its error value, so check before using it.
    my $new = __PACKAGE__->new($format)
        or return $feed->error(__PACKAGE__->errstr);

    for my $field (qw( title link description language author copyright modified generator )) {
        my $val = $feed->$field();
        next unless defined $val;
        $new->$field($val);
    }
    for my $entry ($feed->entries) {
        $new->add_entry($entry->convert($format));
    }
    $new;
}
110 |
|
|
|
111 |
|
|
## Merge the entries of $other into this feed.
##
## Each entry of $other is appended with add_entry unless an entry with
## the same id is already present in this feed or was added earlier in
## the same call.
sub splice {
    my($feed, $other) = @_;

    ## Ids already present in the receiving feed.
    my %seen = map { ($_->id, 1) } $feed->entries;

    for my $candidate ($other->entries) {
        next if $seen{$candidate->id}++;
        $feed->add_entry($candidate);
    }
}
119 |
|
|
|
120 |
|
|
## Forward declarations only: none of these methods has a body in this
## file. Presumably each format class (XML::Feed::<format>) supplies the
## real implementations -- confirm against those classes.

## Feed-level accessors.
sub format;
sub title;
sub link;
sub description;
sub language;
sub author;
sub copyright;
sub modified;
sub generator;

## Entry handling and serialisation.
sub add_entry;
sub entries;
sub as_xml;
132 |
|
|
|
133 |
|
|
## Alternative name for description(): forwards every argument to it and
## returns whatever it returns.
sub tagline {
    my $feed = shift;
    return $feed->description(@_);
}
134 |
|
|
## Alternative name for entries(): returns whatever entries() returns for
## this feed. Arguments beyond the invocant are not passed along.
sub items {
    my($feed) = @_;
    return $feed->entries;
}
135 |
|
|
|
136 |
|
|
1; |
137 |
|
|
__END__ |
138 |
|
|
|
139 |
|
|
=head1 NAME |
140 |
|
|
|
141 |
|
|
XML::Feed - Syndication feed parser and auto-discovery |
142 |
|
|
|
143 |
|
|
=head1 SYNOPSIS |
144 |
|
|
|
145 |
|
|
use XML::Feed; |
146 |
|
|
my $feed = XML::Feed->parse(URI->new('http://example.com/atom.xml')) |
147 |
|
|
or die XML::Feed->errstr; |
148 |
|
|
print $feed->title, "\n"; |
149 |
|
|
for my $entry ($feed->entries) { |
150 |
|
|
} |
151 |
|
|
|
152 |
|
|
## Find all of the syndication feeds on a given page, using |
153 |
|
|
## auto-discovery. |
154 |
|
|
my @feeds = XML::Feed->find_feeds('http://example.com/'); |
155 |
|
|
|
156 |
|
|
=head1 DESCRIPTION |
157 |
|
|
|
158 |
|
|
I<XML::Feed> is a syndication feed parser for both RSS and Atom feeds. It |
159 |
|
|
also implements feed auto-discovery for finding feeds, given a URI. |
160 |
|
|
|
161 |
|
|
I<XML::Feed> supports the following syndication feed formats: |
162 |
|
|
|
163 |
|
|
=over 4 |
164 |
|
|
|
165 |
|
|
=item * RSS 0.91 |
166 |
|
|
|
167 |
|
|
=item * RSS 1.0 |
168 |
|
|
|
169 |
|
|
=item * RSS 2.0 |
170 |
|
|
|
171 |
|
|
=item * Atom |
172 |
|
|
|
173 |
|
|
=back |
174 |
|
|
|
175 |
|
|
The goal of I<XML::Feed> is to provide a unified API for parsing and using |
176 |
|
|
the various syndication formats. The different flavors of RSS and Atom |
177 |
|
|
handle data in different ways: date handling; summaries and content; |
178 |
|
|
escaping and quoting; etc. This module attempts to remove those differences |
179 |
|
|
by providing a wrapper around the formats and the classes implementing |
180 |
|
|
those formats (I<XML::RSS> and I<XML::Atom::Feed>). For example, dates are |
181 |
|
|
handled differently in each of the above formats. To provide a unified API for |
182 |
|
|
date handling, I<XML::Feed> converts all date formats transparently into |
183 |
|
|
I<DateTime> objects, which it then returns to the caller. |
184 |
|
|
|
185 |
|
|
=head1 USAGE |
186 |
|
|
|
187 |
|
|
=head2 XML::Feed->new($format) |
188 |
|
|
|
189 |
|
|
Creates a new empty I<XML::Feed> object using the format I<$format>. |
190 |
|
|
|
191 |
|
|
=head2 XML::Feed->parse($stream) |
192 |
|
|
|
193 |
|
|
Parses a syndication feed identified by I<$stream>. I<$stream> can be any |
194 |
|
|
one of the following: |
195 |
|
|
|
196 |
|
|
=over 4 |
197 |
|
|
|
198 |
|
|
=item * Scalar reference |
199 |
|
|
|
200 |
|
|
A reference to a string containing the XML body of the feed.
201 |
|
|
|
202 |
|
|
=item * Filehandle |
203 |
|
|
|
204 |
|
|
An open filehandle from which the feed XML will be read. |
205 |
|
|
|
206 |
|
|
=item * File name |
207 |
|
|
|
208 |
|
|
The name of a file containing the feed XML. |
209 |
|
|
|
210 |
|
|
=item * URI object |
211 |
|
|
|
212 |
|
|
A URI from which the feed XML will be retrieved. |
213 |
|
|
|
214 |
|
|
=back |
215 |
|
|
|
216 |
|
|
=head2 XML::Feed->find_feeds($uri) |
217 |
|
|
|
218 |
|
|
Given a URI I<$uri>, use auto-discovery to find all of the feeds linked |
219 |
|
|
from that page (using I<E<lt>linkE<gt>> tags). |
220 |
|
|
|
221 |
|
|
Returns a list of feed URIs. |
222 |
|
|
|
223 |
|
|
=head2 $feed->convert($format) |
224 |
|
|
|
225 |
|
|
Converts the I<XML::Feed> object into the I<$format> format, and returns |
226 |
|
|
the new object. |
227 |
|
|
|
228 |
|
|
=head2 $feed->splice($other_feed) |
229 |
|
|
|
230 |
|
|
Splices in all of the entries from the feed I<$other_feed> into I<$feed>, |
231 |
|
|
skipping posts that are already in I<$feed>. |
232 |
|
|
|
233 |
|
|
=head2 $feed->format |
234 |
|
|
|
235 |
|
|
Returns the format of the feed (C<Atom>, or some version of C<RSS>). |
236 |
|
|
|
237 |
|
|
=head2 $feed->title([ $title ]) |
238 |
|
|
|
239 |
|
|
The title of the feed/channel. |
240 |
|
|
|
241 |
|
|
=head2 $feed->link([ $uri ]) |
242 |
|
|
|
243 |
|
|
The permalink of the feed/channel. |
244 |
|
|
|
245 |
|
|
=head2 $feed->tagline([ $tagline ]) |
246 |
|
|
|
247 |
|
|
The description or tagline of the feed/channel. |
248 |
|
|
|
249 |
|
|
=head2 $feed->description([ $description ]) |
250 |
|
|
|
251 |
|
|
Alias for I<$feed-E<gt>tagline>. |
252 |
|
|
|
253 |
|
|
=head2 $feed->author([ $author ]) |
254 |
|
|
|
255 |
|
|
The author of the feed/channel. |
256 |
|
|
|
257 |
|
|
=head2 $feed->language([ $language ]) |
258 |
|
|
|
259 |
|
|
The language of the feed. |
260 |
|
|
|
261 |
|
|
=head2 $feed->copyright([ $copyright ]) |
262 |
|
|
|
263 |
|
|
The copyright notice of the feed. |
264 |
|
|
|
265 |
|
|
=head2 $feed->modified([ $modified ]) |
266 |
|
|
|
267 |
|
|
A I<DateTime> object representing the last-modified date of the feed. |
268 |
|
|
|
269 |
|
|
If present, I<$modified> should be a I<DateTime> object. |
270 |
|
|
|
271 |
|
|
=head2 $feed->generator([ $generator ]) |
272 |
|
|
|
273 |
|
|
The generator of the feed. |
274 |
|
|
|
275 |
|
|
=head2 $feed->entries |
276 |
|
|
|
277 |
|
|
A list of the entries/items in the feed. Returns an array containing |
278 |
|
|
I<XML::Feed::Entry> objects. |
279 |
|
|
|
280 |
|
|
=head2 $feed->add_entry($entry) |
281 |
|
|
|
282 |
|
|
Adds an entry to the feed. I<$entry> should be an I<XML::Feed::Entry> |
283 |
|
|
object in the correct format for the feed. |
284 |
|
|
|
285 |
|
|
=head2 $feed->as_xml |
286 |
|
|
|
287 |
|
|
Returns an XML representation of the feed, in the format determined by |
288 |
|
|
the current format of the I<$feed> object. |
289 |
|
|
|
290 |
|
|
=head1 PACKAGE VARIABLES |
291 |
|
|
|
292 |
|
|
=over 4 |
293 |
|
|
|
294 |
|
|
=item C<$XML::Feed::RSS::PREFERRED_PARSER> |
295 |
|
|
|
296 |
|
|
If you want to use an RSS parser class other than XML::RSS (the default), you can
297 |
|
|
change the class by setting the C<$PREFERRED_PARSER> variable in the XML::Feed::RSS
298 |
|
|
package. |
299 |
|
|
|
300 |
|
|
$XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML"; |
301 |
|
|
|
302 |
|
|
B<Note:> this will only work for parsing feeds, not creating feeds. |
303 |
|
|
|
304 |
|
|
=back |
305 |
|
|
|
306 |
|
|
=head1 LICENSE |
307 |
|
|
|
308 |
|
|
I<XML::Feed> is free software; you may redistribute it and/or modify it |
309 |
|
|
under the same terms as Perl itself. |
310 |
|
|
|
311 |
|
|
=head1 AUTHOR & COPYRIGHT |
312 |
|
|
|
313 |
|
|
Except where otherwise noted, I<XML::Feed> is Copyright 2004-2005 |
314 |
|
|
Six Apart, cpan@sixapart.com. All rights reserved. |
315 |
|
|
|
316 |
|
|
=cut |