1 |
# $Id: Feed.pm 1958 2006-08-14 05:31:27Z btrott $ |
2 |
|
3 |
package XML::Feed; |
4 |
use strict; |
5 |
|
6 |
use base qw( Class::ErrorHandler ); |
7 |
use Feed::Find; |
8 |
use URI::Fetch; |
9 |
use Carp; |
10 |
|
11 |
our $VERSION = '0.12'; |
12 |
|
13 |
## Construct an empty feed object of the requested format.
##
## Arguments:
##   $format - name of the format class to load from under XML::Feed::
##             (defaults to 'Atom' when omitted or false).
##
## Returns the new object, blessed into XML::Feed::$format, or whatever
## $class->error(...) returns when the object's init_empty reports failure.
## Croaks when the format name is malformed or its class cannot be loaded.
sub new {
    my $class = shift;
    my($format) = @_;
    $format ||= 'Atom';

    ## $format is interpolated into a string eval below, so accept only
    ## something shaped like a package name; anything else could smuggle
    ## arbitrary code into that eval.
    Carp::croak("Unsupported format $format: invalid format name")
        unless $format =~ /\A[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z0-9_]+)*\z/;

    my $format_class = 'XML::Feed::' . $format;
    eval "use $format_class";
    Carp::croak("Unsupported format $format: $@") if $@;

    my $feed = bless {}, $format_class;
    $feed->init_empty or return $class->error($feed->errstr);
    $feed;
}
24 |
|
25 |
## Default initialiser that new() calls on a freshly blessed, empty feed
## object. This base version does nothing and always reports success.
sub init_empty {
    return 1;
}
26 |
|
27 |
## Parse a syndication feed and return an object blessed into the matching
## XML::Feed::<format> class.
##
## $stream may be:
##   * a URI object        - its content is fetched with URI::Fetch
##   * a scalar reference  - dereferenced to obtain the XML
##   * any other reference - treated as an open filehandle and read to EOF
##   * a plain string      - treated as the name of a file to read
##
## Returns the feed object, or whatever $class->error(...) returns when the
## stream is missing, cannot be fetched or opened, yields no content, or is
## in a format that cannot be identified, loaded or initialised.
sub parse {
    my $class = shift;
    my($stream) = @_;
    return $class->error("Stream parameter is required") unless $stream;
    my $feed = bless {}, $class;
    my $xml = '';
    if (UNIVERSAL::isa($stream, 'URI')) {
        my $res = URI::Fetch->fetch($stream)
            or return $class->error(URI::Fetch->errstr);
        return $class->error("This feed has been permanently removed")
            if $res->status == URI::Fetch::URI_GONE();
        $xml = $res->content;
    } elsif (ref($stream) eq 'SCALAR') {
        $xml = $$stream;
    } elsif (ref($stream)) {
        while (read($stream, my($chunk), 8192)) {
            $xml .= $chunk;
        }
    } else {
        ## Three-argument open: the file name comes from the caller, and the
        ## two-argument form would treat a leading '<'/'>' or a leading or
        ## trailing '|' in it as an open mode or a command to run.
        open my $fh, '<', $stream
            or return $class->error("Can't open $stream: $!");
        while (read $fh, my($chunk), 8192) {
            $xml .= $chunk;
        }
        close $fh;
    }
    return $class->error("Can't get feed XML content from $stream")
        unless $xml;

    ## Work out which format class handles this document, load it, and
    ## re-bless the placeholder object into it before initialising.
    my $format = $feed->identify_format(\$xml)
        or return $class->error($feed->errstr);
    my $format_class = join '::', __PACKAGE__, $format;
    eval "use $format_class";
    return $class->error("Unsupported format $format: $@") if $@;
    bless $feed, $format_class;
    $feed->init_string(\$xml) or return $class->error($feed->errstr);
    $feed;
}
64 |
|
65 |
## Guess the feed format from the name of the document's first element.
##
## Arguments:
##   $xml - reference to a string holding the feed XML.
##
## Returns 'RSS' when the first element is named rss or RDF (any namespace
## prefix is ignored), 'Atom' when it is named feed, and otherwise whatever
## $feed->error(...) returns.
sub identify_format {
    my $feed = shift;
    my($xml) = @_;
    ## Auto-detect feed type based on first element. This is prone
    ## to breakage, but then again we don't want to parse the whole
    ## feed ourselves.
    my $tag;

    ## A //g match in scalar context resumes from pos() of the string, so
    ## clear it: otherwise a position left by an earlier match (including
    ## an earlier call to this method) would make the scan start mid-document.
    pos($$xml) = undef;

    ## The first alternative swallows a complete comment so that markup
    ## quoted inside it is never taken for the root element. The second
    ## stops the tag name at whitespace or '>', so "<rss><channel>" yields
    ## "rss" rather than one glued-together token.
    while ($$xml =~ /<(!--.*?--|[^\s>]+)/sg) {
        (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
        my $first = substr $t, 0, 1;
        ## Skip processing instructions (<?...), comments and declarations (<!...).
        $tag = $t, last unless $first eq '?' || $first eq '!';
    }

    ## Leaving the loop via "last" keeps pos() set; do not leak that to the caller.
    pos($$xml) = undef;

    return $feed->error("Cannot find first element") unless $tag;
    $tag =~ s/^.*://;
    if ($tag eq 'rss' || $tag eq 'RDF') {
        return 'RSS';
    } elsif ($tag eq 'feed') {
        return 'Atom';
    } else {
        return $feed->error("Cannot detect feed type");
    }
}
87 |
|
88 |
## Run feed auto-discovery on $uri via Feed::Find.
##
## Returns the list that Feed::Find->find produced; when that list is
## empty, returns whatever $class->error(...) returns after handing it
## Feed::Find's error string.
sub find_feeds {
    my ($class, $uri) = @_;

    my @found = Feed::Find->find($uri);
    return $class->error(Feed::Find->errstr) unless @found;

    return @found;
}
95 |
|
96 |
## Build a new feed in format $format carrying this feed's data.
##
## Each feed-level field whose accessor returns a defined value is copied
## to the new feed, then every entry is converted with its own
## convert($format) and added. Returns the new feed object.
sub convert {
    my ($feed, $format) = @_;

    my $converted = __PACKAGE__->new($format);

    my @fields = qw( title link description language author copyright modified generator );
    foreach my $name (@fields) {
        my $value = $feed->$name();
        $converted->$name($value) if defined $value;
    }

    foreach my $item ($feed->entries) {
        my $item_copy = $item->convert($format);
        $converted->add_entry($item_copy);
    }

    return $converted;
}
110 |
|
111 |
## Add to this feed every entry of $other whose id is not already present,
## either in this feed or among the entries added earlier in the same call.
sub splice {
    my ($feed, $other) = @_;

    ## Ids already in the feed, keyed for quick lookup.
    my %seen = map { ($_->id, 1) } $feed->entries;

    foreach my $candidate ($other->entries) {
        ## Post-increment records the id, so later duplicates are skipped too.
        next if $seen{ $candidate->id }++;
        $feed->add_entry($candidate);
    }
}
119 |
|
120 |
## Forward declarations for the common feed interface. None of these has a
## body in this file; new() and parse() bless feed objects into an
## XML::Feed::<format> class, which is presumably where they are
## implemented -- confirm against those modules.
sub format; |
121 |
sub title; |
122 |
sub link; |
123 |
sub description; |
124 |
sub language; |
125 |
sub author; |
126 |
sub copyright; |
127 |
sub modified; |
128 |
sub generator; |
129 |
sub add_entry; |
130 |
sub entries; |
131 |
sub as_xml; |
132 |
|
133 |
## Alternative name for description(): forwards any arguments to it and
## returns its result.
sub tagline {
    my $feed = shift;
    return $feed->description(@_);
}
134 |
## Alternative name for entries(): returns whatever entries() returns.
sub items {
    my ($feed) = @_;
    return $feed->entries;
}
135 |
|
136 |
1; |
137 |
__END__ |
138 |
|
139 |
=head1 NAME |
140 |
|
141 |
XML::Feed - Syndication feed parser and auto-discovery |
142 |
|
143 |
=head1 SYNOPSIS |
144 |
|
145 |
use XML::Feed; |
146 |
my $feed = XML::Feed->parse(URI->new('http://example.com/atom.xml')) |
147 |
or die XML::Feed->errstr; |
148 |
print $feed->title, "\n"; |
149 |
for my $entry ($feed->entries) { |
150 |
} |
151 |
|
152 |
## Find all of the syndication feeds on a given page, using |
153 |
## auto-discovery. |
154 |
my @feeds = XML::Feed->find_feeds('http://example.com/'); |
155 |
|
156 |
=head1 DESCRIPTION |
157 |
|
158 |
I<XML::Feed> is a syndication feed parser for both RSS and Atom feeds. It |
159 |
also implements feed auto-discovery for finding feeds, given a URI. |
160 |
|
161 |
I<XML::Feed> supports the following syndication feed formats: |
162 |
|
163 |
=over 4 |
164 |
|
165 |
=item * RSS 0.91 |
166 |
|
167 |
=item * RSS 1.0 |
168 |
|
169 |
=item * RSS 2.0 |
170 |
|
171 |
=item * Atom |
172 |
|
173 |
=back |
174 |
|
175 |
The goal of I<XML::Feed> is to provide a unified API for parsing and using |
176 |
the various syndication formats. The different flavors of RSS and Atom |
177 |
handle data in different ways: date handling; summaries and content; |
178 |
escaping and quoting; etc. This module attempts to remove those differences |
179 |
by providing a wrapper around the formats and the classes implementing |
180 |
those formats (I<XML::RSS> and I<XML::Atom::Feed>). For example, dates are |
181 |
handled differently in each of the above formats. To provide a unified API for |
182 |
date handling, I<XML::Feed> converts all date formats transparently into |
183 |
I<DateTime> objects, which it then returns to the caller. |
184 |
|
185 |
=head1 USAGE |
186 |
|
187 |
=head2 XML::Feed->new($format) |
188 |
|
189 |
Creates a new empty I<XML::Feed> object using the format I<$format>. |
190 |
|
191 |
=head2 XML::Feed->parse($stream) |
192 |
|
193 |
Parses a syndication feed identified by I<$stream>. I<$stream> can be any |
194 |
one of the following: |
195 |
|
196 |
=over 4 |
197 |
|
198 |
=item * Scalar reference |
199 |
|
200 |
A reference to a string containing the XML body of the feed.
201 |
|
202 |
=item * Filehandle |
203 |
|
204 |
An open filehandle from which the feed XML will be read. |
205 |
|
206 |
=item * File name |
207 |
|
208 |
The name of a file containing the feed XML. |
209 |
|
210 |
=item * URI object |
211 |
|
212 |
A URI from which the feed XML will be retrieved. |
213 |
|
214 |
=back |
215 |
|
216 |
=head2 XML::Feed->find_feeds($uri) |
217 |
|
218 |
Given a URI I<$uri>, use auto-discovery to find all of the feeds linked |
219 |
from that page (using I<E<lt>linkE<gt>> tags). |
220 |
|
221 |
Returns a list of feed URIs. |
222 |
|
223 |
=head2 $feed->convert($format) |
224 |
|
225 |
Converts the I<XML::Feed> object into the I<$format> format, and returns |
226 |
the new object. |
227 |
|
228 |
=head2 $feed->splice($other_feed) |
229 |
|
230 |
Splices in all of the entries from the feed I<$other_feed> into I<$feed>, |
231 |
skipping posts that are already in I<$feed>. |
232 |
|
233 |
=head2 $feed->format |
234 |
|
235 |
Returns the format of the feed (C<Atom>, or some version of C<RSS>). |
236 |
|
237 |
=head2 $feed->title([ $title ]) |
238 |
|
239 |
The title of the feed/channel. |
240 |
|
241 |
=head2 $feed->link([ $uri ]) |
242 |
|
243 |
The permalink of the feed/channel. |
244 |
|
245 |
=head2 $feed->tagline([ $tagline ]) |
246 |
|
247 |
The description or tagline of the feed/channel. |
248 |
|
249 |
=head2 $feed->description([ $description ]) |
250 |
|
251 |
Alias for I<$feed-E<gt>tagline>. |
252 |
|
253 |
=head2 $feed->author([ $author ]) |
254 |
|
255 |
The author of the feed/channel. |
256 |
|
257 |
=head2 $feed->language([ $language ]) |
258 |
|
259 |
The language of the feed. |
260 |
|
261 |
=head2 $feed->copyright([ $copyright ]) |
262 |
|
263 |
The copyright notice of the feed. |
264 |
|
265 |
=head2 $feed->modified([ $modified ]) |
266 |
|
267 |
A I<DateTime> object representing the last-modified date of the feed. |
268 |
|
269 |
If present, I<$modified> should be a I<DateTime> object. |
270 |
|
271 |
=head2 $feed->generator([ $generator ]) |
272 |
|
273 |
The generator of the feed. |
274 |
|
275 |
=head2 $feed->entries |
276 |
|
277 |
A list of the entries/items in the feed. Returns an array containing |
278 |
I<XML::Feed::Entry> objects. |
279 |
|
280 |
=head2 $feed->add_entry($entry) |
281 |
|
282 |
Adds an entry to the feed. I<$entry> should be an I<XML::Feed::Entry> |
283 |
object in the correct format for the feed. |
284 |
|
285 |
=head2 $feed->as_xml |
286 |
|
287 |
Returns an XML representation of the feed, in the format determined by |
288 |
the current format of the I<$feed> object. |
289 |
|
290 |
=head1 PACKAGE VARIABLES |
291 |
|
292 |
=over 4 |
293 |
|
294 |
=item C<$XML::Feed::RSS::PREFERRED_PARSER> |
295 |
|
296 |
If you want to use an RSS parser class other than XML::RSS (the default), you
can change the class by setting the C<$PREFERRED_PARSER> variable in the
XML::Feed::RSS package.
299 |
|
300 |
$XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML"; |
301 |
|
302 |
B<Note:> this will only work for parsing feeds, not creating feeds. |
303 |
|
304 |
=back |
305 |
|
306 |
=head1 LICENSE |
307 |
|
308 |
I<XML::Feed> is free software; you may redistribute it and/or modify it |
309 |
under the same terms as Perl itself. |
310 |
|
311 |
=head1 AUTHOR & COPYRIGHT |
312 |
|
313 |
Except where otherwise noted, I<XML::Feed> is Copyright 2004-2005 |
314 |
Six Apart, cpan@sixapart.com. All rights reserved. |
315 |
|
316 |
=cut |