root/lang/perl/Data-Feed/trunk/lib/Data/Feed.pm @ 19014

Revision 19014, 5.7 kB (checked in by daisuke, 5 years ago)

version++

  • Property svn:keywords set to Id
Line 
1# $Id$
2
3package Data::Feed;
4use Moose;
5use Carp();
6use Scalar::Util ();
7use URI::Fetch;
8
# Distribution version and CPAN authority identifier for this package.
our $VERSION = '0.00006';
our $AUTHORITY = 'cpan:DMAKI';

# Optional, read-write parser object. Any value assigned must consume the
# Data::Feed::Parser role. parse() consults this attribute before it falls
# back to auto-detecting a parser from the feed content.
has 'parser' => (
    is => 'rw',
    does => 'Data::Feed::Parser',
);

# Finalize the class metadata now that all attributes are declared.
__PACKAGE__->meta->make_immutable;

# Remove Moose's imported keywords (has, etc.) from this namespace.
no Moose;
20
# Parse a feed and return the resulting feed object.
#
# May be invoked either as a class method (an instance is created on the
# fly) or as an instance method.
#
# Parameters:
#   $stream - the feed source; anything fetch_stream() understands.
#
# Returns whatever the selected parser's parse() method returns for the
# fetched content.
#
# Dies (Carp::confess) when no stream is supplied, or when no parser is
# configured and none could be found for the content.
sub parse {
    my ($self, $stream) = @_;

    if (! Scalar::Util::blessed($self) ){
        $self = $self->new();
    }

    if (! $stream) {
        Carp::confess("No stream to parse was provided to parse()");
    }

    my $content_ref = $self->fetch_stream($stream);

    # Prefer an explicitly configured parser; otherwise attempt to figure
    # out what we're parsing. Either way the parser must actually be run:
    # returning the parser object itself would hand the caller a parser
    # instead of a feed.
    my $parser = $self->parser || $self->find_parser( $content_ref );

    if (! $parser) {
        Carp::confess("Failed to find a suitable parser");
    }

    return $parser->parse( $content_ref );
}
49
# Pick and instantiate a parser for the given content.
#
# Parameters:
#   $content_ref - reference to a scalar holding the raw feed content.
#
# The parser class name is built as "<invocant class>::Parser::<format>",
# where <format> comes from guess_format(). The class is loaded on demand
# and a new instance of it is returned.
#
# Dies (Carp::confess) when the format cannot be guessed.
sub find_parser {
    my ($self, $content_ref) = @_;

    my $format = $self->guess_format($content_ref);
    if (! $format) {
        Carp::confess("Unable to guess format from stream content");
    }

    # blessed() yields undef when called as a class method; in that case
    # the invocant itself is already the class name we need.
    my $base  = Scalar::Util::blessed($self) || $self;
    my $class = join( '::', $base, 'Parser', $format );

    Class::MOP::load_class($class);

    return $class->new();
}
64
# Guess the feed format from the first real element in the content.
#
# Parameters:
#   $content_ref - reference to a scalar holding the raw feed content.
#
# Returns 'RSS' when the first element is <rss> or <RDF> (any namespace
# prefix is ignored), 'Atom' when it is <feed>, and an empty list when no
# element is found or the element is not recognized.
#
# Auto-detection based on the first element is prone to breakage, but then
# again we don't want to parse the whole feed ourselves.
sub guess_format {
    my ($self, $content_ref) = @_;

    # XXX - Make this extendable!

    my $tag;

    # The capture stops at whitespace AND at angle brackets, so that
    # "<rss><channel>" yields "rss" rather than running on into the
    # following tags.
    while ($$content_ref =~ /<([^\s<>]+)/g) {
        # Drop anything that cannot be part of a (possibly prefixed) name
        # or a declaration marker.
        (my $name = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;

        # Skip processing instructions (<?xml ...) and declarations or
        # comments (<!DOCTYPE ..., <!-- ...).
        my $first = substr $name, 0, 1;
        next if $first eq '?' || $first eq '!';

        $tag = $name;
        last;
    }

    if (! $tag) {
        # Could not find the first XML element
        return ();
    }

    # Strip the namespace prefix, e.g. "rdf:RDF" -> "RDF".
    $tag =~ s/^.*://;

    if ($tag eq 'rss' || $tag eq 'RDF') {
        return 'RSS';
    } elsif ($tag eq 'feed') {
        return 'Atom';
    }

    return ();
}
99
# Read the complete feed content from a stream.
#
# Parameters:
#   $stream - one of:
#       * a plain string:  treated as a filename and read from disk
#       * a URI object:    fetched via URI::Fetch
#       * a scalar ref:    dereferenced; the string is the content
#       * a glob ref / IO handle object: read until EOF
#
# Returns a reference to a scalar holding the content.
#
# Dies (Carp::confess) when the file cannot be opened, the URI cannot be
# fetched or is reported as permanently gone, or the stream type is not
# one of the above.
sub fetch_stream {
    my ($self, $stream) = @_;

    my $content = '';

    if ( ! ref $stream ) {
        # if given a string, it's a filename
        open( my $fh, '<', $stream )
            or Carp::confess("Could not open file $stream: $!");
        $content = do { local $/; <$fh> };
        close $fh;
    } elsif ( Scalar::Util::blessed($stream) && $stream->isa('URI') ) {
        # This check must come before the SCALAR one: URI objects are
        # themselves blessed scalar references.
        # XXX - Shouldn't using LWP suffice here?
        my $res = URI::Fetch->fetch($stream)
            or Carp::confess("Failed to fetch URI $stream: " . URI::Fetch->errstr);

        if ( $res->status == URI::Fetch::URI_GONE() ) {
            Carp::confess("This feed has been permanently removed");
        }
        $content = $res->content;
    } elsif ( Scalar::Util::reftype($stream) eq 'SCALAR' ) {
        # reftype() (rather than blessed()/isa) so that plain, unblessed
        # references such as \$xml are recognized.
        $content = $$stream;
    } elsif ( Scalar::Util::reftype($stream) eq 'GLOB' ) {
        # Covers both lexical file handles and blessed handle objects
        # whose underlying type is a glob.
        $content = do { local $/; <$stream> };
    } else {
        my $type = ref $stream;
        Carp::confess("Don't know how to fetch '$type'");
    }

    return \$content;
}
132
1331;
134
135__END__
136
137=head1 NAME
138
139Data::Feed - Extensible Feed Parsing Tool
140
141=head1 SYNOPSIS
142
143  use Data::Feed;
144
145  # from a file
146  $feed = Data::Feed->parse( '/path/to/my/feed.xml' );
147
148  # from an URI
149  $feed = Data::Feed->parse( URI->new( 'http://example.com/atom.xml' ) );
150
151  # from a string
152  $feed = Data::Feed->parse( \$feed );
153
154  # from a handle
155  $feed = Data::Feed->parse( $fh );
156
157  # Data::Feed auto-guesses the type of a feed by its contents, but you can
158  # explicitly tell what parser to use
159
160  $feed = Data::Feed->new( parser => $myparser )->parse(...);
161
162=head1 DESCRIPTION
163
164Data::Feed is a frontend for feeds. It will attempt to auto-guess what type
165of feed you are passing it, and will generate the appropriate feed object.
166
167What, another XML::Feed? Yes, but this time it's extensible. It's cleanly
168OO (until you get down to the XML nastiness), and it's easy to add your own
169parser to do whatever you want it to do.
170
171=head1 STRUCTURE
172
173Data::Feed has a fairly simple structure. The first layer is a "dynamic"
174parser -- "dynamic" in that Data::Feed will try to find what the feed is,
175and then create the appropriate parser to parse it.
176
177This is done in Data::Feed->find_parser() and Data::Feed->guess_format().
178By default we recognize RSS and Atom feeds. Should the need arise to
179either provide a custom parser or to provide more refined logic to find a
180parser type, override the respective method and do what you will with it.
181
182The second layer is a thin wrapper around RSS and Atom feed objects.
183We use XML::RSS::LibXML (or XML::RSS) and XML::Atom for this purpose.
184
185=head1 PARSING FEEDS
186
Data::Feed can parse files, URIs, raw strings, and file handles. All you need
to do is pass the appropriate parameter.
189
190For file names, we expect a plain scalar:
191
192  Data::Feed->parse( '/path/to/feed.xml' );
193
For URIs (which we will fetch via URI::Fetch), pass in a URI object:
195
196  Data::Feed->parse( URI->new("http://example.com/feed.xml") );
197
198For raw strings, pass in a scalar ref:
199
  Data::Feed->parse( \qq{<?xml version="1.0"?><feed> .... </feed>} );
201
202For file handles, pass in a glob:
203
204  open(my $fh, '<', '/path/to/feed.xml' );
205  Data::Feed->parse( $fh );
206
207=head1 METHODS
208
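=head2 parse($stream)

Fetches the given stream and hands its content to a parser. C<$stream> may
be any of the forms described in L</PARSING FEEDS>.

=head2 find_parser($content_ref)

Attempts to find an appropriate parser for the content referenced by
C<$content_ref>.

=head2 guess_format($content_ref)

Returns C<RSS> or C<Atom> depending on the first element found in the
content, or an empty list if the format is not recognized.

=head2 fetch_stream($stream)

Reads the whole stream and returns a reference to a scalar holding its
content.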
218
219=head1 TODO
220
221Be able to /set/ enclosures (We can already get enclosures).
222
223=head1 AUTHORS
224
225Daisuke Maki C<< <daisuke@endeworks.jp> >>
226
227Taro Funaki C<< <t@33rpm.jp> >>
228
229A /Lot/ of the code is based on code from XML::Feed.
230
231=head1 LICENSE
232
233This program is free software; you can redistribute it and/or modify it
234under the same terms as Perl itself.
235
236See http://www.perl.com/perl/misc/Artistic.html
237
238=cut
Note: See TracBrowser for help on using the browser.