root/lang/perl/Data-Feed/trunk/lib/Data/Feed.pm @ 16227

Revision 16227, 3.8 kB (checked in by daisuke, 5 years ago)

license and author

  • Property svn:keywords set to Id
Line 
1# $Id$
2
3package Data::Feed;
4use Moose;
5
6has 'parser' => (
7    is => 'rw',
8    does => 'Data::Feed::Parser',
9);
10
11__PACKAGE__->meta->make_immutable;
12
13no Moose;
14
15sub parse {
16    my ($self, $stream) = @_;
17
18    if (! blessed $self) {
19        $self = $self->new();
20    }
21
22    if (! $stream) {
23        confess "No stream to parse was provided to parse()";
24    }
25
26    my $content_ref = $self->fetch_stream($stream);
27
28    my $parser = $self->parser;
29    if ($parser) {
30        # If we get a parser, then use it
31        return $parser;
32    }
33
34    # otherwise, attempt to figure out what we're parsing
35    $parser = $self->find_parser( $content_ref );
36
37    if (! $parser) {
38        confess "Failed to find a suitable parser";
39    }
40
41    return $parser->parse( $content_ref );
42}
43
44sub find_parser {
45    my ($self, $content_ref) = @_;
46
47    my $format = $self->guess_format($content_ref);
48    if (! $format) {
49        confess "Unable to guess format from stream content";
50    }
51
52    my $class = join( '::', blessed $self, 'Parser', $format );
53
54    Class::MOP::load_class($class);
55
56    return $class->new();
57}
58
59sub guess_format {
60    my ($self, $content_ref) = @_;
61
62    # Auto-detect feed type based on first element. This is prone
63    # to breakage, but then again we don't want to parse the whole
64    # feed ourselves.
65
66    # XXX - Make this extensible!
67
68    {
69        my $tag;
70
71        while ($$content_ref =~ /<(\S+)/sg) {
72            (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
73            my $first = substr $t, 0, 1;
74            $tag = $t, last unless $first eq '?' || $first eq '!';
75        }
76
77        if (! $tag) {
78            # confess "Could not find the first XML element";
79            return ();
80        }
81
82        $tag =~ s/^.*://;
83
84        if ($tag eq 'rss' || $tag eq 'RDF') {
85            return 'RSS';
86        } elsif ($tag eq 'feed') {
87            return 'Atom';
88        }
89    }
90
91    return ();
92}
93
94sub fetch_stream {
95    my ($self, $stream) = @_;
96
97    my $content = '';
98    my $ref = blessed $stream || '';
99    if (! $ref ) {
100        # if given a string, it's a filename
101        open( my $fh, '<', $stream )
102            or confess "Could not open file $stream: $!";
103        $content = do { local $/; <$fh> };
104        close $fh;
105    } else {
106        if ( $stream->isa('URI') ) {
107            # XXX - Shouldn't using LWP suffice here?
108            my $res = URI::Fetch->fetch($stream)
109                or confess "Failed to fetch URI $stream: " . URI::Fetch->errstr;
110
111            if ( $res->status == URI::Fetch::URI_GONE() ) {
112                confess "This feed has been permanently removed";
113            }
114            $content = $res->content;
115        } elsif ( $stream->isa('SCALAR') ) {
116            $content = $$stream;
117        } elsif ( $stream->isa('GLOB') ) {
118            $content = do { local $/; <$stream> };
119        } else {
120            confess "Don't know how to fetch '$ref'";
121        }
122    }
123
124    return \$content;
125}
126
1271;
128
129__END__
130
131=head1 NAME
132
133Data::Feed - Extensible Feed Parsing Tool
134
135=head1 SYNOPSIS
136
137  use Data::Feed;
138
139  # from a file
140  $feed = Data::Feed->parse( '/path/to/my/feed.xml' );
141
142  # from an URI
143  $feed = Data::Feed->parse( URI->new( 'http://example.com/atom.xml' ) );
144
145  # from a string
146  $feed = Data::Feed->parse( \$feed );
147
148  # from a handle
149  $feed = Data::Feed->parse( $fh );
150
151  # Data::Feed auto-guesses the type of a feed by its contents, but you can
152  # explicitly tell what parser to use
153
154  $feed = Data::Feed->new( parser => $myparser )->parse(...);
155
156=head1 METHODS
157
158=head2 parse($stream)
159
160=head2 find_parser($stream)
161
162=head2 guess_format($stream)
163
164=head2 fetch_stream($stream)
165
166=head1 AUTHORS
167
168Daisuke Maki C<< <daisuke@endeworks.jp> >>
169
170Taro Funaki C<< <t@33rpm.jp> >>
171
172=head1 LICENSE
173
174This program is free software; you can redistribute it and/or modify it
175under the same terms as Perl itself.
176
177See http://www.perl.com/perl/misc/Artistic.html
178
179=cut
Note: See TracBrowser for help on using the browser.