root/lang/perl/plagger/lib/Plagger/Plugin/Filter/ExtractBody.pm

Revision 268, 1.8 kB (checked in by nyarla, 11 months ago)

lang/perl/plagger: I imported making plugin files.

Line 
1package Plagger::Plugin::Filter::ExtractBody;
2
3use strict;
4use warnings;
5
6use Plagger::Util;
7use Plagger::Text;
8use HTML::TreeBuilder::XPath;
9
10use base qw( Plagger::Plugin );
11
12sub register {
13    my ( $self, $c ) = @_;
14    $c->register_hook(
15        $self,
16        'update.entry.fixup' => $self->can('update'),
17    );
18}
19
20sub update {
21    my ( $self, $c, $args ) = @_;
22    my $entry = $args->{'entry'};
23
24    return if ( ! $entry->body || ! $entry->body->is_html );
25
26    my $body = $entry->body->data;
27       $body = $self->extract( $body );
28       $body = Plagger::Text->new( type => 'html', data => $body );
29
30    $entry->body( $body );
31
32    return 1;
33}
34
35sub extract {
36    my ( $self, $text ) = @_;
37
38    my $tree = HTML::TreeBuilder::XPath->new;
39    $tree->parse( $text );
40    $tree->eof;
41
42    my $xpath = $self->conf->{'xpath'} || '//body';
43
44    no warnings 'redefine';
45    local *HTML::Element::_xml_escape = $self->can('escape_xml');
46    use warnings;
47
48    my $body = q{};
49
50    for my $node ( $tree->findnodes( $xpath ) ) {
51        $body .= ( $node->isElementNode ) ? $node->as_XML : $node->getValue ;
52    }
53
54    return $body;
55}
56
57sub escape_xml {
58    for my $x ( @_ ) {
59        $x = Plagger::Util::encode_xml( $x );
60    }
61}
62
631;
64__END__
65
66=head1 NAME
67
68Plagger::Plugin::Filter::ExtractBody - Extracting element from C<Plagger::Entry-E<gt>body>
69
70=head1 SYNOPSIS
71
72  - module: Filter::ExtractBody
73
74=head1 DESCRIPTION
75
76Extracting element from C<Plagger::Entry-E<gt>body> using XPath expression.
77
78=head1 CONFIG
79
80=head2 xpath
81
82XPath expression for extract.
83
84=head1 AUTHOR
85
86Naoki Okamura (Nyarla,) E<lt>thotep@nyarla.netE<gt>
87
88=head1 LICENSE
89
90This Plug-in is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
91
92=head1 SEE ALSO
93
94L<Plagger>
95
96=cut
Note: See TracBrowser for help on using the browser.