| 1 | package Plagger::Plugin::Filter::ExtractBody;
|
|---|
| 2 |
|
|---|
| 3 | use strict;
|
|---|
| 4 | use warnings;
|
|---|
| 5 |
|
|---|
| 6 | use Plagger::Util;
|
|---|
| 7 | use Plagger::Text;
|
|---|
| 8 | use HTML::TreeBuilder::XPath;
|
|---|
| 9 |
|
|---|
| 10 | use base qw( Plagger::Plugin );
|
|---|
| 11 |
|
|---|
# Plugin entry point: attaches this plugin's update() handler to the
# 'update.entry.fixup' hook of the given context.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my $self    = shift;
    my $context = shift;

    # Look the handler up with can() rather than taking \&update directly,
    # so method resolution on $self decides which update() is registered.
    my $handler = $self->can('update');

    $context->register_hook( $self, 'update.entry.fixup' => $handler );
}
|
|---|
| 19 |
|
|---|
# Hook handler for 'update.entry.fixup'.
#
# Replaces the entry's HTML body with the fragment produced by extract().
# Entries that have no body, or whose body is not HTML, are left untouched.
#
#   $self - plugin instance (provides extract())
#   $c    - context object; not used by this handler
#   $args - hash reference; the entry is read from $args->{entry}
#
# Returns 1 after the body has been replaced, and nothing (bare return)
# when the entry is skipped.
sub update {
    my ( $self, $c, $args ) = @_;

    my $entry = $args->{entry};

    # Only HTML bodies can be fed to the HTML parser.
    return unless $entry->body && $entry->body->is_html;

    my $html      = $entry->body->data;
    my $extracted = $self->extract( $html );

    # The extracted fragment is still markup, so keep the 'html' type.
    my $replacement = Plagger::Text->new( type => 'html', data => $extracted );
    $entry->body( $replacement );

    return 1;
}
|
|---|
| 34 |
|
|---|
# Parses $text as HTML and returns the serialised form of every node
# selected by the configured XPath expression.
#
#   $self - plugin instance; reads $self->conf->{xpath}, falling back to
#           '//body' when it is unset or false
#   $text - HTML source to parse
#
# Element nodes contribute their as_XML output; any other node contributes
# its getValue. The pieces are concatenated in the order findnodes() returns
# them. Returns an empty string when nothing matches.
#
# Any exception raised while evaluating the expression or serialising nodes
# is re-thrown after the parse tree has been released.
sub extract {
    my ( $self, $text ) = @_;

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse( $text );
    $tree->eof;

    my $xpath = $self->conf->{'xpath'} || '//body';

    # Temporarily replace HTML::Element's XML escaper with this plugin's
    # escape_xml for the dynamic scope of this call (presumably the routine
    # as_XML uses to escape text -- confirm against HTML::Element).
    no warnings 'redefine';
    local *HTML::Element::_xml_escape = $self->can('escape_xml');
    use warnings;

    my $body = q{};

    # Run the query inside eval so the tree is released even on failure.
    my $ok = eval {
        for my $node ( $tree->findnodes( $xpath ) ) {
            $body .= $node->isElementNode ? $node->as_XML : $node->getValue;
        }
        1;
    };
    my $error = $@;

    # HTML::TreeBuilder trees are not freed when they go out of scope unless
    # weak references are enabled, so release the tree explicitly. $body only
    # holds plain strings by now and is unaffected.
    $tree->delete;

    die $error if !$ok;

    return $body;
}
|
|---|
| 56 |
|
|---|
# XML-escapes every argument in place using Plagger::Util::encode_xml.
#
# extract() installs this sub as a temporary replacement for
# HTML::Element::_xml_escape, so it is called as a plain function and must
# modify the caller's variables rather than return new strings.
sub escape_xml {
    # The elements of @_ alias the caller's variables, so assigning through
    # $_[$idx] rewrites the caller's strings. The return value is not
    # meaningful.
    for my $idx ( 0 .. $#_ ) {
        $_[$idx] = Plagger::Util::encode_xml( $_[$idx] );
    }
}
|
|---|
| 62 |
|
|---|
| 63 | 1;
|
|---|
| 64 | __END__
|
|---|
| 65 |
|
|---|
| 66 | =head1 NAME
|
|---|
| 67 |
|
|---|
Plagger::Plugin::Filter::ExtractBody - Extract elements from C<Plagger::Entry-E<gt>body>
|
|---|
| 69 |
|
|---|
| 70 | =head1 SYNOPSIS
|
|---|
| 71 |
|
|---|
  - module: Filter::ExtractBody
    config:
      xpath: //div[@class="entry"]
|
|---|
| 73 |
|
|---|
| 74 | =head1 DESCRIPTION
|
|---|
| 75 |
|
|---|
Extracts elements from C<Plagger::Entry-E<gt>body> using an XPath expression. Only HTML bodies are processed; the body is replaced with the serialised form of the matching nodes.
|
|---|
| 77 |
|
|---|
| 78 | =head1 CONFIG
|
|---|
| 79 |
|
|---|
| 80 | =head2 xpath
|
|---|
| 81 |
|
|---|
XPath expression selecting the nodes to extract. Defaults to C<//body>.
|
|---|
| 83 |
|
|---|
| 84 | =head1 AUTHOR
|
|---|
| 85 |
|
|---|
| 86 | Naoki Okamura (Nyarla,) E<lt>thotep@nyarla.netE<gt>
|
|---|
| 87 |
|
|---|
| 88 | =head1 LICENSE
|
|---|
| 89 |
|
|---|
| 90 | This Plug-in is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
|---|
| 91 |
|
|---|
| 92 | =head1 SEE ALSO
|
|---|
| 93 |
|
|---|
| 94 | L<Plagger>
|
|---|
| 95 |
|
|---|
| 96 | =cut
|
|---|