package Plagger::Plugin::Filter::ExtractBody;

use strict;
use warnings;

use Plagger::Util;
use Plagger::Text;
use HTML::TreeBuilder::XPath;

use base qw( Plagger::Plugin );

# Plugin entry point: attaches this plugin's update() method to the
# 'update.entry.fixup' hook on the given context.
#
# Parameters:
#   $self - the plugin instance.
#   $c    - the context object offering register_hook().
#
# Returns whatever register_hook() returns.
sub register {
    my ( $self, $c ) = @_;

    # Looked up with can() rather than \&update, so the method is resolved
    # on the invocant and a subclass override is honoured.
    my $handler = $self->can('update');

    return $c->register_hook( $self, 'update.entry.fixup' => $handler );
}

# Hook handler registered for 'update.entry.fixup'.
#
# Takes the entry from $args->{entry}. When the entry has an HTML body, that
# body is replaced by a new Plagger::Text of type 'html' whose data is the
# result of extract() applied to the old body data.
#
# Returns 1 after the body has been replaced; returns nothing when the entry
# has no body or the body is not HTML.
sub update {
    my ( $self, $c, $args ) = @_;

    my $entry = $args->{'entry'};

    # Only HTML bodies can be run through the HTML parser.
    return unless $entry->body && $entry->body->is_html;

    my $html      = $entry->body->data;
    my $extracted = $self->extract($html);
    my $text      = Plagger::Text->new( type => 'html', data => $extracted );

    $entry->body($text);

    return 1;
}

# Parses $text as HTML and returns the serialized nodes selected by the
# configured XPath expression.
#
# Parameters:
#   $self - the plugin instance; its conf->{xpath} supplies the expression,
#           falling back to '//body' when that value is false.
#   $text - the HTML source to parse.
#
# Returns a string: element nodes contribute their as_XML output, any other
# node contributes its getValue, concatenated in the order findnodes()
# returns them. The string is empty when nothing matches.
#
# If evaluating the expression or serializing a node dies, the parse tree is
# released first and the original error is then rethrown.
sub extract {
    my ( $self, $text ) = @_;

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse( $text );
    $tree->eof;

    my $xpath = $self->conf->{'xpath'} || '//body';

    # Swap HTML::Element's private _xml_escape for escape_xml until this sub
    # returns (local restores the original). Presumably this is what makes
    # as_XML escape text via Plagger::Util::encode_xml -- TODO confirm against
    # the installed HTML::Element.
    no warnings 'redefine';
    local *HTML::Element::_xml_escape = $self->can('escape_xml');
    use warnings;

    my $body = q{};

    # Collect inside eval so the tree is released even when findnodes() dies
    # (for example on a malformed XPath expression from the configuration).
    my $ok = eval {
        for my $node ( $tree->findnodes( $xpath ) ) {
            $body .= $node->isElementNode ? $node->as_XML : $node->getValue;
        }
        1;
    };
    my $error = $@;

    # HTML::TreeBuilder trees must be destroyed explicitly; without this the
    # whole tree can stay in memory for the life of the process.
    $tree->delete;

    die( $error || "Filter::ExtractBody: XPath evaluation failed\n" ) if !$ok;

    return $body;
}

# In-place escaper: overwrites each argument with the result of passing it to
# Plagger::Util::encode_xml. Arguments are modified through @_ aliasing, so
# the caller's own variables are changed. extract() installs this sub in
# place of HTML::Element::_xml_escape.
sub escape_xml {
    for my $i ( 0 .. $#_ ) {
        $_[$i] = Plagger::Util::encode_xml( $_[$i] );
    }
}

1;
__END__

=head1 NAME

Plagger::Plugin::Filter::ExtractBody - Extract elements from C<Plagger::Entry-E<gt>body>

=head1 SYNOPSIS

  - module: Filter::ExtractBody

=head1 DESCRIPTION

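This plugin extracts the nodes matching an XPath expression from C<Plagger::Entry-E<gt>body> and replaces the entry body with the result. Only HTML bodies are processed; other entries are left untouched.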

=head1 CONFIG

=head2 xpath

XPath expression selecting the nodes to extract. Defaults to C<//body>.

=head1 AUTHOR

Naoki Okamura (Nyarla,) E<lt>thotep@nyarla.netE<gt>

=head1 LICENSE

This Plug-in is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<Plagger>

=cut
