| 1 | # |
|---|
| 2 | # $Id$ |
|---|
| 3 | # |
|---|
| 4 | package XML::FromHTML; |
|---|
| 5 | use warnings; |
|---|
| 6 | use strict; |
|---|
| 7 | our $VERSION = sprintf "%d.%02d", q$Revision: 0.2 $ =~ /(\d+)/g; |
|---|
| 8 | |
|---|
| 9 | use base 'Exporter'; |
|---|
| 10 | our @EXPORT = qw/html2xml/; |
|---|
| 11 | |
|---|
| 12 | use base 'HTML::Tidy'; |
|---|
| 13 | |
|---|
# Constructor: builds an HTML::Tidy object preconfigured for XHTML output
# and reblesses it into the requested class.
#
# Arguments: tidy options, given either as a flat key/value list or as a
#            single hashref (the same shape this sub hands to
#            HTML::Tidy->new).
# Returns:   the object created by HTML::Tidy->new, blessed into $class.
sub new {
    my $class = shift;

    # A lone hashref must be unpacked first; splicing it into the hash
    # constructor as-is would yield an odd-length list and scramble every
    # key/value pair that follows it.
    my %user_conf = ( @_ == 1 && ref $_[0] eq 'HASH' ) ? %{ $_[0] } : @_;

    # The fixed settings are listed after the caller's so they always win.
    # NOTE(review): some HTML::Tidy releases reportedly reject tidy_mark as
    # an unsupported option -- confirm against the installed version.
    my $tidy = HTML::Tidy->new(
        {
            %user_conf,
            doctype          => 'omit',    # important for speed!
            indent           => 0,
            numeric_entities => 1,
            output_xhtml     => 1,
            tidy_mark        => 0,
            wrap             => 0,
        }
    );

    return bless $tidy, $class;
}
|---|
| 28 | |
|---|
# Functional shortcut: tidy an HTML string into XHTML in one call.
#
# Arguments: the HTML text, and optionally the name of its encoding.
# Returns:   whatever clean() returns for a freshly built object of this
#            package.
sub html2xml {
    my $html    = shift;
    my $charset = shift;

    # Transcode the local copy to utf8 only when an encoding was named;
    # Encode is pulled in at that point rather than at compile time.
    if ($charset) {
        require Encode;
        Encode::from_to( $html, $charset, 'utf8' );
    }

    my $tidy = __PACKAGE__->new;
    return $tidy->clean($html);
}
|---|
| 37 | |
|---|
# Self-test: when this file is the program being run (rather than loaded
# as a module), convert a sample anchor tag and emit the result via warn.
warn html2xml(qq{<a href="http://www.dan.co.jp/">Dan Kogai</a>})
    if $0 eq __FILE__;
|---|
| 41 | |
|---|
| 42 | 1; # End of XML::FromHTML |
|---|
| 43 | |
|---|
| 44 | =head1 NAME |
|---|
| 45 | |
|---|
| 46 | XML::FromHTML - Converts HTML to XHTML |
|---|
| 47 | |
|---|
| 48 | =head1 VERSION |
|---|
| 49 | |
|---|
| 50 | $Id$ |
|---|
| 51 | |
|---|
| 52 | =head1 SYNOPSIS |
|---|
| 53 | |
|---|
| 54 | use XML::FromHTML; |
|---|
| 55 | my $xhtml = XML::FromHTML->new->clean($html); |
|---|
| 56 | # or simply |
|---|
| 57 | my $xhtml = html2xml($html); |
|---|
| 58 | use XML::Simple; |
|---|
| 59 | my $obj = XMLin $xhtml; # works fine! |
|---|
| 60 | use XML::LibXML; |
|---|
| 61 | my $dom = XML::LibXML->new->parse_string($xhtml); # this, too! |
|---|
| 62 | |
|---|
| 63 | =head1 DESCRIPTION |
|---|
| 64 | |
|---|
| 65 | This module uses L<HTML::Tidy> just to turn HTML into well-formed XML (XHTML). That's all. |
|---|
| 66 | |
|---|
| 67 | =head1 EXPORT |
|---|
| 68 | |
|---|
| 69 | C<html2xml> by default. |
|---|
| 70 | |
|---|
| 71 | =head1 FUNCTIONS |
|---|
| 72 | |
|---|
| 73 | =head2 new |
|---|
| 74 | |
|---|
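| 75 | Creates an XML::FromHTML object, whose parent class is L<HTML::Tidy>. Any key/value options given are passed on to L<HTML::Tidy>, but C<doctype>, C<indent>, C<numeric_entities>, C<output_xhtml>, C<tidy_mark> and C<wrap> are always set by this module. |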
|---|
| 76 | |
|---|
| 77 | =head2 html2xml |
|---|
| 78 | |
|---|
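| 79 | Converts C<$html> to XHTML. If an optional second argument names the encoding of C<$html>, the input is first converted from that encoding to UTF-8 via L<Encode>. It is otherwise just a shortcut for: |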
|---|
| 80 | |
|---|
| 81 | XML::FromHTML->new->clean(shift); |
|---|
| 82 | |
|---|
| 83 | =head1 AUTHOR |
|---|
| 84 | |
|---|
| 85 | Dan Kogai, C<< <dankogai at dan.co.jp> >> |
|---|
| 86 | |
|---|
| 87 | =head1 BUGS |
|---|
| 88 | |
|---|
| 89 | Please report any bugs or feature requests to C<bug-xml-fromhtml at rt.cpan.org>, or through |
|---|
| 90 | the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=XML-FromHTML>. I will be notified, and then you'll |
|---|
| 91 | automatically be notified of progress on your bug as I make changes. |
|---|
| 92 | |
|---|
| 93 | =head1 SUPPORT |
|---|
| 94 | |
|---|
| 95 | You can find documentation for this module with the perldoc command. |
|---|
| 96 | |
|---|
| 97 | perldoc XML::FromHTML |
|---|
| 98 | |
|---|
| 99 | You can also look for information at: |
|---|
| 100 | |
|---|
| 101 | =over 4 |
|---|
| 102 | |
|---|
| 103 | =item * RT: CPAN's request tracker |
|---|
| 104 | |
|---|
| 105 | L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-FromHTML> |
|---|
| 106 | |
|---|
| 107 | =item * AnnoCPAN: Annotated CPAN documentation |
|---|
| 108 | |
|---|
| 109 | L<http://annocpan.org/dist/XML-FromHTML> |
|---|
| 110 | |
|---|
| 111 | =item * CPAN Ratings |
|---|
| 112 | |
|---|
| 113 | L<http://cpanratings.perl.org/d/XML-FromHTML> |
|---|
| 114 | |
|---|
| 115 | =item * Search CPAN |
|---|
| 116 | |
|---|
| 117 | L<http://search.cpan.org/dist/XML-FromHTML/> |
|---|
| 118 | |
|---|
| 119 | =back |
|---|
| 120 | |
|---|
| 121 | =head1 ACKNOWLEDGEMENTS |
|---|
| 122 | |
|---|
| 123 | L<HTML::Tidy>, L<XML::Liberal> |
|---|
| 124 | |
|---|
| 125 | =head1 COPYRIGHT & LICENSE |
|---|
| 126 | |
|---|
| 127 | Copyright 2009 Dan Kogai, all rights reserved. |
|---|
| 128 | |
|---|
| 129 | This program is free software; you can redistribute it and/or modify it |
|---|
| 130 | under the same terms as Perl itself. |
|---|
| 131 | |
|---|
| 132 | |
|---|
| 133 | =cut |
|---|