root/websites/AjaxTB/tb/cgi-lib/XML/TreePP.pm @ 4124

Revision 4124, 25.9 kB (checked in by kawa0117, 5 years ago)

ajaxtb-20060919.zip

Line 
1=head1 NAME
2
3XML::TreePP -- Pure Perl implementation for parsing/writing xml files
4
5=head1 SYNOPSIS
6
7parse xml file into hash tree
8
9    use XML::TreePP;
10    my $tpp = XML::TreePP->new();
11    my $tree = $tpp->parsefile( "index.rdf" );
12    print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n";
13    print "URL:   ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n";
14
15write xml as string from hash tree
16
17    use XML::TreePP;
18    my $tpp = XML::TreePP->new();
19    my $tree = { rss => { channel => { item => [ {
20        title   => "The Perl Directory",
21        link    => "http://www.perl.org/",
22    }, {
23        title   => "The Comprehensive Perl Archive Network",
24        link    => "http://cpan.perl.org/",
25    } ] } } };
26    my $xml = $tpp->write( $tree );
27    print $xml;
28
29get remote xml file with HTTP-GET and parse it into hash tree
30
31    use XML::TreePP;
32    my $tpp = XML::TreePP->new();
33    my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" );
34    print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n";
35    print "URL:   ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n";
36
37get remote xml file with HTTP-POST and parse it into hash tree
38
39    use XML::TreePP;
40    my $tpp = XML::TreePP->new( force_array => [qw( item )] );
41    my $cgiurl = "http://search.hatena.ne.jp/keyword";
42    my $keyword = "ajax";
43    my $cgiquery = "mode=rss2&word=".$keyword;
44    my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery );
45    print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n";
46    print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n";
47
48=head1 DESCRIPTION
49
50XML::TreePP module parses XML file and expands it for a hash tree.
51And also generate XML file from a hash tree.
52This is a pure Perl implementation.
53You can also download XML from remote web server
54like XMLHttpRequest object at JavaScript language.
55
56=head1 EXAMPLES
57
58=head2 Parse XML file
59
60Sample XML source:
61
62    <?xml version="1.0" encoding="UTF-8"?>
63    <family name="Kawasaki">
64        <father>Yasuhisa</father>
65        <mother>Chizuko</mother>
66        <children>
67            <girl>Shiori</girl>
68            <boy>Yusuke</boy>
69            <boy>Kairi</boy>
70        </children>
71    </family>
72
73Sample program to read a xml file and dump it:
74
75    use XML::TreePP;
76    use Data::Dumper;
77    my $tpp = XML::TreePP->new();
78    my $tree = $tpp->parsefile( "family.xml" );
79    my $text = Dumper( $tree );
80    print $text;
81
82Result dumped:
83
84    $VAR1 = {
85        'family' => {
86            '-name' => 'Kawasaki',
87            'father' => 'Yasuhisa',
88            'mother' => 'Chizuko',
89            'children' => {
90                'girl' => 'Shiori'
91                'boy' => [
92                    'Yusuke',
93                    'Kairi'
94                ],
95            }
96        }
97    };
98
99Details:
100
101    print $tree->{family}->{father};        # the father's given name.
102
103The prefix '-' is added on every attributes' name.
104
105    print $tree->{family}->{"-name"};       # the family name of the family
106
107The array is used because the family has two boys.
108
109    print $tree->{family}->{children}->{boy}->[1];  # The second boy's name
110    print $tree->{family}->{children}->{girl};      # The girl's name
111
112=head2 Text node and attributes:
113
114If a element has both of a text node and attributes
115or both of a text node and other child nodes,
116value of a text node is moved to '#text' like child nodes.
117
118    use XML::TreePP;
119    use Data::Dumper;
120    my $tpp = XML::TreePP->new();
121    my $source = '<span class="author">Kawasaki Yusuke</span>';
122    my $tree = $tpp->parse( $source );
123    my $text = Dumper( $tree );
124    print $text;
125
126The result dumped is following:
127
128    $VAR1 = {
129        'span' => {
130            '-class' => 'author',
131            '#text' => 'Kawasaki Yusuke'
132        }
133    };
134
135The special node name of '#text' is used because this elements
136has attribute(s) in addition to the text node.
137
138=head1 CONSTRUCTOR AND OPTIONS
139
140=head2 $tpp = XML::TreePP->new();
141
142This constructor method returns a new XML::TreePP object.
143
144=head2 $tpp = XML::TreePP->new( %options );
145
146Its first argument is a hash variable to set one or more options
147like following:
148
149=head2 $tpp->set( option_name => $option_value );
150
151This method sets a option value for "option_name".
152If $option_value is not defined, its option is deleted.
153Options below are available:
154
155=head2 $tpp->set( output_encoding => 'UTF-8' );
156
157You can define a encoding of xml file generated by write/writefile
158methods. On Perl 5.8.x and later, you can select it from every
159encodings supported by Encode.pm. On Perl 5.6.x or before with
160Jcode.pm, you can use 'Shift_JIS', 'EUC-JP', 'ISO-2022-JP' and
161'UTF-8'. The default value is 'UTF-8'.
162
163=head2 $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] );
164
165This option allows you to specify a list of element names which
166should always be forced into an array representation
167The default value is null, it means that context of the elements
168will determine to make array or to keep it scalar.
169
170=head2 $tpp->set( first_out => [ 'link', 'title', '-type' ] );
171
172This option allows you to specify a list of element/attribute
173names which should always appears at first on output XML code.
174The default value is null, it means alphabetical order is used.
175
176=head2 $tpp->set( last_out => [ 'items', 'item', 'entry' ] );
177
178This option allows you to specify a list of element/attribute
179names which should always appears at last on output XML code.
180
181=head2 $tpp->set( cdata_scalar_ref => 1 );
182
183This option allows you to convert a cdata section into a reference
184for scalar on parsing XML source. If this option is false, per
185default, cdata section is converted into a scalar.
186
187=head2 $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' );
188
189This option allows you to specify a HTTP_USER_AGENT string which
190is used by parsehttp() method.
191The default string is "XML-TreePP/#.##", where "#.##" is
192substituted with the version number of this library.
193
194=head2 $tpp->set( attr_prefix => '@' );
195
196This option allows you to specify a prefix character(s) which
197is inserted before each attribute names.
198The default character is '-'.
199Or set '@' to access attribute values like E4X, ECMAScript for XML.
200
201=head2 $tpp->set( ignore_error => 1 );
202
203This module calls Carp::croak function on an error per default.
204This option makes all errors ignored and just return.
205
206=head2 $tpp->set( xml_decl => '' );
207
208This module generates an XML declaration on writing an XML code per default.
209This option forces to change or leave it.
210
211=head2 $tpp->get( "option_name" );
212
213This method returns a current option value for "option_name".
214
215=head1 METHODS
216
217=head2 $tree = $tpp->parse( $source );
218
219This method reads XML source and returns a hash tree converted.
220The first argument is a scalar or a reference to a scalar.
221
222=head2 $tree = $tpp->parsefile( $file );
223
224This method reads a XML file and returns a hash tree converted.
225The first argument is a filename.
226
227=head2 $tree = $tpp->parsehttp( $method, $url, $body, $head );
228
229This method receives a XML file from a remote server via HTTP and
230returns a hash tree converted.
231$method is a method of HTTP connection: GET/POST/PUT/DELETE
232$url is an URI of a XML file.
233$body is a request body when you use POST method.
234$head is a request headers as a hash ref.
235LWP::UserAgent module or HTTP::Lite module is required to fetch a file.
236
237=head2 $source = $tpp->write( $tree, $encode );
238
239This method parses a hash tree and returns a XML source generated.
240$tree is a referecen to a hash tree.
241
242=head2 $tpp->writefile( $file, $tree, $encode );
243
244This method parses a hash tree and writes a XML source into a file.
245$file is a filename to create.
246$tree is a referecen to a hash tree.
247
248=head1 AUTHOR
249
250Yusuke Kawasaki, http://www.kawa.net/
251
252=head1 COPYRIGHT AND LICENSE
253
254Copyright (c) 2006 Yusuke Kawasaki.  All rights reserved.  This program
255is free software; you can redistribute it and/or modify it under the same
256terms as Perl itself.
257
258=cut
259
260package XML::TreePP;
261use strict;
262use Carp;
263use Symbol;
264
265use vars qw( $VERSION );
266$VERSION = '0.18';
267
268my $XML_ENCODING      = 'UTF-8';
269my $INTERNAL_ENCODING = 'UTF-8';
270my $USER_AGENT        = 'XML-TreePP/'.$VERSION.' ';
271my $ATTR_PREFIX       = '-';
272
273sub new {
274    my $package = shift;
275    my $self    = {@_};
276    bless $self, $package;
277    $self;
278}
279
280sub die {
281    my $self = shift;
282    my $mess = shift;
283    return if $self->{ignore_error};
284    Carp::croak $mess;
285}
286
287sub warn {
288    my $self = shift;
289    my $mess = shift;
290    return if $self->{ignore_error};
291    Carp::carp $mess;
292}
293
294sub set {
295    my $self = shift;
296    my $key  = shift;
297    my $val  = shift;
298    if ( defined $val ) {
299        $self->{$key} = $val;
300    }
301    else {
302        delete $self->{$key};
303    }
304}
305
306sub get {
307    my $self = shift;
308    my $key  = shift;
309    $self->{$key} if exists $self->{$key};
310}
311
312sub writefile {
313    my $self   = shift;
314    my $file   = shift;
315    my $tree   = shift or return $self->die( 'Invalid tree' );
316    my $encode = shift;
317    return $self->die( 'Invalid filename' ) unless defined $file;
318    my $text = $self->write( $tree, $encode );
319    $self->write_raw_xml( $file, $text );
320}
321
322sub write {
323    my $self = shift;
324    my $tree = shift or return $self->die( 'Invalid tree' );
325    my $from = $self->{internal_encoding} || $INTERNAL_ENCODING;
326    my $to   = shift || $self->{output_encoding} || $XML_ENCODING;
327    my $decl = $self->{xml_decl};
328    $decl = '<?xml version="1.0" encoding="' . $to . '" ?>' unless defined $decl;
329    if ( exists $self->{first_out} ) {
330        my $keys = $self->{first_out};
331        $keys = [$keys] unless ref $keys;
332        $self->{__first_out} = { map { $_ => 1 } @$keys };
333    }
334    if ( exists $self->{last_out} ) {
335        my $keys = $self->{last_out};
336        $keys = [$keys] unless ref $keys;
337        $self->{__last_out} = { map { $_ => 1 } @$keys };
338    }
339    my $text = $self->hash_to_xml( undef, $tree );
340    if ( $from && $to ) {
341        my $stat = $self->encode_from_to( \$text, $from, $to );
342        return $self->die( "Unsupported encoding: $to" ) unless $stat;
343    }
344    return $text if ( $decl eq '' );
345    join( "\n", $decl, $text );
346}
347
348sub parsehttp {
349    my $self = shift;
350
351    if ( exists $self->{user_agent} ) {
352        my $agent = $self->{user_agent};
353        $agent .= $USER_AGENT if ( $agent =~ /\s$/s );
354        $self->{__user_agent} = $agent if ( $agent ne '' );
355    } else {
356        $self->{__user_agent} = $USER_AGENT;
357    }
358
359    my $http = $self->{__http_module};
360    unless ( $http ) {
361        $http = &find_http_module();
362        $self->{__http_module} = $http;
363    }
364
365    if ( $http eq 'LWP::UserAgent' ) {
366        return $self->parsehttp_lwp(@_);
367    }
368    elsif ( $http eq 'HTTP::Lite' ) {
369        return $self->parsehttp_lite(@_);
370    }
371    else {
372        return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" );
373    }
374}
375
376sub find_http_module {
377    return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION;
378    return 'HTTP::Lite'     if defined $HTTP::Lite::VERSION;
379
380    local $@;
381    eval { require LWP::UserAgent; };
382    return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION;
383
384    eval { require HTTP::Lite; };
385    return 'HTTP::Lite'     if defined $HTTP::Lite::VERSION;
386    return;
387}
388
389sub parsehttp_lwp {
390    my $self   = shift;
391    my $method = shift or return $self->die( 'Invalid HTTP method' );
392    my $url    = shift or return $self->die( 'Invalid URL' );
393    my $body   = shift;
394    my $header = shift;
395
396    my $ua = LWP::UserAgent->new();
397    $ua->timeout(10);
398    $ua->env_proxy();
399
400    $ua->agent( $self->{__user_agent} ) if defined $self->{__user_agent};
401    my $req = HTTP::Request->new( $method, $url );
402    my $ct = 0;
403    if ( ref $header ) {
404        foreach my $field ( sort keys %$header ) {
405            my $value = $header->{$field};
406            $req->header( $field => $value );
407            $ct ++ if ( $field =~ /^Content-Type$/i );
408        }
409    }
410    if ( defined $body && ! $ct ) {
411        $req->header( 'Content-Type' => 'application/x-www-form-urlencoded' );
412    }
413    $req->content($body) if defined $body;
414    my $res = $ua->request($req);
415    return unless $res->is_success();
416    my $text = $res->content();
417    $self->parse( \$text );
418}
419
420sub parsehttp_lite {
421    my $self   = shift;
422    my $method = shift or return $self->die( 'Invalid HTTP method' );
423    my $url    = shift or return $self->die( 'Invalid URL' );
424    my $body   = shift;
425    my $header = shift;
426
427    my $http = HTTP::Lite->new();
428    $http->method($method);
429    my $ua = 0;
430    if ( ref $header ) {
431        foreach my $field ( sort keys %$header ) {
432            my $value = $header->{$field};
433            $http->add_req_header( $field, $value );
434            $ua ++ if ( $field =~ /^User-Agent$/i );
435        }
436    }
437    if ( defined $self->{__user_agent} && ! $ua ) {
438        $http->add_req_header( 'User-Agent', $self->{__user_agent} );
439    }
440    $http->{content} = $body if defined $body;
441    $http->request($url) or return;
442    my $text = $http->body();
443    $self->parse( \$text );
444}
445
446sub parsefile {
447    my $self = shift;
448    my $file = shift;
449    return $self->die( 'Invalid filename' ) unless defined $file;
450    my $text = $self->read_raw_xml($file);
451    $self->parse( \$text );
452}
453
454sub parse {
455    my $self = shift;
456    my $textref = ref $_[0] ? $_[0] : \$_[0];
457    return $self->die( 'Invalid XML source' ) if ( ref($textref) ne 'SCALAR' );
458    return $self->die( 'Null XML source' ) unless defined $$textref;
459
460    my $to = $self->{internal_encoding} || $INTERNAL_ENCODING;
461    if ($to) {
462        my $from = &xml_decl_encoding($textref);
463        if ($from) {
464            my $stat = $self->encode_from_to( $textref, $from, $to );
465            return $self->die( "Unsupported encoding: $from" ) unless $stat;
466        }
467    }
468    if ( exists $self->{force_array} ) {
469        my $force = $self->{force_array};
470        $force = [$force] unless ref $force;
471        $self->{__force_array} = { map { $_ => 1 } @$force };
472    }
473    my $flat = $self->xml_to_flat($textref);
474    my $tree = $self->flat_to_tree( $flat, '' );
475    wantarray ? ( $tree, $$textref ) : $tree;
476}
477
478sub xml_to_flat {
479    my $self    = shift;
480    my $textref = shift;    # reference
481    my $flat    = [];
482    my $prefix = $self->{attr_prefix} || $ATTR_PREFIX;
483    while ( $$textref =~ m{
484        ([^<]*) <
485        ((
486            \? ([^<>]*) \?
487        )|(
488            \!\[CDATA\[(.*?)\]\]
489        )|(
490            \!DOCTYPE\s+([^\[\]<>]*(?:\[.*?\]\s*)?)
491        )|(
492            \!--(.*?)--
493        )|(
494            ([^\!\?\s<>](?:"[^"]*"|'[^']*'|[^"'<>])*)
495        ))
496        > ([^<]*)
497    }sxg ) {
498        my (
499            $ahead,     $match,    $typePI,   $contPI,   $typeCDATA,
500            $contCDATA, $typeDocT, $contDocT, $typeCmnt, $contCmnt,
501            $typeElem,  $contElem, $follow
502          )
503          = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 );
504        if ( defined $ahead && $ahead =~ /\S/ ) {
505            $self->warn( "Invalid string: [$ahead] before <$match>" );
506        }
507
508        if ($typeElem) {                        # Element
509            my $node = {};
510            if ( $contElem =~ s#^/## ) {
511                $node->{endTag}++;
512            }
513            elsif ( $contElem =~ s#/$## ) {
514                # one line
515            }
516            else {
517                $node->{startTag}++;
518            }
519            $node->{tagName} = $1 if ( $contElem =~ s#^(\S+)\s*## );
520            unless ( $node->{endTag} ) {
521                my $attr = {};
522                while (
523                    $contElem =~ m/([^\s\=\"\']+)=(?:(")(.*?)"|'(.*?)')/sg )
524                {
525                    my $key = $1;
526                    my $val = &xml_unescape( $2 ? $3 : $4 );
527                    $attr->{$prefix.$key} = $val;
528                }
529                $node->{attributes} = $attr if scalar keys %$attr;
530            }
531            push( @$flat, $node );
532        }
533        elsif ($typeCDATA) {    ## CDATASection
534            if ( exists $self->{cdata_scalar_ref} && $self->{cdata_scalar_ref} ) {
535                push( @$flat, \$contCDATA );    # as reference for scalar
536            }
537            else {
538                push( @$flat, $contCDATA );     # as scalar like text node
539            }
540        }
541        elsif ($typeCmnt) {                     # Comment (ignore)
542        }
543        elsif ($typeDocT) {                     # DocumentType (ignore)
544        }
545        elsif ($typePI) {                       # ProcessingInstruction (ignore)
546        }
547        else {
548            $self->warn( "Invalid Tag: <$match>" );
549        }
550        if ( $follow =~ /\S/ ) {                # text node
551            my $val = &xml_unescape($follow);
552            push( @$flat, $val );
553        }
554    }
555    $flat;
556}
557
558sub flat_to_tree {
559    my $self   = shift;
560    my $source = shift;
561    my $parent = shift;
562    my $tree   = {};
563    my $text   = [];
564
565    while ( scalar @$source ) {
566        my $node = shift @$source;
567        if ( !ref $node || UNIVERSAL::isa( $node, "SCALAR" ) ) {
568            push( @$text, $node );              # cdata or text node
569            next;
570        }
571        my $name = $node->{tagName};
572        if ( $node->{endTag} ) {
573            last if ( $parent eq $name );
574            return $self->die( "Invalid tag sequence: <$parent></$name>" );
575        }
576        my $elem = $node->{attributes};
577        if ( $node->{startTag} ) {              # recursive call
578            my $child = $self->flat_to_tree( $source, $name );
579            if ( ref $elem && scalar keys %$elem ) {
580                if ( UNIVERSAL::isa( $child, "HASH" ) ) {
581                    # some attributes and some child nodes
582                    foreach my $key ( keys %$child ) {
583                        $elem->{$key} = $child->{$key};
584                    }
585                }
586                elsif ( defined $child ) {
587                    # some attributes and text node
588                    $elem->{'#text'} = $child;
589                }
590            }
591            else {
592                # no attributes and text node or nothing
593                $elem = $child;
594            }
595        }
596        # next unless defined $elem;
597        $tree->{$name} ||= [];
598        push( @{ $tree->{$name} }, $elem );
599    }
600    foreach my $key ( keys %$tree ) {
601        next if $self->{__force_array}->{$key};
602        next if ( 1 < scalar @{ $tree->{$key} } );
603        $tree->{$key} = shift @{ $tree->{$key} };
604    }
605    if ( scalar @$text ) {
606        if ( scalar @$text == 1 ) {
607            $text = shift @$text;
608        }
609        elsif ( ! scalar grep {ref $_} @$text ) {
610            $text = join( '', @$text );
611        }
612        else {
613            my $join = join( '', map {ref $_ ? $$_ : $_} @$text );
614            $text = \$join;
615        }
616        if ( scalar keys %$tree ) {
617            # some child nodes and also text node
618            $tree->{'#text'} = $text;
619        }
620        else {
621            # only text node without child nodes
622            $tree = $text;
623        }
624    }
625    $tree;
626}
627
628sub hash_to_xml {
629    my $self      = shift;
630    my $name      = shift;
631    my $hash      = shift;
632    my $out       = [];
633    my $attr      = [];
634    my $allkeys   = [ sort keys %$hash ];
635    my $firstkeys = [ grep { $self->{__first_out}->{$_} } @$allkeys ] if ref $self->{__first_out};
636    my $lastkeys = [ grep { $self->{__last_out}->{$_} } @$allkeys ] if ref $self->{__last_out};
637    $allkeys = [ grep { !$self->{__first_out}->{$_} } @$allkeys ] if ref $self->{__first_out};
638    $allkeys = [ grep { !$self->{__last_out}->{$_} } @$allkeys ] if ref $self->{__last_out};
639    my $prefix = $self->{attr_prefix} || $ATTR_PREFIX;
640    $prefix = "\Q$prefix\E";
641
642    foreach my $loopkey ( $firstkeys, $allkeys, $lastkeys ) {
643        next unless ref $loopkey;
644        foreach my $key ( grep { !/^$prefix/ } @$loopkey ) {
645            my $val = $hash->{$key};
646            if ( !defined $val ) {
647                push( @$out, "<$key />" );
648            }
649            elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) {
650                my $child = $self->array_to_xml( $key, $val );
651                push( @$out, $child );
652            }
653            elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) {
654                my $child = $self->scalaref_to_cdata( $key, $val );
655                push( @$out, $child );
656            }
657            elsif ( ref $val ) {
658                my $child = $self->hash_to_xml( $key, $val );
659                push( @$out, $child );
660            }
661            else {
662                my $child = $self->scalar_to_xml( $key, $val );
663                push( @$out, $child );
664            }
665        }
666        foreach my $key ( grep { /^$prefix/ } @$loopkey ) {
667            my $name = ( $key =~ /^$prefix(.*)$/s )[0];
668            my $val = &xml_escape( $hash->{$key} );
669            push( @$attr, ' ' . $name . '="' . $val . '"' );
670        }
671    }
672    my $jattr = join( '', @$attr );
673
674    # s/^(\s*<)/  $1/mg foreach @$out;              # indent
675    my $text = join( '', @$out );
676    if ( defined $name ) {
677        if ( scalar @$out ) {
678            $text = "<$name$jattr>$text</$name>\n";
679        }
680        else {
681            $text = "<$name$jattr />\n";
682        }
683    }
684    $text;
685}
686
687sub array_to_xml {
688    my $self  = shift;
689    my $name  = shift;
690    my $array = shift;
691    my $out   = [];
692    foreach my $val (@$array) {
693        if ( !defined $val ) {
694            push( @$out, "<$name />\n" );
695        }
696        elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) {
697            my $child = $self->array_to_xml( $name, $val );
698            push( @$out, $child );
699        }
700        elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) {
701            my $child = $self->scalaref_to_cdata( $name, $val );
702            push( @$out, $child );
703        }
704        elsif ( ref $val ) {
705            my $child = $self->hash_to_xml( $name, $val );
706            push( @$out, $child );
707        }
708        else {
709            my $child = $self->scalar_to_xml( $name, $val );
710            push( @$out, $child );
711        }
712    }
713
714    # s/^(\s*<)/  $1/mg foreach @$out;              # indent
715    my $text = join( '', @$out );
716    $text;
717}
718
719sub scalaref_to_cdata {
720    my $self = shift;
721    my $name = shift;
722    my $ref  = shift;
723    my $text = '<![CDATA[' . $$ref . ']]>';
724    $text = "<$name>$text</$name>\n" if ( $name ne '#text' );
725    $text;
726}
727
728sub scalar_to_xml {
729    my $self   = shift;
730    my $name   = shift;
731    my $scalar = shift;
732    my $copy   = $scalar;
733    my $text   = &xml_escape($copy);
734    $text = "<$name>$text</$name>\n" if ( $name ne '#text' );
735    $text;
736}
737
738sub write_raw_xml {
739    my $self = shift;
740    my $file = shift;
741    my $fh   = Symbol::gensym();
742    open( $fh, ">$file" ) or return $self->die( "$! - $file" );
743    print $fh @_;
744    close($fh);
745}
746
747sub read_raw_xml {
748    my $self = shift;
749    my $file = shift;
750    my $fh   = Symbol::gensym();
751    open( $fh, $file ) or return $self->die( "$! - $file" );
752    local $/ = undef;
753    my $text = <$fh>;
754    close($fh);
755    $text;
756}
757
758sub xml_decl_encoding {
759    my $textref = shift;
760    return unless defined $$textref;
761    my $args    = ( $$textref =~ /^\s*<\?xml(\s+\S.*)\?>/s )[0] or return;
762    my $getcode = ( $args =~ /\s+encoding="(.*?)"/ )[0] or return;
763    $getcode;
764}
765
766sub encode_from_to {
767    my $self   = shift;
768    my $txtref = shift or return;
769    my $from   = shift or return;
770    my $to     = shift or return;
771    return $to if ( uc($from) eq uc($to) );
772    &load_encode() if ( $] > 5.008 );
773
774    unless ( defined $Encode::EUCJPMS::VERSION ) {
775        $from = 'EUC-JP' if ( $from =~ /\beuc-?jp-?(win|ms)$/i );
776        $to   = 'EUC-JP' if ( $to   =~ /\beuc-?jp-?(win|ms)$/i );
777    }
778
779    if ( defined $Encode::VERSION ) {
780        my $check = ( $Encode::VERSION < 2.13 ) ? 0x400 : Encode::FB_XMLCREF();
781        Encode::from_to( $$txtref, $from, $to, $check );
782    }
783    elsif ( (  uc($from) eq 'ISO-8859-1'
784            || uc($from) eq 'US-ASCII'
785            || uc($from) eq 'LATIN-1' ) && uc($to) eq 'UTF-8' ) {
786        &latin1_to_utf8($txtref);
787    }
788    else {
789        my $jfrom = &get_jcode_name($from);
790        my $jto   = &get_jcode_name($to);
791        return $to if ( uc($jfrom) eq uc($jto) );
792        if ( $jfrom && $jto ) {
793            &load_jcode();
794            if ( defined $Jcode::VERSION ) {
795                Jcode::convert( $txtref, $jto, $jfrom );
796            }
797            else {
798                return $self->die( "Jcode.pm is required: $from to $to" );
799            }
800        }
801        else {
802            return $self->die( "Encode.pm is required: $from to $to" );
803        }
804    }
805    $to;
806}
807
808sub load_jcode {
809    return if defined $Jcode::VERSION;
810    local $@;
811    eval { require Jcode; };
812}
813
814sub load_encode {
815    return if defined $Encode::VERSION;
816    local $@;
817    eval { require Encode; };
818}
819
820sub latin1_to_utf8 {
821    my $strref = shift;
822    $$strref =~ s{
823        ([\x80-\xFF])
824    }{
825        pack( 'C2' => 0xC0|(ord($1)>>6),0x80|(ord($1)&0x3F) )
826    }exg;
827}
828
829sub get_jcode_name {
830    my $src = shift;
831    my $dst;
832    if ( $src =~ /^utf-?8$/i ) {
833        $dst = 'utf8';
834    }
835    elsif ( $src =~ /^euc.*jp(-?(win|ms))?$/i ) {
836        $dst = 'euc';
837    }
838    elsif ( $src =~ /^(shift.*jis|cp932|windows-31j)$/i ) {
839        $dst = 'sjis';
840    }
841    elsif ( $src =~ /^iso-2022-jp/ ) {
842        $dst = 'jis';
843    }
844    $dst;
845}
846
847sub xml_escape {
848    my $str = shift;
849    # except for TAB(\x09),CR(\x0D),LF(\x0A)
850    $str =~ s{
851        ([\x00-\x08\x0B\x0C\x0E-\x1F\x7F])
852    }{
853        sprintf( '&#%d;', ord($1) );
854    }gex;
855    $str =~ s/&(?!#(\d+;|x[\dA-Fa-f]+;))/&amp;/g;
856    $str =~ s/</&lt;/g;
857    $str =~ s/>/&gt;/g;
858    $str =~ s/'/&apos;/g;
859    $str =~ s/"/&quot;/g;
860    $str;
861}
862
863sub xml_unescape {
864    my $str = shift;
865    my $map = {qw( quot " lt < gt > apos ' amp & )};
866    $str =~ s{
867        (&(?:\#(\d+)|\#x([0-9a-fA-F]+)|(quot|lt|gt|apos|amp));)
868    }{
869        $4 ? $map->{$4} : &char_deref($1,$2,$3);
870    }gex;
871    $str;
872}
873
874sub char_deref {
875    my( $str, $dec, $hex ) = @_;
876    if ( defined $dec ) {
877        return &code_to_utf8( $dec ) if ( $dec < 256 );
878    }
879    elsif ( defined $hex ) {
880        my $num = hex($hex);
881        return &code_to_utf8( $num ) if ( $num < 256 );
882    }
883    return $str;
884}
885
886sub code_to_utf8 {
887    my $code = shift;
888    if ( $code < 128 ) {
889        return pack( C => $code );
890    }
891    elsif ( $code < 256 ) {
892        return pack( C2 => 0xC0|($code>>6), 0x80|($code&0x3F));
893    }
894    elsif ( $code < 65536 ) {
895        return pack( C3 => 0xC0|($code>>12), 0x80|(($code>>6)&0x3F), 0x80|($code&0x3F));
896    }
897    return shift if scalar @_;      # default value
898    sprintf( '&#x%04X;', $code );
899}
900
9011;
Note: See TracBrowser for help on using the browser.