Changeset 29767
- Timestamp:
- 02/09/09 18:50:51 (12 months ago)
- Location:
- lang/perl/Encode/trunk
- Files:
-
- 3 modified
-
Unicode/Unicode.pm (modified) (4 diffs)
-
encoding.pm (modified) (8 diffs)
-
lib/Encode/MIME/Header.pm (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/Encode/trunk/Unicode/Unicode.pm
r1747 r29767 70 70 =head1 SYNOPSIS 71 71 72 use Encode qw/encode decode/; 72 use Encode qw/encode decode/; 73 73 $ucs2 = encode("UCS-2BE", $utf8); 74 74 $utf8 = decode("UCS-2BE", $ucs2); … … 231 231 232 232 Note this move has made \x{D800}-\x{DFFF} into a forbidden zone but 233 perl does not prohibit the use of characters within this range. To perl, 233 perl does not prohibit the use of characters within this range. To perl, 234 234 every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>. 235 235 … … 242 242 Unicode encodings simply croaks. 243 243 244 % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \245 -e 'Encode::from_to($_, "utf16","shift_jis", 0); print'244 % perl -MEncode -e'$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \ 245 -e'Encode::from_to($_, "utf16","shift_jis", 0); print' 246 246 UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184. 247 % perl -MEncode -e '$a = "BOM missing"' \248 -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print'247 % perl -MEncode -e'$a = "BOM missing"' \ 248 -e' Encode::from_to($a, "utf16", "shift_jis", 0); print' 249 249 UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184. 250 250 … … 265 265 266 266 Ch. 15, pp. 403 of C<Programming Perl (3rd Edition)> 267 by Larry Wall, Tom Christiansen, Jon Orwant; 267 by Larry Wall, Tom Christiansen, Jon Orwant; 268 268 O'Reilly & Associates; ISBN 0-596-00027-8 269 269 -
lang/perl/Encode/trunk/encoding.pm
r7855 r29767 207 207 # or you can even do this if your shell supports your native encoding 208 208 209 perl -Mencoding=latin2 -e '...' # Feeling centrally European?210 perl -Mencoding=euc-kr -e '...' # Or Korean?209 perl -Mencoding=latin2 -e'...' # Feeling centrally European? 210 perl -Mencoding=euc-kr -e'...' # Or Korean? 211 211 212 212 # more control … … 332 332 or later fixes this problem. 333 333 334 =item tr// 334 =item tr// 335 335 336 336 C<tr//> was overlooked by Perl 5 porters when they released perl 5.8.0 … … 339 339 =item DATA pseudo-filehandle 340 340 341 Another feature that was overlooked was C<DATA>. 341 Another feature that was overlooked was C<DATA>. 342 342 343 343 =back … … 349 349 =item use encoding [I<ENCNAME>] ; 350 350 351 Sets the script encoding to I<ENCNAME>. And unless ${^UNICODE} 351 Sets the script encoding to I<ENCNAME>. And unless ${^UNICODE} 352 352 exists and non-zero, PerlIO layers of STDIN and STDOUT are set to 353 353 ":encoding(I<ENCNAME>)". … … 427 427 428 428 The pragma is a per script, not a per block lexical. Only the last 429 C<use encoding> or C<no encoding> matters, and it affects 430 B<the whole script>. However, the <no encoding> pragma is supported and 431 B<use encoding> can appear as many times as you want in a given script. 429 C<use encoding> or C<no encoding> matters, and it affects 430 B<the whole script>. However, the <no encoding> pragma is supported and 431 B<use encoding> can appear as many times as you want in a given script. 432 432 The multiple use of this pragma is discouraged. 433 433 434 434 By the same reason, the use this pragma inside modules is also 435 discouraged (though not as strongly discouraged as the case above. 435 discouraged (though not as strongly discouraged as the case above. 436 436 See below). 437 437 … … 602 602 $camel = "*non-ascii*"; 603 603 binmode(STDOUT=>':encoding(utf8)'); # bang! 604 write; # funny 604 write; # funny 605 605 print $camel, "\n"; # fine 606 606 … … 635 635 If 1. didn't work but we are under the locale pragma, the environment 636 636 variables LC_ALL and LANG (in that order) are matched for encodings 637 (the part after C<.>, if any), and if any found, that is used 637 (the part after C<.>, if any), and if any found, that is used 638 638 as the default encoding for the open pragma. 639 639 … … 654 654 =head1 HISTORY 655 655 656 This pragma first appeared in Perl 5.8.0. For features that require 656 This pragma first appeared in Perl 5.8.0. For features that require 657 657 5.8.1 and better, see above. 658 658 -
lang/perl/Encode/trunk/lib/Encode/MIME/Header.pm
r29396 r29767 45 45 46 46 # multi-line header to single line 47 $str =~ s/(?:\r |\n|\r\n)[ \t]+//gos;47 $str =~ s/(?:\r\n|[\r\n)[ \t]+//gos; 48 48 49 49 1 while ( $str =~ 50 s/( \=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )50 s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)(.*?)\?=\1(.*?\?=)/$1$2$3/ ) 51 51 ; # Concat consecutive QP encoded mime headers 52 52 # Fixes breaking inside multi-byte characters 53 53 54 54 $str =~ s{ 55 =\? # begin encoded word56 ([ 0-9A-Za-z\-_]+) # charset (encoding)57 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)55 =\? # begin encoded word 56 ([-0-9A-Za-z_]+) # charset (encoding) 57 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) 58 58 \?([QqBb])\? # delimiter 59 59 (.*?) # Base64-encodede contents 60 \?= # end encoded word 61 }{62 if (uc($2) eq 'B'){60 \?= # end encoded word 61 }{ 62 if (uc($2) eq 'B'){ 63 63 $obj->{decode_b} or croak qq(MIME "B" unsupported); 64 64 decode_b($1, $3); 65 } elsif(uc($2) eq 'Q'){65 } elsif (uc($2) eq 'Q'){ 66 66 $obj->{decode_q} or croak qq(MIME "Q" unsupported); 67 67 decode_q($1, $3); 68 } else{68 } else { 69 69 croak qq(MIME "$2" encoding is nonexistent!); 70 70 } 71 }egox;71 }egox; 72 72 $_[1] = '' if $chk; 73 73 return $str; … … 95 95 my $especials = 96 96 join( '|' => map { quotemeta( chr($_) ) } 97 unpack( "C*", qq{()<>@,;: \"\'/[]?.=} ) );97 unpack( "C*", qq{()<>@,;:"'/[]?.=} ) ); 98 98 99 99 my $re_encoded_word = qr{ 100 (?: 101 =\? # begin encoded word 102 (?:[0-9A-Za-z\-_]+) # charset (encoding) 103 (?:\*\w+(?:-\w+)*)? # language (RFC 2231) 104 \?(?:[QqBb])\? # delimiter 105 (?:.*?) # Base64-encodede contents 106 \?= # end encoded word 107 ) 108 }xo; 100 =\? # begin encoded word 101 (?:[-0-9A-Za-z_]+) # charset (encoding) 102 (?:\*[A-Za-z]{1,8}+(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) 103 \?(?:[QqBb])\? # delimiter 104 (?:.*?) # Base64-encodede contents 105 \?= # end encoded word 106 }xo; 109 107 110 108 my $re_especials = qr{$re_encoded_word|$especials}xo; … … 113 111 my ( $obj, $str, $chk ) = @_; 114 112 my @line = (); 115 for my $line ( split /\r |\n|\r\n/o, $str ) {113 for my $line ( split /\r\n|[\r\n]/o, $str ) { 116 114 my ( @word, @subline ); 117 115 for my $word ( split /($re_especials)/o, $line ) { … … 177 175 $chunk = encode_utf8($chunk); 178 176 $chunk =~ s{ 179 ([^0-9A-Za-z])180 }{181 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))182 }egox;177 [^0-9A-Za-z] 178 }{ 179 join("" => map {sprintf "=%02X", $_} unpack("C*", $&)) 180 }egox; 183 181 return HEAD . 'Q?' . $chunk . TAIL; 184 182 } … … 193 191 =head1 SYNOPSIS 194 192 195 use Encode qw/encode decode/; 193 use Encode qw/encode decode/; 196 194 $utf8 = decode('MIME-Header', $header); 197 195 $header = encode('MIME-Header', $utf8); … … 238 236 239 237 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other 240 locations. 238 locations. 241 239 242 240 =cut
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)