Changeset 30111
- Timestamp:
- 02/16/09 10:25:01 (12 months ago)
- Location:
- lang/perl/Encode/trunk
- Files:
-
- 3 modified
-
Unicode/Unicode.pm (modified) (4 diffs)
-
encoding.pm (modified) (8 diffs)
-
lib/Encode/MIME/Header.pm (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/Encode/trunk/Unicode/Unicode.pm
r30107 r30111 70 70 =head1 SYNOPSIS 71 71 72 use Encode qw/encode decode/; 72 use Encode qw/encode decode/; 73 73 $ucs2 = encode("UCS-2BE", $utf8); 74 74 $utf8 = decode("UCS-2BE", $ucs2); … … 231 231 232 232 Note this move has made \x{D800}-\x{DFFF} into a forbidden zone but 233 perl does not prohibit the use of characters within this range. To perl, 233 perl does not prohibit the use of characters within this range. To perl, 234 234 every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>. 235 235 … … 242 242 Unicode encodings simply croaks. 243 243 244 % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \245 -e 'Encode::from_to($_, "utf16","shift_jis", 0); print'244 % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \ 245 -e 'Encode::from_to($_, "utf16","shift_jis", 0); print' 246 246 UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184. 247 % perl -MEncode -e '$a = "BOM missing"' \248 -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print'247 % perl -MEncode -e '$a = "BOM missing"' \ 248 -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print' 249 249 UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184. 250 250 … … 265 265 266 266 Ch. 15, pp. 403 of C<Programming Perl (3rd Edition)> 267 by Larry Wall, Tom Christiansen, Jon Orwant; 267 by Larry Wall, Tom Christiansen, Jon Orwant; 268 268 O'Reilly & Associates; ISBN 0-596-00027-8 269 269 -
lang/perl/Encode/trunk/encoding.pm
r30107 r30111 207 207 # or you can even do this if your shell supports your native encoding 208 208 209 perl -Mencoding=latin2 -e '...' # Feeling centrally European?210 perl -Mencoding=euc-kr -e '...' # Or Korean?209 perl -Mencoding=latin2 -e '...' # Feeling centrally European? 210 perl -Mencoding=euc-kr -e '...' # Or Korean? 211 211 212 212 # more control … … 332 332 or later fixes this problem. 333 333 334 =item tr// 334 =item tr// 335 335 336 336 C<tr//> was overlooked by Perl 5 porters when they released perl 5.8.0 … … 339 339 =item DATA pseudo-filehandle 340 340 341 Another feature that was overlooked was C<DATA>. 341 Another feature that was overlooked was C<DATA>. 342 342 343 343 =back … … 349 349 =item use encoding [I<ENCNAME>] ; 350 350 351 Sets the script encoding to I<ENCNAME>. And unless ${^UNICODE} 351 Sets the script encoding to I<ENCNAME>. And unless ${^UNICODE} 352 352 exists and non-zero, PerlIO layers of STDIN and STDOUT are set to 353 353 ":encoding(I<ENCNAME>)". … … 427 427 428 428 The pragma is a per script, not a per block lexical. Only the last 429 C<use encoding> or C<no encoding> matters, and it affects 430 B<the whole script>. However, the <no encoding> pragma is supported and 431 B<use encoding> can appear as many times as you want in a given script. 429 C<use encoding> or C<no encoding> matters, and it affects 430 B<the whole script>. However, the <no encoding> pragma is supported and 431 B<use encoding> can appear as many times as you want in a given script. 432 432 The multiple use of this pragma is discouraged. 433 433 434 434 By the same reason, the use this pragma inside modules is also 435 discouraged (though not as strongly discouraged as the case above. 435 discouraged (though not as strongly discouraged as the case above. 436 436 See below). 437 437 … … 602 602 $camel = "*non-ascii*"; 603 603 binmode(STDOUT=>':encoding(utf8)'); # bang! 604 write; # funny 604 write; # funny 605 605 print $camel, "\n"; # fine 606 606 … … 635 635 If 1. didn't work but we are under the locale pragma, the environment 636 636 variables LC_ALL and LANG (in that order) are matched for encodings 637 (the part after C<.>, if any), and if any found, that is used 637 (the part after C<.>, if any), and if any found, that is used 638 638 as the default encoding for the open pragma. 639 639 … … 654 654 =head1 HISTORY 655 655 656 This pragma first appeared in Perl 5.8.0. For features that require 656 This pragma first appeared in Perl 5.8.0. For features that require 657 657 5.8.1 and better, see above. 658 658 -
lang/perl/Encode/trunk/lib/Encode/MIME/Header.pm
r30107 r30111 45 45 46 46 # multi-line header to single line 47 $str =~ s/(?:\r\n|[\r\n])[ \t]+//gos;47 $str =~ s/(?:\r|\n|\r\n)[ \t]+//gos; 48 48 49 49 1 while ( $str =~ 50 s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)(.*?)\?=\1(.*?\?=)/$1$2$3/ )50 s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ ) 51 51 ; # Concat consecutive QP encoded mime headers 52 52 # Fixes breaking inside multi-byte characters … … 96 96 my $especials = 97 97 join( '|' => map { quotemeta( chr($_) ) } 98 unpack( "C*", qq{()<>@,;: "'/[]?.=} ) );98 unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) ); 99 99 100 100 my $re_encoded_word = qr{ 101 =\? # begin encoded word 102 (?:[-0-9A-Za-z_]+) # charset (encoding) 103 (?:\*[A-Za-z]{1,8}+(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) 104 \?(?:[QqBb])\? # delimiter 105 (?:.*?) # Base64-encodede contents 106 \?= # end encoded word 107 }xo; 101 (?: 102 =\? # begin encoded word 103 (?:[0-9A-Za-z\-_]+) # charset (encoding) 104 (?:\*\w+(?:-\w+)*)? # language (RFC 2231) 105 \?(?:[QqBb])\? # delimiter 106 (?:.*?) # Base64-encodede contents 107 \?= # end encoded word 108 ) 109 }xo; 108 110 109 111 my $re_especials = qr{$re_encoded_word|$especials}xo; … … 112 114 my ( $obj, $str, $chk ) = @_; 113 115 my @line = (); 114 for my $line ( split /\r \n|[\r\n]/o, $str ) {116 for my $line ( split /\r|\n|\r\n/o, $str ) { 115 117 my ( @word, @subline ); 116 118 for my $word ( split /($re_especials)/o, $line ) { … … 176 178 $chunk = encode_utf8($chunk); 177 179 $chunk =~ s{ 178 [^0-9A-Za-z]179 }{180 join("" => map {sprintf "=%02X", $_} unpack("C*", $&))181 }egox;180 ([^0-9A-Za-z]) 181 }{ 182 join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) 183 }egox; 182 184 return HEAD . 'Q?' . $chunk . TAIL; 183 185 } … … 192 194 =head1 SYNOPSIS 193 195 194 use Encode qw/encode decode/; 196 use Encode qw/encode decode/; 195 197 $utf8 = decode('MIME-Header', $header); 196 198 $header = encode('MIME-Header', $utf8); … … 237 239 238 240 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other 239 locations. 241 locations. 240 242 241 243 =cut
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)