root/lang/perl/Encode/trunk/lib/Encode/MIME/Header.pm @ 34412

Revision 34412, 6.7 kB (checked in by dankogai, 5 years ago)

VERSION 2.35

Line 
1package Encode::MIME::Header;
2use strict;
3use warnings;
4no warnings 'redefine';
5
6our $VERSION = do { my @r = ( q$Revision: 2.11 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
7use Encode qw(find_encoding encode_utf8 decode_utf8);
8use MIME::Base64;
9use Carp;
10
11my %seed = (
12    decode_b => '1',    # decodes 'B' encoding ?
13    decode_q => '1',    # decodes 'Q' encoding ?
14    encode   => 'B',    # encode with 'B' or 'Q' ?
15    bpl      => 75,     # bytes per line
16);
17
18$Encode::Encoding{'MIME-Header'} =
19  bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
20
21$Encode::Encoding{'MIME-B'} = bless {
22    %seed,
23    decode_q => 0,
24    Name     => 'MIME-B',
25} => __PACKAGE__;
26
27$Encode::Encoding{'MIME-Q'} = bless {
28    %seed,
29    decode_q => 1,
30    encode   => 'Q',
31    Name     => 'MIME-Q',
32} => __PACKAGE__;
33
34use base qw(Encode::Encoding);
35
36sub needs_lines { 1 }
37sub perlio_ok   { 0 }
38
39sub decode($$;$) {
40    use utf8;
41    my ( $obj, $str, $chk ) = @_;
42
43    # zap spaces between encoded words
44    $str =~ s/\?=\s+=\?/\?==\?/gos;
45
46    # multi-line header to single line
47    $str =~ s/(?:\r\n|[\r\n])[ \t]//gos;
48
49    1 while ( $str =~
50        s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)(.*?)\?=\1(.*?\?=)/$1$2$3/ )
51      ;    # Concat consecutive QP encoded mime headers
52           # Fixes breaking inside multi-byte characters
53
54    $str =~ s{
55        =\?              # begin encoded word
56        ([-0-9A-Za-z_]+) # charset (encoding)
57        (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
58        \?([QqBb])\?     # delimiter
59        (.*?)            # Base64-encodede contents
60        \?=              # end encoded word
61    }{
62        if      (uc($2) eq 'B'){
63            $obj->{decode_b} or croak qq(MIME "B" unsupported);
64            decode_b($1, $3, $chk);
65        } elsif (uc($2) eq 'Q'){
66            $obj->{decode_q} or croak qq(MIME "Q" unsupported);
67            decode_q($1, $3, $chk);
68        } else {
69            croak qq(MIME "$2" encoding is nonexistent!);
70        }
71    }egox;
72    $_[1] = $str if $chk;
73    return $str;
74}
75
76sub decode_b {
77    my $enc  = shift;
78    my $d    = find_encoding($enc) or croak qq(Unknown encoding "$enc");
79    my $db64 = decode_base64(shift);
80    my $chk  = shift;
81    return $d->name eq 'utf8'
82      ? Encode::decode_utf8($db64)
83      : $d->decode( $db64, $chk || Encode::FB_PERLQQ );
84}
85
86sub decode_q {
87    my ( $enc, $q, $chk ) = @_;
88    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
89    $q =~ s/_/ /go;
90    $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
91    return $d->name eq 'utf8'
92      ? Encode::decode_utf8($q)
93      : $d->decode( $q, $chk || Encode::FB_PERLQQ );
94}
95
96my $especials =
97  join( '|' => map { quotemeta( chr($_) ) }
98      unpack( "C*", qq{()<>@,;:"'/[]?.=} ) );
99
100my $re_encoded_word = qr{
101    =\?                # begin encoded word
102    (?:[-0-9A-Za-z_]+) # charset (encoding)
103    (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
104    \?(?:[QqBb])\?     # delimiter
105    (?:.*?)            # Base64-encodede contents
106    \?=                # end encoded word
107}xo;
108
109my $re_especials = qr{$re_encoded_word|$especials}xo;
110
111sub encode($$;$) {
112    my ( $obj, $str, $chk ) = @_;
113    my @line = ();
114    for my $line ( split /\r\n|[\r\n]/o, $str ) {
115        my ( @word, @subline );
116        for my $word ( split /($re_especials)/o, $line ) {
117            if (   $word =~ /[^\x00-\x7f]/o
118                or $word =~ /^$re_encoded_word$/o )
119            {
120                push @word, $obj->_encode($word);
121            }
122            else {
123                push @word, $word;
124            }
125        }
126        my $subline = '';
127        for my $word (@word) {
128            use bytes ();
129            if ( bytes::length($subline) + bytes::length($word) >
130                $obj->{bpl} )
131            {
132                push @subline, $subline;
133                $subline = '';
134            }
135            $subline .= $word;
136        }
137        $subline and push @subline, $subline;
138        push @line, join( "\n " => @subline );
139    }
140    $_[1] = '' if $chk;
141    return join( "\n", @line );
142}
143
144use constant HEAD   => '=?UTF-8?';
145use constant TAIL   => '?=';
146use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
147
148sub _encode {
149    my ( $o, $str ) = @_;
150    my $enc  = $o->{encode};
151    my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
152
153    # to coerce a floating-point arithmetics, the following contains
154    # .0 in numbers -- dankogai
155    $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
156    my @result = ();
157    my $chunk  = '';
158    while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
159        use bytes ();
160        if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
161            push @result, SINGLE->{$enc}($chunk);
162            $chunk = '';
163        }
164        $chunk .= $chr;
165    }
166    length($chunk) and push @result, SINGLE->{$enc}($chunk);
167    return @result;
168}
169
170sub _encode_b {
171    HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
172}
173
174sub _encode_q {
175    my $chunk = shift;
176    $chunk = encode_utf8($chunk);
177    $chunk =~ s{
178           ([^0-9A-Za-z])
179       }{
180            join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
181       }egox;
182    return HEAD . 'Q?' . $chunk . TAIL;
183}
184
1851;
186__END__
187
188=head1 NAME
189
190Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
191
192=head1 SYNOPSIS
193
194    use Encode qw/encode decode/;
195    $utf8   = decode('MIME-Header', $header);
196    $header = encode('MIME-Header', $utf8);
197
198=head1 ABSTRACT
199
200This module implements RFC 2047 Mime Header Encoding.  There are 3
201variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>.  The
202difference is described below
203
204              decode()          encode()
205  ----------------------------------------------
206  MIME-Header Both B and Q      =?UTF-8?B?....?=
207  MIME-B      B only; Q croaks  =?UTF-8?B?....?=
208  MIME-Q      Q only; B croaks  =?UTF-8?Q?....?=
209
210=head1 DESCRIPTION
211
212When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
213is extracted and decoded for I<X> encoding (B for Base64, Q for
214Quoted-Printable). Then the decoded chunk is fed to
215decode(I<encoding>).  So long as I<encoding> is supported by Encode,
216any source encoding is fine.
217
218When you encode, it just encodes UTF-8 string with I<X> encoding then
219quoted with =?UTF-8?I<X>?....?= .  The parts that RFC 2047 forbids to
220encode are left as is and long lines are folded within 76 bytes per
221line.
222
223=head1 BUGS
224
225It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
226and =?ISO-8859-1?= but that makes the implementation too complicated.
227These days major mail agents all support =?UTF-8? so I think it is
228just good enough.
229
230Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
231Makamaka.  Thre are still too many MUAs especially cellular phone
232handsets which does not grok UTF-8.
233
234=head1 SEE ALSO
235
236L<Encode>
237
238RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
239locations.
240
241=cut
Note: See TracBrowser for help on using the browser.