| 1 | # ------------------------------------------------------------------------ |
|---|
| 2 | #  EmailParser.pm --- �ޥ��ѡ���б��᡼���߼��⥸�塼��  Copyright 1997-2005 Kawasaki Yusuke <u-suke@kawa.net> |
|---|
| 3 | # ------------------------------------------------------------------------ |
|---|
| 4 | Â Â package EmailParser; |
|---|
| 5 | Â Â use strict; |
|---|
| 6 | Â Â use vars qw( $DEBUG $VERSION ); |
|---|
| 7 | Â Â $VERSION = "0.01"; |
|---|
| 8 | Â Â $DEBUG = \*STDERR; |
|---|
| 9 | # ------------------------------------------------------------------------ |
|---|
| 10 | Â Â use MIME::Base64; |
|---|
| 11 | Â Â use Encode; |
|---|
| 12 | Â Â use DateTime::Format::Mail; |
|---|
| 13 | Â Â use IO::File; |
|---|
| 14 | Â Â use IO::Scalar; |
|---|
| 15 | Â Â use IO::ScalarArray; |
|---|
| 16 | # ------------------------------------------------------------------------ |
|---|
| 17 | =head1 NAME |
|---|
| 18 | |
|---|
| 19 | Â Â EmailParser.pm ---- Multipart email parser |
|---|
| 20 | |
|---|
| 21 | =head1 USAGE |
|---|
| 22 | |
|---|
| 23 | Â Â use EmailParser; |
|---|
| 24 | |
|---|
| 25 | Â Â $ep = EmailParser->new( $file ); |
|---|
| 26 | Â Â # $ep = EmailParser->new()->fromHandle( $handle ); |
|---|
| 27 | Â Â # $ep = EmailParser->new()->fromFile( $file ); |
|---|
| 28 | Â Â # $ep = EmailParser->new()->fromArray( $array ); |
|---|
| 29 | Â Â # $ep = EmailParser->new()->fromString( $scalar ); |
|---|
| 30 | |
|---|
| 31 | Â Â $ep->setInternalTimeZone( "Japan" ); |
|---|
| 32 | Â Â $ep->setInternalCharset( "utf8" ); |
|---|
| 33 | Â Â ( $part0, $part1, $part2, ... ) = $ep->parse(); |
|---|
| 34 | |
|---|
| 35 | Â Â $string = $part0->toString(); |
|---|
| 36 |   $array = $part1->toArray(); |
|---|
| 37 |   $file  = $part2->toFile( $file ); |
|---|
| 38 | Â Â $handle = $part3->toHandle( $handle ); |
|---|
| 39 | |
|---|
| 40 | =head1 PARSED PROPATIES |
|---|
| 41 | |
|---|
| 42 |   �ڶ��������  $part->{format} mulitipart/�� multipart content |
|---|
| 43 |           text/plain   text content |
|---|
| 44 |           text/html    HTML content |
|---|
| 45 |           image/��    image data content |
|---|
| 46 | Â Â Â Â Â Â Â Â Â Â message/rfc822Â RFC822 content |
|---|
| 47 | Â Â $part->{encoding}Â Â base64Â Â Â Base64 |
|---|
| 48 |             quoted   quoted-printable |
|---|
| 49 | Â Â $part->{offset}Â Â Â Â Â Â Â Â Â offset to this part |
|---|
| 50 | Â Â $part->{boundary}Â Â Â Â Â Â Â Â boundary string of the part |
|---|
| 51 | Â Â $part->{head_text}Â Â Â Â Â Â Â raw header text |
|---|
| 52 | Â Â $part->{raw}->{FROM}->[0]Â Â Â Â Â Â From: line |
|---|
| 53 | Â Â $part->{raw}->{SUBJECT}->[0]Â Â Â Â Subject: line |
|---|
| 54 | Â Â $part->{raw}->{CONTENT_TYPE}->[0]Â Â Content-Type: line |
|---|
| 55 | |
|---|
| 56 |   �ڥ᡼������Τߤ�����  $part->{addr_from}->[0]     ���пͥ᡼�륢�ɥ쥹 |
|---|
| 57 |   $part->{addr_to}->[0]��    ����᡼�륢�ɥ쥹To: |
|---|
| 58 |   $part->{addr_cc}->[0]��    ����᡼�륢�ɥ쥹Cc: |
|---|
| 59 |   $part->{addr_bcc}->[0]��    ����᡼�륢�ɥ쥹Bcc: |
|---|
| 60 |   $part->{subject}        �᡼��̾ |
|---|
| 61 |   $part->{epoch}         ������(�) |
|---|
| 62 |   $part->{datetime}        ������(YYYY/MM/DD HH:MM:SS) |
|---|
| 63 |   $part->{priority}        ͥ��������  $part->{ml_name}        ML̾�� |
|---|
| 64 |   �ڥޥ��ѡ��ȥ������Τߤ�����  $part->{child_boundary}     �ҥѡ��ȤΥХ������  �ڥƥ����ȥ������Τߤ�����  $part->{charset}        ����������� |
|---|
| 65 | =head1 HISTORY |
|---|
| 66 | |
|---|
| 67 | Â Â 1997/ |
|---|
| 68 | Â Â 2005/04/21 EmailParser.pm |
|---|
| 69 |   2005/05/08 EmailParser::Part ���饹��� |
|---|
| 70 | |
|---|
| 71 | =head1 COPYRIGHT |
|---|
| 72 | |
|---|
| 73 | Â Â Kawasaki Yusuke <u-suke@kawa.net> |
|---|
| 74 | Â Â http://www.kawa.net/ |
|---|
| 75 | |
|---|
| 76 | =cut |
|---|
| 77 | # ------------------------------------------------------------------------ |
|---|
| 78 |   my $INTERNAL_TIMEZONE = "Japan";  # �ǥե����Υ����ॾ���� my $INTERNAL_CHARSET = "utf8";   # �����ʸ����# ------------------------------------------------------------------------ |
|---|
| 79 | #  ���󥹥ȥ饯�� |
|---|
| 80 | # ------------------------------------------------------------------------ |
|---|
| 81 | sub new { |
|---|
| 82 | Â Â my $pkg = shift; |
|---|
| 83 | Â Â my $file = shift; |
|---|
| 84 | Â Â ## $DEBUG and print $DEBUG "[EmailParser::new] $pkg $file\n"; |
|---|
| 85 | Â Â my $emp = {}; |
|---|
| 86 | Â Â bless $emp, $pkg; |
|---|
| 87 | Â Â $emp->fromFile( $file ) if $file; |
|---|
| 88 | Â Â $emp; |
|---|
| 89 | } |
|---|
| 90 | # ------------------------------------------------------------------------ |
|---|
| 91 | #  �ǥ��󥹥ȥ饯�� |
|---|
| 92 | # ------------------------------------------------------------------------ |
|---|
| 93 | sub DESTROY { |
|---|
| 94 | Â Â my $emp = shift or return; |
|---|
| 95 | Â Â ## $DEBUG and print $DEBUG "[DESTROY] $emp\n"; |
|---|
| 96 | Â Â if ( $emp->{ihandle_opened} ) { |
|---|
| 97 |     # ï¿½ï¿½ï¿½Ï¥ï¿½ë¤¬ï¿½ï¿½ï¿½ï¿½ï¿½×¥ï¿½ß¤Î¾ï¿½ï¿½Ï¥ï¿½ï¿½í¡¼ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½Â Â Â Â eval { |
|---|
| 98 | Â Â Â Â Â Â $emp->{ihandle}->close(); |
|---|
| 99 | Â Â Â Â }; |
|---|
| 100 | Â Â } |
|---|
| 101 | Â Â $emp; |
|---|
| 102 | } |
|---|
| 103 | # ------------------------------------------------------------------------ |
|---|
| 104 | #  �������ॾ�����ꤹ���᡼�������Υ����ॾ����Ϥʤ���# ------------------------------------------------------------------------ |
|---|
| 105 | sub setInternalTimeZone { |
|---|
| 106 | Â Â my $emp = shift; |
|---|
| 107 | Â Â my $timezone = shift; |
|---|
| 108 |   # DateTime::TimeZone ����ȳ���Ǥ��뤫�⡩ |
|---|
| 109 | Â Â $emp->{timezone} = $timezone; |
|---|
| 110 | } |
|---|
| 111 | sub getInternalTimeZone { |
|---|
| 112 | Â Â defined $_[0]->{timezone} ? $_[0]->{timezone} : $INTERNAL_TIMEZONE; |
|---|
| 113 | } |
|---|
| 114 | # ------------------------------------------------------------------------ |
|---|
| 115 | #  ��ʸ���Ȥ�ꤹ���᡼��ʸ���ȤǤϤʤ���# ------------------------------------------------------------------------ |
|---|
| 116 | sub setInternalCharset { |
|---|
| 117 | Â Â my $emp = shift; |
|---|
| 118 | Â Â my $charset = shift; |
|---|
| 119 | Â Â EmailParser::Util::valid_charset( $charset ) or die "Invalid charset: $charset\n"; |
|---|
| 120 | Â Â $emp->{charset} = $charset; |
|---|
| 121 | } |
|---|
| 122 | sub getInternalCharset { |
|---|
| 123 | Â Â defined $_[0]->{charset} ? $_[0]->{charset} : $INTERNAL_CHARSET; |
|---|
| 124 | } |
|---|
| 125 | # ------------------------------------------------------------------------ |
|---|
| 126 | #  ��߹��ߥϥ���� |
|---|
| 127 | # ------------------------------------------------------------------------ |
|---|
| 128 | sub fromHandle { |
|---|
| 129 | Â Â my $emp = shift; |
|---|
| 130 | Â Â my $ifh = shift; |
|---|
| 131 | Â Â ## $DEBUG and print $DEBUG "[fromHandle] ",ref($ifh),"\n"; |
|---|
| 132 |   $emp->{ihandle} = $ifh;       # ��߹��ߥϥ���Ͽ |
|---|
| 133 |   $emp->{begin} = $emp->tell();    # ���ϰ�֤�� |
|---|
| 134 | Â Â $ifh; |
|---|
| 135 | } |
|---|
| 136 | # ------------------------------------------------------------------------ |
|---|
| 137 | sub fromFile { |
|---|
| 138 | Â Â my $emp = shift; |
|---|
| 139 | Â Â my $file = shift; |
|---|
| 140 | Â Â ## $DEBUG and print $DEBUG "[fromFile] $file\n"; |
|---|
| 141 | Â Â my $ifh = new IO::File( $file, "r" ) or die "$! - $file\n"; |
|---|
| 142 |   $emp->{ihandle_opened} ++;     # OPEN�ե饰��  $emp->fromHandle( $ifh ); |
|---|
| 143 | } |
|---|
| 144 | # ------------------------------------------------------------------------ |
|---|
| 145 | sub fromArray { |
|---|
| 146 | Â Â my $emp = shift; |
|---|
| 147 | Â Â my $array = shift; |
|---|
| 148 | Â Â ## $DEBUG and print $DEBUG "[fromArray] $array\n"; |
|---|
| 149 | Â Â my $ifh = new IO::ScalarArray( $array ) or die "IO::ScalarArray failed.\n"; |
|---|
| 150 |   $emp->{ihandle_opened} ++;     # OPEN�ե饰��  $emp->fromHandle( $ifh ); |
|---|
| 151 | } |
|---|
| 152 | # ------------------------------------------------------------------------ |
|---|
| 153 | sub fromString { |
|---|
| 154 | Â Â my $emp = shift; |
|---|
| 155 | Â Â my $string = shift; |
|---|
| 156 | Â Â ## $DEBUG and print $DEBUG "[fromString] \$string\n"; |
|---|
| 157 | Â Â my $ifh = new IO::Scalar( \$string ) or die "IO::Scalar failed.\n"; |
|---|
| 158 |   $emp->{ihandle_opened} ++;     # OPEN�ե饰��  $emp->fromHandle( $ifh ); |
|---|
| 159 | } |
|---|
| 160 | # ------------------------------------------------------------------------ |
|---|
| 161 | #  ����ɤ߹��� ------------------------------------------------------------------------ |
|---|
| 162 | sub getline { |
|---|
| 163 | Â Â $_[0]->{ihandle}->getline() if $_[0]->{ihandle}; |
|---|
| 164 | } |
|---|
| 165 | sub seek { |
|---|
| 166 | Â Â my $emp = shift; |
|---|
| 167 | Â Â my $pos = shift; |
|---|
| 168 | Â Â ## $DEBUG and print $DEBUG "[seek] $pos\n"; |
|---|
| 169 | Â Â $emp->{ihandle}->seek($pos,0) if $emp->{ihandle}; |
|---|
| 170 | } |
|---|
| 171 | sub tell { |
|---|
| 172 | Â Â my $emp = shift; |
|---|
| 173 | Â Â $emp->{ihandle}->tell() if $emp->{ihandle}; |
|---|
| 174 | } |
|---|
| 175 | # ------------------------------------------------------------------------ |
|---|
| 176 | #  �Ƭ�˴������ |
|---|
| 177 | # ------------------------------------------------------------------------ |
|---|
| 178 | sub rewind { |
|---|
| 179 | Â Â my $emp = shift; |
|---|
| 180 | Â Â my $begin = $emp->{begin}; |
|---|
| 181 | Â Â ## $DEBUG and print $DEBUG "[rewind] $begin\n"; |
|---|
| 182 | Â Â $emp->seek( $begin ); |
|---|
| 183 | } |
|---|
| 184 | # ------------------------------------------------------------------------ |
|---|
| 185 | #  �᡼���(�ޤ��ϥޥ��ѡ�����������ơ��ƥѡ��Ȥ�� |
|---|
| 186 | # ------------------------------------------------------------------------ |
|---|
| 187 | sub parse { |
|---|
| 188 | Â Â my $emp = shift; |
|---|
| 189 |   my $selfbnd = shift;          # ���ѡ��ȤΥХ����  my $epart = EmailParser::Part->new( $emp, $selfbnd ); |
|---|
| 190 |   my $childbnd = $epart->{child_boundary}; # �ҥѡ��ȤΥХ����  my $is_multi = ( $childbnd && $epart->{format} =~ m#^multipart/# ); |
|---|
| 191 | Â Â my @parts = (); |
|---|
| 192 | Â Â my $findchr = $epart->{charset}; |
|---|
| 193 | |
|---|
| 194 |   # ���Ȥ��ޥ��ѡ��Ȥξ��ϡ��ҥѡ��Ȥ�Ϥ���  if ( $is_multi ) { |
|---|
| 195 |     # �ǽ��ҥѡ��Ȥ��Ϥޤ��Ǥ���� |
|---|
| 196 | Â Â Â Â while( 1 ) { |
|---|
| 197 | Â Â Â Â Â Â my $line = $emp->getline(); |
|---|
| 198 | Â Â Â Â Â Â last unless defined $line; |
|---|
| 199 | Â Â Â Â Â Â $line =~ s/[\r\n]*$/\n/s; |
|---|
| 200 | Â Â Â Â Â Â last if ( $line =~ /^--\Q$childbnd\E[\r\n]*$/ ); |
|---|
| 201 | Â Â Â Â } |
|---|
| 202 |     # �ƻҥѡ��Ȥ򷫤����ʺƵ��ƤӽФ���    my $partcnt; |
|---|
| 203 | Â Â Â Â while ( 1 ) { |
|---|
| 204 | Â Â Â Â Â Â $partcnt ++; |
|---|
| 205 | Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[parse] multipart #$partcnt\n"; |
|---|
| 206 | Â Â Â Â Â Â my @childlen = $emp->parse( $childbnd ); |
|---|
| 207 |       my $partcnt = scalar @childlen or last; # �ޥ��ѡ��Ƚ�λ |
|---|
| 208 |       push( @parts, @childlen );       # ����¤٤Ƥ��� |
|---|
| 209 | Â Â Â Â Â Â my $lastchild = $childlen[$#childlen] or last; |
|---|
| 210 |       last unless $lastchild->{has_next};   # ³���Υѡ��Ȥ�����    } |
|---|
| 211 | Â Â Â Â ## $DEBUG and print $DEBUG "[parse] multipart finish\n"; |
|---|
| 212 | |
|---|
| 213 |     # �Ƥ�����Ȥ���������Ҥ�����Ȥ�Ƚ������� |
|---|
| 214 |     # �Ҥ�����Ȥκǽ�ʸ���Ȥ򡢿Ƥ�����ȤȤߤʤ� |
|---|
| 215 | Â Â Â Â if ( ! $findchr && scalar @parts ) { |
|---|
| 216 | Â Â Â Â Â Â $findchr = ( map {$_->{charset}} |
|---|
| 217 | Â Â Â Â Â Â Â Â Â Â Â Â Â grep {defined $_->{charset}} @parts )[0]; |
|---|
| 218 | Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[parse] child charset=$findchr\n"; |
|---|
| 219 | Â Â Â Â } |
|---|
| 220 | Â Â } |
|---|
| 221 | |
|---|
| 222 |   # �ޥ��ѡ��ȤǤʤ���拾���ξ��ϡ��ʸ�����ɤ��� |
|---|
| 223 |   # ������ʤ��顢�⤷����ʤ�����������⤹��  if ( ! $is_multi ) { |
|---|
| 224 | Â Â Â Â my $notascii = ( $findchr ne "" );Â Â Â Â Â Â Â Â Â Â Â Â Â # |
|---|
| 225 | Â Â Â Â my $is_text = ( $epart->{format} =~ m#^text/# );Â Â Â Â Â Â # |
|---|
| 226 | Â Â Â Â my $enc_qp = ( $epart->{encoding} eq "quoted-printable" ); |
|---|
| 227 | Â Â Â Â my $enc_b64 = ( $epart->{encoding} eq "base64" ); |
|---|
| 228 | Â Â Â Â my $prevtag; |
|---|
| 229 | |
|---|
| 230 | Â Â Â Â while( 1 ) { |
|---|
| 231 | Â Â Â Â Â Â my $line = $emp->getline(); |
|---|
| 232 | Â Â Â Â Â Â last unless defined $line; |
|---|
| 233 | Â Â Â Â Â Â if ( $selfbnd && $line =~ /^--\Q$selfbnd\E(--)?[\r\n]*$/ ) { |
|---|
| 234 |         $epart->{has_next} ++ unless $1;  # ³���Υѡ��Ȥ�����        last; |
|---|
| 235 | Â Â Â Â Â Â } |
|---|
| 236 | Â Â Â Â Â Â next if ( $line =~ /^$/ ); |
|---|
| 237 | |
|---|
| 238 | Â Â Â Â Â Â if ( ! $findchr && $is_text ) { |
|---|
| 239 | Â Â Â Â Â Â Â Â if ( $enc_qp ) { |
|---|
| 240 | Â Â Â Â Â Â Â Â Â Â $line =~ s/=([a-z0-9]{2})/pack(C=>hex($1))/gei; |
|---|
| 241 | Â Â Â Â Â Â Â Â } elsif ( $enc_b64 ) { |
|---|
| 242 | Â Â Â Â Â Â Â Â Â Â $line =~ s/\s+$//s; |
|---|
| 243 |           next unless ( length($line) % 4 == 0 );   # 4��ܿ�ʤ���̵��          $line = MIME::Base64::decode_base64($line); |
|---|
| 244 | Â Â Â Â Â Â Â Â } |
|---|
| 245 | Â Â Â Â Â Â Â Â $line = $prevtag.$line if $prevtag; |
|---|
| 246 | Â Â Â Â Â Â Â Â if ( $line =~ m#(?:^|\W)charset=["']?([\w\-\.]+)#i ) { |
|---|
| 247 | Â Â Â Â Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[parse] html charset=$1\n"; |
|---|
| 248 | Â Â Â Â Â Â Â Â Â Â $findchr = EmailParser::Util::valid_charset( $1 ); |
|---|
| 249 | Â Â Â Â Â Â Â Â } elsif ( $line =~ m#\e\$B# ) { |
|---|
| 250 |           $findchr = "ISO-2022-JP";          # ���������ץ�������        } elsif ( $line =~ m#https?://[\w\%\-\.]+\.(\w{2})/#i ) { |
|---|
| 251 | Â Â Â Â Â Â Â Â Â Â my $cntry = $1; |
|---|
| 252 | Â Â Â Â Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[parse] country=$cntry\n"; |
|---|
| 253 | Â Â Â Â Â Â Â Â Â Â $findchr = $EmailParser::COUNTRY2CHARSET->{$cntry}; |
|---|
| 254 | Â Â Â Â Â Â Â Â } |
|---|
| 255 | Â Â Â Â Â Â Â Â $notascii ++ if ( ! $notascii && $line =~ /[^\000-\177]/ ); |
|---|
| 256 |         $prevtag = ( $line =~ /(<[^<>]*)$/s )[0];    # �Ǹ����� |
|---|
| 257 | Â Â Â Â Â Â } |
|---|
| 258 | Â Â Â Â } |
|---|
| 259 | |
|---|
| 260 |     # charset ���ǡ����ġ�� ASCII ʸ�����������ʤ��� us-ascii |
|---|
| 261 |     if ( ! $findchr && $is_text && ! $notascii ) { |
|---|
| 262 | Â Â Â Â Â Â $findchr = "us-ascii"; |
|---|
| 263 | Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[parse] findchr=$findchr\n"; |
|---|
| 264 | Â Â Â Â } |
|---|
| 265 | Â Â } |
|---|
| 266 | |
|---|
| 267 |   # �ʸ�������ɤ����ޤ����ʤ顢MIME�إå��� |
|---|
| 268 | Â Â if ( ! $findchr ) { |
|---|
| 269 | Â Â Â Â if ( $epart->{head_text} =~ /(\=\?([\w\-]+)\?[BQ]\?)([^\s\?]+)\?\=/i ) { |
|---|
| 270 | Â Â Â Â Â Â $findchr = EmailParser::Util::valid_charset( $2 ); |
|---|
| 271 | Â Â Â Â } elsif ( $epart->{head_text} =~ /\e\$B/ ) { |
|---|
| 272 | Â Â Â Â Â Â $findchr = "ISO-2022-JP"; |
|---|
| 273 | Â Â Â Â } |
|---|
| 274 | Â Â } |
|---|
| 275 | |
|---|
| 276 |   # ʸ���ɤ����ޤ����ʤ顢���пͥ᡼�륢�ɥ쥹��� |
|---|
| 277 | Â Â if ( ! $findchr && ref $epart->{raw}->{FROM} ) { |
|---|
| 278 |     my $from = $epart->{raw}->{FROM}->[0];           # ���пͥ��ɥ쥹 |
|---|
| 279 | Â Â Â Â my $DOMAIN2CHARSET = EmailParser::Map::domain2charset(); |
|---|
| 280 |     my $domain = ( $from =~ /\@(?:[\w\-]+\.)*([\w\-]+\.[\w\-]+)(\W|$)/ )[0];     # �ɥᥤ��    $findchr ||= $DOMAIN2CHARSET->{lc($domain)} if $domain; |
|---|
| 281 | Â Â Â Â $DEBUG and print $DEBUG "[parse] domain=$domain [$findchr]\n"; |
|---|
| 282 | |
|---|
| 283 | Â Â Â Â my $COUNTRY2CHARSET = EmailParser::Map::country2charset(); |
|---|
| 284 |     my $cntry = ( $from =~ /\@[\w\-\.]+\.(\w{2})(\W|$)/)[0];  # ��ɥᥤ��   $findchr ||= $COUNTRY2CHARSET->{lc($cntry)} if $cntry; |
|---|
| 285 | Â Â Â Â $DEBUG and print $DEBUG "[parse] country=$cntry [$findchr]\n"; |
|---|
| 286 | Â Â } |
|---|
| 287 | |
|---|
| 288 |   # ʸ���ɤ򺣲󸡽Ф��Ƥ����ʤ顢�إå�������Ƥ��� |
|---|
| 289 | Â Â if ( ! $epart->{charset} && $findchr ) { |
|---|
| 290 | Â Â Â Â $epart->{charset} = $findchr; |
|---|
| 291 | Â Â Â Â my $convto = $emp->getInternalCharset(); |
|---|
| 292 | Â Â Â Â $epart->convertCharset( $convto ) if $convto; |
|---|
| 293 | Â Â } |
|---|
| 294 | |
|---|
| 295 |   # ������������ѡ��Ȥ������� |
|---|
| 296 | Â Â unshift( @parts, $epart ); |
|---|
| 297 | |
|---|
| 298 |   # ��λ |
|---|
| 299 | Â Â wantarray ? @parts : $epart; |
|---|
| 300 | } |
|---|
| 301 | # ------------------------------------------------------------------------ |
|---|
| 302 | Â Â package EmailParser::Part; |
|---|
| 303 | Â Â use strict; |
|---|
| 304 | Â Â use vars qw( $DEBUG ); |
|---|
| 305 | Â Â $DEBUG = \*STDERR; |
|---|
| 306 | # ------------------------------------------------------------------------ |
|---|
| 307 | sub new { |
|---|
| 308 | Â Â my $pkg = shift; |
|---|
| 309 | Â Â my $emp = shift or return;Â # MailParser object |
|---|
| 310 | Â Â my $selfbnd = shift; |
|---|
| 311 | Â Â ## $DEBUG and print $DEBUG "[EmailParser::Part::new] $pkg $emp $selfbnd\n"; |
|---|
| 312 | Â Â my $epart = {}; |
|---|
| 313 | Â Â bless $epart, $pkg; |
|---|
| 314 | Â Â $epart->{email} = $emp; |
|---|
| 315 | Â Â $epart->{offset} = $emp->tell(); |
|---|
| 316 | Â Â $epart->{boundary} = $selfbnd; |
|---|
| 317 | Â Â $epart->readHeader(); |
|---|
| 318 | Â Â $epart; |
|---|
| 319 | } |
|---|
| 320 | # ------------------------------------------------------------------------ |
|---|
| 321 | #  �إå��������� ------------------------------------------------------------------------ |
|---|
| 322 | sub readHeader { |
|---|
| 323 | Â Â my $epart = shift; |
|---|
| 324 | |
|---|
| 325 | Â Â my $emp = $epart->{email}; |
|---|
| 326 | Â Â my $selfbnd = $epart->{boundary}; |
|---|
| 327 | |
|---|
| 328 |   # �������ե��åȤ˰������  my $offset = $epart->{offset}; |
|---|
| 329 | Â Â $emp->seek( $offset ) if defined $offset; |
|---|
| 330 | |
|---|
| 331 |   # �إå��߹���  my $harray = []; |
|---|
| 332 | Â Â while( 1 ) { |
|---|
| 333 | Â Â Â Â my $line = $emp->getline(); |
|---|
| 334 | Â Â Â Â last unless defined $line; |
|---|
| 335 |     last if ( $line =~ /^$/ );   # �إå��λ |
|---|
| 336 | Â Â Â Â if ( $selfbnd && $line =~ /^--\Q$selfbnd\E(--)?[\r\n]$/ ){ |
|---|
| 337 |       return;           # �ѡ��Ƚ�λ���å�ʤ� |
|---|
| 338 | Â Â Â Â } |
|---|
| 339 | Â Â Â Â $line =~ s/[\r\n]+$//s; |
|---|
| 340 | Â Â Â Â push( @$harray, $line ); |
|---|
| 341 | Â Â } |
|---|
| 342 | Â Â $epart->{head_text} = join( "", @$harray ); |
|---|
| 343 | |
|---|
| 344 |   # �إå��å���Ÿ������  my $raw = {}; |
|---|
| 345 | Â Â my $key; |
|---|
| 346 | Â Â foreach ( @$harray ) { |
|---|
| 347 | Â Â Â Â if ( /^([\w\-]+)\:\s*(.*)$/ ) { |
|---|
| 348 | Â Â Â Â Â Â $key = uc($1); |
|---|
| 349 | Â Â Â Â Â Â my $val = $2; |
|---|
| 350 | Â Â Â Â Â Â $key =~ s/\W/_/g;Â Â Â Â Â Â |
|---|
| 351 | Â Â Â Â Â Â $raw->{$key} ||= []; |
|---|
| 352 | Â Â Â Â Â Â push( @{$raw->{$key}}, $val ); |
|---|
| 353 | Â Â Â Â } elsif ( $key ne "" && /^\s+(.*)/ ) { |
|---|
| 354 |       $raw->{$key}->[$#{$raw->{$key}}] .= $1; # ����ɲä��Ƥ��� |
|---|
| 355 | Â Â Â Â } else { |
|---|
| 356 | Â Â Â Â Â Â $key = ""; |
|---|
| 357 | Â Â Â Â } |
|---|
| 358 | Â Â } |
|---|
| 359 | Â Â $epart->{raw} = $raw; |
|---|
| 360 | |
|---|
| 361 |   # ʸ���ɤβ� |
|---|
| 362 | Â Â my $contype; |
|---|
| 363 | Â Â $contype = $raw->{CONTENT_TYPE}->[0] if ref $raw->{CONTENT_TYPE}; |
|---|
| 364 | Â Â $contype =~ s/\s+/ /sg; |
|---|
| 365 | Â Â my $charset; |
|---|
| 366 | Â Â if ( $contype =~ /\;\s*charset=["']?([\w\-\.]+)["']?/i ) { |
|---|
| 367 | Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] charset=$1\n"; |
|---|
| 368 | Â Â Â Â $charset = EmailParser::Util::valid_charset( $1 ); |
|---|
| 369 | Â Â } |
|---|
| 370 | Â Â $epart->{charset} = $charset; |
|---|
| 371 | |
|---|
| 372 |   # �⤷ charset= ������ɤ�ʬ���äƤ�����������Ƥ��� |
|---|
| 373 | |
|---|
| 374 | Â Â my $convto = $emp->getInternalCharset(); |
|---|
| 375 | Â Â $epart->convertCharset( $convto ) if $convto; |
|---|
| 376 | |
|---|
| 377 |   # Content-Type: �Ԥβ� |
|---|
| 378 | Â Â $contype = $raw->{CONTENT_TYPE}->[0] if ref $raw->{CONTENT_TYPE}; |
|---|
| 379 | Â Â $contype =~ s/\s+/ /sg; |
|---|
| 380 | Â Â my $format = ( $contype =~ m#^([\w\-\.]+/[\w\-\.]+)# )[0]; |
|---|
| 381 | Â Â $format =~ tr/A-Z/a-z/; |
|---|
| 382 | Â Â $epart->{format} = $format; |
|---|
| 383 | Â Â ## $DEBUG and print $DEBUG "[readHeader] format=$format\n"; |
|---|
| 384 | |
|---|
| 385 |   # �ҥѡ��ȤΥХ����  my $childbnd = ( $contype =~ /\;\s*boundary=["']?([^\s"']+)/i )[0]; |
|---|
| 386 | Â Â $epart->{child_boundary} = $childbnd; |
|---|
| 387 | Â Â ## $DEBUG and print $DEBUG "[readHeader] child_boundary=$childbnd\n"; |
|---|
| 388 | |
|---|
| 389 |   # Content-Transfer-Encoding: �Ԥβ�base64 quoted-pritable |
|---|
| 390 | |
|---|
| 391 | Â Â my $encoding = ( $raw->{CONTENT_TRANSFER_ENCODING}->[0] =~ /^([\w\-]+)/ )[0] |
|---|
| 392 | Â Â Â Â if ref $raw->{CONTENT_TRANSFER_ENCODING}; |
|---|
| 393 | Â Â $encoding =~ tr/A-Z/a-z/; |
|---|
| 394 | Â Â $epart->{encoding} = $encoding; |
|---|
| 395 | Â Â ## $DEBUG and print $DEBUG "[readHeader] encoding=$encoding\n"; |
|---|
| 396 | |
|---|
| 397 |   # Date: �Ԥβ� |
|---|
| 398 | Â Â my $srcdate; |
|---|
| 399 | Â Â $srcdate = $raw->{DATE}->[0] if ref $raw->{DATE}; |
|---|
| 400 |   $srcdate =~ s/[^\040-\176]+/ /g;      # ���߽� $srcdate ||= ( $raw->{RECEIVED}->[$#{$raw->{RECEIVED}}] =~ /;\s* |
|---|
| 401 | Â Â Â Â Â Â Â Â (\d+\s+[a-z]+\s+\d+\s+\d+:\d+:\d+\s+[\+\-]\d+(\:\d+)?) |
|---|
| 402 | Â Â Â Â Â Â Â Â (\D|$)/xi )[0] if ref $raw->{RECEIVED}; |
|---|
| 403 | Â Â ## $DEBUG and print $DEBUG "[readHeader] Date: $srcdate\n"; |
|---|
| 404 | Â Â if ( $DateTime::Format::Mail::VERSION && $srcdate ) { |
|---|
| 405 | Â Â Â Â my $dt; |
|---|
| 406 | Â Â Â Â eval { |
|---|
| 407 | Â Â Â Â Â Â my $dtfm = DateTime::Format::Mail->new()->loose(); |
|---|
| 408 | Â Â Â Â Â Â $dt = $dtfm->parse_datetime( $srcdate ); |
|---|
| 409 | Â Â Â Â }; |
|---|
| 410 | Â Â Â Â if ( ref $dt ) { |
|---|
| 411 |       my $tz = $emp->getInternalTimeZone();    # �����������ॾ����     $dt->set_time_zone( $tz ) if $tz;  # �����ॾ�����     $epart->{epoch} = $dt->epoch(); |
|---|
| 412 | Â Â Â Â Â Â my $datetime = $dt->ymd('/')." ".$dt->hms(':'); |
|---|
| 413 | Â Â Â Â Â Â $epart->{datetime} = $datetime; |
|---|
| 414 | Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] datetime=$datetime\n"; |
|---|
| 415 | Â Â Â Â } |
|---|
| 416 | Â Â } |
|---|
| 417 | |
|---|
| 418 |   # �ե��������ե�����ĥ�Ҥ򸡽�  { |
|---|
| 419 | Â Â Â Â my $filename = ( $contype =~ /\;\s*name=["']?([^\s"']+)/i )[0]; |
|---|
| 420 | Â Â Â Â $filename ||= ( $raw->{CONTENT_DISPOSITION}->[0] =~ |
|---|
| 421 | Â Â Â Â Â Â /\;\s*filename=["']?([^\s"']+)/i )[0] |
|---|
| 422 | Â Â Â Â Â Â if ref $raw->{CONTENT_DISPOSITION}; |
|---|
| 423 | Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] filename=$filename\n"; |
|---|
| 424 | Â Â Â Â $epart->{filename} = $filename; |
|---|
| 425 | |
|---|
| 426 | Â Â Â Â my $ext = ( $filename =~ m#[^\/\:\.]\.(\w[\w\-]*)$# )[0]; |
|---|
| 427 | Â Â Â Â my $FORMAT2EXT = EmailParser::Map::format2ext(); |
|---|
| 428 | Â Â Â Â $ext ||= $FORMAT2EXT->{$format} if $format; |
|---|
| 429 | Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] ext=$ext\n"; |
|---|
| 430 | Â Â Â Â $epart->{ext} = $ext; |
|---|
| 431 | Â Â } |
|---|
| 432 | |
|---|
| 433 |   # Subject: �Ԥ��  { |
|---|
| 434 | Â Â Â Â my $subject; |
|---|
| 435 | Â Â Â Â $subject = $raw->{SUBJECT}->[0] if ref $raw->{SUBJECT}; |
|---|
| 436 | Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] subject=$subject\n"; |
|---|
| 437 | Â Â Â Â $epart->{subject} = $subject; |
|---|
| 438 | Â Â } |
|---|
| 439 | |
|---|
| 440 |   # X-Priority: �Ԥ��  { |
|---|
| 441 | Â Â Â Â my $priority; |
|---|
| 442 | Â Â Â Â $priority = $raw->{PRIORITY}->[0] if ref $raw->{PRIORITY}; |
|---|
| 443 | Â Â Â Â $priority ||= $raw->{X_PRIORITY}->[0] if ref $raw->{X_PRIORITY}; |
|---|
| 444 | Â Â Â Â $priority = ( $priority =~ /(\d+)/ )[0]; |
|---|
| 445 | Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] priority=$priority\n"; |
|---|
| 446 | Â Â Â Â $epart->{priority} = $priority; |
|---|
| 447 | Â Â } |
|---|
| 448 | |
|---|
| 449 |   # X-ML-Name: �Ԥ��  { |
|---|
| 450 | Â Â Â Â my $mlname; |
|---|
| 451 | Â Â Â Â $mlname = $raw->{X_ML_NAME}->[0] if ref $raw->{X_ML_NAME}; |
|---|
| 452 | Â Â Â Â $mlname ||= $raw->{X_SEQUENCE}->[0] if ref $raw->{X_SEQUENCE}; |
|---|
| 453 | Â Â Â Â $mlname =~ s/\s.*$//s; |
|---|
| 454 | Â Â Â Â ## $DEBUG and print $DEBUG "[readHeader] ml_name=$mlname\n"; |
|---|
| 455 | Â Â Â Â $epart->{ml_name} = $mlname if $mlname; |
|---|
| 456 | Â Â } |
|---|
| 457 | |
|---|
| 458 |   # �إå�����ɥ쥹�ϤιԤ��  my $addrkey = { |
|---|
| 459 | Â Â Â Â "FROM"Â =>Â "addr_from", |
|---|
| 460 | Â Â Â Â "TO"Â Â =>Â "addr_to", |
|---|
| 461 | Â Â Â Â "CC"Â Â =>Â "addr_cc", |
|---|
| 462 | Â Â Â Â "BCC"Â Â =>Â "addr_bcc", |
|---|
| 463 |     "REPLY_TO"   => "addr_reply",    # Reply-To: |
|---|
| 464 |     "RETURN_PATH"  => "addr_return",   # Return-Path: |
|---|
| 465 |     "DELIVERED_TO" => "addr_deliv",    # Delivered-To: (qmail) |
|---|
| 466 |     "MESSAGE_ID"  => "message_id",    # Message-Id: |
|---|
| 467 |     "REFERENCES"  => "mess_refer",    # References: |
|---|
| 468 |     "IN_REPLY_TO"  => "mess_reply",    # In-Reply-To: |
|---|
| 469 | Â Â }; |
|---|
| 470 | Â Â foreach my $ikey ( keys %$addrkey ) { |
|---|
| 471 | Â Â Â Â next unless ref $raw->{$ikey}; |
|---|
| 472 | Â Â Â Â my $okey = $addrkey->{$ikey}; |
|---|
| 473 | Â Â Â Â my $list = []; |
|---|
| 474 | Â Â Â Â foreach my $line ( @{$raw->{$ikey}} ) { |
|---|
| 475 | Â Â Â Â Â Â my @pickup = EmailParser::Util::pickup_address( $line ); |
|---|
| 476 | Â Â Â Â Â Â push( @$list, @pickup ) if scalar @pickup; |
|---|
| 477 | Â Â Â Â } |
|---|
| 478 | Â Â Â Â $epart->{$okey} = $list if scalar $list; |
|---|
| 479 | Â Â } |
|---|
| 480 | |
|---|
| 481 |   # �إå���λ |
|---|
| 482 | Â Â $epart; |
|---|
| 483 | } |
|---|
| 484 | # ------------------------------------------------------------------------ |
|---|
| 485 | #  �ѡ��Ȥ��Ȥ˥ե������ؽ�Ϥ��� ------------------------------------------------------------------------ |
|---|
| 486 | sub toFile { |
|---|
| 487 | Â Â my $epart = shift; |
|---|
| 488 | Â Â my $file = shift; |
|---|
| 489 | Â Â ## $DEBUG and print $DEBUG "[toFile] $file\n"; |
|---|
| 490 | Â Â my $ofh = new IO::File( $file, "w" ) or die "$! - $file\n"; |
|---|
| 491 | Â Â $epart->toHandle( $ofh, @_ ); |
|---|
| 492 | Â Â $ofh->close(); |
|---|
| 493 | Â Â $file; |
|---|
| 494 | } |
|---|
| 495 | # ------------------------------------------------------------------------ |
|---|
| 496 | sub toArray { |
|---|
| 497 | Â Â my $epart = shift; |
|---|
| 498 | Â Â ## $DEBUG and print $DEBUG "[toArray]\n"; |
|---|
| 499 | Â Â my $array = []; |
|---|
| 500 | Â Â my $ofh = new IO::ScalarArray( $array ) or die "IO::ScalarArray failed.\n"; |
|---|
| 501 | Â Â $epart->toHandle( $ofh, @_ ); |
|---|
| 502 | Â Â $ofh->close(); |
|---|
| 503 | Â Â $array; |
|---|
| 504 | } |
|---|
| 505 | # ------------------------------------------------------------------------ |
|---|
| 506 | sub toString { |
|---|
| 507 | Â Â my $epart = shift; |
|---|
| 508 | Â Â # $DEBUG and print $DEBUG "[toString]\n"; |
|---|
| 509 | Â Â my $string = ""; |
|---|
| 510 | Â Â my $ofh = new IO::Scalar( \$string ) or die "IO::Scalar failed.\n"; |
|---|
| 511 | Â Â $epart->toHandle( $ofh, @_ ); |
|---|
| 512 | Â Â $ofh->close(); |
|---|
| 513 | Â Â $string; |
|---|
| 514 | } |
|---|
| 515 | # ------------------------------------------------------------------------ |
|---|
| 516 | sub toHandle { |
|---|
| 517 | Â Â my $epart = shift; |
|---|
| 518 |   my $ofh = shift;    # �������  my $ocode = shift;   # ��Ϥ�������  # $DEBUG and print $DEBUG "[toHandle] ",ref($ofh)," $ocode\n"; |
|---|
| 519 | |
|---|
| 520 |   # ������ʸ���ɡʥƥ����ȥѡ��Ȥ��Ľ������ɻ���Τ�Ѵ���  my $icode; |
|---|
| 521 | Â Â if ( $Encode::VERSION && $ocode && $epart->{type} =~ m#^text/#i ) { |
|---|
| 522 | Â Â Â Â # $DEBUG and print $DEBUG "[toHandle] icode=$icode ocode=$ocode\n"; |
|---|
| 523 | Â Â Â Â $icode = $epart->{charset}; |
|---|
| 524 | Â Â } |
|---|
| 525 | |
|---|
| 526 |   # �Х�������� |
|---|
| 527 |   my $selfbnd = $epart->{boundary};      # ���ѡ��ȤΥХ����  my $childbnd = $epart->{child_boundary};  # �ҥѡ��ȤΥХ����  # $DEBUG and print $DEBUG "[toHandle] self_boundary=$selfbnd\n"; |
|---|
| 528 | Â Â # $DEBUG and print $DEBUG "[toHandle] child_boundary=$childbnd\n"; |
|---|
| 529 | |
|---|
| 530 |   # �������ե��åȤ˰������  my $emp = $epart->{email}; |
|---|
| 531 | Â Â my $offset = $epart->{offset}; |
|---|
| 532 | Â Â $emp->seek( $offset ) if defined $offset; |
|---|
| 533 | |
|---|
| 534 |   # �إå����� |
|---|
| 535 | |
|---|
| 536 | Â Â while( 1 ) { |
|---|
| 537 | Â Â Â Â my $line = $emp->getline(); |
|---|
| 538 | Â Â Â Â last unless defined $line; |
|---|
| 539 |     last if ( $line =~ /^$/ );   # �إå��λ |
|---|
| 540 | Â Â Â Â return if ( $selfbnd && $line =~ /^--\Q$selfbnd\E(--)?[\r\n]$/ ); |
|---|
| 541 | Â Â } |
|---|
| 542 | |
|---|
| 543 |   # �ޥ��ѡ��ȷ���ʤ鿿 |
|---|
| 544 | Â Â my $is_multi = ( $childbnd && $epart->{format} =~ m#^multipart/# ); |
|---|
| 545 | |
|---|
| 546 |   # ���󥳡��ǥ���  my $enc64 = ( $epart->{encoding} eq "base64" ); |
|---|
| 547 | Â Â my $encQT = ( $epart->{encoding} eq "quoted-printable" ); |
|---|
| 548 | |
|---|
| 549 |   # �ܥǥ���ƥϥ����Ϥ���  my $b64rest; |
|---|
| 550 | Â Â my $multi_started; |
|---|
| 551 | Â Â while( 1 ) { |
|---|
| 552 | Â Â Â Â my $line = $emp->getline(); |
|---|
| 553 | Â Â Â Â last unless defined $line; |
|---|
| 554 | Â Â Â Â next if $multi_started; |
|---|
| 555 | Â Â Â Â if ( $is_multi && $line =~ /^--\Q$childbnd\E(--)?[\r\n]*$/ ) { |
|---|
| 556 | Â Â Â Â Â Â $multi_started ++; |
|---|
| 557 | Â Â Â Â Â Â next; |
|---|
| 558 | Â Â Â Â } |
|---|
| 559 | Â Â Â Â if ( $selfbnd && $line =~ /^--\Q$selfbnd\E(--)?[\r\n]*$/ ) { |
|---|
| 560 |       $epart->{has_next} ++ unless $1;  # ³���Υѡ��Ȥ�����      last; |
|---|
| 561 | Â Â Â Â } |
|---|
| 562 | Â Â Â Â $line =~ s/[\r\n]*$/\n/s; |
|---|
| 563 | Â Â Â Â if ( $enc64 ) { |
|---|
| 564 | Â Â Â Â Â Â chomp $line; |
|---|
| 565 | Â Â Â Â Â Â if ( $b64rest ne "" ) { |
|---|
| 566 | Â Â Â Â Â Â Â Â $line = $b64rest.$line; |
|---|
| 567 | Â Â Â Â Â Â Â Â $b64rest = undef; |
|---|
| 568 | Â Â Â Â Â Â } |
|---|
| 569 | Â Â Â Â Â Â my $b64len = length($line); |
|---|
| 570 | Â Â Â Â Â Â ## $DEBUG and print $DEBUG "[toHandle] base64_length=$b64len\n"; |
|---|
| 571 |       if ( $b64len % 4 == 0 ) {     # 4��ܿ��� |
|---|
| 572 | Â Â Â Â Â Â Â Â $line = MIME::Base64::decode_base64($line); |
|---|
| 573 | Â Â Â Â Â Â } else { |
|---|
| 574 |         $b64rest = $line;          # ������ |
|---|
| 575 | Â Â Â Â Â Â Â Â next; |
|---|
| 576 | Â Â Â Â Â Â } |
|---|
| 577 | Â Â Â Â } elsif ( $encQT ) { |
|---|
| 578 | Â Â Â Â Â Â $line =~ s/=[\r\n]*$//s; |
|---|
| 579 | Â Â Â Â Â Â $line =~ s/=([0-9a-f]{2})/pack(C=>hex($1))/gei; |
|---|
| 580 | Â Â Â Â } |
|---|
| 581 | Â Â Â Â Encode::from_to( $line, $icode, $ocode ) if $icode; |
|---|
| 582 | Â Â Â Â $ofh->print( $line ); |
|---|
| 583 | Â Â } |
|---|
| 584 | Â Â if ( $b64rest ne "" ) { |
|---|
| 585 | Â Â Â Â my $line = MIME::Base64::decode_base64($b64rest); |
|---|
| 586 | Â Â Â Â Encode::from_to( $line, $icode, $ocode ) if $icode; |
|---|
| 587 |     $ofh->print( $line );  # �Ĥ꤬���ä� |
|---|
| 588 | Â Â } |
|---|
| 589 | |
|---|
| 590 | Â Â $ofh; |
|---|
| 591 | } |
|---|
| 592 | # ------------------------------------------------------------------------ |
|---|
| 593 | #  �ѡ��������إå�������ɤ������  �ʸ������ɤ�Ѵ����ޤ���------------------------------------------------------------------------ |
|---|
| 594 | sub convertCharset { |
|---|
| 595 | Â Â my $epart = shift; |
|---|
| 596 |   my $convto = shift or return;        # ����ʸ����  ## $DEBUG and print $DEBUG "[convertCharset] convto=$convto\n"; |
|---|
| 597 | Â Â return unless $Encode::VERSION; |
|---|
| 598 | |
|---|
| 599 | Â Â my $convfrom = $epart->{current_charset} || $epart->{charset}; |
|---|
| 600 | Â Â ## $DEBUG and print $DEBUG "[convertCharset] convfrom=$convfrom\n"; |
|---|
| 601 |   return unless $convfrom;          # ���ߤ�����ɤ��� |
|---|
| 602 | |
|---|
| 603 | #  return if ( lc($convto) eq lc($convfrom) ); # ���������ɤǤ�������  # $epart->{XXXX} �γ�ͤ�����ɤ������  foreach my $key ( keys %$epart ) { |
|---|
| 604 |     next if ref $epart->{$key};       # �ϥå��塦���Ͻ�   Encode::from_to( $epart->{$key}, $convfrom, $convto ); |
|---|
| 605 | Â Â Â Â EmailParser::Util::decode_mime_head( $convto, $epart->{$key} ); |
|---|
| 606 | Â Â Â Â $epart->{$key} =~ s/[\000-\040\177]/ /gs; |
|---|
| 607 | Â Â } |
|---|
| 608 | |
|---|
| 609 |   # $epart->{raw}->{XXXX} �γ�ͤ�����ɤ������  foreach my $key ( keys %{$epart->{raw}} ) { |
|---|
| 610 | Â Â Â Â foreach my $val ( @{$epart->{raw}->{$key}} ) { |
|---|
| 611 | Â Â Â Â Â Â Encode::from_to( $val, $convfrom, $convto ); |
|---|
| 612 | Â Â Â Â Â Â EmailParser::Util::decode_mime_head( $convto, $val ); |
|---|
| 613 | Â Â Â Â Â Â $val =~ s/[\000-\040\177]/ /gs; |
|---|
| 614 | Â Â Â Â } |
|---|
| 615 | Â Â } |
|---|
| 616 | |
|---|
| 617 | Â Â $epart->{current_charset} = $convto; |
|---|
| 618 | } |
|---|
| 619 | # ------------------------------------------------------------------------ |
|---|
| 620 | Â Â package EmailParser::Util; |
|---|
| 621 | Â Â use strict; |
|---|
| 622 | Â Â use vars qw( $DEBUG ); |
|---|
| 623 | Â Â $DEBUG = \*STDERR; |
|---|
| 624 | # ------------------------------------------------------------------------ |
|---|
| 625 | #  ʸ���ɤλ�꤬�������������� ------------------------------------------------------------------------ |
|---|
| 626 | sub valid_charset { |
|---|
| 627 | Â Â my $charset = shift or return; |
|---|
| 628 | |
|---|
| 629 |   # �̾��� |
|---|
| 630 | Â Â my $CHARSET_ALIAS = { |
|---|
| 631 | Â Â Â Â "sjis"Â =>Â "Shift_JIS", |
|---|
| 632 | Â Â Â Â "euc"Â Â =>Â "EUC-JP", |
|---|
| 633 | Â Â Â Â "jis"Â Â =>Â "ISO-2022-JP", |
|---|
| 634 | Â Â Â Â "utf-8" =>Â "utf8", |
|---|
| 635 | Â Â }; |
|---|
| 636 | Â Â if ( defined $CHARSET_ALIAS->{lc($charset)} ) { |
|---|
| 637 | Â Â Â Â $charset = $CHARSET_ALIAS->{lc($charset)}; |
|---|
| 638 | Â Â } |
|---|
| 639 | |
|---|
| 640 |   # Encode ��������ʾ��ϡ���������ɤ��������  if ( defined $Encode::VERSION ) { |
|---|
| 641 | Â Â Â Â $charset = Encode::resolve_alias($charset) or return undef; |
|---|
| 642 | Â Â } |
|---|
| 643 | |
|---|
| 644 |   $charset;  # �����ɳ����λ |
|---|
| 645 | } |
|---|
| 646 | # ------------------------------------------------------------------------ |
|---|
| 647 | #  �إå�����ɤ�ѹ� |
|---|
| 648 | # ------------------------------------------------------------------------ |
|---|
| 649 | sub decode_mime_head { |
|---|
| 650 | Â Â my $convto = shift or return; |
|---|
| 651 | Â Â return unless defined $Encode::VERSION; |
|---|
| 652 | Â Â foreach ( @_ ) { |
|---|
| 653 | Â Â Â Â s{ |
|---|
| 654 | Â Â Â Â Â Â \=\?([\w\-]+)\?([BQ])\?([^\?]+?)\?\= |
|---|
| 655 | Â Â Â Â }{ |
|---|
| 656 | Â Â Â Â Â Â my( $charset, $method, $str ) = ( $1, $2, $3 ); |
|---|
| 657 | Â Â Â Â Â Â my $convfrom = EmailParser::Util::valid_charset($charset); |
|---|
| 658 | Â Â Â Â Â Â if ( $convfrom ) { |
|---|
| 659 | Â Â Â Â Â Â Â Â if ( $method eq "Q" ) { |
|---|
| 660 | Â Â Â Â Â Â Â Â Â Â $str =~ s/=([a-z0-9]{2})/pack(C=>hex($1))/gei; |
|---|
| 661 | Â Â Â Â Â Â Â Â } else { |
|---|
| 662 | Â Â Â Â Â Â Â Â Â Â $str = MIME::Base64::decode_base64($str); |
|---|
| 663 | Â Â Â Â Â Â Â Â } |
|---|
| 664 | Â Â Â Â Â Â Â Â if ( lc($convfrom) ne lc($convto) ) { |
|---|
| 665 | Â Â Â Â Â Â Â Â Â Â Encode::from_to( $str, $convfrom, $convto ); |
|---|
| 666 | Â Â Â Â Â Â Â Â } |
|---|
| 667 | Â Â Â Â Â Â } |
|---|
| 668 | Â Â Â Â Â Â $str; |
|---|
| 669 | Â Â Â Â }iegx; |
|---|
| 670 | Â Â } |
|---|
| 671 | } |
|---|
| 672 | # ------------------------------------------------------------------------ |
|---|
| 673 | #  �᡼�륢�ɥ쥹��������� ------------------------------------------------------------------------ |
|---|
| 674 | sub pickup_address { |
|---|
| 675 | Â Â my $copy = "" . $_[0]; |
|---|
| 676 | Â Â my $list = []; |
|---|
| 677 | Â Â while ( $copy =~ /\@/ ) { |
|---|
| 678 |     # �᡼�륢�ɥ쥹�κ�¦�Υ����Ȥ򳰤� |
|---|
| 679 | Â Â Â Â $copy =~ s#^( \s*"([^"]|\")*"\s* | [^"][^\@]*\s+ )##xs; |
|---|
| 680 |     # �᡼�륢�ɥ쥹��Ф���    if ( $copy =~ s#( |
|---|
| 681 | Â Â Â Â Â Â (?: " [\x21\x23-\x26\x2a\x2b\x2d-\x3b\x3d\x3f\x41-\x7e]+ " | |
|---|
| 682 | Â Â Â Â Â Â Â Â Â [\x21\x23-\x26\x2a\x2b\x2d-\x3b\x3d\x3f\x41-\x7e]+ ) \@ |
|---|
| 683 | Â Â Â Â Â Â (?: (?:[\w\-]+\.)+\w+ | \[\d+\.\d+\.\d+\.\d+\] ))##xs ) { |
|---|
| 684 | Â Â Â Â Â Â push( @$list, $1 ); |
|---|
| 685 | Â Â Â Â } else { |
|---|
| 686 | Â Â Â Â Â Â last; |
|---|
| 687 | Â Â Â Â } |
|---|
| 688 |     # �᡼�륢�ɥ쥹�α�¦�Υ����Ȥ򳰤� |
|---|
| 689 | Â Â Â Â $copy =~ s#^ [\<\>\s]* \(.*?\) \s* ##xs; |
|---|
| 690 | Â Â Â Â $copy =~ s#^ [^,\@]* , \s* ##xs; |
|---|
| 691 | Â Â } |
|---|
| 692 | Â Â return unless scalar @$list; |
|---|
| 693 | Â Â @$list; |
|---|
| 694 | } |
|---|
| 695 | # ------------------------------------------------------------------------ |
|---|
| 696 | Â Â package EmailParser::Map; |
|---|
| 697 | Â Â use strict; |
|---|
| 698 | # ------------------------------------------------------------------------ |
|---|
| 699 | #  �ɥᥤ�󤴤ȤΥǥե����Υ᡼�����ȡʼ�SPAM��# ------------------------------------------------------------------------ |
|---|
| 700 | sub domain2charset { |
|---|
| 701 | Â Â my $domain2charset = { |
|---|
| 702 | Â Â Â Â "hanmail.net"Â Â =>Â "EUC-KR", |
|---|
| 703 | Â Â Â Â "korea.com"Â Â Â =>Â "EUC-KR", |
|---|
| 704 | Â Â Â Â "sinamail.com"Â =>Â "Big5", |
|---|
| 705 | Â Â Â Â "hinet.net"Â Â Â =>Â "Big5", |
|---|
| 706 | Â Â }; |
|---|
| 707 | } |
|---|
| 708 | # ------------------------------------------------------------------------ |
|---|
| 709 | #  �񤴤ȤΥǥե����Υ᡼������# ------------------------------------------------------------------------ |
|---|
| 710 | sub country2charset { |
|---|
| 711 | Â Â my $country2charset = { |
|---|
| 712 |     cn => "GB2312",    # ���   hk => "Big5",     # �� |
|---|
| 713 |     tw => "Big5",     # �� |
|---|
| 714 |     kr => "EUC-KR",    # �ڹ�   ru => "koi8-r",    # ï¿½í¥·ï¿½ï¿½ |
|---|
| 715 |     us => "iso-8859-1",  # ������ |
|---|
| 716 | Â Â }; |
|---|
| 717 | } |
|---|
| 718 | # ------------------------------------------------------------------------ |
|---|
| 719 | #  ���������פȳ���Ҥ�б� |
|---|
| 720 | # ------------------------------------------------------------------------ |
|---|
| 721 | sub format2ext { |
|---|
| 722 | Â Â my $format2ext = { |
|---|
| 723 | Â Â Â Â "text/plain"Â Â =>Â "txt", |
|---|
| 724 | Â Â Â Â "text/html"Â Â Â =>Â "html", |
|---|
| 725 | Â Â Â Â "image/jpeg"Â Â =>Â "jpg", |
|---|
| 726 | Â Â Â Â "image/gif"Â Â Â =>Â "gif", |
|---|
| 727 | Â Â Â Â "application/vnd.ms-excel"Â =>Â "xls", |
|---|
| 728 | Â Â }; |
|---|
| 729 | } |
|---|
| 730 | # ------------------------------------------------------------------------ |
|---|
| 731 | ;1; |
|---|
| 732 | # ------------------------------------------------------------------------ |
|---|