| 1647 | | if( $content2 =~ m{<meta\s+http-equiv\s*=\s*(["'])Content-Type\1\s+content\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\2\s*/?>}i ) |
| 1648 | | { |
| 1649 | | my $e = lc($3); |
| 1650 | | $enc = $e =~ /s\w*jis/ ? 'sjis' |
| 1651 | | : $e =~ /euc/ ? 'euc' |
| 1652 | | : $e =~ /utf-?8/ ? 'utf8' |
| 1653 | | : $e =~ /iso-2022-jp/ ? 'jis' |
| 1654 | | : $e =~ /\bjis\b/ ? 'jis' |
| 1655 | | : $enc; |
| 1656 | | $DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from meta ($e)"); |
| 1657 | | } |
| 1658 | | if( $enc eq 'auto' && $headers->{'Content-Type'} && $headers->{'Content-Type'} =~ /;\s*charset=(\S+)/ ) |
| | 1652 | if( $headers->{'Content-Type'} && $headers->{'Content-Type'} =~ /;\s*charset=(\S+)/ ) |
| | 1662 | } |
| | 1663 | if( $enc eq 'auto' && $content2 =~ m{ |
| | 1664 | <meta(?:\s[^>]*?)?\s |
| | 1665 | (?:http-equiv\s*=\s*(["'])Content-Type\1(?:\s[^>]*?)?\scontent\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\2 |
| | 1666 | |content\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\4(?:\s[^>]+?)?\shttp-equiv\s*=\s*(["'])Content-Type\6) |
| | 1667 | (?:\s[^>]*|/)?> |
| | 1668 | }ix ) |
| | 1669 | { |
| | 1670 | my $e = lc($3 || $5); |
| | 1671 | $enc = $e =~ /s\w*jis/ ? 'sjis' |
| | 1672 | : $e =~ /euc/ ? 'euc' |
| | 1673 | : $e =~ /utf-?8/ ? 'utf8' |
| | 1674 | : $e =~ /iso-2022-jp/ ? 'jis' |
| | 1675 | : $e =~ /\bjis\b/ ? 'jis' |
| | 1676 | : $enc; |
| | 1677 | $DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from meta ($e)"); |