Show
Ignore:
Timestamp:
10/18/08 12:39:42 (6 years ago)
Author:
tokuhirom
Message:

added katakana_z2h

Location:
lang/perl/Lingua-JA-Regular-Unicode/trunk
Files:
3 modified
1 copied

Legend:

Unmodified
Added
Removed
  • lang/perl/Lingua-JA-Regular-Unicode/trunk/lib/Lingua/JA/Regular/Unicode.pm

    r21573 r21574  
    66use Exporter 'import'; 
    77 
    8 our @EXPORT = qw/ hiragana2katakana alnum_z2h space_z2h katakana2hiragana katakana_h2z/; 
     8our @EXPORT = qw/ hiragana2katakana alnum_z2h space_z2h katakana2hiragana katakana_h2z katakana_z2h/; 
    99 
    1010# regexp is generated by tools/createmap.pl 
     
    3939); 
    4040 
     41my %katakana_z2h_map = ( 
     42    "\x{30B6}" => "\x{FF7B}\x{FF9E}", 
     43    "\x{30D1}" => "\x{FF8A}\x{FF9F}", 
     44    "\x{30C7}" => "\x{FF83}\x{FF9E}", 
     45    "\x{30D4}" => "\x{FF8B}\x{FF9F}", 
     46    "\x{30BE}" => "\x{FF7F}\x{FF9E}", 
     47    "\x{30BC}" => "\x{FF7E}\x{FF9E}", 
     48    "\x{30AE}" => "\x{FF77}\x{FF9E}", 
     49    "\x{30D6}" => "\x{FF8C}\x{FF9E}", 
     50    "\x{30C0}" => "\x{FF80}\x{FF9E}", 
     51    "\x{30DA}" => "\x{FF8D}\x{FF9F}", 
     52    "\x{30D0}" => "\x{FF8A}\x{FF9E}", 
     53    "\x{30D3}" => "\x{FF8B}\x{FF9E}", 
     54    "\x{30C5}" => "\x{FF82}\x{FF9E}", 
     55    "\x{30F4}" => "\x{FF73}\x{FF9E}", 
     56    "\x{30B0}" => "\x{FF78}\x{FF9E}", 
     57    "\x{30B8}" => "\x{FF7C}\x{FF9E}", 
     58    "\x{30B4}" => "\x{FF7A}\x{FF9E}", 
     59    "\x{30D7}" => "\x{FF8C}\x{FF9F}", 
     60    "\x{30D9}" => "\x{FF8D}\x{FF9E}", 
     61    "\x{30C2}" => "\x{FF81}\x{FF9E}", 
     62    "\x{30BA}" => "\x{FF7D}\x{FF9E}", 
     63    "\x{30DD}" => "\x{FF8E}\x{FF9F}", 
     64    "\x{30DC}" => "\x{FF8E}\x{FF9E}", 
     65    "\x{30B2}" => "\x{FF79}\x{FF9E}", 
     66    "\x{30AC}" => "\x{FF76}\x{FF9E}", 
     67    "\x{30C9}" => "\x{FF84}\x{FF9E}" 
     68); 
     69 
    4170sub alnum_z2h { 
    4271    local $_ = shift; 
     
    72101} 
    73102 
     103sub katakana_z2h { 
     104    local $_ = shift; 
     105    s/(\x{30BC}|\x{30C9}|\x{30C0}|\x{30AC}|\x{30D3}|\x{30BA}|\x{30D4}|\x{30F4}|\x{30B2}|\x{30B6}|\x{30DA}|\x{30D1}|\x{30D0}|\x{30C7}|\x{30B4}|\x{30D7}|\x{30D9}|\x{30C5}|\x{30DD}|\x{30D6}|\x{30AE}|\x{30C2}|\x{30B8}|\x{30B0}|\x{30DC}|\x{30BE})/$katakana_z2h_map{$1}/ge;; 
     106    tr/\x{30E3}\x{30E9}\x{30BB}\x{30DE}\x{30C6}\x{30A7}\x{30DB}\x{30D2}\x{3002}\x{30A1}\x{30B7}\x{30CD}\x{30BF}\x{30D5}\x{30AD}\x{30E2}\x{30E1}\x{30ED}\x{30CB}\x{30E6}\x{30A5}\x{30CC}\x{30E7}\x{30E4}\x{30B3}\x{30A3}\x{30D8}\x{30C3}\x{30EF}\x{30A4}\x{30B1}\x{30CA}\x{3001}\x{300C}\x{30EC}\x{30A8}\x{309C}\x{30A2}\x{30BD}\x{30C4}\x{309B}\x{30F2}\x{30FC}\x{30F3}\x{30A6}\x{30B9}\x{30EA}\x{30AB}\x{300D}\x{30E0}\x{30A9}\x{30EB}\x{30E5}\x{30E8}\x{30CF}\x{30CE}\x{30AF}\x{30FB}\x{30AA}\x{30B5}\x{30C8}\x{30DF}\x{30C1}/\x{FF6C}\x{FF97}\x{FF7E}\x{FF8F}\x{FF83}\x{FF6A}\x{FF8E}\x{FF8B}\x{FF61}\x{FF67}\x{FF7C}\x{FF88}\x{FF80}\x{FF8C}\x{FF77}\x{FF93}\x{FF92}\x{FF9B}\x{FF86}\x{FF95}\x{FF69}\x{FF87}\x{FF6E}\x{FF94}\x{FF7A}\x{FF68}\x{FF8D}\x{FF6F}\x{FF9C}\x{FF72}\x{FF79}\x{FF85}\x{FF64}\x{FF62}\x{FF9A}\x{FF74}\x{FF9F}\x{FF71}\x{FF7F}\x{FF82}\x{FF9E}\x{FF66}\x{FF70}\x{FF9D}\x{FF73}\x{FF7D}\x{FF98}\x{FF76}\x{FF63}\x{FF91}\x{FF6B}\x{FF99}\x{FF6D}\x{FF96}\x{FF8A}\x{FF89}\x{FF78}\x{FF65}\x{FF75}\x{FF7B}\x{FF84}\x{FF90}\x{FF81}/; 
     107    $_; 
     108} 
     109 
    741101; 
    75111__END__ 
     
    103139 
    104140=item katakana_z2h 
     141 
     142convert katakanas ZENKAKU to HANKAKU. 
     143 
     144=item katakana_h2z 
    105145 
    106146convert katakanas HANKAKU to ZENKAKU. 
  • lang/perl/Lingua-JA-Regular-Unicode/trunk/t/06_katakana_z2h.t

    r21573 r21574  
    77run { 
    88    my $block = shift; 
    9     is katakana_h2z($block->input), $block->expected; 
     9    is katakana_z2h($block->input), $block->expected; 
    1010} 
    1111 
     
    1414=== 
    1515--- input:    およよABCDEFGbfege123123オヨヨオヨヨ 
    16 --- expected: およよABCDEFGbfege123123オヨヨオヨヨ 
     16--- expected: およよABCDEFGbfege123123オヨヨオヨヨ 
    1717 
    1818=== 
    19 --- input:    ガ 
    20 --- expected:  
     19--- input:     
     20--- expected: ガ 
    2121 
  • lang/perl/Lingua-JA-Regular-Unicode/trunk/tools/createmap.pl

    r21573 r21574  
    8686} 
    8787 
    88 sub hankata2zenkata { 
     88sub katakana_h2z { 
    8989    my $c = sub { sprintf "\\x{%X}", unpack 'U*', decode('euc-jp', shift) }; 
    9090 
     
    118118} 
    119119 
    120 for my $meth (qw/alnum_z2h hiragana2katakana katakana2hiragana hankata2zenkata/) { 
     120sub katakana_z2h { 
     121    my $c = sub { sprintf "\\x{%X}", unpack 'U*', decode('euc-jp', shift) }; 
     122 
     123    my @res; 
     124 
     125    push @res, sub { 
     126        # dakuten 
     127        my (@z, %z2h); 
     128        while (my ($h, $z) = each %Encode::JP::H2Z::_D2Z) { 
     129            my $hhex = join('', map { sprintf '\x{%X}', unpack 'U*', $_ } split //, decode('euc-jp', $h)); 
     130            push @z, $c->($z); 
     131            $z2h{$c->($z)} = $hhex; 
     132        } 
     133        return join("\n", 
     134            Dumper(\%z2h), 
     135            join('', "s/(", join('|', @z), ')/$z2h{$1}/ge;'), 
     136        ); 
     137    }->(); 
     138 
     139    push @res, sub { 
     140        # normal 
     141        my (@h, @z); 
     142        while (my ($h, $z) = each %Encode::JP::H2Z::_H2Z) { 
     143            push @z, $c->($z); 
     144            push @h, $c->($h); 
     145        } 
     146        return join('', "tr/", join('', @z), "/", join('', @h), "/;"); 
     147    }->(); 
     148 
     149    return join "\n", @res; 
     150} 
     151 
     152for my $meth (qw/alnum_z2h hiragana2katakana katakana2hiragana katakana_h2z katakana_z2h/) { 
    121153    say "-- $meth"; 
    122154    say sub { goto &{$meth} }->(); 
  • lang/perl/Lingua-JA-Regular-Unicode/trunk/xt/01_podspell.t

    r21525 r21574  
    3939katakana 
    4040regularizer 
     41katakanas