Changeset 21574 for lang/perl/LinguaJARegularUnicode
 Timestamp:
 10/18/08 12:39:42 (8 years ago)
 Location:
 lang/perl/LinguaJARegularUnicode/trunk
 Files:

 3 modified
 1 copied
Legend:
 Unmodified
 Added
 Removed

lang/perl/LinguaJARegularUnicode/trunk/lib/Lingua/JA/Regular/Unicode.pm
r21573 r21574 6 6 use Exporter 'import'; 7 7 8 our @EXPORT = qw/ hiragana2katakana alnum_z2h space_z2h katakana2hiragana katakana_h2z /;8 our @EXPORT = qw/ hiragana2katakana alnum_z2h space_z2h katakana2hiragana katakana_h2z katakana_z2h/; 9 9 10 10 # regexp is generated by tools/createmap.pl … … 39 39 ); 40 40 41 my %katakana_z2h_map = ( 42 "\x{30B6}" => "\x{FF7B}\x{FF9E}", 43 "\x{30D1}" => "\x{FF8A}\x{FF9F}", 44 "\x{30C7}" => "\x{FF83}\x{FF9E}", 45 "\x{30D4}" => "\x{FF8B}\x{FF9F}", 46 "\x{30BE}" => "\x{FF7F}\x{FF9E}", 47 "\x{30BC}" => "\x{FF7E}\x{FF9E}", 48 "\x{30AE}" => "\x{FF77}\x{FF9E}", 49 "\x{30D6}" => "\x{FF8C}\x{FF9E}", 50 "\x{30C0}" => "\x{FF80}\x{FF9E}", 51 "\x{30DA}" => "\x{FF8D}\x{FF9F}", 52 "\x{30D0}" => "\x{FF8A}\x{FF9E}", 53 "\x{30D3}" => "\x{FF8B}\x{FF9E}", 54 "\x{30C5}" => "\x{FF82}\x{FF9E}", 55 "\x{30F4}" => "\x{FF73}\x{FF9E}", 56 "\x{30B0}" => "\x{FF78}\x{FF9E}", 57 "\x{30B8}" => "\x{FF7C}\x{FF9E}", 58 "\x{30B4}" => "\x{FF7A}\x{FF9E}", 59 "\x{30D7}" => "\x{FF8C}\x{FF9F}", 60 "\x{30D9}" => "\x{FF8D}\x{FF9E}", 61 "\x{30C2}" => "\x{FF81}\x{FF9E}", 62 "\x{30BA}" => "\x{FF7D}\x{FF9E}", 63 "\x{30DD}" => "\x{FF8E}\x{FF9F}", 64 "\x{30DC}" => "\x{FF8E}\x{FF9E}", 65 "\x{30B2}" => "\x{FF79}\x{FF9E}", 66 "\x{30AC}" => "\x{FF76}\x{FF9E}", 67 "\x{30C9}" => "\x{FF84}\x{FF9E}" 68 ); 69 41 70 sub alnum_z2h { 42 71 local $_ = shift; … … 72 101 } 73 102 103 sub katakana_z2h { 104 local $_ = shift; 105 s/(\x{30BC}\x{30C9}\x{30C0}\x{30AC}\x{30D3}\x{30BA}\x{30D4}\x{30F4}\x{30B2}\x{30B6}\x{30DA}\x{30D1}\x{30D0}\x{30C7}\x{30B4}\x{30D7}\x{30D9}\x{30C5}\x{30DD}\x{30D6}\x{30AE}\x{30C2}\x{30B8}\x{30B0}\x{30DC}\x{30BE})/$katakana_z2h_map{$1}/ge;; 106 tr/\x{30E3}\x{30E9}\x{30BB}\x{30DE}\x{30C6}\x{30A7}\x{30DB}\x{30D2}\x{3002}\x{30A1}\x{30B7}\x{30CD}\x{30BF}\x{30D5}\x{30AD}\x{30E2}\x{30E1}\x{30ED}\x{30CB}\x{30E6}\x{30A5}\x{30CC}\x{30E7}\x{30E4}\x{30B3}\x{30A3}\x{30D8}\x{30C3}\x{30EF}\x{30A4}\x{30B1}\x{30CA}\x{3001}\x{300C}\x{30EC}\x{30A8}\x{309C}\x{30A2}\x{30BD}\x{30C4}\x{309B}\x{30F2}\x{30FC}\x{30F3}\x{30A6}\x{30B9}\x{30EA}\x{30AB}\x{300D}\x{30E0}\x{30A9}\x{30EB}\x{30E5}\x{30E8}\x{30CF}\x{30CE}\x{30AF}\x{30FB}\x{30AA}\x{30B5}\x{30C8}\x{30DF}\x{30C1}/\x{FF6C}\x{FF97}\x{FF7E}\x{FF8F}\x{FF83}\x{FF6A}\x{FF8E}\x{FF8B}\x{FF61}\x{FF67}\x{FF7C}\x{FF88}\x{FF80}\x{FF8C}\x{FF77}\x{FF93}\x{FF92}\x{FF9B}\x{FF86}\x{FF95}\x{FF69}\x{FF87}\x{FF6E}\x{FF94}\x{FF7A}\x{FF68}\x{FF8D}\x{FF6F}\x{FF9C}\x{FF72}\x{FF79}\x{FF85}\x{FF64}\x{FF62}\x{FF9A}\x{FF74}\x{FF9F}\x{FF71}\x{FF7F}\x{FF82}\x{FF9E}\x{FF66}\x{FF70}\x{FF9D}\x{FF73}\x{FF7D}\x{FF98}\x{FF76}\x{FF63}\x{FF91}\x{FF6B}\x{FF99}\x{FF6D}\x{FF96}\x{FF8A}\x{FF89}\x{FF78}\x{FF65}\x{FF75}\x{FF7B}\x{FF84}\x{FF90}\x{FF81}/; 107 $_; 108 } 109 74 110 1; 75 111 __END__ … … 103 139 104 140 =item katakana_z2h 141 142 convert katakanas ZENKAKU to HANKAKU. 143 144 =item katakana_h2z 105 145 106 146 convert katakanas HANKAKU to ZENKAKU. 
lang/perl/LinguaJARegularUnicode/trunk/t/06_katakana_z2h.t
r21573 r21574 7 7 run { 8 8 my $block = shift; 9 is katakana_ h2z($block>input), $block>expected;9 is katakana_z2h($block>input), $block>expected; 10 10 } 11 11 … … 14 14 === 15 15  input: およよＡＢＣＤＥＦＧｂｆｅge１２３123オヨヨｵﾖﾖ 16  expected: およよＡＢＣＤＥＦＧｂｆｅge１２３123 オヨヨオヨヨ16  expected: およよＡＢＣＤＥＦＧｂｆｅge１２３123ｵﾖﾖｵﾖﾖ 17 17 18 18 === 19  input: ｶﾞ20  expected: ガ19  input: ガ 20  expected: ｶﾞ 21 21 
lang/perl/LinguaJARegularUnicode/trunk/tools/createmap.pl
r21573 r21574 86 86 } 87 87 88 sub hankata2zenkata{88 sub katakana_h2z { 89 89 my $c = sub { sprintf "\\x{%X}", unpack 'U*', decode('eucjp', shift) }; 90 90 … … 118 118 } 119 119 120 for my $meth (qw/alnum_z2h hiragana2katakana katakana2hiragana hankata2zenkata/) { 120 sub katakana_z2h { 121 my $c = sub { sprintf "\\x{%X}", unpack 'U*', decode('eucjp', shift) }; 122 123 my @res; 124 125 push @res, sub { 126 # dakuten 127 my (@z, %z2h); 128 while (my ($h, $z) = each %Encode::JP::H2Z::_D2Z) { 129 my $hhex = join('', map { sprintf '\x{%X}', unpack 'U*', $_ } split //, decode('eucjp', $h)); 130 push @z, $c>($z); 131 $z2h{$c>($z)} = $hhex; 132 } 133 return join("\n", 134 Dumper(\%z2h), 135 join('', "s/(", join('', @z), ')/$z2h{$1}/ge;'), 136 ); 137 }>(); 138 139 push @res, sub { 140 # normal 141 my (@h, @z); 142 while (my ($h, $z) = each %Encode::JP::H2Z::_H2Z) { 143 push @z, $c>($z); 144 push @h, $c>($h); 145 } 146 return join('', "tr/", join('', @z), "/", join('', @h), "/;"); 147 }>(); 148 149 return join "\n", @res; 150 } 151 152 for my $meth (qw/alnum_z2h hiragana2katakana katakana2hiragana katakana_h2z katakana_z2h/) { 121 153 say " $meth"; 122 154 say sub { goto &{$meth} }>(); 
lang/perl/LinguaJARegularUnicode/trunk/xt/01_podspell.t
r21525 r21574 39 39 katakana 40 40 regularizer 41 katakanas