Changeset 7880 for lang/perl/Acme-Shukugawa-Atom
- Timestamp: 03/13/08 10:42:55 (9 months ago)
- Location: lang/perl/Acme-Shukugawa-Atom/trunk
- Files: 2 modified
  - lib/Acme/Shukugawa/Atom.pm (modified) (5 diffs)
  - t/01_basic.t (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- lang/perl/Acme-Shukugawa-Atom/trunk/lib/Acme/Shukugawa/Atom.pm
r7878 r7880 22 22 } 23 23 24 sub preprocess 24 # Special case handling -- this could be optimized further 25 # put it in a sharefile later 26 my (@SPECIAL, $EXCEPTION); 27 BEGIN 25 28 { 26 my ($self, $strref) = @_; 27 28 # Special case handling -- this could be optimized further 29 # put it in a sharefile later 30 my @special = ( 29 @SPECIAL = ( 30 '銀座' => 'ザギン', 31 31 '別に' => 'ジリサワゴネタ', 32 32 '予約した' => 'バミった', … … 37 37 '(?:おおきい|大きい)(?:のか?|か)?' => 'カイデー', 38 38 ); 39 $EXCEPTION = decode_utf8(join("|", 40 map { $SPECIAL[$_ * 2 + 1] } (0..$#SPECIAL/2) )); 41 } 39 42 40 for(0..$#special/2) { 41 my $pattern = $special[$_ * 2]; 42 my $replace = $special[$_ * 2 + 1]; 43 sub preprocess 44 { 45 my ($self, $strref) = @_; 46 47 for(0..$#SPECIAL/2) { 48 my $pattern = $SPECIAL[$_ * 2]; 49 my $replace = $SPECIAL[$_ * 2 + 1]; 43 50 $$strref =~ s/$pattern/$replace/g; 44 51 } … … 54 61 # in hiragana 55 62 my $ret = ''; 56 foreach (my $node = $mecab->parse($$strref);57 $node; $node = $node->next)58 {59 my $surface = decode_utf8($node->surface);60 next unless $surface;61 if ($surface =~ /^\p{InHiragana}+$/ || $surface =~ /^\p{InKatakana}+$/) {62 $ret .= $surface;63 } else {64 my $feature = decode_utf8($node->feature);65 63 66 if (my $yomi = (split(/,/, $feature))[8]) { 67 $ret .= $self->atomize($yomi) || $surface; 64 foreach my $text (split(/($EXCEPTION)/, $$strref)) { 65 if ($text =~ /$EXCEPTION/) { 66 $ret .= $text; 67 next; 68 } 69 70 foreach (my $node = $mecab->parse($text); $node; $node = $node->next) { 71 my $surface = decode_utf8($node->surface); 72 next unless $surface; 73 if ($surface =~ /^\p{InHiragana}+$/) { 74 $ret .= $surface; 68 75 } else { 69 $ret .= $surface; 76 my $feature = decode_utf8($node->feature); 77 78 if (my $yomi = (split(/,/, $feature))[8]) { 79 $ret .= $self->atomize($yomi) || $surface; 80 } else { 81 $ret .= $surface; 82 } 70 83 } 71 84 } … … 81 94 my $small = decode_utf8("[ャュョッー]"); 82 95 my $syllable = decode_utf8("(?:.$small?)"); 83 my $nbar = 
decode_utf8(" [^ンー]");96 my $nbar = decode_utf8("^ンー"); 84 97 sub apply_shisu_rule 85 98 { … … 95 108 my ($self, $yomi) = @_; 96 109 97 if ($yomi =~ s/^($ syllable[ー]?)(..)$/$2$1/) {110 if ($yomi =~ s/^(${syllable}[$nbar]?)([^$nbar].)$/$2$1/) { 98 111 $yomi =~ s/([^ー])$/$1ー/; 99 112 return $yomi; -
lang/perl/Acme-Shukugawa-Atom/trunk/t/01_basic.t
r7852 r7880 1 1 use strict; 2 2 use utf8; 3 use Test::More (tests => 4);3 use Test::More (tests => 7); 4 4 5 5 BEGIN … … 11 11 "六本木の胸の大きいお姉さんがいる店を予約した" 12 12 => "ギロッポンのパイオツカイデーチャンネーがいるセーミーをバミった" , 13 "ハワイ" => "ワイハー", 14 "寿司" => "シースー", 15 "銀座" => "ザギン", 13 16 "狼" => "カミオー", 17 # "鋏" => "サミハー", <- mecabの辞書にない? 14 18 "おばあさんの口はどうして大きいの?" => 15 19 "チャンバーのチークーはどうしてカイデー?"
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)