Show
Ignore:
Timestamp:
03/13/08 19:51:16 (5 years ago)
Author:
daisuke
Message:

lang/perl/Acme-Shukugawa-Atom; Add OO interface that allows passing custom word lists

Location:
lang/perl/Acme-Shukugawa-Atom/trunk
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • lang/perl/Acme-Shukugawa-Atom/trunk/Changes

    r7895 r7900  
    33 
    440.00004 - 
     5  * Add OO interface 
    56  * Work with new tests (otsune) 
    67  * Add t/02_shisu.t 
  • lang/perl/Acme-Shukugawa-Atom/trunk/Makefile.PL

    r7885 r7900  
    44name('Acme-Shukugawa-Atom'); 
    55all_from('lib/Acme/Shukugawa/Atom.pm'); 
     6requires('Class::Accessor::Fast'); 
    67requires('Text::MeCab', '0.20007'); 
    78requires('Encode'); 
  • lang/perl/Acme-Shukugawa-Atom/trunk/lib/Acme/Shukugawa/Atom.pm

    r7895 r7900  
    44use strict; 
    55use warnings; 
     6use base qw(Class::Accessor::Fast); 
    67use utf8; 
    78use Encode qw(decode_utf8); 
     
    1011our $VERSION = '0.00003'; 
    1112 
    12 sub translate 
    13 { 
    14     my $self   = shift; 
    15     my $string = decode_utf8(shift); 
    16  
    17     $self->preprocess(\$string); 
    18     $self->runthrough(\$string); 
    19     $self->postprocess(\$string); 
    20  
    21     return $string; 
    22 } 
     13__PACKAGE__->mk_accessors($_) for qw(custom_words); 
    2314 
    2415# Special case handling -- this could be optimized further 
    2516# put it in a sharefile later 
    26 my (@SPECIAL, $EXCEPTION, $RE_SMALL, $RE_SYLLABLE, $RE_NBAR); 
     17our (@DEFAULT_WORDS, $RE_EXCEPTION, $RE_SMALL, $RE_SYLLABLE, $RE_NBAR); 
    2718BEGIN 
    2819{ 
     
    3021    $RE_SYLLABLE = decode_utf8("(?:.$RE_SMALL?)"); 
    3122    $RE_NBAR     = decode_utf8("^ンー"); 
    32     @SPECIAL = ( 
     23    @DEFAULT_WORDS = ( 
    3324        '急ぎで|急いでる?' => '巻きで', 
    3425        '小飼弾|(?i)dankogai|(?i)kogaidan' => 'ガイダンコ', 
     
    5041        '片付け|かたづけ' => 'わらう' 
    5142    ); 
    52     $EXCEPTION = decode_utf8(join("|", 
    53         map { $SPECIAL[$_ * 2 + 1] } (0..$#SPECIAL/2) )); 
     43} 
     44 
     45sub _create_exception_re 
     46{ 
     47    my $self = shift; 
     48    my $custom = $self->custom_words; 
     49 
     50    return decode_utf8(join("|", 
     51        map { $custom->[$_ * 2 + 1] } (0..(scalar(@$custom) - 1)/2) )); 
     52} 
     53 
     54sub translate 
     55{ 
     56    my $self   = shift; 
     57    my $string = decode_utf8(shift); 
     58 
     59    if (! ref $self) { 
     60        $self = $self->new({ custom_words => \@DEFAULT_WORDS, @_ }); 
     61    } 
     62 
     63    # Create local RE_EXCEPTION 
     64    local $RE_EXCEPTION = $self->_create_exception_re; 
     65 
     66    $self->preprocess(\$string); 
     67    $self->runthrough(\$string); 
     68    $self->postprocess(\$string); 
     69 
     70    return $string; 
    5471} 
    5572 
     
    5774{ 
    5875    my ($self, $strref) = @_; 
    59  
    60     for(0..$#SPECIAL/2) { 
    61         my $pattern = $SPECIAL[$_ * 2]; 
    62         my $replace = $SPECIAL[$_ * 2 + 1]; 
     76    my $custom = $self->custom_words; 
     77 
     78    for(0..(scalar(@$custom) - 1)/2) { 
     79        my $pattern = $custom->[$_ * 2]; 
     80        my $replace = $custom->[$_ * 2 + 1]; 
    6381        $$strref =~ s/$pattern/$replace/g; 
    6482    } 
     
    7593    my $ret = ''; 
    7694 
    77     foreach my $text (split(/($EXCEPTION)/, $$strref)) { 
    78         if ($text =~ /$EXCEPTION/) { 
     95    foreach my $text (split(/($RE_EXCEPTION)/, $$strref)) { 
     96        if ($text =~ /$RE_EXCEPTION/) { 
    7997            $ret .= $text; 
    8098            next; 
     
    222240  my $newstring = Acme::Shukugawa::Atom->translate($string); 
    223241 
     242  my $atom = Acme::Shukugawa::Atom->new( 
     243    # The default values are stored in @Acme::Shukugawa::Atom::DEFAULT_WORDS 
     244    custom_words => [ 
     245      'regexp1' => 'replacement1', 
     246      'regexp2' => 'replacement2', 
     247      'regexp3' => 'replacement3', 
     248      'regexp4' => 'replacement4', 
     249      .... 
     250    ] 
     251  ); 
     252  my $newstring = $atom->translate($string); 
     253 
     254  # shorter way 
     255  my $newstring = Acme::Shukugawa::Atom->translate($string, 
     256    custom_words => [ ... ] 
     257  ); 
     258 
    224259=head1 DESCRIPTION 
    225260