root/lang/perl/Acme-Samurai/trunk/lib/Acme/Samurai/Base.pm

Revision 18791, 3.3 kB (checked in by tomi-ru, 17 months ago)

Checking in changes prior to tagging of version 0.02. Changelog diff is:

Index: Changes
===================================================================
--- Changes (リビジョン 18217)
+++ Changes (作業コピー)
@@ -1,4 +1,7 @@

Revision history for Perl extension Acme::Samurai


+0.02 Thu Sep 4 07:57:31 JST 2008
+ - first CPAN release
+

0.01 Sun Mar 23 23:58:44 JST 2008

  • original version
Line 
1package Acme::Samurai::Base;
2use strict;
3use warnings;
4use Class::Trigger;
5use Encode;
6use File::ShareDir 'module_file';
7use Text::MeCab;
8
9use base 'Class::Accessor::Fast';
10__PACKAGE__->mk_accessors(qw( text parts mecab_option ));
11
12my $encoding = Encode::find_encoding( Text::MeCab::ENCODING );
13
14sub parts_push {
15    push @{ shift->parts }, @_;
16}
17
18sub parts_pop {
19    pop @{ shift->parts };
20}
21
22sub mecab_new {
23    my $self = shift;
24    my $mecab = Text::MeCab->new({
25        node_format => '%m,%H',
26        unk_format  => '%m,%H',
27        bos_format  => '%m,%H',
28        eos_format  => '%m,%H',
29        userdic     => module_file(ref $self, 'user.dic'),
30        %{ $self->mecab_option || {} },
31    });
32}
33
34sub call {
35    my ($self, $method, @args) = @_;
36   
37    $self->call_trigger("pre.$method", @args);
38   
39    if ($self->can($method)) {
40        $self->$method(@args);
41    }
42   
43    $self->call_trigger("post.$method",  @args);
44}
45
46sub transform {
47    my ($self, $text) = @_;
48   
49    $self->text($text || "");
50    $self->parts([]);
51
52    $self->call('prepare');
53   
54    my $mecab = $self->mecab_new;
55   
56    for my $part (split /(\s+)/, $self->text) {
57        if ($part =~ /\s/) {
58            $self->parts_push($part);
59            next;
60        }
61        foreach (
62            my $node = $mecab->parse( $encoding->encode($part) );
63            $node;
64            $node = $node->next
65        ) {
66            next if $node->stat_type =~ /BOS|EOS/;
67            $self->call('node_filter', $node->decoded_node($mecab));
68        }
69    }
70   
71    $self->text(join "", @{ $self->parts });
72    $self->call('finalize');
73    $self->text;
74}
75
76sub node_filter {
77    my ($self, $node) = @_;
78    my $part = $node->features->{extra} || $node->surface;
79     
80    if (my $sub = $self->can($node->features->{pos} . '_rule') ) {
81        my $ret = $sub->($self, $node);
82        $part = $ret if defined $ret;
83    }
84   
85    $self->parts_push($part);
86}
87
88sub Text::MeCab::Node::stat_type {
89    {
90        0 => 'MECAB_NOR_NODE', # normal
91        1 => 'MECAB_UNK_NODE', # unknown
92        2 => 'MECAB_BOS_NODE', # begin of sentence
93        3 => 'MECAB_EOS_NODE', # end of sentence
94    }->{
95        shift->stat()
96    };
97}
98
99our @dic_keys = qw(
100    pos category1 category2 category3
101    inflect inflect_type original yomi pronounse
102    extra extra2 extra3
103);
104
105sub Text::MeCab::Node::decoded_node {
106    my ($node, $mecab) = @_;
107   
108    my $format = $encoding->decode( $node->format($mecab) );
109    my ($surface, @feature) = split /,/, $format;
110   
111    return Acme::Samurai::Base::Node->new({
112        mecab     => $mecab,
113        node      => $node,
114        stat_type => $node->stat_type,
115        surface   => $surface,
116        feature   => join(",", @feature),
117        features  => do {
118            my %_tmp; @_tmp{ @dic_keys } = @feature;
119            \%_tmp;
120        },
121    });
122}
123
124
125package Acme::Samurai::Base::Node;
126use strict;
127use Scalar::Util qw(weaken);
128
129use base 'Class::Accessor::Fast';
130__PACKAGE__->mk_accessors(qw( mecab node stat_type surface feature features ));
131
132sub new {
133    my $self = shift->SUPER::new(@_);
134    weaken $self->{mecab};
135    $self;
136}
137
138for my $sub (qw( next prev )) {
139    no strict 'refs'; ## no critic
140    *{__PACKAGE__ . "::${sub}_node"} = sub {
141        my $self = shift;
142        my $node = $self->node->$sub;
143        return $node->decoded_node($self->mecab)
144            if $node;
145    };
146}
147
1481;
Note: See TracBrowser for help on using the browser.