| 1 | # $Id$ |
|---|
| 2 | # |
|---|
| 3 | # Copyright (c) 2008 Daisuke Maki <daisuke@endeworks.jp> |
|---|
| 4 | # All rights reserved. |
|---|
| 5 | |
|---|
| 6 | package Text::MeCab::Dict; |
|---|
| 7 | use strict; |
|---|
| 8 | use warnings; |
|---|
| 9 | use base qw(Class::Accessor::Fast); |
|---|
| 10 | use Text::MeCab; |
|---|
| 11 | use Path::Class::Dir; |
|---|
| 12 | use Path::Class::File; |
|---|
| 13 | |
|---|
# Name of the make program; rebuild() runs it as "$MAKE install".
# It is a package variable, so it can be reassigned before calling rebuild().
our $MAKE = 'make';

# Generate one read/write accessor per attribute stored on the object.
__PACKAGE__->mk_accessors(
    qw(
        entries
        config
        dict_source
        libexecdir
        input_encoding
        output_encoding
    )
);
|---|
| 17 | |
|---|
# Construct a new Text::MeCab::Dict object.
#
# Named arguments:
#   dict_source          - dictionary source directory (required)
#   mecab_config         - mecab-config command to query; defaults to
#                          Text::MeCab::MECAB_CONFIG
#   libexecdir           - directory holding mecab-dict-index; used only when
#                          no mecab-config is available or it prints nothing
#   ie / input_encoding  - encoding of the source CSV; defaults to
#                          Text::MeCab::ENCODING
#   oe / output_encoding - encoding of the built dictionary; defaults to
#                          Text::MeCab::ENCODING
#
# Returns the new object. Dies when dict_source or libexecdir cannot be
# determined.
sub new
{
    my $class = shift;
    my %args  = @_;

    my $config      = $args{mecab_config} || Text::MeCab::MECAB_CONFIG();
    my $dict_source = $args{dict_source};
    my $ie = $args{ie} || $args{input_encoding}  || Text::MeCab::ENCODING();
    my $oe = $args{oe} || $args{output_encoding} || Text::MeCab::ENCODING();

    my $libexecdir;
    if ($config) {
        # Backticks keep the trailing newline of the command output; it must
        # be removed or it becomes part of the directory name.
        $libexecdir = `$config --libexecdir`;
        $libexecdir = '' unless defined $libexecdir;
        chomp $libexecdir;
    }

    # No mecab-config, or it produced nothing (e.g. the command failed):
    # fall back to the caller-supplied directory instead of silently ending
    # up with an empty path.
    if (! defined $libexecdir || $libexecdir eq '') {
        $libexecdir = $args{libexecdir};
    }

    if (! $dict_source || ! $libexecdir) {
        die "You must specify dict_source and libexecdir";
    }

    my $self = bless {
        config          => $config,
        entries         => [],
        dict_source     => $dict_source,
        # Always store an object: rebuild() calls ->file() on this value.
        libexecdir      => Path::Class::Dir->new($libexecdir),
        input_encoding  => $ie,
        output_encoding => $oe,
    }, $class;

    return $self;
}
|---|
| 48 | |
|---|
# Queue one entry for the next write().
#
# Accepts either a single, already constructed entry object, or a list of
# key/value pairs that is handed to Text::MeCab::Dict::Entry->new.
# Returns the value of the final push onto the entries list.
sub add
{
    my ($self, @spec) = @_;

    my $item;
    if (@spec == 1) {
        # A lone argument is taken to be the entry itself.
        ($item) = @spec;
    }
    else {
        my %fields = @spec;
        $item = Text::MeCab::Dict::Entry->new(%fields);
    }

    push @{ $self->entries }, $item;
}
|---|
| 62 | |
|---|
# Append all queued entries to a CSV file, then clear the queue.
#
# Arguments:
#   $file - path (string or Path::Class::File) of the CSV file; it is opened
#           in append mode, so existing rows are kept.
#
# Returns the value of the final entries([]) accessor call.
# Dies when a row cannot be encoded as CSV or when the file cannot be
# opened, written or closed.
sub write
{
    my $self = shift;
    my $file = shift;

    # This file never loads Text::CSV_XS at the top, so load it here.
    require Text::CSV_XS;

    # binary => 1 is needed for fields containing non-ASCII bytes (Japanese
    # surface forms and readings); without it combine() may reject them.
    my $csv = Text::CSV_XS->new({ binary => 1 });

    # Fixed leading columns, in the order they are written. "extra" is
    # handled separately below because it may hold several values.
    my @columns = qw(
        surface left_id right_id cost pos category1 category2 category3
        inflect inflect_type original yomi pronounse
    );

    my @output;
    foreach my $entry (@{ $self->entries }) {
        my @fields = map { $entry->$_ } @columns;

        # "extra" may be an array reference of additional values; expand it
        # into one CSV field per value instead of writing a stringified
        # reference. A plain or undefined value stays a single field.
        my $extra = $entry->extra;
        push @fields, (ref($extra) eq 'ARRAY' ? @$extra : $extra);

        if (! $csv->combine(@fields)) {
            my $bad = $csv->error_input;
            $bad = '(unknown)' unless defined $bad;
            die "Failed to build CSV row, offending input: $bad";
        }
        push @output, $csv->string;
    }

    $file = Path::Class::File->new($file);
    my $fh = $file->open(">>")
        or die "Failed to open '$file' for appending: $!";

    # Terminate every row, including the last one, so that a later append
    # starts on a fresh line rather than continuing the previous row.
    if (@output) {
        $fh->print(map { "$_\n" } @output)
            or die "Failed to write to '$file': $!";
    }

    # Buffered write errors only surface at close time.
    $fh->close
        or die "Failed to close '$file': $!";

    $self->entries([]);
}
|---|
| 88 | |
|---|
# Rebuild and install the dictionary.
#
# Changes into dict_source, runs mecab-dict-index (found in libexecdir) with
# the configured input/output encodings, then runs "$MAKE install". The
# original working directory is restored afterwards, whether or not the
# commands succeeded.
#
# Returns nothing. Dies when the directory cannot be entered, when either
# command exits non-zero, or when the original directory cannot be restored.
sub rebuild
{
    my $self = shift;

    my $dict_source = $self->dict_source;

    # libexecdir may be a plain string or a Path::Class::Dir object;
    # normalise it so that ->file() is always available.
    my $dict_index =
        Path::Class::Dir->new($self->libexecdir)->file('mecab-dict-index');

    my $curdir = Path::Class::Dir->new->absolute;

    my $ok = eval {
        chdir "$dict_source"
            or die "Failed to chdir to '$dict_source': $!";

        my @cmds = (
            [ "$dict_index", '-f', $self->input_encoding, '-t', $self->output_encoding ],
            [ $MAKE, "install" ]
        );

        foreach my $cmd (@cmds) {
            # List-form system() bypasses the shell.
            if (system(@$cmd) != 0) {
                die "Failed to execute '@$cmd'";
            }
        }
        1;
    };
    my $e = $@;

    # Always go back to where we started, not only on failure; otherwise a
    # successful rebuild leaves the process inside the dictionary directory.
    my $restored  = chdir "$curdir";
    my $chdir_err = "$!";

    # The command failure is the more useful error, so report it first.
    die $e unless $ok;
    die "Failed to chdir back to '$curdir': $chdir_err" unless $restored;

    return;
}
|---|
| 116 | |
|---|
# A single dictionary entry: one row of the MeCab dictionary CSV.
package Text::MeCab::Dict::Entry;
use strict;
use warnings;
use base qw(Class::Accessor::Fast);

# One accessor per CSV column. "pronounse" is the historical spelling of the
# pronunciation field and is kept so existing callers continue to work.
__PACKAGE__->mk_accessors($_) for qw(
    surface left_id right_id cost pos category1 category2 category3
    inflect inflect_type original yomi pronounse extra
);

# Correctly spelled alias for the "pronounse" accessor (get and set).
sub pronounce
{
    my $self = shift;
    return $self->pronounse(@_);
}

# Construct an entry.
#
# Accepts field values either as a list of key/value pairs or as a single
# hash reference. Fields that are not supplied fall back to these defaults:
# left_id => -1, right_id => -1, cost => 0. The key "pronounce" is accepted
# as an alias for "pronounse".
#
# Returns the new entry object.
sub new
{
    my $class = shift;

    # Previously every argument was discarded, so each entry contained only
    # the defaults. Merge the caller's values over the defaults instead.
    my %args = (@_ == 1 && ref($_[0]) eq 'HASH') ? %{ $_[0] } : @_;

    if (exists $args{pronounce}) {
        my $value = delete $args{pronounce};
        # An explicit "pronounse" wins if both spellings are given.
        $args{pronounse} = $value unless exists $args{pronounse};
    }

    return $class->SUPER::new({
        left_id  => -1,
        right_id => -1,
        cost     => 0,
        %args,
    });
}
|---|
| 136 | |
|---|
| 137 | 1; |
|---|
| 138 | |
|---|
| 139 | __END__ |
|---|
| 140 | |
|---|
| 141 | =encoding UTF-8 |
|---|
| 142 | |
|---|
| 143 | =head1 NAME |
|---|
| 144 | |
|---|
| 145 | Text::MeCab::Dict - Utility To Work With MeCab Dictionary |
|---|
| 146 | |
|---|
| 147 | =head1 SYNOPSIS |
|---|
| 148 | |
|---|
| 149 | use Text::MeCab::Dict; |
|---|
| 150 | |
|---|
| 151 | my $dict = Text::MeCab::Dict->new(dict_source => "/path/to/mecab-ipadic-source"); |
|---|
| 152 | $dict->add( |
|---|
| 153 | surface => $surface, # 表層形 |
|---|
| 154 | left_id => $left_id, # 左文脈ID |
|---|
| 155 | right_id => $right_id, # 右文脈ID |
|---|
| 156 | cost => $cost, # コスト |
|---|
| 157 | pos => $part_of_speech, # 品詞 |
|---|
| 158 | category1 => $category1, # 品詞細分類1 |
|---|
| 159 | category2 => $category2, # 品詞細分類2 |
|---|
| 160 | category3 => $category3, # 品詞細分類3 |
|---|
| 161 | |
|---|
| 162 | # XXX the two parameter names below need blessing from a knowledgeable |
|---|
| 163 | # expert, and are subject to change |
|---|
| 164 | inflect => $inflect, # 活用形 |
|---|
| 165 | inflect_type => $inflect_type, # 活用型 |
|---|
| 166 | |
|---|
| 167 | original => $original, # 原形 |
|---|
| 168 | yomi => $yomi, # 読み |
|---|
| 169 | pronounce => $pronounce, # 発音 |
|---|
| 170 | extra => \@extras, # ユーザー設定 |
|---|
| 171 | ); |
|---|
| 172 | $dict->write('foo.csv'); |
|---|
| 173 | $dict->rebuild(); |
|---|
| 174 | |
|---|
| 175 | =head1 METHODS |
|---|
| 176 | |
|---|
| 177 | =head2 rebuild |
|---|
| 178 | |
|---|
| 179 | Rebuilds the index. This usually requires that you have root privileges. |
|---|
| 180 | |
|---|
| 181 | =head1 SEE ALSO |
|---|
| 182 | |
|---|
| 183 | http://mecab.sourceforge.net/dic.html |
|---|
| 184 | |
|---|
| 185 | =cut |
|---|