| 1 | package WebService::YahooJP::TextParse; |
|---|
| 2 | |
|---|
| 3 | use strict; |
|---|
| 4 | use warnings; |
|---|
| 5 | |
|---|
| 6 | our $VERSION = '0.01'; |
|---|
| 7 | |
|---|
| 8 | use Array::Iterator::BiDirectional; |
|---|
| 9 | use Encode; |
|---|
| 10 | use LWP::UserAgent; |
|---|
| 11 | use XML::Simple; |
|---|
| 12 | |
|---|
# Constructor.
#
# Takes a flat list of key/value options and stores them directly on the
# object. Any option supplied by the caller overrides the built-in default.
# Options read elsewhere in this package:
#   url     - endpoint the request is POSTed to (defaults to the address below)
#   appid   - application id sent with every request
#   timeout - timeout handed to the user agent by init()
#
# Returns the new blessed object after init() has been run on it.
sub new {
    my $class   = shift;
    my %options = @_;

    # Defaults are listed first so that caller-supplied keys win.
    my %attributes = (
        url => 'http://api.jlp.yahoo.co.jp/MAService/V1/parse',
        %options,
    );

    my $self = bless \%attributes, $class;
    $self->init;

    return $self;
}
|---|
| 24 | |
|---|
# Build the HTTP client used by analysis() and store it in $self->{ua}.
#
# The agent's timeout is taken from $self->{timeout}; any false value
# (unset, 0, empty string) falls back to 3. The agent is also asked to load
# its proxy configuration through env_proxy.
#
# Returns the LWP::UserAgent object that was stored.
sub init {
    my ($self) = @_;

    my $timeout = $self->{timeout} || 3;

    my $agent = LWP::UserAgent->new;
    $agent->timeout($timeout);
    $agent->env_proxy;

    return $self->{ua} = $agent;
}
|---|
| 33 | |
|---|
# Send $content to the morphological analysis service and return its words.
#
# Parameters:
#   $content - the text to analyse. A string carrying Perl's internal UTF-8
#              flag is encoded to UTF-8 bytes first; anything else is sent
#              unchanged.
#
# Returns, depending on calling context:
#   list   - the word entries as produced by XML::Simple
#   scalar - an Array::Iterator::BiDirectional over the same entries
# Entries whose "surface" value is a hash reference are dropped (presumably
# XML::Simple's representation of an empty <surface/> element). A response
# that contains no word entries yields an empty list.
#
# Side effects: the parsed response is kept in $self->{result} and the
# filtered word list in $self->{word}.
#
# Dies (via Carp::croak) when the HTTP request does not succeed. XML::Simple
# may additionally die if the response body cannot be parsed.
sub analysis {
    my ($self, $content) = @_;

    $content = Encode::encode('utf-8', $content) if utf8::is_utf8($content);

    my %form = (
        appid    => $self->{appid},
        results  => 'ma',
        sentence => $content,
    );

    my $res = $self->{ua}->post( $self->{url}, \%form );

    # Report transport/HTTP failures directly instead of handing an error
    # body to the XML parser, where it would fail with an unrelated message.
    unless ($res->is_success) {
        require Carp;
        Carp::croak('Yahoo! JAPAN text parse request failed: ' . $res->status_line);
    }

    # Limit the parser preference to this call so that other XML::Simple
    # users in the same process keep whatever setting they had.
    local $XML::Simple::PREFERRED_PARSER = 'XML::Parser';
    my $parser = XML::Simple->new;
    $self->{result} = $parser->XMLin($res->content);

    # Walk result -> ma_result -> word_list -> word without autovivifying
    # missing levels and without dereferencing a non-hash value.
    my $node = $self->{result};
    for my $key (qw(ma_result word_list word)) {
        $node = ref($node) eq 'HASH' ? $node->{$key} : undef;
    }

    # A single <word> arrives as one hash; several arrive as an array
    # reference. Anything else (including no words at all) becomes empty.
    my $words = $node;
    if (ref($words) eq 'HASH') {
        $words = [ $words ];
    }
    elsif (ref($words) ne 'ARRAY') {
        $words = [];
    }

    # Filter in place, as before, so an array shared with $self->{result}
    # reflects the same filtering.
    @{$words} = grep { ref($_->{surface}) ne 'HASH' } @{$words};
    $self->{word} = $words;

    return wantarray
        ? @{ $self->{word} }
        : Array::Iterator::BiDirectional->new($self->{word});
}
|---|
| 56 | |
|---|
| 57 | 1; |
|---|
| 58 | __END__ |
|---|
| 59 | |
|---|
| 60 | =head1 NAME |
|---|
| 61 | |
|---|
| 62 | WebService::YahooJP::TextParse - Perl interface to Japanese language morphological analysis of Yahoo! JAPAN |
|---|
| 63 | |
|---|
| 64 | =head1 SYNOPSIS |
|---|
| 65 | |
|---|
| 66 | use WebService::YahooJP::TextParse; |
|---|
| 67 | |
|---|
| 68 | my $config = { appid => "your-app-id" }; |
|---|
| 69 | my $ma = WebService::YahooJP::TextParse->new(%$config); |
|---|
| 70 | my $text = "some japanese sentence"; |
|---|
| 71 | my $iter = $ma->analysis($text); |
|---|
| 72 | # $iter is an Array::Iterator::BiDirectional object. |
|---|
| 73 | while ($iter->hasNext) { |
|---|
| 74 | my $word = $iter->getNext(); |
|---|
| 75 | next if ref($word->{surface}) eq 'HASH'; |
|---|
| 76 | |
|---|
| 77 | print Dump $word;  # Dump() is not exported by this module; load it first, e.g. with "use YAML;" |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | =head1 DESCRIPTION |
|---|
| 81 | |
|---|
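| 82 | This module is a Perl interface to the Japanese morphological analysis web service of Yahoo! JAPAN. C<analysis> returns the list of words in list context and an L<Array::Iterator::BiDirectional> object in scalar context. |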
|---|
| 83 | See Developers Page L<http://developer.yahoo.co.jp/jlp/MAService/V1/parse.html> for details. |
|---|
| 84 | |
|---|
| 85 | =head1 AUTHOR |
|---|
| 86 | |
|---|
| 87 | woremacx E<lt>woremacx at cpan dot orgE<gt> |
|---|
| 88 | |
|---|
| 89 | =head1 LICENSE |
|---|
| 90 | |
|---|
| 91 | This library is free software; you can redistribute it and/or modify |
|---|
| 92 | it under the same terms as Perl itself. |
|---|
| 93 | |
|---|
| 94 | =head1 SEE ALSO |
|---|
| 95 | |
|---|
| 96 | L<Array::Iterator::BiDirectional> |
|---|
| 97 | |
|---|
| 98 | =cut |
|---|