root/lang/perl/Mezasi/trunk/lib/Mezasi/Util.pm @ 5368

Revision 5368, 4.0 kB (checked in by hiroyukim, 5 years ago)

細かいバグの修正

Line 
1package Mezasi::Util;
2use strict;
3use warnings;
4use utf8;
5use base qw/Class::Accessor::Fast/;
6
7use Data::Dumper;
8use List::Util qw/sum/;
9use List::MoreUtils qw/uniq/;
10use boolean qw/:all/;
11use JSON::Syck;
12
13use Mezasi::Trie;
14
15our $MarkovKeySize = 1;
16
17sub markov {
18    my ( $self, $src, $keywords, $trie ) = @_;
19
20    my $mar    = $self->markov_generate( $src , $trie );
21    my $result = $self->markov_select($mar, $keywords );
22
23    return $result;
24}
25
26sub markov_generate {
27    my ( $self, $src, $trie ) = @_;
28
29    return '' if( scalar( @{$src} ) == 0 );
30
31    my @ary = @{$trie->split_into_terms( join( "\n", @{$src} )."\n", 'true')};
32    my $size = scalar @ary;
33    push @ary, grep {$_} map { $ary[$_] } (0..$MarkovKeySize);
34
35    my %table;
36    for my $idx ( 0..$size - 1 ) {
37        my $key = JSON::Syck::Dump([ grep {$_} map {  $ary[$_]  } ($idx..($idx+$MarkovKeySize - 1)) ]);
38        $table{$key} = [] unless $table{$key};
39        push @{$table{$key}},$ary[$idx + $MarkovKeySize ] ;
40    }
41    my %uniq;
42    my %backup;
43
44    while( my ($key, $value ) = each %table ) {
45        if( scalar( @{$value} ) == 1 ) {
46            $uniq{$key} = $value->[0];
47        }
48        else {
49            # dup の挙動を追うと新規を作ることになる。
50            $backup{$key} = [ @{$table{$key}} ];
51        }
52    }
53    my $key    = JSON::Syck::Dump([ map { $ary[$_] } (0..($MarkovKeySize - 1 )) ]);
54    my $result = join('',@{JSON::Syck::Load($key)});
55    for my $count ( 0..10000 ) {
56        my $str;
57        if( defined $uniq{$key} ) {
58            $str = $uniq{$key};   
59        }
60        else {
61            if( $table{$key} && scalar( @{$table{$key}} ) == 0 ) {
62                $table{$key} =  ( ref $backup{$key} ) ?  [@{$backup{$key}}] : [];
63            }
64            my $idx = rand( ( ref $table{$key} ) ? scalar @{$table{$key}} : 0 );
65            my $str = $table{$key}->[$idx];
66
67            $table{$key}->[$idx] = undef;
68            $table{$key} = [ grep { $_ } @{$table{$key}} ];
69        }
70
71        $result .= $str || '';
72        $key = JSON::Syck::Load($key);
73        push @{$key} , $str ;
74        $key     = JSON::Syck::Dump([ map { $key->[$_] } (1..$MarkovKeySize ) ]) ;
75    }
76
77    return $result;
78}
79
80sub markov_split {
81    my ($self, $str ) = @_;
82
83    my @result;
84    while( $str =~ /\A(.{25,}?)([。、.,]+|[?!.,]+[\s ])[  ]*/ ) {
85        my $m  = $1;
86        my $m2 = $2;
87        my $post_match = $';
88        if( $m2 ) {
89            $m2 =~ s/、/。/m;
90            $m2 =~ s/,/./m;
91        }
92        $m .= $m2;
93        push @result, $m;
94        $str = $post_match;
95    }
96
97    if( scalar( @{[split(//,$str)]} ) > 0  ) {
98        push @result, $str;
99    }
100    return \@result;
101}
102
103sub markov_select {
104    my ($self, $result, $keywords ) = @_;
105
106    my @tmp = split(/\n/, $result) or qw();
107    my @test = map { @{$self->markov_split($_)} } @tmp;
108    my @result_ary = uniq map { @{$self->markov_split($_)} } @tmp;
109    @result_ary = grep { $_ || $_ !~ /\0/ } @result_ary;
110    my %result_hash;
111
112    my $trie = Mezasi::Trie->new([ keys %{$keywords}]);
113
114    for my $str ( @result_ary ) {
115        my @terms          = uniq @{$trie->split_into_terms($str)};
116        $result_hash{$str} = ( sum map { $keywords->{$_} } @terms ) || 0;
117    }
118
119    $result = $self->roulette_select(\%result_hash);
120    return $result || '';
121}
122
123sub roulette_select {
124    my ($self, $h ) = @_;
125
126    return if( scalar( keys %$h ) == 0 );
127
128    my $sum = sum values %$h;
129
130    if( $sum == 0 ) {
131        return $self->random_select( [ keys %$h ] );
132    }
133
134    my $r = int( rand * $sum );
135    while( my ($key , $value ) = each %$h ) {
136        $r -= $value;
137        if( $r <= 0 ) {
138            return $key ;
139        }     
140    }
141
142    return $self->random_select( [ keys %$h ] );
143}
144
145sub random_select {
146    my ($self, $ary ) = @_;
147
148    return $ary->[rand(scalar(@{$ary}))];
149}
150
151sub message_normalize {
152    my ($self, $str ) = @_;
153   
154    $str =~ s/「」//g;
155    $str =~ s/()//g;
156    $str =~ s/『』//g;
157    $str =~ s/\(\)//g;
158
159    return $str;
160}
161
1621;
Note: See TracBrowser for help on using the browser.