| 1 | use strict; |
|---|
| 2 | |
|---|
| 3 | # http://labs.unoh.net/2007/02/post_65.html to dat/convert-map-utf8.yaml |
|---|
| 4 | # perl tools/convert-map-scrape.pl > dat/convert-map-utf8.yaml |
|---|
| 5 | |
|---|
| 6 | use Encode; |
|---|
| 7 | use Encode::JP::Mobile 0.09; |
|---|
| 8 | use LWP::Simple; |
|---|
| 9 | use YAML; |
|---|
| 10 | |
|---|
# Download the three carrier conversion tables and keep each one, decoded
# from cp932 into Perl characters, keyed by its file name.
my %files;
for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) {
    my $url   = "http://labs.unoh.net/$file";
    my $bytes = get($url);

    # LWP::Simple::get returns undef on any failure.  Abort right away:
    # carrying on would print an empty or partial map without any error,
    # and stdout is meant to be redirected into dat/convert-map-utf8.yaml.
    defined $bytes
        or die "failed to fetch $url\n";

    $files{$file} = decode('cp932', $bytes);
}
| 15 | |
|---|
# Pass 1: learn the code point behind every emoji id.
#
# Each table line that starts with '%' carries the emoji id in its first
# tab-separated column and a carrier-specific representation in the second.
# How that second column becomes a character depends on the file, so the
# decoders live in a lookup table keyed by file name:
#   - docomo / kddi: the column is hex digits, packed into bytes first;
#   - softbank: the column is wrapped between "\x1b\x24" and "\x0f".
my %char_from = (
    'emoji_i2es.txt' => sub {
        my ($hex) = @_;
        return decode('x-sjis-docomo', pack 'H*', $hex);
    },
    'emoji_e2is.txt' => sub {
        my ($hex) = @_;
        return decode('x-sjis-kddi-auto', pack 'H*', $hex);
    },
    'emoji_s2ie.txt' => sub {
        my ($code) = @_;
        return decode('x-sjis-softbank', "\x1b\x24$code\x0f");
    },
);

# emoji id => code point of the first decoded character, as at least four
# upper-case hex digits.
my $no2uni = {};
for my $file (keys %files) {
    my $to_char = $char_from{$file} or next;

    for my $line (split /\n/, $files{$file}) {
        next if $line !~ /^%/;

        my ($emoji_id, $raw) = split "\t", $line;
        my $char = $to_char->($raw);
        $no2uni->{$emoji_id} = sprintf '%04X', ord($char);
    }
}
| 35 | |
|---|
# Pass 2: build the carrier-to-carrier conversion map.
#
# Every table line starting with '%' is split into four tab-separated
# columns: the emoji id of the file's own carrier, a column this pass
# ignores, and the equivalents for the two other carriers.  Which carrier
# sits in which column depends on the file:
my %columns_of = (
    #                     own carrier  3rd column  4th column
    'emoji_i2es.txt' => [ 'docomo',    'kddi',     'softbank' ],
    'emoji_e2is.txt' => [ 'kddi',      'docomo',   'softbank' ],
    'emoji_s2ie.txt' => [ 'softbank',  'docomo',   'kddi'     ],
);

# $map{source carrier}{source code point}{target carrier} = get_unicode(...)
my %map;
for my $file (keys %files) {
    my $columns = $columns_of{$file} or next;
    my ($source, $third, $fourth) = @$columns;

    for my $line (split /\n/, $files{$file}) {
        next unless $line =~ /^%/;

        # split /\n/ has already removed the newline, so chomp would do
        # nothing here.  What can remain is the "\r" of a CRLF line ending,
        # glued to the last column; strip it so it cannot leak into the map.
        $line =~ s/\r\z//;

        my ($source_id, undef, $third_value, $fourth_value)
            = split "\t", $line;

        my $code_point = $no2uni->{$source_id};
        $map{$source}{$code_point}{$third}  = get_unicode($third_value);
        $map{$source}{$code_point}{$fourth} = get_unicode($fourth_value);
    }
}
| 61 | |
|---|
# Turn one cell of a conversion table into the hash stored in the map.
#
# A cell that starts with '%' is treated as a run of emoji ids of the form
# "%...%": every id is replaced by the hex code point recorded for it in
# $no2uni, and the result is returned as
#     { type => 'pictogram', unicode => <replaced string> }.
# Any other cell is returned untouched as
#     { type => 'name', unicode => <cell> }.
#
# No prototype is declared: this sub is defined below the code that calls
# it, so a prototype could never be applied to those calls anyway.
sub get_unicode {
    my ($key) = @_;

    if ($key =~ /^%/) {
        # An id that is missing from $no2uni still collapses to an empty
        # string, but is reported on STDERR instead of vanishing silently.
        $key =~ s{(%[^%]+%)}{
            my $hex = $no2uni->{$1};
            warn "no code point known for emoji id $1\n" unless defined $hex;
            defined($hex) ? $hex : '';
        }ge;
        return +{ type => 'pictogram', unicode => $key };
    }

    return +{ type => 'name', unicode => $key };
}
| 71 | |
|---|
# Serialise the finished map as YAML and write it to standard output through
# the :utf8 layer.
binmode(STDOUT, ':utf8');
my $yaml = YAML::Dump(\%map);
print $yaml;