root/lang/perl/Encode-JP-Mobile/trunk/tools/make-utf8-ucm.pl

Revision 5448, 3.4 kB (checked in by tokuhirom, 9 months ago)

merge Encode::JP::Mobile::Character branch to trunk.

  • Property svn:executable set to *
Line 
1use strict;
2use warnings;
3use YAML;
4use Encode;
5use Encode::JP::Mobile ':props';
6use autobox;
7use autobox::Core;
8use autobox::Encode;
9use FindBin;
10use File::Spec;
11use Path::Class;
12
13my $map = YAML::LoadFile file($FindBin::Bin, '..', 'dat', 'convert-map-utf8.yaml');
14my $cp932_ucm = file($FindBin::Bin, '..', 'ucm', 'cp932.ucm');
15
16my $uni_range_for = {
17    docomo   => InDoCoMoPictograms(),
18    kddi     => InKDDIAutoPictograms(),
19    softbank => InSoftBankPictograms(),
20};
21
22sub SCALAR::to_hex($) { sprintf '%X', $_[0] }
23
24&main;exit;
25
26sub main {
27    for my $to (qw( docomo kddi softbank )) {
28        generate_ucm($to, sub {
29            my $fh = shift;
30
31            # convert map
32            for my $from (qw( docomo kddi softbank )) {
33                next if $from eq $to;
34
35                print {$fh} "\n\n# pictogram convert map ($from => $to)\n";
36
37                for my $srcuni (sort keys %{$map->{$from}}) {
38                    my $dstuni = $map->{$from}->{$srcuni}->{$to} or next;
39                    next unless $dstuni->{type} eq 'pictogram';
40                    printf {$fh} "<U%s> %s |1 # %s\n", $srcuni, unihex2utf8hex($dstuni->{unicode}), comment_for($from);
41                }
42            }
43
44            # original
45            range_each($to, sub {
46                my $unicode = shift;
47                my $unihex = $unicode->to_hex;
48                print {$fh} sprintf "<U%s> %s |0 # %s\n", $unihex, unihex2utf8hex($unihex), "$to pictogram";
49            });
50        });
51    }
52}
53
54sub generate_ucm {
55    my ($to, $generate_pictogram_ucm) = @_;
56    my $fh = file('ucm', "x-utf8-$to.ucm")->openw or die $!;
57    print {$fh} header($to);
58    print {$fh} unicode_ucm($cp932_ucm);
59    print {$fh} '<U301C> \xE3\x80\x9C |0 # WAVE DUSH', "\n"; # ad-hoc solution for  FULLWIDTH TILDE Problem.
60    $generate_pictogram_ucm->($fh);
61    print {$fh} "END CHARMAP\n";
62    $fh->close;
63}
64
65sub comment_for {
66    my $from = shift;
67    return $from eq 'docomo'   ? 'DoCoMo Pictogram'
68      : $from    eq 'kddi'     ? 'KDDI/AU Pictogram'
69      : $from    eq 'softbank' ? 'SoftBank Pictogram'
70      :                          "";
71}
72
73sub header {
74    my $to = shift;
75
76    my %alias = qw(
77        docomo imode
78        kddi ezweb
79        softbank vodafone
80    );
81
82    <<"HEAD";
83<code_set_name> "x-utf8-$to"
84<code_set_alias> "x-utf8-$alias{$to}"
85<mb_cur_min> 1
86<mb_cur_max> 2
87<subchar> \\x3F
88CHARMAP
89HEAD
90}
91
92sub unihex2utf8hex {
93    my $uni = shift;
94    $uni =~ s{(....)}{
95        my $x = 'H*'->unpack($1->hex->chr->encode('utf-8'));
96        $x =~ s/(..)/\\x$1/g;
97        $x;
98    }ge;
99    $uni;
100}
101
102sub unicode_ucm {
103    my $cp932_ucm = shift;
104    my $res = '';
105    my $fh = $cp932_ucm->openr or die $!;
106    while (my $line = <$fh>) {
107        if ($line =~ /^<U(.{4})> \S+ \|0 # (.+)$/) {
108            my ($unihex, $comment) = ($1, $2);
109
110            # for FallBack.
111            next if $comment eq 'PRIVATE USE AREA';
112
113            $res .= sprintf "<U%s> %s |0 # %s\n", $unihex, unihex2utf8hex($unihex), $comment;
114        }
115    }
116    $fh->close;
117    $res;
118}
119
120sub range_each {
121    my ($carrier, $code) = @_;
122
123    my $map = $uni_range_for->{$carrier};
124    for my $range (split /\n/, $map) {
125        my ($min, $max) = map { hex $_ } split /\t/, $range;
126        my $i = $min;
127        if ($max) {
128            while ($i <= $max) {
129                $code->( $i );
130                $i++;
131            }
132        } else {
133            $code->($min);
134        }
135    }
136}
137
Note: See TracBrowser for help on using the browser.