package Encode::JP::Mobile;
use strict;
use warnings;

our $VERSION = "0.15";

use Encode;
use XSLoader;

# Load this distribution's XS component; the version is passed so that
# XSLoader can check it against the compiled object.
XSLoader::load(__PACKAGE__, $VERSION);

use base qw( Exporter );

# The user-defined Unicode property subs are exported on request only,
# either by name or all at once through the ':props' tag.
our @EXPORT_OK = qw( InDoCoMoPictograms InKDDIPictograms InSoftBankPictograms InAirEdgePictograms InMobileJPPictograms );
our %EXPORT_TAGS = ( props => [@EXPORT_OK] );

use Encode::Alias;

# sjis: carrier-name aliases for the x-sjis-* encodings.
define_alias('x-sjis-docomo' => 'x-sjis-imode');
define_alias('x-sjis-ezweb' => 'x-sjis-kddi');
define_alias('x-sjis-ezweb-auto' => 'x-sjis-kddi-auto');
define_alias('x-sjis-airedge' => 'cp932');
define_alias('x-sjis-airh' => 'cp932');
define_alias('x-sjis-vodafone-auto' => 'x-sjis-softbank-auto');

# backward compatibility: the old shift_jis-* names map to the same targets.
define_alias('shift_jis-imode' => 'x-sjis-imode');
define_alias('shift_jis-kddi' => 'x-sjis-kddi');
define_alias('shift_jis-kddi-auto' => 'x-sjis-kddi-auto');
define_alias('shift_jis-airedge' => 'cp932');
define_alias('shift_jis-docomo' => 'x-sjis-imode');
define_alias('shift_jis-ezweb' => 'x-sjis-kddi');
define_alias('shift_jis-ezweb-auto' => 'x-sjis-kddi-auto');
define_alias('shift_jis-airh' => 'cp932');

# utf8
define_alias( 'x-utf8-imode' => 'x-utf8-docomo' );
define_alias( 'x-utf8-ezweb' => 'x-utf8-kddi' );
define_alias( 'x-utf8-vodafone' => 'x-utf8-softbank' );

# NOTE(review): these sibling modules are not visible from this file;
# presumably they register the encodings implemented in Perl rather than
# in the XS tables (e.g. x-sjis-softbank, x-iso-2022-jp-kddi) -- confirm.
use Encode::JP::Mobile::Vodafone;
use Encode::JP::Mobile::KDDIJIS;
|---|
| 38 | |
|---|
# User-defined Unicode property (\p{InDoCoMoPictograms}): the code points
# this module classifies as NTT DoCoMo pictograms.
#
# Returns the property definition string Perl expects from such a sub:
# one "START<TAB>END" pair of hexadecimal code points per line, every
# line terminated by a newline.
sub InDoCoMoPictograms {
    my @ranges = (
        [qw( E63E E6A5 )],
        [qw( E6AC E6AE )],
        [qw( E6B1 E6B3 )],
        [qw( E6B7 E6BA )],
        [qw( E6CE E757 )],
    );

    return join q{}, map { join("\t", @{$_}) . "\n" } @ranges;
}
|---|
| 48 | |
|---|
# User-defined Unicode property (\p{InKDDIPictograms}): the code points
# this module classifies as KDDI/AU pictograms.
#
# Returns the property definition string Perl expects from such a sub:
# one "START<TAB>END" pair of hexadecimal code points per line, every
# line terminated by a newline.
sub InKDDIPictograms {
    my @ranges = (
        [qw( E468 E5DF )],
        [qw( EA80 EB88 )],
        [qw( EC40 EC7E )],
        [qw( EC80 ECFC )],
        [qw( ED40 ED8D )],
        [qw( EF40 EF7E )],
        [qw( EF80 EFFC )],
        [qw( F040 F07E )],
        [qw( F080 F0FC )],
    );

    return join q{}, map { join("\t", @{$_}) . "\n" } @ranges;
}
|---|
| 62 | |
|---|
# User-defined Unicode property (\p{InSoftBankPictograms}): the code
# points this module classifies as SoftBank pictograms.
#
# Returns the property definition string Perl expects from such a sub:
# one "START<TAB>END" pair of hexadecimal code points per line, every
# line terminated by a newline.
sub InSoftBankPictograms {
    my @ranges = (
        [qw( E001 E05A )],
        [qw( E101 E15A )],
        [qw( E201 E253 )],
        [qw( E255 E257 )],
        [qw( E301 E34D )],
        [qw( E401 E44C )],
        [qw( E501 E537 )],
    );

    return join q{}, map { join("\t", @{$_}) . "\n" } @ranges;
}
|---|
| 74 | |
|---|
# User-defined Unicode property (\p{InAirEdgePictograms}): the code
# points this module classifies as AirEDGE pictograms.
#
# Returns the property definition string Perl expects from such a sub.
# Each line is either a "START<TAB>END" pair of hexadecimal code points
# or a single hexadecimal code point, and every line ends in a newline.
sub InAirEdgePictograms {
    my @entries = (
        [qw( E000 E096 )],    # the only contiguous run
        map { [$_] } qw( E098 E09A E09F E0A2 E0A6 E0A8 E0AF E0BB E0C4 E0C9 ),
    );

    return join q{}, map { join("\t", @{$_}) . "\n" } @entries;
}
|---|
| 90 | |
|---|
# User-defined Unicode property (\p{InMobileJPPictograms}): matches any
# code point matched by the DoCoMo, KDDI, SoftBank or AirEDGE pictogram
# properties defined in this package.
#
# Returns the four carrier lists concatenated into one property
# definition string.
sub InMobileJPPictograms {
    # +utf8::InDoCoMoPictograms etc. don't work here, so the carrier lists
    # are merged textually instead of being referenced by name.
    #
    # Every carrier list already ends in "\n"; joining on the empty string
    # keeps exactly one entry per line. Joining on "\n" would leave an empty
    # line between the lists, which is not part of the documented format.
    return join q{},
        InDoCoMoPictograms(),
        InKDDIPictograms(),
        InSoftBankPictograms(),
        InAirEdgePictograms();
}
|---|
| 95 | |
|---|
| 96 | 1; |
|---|
| 97 | __END__ |
|---|
| 98 | |
|---|
| 99 | =head1 NAME |
|---|
| 100 | |
|---|
| 101 | Encode::JP::Mobile - Shift_JIS (CP932) variants of Japanese cellphone pictograms |
|---|
| 102 | |
|---|
| 103 | =head1 SYNOPSIS |
|---|
| 104 | |
|---|
| 105 | use Encode; |
|---|
| 106 | use Encode::JP::Mobile; |
|---|
| 107 | |
|---|
| 108 | my $bytes = "\x82\xb1\xf9\x5d\xf8\xa0\x82\xb1"; # Shift_JIS bytes containing NTT DoCoMo pictograms |
|---|
| 109 | my $chars = decode("x-sjis-imode", $bytes); # \x{3053}\x{e6b9}\x{e63f}\x{3053} |
|---|
| 110 | |
|---|
| 111 | use Encode::JP::Mobile ':props'; |
|---|
| 112 | if ($chars =~ /\p{InDoCoMoPictograms}/) { |
|---|
| 113 | warn "It has DoCoMo pictogram characters!"; |
|---|
| 114 | } |
|---|
| 115 | |
|---|
| 116 | =head1 DESCRIPTION |
|---|
| 117 | |
|---|
| 118 | Encode::JP::Mobile is an Encode module to support Shift_JIS (CP932) |
|---|
| 119 | extended characters mapped in Unicode Private Area. |
|---|
| 120 | |
|---|
| 121 | This module is B<EXPERIMENTAL>. That means API and implementations |
|---|
| 122 | will sometimes be backward incompatible. |
|---|
| 123 | |
|---|
| 124 | =head1 ENCODINGS |
|---|
| 125 | |
|---|
| 126 | This module currently supports the following encodings. |
|---|
| 127 | |
|---|
| 128 | =over 4 |
|---|
| 129 | |
|---|
| 130 | =item x-sjis-imode |
|---|
| 131 | |
|---|
| 132 | Mapping for NTT DoCoMo i-mode handsets. Pictograms are mapped in |
|---|
| 133 | Shift_JIS private area and Unicode private area. The conversion rule |
|---|
| 134 | is equivalent to that of cp932. |
|---|
| 135 | |
|---|
| 136 | For example, C<U+E64E> is I<Fine> character (or I<The Sun>) and is |
|---|
| 137 | encoded as C<\xF8\x9F> in this encoding. |
|---|
| 138 | |
|---|
| 139 | This encoding is a subset of cp932 encoding, but has a reverse mapping |
|---|
| 140 | from KDDI/AU Unicode private area characters to DoCoMo pictogram |
|---|
| 141 | encodings. For example, |
|---|
| 142 | |
|---|
| 143 | my $kddi = "\xf6\x59"; # [!] in KDDI/AU |
|---|
| 144 | my $char = decode("x-sjis-kddi", $kddi); # \x{E481} |
|---|
| 145 | my $imode = encode("x-sjis-imode", $char); # \xf9\xdc -- [!] in DoCoMo |
|---|
| 146 | |
|---|
| 147 | I<x-sjis-docomo> is an alias. |
|---|
| 148 | |
|---|
| 149 | =item x-sjis-softbank |
|---|
| 150 | |
|---|
| 151 | Escape sequence based Shift_JIS encoding for SoftBank |
|---|
| 152 | pictograms. The decoding algorithm is not based on a ucm file, but on Perl |
|---|
| 153 | code. |
|---|
| 154 | |
|---|
| 155 | I<x-sjis-vodafone> is an alias. |
|---|
| 156 | |
|---|
| 157 | For example, C<U+E001> is I<A Boy> character and is encoded |
|---|
| 158 | as C<\x1b$G!\x0f> in this encoding (C<\x1b$G> is the beginning of |
|---|
| 159 | escape sequence and C<\x0f> is the end.) |
|---|
| 160 | |
|---|
| 161 | =item x-sjis-softbank-auto |
|---|
| 162 | |
|---|
| 163 | Maps Unicode private area characters to Shift_JIS private area (Gaiji) |
|---|
| 164 | characters. This encoding is used in 3GC phones when you input |
|---|
| 165 | pictogram characters in a web form on Shift_JIS pages and submit. |
|---|
| 166 | Handsets also can decode these encodings and display pictogram characters. |
|---|
| 167 | |
|---|
| 168 | I<x-sjis-vodafone-auto> is an alias. |
|---|
| 169 | |
|---|
| 170 | The private area mapping seems similar to CP932 but with a bit of |
|---|
| 171 | offset. |
|---|
| 172 | |
|---|
| 173 | For example, C<U+E001> is I<A Boy> character (same as |
|---|
| 174 | I<x-sjis-softbank>) and is encoded as I<\xF9\x41>. |
|---|
| 175 | |
|---|
| 176 | =item x-sjis-kddi |
|---|
| 177 | |
|---|
| 178 | Mapping for KDDI/AU pictograms. It's based on cp932 (I guess) but |
|---|
| 179 | there are more private characters that are not included in CP932.TXT. |
|---|
| 180 | |
|---|
| 181 | For example, I<U+E481> is I<!> (the exclamation) character and is |
|---|
| 182 | encoded as I<\xF6\x59> (same as cp932). I<U+EB88> is I<Angry> |
|---|
| 183 | character and is encoded in I<\xF4\x8D> while cp932 doesn't have a map |
|---|
| 184 | for it. |
|---|
| 185 | |
|---|
| 186 | I<x-sjis-ezweb> is an alias. |
|---|
| 187 | |
|---|
| 188 | =item x-sjis-kddi-auto |
|---|
| 189 | |
|---|
| 190 | Mapping for KDDI/AU pictograms, based on handset's internal Shift_JIS |
|---|
| 191 | to UTF-8 translations and vice versa. When you input some pictogram |
|---|
| 192 | characters in a web form on a UTF-8 page and submit them, this mapping |
|---|
| 193 | is used (instead of CP932 based I<x-sjis-kddi>) to represent the |
|---|
| 194 | pictogram characters. |
|---|
| 195 | |
|---|
| 196 | I<x-sjis-kddi-auto> and I<x-sjis-kddi> share their Unicode-to-encoding |
|---|
| 197 | mappings with each other and are hence round-trip safe, which means: |
|---|
| 198 | |
|---|
| 199 | my $bytes = "\xf6\x59"; # [!] in KDDI/AU |
|---|
| 200 | decode("x-sjis-kddi", $bytes); # \x{E481} |
|---|
| 201 | decode("x-sjis-kddi-auto", $bytes); # \x{EF59} |
|---|
| 202 | encode("x-sjis-kddi", "\x{EF59}"); # same as $bytes |
|---|
| 203 | encode("x-sjis-kddi-auto", "\x{E481}"); # same as $bytes |
|---|
| 204 | |
|---|
| 205 | C<x-sjis-ezweb-auto> is an alias. |
|---|
| 206 | |
|---|
| 207 | =item x-iso-2022-jp-kddi |
|---|
| 208 | |
|---|
| 209 | Encoding used to encode KDDI/AU pictogram characters in Email. It's |
|---|
| 210 | based on I<iso-2022-jp> which is still a de-facto standard encoding |
|---|
| 211 | when we send emails. |
|---|
| 212 | |
|---|
| 213 | Actually most KDDI/AU cellphones can receive emails encoded in |
|---|
| 214 | Shift_JIS, so you can just use I<x-sjis-kddi> to encode the pictogram |
|---|
| 215 | characters. This encoding might be still needed to decode incoming |
|---|
| 216 | emails sent from KDDI/AU phones containing pictogram characters. |
|---|
| 217 | |
|---|
| 218 | C<x-iso-2022-jp-ezweb> is an alias. |
|---|
| 219 | |
|---|
| 220 | =item x-sjis-airedge |
|---|
| 221 | |
|---|
| 222 | Mapping for AirEDGE pictograms. It's a complete subset of cp932. C<x-sjis-airh> is an alias. |
|---|
| 223 | |
|---|
| 224 | =back |
|---|
| 225 | |
|---|
| 226 | =head1 UNICODE PROPERTIES |
|---|
| 227 | |
|---|
| 228 | By importing this module with the ':props' tag, you'll have the following Unicode properties, plus C<InMobileJPPictograms>, which matches any of them. |
|---|
| 229 | |
|---|
| 230 | =over 4 |
|---|
| 231 | |
|---|
| 232 | =item InDoCoMoPictograms |
|---|
| 233 | |
|---|
| 234 | =item InKDDIPictograms |
|---|
| 235 | |
|---|
| 236 | =item InSoftBankPictograms |
|---|
| 237 | |
|---|
| 238 | =item InAirEdgePictograms |
|---|
| 239 | |
|---|
| 240 | =back |
|---|
| 241 | |
|---|
| 242 | Note that if the input is one of x-sjis-* variants, first you need to |
|---|
| 243 | know which encoding the bytes are encoded in, and decode the bytes back to |
|---|
| 244 | Unicode, to know if the strings contain these pictogram character |
|---|
| 245 | sets. So it might be only handy if the input is UTF-8 in reality. |
|---|
| 246 | |
|---|
| 247 | |
|---|
| 248 | =head1 BACKWARD COMPATIBILITY |
|---|
| 249 | |
|---|
| 250 | As of 0.07, this module now uses I<x-sjis-*> as its encoding names. It |
|---|
| 251 | still supports the old I<shift_jis-*> aliases though. I'm planning to |
|---|
| 252 | deprecate them sometime in the future release. |
|---|
| 253 | |
|---|
| 254 | =head1 NOTES |
|---|
| 255 | |
|---|
| 256 | =over 4 |
|---|
| 257 | |
|---|
| 258 | =item * |
|---|
| 259 | |
|---|
| 260 | Pictogram characters are defined to be round-trip safe. However, they |
|---|
| 261 | use Unicode Private Area for such characters, that means you'll have |
|---|
| 262 | interoperability issues, which this module doesn't try yet to solve |
|---|
| 263 | completely. We have a partial support for roundtrip (automatic |
|---|
| 264 | conversion) between I<x-sjis-imode> and I<x-sjis-kddi>. |
|---|
| 265 | |
|---|
| 266 | =item * |
|---|
| 267 | |
|---|
| 268 | As of version 0.04, this module tries to do auto-conversion of KDDI/AU |
|---|
| 269 | and NTT-DoCoMo pictogram characters. Support for SoftBank characters |
|---|
| 270 | is still left as a TODO. |
|---|
| 271 | |
|---|
| 272 | =back |
|---|
| 273 | |
|---|
| 274 | =head1 TODO |
|---|
| 275 | |
|---|
| 276 | =over 4 |
|---|
| 277 | |
|---|
| 278 | =item * |
|---|
| 279 | |
|---|
| 280 | Implement all merged C<x-sjis-mobile-jp> encoding. |
|---|
| 281 | |
|---|
| 282 | =back |
|---|
| 283 | |
|---|
| 284 | =head1 AUTHORS |
|---|
| 285 | |
|---|
| 286 | Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> with contributions from: |
|---|
| 287 | |
|---|
| 288 | Tokuhiro Matsuno |
|---|
| 289 | |
|---|
| 290 | Naoki Tomita |
|---|
| 291 | |
|---|
| 292 | Masahiro Chiba |
|---|
| 293 | |
|---|
| 294 | =head1 LICENSE |
|---|
| 295 | |
|---|
| 296 | This library is free software, licensed under the same terms as Perl. |
|---|
| 297 | |
|---|
| 298 | =head1 SEE ALSO |
|---|
| 299 | |
|---|
| 300 | L<Encode>, L<HTML::Entities::ImodePictogram>, L<Unicode::Japanese> |
|---|
| 301 | |
|---|
| 302 | http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/basic/ |
|---|
| 303 | http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/extention/ |
|---|
| 304 | http://www.au.kddi.com/ezfactory/tec/spec/3.html |
|---|
| 305 | http://developers.softbankmobile.co.jp/dp/tool_dl/web/picword_top.php |
|---|
| 306 | http://www.willcom-inc.com/ja/service/contents_service/club_air_edge/for_phone/homepage/index.html |
|---|
| 307 | http://www.nttdocomo.co.jp/service/mail/imode_mail/emoji_convert/ |
|---|
| 308 | |
|---|
| 309 | =cut |
|---|