root/lang/perl/Encode-JP-Mobile/trunk/tools/kddi-extract.pl

Revision 4874, 0.9 kB (checked in by miyagawa, 9 months ago)

add svn:executables. rename make_perl_map.pl to make-charnames-map.pl

  • Property svn:executable set to *
Line 
#!/usr/bin/perl
#
# kddi-extract.pl - extract the KDDI pictogram table from typeD.pdf.
#
# Usage: kddi-extract.pl typeD.pdf
#
# Walks every page of the given PDF, picks out each table row (a number, a
# name and four 4-hex-digit code columns) with a regex, and prints all rows
# to STDOUT as a YAML list sorted numerically by row number.  After printing,
# the script dies if the number of extracted rows is not the expected 641,
# so a changed PDF layout does not go unnoticed.
use strict;
use warnings;
use Encode;
use Encode::JP::Mobile;
use CAM::PDF;
use YAML;

# scraping from http://www.au.kddi.com/ezfactory/tec/spec/pdf/typeD.pdf

# Number of rows the table is expected to contain.
my $EXPECTED_ITEMS = 641;

my $file = shift or die "Usage: kddi-extract.pl typeD.pdf\n";

# CAM::PDF->new returns undef when the file cannot be read or parsed; fail
# here with a readable message instead of crashing on the first method call.
my $doc = CAM::PDF->new($file);
if (!$doc) {
    my $reason = defined $CAM::PDF::errstr ? $CAM::PDF::errstr : 'unknown error';
    die "Cannot read PDF '$file': $reason\n";
}

my @res;
for my $p (1..$doc->numPages()) {
    # Skip pages that yield no text at all rather than decoding undef.
    my $raw = $doc->getPageText($p);
    next if !defined $raw;

    # The page text is treated as Shift_JIS bytes.
    my $text = decode("shift_jis", $raw);

    # One row: <number>, a separator (a single space, a run of a-f / space /
    # U+FF43 / U+3000, or U+306A U+3057 followed by a space), <name> (no
    # spaces), a space, then four adjacent groups of four hex digits.
    while ($text =~ m/(\d+)(?: |[abcdef \x{FF43}\x{3000}]+|\x{306A}\x{3057} )([^ ]*) ([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})/gs) {
        my %data;
        @data{qw( number name sjis unicode email_jis email_sjis )} = ($1, $2, $3, $4, $5, $6);

        # Names may be wrapped across lines in the extracted text.
        $data{name} =~ s/\n//g;

        # Decode the raw sjis bytes with the x-sjis-kddi-auto encoding and
        # record the code point of the first resulting character as hex.
        $data{unicode_auto} = sprintf '%X', ord decode 'x-sjis-kddi-auto', pack "H*", $data{sjis};

        push @res, \%data;
    }
}

@res = sort { $a->{number} <=> $b->{number} } @res;

binmode STDOUT, ":utf8";
print Dump \@res;

# Checked after the dump so the partial result is still available for
# inspection when the count is off.
@res == $EXPECTED_ITEMS
    or die "item count mismatch: expected $EXPECTED_ITEMS, got " . scalar(@res);
Note: See TracBrowser for help on using the browser.