|
Revision 4874, 0.9 kB
(checked in by miyagawa, 9 months ago)
|
|
add svn:executables. rename make_perl_map.pl to make-charnames-map.pl
|
-
Property svn:executable set to
*
|
| Line | |
|---|
#!/usr/bin/perl
# Extracts a character table from a PDF and prints it to STDOUT as YAML.
#
# Usage: kddi-extract.pl typeD.pdf
#
# Every table row matched in the page text becomes a hash with the keys
# number, name, sjis, unicode, email_jis and email_sjis (copied verbatim from
# the matched text) plus unicode_auto (computed by decoding the sjis bytes
# with the 'x-sjis-kddi-auto' encoding).  Rows are sorted numerically by
# 'number' before being dumped.
#
# The script dies when the PDF cannot be loaded and, after the YAML has been
# written, when the number of extracted rows differs from the expected one.
use strict;
use warnings;
use Encode;
use Encode::JP::Mobile;
use CAM::PDF;
use YAML;

# scraping from http://www.au.kddi.com/ezfactory/tec/spec/pdf/typeD.pdf

# Number of rows the script expects to extract; anything else is an error.
my $EXPECTED_ITEMS = 641;

my $file = shift or die "Usage: kddi-extract.pl typeD.pdf\n";

# CAM::PDF->new returns undef on failure and leaves the reason in
# $CAM::PDF::errstr; report that instead of crashing on the first method call.
my $doc = CAM::PDF->new($file);
if (!$doc) {
    my $reason = defined $CAM::PDF::errstr ? $CAM::PDF::errstr : 'unknown error';
    $reason =~ s/\s+\z//;
    die "Cannot load PDF '$file': $reason\n";
}

my @res;
for my $p (1 .. $doc->numPages()) {
    my $raw = $doc->getPageText($p);

    # NOTE(review): getPageText appears to return nothing for a page it cannot
    # extract text from; skip such pages rather than matching against undef.
    next if !defined $raw;

    my $text = decode("shift_jis", $raw);

    # One row is: a run of digits (number); then either a single space, a run
    # of characters from [a-f, space, U+FF43, U+3000], or U+306A U+3057
    # followed by a space; then a run of non-space characters (name), a space,
    # and four consecutive groups of four upper-case hex digits.
    # /s is kept from the original; /g walks through every row on the page.
    while ($text =~ m/(\d+)(?: |[abcdef \x{FF43}\x{3000}]+|\x{306A}\x{3057} )([^ ]*) ([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})/gs) {
        my %data;
        @data{qw( number name sjis unicode email_jis email_sjis )} = ($1, $2, $3, $4, $5, $6);

        # The name may have been wrapped across lines in the extracted text.
        $data{name} =~ s/\n//g;

        # Turn the hex string into raw bytes, decode them with the KDDI
        # "auto" encoding and record the first resulting code point as
        # upper-case hex.
        $data{unicode_auto} = sprintf '%X', ord decode 'x-sjis-kddi-auto', pack "H*", $data{sjis};

        push @res, \%data;
    }
}

@res = sort { $a->{number} <=> $b->{number} } @res;

binmode STDOUT, ":utf8";
print Dump \@res;

# The check deliberately runs after the dump (as in the original), so the
# extracted rows are still available for inspection when the count is wrong.
@res == $EXPECTED_ITEMS
    or die sprintf "item count mismatch: expected %d, got %d",
        $EXPECTED_ITEMS, scalar @res;