|
Revision 4919, 1.4 kB
(checked in by yappo, 5 years ago)
|
|
revert revision 4905, 4906, 4907, 4908, 4909
|
| Line | |
|---|
| 1 | package WWW::MobileCarrierJP::EZWeb::PictogramInfo; |
|---|
| 2 | use strict; |
|---|
| 3 | use warnings; |
|---|
| 4 | use CAM::PDF; |
|---|
| 5 | use LWP::UserAgent; |
|---|
| 6 | use Carp; |
|---|
| 7 | use Encode; |
|---|
| 8 | |
|---|
| 9 | my $url = 'http://www.au.kddi.com/ezfactory/tec/spec/pdf/typeD.pdf'; |
|---|
| 10 | |
|---|
# Download the pictogram specification PDF from $url and parse it.
#
# Written to be invoked as a class method (see SYNOPSIS); the invocant and
# any other arguments are ignored.
#
# Returns whatever _process_pdf() returns for the downloaded response body.
#
# Croaks when the HTTP request does not succeed. The message keeps the
# "Can't fetch <url>" prefix and appends the response status line so the
# reason for the failure (404, timeout, ...) is visible to the caller.
sub scrape {
    my $ua  = LWP::UserAgent->new(agent => __PACKAGE__);
    my $res = $ua->get($url);

    croak "Can't fetch $url: " . $res->status_line
        unless $res->is_success;

    # Hand the response body to the PDF parser.
    return _process_pdf($res->content);
}
|---|
| 20 | |
|---|
# Extract pictogram records from the content of the specification PDF.
#
# Arguments:
#   $content - the PDF document, passed unchanged to CAM::PDF->new.
#
# Returns an array reference of hash references sorted numerically by
# "number". Each hash carries the keys number, name, sjis, unicode,
# email_jis and email_sjis, filled from the six regex captures in that order.
#
# Croaks if CAM::PDF cannot build a document object from $content.
sub _process_pdf {
    my $content = shift;

    # CAM::PDF->new yields a false value for input it cannot parse; fail
    # here with a readable message instead of dying on the method call below.
    my $doc = CAM::PDF->new($content);
    if (!$doc) {
        my $reason = defined $CAM::PDF::errstr
                   ? $CAM::PDF::errstr
                   : 'unknown error';
        croak "Can't parse PDF: $reason";
    }

    my @res;
    foreach my $p (1 .. $doc->numPages()) {
        # A page without extractable text gives undef; skip it rather than
        # decoding and matching against an undefined value.
        my $raw = $doc->getPageText($p);
        next unless defined $raw;

        my $text = decode("shift_jis", $raw);

        # One record is: digits, a separator (a single space, a run of
        # a-f / space / U+FF43 / U+3000, or U+306A U+3057 plus a space),
        # a name containing no spaces, a space, then four groups of four
        # upper-case hex digits. Whitespace in the pattern is significant,
        # so /x must not be used here.
        while ($text =~ m/(\d+)(?: |[abcdef \x{FF43}\x{3000}]+|\x{306A}\x{3057} )([^ ]*) ([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})/gs) {
            my %data;
            @data{qw( number name sjis unicode email_jis email_sjis )} = ($1, $2, $3, $4, $5, $6);

            # Names may be broken across lines in the extracted text.
            $data{name} =~ s/\n//g;
            push @res, \%data;
        }
    }

    @res = sort { $a->{number} <=> $b->{number} } @res;
    return \@res;
}
|---|
| 39 | |
|---|
| 40 | 1; |
|---|
| 41 | __END__ |
|---|
| 42 | |
|---|
| 43 | =head1 NAME |
|---|
| 44 | |
|---|
| 45 | WWW::MobileCarrierJP::EZWeb::PictogramInfo - get PictogramInfo information from EZWeb site. |
|---|
| 46 | |
|---|
| 47 | =head1 SYNOPSIS |
|---|
| 48 | |
|---|
| 49 | use WWW::MobileCarrierJP::EZWeb::PictogramInfo; |
|---|
| 50 | WWW::MobileCarrierJP::EZWeb::PictogramInfo->scrape(); |
|---|
| 51 | |
|---|
| 52 | =head1 AUTHOR |
|---|
| 53 | |
|---|
| 54 | Tokuhiro Matsuno < tokuhirom gmail com > |
|---|
| 55 | |
|---|
| 56 | =head1 THANKS |
|---|
| 57 | |
|---|
| 58 | This code is copied from Encode-JP-Mobile. |
|---|
| 59 | |
|---|
| 60 | miyagawa++ |
|---|
| 61 | |
|---|
| 62 | |
|---|
| 63 | =head1 SEE ALSO |
|---|
| 64 | |
|---|
| 65 | L<WWW::MobileCarrierJP> |
|---|
| 66 | |
|---|