root/lang/perl/WWW-MobileCarrierJP/tags/0.09/lib/WWW/MobileCarrierJP/EZWeb/PictogramInfo.pm @ 4919

Revision 4919, 1.4 kB (checked in by yappo, 6 years ago)

revert revision 4905, 4906, 4907, 4908, 4909

Line 
package WWW::MobileCarrierJP::EZWeb::PictogramInfo;
use strict;
use warnings;
use CAM::PDF;
use LWP::UserAgent;
use Carp;
use Encode;

# Location of the PDF document that scrape() downloads and parses.
my $url = 'http://www.au.kddi.com/ezfactory/tec/spec/pdf/typeD.pdf';

# Download the pictogram PDF from $url and return the parsed table.
#
# No arguments are read; when invoked as a class method (as in the
# SYNOPSIS) the invocant is ignored.
#
# Returns whatever _process_pdf() returns for the response body.
#
# Croaks with the URL and the HTTP status line when the request fails.
sub scrape {
    my $ua  = LWP::UserAgent->new( agent => __PACKAGE__ );
    my $res = $ua->get($url);

    # Include the status line so a failed download can be diagnosed.
    croak "Can't fetch $url: " . $res->status_line
        unless $res->is_success;

    return _process_pdf( $res->content );
}

# Extract pictogram rows from a PDF document.
#
# Arguments:
#   $content - the value handed to CAM::PDF->new (scrape() passes the HTTP
#              response body).
#
# Returns an array reference of hash references, sorted numerically by
# "number". Each hash has the keys number, name, sjis, unicode, email_jis
# and email_sjis; the last four are 4-digit uppercase hex strings taken, in
# that order, from one run of 16 hex digits.
#
# Croaks when CAM::PDF cannot build a document object from $content.
sub _process_pdf {
    my $content = shift;

    # CAM::PDF->new signals failure by returning a false value rather than
    # dying, so check it before calling methods on the result.
    my $doc = CAM::PDF->new($content)
        or croak "Can't parse PDF: "
        . ( defined $CAM::PDF::errstr ? $CAM::PDF::errstr : 'unknown error' );

    # One table row: a number, then either a single space, a run of
    # a-f / U+FF43 (fullwidth "c") / space / U+3000 (ideographic space)
    # characters, or U+306A U+3057 (hiragana "nashi") plus a space; then a
    # name without spaces, one space, and four groups of four hex digits.
    # NOTE(review): the middle alternative presumably skips a "type" column
    # of the PDF table - confirm against the document.
    my $row_re = qr/(\d+)(?: |[abcdef \x{FF43}\x{3000}]+|\x{306A}\x{3057} )([^ ]*) ([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})/s;

    my @rows;
    for my $page ( 1 .. $doc->numPages() ) {
        my $raw = $doc->getPageText($page);

        # Skip pages for which no text came back instead of matching
        # against undef.
        next unless defined $raw;

        my $text = decode( "shift_jis", $raw );
        while ( $text =~ /$row_re/g ) {
            my %row;
            @row{qw( number name sjis unicode email_jis email_sjis )}
                = ( $1, $2, $3, $4, $5, $6 );

            # The name capture may span line breaks; join the pieces.
            $row{name} =~ s/\n//g;
            push @rows, \%row;
        }
    }

    @rows = sort { $a->{number} <=> $b->{number} } @rows;
    return \@rows;
}

1;
__END__

=head1 NAME

WWW::MobileCarrierJP::EZWeb::PictogramInfo - get PictogramInfo information from EZWeb site.

=head1 SYNOPSIS

    use WWW::MobileCarrierJP::EZWeb::PictogramInfo;
    WWW::MobileCarrierJP::EZWeb::PictogramInfo->scrape();

=head1 AUTHOR

Tokuhiro Matsuno < tokuhirom gmail com >

=head1 THANKS

This code is copied from Encode-JP-Mobile.

miyagawa++


=head1 SEE ALSO

L<WWW::MobileCarrierJP>
66
Note: See TracBrowser for help on using the browser.