root/lang/perl/Encode-JP-Mobile/trunk/tools/softbank-scrape.pl

Revision 5229, 1.1 kB (checked in by tokuhirom, 9 months ago)

r5455@skinny (orig r5195): tokuhirom | 2008-01-22 00:48:55 +0900
unicode2sjis_auto のマップは別ファイルとしてあらかじめつくっておく。unicode2sjis_auto のスクレイピング
スクリプトは chiba さんにつくっていただく予定になってます。

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl
2use strict;
3use warnings;
4use Web::Scraper;
5use URI;
6use YAML;
7
8my $number = 1;
9
10my $emoji = scraper {
11  process '//table[@width="100%" and @cellpadding="2"]//tr/td/font/../..',
12      'emoji[]' => scraper {
13          # 264-266 are Skymail and 267-270 are J-PHONE chars, removed from their website
14          $number += 4 if $number == 267;
15          process '//td[2]/font', unicode => 'TEXT';
16          process '//td[3]/font', sjis => [ 'TEXT', sub { unpack "H*", shift } ];
17          process '//td[1]/img',  image => [ '@src', sub { $_->as_string } ];
18          process '//td[1]', number => [ 'TEXT', sub { $number++ } ]; # /td[1] etc. is dummy
19      };
20  result 'emoji';
21};
22
23my @urls = map "http://developers.softbankmobile.co.jp/dp/tool_dl/web/picword_0$_.php", 1..6;
24
25my $res;
26foreach my $url (@urls) { push @$res, @{$emoji->scrape(URI->new($url))} };
27fill_sjisauto($res);
28binmode STDOUT, ":utf8";
29print Dump $res;
30
31sub fill_sjisauto {
32    my $res = shift;
33    my $uni2sjisauto = YAML::LoadFile('dat/softbank-unicode2sjis_auto.yaml');
34    for my $row (@$res) {
35        $row->{sjis_auto} = $uni2sjisauto->{$row->{unicode}};
36    }
37}
Note: See TracBrowser for help on using the browser.