|
Revision 5229, 1.1 kB
(checked in by tokuhirom, 9 months ago)
|
r5455@skinny (orig r5195): tokuhirom | 2008-01-22 00:48:55 +0900
unicode2sjis_auto のマップは別ファイルとしてあらかじめつくっておく。unicode2sjis_auto のスクレイピング
スクリプトは chiba さんにつくっていただく予定になってます。
|
-
Property svn:executable set to
*
|
| Line | |
|---|
| 1 | #!/usr/bin/perl |
|---|
| 2 | use strict; |
|---|
| 3 | use warnings; |
|---|
| 4 | use Web::Scraper; |
|---|
| 5 | use URI; |
|---|
| 6 | use YAML; |
|---|
| 7 | |
|---|
| 8 | my $number = 1; |
|---|
| 9 | |
|---|
| 10 | my $emoji = scraper { |
|---|
| 11 | process '//table[@width="100%" and @cellpadding="2"]//tr/td/font/../..', |
|---|
| 12 | 'emoji[]' => scraper { |
|---|
| 13 | # 264-266 are Skymail and 267-270 are J-PHONE chars, removed from their website |
|---|
| 14 | $number += 4 if $number == 267; |
|---|
| 15 | process '//td[2]/font', unicode => 'TEXT'; |
|---|
| 16 | process '//td[3]/font', sjis => [ 'TEXT', sub { unpack "H*", shift } ]; |
|---|
| 17 | process '//td[1]/img', image => [ '@src', sub { $_->as_string } ]; |
|---|
| 18 | process '//td[1]', number => [ 'TEXT', sub { $number++ } ]; # /td[1] etc. is dummy |
|---|
| 19 | }; |
|---|
| 20 | result 'emoji'; |
|---|
| 21 | }; |
|---|
| 22 | |
|---|
| 23 | my @urls = map "http://developers.softbankmobile.co.jp/dp/tool_dl/web/picword_0$_.php", 1..6; |
|---|
| 24 | |
|---|
| 25 | my $res; |
|---|
| 26 | foreach my $url (@urls) { push @$res, @{$emoji->scrape(URI->new($url))} }; |
|---|
| 27 | fill_sjisauto($res); |
|---|
| 28 | binmode STDOUT, ":utf8"; |
|---|
| 29 | print Dump $res; |
|---|
| 30 | |
|---|
| 31 | sub fill_sjisauto { |
|---|
| 32 | my $res = shift; |
|---|
| 33 | my $uni2sjisauto = YAML::LoadFile('dat/softbank-unicode2sjis_auto.yaml'); |
|---|
| 34 | for my $row (@$res) { |
|---|
| 35 | $row->{sjis_auto} = $uni2sjisauto->{$row->{unicode}}; |
|---|
| 36 | } |
|---|
| 37 | } |
|---|