Show
Ignore:
Timestamp:
02/06/08 15:58:41 (10 months ago)
Author:
tomi-ru
Message:

もう堂々とlabs.unoh.netからscrapeするということにする

Files:
1 moved

Legend:

Unmodified
Added
Removed
  • lang/perl/Encode-JP-Mobile/trunk/tools/convert-map-scrape.pl

    r5448 r6288
      2    2
      3    3    # http://labs.unoh.net/2007/02/post_65.html to dat/convert-map-utf8.yaml
      4       - # mkdir dat/conv; download emoji_*.txt to dat/conv/emoji_*.txt
      5       - # perl tools/make-convert-map.pl > dat/convert-map-utf8.yaml
           4  + # perl tools/convert-map-scrape.pl > dat/convert-map-utf8.yaml
      6    5
      7       - use utf8;
      8    6    use Encode;
      9    7    use Encode::JP::Mobile 0.09;
     10       - use File::Slurp qw(slurp);
           8  + use LWP::Simple;
     11    9    use YAML;
     12   10
          11  + my %files;
          12  + for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) {
          13  +     $files{$file} = decode('cp932', get("http://labs.unoh.net/$file"));
          14  + }
          15  +
     13   16    my $no2uni = {};
     14       - for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) {
     15       -     my @line = slurp "dat/conv/$file";
     16       -     for my $line (@line) {
          17  + for my $file (keys %files) {
          18  +     for my $line (split /\n/, $files{$file}) {
     17   19            next unless $line =~ /^%/;
     18   20            my ($no, $byte) = split "\t", $line;
      …    …
     33   35
     34   36    my %map;
     35       - for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) {
     36       -     my @line = slurp "dat/conv/$file";
     37       -
     38       -     for my $line (@line) {
          37  + for my $file (keys %files) {
          38  +     for my $line (split /\n/, $files{$file}) {
     39   39            next unless $line =~ /^%/;
     40   40            chomp $line;
     41       -         $line = decode 'cp932', $line;
     42   41
     43   42            $file eq 'emoji_i2es.txt' && do {
      …    …
     71   70    }
     72   71
     73       - print YAML::Dump \%map;
          72  + binmode STDOUT, ":utf8";
          73  + print YAML::Dump(\%map);