Changeset 6288

Show
Ignore:
Timestamp:
02/06/08 15:58:41 (10 months ago)
Author:
tomi-ru
Message:

もう堂々とlabs.unoh.netからscrapeするということにする

Location:
lang/perl/Encode-JP-Mobile/trunk
Files:
2 modified
1 moved

Legend:

Unmodified
Added
Removed
  • lang/perl/Encode-JP-Mobile/trunk/Rakefile

    r5448 r6288  
    4040end 
    4141 
    42 unoh_files = %w(e2is i2es s2ie) 
    43 file 'dat/convert-map-utf8.yaml' => unoh_files.map {|x| "dat/conv/emoji_#{x}.txt" } do 
    44     sh "#{perl} tools/make-convert-map.pl > dat/convert-map-utf8.yaml" 
    45 end 
    46 directory 'dat/conv/' 
    47 unoh_files.each do |f| 
    48     file "dat/conv/emoji_#{f}.txt" => ['dat/conv/'] do 
    49         sh "wget http://labs.unoh.net/emoji_#{f}.txt -O dat/conv/emoji_#{f}.txt" 
    50     end 
     42file 'dat/convert-map-utf8.yaml' do 
     43    sh "#{perl} tools/convert-map-scrape.pl > dat/convert-map-utf8.yaml" 
    5144end 
    5245 
  • lang/perl/Encode-JP-Mobile/trunk/tools/BUILD

    r5231 r6288  
    2222 
    2323# Optional: build convert-map-utf8.yaml 
    24 # ./tools/make-convert-map.pl 
     24# ./tools/convert-map-scrape.pl 
    2525 
    2626# Make x-utf8-*.ucm 
  • lang/perl/Encode-JP-Mobile/trunk/tools/convert-map-scrape.pl

    r5448 r6288  
    22 
    33# http://labs.unoh.net/2007/02/post_65.html to dat/convert-map-utf8.yaml 
    4 # mkdir dat/conv; download emoji_*.txt to dat/conv/emoji_*.txt 
    5 # perl tools/make-convert-map.pl > dat/convert-map-utf8.yaml 
     4# perl tools/convert-map-scrape.pl > dat/convert-map-utf8.yaml 
    65 
    7 use utf8; 
    86use Encode; 
    97use Encode::JP::Mobile 0.09; 
    10 use File::Slurp qw(slurp); 
     8use LWP::Simple; 
    119use YAML; 
    1210 
     11my %files; 
     12for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) { 
     13    $files{$file} = decode('cp932', get("http://labs.unoh.net/$file")); 
     14} 
     15 
    1316my $no2uni = {}; 
    14 for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) { 
    15     my @line = slurp "dat/conv/$file"; 
    16     for my $line (@line) { 
     17for my $file (keys %files) { 
     18    for my $line (split /\n/, $files{$file}) { 
    1719        next unless $line =~ /^%/; 
    1820        my ($no, $byte) = split "\t", $line; 
     
    3335 
    3436my %map; 
    35 for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) { 
    36     my @line = slurp "dat/conv/$file"; 
    37      
    38     for my $line (@line) { 
     37for my $file (keys %files) { 
     38    for my $line (split /\n/, $files{$file}) { 
    3939        next unless $line =~ /^%/; 
    4040        chomp $line; 
    41         $line = decode 'cp932', $line; 
    4241 
    4342        $file eq 'emoji_i2es.txt' && do { 
     
    7170} 
    7271 
    73 print YAML::Dump \%map; 
     72binmode STDOUT, ":utf8"; 
     73print YAML::Dump(\%map);