Changeset 5198

Show
Ignore:
Timestamp:
01/22/08 01:00:22 (6 years ago)
Author:
chiba
Message:

softbank-scrape-autosjisの役割を
dat/softbank-unicode2sjis_auto.yaml
を作るだけとした。
で、そのsoftbank-unicode2sjis_auto.yamlを同梱

Location:
lang/perl/Encode-JP-Mobile/branches/auto-convert-sjis-pictgram
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • lang/perl/Encode-JP-Mobile/branches/auto-convert-sjis-pictgram/dat/softbank-unicode2sjis_auto.yaml

    r5195 r5198  
    263263E252: F7F2 
    264264E253: F7F3 
    265 E255: ~ 
    266 E256: ~ 
    267 E257: ~ 
    268265E301: F9A1 
    269266E302: F9A2 
  • lang/perl/Encode-JP-Mobile/branches/auto-convert-sjis-pictgram/tools/BUILD

    r5133 r5198  
     1# Build softbank-unicode2sjis_auto.yaml 
     2./tools/softbank-scrape-autosjis.pl 103-111-HTML_2.0.0.txt > dat/softbank-unicode2sjis_auto.yaml 
     3 
    14# Build YAML files 
    25./tools/docomo-scrape.pl > dat/docomo-table.yaml 
    36./tools/softbank-scrape.pl > dat/softbank-table.yaml 
    47./tools/softbank-scrape-name.pl > tmp 
    5 mv tmp dat/softbank-table.yaml 
    6 ./tools/softbank-scrape-autosjis.pl 103-111-HTML_2.0.0.txt > tmp 
    78mv tmp dat/softbank-table.yaml 
    89./tools/kddi-extract.pl typeD.pdf > dat/kddi-table.yaml 
  • lang/perl/Encode-JP-Mobile/branches/auto-convert-sjis-pictgram/tools/softbank-scrape-autosjis.pl

    r5105 r5198  
    66use FindBin; 
    77 
     8use Data::Dumper; 
     9$Data::Dumper::Terse++; 
     10 
    811# how to make 103-111-HTML_2.0.0.txt 
    912# 1. get PDF from http://www2.developers.softbankmobile.co.jp/dp/tool_dl/download.php?docid=120&companyid= 
     
    1215 
    1316my $pdf_text_file = shift or die "Usage: softbank-scrape-autosjis.pl 103-111-HTML_2.0.0.txt"; 
    14 my $fh =file($pdf_text_file)->openr; 
     17my $pdf_fh =file($pdf_text_file)->openr; 
    1518 
    1619my %map; 
    17 while (my $line = <$fh>) { 
     20while (my $line = <$pdf_fh>) { 
    1821    chomp $line; 
     22    next if $line !~ /^&#\d\d\d\d\d;\s*&#x/; 
     23 
    1924    my @codes = split /\s+/, $line; 
    20  
    21     if ( @codes != 4 || $codes[0] =~ /^&#x/ ) { 
    22         next; 
    23     } 
     25    next if @codes != 4; 
    2426 
    2527    my $unicode  = strip_entity_ref_mark($codes[1]); 
     
    2830    $map{ $unicode } = $shiftjis; 
    2931} 
     32close $pdf_fh; 
    3033 
     34print Dump(\%map); 
    3135 
    32  
    33 my $table_file = "$FindBin::Bin/../dat/softbank-table.yaml"; 
    34 my $table = YAML::LoadFile($table_file); 
    35  
    36 for my $emoji (@$table) { 
    37     $emoji->{sjis_auto} = $map{ $emoji->{unicode} }; 
    38 } 
    39  
    40 print Dump $table; 
    4136 
    4237sub strip_entity_ref_mark {