root/lang/ruby/ssb/trunk/scripts/emoji_scrape.rb @ 18931

Revision 18931, 3.3 kB (checked in by koshigoe, 5 years ago)

* softbank の絵文字を取得できる様に修正

  • Property svn:mime-type set to text/x-ruby; charset=utf-8
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Date Author Rev URL
Line 
1# emoji_scrape.rb - k-tai (Japanese mobile phone) emoji scraper
2#
3# Author:: MIZOGUCHI Coji <mizoguchi.coji at gmail.com>
4# License:: Distribute under the same terms as Ruby
5#
6# $Id$
7#
8require 'config/common.rb'
9require 'rubygems'
10require 'scrapi'
11require 'open-uri'
12require 'nkf'
13$KCODE = 'utf-8'
14
# Downloads the pictogram (emoji) images published by NTT docomo, KDDI (au)
# and SoftBank and stores them as 16x16 GIF files below
# <public_dir>/emoji/<carrier>/.
#
# The ImageMagick +convert+ command is used for every carrier; +lha+ and +rm+
# are additionally used for KDDI. The target directories are not created here.
module EmojiScraper
  EMOJI_DIR = File.join(SSB::CONFIG[:public_dir], 'emoji')

  # Returns the directory that holds the converted images of +carrier+
  # (e.g. 'docomo', 'kddi', 'softbank').
  def self.emoji_dir(carrier)
    File.join(EMOJI_DIR, carrier)
  end

  # Runs an external command and reports failures instead of ignoring them.
  #
  # Always pass the program and each argument as separate strings: the
  # multi-argument form of Kernel#system does not go through a shell, so
  # names scraped from remote pages are never interpreted as shell syntax.
  #
  # Returns true on success; warns and returns false otherwise.
  def self.run_command(*argv)
    return true if system(*argv)

    warn "command failed: #{argv.join(' ')}"
    false
  end
  private_class_method :run_command

  # Downloads the image at +pict_uri+ (a URI object) into a temporary file
  # next to its destination, then converts it into
  # <emoji_dir(carrier)>/<name>.gif, resized to 16x16 with white turned
  # transparent. The temporary file is removed even when something fails.
  #
  # Download errors propagate to the caller; a failed conversion only warns.
  def self.fetch_and_convert(pict_uri, carrier, name)
    dir = emoji_dir(carrier)
    tmpfile = File.join(dir, name + '.tmp.gif')
    filename = File.join(dir, name + '.gif')

    begin
      pict_uri.open do |img|
        # Binary mode: image data must not go through newline translation.
        File.open(tmpfile, 'wb') { |out| out.write(img.read) }
      end
      converted = run_command('convert', '-transparent', 'white', '-resize', '16x16',
                              tmpfile, filename)
    ensure
      File.delete(tmpfile) if File.exist?(tmpfile)
    end

    puts "#{pict_uri} => #{filename}" if converted
  end
  private_class_method :fetch_and_convert

  # Scrapes the basic and extended i-mode pictograph tables and saves one
  # GIF per table row, named after the text of the row's third cell.
  def self.scrape_docomo
    base_uris =
      ['http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/basic/',
       'http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/extention/']

    emoji_scraper = Scraper.define do
      process 'td:nth-child(3) > span.txt', :code => :text
      process 'td > img', :uri => '@src'
      result :code, :uri
    end

    scraper = Scraper.define do
      array :emoji
      process 'tr.acenter', :emoji => emoji_scraper
      result :emoji
    end

    opt = { :char_encoding => 'utf8' }
    base_uris.each do |uri|
      page_uri = URI.parse(uri)
      # The page is converted to UTF-8 before it is handed to the scraper.
      html = NKF.nkf('-w', page_uri.read)

      (scraper.scrape(html, opt) || []).each do |entry|
        # Rows without both a code and an image cannot be saved; skip them.
        next if entry.nil? || entry.code.nil? || entry.uri.nil?

        # Image paths are resolved relative to the page they were found on.
        fetch_and_convert(page_uri + entry.uri, 'docomo', entry.code)
      end
    end
  end

  # Downloads KDDI's icon archive into the current directory, unpacks it with
  # +lha+ and converts every icon_image/*.ai file into a 16x16 GIF named
  # after the first run of digits in its path. The archive and the extracted
  # directory are removed afterwards, even when a step raises.
  def self.scrape_kddi
    archive = 'ezicon.lzh'
    URI.parse('http://www.au.kddi.com/ezfactory/tec/spec/lzh/icon_image.lzh').open do |f|
      # Binary mode: the archive must be written byte-for-byte.
      File.open(archive, 'wb') { |out| out.write(f.read) }
    end

    run_command('lha', '-x', archive)
    Dir.glob('icon_image/*.ai') do |src|
      # NOTE(review): the pattern needs at least one character between the
      # digits and ".ai", so a name like "123.ai" would yield "12" - confirm
      # against the real archive contents before changing it.
      match = src.match(/(\d+).+\.ai$/)
      next unless match

      puts src
      out_filename = File.join(emoji_dir('kddi'), match[1] + '.gif')
      run_command('convert', '-trim', '-geometry', '16x16', '+repage', src, out_filename)
    end
  ensure
    File.delete(archive) if archive && File.exist?(archive)
    run_command('rm', '-Rf', 'icon_image')
  end

  # Scrapes SoftBank's pictogram pages web_pic_01.html .. web_pic_06.html and
  # saves one GIF per table row, named after the text of the row's second
  # (white background) cell.
  def self.scrape_thirdforce
    base_uri = 'http://creation.mb.softbank.jp/web/'
    page = 'web_pic_%02d.html'

    # The scrapers do not depend on the page number, so build them once.
    pict_scraper = Scraper.define do
      process 'td:nth-child(2)[bgcolor="#FFFFFF"]', :unicode => :text
      process 'td > img', :image => '@src'
      result :unicode, :image
    end

    page_scraper = Scraper.define do
      process 'table[width="100%"] > tr', 'pictograms[]' => pict_scraper
      result :pictograms
    end

    1.upto(6) do |n|
      pictograms = page_scraper.scrape(URI.parse(base_uri + (page % n))) || []
      pictograms.each do |pictinfo|
        # Rows without both a name and an image cannot be saved; skip them.
        next if pictinfo.nil? || pictinfo.unicode.nil? || pictinfo.image.nil?

        fetch_and_convert(URI.parse(base_uri) + pictinfo.image, 'softbank', pictinfo.unicode)
      end
    end
  end

  # Runs the scrapers for all three carriers in turn.
  def self.run
    scrape_docomo
    scrape_kddi
    scrape_thirdforce
  end
end
115
# Run all scrapers only when this file is executed directly, not when it is
# loaded by another script.
EmojiScraper.run if __FILE__ == $0
Note: See TracBrowser for help on using the browser.