root/lang/ruby/ssb/scripts/emoji_scrape.rb @ 1996

Revision 1996, 3.2 kB (checked in by tokuhirom, 6 years ago)

lang/ruby/ssb: thirdforce の絵文字が表示されるようにした

  • Property svn:mime-type set to text/x-ruby; charset=utf-8
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Date Author Rev URL
Line 
1# scrape.rb - k-tai emoji scraper
2#
3# Author:: MIZOGUCHI Coji <mizoguchi.coji at gmail.com>
4# License:: Distribute under the same terms as Ruby
5#
6# $Id$
7#
8require 'config/common.rb'
9require 'rubygems'
10require 'scrapi'
11require 'open-uri'
12require 'nkf'
13$KCODE = 'utf-8'
14
# Downloads carrier pictogram ("emoji") images for docomo, KDDI and
# SoftBank ("thirdforce") and stores them as GIF files under
# EMOJI_DIR/<carrier>/.
#
# External commands are always started with the multi-argument form of
# Kernel#system so that text scraped from remote pages never passes through
# a shell.
class EmojiScraper
  # Root directory that receives one sub-directory per carrier.
  EMOJI_DIR = File.join(SSB::CONFIG[:public_dir], 'emoji')

  # Returns the output directory for +carrier+ (e.g. 'docomo').
  # The directory is not created here; presumably it already exists.
  def emoji_dir(carrier)
    File.join(EMOJI_DIR, carrier)
  end

  # Scrapes the two docomo pictograph pages and saves every listed image as
  # <code>.gif in the 'docomo' directory.
  def scrape_docomo
    base_uris = [
      'http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/basic/',
      'http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/extention/'
    ]

    # One table row: the text of the third cell's span.txt and the image source.
    emoji_scraper = Scraper.define do
      process 'td:nth-child(3) > span.txt', :code => :text
      process 'td > img', :uri => '@src'
      result :code, :uri
    end

    scraper = Scraper.define do
      array :emoji
      process 'tr.acenter', :emoji => emoji_scraper
      result :emoji
    end

    opt = { :char_encoding => 'utf8' }
    base_uris.each do |uri|
      page_uri = URI.parse(uri)
      # URI#read (open-uri) instead of Kernel#open: the latter would also
      # accept "|command" strings and no longer opens URLs on Ruby >= 3.0.
      html = NKF.nkf('-w', page_uri.read)
      (scraper.scrape(html, opt) || []).each do |entry|
        # Rows lacking either the code or the image cannot be saved.
        next if entry.nil? || entry.code.nil? || entry.uri.nil?
        save_icon('docomo', entry.code, page_uri + entry.uri)
      end
    end
  end

  # Downloads the KDDI icon archive, unpacks it with `lha` and converts each
  # extracted *.ai file whose name contains a number into <number>.gif in the
  # 'kddi' directory. The archive and the extracted directory are removed
  # afterwards, even when a step raises.
  def scrape_kddi
    archive = 'ezicon.lzh'
    extracted_dir = 'icon_image'
    begin
      data = URI.parse('http://www.au.kddi.com/ezfactory/tec/spec/lzh/icon_image.lzh').read
      # 'wb': the archive is binary data and must not be newline-translated.
      File.open(archive, 'wb') { |out| out.write(data) }

      run_command('lha', '-x', archive)
      Dir.glob(File.join(extracted_dir, '*.ai')) do |src|
        match = src.match(/(\d+).+\.ai$/)
        next unless match
        puts src
        out_filename = File.join(emoji_dir('kddi'), match[1] + '.gif')
        run_command('convert', '-trim', '-geometry', '16x16', '+repage', src, out_filename)
      end
    ensure
      File.delete(archive) if File.exist?(archive)
      system('rm', '-Rf', extracted_dir)
    end
  end

  # Scrapes the six SoftBank pictogram pages (picword_01.php .. picword_06.php)
  # and saves every listed image as <unicode>.gif in the 'softbank' directory.
  def scrape_thirdforce
    base_uri = 'http://developers.softbankmobile.co.jp/dp/tool_dl/web/'

    # The scraper definitions do not depend on the page number, so they are
    # built once and reused for every page.
    pict_scraper = Scraper.define do
      process 'td:nth-child(2) > font.j10', :unicode => :text
      process 'td > img', :image => '@src'
      result :unicode, :image
    end
    page_scraper = Scraper.define do
      process 'table[width="100%"] > tr', 'pictograms[]' => pict_scraper
      result :pictograms
    end

    1.upto(6) do |n|
      page_uri = URI.parse(base_uri + format('picword_%02d.php', n))
      (page_scraper.scrape(page_uri) || []).each do |pictinfo|
        # Rows lacking either the code point text or the image cannot be saved.
        next if pictinfo.nil? || pictinfo.unicode.nil? || pictinfo.image.nil?
        save_icon('softbank', pictinfo.unicode, URI.parse(base_uri) + pictinfo.image)
      end
    end
  end

  # Runs all three carrier scrapers in order: docomo, KDDI, SoftBank.
  def run
    scrape_docomo
    scrape_kddi
    scrape_thirdforce
  end

  private

  # Runs an external command without a shell. Prints a warning and returns
  # false when the command fails or cannot be started; returns true otherwise.
  def run_command(*argv)
    return true if system(*argv)
    warn "command failed: #{argv.join(' ')}"
    false
  end

  # Downloads +pict_uri+ and converts it (white made transparent, resized to
  # 16x16) into <name>.gif inside the directory of +carrier+. The intermediate
  # <name>.tmp.gif file is always removed.
  def save_icon(carrier, name, pict_uri)
    tmpfile  = File.join(emoji_dir(carrier), name + '.tmp.gif')
    filename = File.join(emoji_dir(carrier), name + '.gif')
    begin
      data = pict_uri.read
      # 'wb': image data is binary and must not be newline-translated.
      File.open(tmpfile, 'wb') { |out| out.write(data) }
      if run_command('convert', '-transparent', 'white', '-resize', '16x16', tmpfile, filename)
        puts pict_uri.to_s + " => " + filename
      end
    ensure
      File.delete(tmpfile) if File.exist?(tmpfile)
    end
  end
end
114
# Script entry point: build a scraper and fetch the emoji of every carrier.
EmojiScraper.new.run
Note: See TracBrowser for help on using the browser.