root/lang/ruby/video_scraper/lib/www/video_scraper/eic_book.rb @ 29821

Revision 29821, 1.3 kB (checked in by valda, 4 years ago)

eic-book.com のサンプル動画に対応
バージョン 1.0.4

Line 
1# -*- mode:ruby; coding:utf-8 -*-
2
3require File.expand_path(File.dirname(__FILE__) + '/base')
4
module WWW
  module VideoScraper
    # Scraper for detail pages on www.eic-book.com.
    #
    # After #scrape runs, the instance holds the video URL, the page title
    # (when it can be extracted), a thumbnail URL (when a matching image is
    # present) and the list of capture image URLs exposed via #capture_urls.
    class EicBook < Base
      attr_reader :capture_urls
      url_regex %r!\Ahttp://www\.eic-book\.com/(detail_\d+\.html).*!

      # Fetches three variants of the detail page (query flg=sm, flg=h4 and
      # flg=cp) and populates @page_url, @video_url, @title, @thumb_url and
      # @capture_urls from them.
      #
      # Raises FileNotFound when the flg=sm page has no FlashVars <param>
      # inside an <object>.
      def scrape
        uri = URI.parse(@page_url)
        site_root = "#{uri.scheme}://#{uri.host}"
        detail_page = "#{site_root}#{uri.path}"
        image_xpath = '//div[@class="detailMN"]/img[@class="waku01"]'
        # Download a URL and parse the UTF-8 converted markup.
        fetch_doc = lambda { |url| Hpricot(http_get(url).toutf8) }

        # The page URL is rewritten to the flg=sm variant before fetching.
        @page_url = "#{detail_page}?flg=sm"
        player_doc = fetch_doc.call(@page_url)
        player_param = player_doc.at('//object //param[@name="FlashVars"]')
        raise FileNotFound unless player_param

        # FlashVars is a query-string style value; the video is under "flv".
        player_vars = CGI.parse(player_param.attributes['value'])
        @video_url = player_vars['flv'].first

        # Title extraction is best-effort: any failure leaves @title nil.
        @title = begin
          heading = player_doc.at('//h2[@class="detailTtl"]')
          CGI.unescapeHTML(heading.inner_html).gsub('&nbsp;', ' ')
        rescue StandardError
          nil
        end

        # First matching image on the flg=h4 page becomes the thumbnail;
        # @thumb_url is left untouched when there is none.
        thumb_doc = fetch_doc.call("#{detail_page}?flg=h4")
        thumb_img = thumb_doc.at(image_xpath)
        @thumb_url = URI.join(site_root, thumb_img.attributes['src']).to_s if thumb_img

        # Every matching image on the flg=cp page is collected as a capture.
        capture_doc = fetch_doc.call("#{detail_page}?flg=cp")
        @capture_urls = []
        capture_doc.search(image_xpath) do |capture_img|
          @capture_urls << URI.join(site_root, capture_img.attributes['src']).to_s
        end
      end
    end
  end
end
Note: See TracBrowser for help on using the browser.