|
Revision 29821, 1.3 kB
(checked in by valda, 4 years ago)
|
|
eic-book.com のサンプル動画に対応
バージョン 1.0.4
|
| Line | |
|---|
| 1 | # -*- mode:ruby; coding:utf-8 -*- |
|---|
| 2 | |
|---|
| 3 | require File.expand_path(File.dirname(__FILE__) + '/base') |
|---|
| 4 | |
|---|
module WWW
  module VideoScraper
    # Scraper for sample-video detail pages on www.eic-book.com.
    #
    # #scrape requests three variants of the detail page (query strings
    # "flg=sm", "flg=h4" and "flg=cp") and stores what it finds in
    # @video_url, @title, @thumb_url and @capture_urls.
    class EicBook < Base
      # Absolute URLs of the images collected from the "flg=cp" page.
      attr_reader :capture_urls

      url_regex %r!\Ahttp://www\.eic-book\.com/(detail_\d+\.html).*!

      # Fetches the detail page variants and extracts the video URL, title,
      # thumbnail URL and capture image URLs.
      #
      # Side effects: rewrites @page_url to the "flg=sm" variant and assigns
      # @video_url, @title and @capture_urls. @thumb_url is assigned only
      # when a matching image is found on the "flg=h4" page.
      #
      # Raises FileNotFound when the page has no FlashVars <param>, or when
      # the FlashVars value carries no "flv" entry (previously this case left
      # @video_url silently set to nil).
      def scrape
        uri = URI.parse(@page_url)
        @page_url = "#{uri.scheme}://#{uri.host}#{uri.path}?flg=sm"

        # Sample-movie page: video URL and title.
        doc = load_view(@page_url)
        param = doc.at('//object //param[@name="FlashVars"]')
        raise FileNotFound unless param
        flashvars = CGI.parse(param.attributes['value'])
        video = flashvars['flv'][0]
        raise FileNotFound if video.nil? || video.empty?
        @video_url = video

        # The title is optional: a missing heading yields nil instead of
        # hiding every possible error behind a rescue modifier.
        # NOTE(review): the first gsub argument is reproduced as it appears in
        # the reviewed source (a plain space); it was presumably a
        # non-breaking space originally -- confirm against the repository.
        heading = doc.at('//h2[@class="detailTtl"]')
        @title = heading ? CGI.unescapeHTML(heading.inner_html).gsub(' ', ' ') : nil

        # "flg=h4" page: first framed image is used as the thumbnail.
        doc = load_view("#{uri.scheme}://#{uri.host}#{uri.path}?flg=h4")
        img = doc.at('//div[@class="detailMN"]/img[@class="waku01"]')
        @thumb_url = site_url(uri, img.attributes['src']) if img

        # "flg=cp" page: every framed image is a capture.
        doc = load_view("#{uri.scheme}://#{uri.host}#{uri.path}?flg=cp")
        @capture_urls = []
        doc.search('//div[@class="detailMN"]/img[@class="waku01"]') do |capture|
          @capture_urls << site_url(uri, capture.attributes['src'])
        end
      end

      private

      # Downloads +url+ via http_get, converts the body with toutf8 and
      # returns the Hpricot document.
      def load_view(url)
        Hpricot(http_get(url).toutf8)
      end

      # Resolves +src+ against the scheme and host of +uri+ and returns the
      # resulting absolute URL as a String.
      def site_url(uri, src)
        URI.join("#{uri.scheme}://#{uri.host}", src).to_s
      end
    end
  end
end
|---|