| 1 | # -*- mode:ruby; coding:utf-8 -*- |
|---|
| 2 | |
|---|
| 3 | require 'www/video_scraper/base' |
|---|
| 4 | |
|---|
module WWW
  module VideoScraper
    # Scraper for a single yourfilehost.com video page.
    #
    # Constructing an instance immediately downloads the page named by the
    # +:url+ option and extracts whatever the page exposes: the thumbnail URL,
    # the video URL, the page/embed URL and the intermediate request details.
    #
    # NOTE(review): TryAgainLater, FileNotFound, Hpricot and open-uri are
    # presumably provided by 'www/video_scraper/base' -- confirm.
    class YourFileHost
      attr_reader :request_url, :response_body, :thumb_url, :video_url, :page_url

      class MaximumVideoPlaysReached < TryAgainLater; end
      class BandwidthAllowanceExceeded < TryAgainLater; end
      class NoFileCategory < FileNotFound; end

      # Accepted page URL shape. Anchored with \A and \z (not ^ / $) so that a
      # multi-line string cannot pass validation on its first line alone.
      URL_PATTERN = %r{\Ahttp://www\.yourfilehost\.com/media\.php\?cat=video&file=.+\z}

      # User-Agent header sent with every HTTP request made by this class.
      USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'

      # @param opt [String, Hash] either the page URL itself or an options
      #   hash containing the page URL under +:url+
      # @raise [StandardError] when the URL is missing or not a YourFileHost link
      # @raise [TryAgainLater, FileNotFound] when the page reports an error
      def initialize(opt)
        @opt = opt.is_a?(String) ? { :url => opt } : opt
        do_query
      end

      # Tells whether +url+ is a YourFileHost video page URL.
      #
      # @param url [String]
      # @return [Integer, nil] match offset (truthy) when valid, nil otherwise
      def self.valid_url?(url)
        url =~ URL_PATTERN
      end

      # Values of the +file+ query parameter, taken from the share URL when
      # the site supplied one and from the requested URL otherwise.
      #
      # @return [Array<String>] CGI.parse maps every key to an array of values,
      #   so this is an array (empty when there is no +file+ parameter)
      def filename
        # @url_share may be '' when the response carried no usable url_share;
        # an empty string is truthy, so test for emptiness explicitly.
        source = @url_share.to_s.empty? ? @opt[:url] : @url_share
        CGI.parse(URI.parse(source).query.to_s)['file']
      end
      alias :title :filename

      private

      # Downloads the page and dispatches on the kind of player it embeds.
      def do_query
        url = @opt[:url]
        raise StandardError, 'url param is requred' unless url
        raise StandardError, "url is not YourFileHost link: #{url}" unless YourFileHost.valid_url?(url)

        html = fetch_body(url)
        doc = Hpricot(html)
        if (param = doc.at('//object[@id="objectPlayer"] //param[@name="movie"]'))
          scrape_object_player(param)
        elsif (param = doc.at('//object[@id="VIDEO"] //param[@name="URL"]'))
          @video_url = param.attributes['value']
          @page_url = url
        else
          raise_scrape_failure(html)
        end
      end

      # Reads the "movie" param of the objectPlayer element, requests the URL
      # found under its +video+ key and stores the fields of that response.
      def scrape_object_player(param)
        value = param.attributes['value']
        raise StandardError, 'video information is not found' unless value

        @value = CGI.parse(value)
        # CGI.parse answers unknown keys with a frozen empty array, so use
        # #first (nil when absent) rather than the mutating #shift.
        @request_url = @value['video'].first
        raise StandardError, 'video information is not found' unless @request_url

        @response_body = fetch_body(@request_url)
        info = CGI.parse(@response_body)
        @thumb_url = info['photo'].first || ''
        @video_url = info['video_id'].first || ''
        @page_url  = info['embed'].first || ''
        @url_share = info['url_share'].first || ''
      end

      # Maps the error text found in +html+ to the matching exception.
      # Always raises.
      def raise_scrape_failure(html)
        case html
        when /MAXIMUM VIDEO PLAYS REACHED/i
          raise MaximumVideoPlaysReached, 'MAXIMUM VIDEO PLAYS REACHED'
        when /Bandwidth Allowance exceeded/i
          raise BandwidthAllowanceExceeded, 'Bandwidth Allowance exceeded'
        when /url=error\.php\?err=8/i
          raise FileNotFound, 'file not found'
        when /url=error\.php\?err=5/i, /no file category/i
          raise NoFileCategory, 'no file category'
        when /File not found/i
          raise FileNotFound, 'file not found'
        else
          raise TryAgainLater, "scrape failed: #{html}"
        end
      end

      # Fetches +url+ with the class User-Agent and returns the response body.
      def fetch_body(url)
        headers = { 'User-Agent' => USER_AGENT }
        # Kernel#open stopped opening URLs in Ruby 3.0; URI.open is the
        # replacement where open-uri provides it. Older rubies keep the
        # original Kernel#open call.
        if URI.respond_to?(:open)
          URI.open(url, headers) { |res| res.read }
        else
          open(url, headers) { |res| res.read }
        end
      end
    end
  end
end
|---|
| 72 | |
|---|
# Manual smoke test: when this file is run directly, scrape two sample pages
# and print what was extracted from each (performs real HTTP requests).
if $0 == __FILE__
  sample_urls = [
    'http://www.yourfilehost.com/media.php?cat=video&file=XV436__03.wmv',
    'http://www.yourfilehost.com/media.php?cat=video&file=kawarazaki2_ep3_002.wmv'
  ]

  sample_urls.each_with_index do |url, index|
    puts '-----------' unless index.zero?
    # The class is defined inside WWW::VideoScraper, so use the fully
    # qualified name; a bare VideoScraper is not defined by this file.
    yfh = WWW::VideoScraper::YourFileHost.new(url)
    puts yfh.thumb_url
    puts yfh.video_url
    puts yfh.page_url
    puts yfh.title
  end
end
|---|