root/lang/ruby/video_scraper/lib/www/video_scraper/your_file_host.rb @ 28495

Revision 28495, 3.1 kB (checked in by valda, 4 years ago)

クラス構造の改善
アゲサゲのテストを追加

Line 
1# -*- mode:ruby; coding:utf-8 -*-
2
3require 'www/video_scraper/base'
4
module WWW
  module VideoScraper
    # Scrapes a single yourfilehost.com video page.
    #
    # Constructing an instance immediately downloads the page (see #do_query)
    # and fills in the readers below with whatever could be extracted.
    #
    # TryAgainLater and FileNotFound are not defined in this file; presumably
    # they come from 'www/video_scraper/base' -- confirm.
    class YourFileHost
      attr_reader :request_url, :response_body, :thumb_url, :video_url, :page_url

      # Raised when the page contains "MAXIMUM VIDEO PLAYS REACHED".
      class MaximumVideoPlaysReached < TryAgainLater; end
      # Raised when the page contains "Bandwidth Allowance exceeded".
      class BandwidthAllowanceExceeded < TryAgainLater; end
      # Raised when the page redirects to error code 5 or mentions
      # "no file category".
      class NoFileCategory < FileNotFound; end

      # @param opt [String, Hash] either the page URL itself or an options
      #   hash holding the URL under the :url key.
      # @raise [StandardError] when the URL is missing, is not a YourFileHost
      #   media link, or the page cannot be scraped (see #do_query).
      def initialize(opt)
        @opt = opt.is_a?(String) ? { :url => opt } : opt
        do_query
      end

      # Tells whether +url+ is a YourFileHost video page link.
      #
      # The pattern is anchored with \A and \z (not ^ / $) so that a string
      # with an embedded or trailing newline cannot pass validation.
      #
      # @return [Integer, nil] 0 when the URL matches, nil otherwise.
      def self.valid_url?(url)
        url =~ %r{\Ahttp://www\.yourfilehost\.com/media\.php\?cat=video&file=.+\z}
      end

      # Values of the "file" query parameter, read from the share URL found
      # while scraping or, failing that, from the URL given to the constructor.
      #
      # @return [Array<String>] CGI.parse maps every key to an array of
      #   values, and that array is returned as-is.
      #   NOTE(review): callers may expect a String here -- confirm before
      #   changing the return shape.
      def filename
        # @url_share is '' (which is truthy) when the response had no
        # url_share key, so a plain `||` would never fall back.
        source = @url_share.to_s.empty? ? @opt[:url] : @url_share
        uri = URI.parse(source)
        q = CGI.parse(uri.query.to_s)
        q['file']
      end
      alias :title :filename

      private

      # Request headers sent with every HTTP fetch.
      def request_headers
        { 'User-Agent' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)' }
      end

      # Downloads +url+ and returns the response body.
      # Opening a URL with a header hash via Kernel#open presumably relies on
      # open-uri being loaded elsewhere -- confirm.
      def fetch_body(url)
        open(url, request_headers) { |res| res.read }
      end

      # Validates the configured URL, downloads the page and extracts the
      # video details from whichever player markup is present.
      #
      # @raise [StandardError] when the URL is missing or not a YourFileHost
      #   link, or when the player carries no usable video information.
      # @raise [MaximumVideoPlaysReached, BandwidthAllowanceExceeded,
      #   NoFileCategory, FileNotFound, TryAgainLater] when no player is found
      #   (see #raise_scrape_failure).
      def do_query
        url = @opt[:url]
        raise StandardError, 'url param is requred' unless url
        raise StandardError, "url is not YourFileHost link: #{url}" unless YourFileHost.valid_url?(url)

        html = fetch_body(url)
        doc = Hpricot(html)
        if elem = doc.at('//object[@id="objectPlayer"] //param[@name="movie"]')
          scrape_object_player(elem)
        elsif elem = doc.at('//object[@id="VIDEO"] //param[@name="URL"]')
          @video_url = elem.attributes['value']
          @page_url = url
        else
          raise_scrape_failure(html)
        end
      end

      # Handles the "objectPlayer" markup: the movie parameter names a second
      # URL whose response is a query string carrying the actual video details.
      def scrape_object_player(elem)
        value = elem.attributes['value']
        raise StandardError, 'video information is not found' unless value

        @value = CGI.parse(value)
        # Use #first rather than #shift: for a missing key CGI.parse hands back
        # a frozen empty array, and mutating it raises an unrelated error.
        @request_url = @value['video'].first
        raise StandardError, 'video information is not found' unless @request_url

        @response_body = fetch_body(@request_url)
        q = CGI.parse(@response_body)
        @thumb_url = q['photo'].first || ''
        @video_url = q['video_id'].first || ''
        @page_url = q['embed'].first || ''
        @url_share = q['url_share'].first || ''
      end

      # Maps a page without any recognised player to the most specific error.
      # The patterns are tried in order and the first match wins.
      def raise_scrape_failure(html)
        case html
        when /MAXIMUM VIDEO PLAYS REACHED/i
          raise MaximumVideoPlaysReached, 'MAXIMUM VIDEO PLAYS REACHED'
        when /Bandwidth Allowance exceeded/i
          raise BandwidthAllowanceExceeded, 'Bandwidth Allowance exceeded'
        when /url=error\.php\?err=8/i
          raise FileNotFound, 'file not found'
        when /url=error\.php\?err=5/i, /no file category/i
          raise NoFileCategory, 'no file category'
        when /File not found/i
          raise FileNotFound, 'file not found'
        else
          raise TryAgainLater, "scrape failed: #{html}"
        end
      end
    end
  end
end
72
# Manual smoke test: when this file is run directly, scrape two sample pages
# and print what was extracted, with a separator line between the results.
if $0 == __FILE__
  sample_urls = [
    'http://www.yourfilehost.com/media.php?cat=video&file=XV436__03.wmv',
    'http://www.yourfilehost.com/media.php?cat=video&file=kawarazaki2_ep3_002.wmv'
  ]

  sample_urls.each_with_index do |url, index|
    puts '-----------' unless index.zero?
    # Fully qualified: this file defines the class only under the WWW namespace.
    yfh = WWW::VideoScraper::YourFileHost.new(url)
    puts yfh.thumb_url
    puts yfh.video_url
    puts yfh.page_url
    puts yfh.title
  end
end
Note: See TracBrowser for help on using the browser.