Changeset 28504
- Timestamp: 01/16/09 17:11:59 (4 years ago)
- Location: lang/ruby/video_scraper
- Files: 1 added, 1 modified
  - lib/www/video_scraper/you_tube.rb (modified) (3 diffs)
  - test/www/video_scraper/test_you_tube.rb (added)
Legend:
- Unmodified
- Added
- Removed
lang/ruby/video_scraper/lib/www/video_scraper/you_tube.rb
r28495 r28504 5 5 module WWW 6 6 module VideoScraper 7 class YouTube 8 @@options ||= {}7 class YouTube < Base 8 url_regex %r!\Ahttp://(?:www|jp)\.youtube\.com/watch.*[?&]v=([[:alnum:]]+)! 9 9 10 def self.options 11 @@options 12 end 13 14 def self.options=(opts) 15 @@options = opts 16 end 17 18 def self.configure(&proc) 19 raise ArgumentError, 'Block is required.' unless block_given? 20 yield @@options 21 end 22 23 attr_reader :request_url, :response_body, :page_url, :title, :video_url, :thumb_url, :embed_tag 24 25 def initialize(opt) 26 @opt = opt.is_a?(String) ? { :url => opt } : opt 27 @agent = WWW::Mechanize.new 28 @agent.user_agent_alias = 'Windows IE 6' 29 @agent.keep_alive = false 10 def initialize(url, opt = nil) 11 super 30 12 do_query 31 end32 33 def self.valid_url?(url)34 get_mediaid(url)35 end36 37 def self.get_mediaid(url)38 begin39 uri = URI.parse(url)40 rescue41 return nil42 end43 return nil unless uri.host.match(%r!(?:www|jp)\.youtube\.com!)44 url.match(%r![?&]v=([[:alnum:]]+)!)[1] rescue nil45 13 end 46 14 47 15 private 48 16 def login 49 page = @agent.get("#{@page_uri.scheme}://#{@page_uri.host}/login") 17 uri = URI.parse(@page_url) 18 page = agent.get("#{uri.scheme}://#{uri.host}/login") 50 19 login_form = page.forms.with.name('loginForm').first 51 login_form.username = YouTube.options[:mail]52 login_form.password = YouTube.options[:password]53 @agent.submit(login_form)20 login_form.username = @opt[:you_tube_username] 21 login_form.password = @opt[:you_tube_password] 22 agent.submit(login_form) 54 23 end 55 24 56 25 def pass_verify_age 57 page = @agent.get(@page_url) 26 uri = URI.parse(@page_url) 27 page = agent.get(uri) 58 28 if page.uri.path =~ /verify_age/ 59 # ログインする60 29 login 61 # 確認フォームを送信 62 page = @agent.post(page.uri, 63 'next_url' => "#{@page_uri.path}?#{@page_uri.query}", 64 'action_confirm' => 'Confirm Birth Date') 30 page = agent.post(page.uri, 31 'next_url' => "#{uri.path}?#{uri.query}", 32 'action_confirm' => 'Confirm Birth Date') 
65 33 end 66 34 page … … 68 36 69 37 def do_query 70 url = @opt[:url]71 raise StandardError, 'url param is requred' unless url72 raise StandardError, "url is not YouTube link: #{url}" unless YouTube.valid_url? url73 @page_url = url74 uri = URI.parse(@page_url)75 @page_uri = uri76 77 38 page = pass_verify_age 78 @response_body = page.body 79 @title = page.root.at('//head/title').inner_html rescue '' 39 @title = page.root.at('//head/title').inner_html.sub(/^YouTube[\s-]*/, '') rescue '' 80 40 @embed_tag = page.root.at('//input[@id="embed_code"]').attributes['value'] rescue nil 81 uri.path = '/get_video'82 41 page.root.search('//script').each do |script| 83 if script.inner_html.match(/var\s+swfArgs\s*=\s*([^;]+);/) 84 swf_args = JSON::parse($1) 42 if m = script.inner_html.match(/var\s+swfArgs\s*=\s*([^;]+);/) 43 swf_args = JSON::parse(m[1]) 44 uri = URI.parse(@page_url) 45 uri.path = '/get_video' 85 46 uri.query = "video_id=#{swf_args['video_id']}&t=#{swf_args['t']}" 86 47 @video_url = uri.to_s … … 88 49 end 89 50 end 90 raise FileNotFound, 'file not found' if @ embed_tag.nil? and @video_url.nil? and @thumb_url.nil?51 raise FileNotFound, 'file not found' if @video_url.nil? 91 52 end 92 53 end 93 54 end 94 55 end 95 96 if $0 == __FILE__97 require 'yaml'98 y = YAML.load_file(File.join(ENV['HOME'], '.videoscraperrc'))99 VideoScraper::YouTube.configure do |conf|100 conf[:mail] = y['youtube']['mail']101 conf[:password] = y['youtube']['password']102 end103 104 w = VideoScraper::YouTube.new('http://www.youtube.com/watch?v=OFPnvARUOHI&feature=dir')105 puts w.title106 puts w.video_url107 puts w.thumb_url108 puts w.embed_tag109 puts '---------------'110 w = VideoScraper::YouTube.new('http://www.youtube.com/watch?v=ysdSl5kmzFY&feature=bz303')111 puts w.title112 puts w.video_url113 puts w.thumb_url114 puts w.embed_tag115 end
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)