root/platform/pragger/Feed/nicovideo_rank.rb @ 8290

Revision 8290, 2.2 kB (checked in by yasuyuki, 5 years ago)

platform/pragger/Feed: 取得済み動画キャッシュの削除条件を、動画IDからアップロード時刻に変更 21
不要なソートを削除

Line 
1## make ranking nicovideo source.
2##
3## - module: Feed::nicovideo_rank
4##   config:
5##     cache: cache file base name.
6##
7## Copyright (C) 2007-2008, INOUE Yasuyuki <inoue.yasuyuki0@gmail.com>
8## Original version is written by TADA Tadashi <sho@spc.gr.jp>
9## You can redistribute it and/or modify it under GPL3 or any later version.
10
begin
  require 'rubygems'
rescue LoadError
  # rubygems is optional; carry on without it.
end
require 'fileutils'
require 'pp'
require 'time'
require 'timeout'
require 'yaml'
require 'hpricot'
require 'nicovideo/thumbnail_info'
19
## Load one of this plugin's YAML cache files.
##
## The cache files contain Time objects. On Psych 4 (Ruby 3.1+) plain
## YAML.load is a safe load that rejects Time, so use YAML.unsafe_load when
## it exists and fall back to YAML.load on older rubies.
## NOTE(review): this is a full (unsafe) deserialization; the files are
## assumed to be local and trusted -- confirm.
##
## Raises Errno::ENOENT when +path+ does not exist.
def nicovideo_rank_load_yaml(path)
  File.open(path) do |f|
    YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(f) : YAML.load(f)
  end
end

## Move cached videos that are old enough into the ranking of the day.
##
## config keys read here:
##   'rank_data' : base name of the cache files; "<rank_data>.last_fetched"
##                 holds a Time and "<rank_data>.dump" holds a hash of
##                 watch URL => item (each item has an 'uploaded_at' Time).
##   'root_dir'  : directory under which "/YYYY/MM/DD/rank.yaml" is kept.
##   'hour'      : optional; age in hours an item must reach before it is
##                 ranked (default 24).
## data is not used by this method.
##
## Every cached item whose 'uploaded_at' is not later than
## (last_fetched - hour) is re-fetched through get_rank_item, appended to
## the day's rank items and removed from the cache. Both files are written
## back afterwards.
##
## Returns the array of rank items for the day (items already stored in
## rank.yaml plus the ones added by this call).
def nicovideo_rank(config, data)
  client = ::NicoVideo::ThumbnailInfo.new
  rank_base = config['rank_data']

  last_fetched = nicovideo_rank_load_yaml("#{rank_base}.last_fetched")
  day_dir = config['root_dir'] + last_fetched.strftime('/%Y/%m/%d')
  rank_path = "#{day_dir}/rank.yaml"
  FileUtils.mkpath(day_dir)

  # An empty YAML file loads as nil/false; treat it as an empty cache.
  raw_items = nicovideo_rank_load_yaml("#{rank_base}.dump") || {}

  rank_items =
    begin
      nicovideo_rank_load_yaml(rank_path) || []
    rescue Errno::ENOENT
      # No ranking stored for this day yet.
      []
    end

  target_hour = config['hour'] ? config['hour'].to_i : 24
  target_time = last_fetched - (target_hour * 60 * 60)

  # Iterate over a snapshot of the keys because entries are deleted inside.
  raw_items.keys.each do |key|
    begin
      # Uploaded too recently: leave it in the cache for a later run.
      next if raw_items[key]['uploaded_at'] > target_time

      begin
        rank_item = get_rank_item(client, key[%r|watch/(.+)$|, 1])
      rescue Errno::ENOENT
        # remove 'nice boat.' movie (presumably one that is no longer
        # available -- confirm against NicoVideo::ThumbnailInfo).
        raw_items.delete key
        next
      rescue Timeout::Error
        # Something went wrong; stop fetching but still save the progress
        # made so far. The remaining items stay in the cache.
        break
      end

      rank_items << rank_item
      raw_items.delete key
    rescue ArgumentError => e
      # Report the offending item and keep going with the next one.
      pp e
      pp raw_items[key]
    end
  end

  File.open("#{rank_base}.dump", 'w') { |f| YAML.dump(raw_items, f) }
  File.open(rank_path, 'w') { |f| YAML.dump(rank_items, f) }
  rank_items
end
73
## Build a rank entry for one video.
##
## client : object answering get(id) with a hash-like thumbnail record
##          (a ::NicoVideo::ThumbnailInfo in nicovideo_rank).
## id     : video id handed to client.get.
##
## Returns a hash with the keys 'link', 'title', 'viewed', 'mylisted'
## (both counters converted with to_f) and 'uploaded_at' (the record's
## 'first_retrieve' value parsed with Time.parse).
def get_rank_item(client, id)
  info = client.get(id)
  {
    'link' => info['watch_url'],
    'title' => info['title'],
    'viewed' => info['view_counter'].to_f,
    'mylisted' => info['mylist_counter'].to_f,
    'uploaded_at' => Time.parse(info['first_retrieve'])
  }
end
Note: See TracBrowser for help on using the browser.