| 1 | #!ruby -Ku |
|---|
| 2 | =begin |
|---|
| 3 | hpricot 用コマンドライン要素ファインダ |
|---|
| 4 | |
|---|
| 5 | hpricot の XPath/CSS3 実装は完全ではなく |
|---|
| 6 | また、独自実装している部分があるので注意が必要 |
|---|
| 7 | =end |
|---|
| 8 | |
|---|
| 9 | |
|---|
| 10 | require "rubygems" |
|---|
| 11 | # gem install hpricot |
|---|
| 12 | require "hpricot" |
|---|
| 13 | # gem install term-ansicolor |
|---|
| 14 | require "term/ansicolor" |
|---|
| 15 | require "open-uri" |
|---|
| 16 | require "readline" |
|---|
| 17 | |
|---|
# Interactive command-line element finder built on hpricot.
#
# A session keeps one parsed document (@doc), the URI it was loaded from
# (@uri) and the prompt string (@prompt). Each input line is either a command
# implemented by a public cmd_<name> method (get, xpath / x, exit) or, when no
# such command exists, an expression passed straight to cmd_xpath.
class XPathFinderCommand
  include Term::ANSIColor

  # Builds a new session and runs it on a copy of +argv+, so the caller's
  # array is never modified.
  def self.run(argv)
    new.run(argv.dup)
  end

  # Runs the read-eval-print loop until end of input (Readline returns nil)
  # or until the exit command is issued.
  #
  # argv - Array of String arguments; when the first element is present it is
  #        loaded with cmd_get before the first prompt is shown.
  def run(argv)
    Readline.completion_append_character = ""
    Readline.completion_case_fold = true
    Readline.completion_proc = method(:completion)
    @prompt = "> "
    @doc = nil
    @uri = nil

    cmd_get(argv.first) if argv.first

    begin
      while (line = Readline.readline(@prompt, true))
        # Strip first so leading whitespace cannot produce an empty command name.
        line = line.strip
        next if line.empty?

        name, arg = line.split(/\s+/, 2)
        handler = "cmd_#{name}"
        if respond_to?(handler)
          send(handler, arg)
        else
          # Not a known command: treat the whole line as a search expression.
          cmd_xpath(line)
        end
      end
    rescue Interrupt
      # Ctrl-C abandons the current line/command and re-enters the loop.
      puts
      retry
    end
  end

  # Readline completion callback. Completion is currently disabled and always
  # yields an empty candidate list; the earlier experiment is kept below for
  # reference (it was awkward to do with hpricot's XPath implementation).
  #
  # word - the String fragment Readline asks to complete (unused).
  #
  # Returns an empty Array.
  def completion(word)
    # case word
    # when /^(.*)\/\/(\w*)$/
    #   context = Regexp.last_match[1]
    #   match = Regexp.last_match[2]
    #   candi = @doc.search("#{context}//*").select {|e| e.elem? }.map {|e|
    #     e.name
    #   }.grep(/^#{match}/).map {|i|
    #     "#{context}//#{i}"
    #   }
    # when /^(.*)\/([^\/]*)$/
    #   context = Regexp.last_match[1]
    #   match = Regexp.last_match[2]
    #   p context
    #   p @doc.search("#{context}/child::*")
    #   candi = @doc.search("#{context}/child::*").select {|e| e.elem? }.map {|e|
    #     case
    #     when e[:id]
    #       "#{e.name}[@id='#{e[:id]}']"
    #     when e[:class]
    #       "#{e.name}[@class='#{e[:class]}']"
    #     else
    #       e.name
    #     end
    #   }.grep(/^#{match}/).map {|i|
    #     "#{context}/#{i}"
    #   }
    # end
    []
  end

  # Command "get <url>": fetches +arg+ and makes it the current document.
  #
  # Session state (@uri, @doc, @prompt) is replaced only after the fetch and
  # parse succeed, so a failed request leaves the previous document usable.
  # Failures are printed instead of raised; only StandardError is rescued so
  # Interrupt and SystemExit still propagate to the caller.
  #
  # arg - String URL (or URI object) to load.
  def cmd_get(arg)
    uri = URI(arg)
    unless uri.respond_to?(:open)
      # open-uri adds #open only to URI classes it can fetch (e.g. http/https/ftp).
      puts "cannot open #{arg}"
      return
    end

    # Block form closes the underlying IO once the body has been read.
    doc = Hpricot(uri.open(&:read))

    @uri = uri
    @doc = doc
    @prompt = "#{uri.host}#{uri.path}> "
  rescue StandardError => e
    puts e
  end

  # Command "xpath <expr>" (alias "x"): searches the current document with
  # +arg+ and prints the xpath of every match, one per line, in green.
  #
  # Prints a hint instead of raising when no document is loaded or no
  # expression was given; errors raised while searching are printed so a bad
  # expression does not end the session.
  #
  # arg - String search expression handed to the document's #search.
  def cmd_xpath(arg)
    if @doc.nil?
      puts "no document loaded (use: get <url>)"
      return
    end
    if arg.nil? || arg.strip.empty?
      puts "usage: xpath <expression>"
      return
    end

    puts green { @doc.search(arg).map { |e| e.xpath }.join("\n") }
  rescue StandardError => e
    puts e
  end
  alias cmd_x cmd_xpath

  # Command "exit": terminates the program.
  #
  # arg - ignored; accepted because the dispatcher always passes one argument.
  def cmd_exit(arg = nil)
    exit
  end
end
|---|
| 105 | |
|---|
| 106 | |
|---|
# Default to a sample page when no URL is supplied on the command line.
ARGV.push("http://subtech.g.hatena.ne.jp/") if ARGV.empty?

# Hand the arguments to the interactive finder.
XPathFinderCommand.run(ARGV)
|---|
| 112 | |
|---|