# Built-in scraping rules for blog comment sections, one hash per site.
#
#   url        - regular-expression source (as a string) identifying the
#                pages of a site.  Literal dots in host names are escaped
#                so that '.' cannot match an arbitrary character.
#   context    - XPath expression selecting the comment nodes.
#   attributes - field name => XPath expression for that field.
#
# NOTE(review): the code that consumes these entries is not visible here;
# the attribute paths start with '/' or '//', so they are presumably
# combined with 'context' by the caller -- confirm before changing them.
my $siteinfos = [
    {
        url        => '^http://yaplog\.jp/.*',
        context    => '//div[@class="message"]',
        attributes => {
            body  => '/div[@class="message_txt"]',
            date  => '/div[@class="right s"]',
            title => '/div[@class="message_title"]'
        },
    },
    {
        url        => '^http://blogs\.yahoo\.co\.jp/.*',
        context    => '//p[@class="comentBody"]',
        attributes => { body => '/text()', },
    },
    {
        url        => '^http://.+\.seesaa\.net/.*',
        context    => '//div[@class="comments-body"]/*[@class="text"]',
        attributes => { body => '/text()', },
    },
    {
        url        => '^http://[^/.]+\.nowa\.jp/.*',
        context    => 'id("comment")/dl[@class="article-comment"]',
        attributes => {
            author => '/dd[@class="comment-author-name"]',
            date   => '/dd[@class="comment-author-day"]',
            body   => '/dd[@class="comment-body"]',
        }
    },
    {
        # Dots escaped: previously 'blog.livedoor.jp' matched any character
        # in place of each dot.
        url        => '^http://blog\.livedoor\.jp/.*',
        context    => '//div[@class="commentttl"]',
        attributes => {
            author => 'substring(/text(), 14)',
            date   => '/span',
            body   => '/following-sibling::div[@class="commenttext"][1]'
        },
    },
    {
        # Dots escaped.  The pattern still has no '/' after the host name,
        # exactly as before.
        url     => '^http://d\.hatena\.ne\.jp.*',
        context =>
            '//div[@class="commentshort"]/p[@class!="commentmessage"]',
        attributes => {
            author => '//span[@class="commentator"]',
            body   => '//span[@class="commentbody"]',
            date   => '//span[@class="timestamp"]',
        }
    },
    {
        url        => '^http://blog\.goo\.ne\.jp/.*',
        context    => '//td[@class="cmBody"]',
        attributes => { body => '/text()', },
    },
    {
        url     => '^http://.*\.cocolog-nifty\.com/.*',
        context =>
            '//div[@class="comment-body" or @class="comment-content"]',
        attributes => { body => '/p', }
    },
    {
        # Dot escaped.
        url        => '^http://ameblo\.jp/.*',
        context    => '//div[@class="each_comment"]',
        attributes => {
            title  => 'substring(//p[@class="label"], 2)',
            body   => '//p[@class="comment_body"]',
            author => '//span[@class="comment_author"]',
        },
    },
    {
        # Dots escaped: a literal '.' is now required before 'jugem' and
        # before 'jp'.
        url        => '^http://.*\.jugem\.jp/.*',
        context    => '//dt[@class="com_desc"]',
        attributes => {
            body   => '/text()',
            author => '/following-sibling::dd[1]/ul/li[1]',
            date   => '/following-sibling::dd[1]/ul/li[2]',
        },
    }
];
# Return the site-info definitions, loading them on first use.
#
# The parsed contents of 'siteinfo.json' are cached in
# $self->{__siteinfos}; once that slot holds a true value, later calls
# return it without reading the file again.
#
# Dies with the file name and the system error if the file cannot be
# opened.
sub _load_site_info {
    my $self = shift;

    $self->{__siteinfos} ||= do {
        # module_file() is not defined in the visible code; presumably it
        # is File::ShareDir's function resolving a per-module data file --
        # confirm against the file's imports.
        my $fname = module_file(__PACKAGE__, 'siteinfo.json');

        open my $fh, '<', $fname
            or die "Cannot open site info file '$fname': $!";

        # Slurp the whole file: with $/ undefined a scalar-context read
        # returns everything up to EOF.
        my $src = do { local $/; <$fh> };
        close $fh;

        JSON::Any->jsonToObj($src);
    };

    return $self->{__siteinfos};
}