Show
Ignore:
Timestamp:
11/23/07 13:26:24 (6 years ago)
Author:
tokuhirom
Message:

WWW-CommentGetter: support new hatena style.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • lang/perl/WWW-CommentGetter/trunk/WWW-CommentGetter/lib/WWW/CommentGetter/Plugin/HatenaDiary.pm

    r662 r1922
    11 11     return unless $url =~ m{^http://d.hatena.ne.jp.*};
    12 12
    13        # tokuhirom \x{300e}\x{305d}\x{3093}\x{306a}\x{3082}\x{3093}\x{304b}\x{306d}\x{3002}\x{300f} (2007/10/22 07:29)
    14
    15 13     my $ret = scraper {
    16            process 'div.commentshort > p', 'bodies[]' => sub {
    17                my $elem = shift;
    18                if ($elem->as_text =~ m{^(.+) \x{300e}(.+)\x{300f} \((\d{4}/\d\d/\d\d \d\d:\d\d)\)$}) {
    19                    return +{ author => $self->trim($1), body => $2, date => $3 };
    20                } else {
    21                    return;
    22                }
       14         process '//div[@class="commentshort"]/p[@class!="commentmessage"]', 'comments[]' => scraper {
       15             process '//span[@class="commentator"]', 'author', 'TEXT';
       16             process '//span[@class="commentbody"]', 'body',   'TEXT';
       17             process '//span[@class="timestamp"]',   'date',   'TEXT';
    23 18         };
    24 19     }->scrape(URI->new($url));
    25 20
    26        return [
    27            grep { $_ } @{ $ret->{bodies} }
    28        ];
       21     return [ @{ $ret->{comments} } ];
    29 22 }
    30 23