Show
Ignore:
Timestamp:
10/09/07 18:49:32 (6 years ago)
Author:
charsbar
Message:

lang/perl/WWW-Mixi-Scraper: comment links of ViewBBS/ViewEvent now denote pseudo permanent links; former profile links are moved under 'name_link'; added comment id (as a subject) to ViewBBS's comment hash; -> CPAN

Location:
lang/perl/WWW-Mixi-Scraper/trunk
Files:
6 modified

Legend:

Unmodified
Added
Removed
  • lang/perl/WWW-Mixi-Scraper/trunk/Changes

    r380 r439  
    11Revision history for WWW-Mixi-Scraper 
     2 
     30.09  2007/10/09 
     4  - the content of the comment 'link' of ViewBBS/ViewEvent, 
     5    which was the commenter's profile uri, is now stored in the 
     6    newly created 'name_link'. And the 'link' now denotes a 
     7    pseudo permanent uri of the comment to make plagger/rss 
     8    readers happier. Note that this is *incompatible* with 
     9    WWW::Mixi. 
     10  - now ViewBBS's comments have their comment numbers in 
     11    'subject' (this also is incompatible with WWW::Mixi; 
     12    thanks to false at www.wizard-limit.net). 
     13 
     14  - in short, a comment's 'link' is no longer the commenter's 
     15    profile uri but a pseudo permanent link for the comment. 
    216 
    3170.08  2007/10/06 
  • lang/perl/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper.pm

    r380 r439  
    44use warnings; 
    55 
    6 our $VERSION = '0.08'; 
     6our $VERSION = '0.09'; 
    77 
    88use String::CamelCase qw( decamelize ); 
  • lang/perl/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper/Plugin/ViewBBS.pm

    r380 r439  
    3737    process 'dd.bbsContent>dl>dd>div.communityPhoto>table>tr>td', 
    3838      'images[]' => $scraper{images}; 
    39     process 'div#localNavigation>ul.localNaviCommunity>li.top>a', 
    40       link => '@href'; 
    4139    result qw( time subject description name name_link images link ); 
    4240  }; 
     
    4442  # bbs topic is not an array 
    4543  my $stash = $self->post_process($scraper{topic}->scrape(\$html))->[0]; 
     44 
     45  # XXX: this fails when you test with local files. 
     46  # However, this link cannot be extracted from the html, 
     47  # at least as of writing this. ugh. 
     48  $stash->{link} = $self->{uri}; 
    4649 
    4750  $scraper{comments} = scraper { 
     
    5760    process 'dl.commentList01>dt[class="commentDate clearfix"]>span.date', 
    5861      'times[]' => 'TEXT'; 
     62    process 'dl.commentList01>dt[class="commentDate clearfix"]>span.senderId', 
     63      'sender_ids[]' => 'TEXT'; 
    5964    process 'dl.commentList01>dd>dl.commentContent01', 
    6065      'comments[]' => $scraper{comments}; 
    61     result qw( times comments ); 
     66    result qw( times sender_ids comments ); 
    6267  }; 
    6368 
    6469  my $stash_c = $self->post_process($scraper{list}->scrape(\$html))->[0]; 
    6570 
    66   my @comments = @{ $stash_c->{comments} || [] }; 
    67   my @times    = @{ $stash_c->{times} || [] }; 
     71  my @comments   = @{ $stash_c->{comments} || [] }; 
     72  my @times      = @{ $stash_c->{times} || [] }; 
     73  my @sender_ids = @{ $stash_c->{sender_ids} || [] }; 
    6874  foreach my $comment ( @comments ) { 
    69     $comment->{time} = _datetime( shift @times ); 
    70     $comment->{link} = _uri( $comment->{link} ); 
     75    $comment->{time}      = _datetime( shift @times ); 
     76    $comment->{subject}   = shift @sender_ids; 
     77 
     78    # incompatible with WWW::Mixi to let comment links 
     79    # look more 'permanent' to make plagger/rss readers happier 
     80    $comment->{name_link} = _uri( $comment->{link} ); 
     81    $comment->{link}      = $stash->{link} 
     82      ? _uri( $stash->{link} . '#' . $comment->{subject} ) 
     83      : undef; 
    7184  } 
    7285  $stash->{comments} = \@comments; 
     
    108121    comments => [ 
    109122      { 
    110         name => 'commenter', 
    111         link => 'http://mixi.jp/show_friend.pl?id=xxxx', 
    112         time => 'yyyy-mm-dd hh:mm', 
     123        subject   => 1, 
     124        name      => 'commenter', 
     125        name_link => 'http://mixi.jp/show_friend.pl?id=xxxx', 
     126        link      => 'http://mixi.jp/view_bbs.pl?id=xxxx#1', 
     127        time      => 'yyyy-mm-dd hh:mm', 
    113128        description => 'comment body', 
    114129      }, 
  • lang/perl/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper/Plugin/ViewEvent.pm

    r380 r439  
    5555  $scraper{comment_body} = scraper { 
    5656    process 'dl.commentContent01>dt>a', 
    57       'link' => '@href', 
    58       'name' => 'TEXT'; 
     57      'name_link' => '@href', 
     58      'name'      => 'TEXT'; 
    5959    process 'dl.commentContent01>dd', 
    6060      'description' => $self->html_or_text; 
    6161    process 'dl.commentContent01>dd>table>tr>td', 
    6262      'images[]' => $scraper{images}; 
    63     result qw( link name description images ); 
     63    result qw( name_link name description images ); 
    6464  }; 
    6565 
     
    104104  my @comments   = @{ $stash_c->{comments} || [] }; 
    105105  foreach my $comment ( @comments ) { 
    106     $comment->{time}    = _datetime( shift @dates ); 
    107     $comment->{subject} = shift @sender_ids; 
    108     $comment->{link}    = _uri( $comment->{link} ); 
     106    $comment->{time}      = _datetime( shift @dates ); 
     107    $comment->{subject}   = shift @sender_ids; 
     108 
     109    # incompatible with WWW::Mixi to let comment links 
     110    # look more 'permanent' to make plagger/rss readers happier 
     111    $comment->{name_link} = _uri( $comment->{name_link} ); 
     112    $comment->{link}      = $stash->{link} 
     113      ? _uri( $stash->{link} . '#' . $comment->{subject} ) 
     114      : undef; 
    109115 
    110116    if ( $comment->{images} ) { 
     
    156162    comments => [ 
    157163      { 
    158         subject => 1, 
    159         name => 'commenter', 
    160         link => 'http://mixi.jp/show_friend.pl?id=xxxx', 
    161         time => 'yyyy-mm-dd hh:mm', 
     164        subject     => 1, 
     165        name        => 'commenter', 
     166        name_link   => 'http://mixi.jp/show_friend.pl?id=xxxx', 
     167        link        => 'http://mixi.jp/view_event.pl?id=xxxx#1', 
     168        time        => 'yyyy-mm-dd hh:mm', 
    162169        description => 'comment body', 
    163170      } 
  • lang/perl/WWW-Mixi-Scraper/trunk/t_live/view_bbs.t

    r333 r439  
    1212  time        => 'datetime', 
    1313  name_link   => 'uri', 
    14   link        => 'uri', 
     14  link        => 'uri_if_remote', 
    1515  comments => { 
     16    subject     => 'string', 
    1617    name        => 'string', 
     18    name_link   => 'uri', 
    1719    description => 'string', 
    1820    time        => 'datetime', 
    19     link        => 'uri', 
     21    link        => 'uri_if_remote', 
    2022  }, 
    2123  images => { 
  • lang/perl/WWW-Mixi-Scraper/trunk/t_live/view_event.t

    r333 r439  
    88my $rules = { 
    99  subject     => 'string', 
    10 # link        => 'uri',  # not yet implemented 
     10  link        => 'uri_if_remote', 
    1111  time        => 'datetime', 
    1212  date        => 'string', 
     
    3333    subject     => 'string', 
    3434    name        => 'string', 
     35    name_link   => 'uri', 
    3536    description => 'string', 
    3637    time        => 'datetime', 
    37     link        => 'uri', 
     38    link        => 'uri_if_remote', 
    3839    images => { 
    3940      link       => 'uri',