Changeset 439 for lang/perl/WWW-Mixi-Scraper
- Timestamp:
- 10/09/07 18:49:32 (6 years ago)
- Location:
- lang/perl/WWW-Mixi-Scraper/trunk
- Files:
-
- 6 modified
-
Changes (modified) (1 diff)
-
lib/WWW/Mixi/Scraper.pm (modified) (1 diff)
-
lib/WWW/Mixi/Scraper/Plugin/ViewBBS.pm (modified) (4 diffs)
-
lib/WWW/Mixi/Scraper/Plugin/ViewEvent.pm (modified) (3 diffs)
-
t_live/view_bbs.t (modified) (1 diff)
-
t_live/view_event.t (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/WWW-Mixi-Scraper/trunk/Changes
r380 r439 1 1 Revision history for WWW-Mixi-Scraper 2 3 0.09 2007/10/09 4 - the content of the comment 'link' of ViewBBS/ViewEvent, 5 which was commenter's profile uri, is now stored in the 6 newly created 'name_link'. And the 'link' now denotes 7 pseudo permanent uri of the comment to make plagger/rss 8 readers happier. Note that this is *incompatible* with 9 WWW::Mixi. 10 - now ViewBBS's comments have their comment numbers in 11 'subject' (this also is incompatible with WWW::Mixi; 12 thanks to false at www.wizard-limit.net). 13 14 not the commenter's 15 profile uri but pseudo permanent link for the comment. 2 16 3 17 0.08 2007/10/06 -
lang/perl/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper.pm
r380 r439 4 4 use warnings; 5 5 6 our $VERSION = '0.08'; 6 our $VERSION = '0.09'; 7 7 8 8 use String::CamelCase qw( decamelize ); -
lang/perl/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper/Plugin/ViewBBS.pm
r380 r439 37 37 process 'dd.bbsContent>dl>dd>div.communityPhoto>table>tr>td', 38 38 'images[]' => $scraper{images}; 39 process 'div#localNavigation>ul.localNaviCommunity>li.top>a', 40 link => '@href'; 41 39 result qw( time subject description name name_link images link ); 42 40 }; … … 44 42 # bbs topic is not an array 45 43 my $stash = $self->post_process($scraper{topic}->scrape(\$html))->[0]; 44 45 # XXX: this fails when you test with local files. 46 # However, this link cannot be extracted from the html, 47 # at least as of writing this. ugh. 48 $stash->{link} = $self->{uri}; 46 49 47 50 $scraper{comments} = scraper { … … 57 60 process 'dl.commentList01>dt[class="commentDate clearfix"]>span.date', 58 61 'times[]' => 'TEXT'; 62 process 'dl.commentList01>dt[class="commentDate clearfix"]>span.senderId', 63 'sender_ids[]' => 'TEXT'; 59 64 process 'dl.commentList01>dd>dl.commentContent01', 60 65 'comments[]' => $scraper{comments}; 61 result qw( times comments ); 66 result qw( times sender_ids comments ); 62 67 }; 63 68 64 69 my $stash_c = $self->post_process($scraper{list}->scrape(\$html))->[0]; 65 70 66 my @comments = @{ $stash_c->{comments} || [] }; 67 my @times = @{ $stash_c->{times} || [] }; 71 my @comments = @{ $stash_c->{comments} || [] }; 72 my @times = @{ $stash_c->{times} || [] }; 73 my @sender_ids = @{ $stash_c->{sender_ids} || [] }; 68 74 foreach my $comment ( @comments ) { 69 $comment->{time} = _datetime( shift @times ); 70 $comment->{link} = _uri( $comment->{link} ); 75 $comment->{time} = _datetime( shift @times ); 76 $comment->{subject} = shift @sender_ids; 77 78 # incompatible with WWW::Mixi to let comment links 79 # look more 'permanent' to make plagger/rss readers happier 80 $comment->{name_link} = _uri( $comment->{link} ); 81 $comment->{link} = $stash->{link} 82 ? _uri( $stash->{link} . '#' . 
$comment->{subject} ) 83 : undef; 71 84 } 72 85 $stash->{comments} = \@comments; … … 108 121 comments => [ 109 122 { 110 name => 'commenter', 111 link => 'http://mixi.jp/show_friend.pl?id=xxxx', 112 time => 'yyyy-mm-dd hh:mm', 123 subject => 1, 124 name => 'commenter', 125 name_link => 'http://mixi.jp/show_friend.pl?id=xxxx', 126 link => 'http://mixi.jp/view_bbs.pl?id=xxxx#1', 127 time => 'yyyy-mm-dd hh:mm', 113 128 description => 'comment body', 114 129 }, -
lang/perl/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper/Plugin/ViewEvent.pm
r380 r439 55 55 $scraper{comment_body} = scraper { 56 56 process 'dl.commentContent01>dt>a', 57 'link' => '@href', 58 'name' => 'TEXT'; 57 'name_link' => '@href', 58 'name' => 'TEXT'; 59 59 process 'dl.commentContent01>dd', 60 60 'description' => $self->html_or_text; 61 61 process 'dl.commentContent01>dd>table>tr>td', 62 62 'images[]' => $scraper{images}; 63 result qw( link name description images ); 63 result qw( name_link name description images ); 64 64 }; 65 65 … … 104 104 my @comments = @{ $stash_c->{comments} || [] }; 105 105 foreach my $comment ( @comments ) { 106 $comment->{time} = _datetime( shift @dates ); 107 $comment->{subject} = shift @sender_ids; 108 $comment->{link} = _uri( $comment->{link} ); 106 $comment->{time} = _datetime( shift @dates ); 107 $comment->{subject} = shift @sender_ids; 108 109 # incompatible with WWW::Mixi to let comment links 110 # look more 'permanent' to make plagger/rss readers happier 111 $comment->{name_link} = _uri( $comment->{name_link} ); 112 $comment->{link} = $stash->{link} 113 ? _uri( $stash->{link} . '#' . $comment->{subject} ) 114 : undef; 109 115 110 116 if ( $comment->{images} ) { … … 156 162 comments => [ 157 163 { 158 subject => 1, 159 name => 'commenter', 160 link => 'http://mixi.jp/show_friend.pl?id=xxxx', 161 time => 'yyyy-mm-dd hh:mm', 164 subject => 1, 165 name => 'commenter', 166 name_link => 'http://mixi.jp/show_friend.pl?id=xxxx', 167 link => 'http://mixi.jp/view_event.pl?id=xxxx#1', 168 time => 'yyyy-mm-dd hh:mm', 162 169 description => 'comment body', 163 170 } -
lang/perl/WWW-Mixi-Scraper/trunk/t_live/view_bbs.t
r333 r439 12 12 time => 'datetime', 13 13 name_link => 'uri', 14 link => 'uri', 14 link => 'uri_if_remote', 15 15 comments => { 16 subject => 'string', 16 17 name => 'string', 18 name_link => 'uri', 17 19 description => 'string', 18 20 time => 'datetime', 19 link => 'uri', 21 link => 'uri_if_remote', 20 22 }, 21 23 images => { -
lang/perl/WWW-Mixi-Scraper/trunk/t_live/view_event.t
r333 r439 8 8 my $rules = { 9 9 subject => 'string', 10 # link => 'uri', # not yet implemented 10 link => 'uri_if_remote', 11 11 time => 'datetime', 12 12 date => 'string', … … 33 33 subject => 'string', 34 34 name => 'string', 35 name_link => 'uri', 35 36 description => 'string', 36 37 time => 'datetime', 37 link => 'uri', 38 link => 'uri_if_remote', 38 39 images => { 39 40 link => 'uri',
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)