| 1 | package Plagger::Plugin::CustomFeed::MixiScraper;
|
|---|
| 2 | use strict;
|
|---|
| 3 | use base qw( Plagger::Plugin );
|
|---|
| 4 |
|
|---|
| 5 | use DateTime::Format::Strptime;
|
|---|
| 6 | use WWW::Mixi::Scraper;
|
|---|
| 7 | use Time::HiRes;
|
|---|
| 8 |
|
|---|
# Registry of the feed types this plugin can produce, keyed by the names
# accepted in the "feed_type" configuration option.
#
# Keys of each entry, as used by the subs in this file:
#   title      - title given to the generated feed (or, for split BBS
#                feeds, the prefix of each feed's description).
#   get_list   - name of the method called on the WWW::Mixi::Scraper object
#                to obtain the list of messages.
#   get_detail - must be defined for add_entry to fetch an entry body; only
#                its definedness is checked in this file.
#   icon       - name of the query parameter in an entry's link from which
#                the user id for the icon lookup is read; absent when no
#                icon can be fetched for the type.
our $MAP = {
    FriendDiary => {
        title      => 'マイミク最新日記',
        get_list   => 'new_friend_diary',
        get_detail => 'view_diary',
        icon       => 'owner_id',
    },

    # can't get icon
    Message => {
        title      => 'ミクシィメッセージ受信箱',
        get_list   => 'list_message',
        get_detail => 'view_message',
    },

    # can't get icon & body
    RecentComment => {
        title    => 'ミクシィ最近のコメント一覧',
        get_list => 'list_comment',
    },

    Log => {
        title    => 'ミクシィ足跡',
        get_list => 'show_log',
        icon     => 'id',
    },

    MyDiary => {
        title      => 'ミクシィ日記',
        get_list   => 'list_diary',
        get_detail => 'view_diary',
        icon       => 'owner_id',
    },

    Calendar => {
        title      => 'ミクシィカレンダー',
        get_list   => 'show_calendar',
        get_detail => 'view_event',
    },

    BBS => {
        title      => 'コミュニティ最新書き込み',
        get_list   => 'new_bbs',
        get_detail => 'view_bbs',
    },
};
|
|---|
| 49 |
|
|---|
# Returns the identifier of this plugin instance: the value of class_id,
# a hyphen, and the configured "email" value, concatenated.
sub plugin_id {
    my ($self) = @_;

    my $class_id = $self->class_id;
    my $email    = $self->conf->{email};

    return $class_id . '-' . $email;
}
|
|---|
| 54 |
|
|---|
# Registers this plugin's hook with the given context: the load sub is
# attached to the 'subscription.load' hook.
sub register {
    my $self    = shift;
    my $context = shift;

    my @hooks = ('subscription.load' => \&load);

    $context->register_hook($self, @hooks);
}
|
|---|
| 62 |
|
|---|
# Handler for the 'subscription.load' hook.
#
# Builds the WWW::Mixi::Scraper object (stored in $self->{mixi}) from the
# configured email/password and the plugin's cookie jar, then adds to the
# context's subscription a feed whose aggregator calls $self->aggregate.
sub load {
    my ($self, $context) = @_;

    my $cookie_jar = $self->cookie_jar;
    unless (ref($cookie_jar) eq 'HTTP::Cookies') {
        # using foreign cookies = don't have to set email/password. Fake them
        my %placeholder = (
            email    => 'plagger@localhost',
            password => 'pl4gg5r',
        );
        for my $field (qw( email password )) {
            $self->conf->{$field} ||= $placeholder{$field};
        }
    }

    my @login = (
        email      => $self->conf->{email},
        password   => $self->conf->{password},
        cookie_jar => $cookie_jar,
    );
    $self->{mixi} = WWW::Mixi::Scraper->new(@login);

    # The aggregator closes over $self so that it runs as a method call.
    my $aggregator = sub { $self->aggregate(@_) };

    my $feed = Plagger::Feed->new;
    $feed->aggregator($aggregator);
    $context->subscription->add($feed);
}
|
|---|
| 83 |
|
|---|
# Aggregator callback installed by load.
#
# Walks the configured "feed_type" list (['FriendDiary'] when unset) and
# builds the feed(s) for each type. $context->error is called for a type
# that has no entry in $MAP. The BBS type is handed to aggregate_bbs_feed
# when "split_bbs_feed" is set; everything else goes to aggregate_feed.
sub aggregate {
    my ($self, $context, $args) = @_;

    my $types = $self->conf->{feed_type} || ['FriendDiary'];

    for my $type (@{$types}) {
        $MAP->{$type} or $context->error("$type not found");

        my $builder = ($type eq 'BBS' && $self->conf->{split_bbs_feed})
            ? 'aggregate_bbs_feed'
            : 'aggregate_feed';

        $self->$builder($context, $type, $args);
    }
}
|
|---|
| 96 |
|
|---|
# Builds a single feed for $type and adds it to the context's update.
#
# The message list comes from calling the $MAP "get_list" method on the
# scraper and parsing the result. At most "fetch_items" (default 20)
# messages are turned into entries via add_entry. For FriendDiary, messages
# whose link carries a "url" query parameter (external blogs) are skipped
# and do not count towards that limit.
sub aggregate_feed {
    my ($self, $context, $type, $args) = @_;

    my $feed = Plagger::Feed->new;
    $feed->type('mixi');
    $feed->title($MAP->{$type}{title});

    my $list_method = $MAP->{$type}{get_list};
    my @msgs        = $self->{mixi}->$list_method->parse;
    my $limit       = $self->conf->{fetch_items} || 20;

    my $fetched = scalar(@msgs);
    $self->log(info => 'fetch ' . $fetched . ' entries');

    # Read only after the list has been requested above.
    $feed->link($self->{mixi}{mech}->uri);

    # Start each feed unblocked; add_entry increments this flag when a
    # body fetch fails.
    $self->{blocked} = 0;

    my $taken = 0;
    MESSAGE:
    for my $msg (@msgs) {
        if ($type eq 'FriendDiary' && $msg->{link}->query_param('url')) {
            next MESSAGE;    # external blog
        }
        last MESSAGE if $taken >= $limit;
        $taken++;

        $self->add_entry($context, $type, $feed, $msg);
    }

    $context->update->add($feed);
}
|
|---|
| 122 |
|
|---|
# Variant of aggregate_feed that creates one feed per listed message
# instead of one feed for the whole list.
#
# Each feed's title is the message subject with a trailing "(N)" removed,
# its link is the message link with the "&comment_count=N" part removed,
# and its description is the $MAP title followed by the message's name.
# At most "fetch_items" (default 20) messages are processed.
sub aggregate_bbs_feed {
    my ($self, $context, $type, $args) = @_;

    my $list_method = $MAP->{$type}{get_list};
    my @msgs        = $self->{mixi}->$list_method->parse;
    my $limit       = $self->conf->{fetch_items} || 20;

    my $fetched = scalar(@msgs);
    $self->log(info => 'fetch ' . $fetched . ' entries');

    # Start unblocked; add_entry increments this flag when a body fetch
    # fails.
    $self->{blocked} = 0;

    my $taken = 0;
    MESSAGE:
    for my $msg (@msgs) {
        if ($type eq 'FriendDiary' && $msg->{link}->query_param('url')) {
            next MESSAGE;    # external blog
        }
        last MESSAGE if $taken >= $limit;
        $taken++;

        my $subject = $msg->{subject};
        $subject =~ s/\(\d+\)$//;

        my $link = $msg->{link};
        $link =~ s/&comment_count=\d*//;

        my $feed = Plagger::Feed->new;
        $feed->type('mixi');
        $feed->title($subject);
        $feed->description($MAP->{$type}{title} . ': ' . $msg->{name});
        $feed->link($link);

        $self->add_entry($context, $type, $feed, $msg);

        $context->update->add($feed);
    }
}
|
|---|
| 150 |
|
|---|
# Parser for the "YYYY-MM-DD HH:MM" timestamps passed to
# Plagger::Date->parse in add_entry.
my $format = DateTime::Format::Strptime->new(pattern => '%Y-%m-%d %H:%M');

# Converts one listed message ($msg) into a Plagger::Entry and adds it to
# $feed.
#
# Optional steps, each controlled by configuration and skipped once
# $self->{blocked} is set:
#   - show_icon:     looks up the user's outline (keyed by the id found in
#                    the link's $MAP "icon" query parameter) and attaches
#                    its image as the entry icon.
#   - fetch_body:    fetches the linked page and uses its description and
#                    images as the entry body; the entry date is replaced
#                    by the detail page's time.
#   - fetch_comment: with a fetched body, adds every comment as a separate
#                    entry after the main entry.
#
# Both lookups go through $self->cache->get_callback with a '12 hours'
# argument (presumably the cache lifetime - confirm against the cache
# class) and sleep "fetch_body_interval" seconds (default 1.5) inside the
# callback before hitting mixi.
#
# Note that a "view_enquete" link makes the sub return early when body
# fetching is active, so such a message is not added to the feed at all.
sub add_entry {
    my ($self, $context, $type, $feed, $msg) = @_;

    my $entry = Plagger::Entry->new;
    $entry->title($msg->{subject});
    $entry->link($msg->{link});
    $entry->author($msg->{name});
    $entry->date( Plagger::Date->parse($format, $msg->{time}) );
    if ($entry->date) {
        $entry->date->set_time_zone('Asia/Tokyo');
    }

    my $wants_icon = $self->conf->{show_icon}
        && !$self->{blocked}
        && defined $MAP->{$type}->{icon};

    if ($wants_icon) {
        my $owner_id = $msg->{link}->query_param($MAP->{$type}->{icon});
        $context->log(info => "Fetch icon of id=$owner_id");

        my $fetch_outline = sub {
            Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
            return $self->{mixi}->show_friend->parse(id => $owner_id)->{outline};
        };
        my $outline = $self->cache->get_callback(
            "outline-$owner_id",
            $fetch_outline,
            '12 hours',
        );

        if ($outline && $outline->{image} !~ /no_photo/) {
            # prefer smaller image
            (my $thumb = $outline->{image}) =~ s/\.jpg$/s.jpg/;
            $entry->icon({
                title => $outline->{name},
                url   => $thumb,
                link  => $outline->{link},
            });
        }
    }

    my @comments;

    my $wants_body = $self->conf->{fetch_body}
        && !$self->{blocked}
        && $msg->{link} =~ /view_/
        && defined $MAP->{$type}->{get_detail};

    if ($wants_body) {
        # view_enquete is not implemented and probably
        # won't be implemented as it seems redirected to
        # reply_enquete
        return if $msg->{link} =~ /view_enquete/;

        $context->log(info => "Fetch body from $msg->{link}");

        my $fetch_detail = sub {
            Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
            return scalar $self->{mixi}->parse($msg->{link});
        };
        my $detail = $self->cache->get_callback(
            "item-" . $msg->{link},
            $fetch_detail,
            '12 hours',
        );

        if (!$detail) {
            $context->log(warn => "Fetch body failed. You might be blocked?");
            # Stops further icon/body fetches until the flag is reset.
            $self->{blocked}++;
        }
        else {
            my $body = $detail->{description};
            $body =~ s!(\r\n?|\n)!<br />!g;

            # Each image is appended to the body as a linked thumbnail and
            # also attached as an inline enclosure.
            for my $image (@{ $detail->{images} || [] }) {
                $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>);

                my $enclosure = Plagger::Enclosure->new;
                $enclosure->url($image->{thumb_link});
                $enclosure->auto_set_type;
                $enclosure->is_inline(1);
                $entry->add_enclosure($enclosure);
            }
            $entry->body($body);

            # The detail page's time replaces the one taken from the list.
            $entry->date( Plagger::Date->parse($format, $detail->{time}) );
            if ($entry->date) {
                $entry->date->set_time_zone('Asia/Tokyo');
            }

            if ($self->conf->{fetch_comment}) {
                for my $comment (@{ $detail->{comments} || [] }) {
                    my $c = Plagger::Entry->new;
                    $c->title($entry->title . ': ' . $comment->{subject});
                    $c->body($comment->{description});
                    $c->link($comment->{link});
                    $c->author($comment->{name});
                    $c->date( Plagger::Date->parse($format, $comment->{time}) );
                    if ($c->date) {
                        $c->date->set_time_zone('Asia/Tokyo');
                    }
                    push @comments, $c;
                }
            }
        }
    }

    # Main entry first, then its comments in the order they were collected.
    for my $item ($entry, @comments) {
        $feed->add_entry($item);
    }
}
|
|---|
| 242 |
|
|---|
| 243 | 1;
|
|---|
| 244 |
|
|---|
| 245 | __END__
|
|---|
| 246 |
|
|---|
| 247 | =head1 NAME
|
|---|
| 248 |
|
|---|
| 249 | Plagger::Plugin::CustomFeed::MixiScraper - Custom feed for mixi.jp
|
|---|
| 250 |
|
|---|
| 251 | =head1 SYNOPSIS
|
|---|
| 252 |
|
|---|
| 253 | - module: CustomFeed::MixiScraper
|
|---|
| 254 | config:
|
|---|
| 255 | email: email@example.com
|
|---|
| 256 | password: password
|
|---|
| 257 | fetch_body: 1
|
|---|
| 258 | fetch_comment: 0
|
|---|
| 259 | show_icon: 1
|
|---|
| 260 | feed_type:
|
|---|
| 261 | - RecentComment
|
|---|
| 262 | - FriendDiary
|
|---|
| 263 | - Message
|
|---|
| 264 |
|
|---|
| 265 | =head1 DESCRIPTION
|
|---|
| 266 |
|
|---|
| 267 | This plugin fetches your friends' diary updates from mixi
|
|---|
| 268 | (L<http://mixi.jp/>) and creates a custom feed.
|
|---|
| 269 |
|
|---|
| 270 | =head1 CONFIGURATION
|
|---|
| 271 |
|
|---|
| 272 | =over 4
|
|---|
| 273 |
|
|---|
| 274 | =item email, password
|
|---|
| 275 |
|
|---|
| 276 | Credential you need to login to mixi.jp.
|
|---|
| 277 |
|
|---|
| 278 | Note that you don't have to supply email and password if you set
|
|---|
| 279 | global cookie_jar in your configuration file and the cookie_jar
|
|---|
| 280 | contains a valid login session there, such as:
|
|---|
| 281 |
|
|---|
| 282 | global:
|
|---|
| 283 | user_agent:
|
|---|
| 284 | cookies: /path/to/cookies.txt
|
|---|
| 285 |
|
|---|
| 286 | See L<Plagger::Cookies> for details.
|
|---|
| 287 |
|
|---|
| 288 | =item fetch_body
|
|---|
| 289 |
|
|---|
| 290 | With this option set, this plugin fetches entry body HTML, not just a
|
|---|
| 291 | link to the entry. Defaults to 0.
|
|---|
| 292 |
|
|---|
| 293 | =item fetch_comment
|
|---|
| 294 |
|
|---|
| 295 | With this option set, this plugin fetches entry's comments as well
|
|---|
| 296 | (meaningless when C<fetch_body> is not set). Defaults to 0.
|
|---|
| 297 |
|
|---|
| 298 | =item fetch_body_interval
|
|---|
| 299 |
|
|---|
| 300 | Number of seconds to sleep before fetching an entry body or a user
|
|---|
| 301 | icon, to avoid mixi.jp throttling. May be fractional. Defaults to 1.5.
|
|---|
| 302 |
|
|---|
| 303 | =item show_icon
|
|---|
| 304 |
|
|---|
| 305 | With this option set, this plugin fetches users' buddy icons from
|
|---|
| 306 | mixi.jp site, which makes the output HTML very user-friendly.
|
|---|
| 307 |
|
|---|
| 308 | =item split_bbs_feed
|
|---|
| 309 |
|
|---|
| 310 | With this option set, BBS feed will be split up. Defaults to 0.
|
|---|
| 311 |
|
|---|
| 312 | =item feed_type
|
|---|
| 313 |
|
|---|
| 314 | With this option set, you can set the feed types.
|
|---|
| 315 |
|
|---|
| 316 | Now supports: RecentComment, FriendDiary, Message, Log, MyDiary, Calendar, and BBS.
|
|---|
| 317 |
|
|---|
| 318 | Default: FriendDiary.
|
|---|
| 319 |
|
|---|
| 320 | =back
|
|---|
| 321 |
|
|---|
| 322 | =head1 SCREENSHOT
|
|---|
| 323 |
|
|---|
| 324 | L<http://blog.bulknews.net/mt/archives/plagger-mixi-icon.gif>
|
|---|
| 325 |
|
|---|
| 326 | =head1 AUTHOR
|
|---|
| 327 |
|
|---|
| 328 | Tatsuhiko Miyagawa, modified by Kenichi Ishigaki
|
|---|
| 329 |
|
|---|
| 330 | =head1 SEE ALSO
|
|---|
| 331 |
|
|---|
| 332 | L<Plagger>, L<WWW::Mixi::Scraper>
|
|---|
| 333 |
|
|---|
| 334 | =cut
|
|---|