root/lang/perl/plagger/lib/Plagger/Plugin/CustomFeed/MixiScraper.pm

Revision 11427, 9.8 kB (checked in by charsbar, 14 months ago)

Plagger::Plugin::CustomFeed::MixiScraper: I didn't notice this was imported here. Reluctantly synced with the latest version in Plagger's main repo.

Line 
1package Plagger::Plugin::CustomFeed::MixiScraper;
2use strict;
3use base qw( Plagger::Plugin );
4
5use DateTime::Format::Strptime;
6use WWW::Mixi::Scraper;
7use Time::HiRes;
8
# Registry of supported feed types, keyed by the names accepted in the
# "feed_type" configuration option. Each entry may define:
#
#   title      - title given to the generated feed
#   get_list   - name of the WWW::Mixi::Scraper method whose parse()
#                result lists the entries
#   get_detail - when defined, entry bodies may be fetched for this type
#                (only its presence is checked in this file)
#   icon       - name of the query parameter in an entry link that
#                carries the user id used for the icon lookup
our $MAP = {
    BBS => {
        get_detail => 'view_bbs',
        get_list   => 'new_bbs',
        title      => 'コミュニティ最新書き込み',
    },
    Calendar => {
        get_detail => 'view_event',
        get_list   => 'show_calendar',
        title      => 'ミクシィカレンダー',
    },
    FriendDiary => {
        get_detail => 'view_diary',
        get_list   => 'new_friend_diary',
        icon       => 'owner_id',
        title      => 'マイミク最新日記',
    },
    Log => {
        get_list => 'show_log',
        icon     => 'id',
        title    => 'ミクシィ足跡',
    },
    # No icon available for messages.
    Message => {
        get_detail => 'view_message',
        get_list   => 'list_message',
        title      => 'ミクシィメッセージ受信箱',
    },
    MyDiary => {
        get_detail => 'view_diary',
        get_list   => 'list_diary',
        icon       => 'owner_id',
        title      => 'ミクシィ日記',
    },
    # Neither icon nor body available for recent comments.
    RecentComment => {
        get_list => 'list_comment',
        title    => 'ミクシィ最近のコメント一覧',
    },
};
49
# Return an identifier for this plugin instance: the plugin's class id
# and the configured account email, joined with a hyphen.
sub plugin_id {
    my ($plugin) = @_;

    my $class_id = $plugin->class_id;
    my $account  = $plugin->conf->{email};

    return join '-', $class_id, $account;
}
54
# Hook this plugin into the Plagger run: load() is registered for the
# 'subscription.load' hook.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ( 'subscription.load' => \&load );

    $ctx->register_hook($plugin, %hooks);
}
62
# 'subscription.load' hook: create the WWW::Mixi::Scraper client, store
# it in $self->{mixi}, and add a feed to the subscription whose
# aggregator calls back into aggregate().
sub load {
    my ($plugin, $ctx) = @_;

    my $jar  = $plugin->cookie_jar;
    my $conf = $plugin->conf;

    # A jar whose class is not exactly HTTP::Cookies is treated as a
    # foreign cookie store: email/password need not be configured, so
    # fill in fake ones where they are missing.
    if (ref($jar) ne 'HTTP::Cookies') {
        my %placeholder = (
            email    => 'plagger@localhost',
            password => 'pl4gg5r',
        );
        $conf->{$_} ||= $placeholder{$_} for qw( email password );
    }

    my %client_args = (
        email      => $conf->{email},
        password   => $conf->{password},
        cookie_jar => $jar,
    );
    $plugin->{mixi} = WWW::Mixi::Scraper->new(%client_args);

    my $feed = Plagger::Feed->new;
    $feed->aggregator(sub { $plugin->aggregate(@_) });

    $ctx->subscription->add($feed);
}
83
# Feed aggregator callback. Walks the configured "feed_type" list
# (default: FriendDiary) and builds the feed(s) for each type. BBS is
# handed to aggregate_bbs_feed() when "split_bbs_feed" is set; every
# other case goes through aggregate_feed().
sub aggregate {
    my ($plugin, $ctx, $args) = @_;

    my $types = $plugin->conf->{feed_type} || ['FriendDiary'];

    for my $type (@$types) {
        # Report feed types that are not in the registry.
        $ctx->error("$type not found") unless $MAP->{$type};

        my $builder = ($type eq 'BBS' and $plugin->conf->{split_bbs_feed})
            ? 'aggregate_bbs_feed'
            : 'aggregate_feed';

        $plugin->$builder($ctx, $type, $args);
    }
}
96
# Build one feed for the given feed type: fetch the entry list through
# the scraper, turn up to "fetch_items" (default 20) entries into
# Plagger entries via add_entry(), and hand the feed to the context's
# update queue.
sub aggregate_feed {
    my ($plugin, $ctx, $type, $args) = @_;

    my $feed = Plagger::Feed->new;
    $feed->type('mixi');
    $feed->title($MAP->{$type}{title});

    my $lister = $MAP->{$type}{get_list};
    my @msgs   = $plugin->{mixi}->$lister->parse;
    my $limit  = $plugin->conf->{fetch_items} || 20;
    $plugin->log(info => 'fetch ' . scalar(@msgs) . ' entries');

    # The feed links to the page the scraper's mech object is currently on.
    $feed->link($plugin->{mixi}->{mech}->uri);

    # Reset the "blocked" flag that add_entry() raises when a body fetch fails.
    $plugin->{blocked} = 0;

    my $taken = 0;
    MSG:
    for my $msg (@msgs) {
        # FriendDiary links carrying a "url" parameter point to external blogs.
        my $is_external = $type eq 'FriendDiary' && $msg->{link}->query_param('url');
        next MSG if $is_external;

        last MSG if $taken >= $limit;
        $taken++;

        $plugin->add_entry($ctx, $type, $feed, $msg);
    }

    $ctx->update->add($feed);
}
122
# Split variant of aggregate_feed(): every listed message (up to
# "fetch_items", default 20) becomes its own single-entry feed that is
# added to the context's update queue separately.
sub aggregate_bbs_feed {
    my ($plugin, $ctx, $type, $args) = @_;

    my $lister = $MAP->{$type}{get_list};
    my @msgs   = $plugin->{mixi}->$lister->parse;
    my $limit  = $plugin->conf->{fetch_items} || 20;
    $plugin->log(info => 'fetch ' . scalar(@msgs) . ' entries');

    # Reset the "blocked" flag that add_entry() raises when a body fetch fails.
    $plugin->{blocked} = 0;

    my $taken = 0;
    MSG:
    for my $msg (@msgs) {
        # FriendDiary links carrying a "url" parameter point to external blogs.
        my $is_external = $type eq 'FriendDiary' && $msg->{link}->query_param('url');
        next MSG if $is_external;

        last MSG if $taken >= $limit;
        $taken++;

        # Feed title: the subject without a trailing "(N)" counter.
        my $subject = $msg->{subject};
        $subject =~ s/\(\d+\)$//;

        # Feed link: the message link without its comment_count parameter.
        my $link = $msg->{link};
        $link =~ s/&comment_count=\d*//;

        my $feed = Plagger::Feed->new;
        $feed->type('mixi');
        $feed->title($subject);
        $feed->description(join ': ', $MAP->{$type}{title}, $msg->{name});
        $feed->link($link);

        $plugin->add_entry($ctx, $type, $feed, $msg);

        $ctx->update->add($feed);
    }
}
150
# Parser for the "YYYY-MM-DD HH:MM" timestamps handed to
# Plagger::Date->parse in add_entry().
my $format = DateTime::Format::Strptime->new(pattern => '%Y-%m-%d %H:%M');

# add_entry($context, $type, $feed, $msg)
#
# Convert one scraped list item ($msg, a hash with subject/link/name/time)
# into a Plagger::Entry and append it to $feed.
#
#   * With "show_icon" set and an "icon" parameter registered for $type,
#     the user's profile outline is looked up (through the plugin cache)
#     and attached as the entry icon.
#   * With "fetch_body" set and a "get_detail" registered for $type, the
#     entry page is fetched (through the plugin cache) to fill in the
#     body, inline image enclosures and the date; with "fetch_comment"
#     set, each comment is appended to $feed as an entry of its own.
#
# Both kinds of fetches are skipped once $self->{blocked} is raised,
# which happens when a body fetch yields nothing.
#
# NOTE: when body fetching is active, links matching /view_enquete/ make
# this method return early, so such entries are not added to $feed.
sub add_entry {
    my ($self, $context, $type, $feed, $msg) = @_;

    my $entry = Plagger::Entry->new;
    $entry->title($msg->{subject});
    $entry->link($msg->{link});
    $entry->author($msg->{name});
    $entry->date( Plagger::Date->parse($format, $msg->{time}) );
    $entry->date->set_time_zone('Asia/Tokyo') if $entry->date;

    if ($self->conf->{show_icon} && !$self->{blocked} && defined $MAP->{$type}->{icon}) {
        my $owner_id = $msg->{link}->query_param($MAP->{$type}->{icon});

        # Without a user id there is nothing to look up: do not sleep,
        # hit the site, or create a cache entry keyed on an empty id.
        if (defined $owner_id && length $owner_id) {
            $context->log(info => "Fetch icon of id=$owner_id");

            my $item = $self->cache->get_callback(
                "outline-$owner_id",
                sub {
                    # Pause before the request to avoid being throttled.
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    my $item = $self->{mixi}->show_friend->parse(id => $owner_id)->{outline};
                    $item;
                },
                '12 hours',
            );

            # Only attach an icon when the outline carries a real image
            # (neither missing nor the "no_photo" placeholder).
            if ($item && defined $item->{image} && $item->{image} !~ /no_photo/) {
                # prefer smaller image
                my $image = $item->{image};
                   $image =~ s/\.jpg$/s.jpg/;
                $entry->icon({
                    title => $item->{name},
                    url   => $image,
                    link  => $item->{link},
                });
            }
        }
    }

    my @comments;
    if ($self->conf->{fetch_body} && !$self->{blocked} && $msg->{link} =~ /view_/ && defined $MAP->{$type}->{get_detail}) {
        # view_enquete is not implemented and probably
        # won't be implemented as it seems redirected to
        # reply_enquete
        return if $msg->{link} =~ /view_enquete/;
        $context->log(info => "Fetch body from $msg->{link}");
        my $item = $self->cache->get_callback(
            "item-".$msg->{link},
            sub {
                # Pause before the request to avoid being throttled.
                Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                my $item = $self->{mixi}->parse($msg->{link});
                $item;
            },
            '12 hours',
        );
        if ($item) {
            # Turn line breaks of the description into HTML breaks.
            my $body = $item->{description};
               $body =~ s!(\r\n?|\n)!<br />!g;

            # Append each image as a linked thumbnail and register the
            # thumbnail as an inline enclosure.
            for my $image (@{ $item->{images} || [] }) {
                $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>);
                my $enclosure = Plagger::Enclosure->new;
                $enclosure->url($image->{thumb_link});
                $enclosure->auto_set_type;
                $enclosure->is_inline(1);
                $entry->add_enclosure($enclosure);
            }
            $entry->body($body);

            # Re-set the date from the timestamp on the detail page.
            $entry->date( Plagger::Date->parse($format, $item->{time}) );
            $entry->date->set_time_zone('Asia/Tokyo') if $entry->date;

            if ($self->conf->{fetch_comment}) {
                for my $comment (@{ $item->{comments} || [] }) {
                    my $c = Plagger::Entry->new;
                    $c->title($entry->title . ': '. $comment->{subject});
                    $c->body($comment->{description});
                    $c->link($comment->{link});
                    $c->author($comment->{name});
                    $c->date( Plagger::Date->parse($format, $comment->{time}) );
                    $c->date->set_time_zone('Asia/Tokyo') if $c->date;
                    push @comments, $c;
                }
            }
        } else {
            # Nothing came back: assume we are being blocked and stop
            # further icon/body fetches until the flag is reset.
            $context->log(warn => "Fetch body failed. You might be blocked?");
            $self->{blocked}++;
        }
    }

    # The entry itself first, then its comments (if any were collected).
    $feed->add_entry($entry);
    for my $comment ( @comments ) {
        $feed->add_entry($comment);
    }
}
242
2431;
244
245__END__
246
247=head1 NAME
248
249Plagger::Plugin::CustomFeed::MixiScraper -  Custom feed for mixi.jp
250
251=head1 SYNOPSIS
252
253    - module: CustomFeed::MixiScraper
254      config:
255        email: email@example.com
256        password: password
257        fetch_body: 1
258        fetch_comment: 0
259        show_icon: 1
260        feed_type:
261          - RecentComment
262          - FriendDiary
263          - Message
264
265=head1 DESCRIPTION
266
This plugin fetches your friends' diary updates (and other mixi
content, see C<feed_type>) from mixi (L<http://mixi.jp/>) and creates
custom feeds from them.
269
270=head1 CONFIGURATION
271
272=over 4
273
274=item email, password
275
Credentials you need to log in to mixi.jp.
277
278Note that you don't have to supply email and password if you set
279global cookie_jar in your configuration file and the cookie_jar
280contains a valid login session there, such as:
281
282  global:
283    user_agent:
284      cookies: /path/to/cookies.txt
285
286See L<Plagger::Cookies> for details.
287
288=item fetch_body
289
290With this option set, this plugin fetches entry body HTML, not just a
291link to the entry. Defaults to 0.
292
293=item fetch_comment
294
295With this option set, this plugin fetches entry's comments as well
296(meaningless when C<fetch_body> is not set). Defaults to 0.
297
298=item fetch_body_interval
299
Number of seconds to wait before each uncached request made for
C<fetch_body> or C<show_icon>, to avoid mixi.jp throttling. Defaults to 1.5.
302
=item show_icon
304
With this option set, this plugin fetches users' buddy icons from the
mixi.jp site, which makes the output HTML very user-friendly. Defaults to 0.
307
308=item split_bbs_feed
309
With this option set, the BBS feed is split into one feed per listed
topic instead of a single combined feed. Defaults to 0.
311
312=item feed_type
313
314With this option set, you can set the feed types.
315
Now supports: RecentComment, FriendDiary, Message, Log, MyDiary, Calendar, and BBS.
317
318Default: FriendDiary.
319
320=back
321
322=head1 SCREENSHOT
323
324L<http://blog.bulknews.net/mt/archives/plagger-mixi-icon.gif>
325
326=head1 AUTHOR
327
328Tatsuhiko Miyagawa, modified by Kenichi Ishigaki
329
330=head1 SEE ALSO
331
332L<Plagger>, L<WWW::Mixi::Scraper>
333
334=cut
Note: See TracBrowser for help on using the browser.