root/WWW-Mixi-Scraper/trunk/lib/WWW/Mixi/Scraper/Plugin/ShowFriend.pm @ 4

Revision 4, 3.3 kB (checked in by charsbar, 7 years ago)

WWW-Mixi-Scraper: imported

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1package WWW::Mixi::Scraper::Plugin::ShowFriend;
2
3use strict;
4use warnings;
5use WWW::Mixi::Scraper::Plugin;
6use WWW::Mixi::Scraper::Utils qw( _uri );
7
8validator {qw( id is_number )};
9
10sub scrape {
11  my ($self, $html) = @_;
12
13  return {
14    profile => $self->_scrape_profile($html),
15    outline => $self->_scrape_outline($html),
16  };
17}
18
19sub _scrape_profile {
20  my ($self, $html) = @_;
21
22  my %scraper;
23  $scraper{items} = scraper {
24    process 'td[width="80"]',
25      key => 'TEXT';
26    process 'td[width!="80"]',
27      value => 'TEXT';
28    result qw( key value );
29  };
30
31  $scraper{profile} = scraper {
32    process 'table[width="425"]>tr[bgcolor="#FFFFFF"]',
33      'items[]' => $scraper{items};
34    result qw( items );
35  };
36
37  my $stash = $self->post_process($scraper{profile}->scrape(\$html));
38
39  my $profile = {};
40  foreach my $item ( @{ $stash } ) {
41    next unless $item->{key};
42    $profile->{$item->{key}} = $item->{value};
43  }
44
45  return $profile;
46}
47
48sub _scrape_outline {
49  my ($self, $html) = @_;
50
51  my %scraper;
52  $scraper{relations} = scraper {
53    process 'a',
54      link => '@href',
55      name => 'TEXT';
56    result qw( link name );
57  };
58
59  $scraper{outline} = scraper {
60    process 'table[bgcolor="#FEC977"]>tr>td[colspan="3"]',
61      'string[]' => 'TEXT';
62    process 'table[width="270"]>tr>td[colspan="3"]>a',
63      'relations[]' => $scraper{relations};
64    process 'table[width="250"]>tr>td>img[vspace="2"]',
65      image => '@src';
66    result qw( image string relations );
67  };
68
69  my $stash = $self->post_process($scraper{outline}->scrape(\$html))->[0];
70
71  my @relations;
72  foreach my $rel (@{ delete $stash->{relations} || [] }) {
73    next unless $rel->{link} =~ /^show_friend/;
74    $rel->{link} = _uri( $rel->{link} );
75    push @relations, $rel;
76  }
77  $stash->{step} = scalar @relations;
78  $stash->{relation} = shift @relations if @relations > 1;
79
80  foreach my $string (@{ delete $stash->{string} || [] }) {
81    if ( $string =~ /^(.+)\((\d+)\)\s+\(([^)]+)\)\s*$/ ) {
82      $stash->{name} = $1;
83      $stash->{count} = $2;
84      $stash->{description} = $3;
85    }
86    elsif ( $string =~ /^(.+)\((\d+)\)\s*$/ ) { # may be yourself
87      $stash->{name} = $1;
88      $stash->{count} = $2;
89    }
90  }
91
92  # XXX: this fails when you test with local files.
93  # In this case, we can scrape the link from the 'snavi' toolbar
94  # but it's ugly.
95  $stash->{link} = $self->{uri};
96
97  return $stash;
98}
99
1001;
101
102__END__
103
104=head1 NAME
105
106WWW::Mixi::Scraper::Plugin::ShowFriend
107
108=head1 DESCRIPTION
109
110This is almost equivalent to WWW::Mixi->parse_show_friend_profile() and WWW::Mixi->parse_show_friend_outline(), though you need one more step to get the hash reference(s) you want.
111
112=head1 METHOD
113
114=head2 scrape
115
116returns a hash reference of the person's profile.
117
118  {
119    profile => { 'profile' => 'hash' },
120    outline => {
121      name => 'name',
122      link => 'http://mixi.jp/show_friend.pl?id=xxx',
123      image => 'http://img.mixi.jp/photo/member/xx/xx/xxx.jpg',
124      description => 'last login time',
125      count => 20,
126      step => 2,
127      relation => {
128        name => 'someone who knows him/her directly',
129        link => 'http://mixi.jp/show_friend.pl?id=yyy',
130      },
131    },
132  }
133
134=head1 AUTHOR
135
136Kenichi Ishigaki, E<lt>ishigaki at cpan.orgE<gt>
137
138=head1 COPYRIGHT AND LICENSE
139
140Copyright (C) 2007 by Kenichi Ishigaki.
141
142This program is free software; you can redistribute it and/or
143modify it under the same terms as Perl itself.
144
145=cut
Note: See TracBrowser for help on using the browser.