Changeset 18428
- Timestamp:
- 08/29/08 16:42:16 (4 months ago)
- Location:
- lang/perl/Net-DMM/trunk
- Files:
-
- 7 added
- 1 removed
- 6 modified
-
Build.PL (added)
-
Changes (modified) (1 diff)
-
MANIFEST (modified) (1 diff)
-
Makefile.PL (modified) (1 diff)
-
README (added)
-
lib/Net/DMM.pm (modified) (3 diffs)
-
lib/Net/DMM/Scraper/PcgamesCaptureImg.pm (modified) (2 diffs)
-
lib/Net/DMM/Scraper/SearchResult.pm (modified) (3 diffs)
-
lib/Net/DMM/Util.pm (added)
-
t/00.load.t (added)
-
t/DMM.t (deleted)
-
t/perlcritic.t (added)
-
t/pod-coverage.t (added)
-
t/pod.t (added)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/Net-DMM/trunk/Changes
r18142 r18428 1 Revision history for Net-DMM .1 Revision history for Net-DMM 2 2 3 0.0.1 Sun Aug 24 0:12:34 2008 3 0.0.3 Thu Aug 28 17:19:15 2008 4 add Net::DMM::Util. 5 Fixed Net::DMM, Net::DMM::Scraper::SearchResult, Net::DMM::Scraper::PcgamesCaptureImg. 6 add README. 7 Fixed Makefile.PL, MANIFEST. 8 9 0.0.2 Thu Aug 28 17:19:15 2008 10 Fixed all. 11 12 0.0.1 Sun Aug 24 00:12:34 2008 4 13 Initial release. 5 14 -
lang/perl/Net-DMM/trunk/MANIFEST
r18142 r18428 1 Build.PL 1 2 Changes 2 3 Makefile.PL 3 4 MANIFEST 4 5 README 5 t/DMM.t6 6 lib/Net/DMM.pm 7 lib/Net/DMM/Util.pm 7 8 lib/Net/DMM/Scraper/SearchResult.pm 8 9 lib/Net/DMM/Scraper/PcgamesCaptureImg.pm 10 t/00.load.t 11 t/perlcritic.t 12 t/pod-coverage.t 13 t/pod.t -
lang/perl/Net-DMM/trunk/Makefile.PL
r18142 r18428 10 10 PL_FILES => {}, 11 11 PREREQ_PM => { 12 'Test::More' => 0, 13 'version' => 0, 12 'Test::More' => 0, 13 'LWP::UserAgent' => 0, 14 'Encode' => 0, 15 'URI' => 0, 16 'URI::Fetch' => 0, 17 'Web::Scraper' => 0, 18 'Class::Accessor' => 0, 14 19 }, 15 20 dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, -
lang/perl/Net-DMM/trunk/lib/Net/DMM.pm
r18397 r18428 1 1 package Net::DMM; 2 # 3 # DMMから情報を取得する 4 # 2 5 3 use strict; 6 4 use warnings; … … 15 13 16 14 sub new { 17 my $class = shift;18 my $opt = shift;19 $opt->{ua} = LWP::UserAgent->new unless $opt->{ua};20 my $self = bless $opt, $class;21 return $self;15 my $class = shift; 16 my $opt = shift; 17 $opt->{ua} = LWP::UserAgent->new unless $opt->{ua}; 18 my $self = bless $opt, $class; 19 return $self; 22 20 } 23 21 24 22 sub do_scrape { 25 my $self = shift; 26 my $opt = shift; 27 croak "no url." unless $opt->{url}; 28 croak "no scraper." unless $opt->{scraper}; 29 my $scrape_module = __PACKAGE__ . "::Scraper::" . $opt->{scraper}; 30 return $scrape_module->new({ url => $opt->{url}, opt => $self, }); 23 my $self = shift; 24 my $opt = shift; 25 croak "no url." unless $opt->{url}; 26 croak "no scraper." unless $opt->{scraper}; 27 my $scrape_module = __PACKAGE__ . "::Scraper::" . $opt->{scraper}; 28 return $scrape_module->new({ 29 url => $opt->{url}, 30 opt => $self, 31 }); 31 32 } 32 33 33 34 sub search { 34 35 my $self = shift; 35 my $opt = shift;36 return undefif !$opt->{searchstr} && !$self->{searchstr};37 $opt->{area} = $self->{area} unless defined $opt->{area};38 croak "area param is invalid. set 'com' or 'co.jp'."39 if !$opt->{area} || $opt->{area} !~ /^(?:com|co\.jp)$/;40 $self->{svr} = 'www' unless $self->{svr};41 $self->{sort} = 'rank_asc' unless $self->{sort};42 $self->{limit} = 10 unless $self->{limit};43 $self->{search_url} = $self->_make_searchurl($opt);44 $self->{redirect_url} = $self->{ua}->get($self->{search_url})->base if $opt->{redirect_url};45 return {46 search_url => $self->{search_url}->as_string,47 redirect_url => $self->{redirect_url},48 };36 my $opt = shift; 37 return if !$opt->{searchstr} && !$self->{searchstr}; 38 $opt->{area} = $self->{area} unless defined $opt->{area}; 39 croak "area param is invalid. set 'com' or 'co.jp'." 40 if !$opt->{area} || $opt->{area} !~ /^(?:com|co\.jp)$/; 41 $self->{svr} = 'www' unless $self->{svr}; 42 $self->{sort} = 'rank_asc' unless $self->{sort}; 43 $self->{limit} = 10 unless $self->{limit}; 44 $self->{search_url} = $self->_make_searchurl($opt); 45 $self->{redirect_url} = $self->{ua}->get($self->{search_url})->base if $opt->{redirect_url}; 46 return { 47 search_url => $self->{search_url}->as_string, 48 redirect_url => $self->{redirect_url}, 49 }; 49 50 } 50 51 51 52 sub _make_searchurl { 52 my $self = shift;53 my $opt = shift;54 my $searchstr = $opt->{searchstr} || $self->{searchstr};55 my $uri = URI->new("http://".$opt->{svr}.".dmm.".$opt->{area}."/search/");56 $uri->query_form(57 category => $opt->{category} || $self->{category},58 searchstr => $self->_make_searchstr($searchstr),59 sort => $opt->{sort} || $self->{sort},60 limit => $opt->{limit} || $self->{limit},61 redirect => 1,62 );63 return $uri;53 my $self = shift; 54 my $opt = shift; 55 my $searchstr = $opt->{searchstr} || $self->{searchstr}; 56 my $uri = URI->new("http://".$self->{svr}.".dmm.".$opt->{area}."/search/"); 57 $uri->query_form( 58 category => $opt->{category} || $self->{category}, 59 searchstr => $self->_make_searchstr($searchstr), 60 sort => $opt->{sort} || $self->{sort}, 61 limit => $opt->{limit} || $self->{limit}, 62 redirect => 1, 63 ); 64 return $uri; 64 65 } 65 66 66 67 sub _make_searchstr { 67 my $self = shift;68 my $searchstr = shift || return;69 utf8::encode($searchstr) if utf8::is_utf8($searchstr);70 return encode('euc-jp', decode('utf8', $searchstr));68 my $self = shift; 69 my $searchstr = shift || return; 70 utf8::encode($searchstr) if utf8::is_utf8($searchstr); 71 return encode('euc-jp', decode('utf8', $searchstr)); 71 72 } 72 73 … … 79 80 Net::DMM - Search and Scrape DMM http://www.dmm.com/ and DMM.ADULT http://www.dmm.co.jp/ 80 81 81 82 =head1 VERSION83 84 This document describes Net::DMM version 0.0.385 86 87 82 =head1 SYNOPSIS 88 83 89 use Net::DMM;90 use Data::Dumper;84 use Net::DMM; 85 use Data::Dumper; 91 86 92 my $dmm = Net::DMM->new();87 my $dmm = Net::DMM->new(); 93 88 94 my $search = $dmm->search(95 {96 'searchstr' => 'てすと',97 'area' => 'co.jp',98 'category' => 'pcgames',99 }100 );101 print Dumper($search);89 my $search = $dmm->search( 90 { 91 'searchstr' => 'てすと', 92 'area' => 'co.jp', 93 'category' => 'pcgames', 94 } 95 ); 96 print Dumper($search); 102 97 103 my $items = $search->do_scrape( 104 { 105 url => $search->{search_url}, 106 scraper => 'SearchResult', 107 } 108 ); 109 print Dumper($items); 98 my $items = $dmm->do_scrape( 99 { 100 url => $search->{search_url}, 101 scraper => 'SearchResult', 102 } 103 ); 104 print Dumper($items); 105 106 =head1 METHOD 107 108 =over 109 110 =item new(I<$args>) 111 112 =item search(I<$args>) 113 114 =item do_scrape(I<$args>) 115 116 =back 110 117 111 118 =head1 AUTHOR -
lang/perl/Net-DMM/trunk/lib/Net/DMM/Scraper/PcgamesCaptureImg.pm
r18142 r18428 1 1 package Net::DMM::Scraper::PcgamesCaptureImg; 2 # 3 # pcgameカテゴリの個別商品ページからキャプチャ画像をスクレイピング 4 # 2 3 use strict; 4 use warnings; 5 #use utf8; 6 7 use Web::Scraper; 8 use Net::DMM::Util qw( _fetch _rm_space ); 5 9 6 10 use base qw(Class::Accessor); 7 11 __PACKAGE__->mk_accessors( 8 qw( title img )12 qw( title img ) 9 13 ); 10 14 11 use strict;12 use warnings;13 use utf8;14 15 use URI::Fetch;16 use Cache::FileCache;17 use Encode qw(encode decode);18 use URI;19 use Web::Scraper;20 21 our $VERSION = '0.0.2';22 23 15 sub new { 24 my ($class, $opt) = @_;25 my $self = bless {26 url => $opt->{url},27 opt => $opt->{opt},28 img => [],29 }, $class;30 $self->_scrape if $self->{url};31 $self;16 my ($class, $opt) = @_; 17 my $self = bless { 18 url => $opt->{url}, 19 opt => $opt->{opt}, 20 img => [], 21 }, $class; 22 $self->_scrape if $self->{url}; 23 $self; 32 24 } 33 25 34 26 sub _scrape { 35 my $self = shift; 36 my $url = URI->new($self->{url}); 37 my $result = scraper { 38 process '/html/body/table/tr/td[3]/div/p/strong', 39 'title' => 'TEXT'; 40 process '/html/body/table/tr/td[3]/table/tr/td/input', 41 'img[]' => '@src'; 42 }->scrape($self->_fetch($url)); 27 my $self = shift; 28 my $result = scraper { 29 process '/html/body/table/tr/td[3]/div/p/strong', 30 'title' => 'TEXT'; 31 process '/html/body/table/tr/td[3]/table/tr/td/input', 32 'img[]' => '@src'; 33 }->scrape($self->_fetch($self->{url})); 43 34 44 foreach my $key (keys %$result){ 45 if($key =~ /^(img)$/){ 46 foreach my $i (@{$result->{$key}}){ 47 push(@{$self->{$1}}, $i); 48 } 49 }else{ 50 $self->set($key, $self->_delete_space($result->{$key})); 51 } 52 } 53 } 54 55 sub _delete_space { 56 my ($self, $str) = @_; 57 $str =~ s/^\s*(.*?)\s*$/$1/; 58 return $str; 59 } 60 61 sub _fetch { 62 my $self = shift || return; 63 my $url = shift || return; 64 my $page = ''; 65 if($self->{opt}->{fetch}){ 66 $page = URI::Fetch->fetch( 67 $url, 68 UserAgent => $self->{opt}->{ua}, 69 Cache => $self->{opt}->{fetch}->{cache}, 70 NoNetwork => $self->{opt}->{fetch}->{nonetwork}, 71 ); 72 }else{ 73 $page = URI::Fetch->fetch( 74 $url, 75 UserAgent => $self->{opt}->{ua}, 76 ); 77 } 78 return encode('utf8', decode('euc-jp', $page->content)); 35 foreach my $key (keys %$result){ 36 if($key =~ /^(img)$/){ 37 foreach my $i (@{$result->{$key}}){ 38 push(@{$self->{$1}}, $i); 39 } 40 }else{ 41 $self->set($key, $self->_rm_space($result->{$key})); 42 } 43 } 79 44 } 80 45 … … 87 52 Net::DMM::Scraper::PcgamesCaptureImg 88 53 54 =head1 METHOD 89 55 90 = head1 VERSION56 =over 91 57 92 This document describes Net::DMM::Scraper::PcgamesCaptureImg version 0.0.2 58 =item new 93 59 94 95 =head1 SYNOPSIS 96 97 use Net::DMM::Scraper::PcgamesCaptureImg; 60 =back 98 61 99 62 =head1 AUTHOR -
lang/perl/Net-DMM/trunk/lib/Net/DMM/Scraper/SearchResult.pm
r18142 r18428 1 1 package Net::DMM::Scraper::SearchResult; 2 #3 # 検索結果ページをスクレイピング4 #5 6 use base qw(Class::Accessor);7 __PACKAGE__->mk_accessors(8 qw( txt_list img_list title link )9 );10 2 11 3 use strict; … … 13 5 #use utf8; 14 6 15 use URI::Fetch;16 use Cache::FileCache;17 use Encode qw(encode decode);18 use URI;19 7 use Web::Scraper; 8 use Net::DMM::Util qw( _fetch _rm_space ); 20 9 21 our $VERSION = '0.0.2'; 10 use base qw(Class::Accessor); 11 __PACKAGE__->mk_accessors( 12 qw( txt_list img_list title link ) 13 ); 22 14 23 15 sub new { 24 my ($class, $opt) = @_;25 my $self = bless {26 url => $opt->{url},27 opt => $opt->{opt},28 title => [],29 link => [],30 }, $class;31 $self->_scrape if $self->{url};32 $self;16 my ($class, $opt) = @_; 17 my $self = bless { 18 url => $opt->{url}, 19 opt => $opt->{opt}, 20 title => [], 21 link => [], 22 }, $class; 23 $self->_scrape if $self->{url}; 24 $self; 33 25 } 34 26 35 27 sub _scrape { 36 my $self = shift; 37 my $url = URI->new($self->{url}); 38 my $result = scraper { 39 process '/html/body/table/tr/td[3]/table[3]/tr/td/table/tr/td[5]/a', 40 'txt_list' => '@href'; 41 process '/html/body/table/tr/td[3]/table[3]/tr/td/table/tr/td[6]/a', 42 'img_list' => '@href'; 43 process '/html/body/table/tr/td/table/tr/td/table/tr/td[2]/a', 44 'title[]' => 'TEXT', 45 'link[]' => '@href'; 46 }->scrape($self->_fetch($url)); 28 my $self = shift; 29 my $result = scraper { 30 process '/html/body/table/tr/td[3]/table[3]/tr/td/table/tr/td[5]/a', 31 'txt_list' => '@href'; 32 process '/html/body/table/tr/td[3]/table[3]/tr/td/table/tr/td[6]/a', 33 'img_list' => '@href'; 34 process '/html/body/table/tr/td/table/tr/td/table/tr/td[2]/a', 35 'title[]' => 'TEXT', 36 'link[]' => '@href'; 37 }->scrape($self->_fetch($self->{url})); 47 38 48 foreach my $key (keys %$result){ 49 if($key =~ /^(title|link)$/){ 50 foreach my $i (@{$result->{$key}}){ 51 push(@{$self->{$1}}, $i); 52 } 53 }else{ 54 $self->set($key, $self->_delete_space($result->{$key})); 55 } 56 } 57 } 58 59 sub _delete_space { 60 my ($self, $str) = @_; 61 $str =~ s/^\s*(.*?)\s*$/$1/; 62 return $str; 63 } 64 65 sub _fetch { 66 my $self = shift || return; 67 my $url = shift || return; 68 my $page = ''; 69 if($self->{opt}->{fetch}){ 70 $page = URI::Fetch->fetch( 71 $url, 72 UserAgent => $self->{opt}->{ua}, 73 Cache => $self->{opt}->{fetch}->{cache}, 74 NoNetwork => $self->{opt}->{fetch}->{nonetwork}, 75 ); 76 }else{ 77 $page = URI::Fetch->fetch( 78 $url, 79 UserAgent => $self->{opt}->{ua}, 80 ); 81 } 82 return encode('utf8', decode('euc-jp', $page->content)); 39 foreach my $key (keys %$result){ 40 if($key =~ /^(title|link)$/){ 41 foreach my $i (@{$result->{$key}}){ 42 push(@{$self->{$1}}, $i); 43 } 44 }else{ 45 $self->set($key, $self->_rm_space($result->{$key})); 46 } 47 } 83 48 } 84 49 … … 91 56 Net::DMM::Scraper::SearchResult 92 57 58 =head1 METHOD 93 59 94 = head1 VERSION60 =over 95 61 96 This document describes Net::DMM::Scraper::SearchResult version 0.0.2 62 =item new 97 63 98 99 =head1 SYNOPSIS 100 101 use Net::DMM::Scraper::SearchResult; 64 =back 102 65 103 66 =head1 AUTHOR
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)