Changeset 4346 for lang/perl/Acme-Actors-JA
- Timestamp: 01/10/08 12:49:58 (11 months ago)
- Location: lang/perl/Acme-Actors-JA/trunk
- Files: 2 added, 1 modified
  - eg (added)
  - eg/scrape_details.pl (added)
  - lib/Acme/Actors/JA/Scraper.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/Acme-Actors-JA/trunk/lib/Acme/Actors/JA/Scraper.pm
```diff
--- lib/Acme/Actors/JA/Scraper.pm (r4315)
+++ lib/Acme/Actors/JA/Scraper.pm (r4346)
@@ -25,8 +25,14 @@
 sub scrape_actors_jp
 {
+    my $class = shift;
     my $uri = URI->new("http://ja.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC%E3%81%AE%E7%94%B7%E5%84%AA%E4%B8%80%E8%A6%A7");
 
     my $scraper = scraper {
-        process "#bodyContent table li>a", 'names[]' => 'TEXT' ;
+        process "#bodyContent table li>a",
+            'actors[]' => {
+                'name' => 'TEXT',
+                'wikipedia' => '@href',
+            }
+        ;
     };
     $scraper->scrape($uri);
@@ -63,4 +69,33 @@
 }
 
+sub scrape_details
+{
+    my $class = shift;
+    my $actor = shift;
+
+    my $detail = scraper {
+        process q|//table[@class = 'infobox']/tr[position() = 1]/td/small|,
+            yomi_infobox => 'TEXT'
+        ;
+
+        # This is where the first name usually sits
+        process q|//div[@id = 'bodyContent']/p|,
+            'yomi_text' => [ 'TEXT', sub {
+                use utf8;
+                if (m{[\((]([^、 ]+)}) {
+                    $_ = $1;
+                } else {
+                    $_ = '';
+                }
+            } ]
+        ;
+    }->scrape($actor->{wikipedia});
+
+    $actor->{yomi} ||= $detail->{yomi_infobox} || $detail->{yomi_text};
+    $actor->{yomi} =~ s/[ \s]+//g;
+
+    $actor;
+}
+
 1;
 
```
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)