Changeset 4347 for lang/perl/Acme-Actors-JA
- Timestamp: 01/10/08 13:09:39 (11 months ago)
- Location: lang/perl/Acme-Actors-JA/trunk
- Files: 2 modified
  - eg/scrape_details.pl (modified) (1 diff)
  - lib/Acme/Actors/JA/Scraper.pm (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
lang/perl/Acme-Actors-JA/trunk/eg/scrape_details.pl
```diff
--- lang/perl/Acme-Actors-JA/trunk/eg/scrape_details.pl (r4346)
+++ lang/perl/Acme-Actors-JA/trunk/eg/scrape_details.pl (r4347)
@@ -5,7 +5,10 @@
 use URI;
 
-my $h = Acme::Actors::JA::Scraper->scrape_actors_jp();
+my $actors = [];
 
-foreach my $actor (@{ $h->{actors} }) {
+push @$actors, @{Acme::Actors::JA::Scraper->scrape_actress_jp()};
+push @$actors, @{Acme::Actors::JA::Scraper->scrape_actors_jp()};
+
+foreach my $actor (@$actors) {
     Acme::Actors::JA::Scraper->scrape_details($actor);
     print Dump($actor);
```
lang/perl/Acme-Actors-JA/trunk/lib/Acme/Actors/JA/Scraper.pm
```diff
--- lang/perl/Acme-Actors-JA/trunk/lib/Acme/Actors/JA/Scraper.pm (r4346)
+++ lang/perl/Acme-Actors-JA/trunk/lib/Acme/Actors/JA/Scraper.pm (r4347)
@@ -5,4 +5,5 @@
 
 package Acme::Actors::JA::Scraper;
+use utf8;
 use strict;
 use warnings;
@@ -36,5 +37,5 @@
         ;
     };
-    $scraper->scrape($uri);
+    $scraper->scrape($uri)->{actors};
 }
 
@@ -44,7 +45,12 @@
 
     my $scraper = scraper {
-        process "#bodyContent table li>a", 'names[]' => 'TEXT' ;
+        process "#bodyContent table li>a",
+            'actors[]' => {
+                'name' => 'TEXT',
+                'wikipedia' => '@href'
+            }
+        ;
     };
-    $scraper->scrape($uri);
+    $scraper->scrape($uri)->{actors};
 }
 
@@ -54,7 +60,12 @@
 
     my $scraper = scraper {
-        process q|//div[@id = 'bodyContent']/ul[position() > 2 and position() != last()]/li/a|, 'names[]' => 'TEXT' ;
+        process q|//div[@id = 'bodyContent']/ul[position() > 2 and position() != last()]/li/a|,
+            'actors[]' => {
+                'name' => 'TEXT',
+                'wikipedia' => '@href',
+            }
+        ;
     };
-    $scraper->scrape($uri);
+    $scraper->scrape($uri)->{actors};
 }
 
@@ -64,7 +75,12 @@
 
     my $scraper = scraper {
-        process q|//div[@id = 'bodyContent']/ul[position() > 2 and position() != last()]/li/a|, 'names[]' => 'TEXT' ;
+        process q|//div[@id = 'bodyContent']/ul[position() > 2 and position() != last()]/li/a|,
+            'actors[]' => {
+                'name' => 'TEXT',
+                'wikipedia' => '@href'
+            }
+        ;
     };
-    $scraper->scrape($uri);
+    $scraper->scrape($uri)->{actors};
 }
 
@@ -82,6 +98,5 @@
         process q|//div[@id = 'bodyContent']/p|,
             'yomi_text' => [ 'TEXT', sub {
-                use utf8;
-                if (m{[\((]([^、 ]+)}) {
+                if (m{[\((]([^、 )]+)}) {
                     $_ = $1;
                 } else {
@@ -93,5 +108,7 @@
 
     $actor->{yomi} ||= $detail->{yomi_infobox} || $detail->{yomi_text};
-    $actor->{yomi} =~ s/[ \s]+//g;
+    if ($actor->{yomi}) {
+        $actor->{yomi} =~ s/[ \s]+//g;
+    }
 
     $actor;
```
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)