Changeset 14566 for lang/perl/plagger

Show
Ignore:
Timestamp:
06/25/08 05:06:47 (6 years ago)
Author:
otsune
Message:
  • add some scrape
  • fix enclosure url
Files:
1 modified

Legend:

Unmodified
Added
Removed
  • lang/perl/plagger/assets/plugins/CustomFeed-Script/lantis-net_com_srw_og.pl

    r14565 r14566  
     7  7 use YAML;
     8  8 #use Plagger;
     9    #use Plagger::UserAgent;
        9 use Plagger::UserAgent;
    10 10
    11 11 my $stuff   = URI->new("http://lantis-net.com/srw_og/");
    12 12 my $scraper = scraper {
    13        process '//title',title=>'TEXT';
    14        process 'li#top > a',link=>'@href';
    15    #    process '//div[@class="copy"]',copyright=>'TEXT';
    16        process '.box_02', 'entry[]' => scraper {
    17            process '//h3/text()', title=>'TEXT';
    18            process '.radiotext>p', body=>'HTML';
    19            process '.date', date=>'TEXT';
    20            process '.radiolink', 'enclosure[]'=> scraper{
    21                process 'a:nth-child(2)',url=>'@href';
    22            }
       13     process '//title', 'title' => 'TEXT';
       14     process 'li#top > a', 'link' => '@href';
       15     process '.logo > h1 > img', 'image' => '@src';
       16     process_first '.box_01 > .left_01 > p', 'description' => 'TEXT';
       17     process '.box_01 > .left_01:nth-child(2) > p', 'author' => 'TEXT';
       18 #    process 'div.copy', 'copyright' => 'TEXT';
       19     process '#radio', 'entry[]' => scraper {
       20         process '//h3/text()', 'title' => 'TEXT';
       21         process '.radiotext>p', 'body' => 'HTML';
       22         process '.date', 'date' => 'TEXT';
       23         process '.radiolink', 'enclosure[]' => scraper {
       24             process 'a:nth-child(2)', 'url' => sub { $_->attr('href') };
       25         };
    23 26     };
    24 27 };
    25 28
    26    #$scraper->user_agent( Plagger::UserAgent->new );
       29 $scraper->user_agent( Plagger::UserAgent->new );
    27 30 my $result = $scraper->scrape($stuff);
    28 31