root/lang/perl/plagger/assets/plugins/CustomFeed-Script/lantis-net_com_srw_og.pl

Revision 14566, 1.0 kB (checked in by otsune, 4 years ago)
  • add some scrape
  • fix enclosure url
  • Property svn:executable set to *
Line 
#!/usr/bin/perl
# Scrape http://lantis-net.com/srw_og/ with Web::Scraper and print the
# resulting data structure to STDOUT as YAML.
#
# NOTE(review): this script lives under Plagger's CustomFeed-Script assets,
# so the YAML is presumably consumed by that plugin -- confirm the expected
# key names (title, link, image, description, author, entry) against it.
use strict;
use warnings;

use Web::Scraper;
use URI;
use YAML;
#use Plagger;
use Plagger::UserAgent;

# Page to scrape; passing a URI object makes the scraper fetch it itself.
my $stuff   = URI->new("http://lantis-net.com/srw_og/");

# Extraction rules. Each rule maps a CSS selector or XPath expression to a
# key in the result hash.
my $scraper = scraper {
    # Page-level fields.
    process '//title', 'title' => 'TEXT';
    process 'li#top > a', 'link' => '@href';
    process '.logo > h1 > img', 'image' => '@src';
    process_first '.box_01 > .left_01 > p', 'description' => 'TEXT';
    process '.box_01 > .left_01:nth-child(2) > p', 'author' => 'TEXT';
#    process 'div.copy', 'copyright' => 'TEXT';

    # Each element matching '#radio' becomes one item in the 'entry' list,
    # built by the nested scraper below.
    process '#radio', 'entry[]' => scraper {
        process '//h3/text()', 'title' => 'TEXT';
        process '.radiotext>p', 'body' => 'HTML';
        process '.date', 'date' => 'TEXT';

        # Each '.radiolink' element yields one item in the 'enclosure' list.
        process '.radiolink', 'enclosure[]' => scraper {
            # The callback returns the href attribute exactly as written in
            # the page, rather than going through the '@href' shortcut.
            process 'a:nth-child(2)', 'url' => sub { $_->attr('href') };
        };
    };
};

# Fetch the page through Plagger's user agent instead of the scraper default.
$scraper->user_agent( Plagger::UserAgent->new );
my $result = $scraper->scrape($stuff);

# Emit the scraped structure as UTF-8 encoded YAML.
binmode STDOUT, ":utf8";
print Dump $result;
Note: See TracBrowser for help on using the browser.