| 1 | #!/usr/bin/env perl |
|---|
| 2 | |
|---|
| 3 | use strict; |
|---|
| 4 | use warnings; |
|---|
| 5 | |
|---|
| 6 | use utf8; |
|---|
| 7 | use Web::Scraper; |
|---|
| 8 | use URI; |
|---|
| 9 | use YAML; |
|---|
| 10 | use DateTime; |
|---|
| 11 | |
|---|
# Page to scrape: the index page whose header and audio links become the feed.
my $stuff = URI->new(
    "http://www.nhk.or.jp/gogaku/english/business1/index.html"
);

# Feed-level fields (title, image, description) come from the page header;
# every "eng-bus-audnwlink" block under "eng-bus-audionow" becomes one entry.
my $scraper = scraper {
    process '//h1/img', 'title' => '@alt';
    # Stringify the image URL the same way the enclosure URLs are, so the
    # YAML output carries a plain URL rather than a serialised URI object.
    process '//h1/img', 'image' => ['@src', \&to_string];
    process '//div[@id="con-procontent"]/p[1]', 'description' => 'TEXT';
    #process '//div[@id="eng-bus-audionow"]/div[@class="eng-bus-audnwlink" and p ]',
    process '//div[@id="eng-bus-audionow"]/div[@class="eng-bus-audnwlink"]',
        'entry[]' => scraper {
            # The link text is the only per-entry text available, so it
            # feeds title, body and (through mk_date) the entry date.
            process '//a/text()', 'title' => 'TEXT';
            process '//a/text()', 'body'  => 'TEXT';
            process '//a/text()', 'date'  => ['TEXT', \&mk_date];
            process '//a', 'enclosure[]' => scraper {
                process '//a', 'url' => ['@href', \&to_string];
            };
        };
};

my $result = $scraper->scrape($stuff);

# Store the feed link as a plain string, not as the URI object itself.
$result->{link} = $stuff->as_string;

# Dump() returns a character string; encode it strictly as UTF-8 on output.
binmode STDOUT, ":encoding(UTF-8)";
print Dump $result;
|---|
| 37 | |
|---|
# Filter callback: convert a scraped URI object into its plain string form.
#
# Uses the first argument when one is passed and falls back to the topic
# variable $_ otherwise, so it works both as a filter callback and as an
# ordinary function call.
#
# Returns the result of ->as_string for references; values that are not
# references (plain strings, undef) are returned unchanged instead of dying
# on a method call.
sub to_string {
    my $value = @_ ? $_[0] : $_;

    return $value unless ref $value;
    return $value->as_string;
}
|---|
| 41 | |
|---|
# Filter callback: turn a Japanese "M月D日" fragment into a "YYYY-MM-DD" date.
#
# The text carries only month and day, so the year is inferred: the date is
# tried in the current, previous and following year (relative to today in the
# Asia/Tokyo time zone) and the candidate closest to today is chosen. On a
# tie the current year wins, then the previous year.
#
# Takes the text to search. Returns the date string, or nothing when the text
# is undefined, contains no "M月D日" pattern, or names a day that exists in
# none of the three candidate years.
sub mk_date {
    my $input = shift;
    return unless defined $input;

    my ($month, $day) = $input =~ m!(\d+)月(\d+)日!
        or return;

    my $today = DateTime->now(time_zone => 'Asia/Tokyo')->truncate(to => 'day');
    my $year  = $today->year;

    my ($closest, $closest_gap);
    for my $candidate_year ($year, $year - 1, $year + 1) {
        # set() dies on impossible dates (month 13, February 30, February 29
        # in a non-leap year); skip such candidates instead of aborting.
        local $@;
        my $candidate = eval {
            $today->clone->set(
                year  => $candidate_year,
                month => $month,
                day   => $day,
            );
        };
        next unless defined $candidate;

        # delta_days() is always positive, giving a symmetric distance for
        # past and future candidates alike.
        my $gap = $today->delta_days($candidate)->in_units('days');
        if (!defined $closest_gap || $gap < $closest_gap) {
            ($closest, $closest_gap) = ($candidate, $gap);
        }
    }

    return unless defined $closest;
    return $closest->ymd;
}
|---|