| 1 | #!/usr/bin/perl |
|---|
| 2 | use strict; |
|---|
| 3 | use warnings; |
|---|
| 4 | use utf8; |
|---|
| 5 | |
|---|
| 6 | use FindBin; |
|---|
| 7 | use Web::Scraper; |
|---|
| 8 | use DateTime; |
|---|
| 9 | use DateTime::Format::Strptime; |
|---|
| 10 | use YAML; |
|---|
| 11 | use URI; |
|---|
| 12 | use URI::QueryParam; |
|---|
| 13 | use lib "$FindBin::Bin/../lib"; |
|---|
| 14 | use JWatch::ConfigLoader; |
|---|
| 15 | use JWatch::Schema; |
|---|
| 16 | |
|---|
# ---------------------------------------------------------------------------
# Script body: connect to the database, bail out unless a game is currently
# in progress, then scrape the league schedule pages and mirror every game
# row into the Game table (recording an 'end' event when a score first
# appears).
# ---------------------------------------------------------------------------

my $config = JWatch::ConfigLoader->load;
my $schema = JWatch::Schema->connect(
    @{ $config->{'Model::DBIC'}->{connect_info} }
);

my $clubs  = $schema->resultset('Club');
my $games  = $schema->resultset('Game');
my $events = $schema->resultset('Event');

# Games whose kickoff lies within the last three hours.
# NOTE(review): the numeric comparison below relies on the result set
# numifying to its row count (DBIx::Class::ResultSet overloads '0+') --
# confirm against the installed DBIx::Class version.
my $gamecount = $games->search_literal(
    'now() > kickoff and '.
    'now() < date_add(kickoff, interval 3 hour)'
);
exit 0 unless $gamecount > 0;

my $baseurl = 'http://www.jsgoal.jp';

# Parser for the kickoff text scraped from each game page.
my $strp = DateTime::Format::Strptime->new(
    pattern   => '%Y年%m月%d日(%a) %H:%Mキックオフ',
    time_zone => 'Asia/Tokyo',
    locale    => 'ja',
);

# Every <tr> of the schedule table is handed to schedule_row(); the scraper
# result is the collected list of game hashes.
my $scraper = scraper {
    process '//table[@id="scheduletable"]/tr', 'game[]' => \&schedule_row;
    result 'game';
};

for my $path (qw(
    /schedule/2008/j1.html
    /schedule/2008/j2.html
)) {
    for my $scraped (@{ $scraper->scrape(URI->new_abs($path, $baseurl)) }) {
        my $existing = $games->find({ id => $scraped->{id} });

        # Unknown game: insert it and move on.
        unless ($existing) {
            $games->create($scraped);
            next;
        }

        # A home score showing up for the first time is recorded as an
        # 'end' event, timestamped with the current Tokyo time.
        if (!defined($existing->score_home) && defined($scraped->{score_home})) {
            $events->create({
                gameid   => $scraped->{id},
                type     => 'end',
                datetime => DateTime->now(time_zone => 'Asia/Tokyo'),
            });
        }

        $existing->update($scraped);
    }
}
|---|
| 62 | |
|---|
# Section number ("第N節") taken from the most recent section-header row.
# schedule_row() stores it here so the game rows that follow in the same
# table can be tagged with it.
my $gameno;

# Web::Scraper callback invoked for one <tr> of the schedule table.
#
# Takes the row element as its only argument.  Returns a hash reference
# describing the game (keys: id, category, gameno, kickoff, clubid_home,
# clubid_away, stadiumid, score_home, score_away), or nothing when the row
# is not a usable game row: a section header, a td.exp row, a row with
# fewer than five cells, a row without a game link, or a row naming a club
# that is not in the Club table (a warning is emitted for the latter).
#
# Side effects: updates the file-level $gameno on section-header rows and
# fetches the linked game page to read the kickoff time.
sub schedule_row {
    my $row = shift;

    # Section header: remember its number for the rows that follow.
    if (my $term = $row->look_down(_tag => 'td', class => 'term')) {
        ($gameno) = $term->as_text =~ m{第(\d+)節};
        return;
    }

    # Rows carrying a td.exp cell are not game rows.
    return if $row->look_down(_tag => 'td', class => 'exp');

    # Cells 1..4 are dereferenced below; skip rows that do not have them
    # instead of dying on an undefined element.
    my @cell = $row->look_down(_tag => 'td');
    return if @cell < 5;

    my $gamelink = $cell[2]->look_down(_tag => 'a') or return;
    my $gameurl  = $gamelink->attr('href')          or return;

    # The game id is the numeric file name; digits 5-6 of it are the category.
    my ($gameid, $category)
        = $gameurl =~ m{^/game/\d{4}/(\d{4}(\d{2})\d+)\.html$};
    my ($score_home, $score_away) = $cell[2]->as_text =~ m{(\d+)-(\d+)};

    # Resolve both clubs before fetching the game page, and skip the row
    # (rather than abort the whole run) when either one is unknown.
    my $club_home = $clubs->find({shortname => $cell[1]->as_text});
    my $club_away = $clubs->find({shortname => $cell[3]->as_text});
    unless (defined $club_home && defined $club_away) {
        warn "schedule_row: unknown club short name in row for $gameurl; skipping\n";
        return;
    }

    # The kickoff time is only available on the individual game page.
    my $kickoff_text = scraper {
        process 'div.time', 'kickoff' => 'TEXT';
        result 'kickoff';
    }->scrape(URI->new_abs($gameurl, $baseurl));
    my $kickoff = defined $kickoff_text
        ? $strp->parse_datetime($kickoff_text)
        : undef;

    # Declared unconditionally: "my $x = ... if COND" has undefined
    # behaviour and can leak the value from a previous call.
    my $stadiumid;
    if (my $stadiumlink = $cell[4]->look_down(_tag => 'a')) {
        $stadiumid = URI->new($stadiumlink->attr('href'))->query_param('s');
    }

    return {
        id          => $gameid || '',
        category    => $category,
        gameno      => $gameno,
        kickoff     => $kickoff || '',
        clubid_home => $club_home->id,
        clubid_away => $club_away->id,
        stadiumid   => $stadiumid || 0,
        score_home  => $score_home,
        score_away  => $score_away,
    };
}
|---|