| 1 | #!/usr/bin/perl |
|---|
| 2 | use strict; |
|---|
| 3 | use warnings; |
|---|
| 4 | use utf8; |
|---|
| 5 | use FindBin; |
|---|
| 6 | use YAML; |
|---|
| 7 | use URI; |
|---|
| 8 | use DateTime; |
|---|
| 9 | use DateTime::Format::MySQL; |
|---|
| 10 | use DateTime::Format::Strptime; |
|---|
| 11 | use Web::Scraper; |
|---|
| 12 | use lib "$FindBin::Bin/../lib"; |
|---|
| 13 | use JWatch::ConfigLoader; |
|---|
| 14 | use JWatch::Schema; |
|---|
| 15 | |
|---|
# Load the application configuration and connect the DBIx::Class schema with
# the connection settings stored under the 'Model::DBIC' key.
my $config = JWatch::ConfigLoader->load;
my $schema = JWatch::Schema->connect(
    @{ $config->{'Model::DBIC'}->{connect_info} }
);

# One resultset handle per source, in the same order as the source names.
my ($games, $events, $game_players, $players, $game_officials, $officials)
    = map { scalar $schema->resultset($_) }
      qw(Game Event GamePlayer Player GameOfficial Official);

# Scraper for a match stats page. Each rule hands every matching <tr> to a
# callback defined further down in this file:
#   players[]   - rows of the table holding a row whose 4th cell is '先発'
#                 (starting line-up)
#   events[]    - rows following the row whose 1st cell is '試合経過'
#                 (match progress)
#   officials[] - rows of the table holding a row whose 1st cell is '主審'
#                 (referee)
my $scraper = scraper {
    process '//tr[td[4]="先発"]/parent::table/tr',
        'players[]' => \&get_players;
    process '//tr[td[1]="試合経過"]/following-sibling::tr',
        'events[]' => \&get_events;
    process '//tr[td[1]="主審"]/parent::table/tr',
        'officials[]' => \&get_officials;
};

# State shared between the main loop and the scraper callbacks.
my $game;          # game row currently being scraped
my $gameid;        # id of that game
my $kickoff;       # kickoff time of that game
my $type;          # line-up section label carried from row to row by get_players
my $playerhash;    # {clubid}{player name} => shirt number, filled by get_players
|---|
| 33 | |
|---|
# Poll every game whose kickoff lies within three hours of the current time
# (before or after), scrape its stats page and store whatever is new.
my @current_games = $games->search_literal(
    'now() > date_sub(kickoff, interval 3 hour) and now() < date_add(kickoff, interval 3 hour)'
);

for my $row (@current_games) {
    # The scraper callbacks read these file-level variables, so they have to
    # be set before scrape() is called.
    $game    = $row;
    $gameid  = $game->id;
    $kickoff = $game->kickoff;

    # The page name is the game id with its trailing eight digits removed.
    my $gameid_short = $gameid;
    $gameid_short =~ s{\d{8}$}{};

    my $page = join('/', $kickoff->year, $kickoff->strftime("%m%d"), $gameid_short);
    my $uri  = URI->new('http://www.jsgoal.jp/score/board/' . $page . '_stats.html');

    # A failed scrape is reported and the game is skipped.
    my $res = eval { $scraper->scrape($uri) };
    if ($@) {
        warn $@;
        next;
    }

    # Line-ups and officials are stored only while no 'member' event has
    # been recorded for this game.
    my $member_recorded = $events->find({ gameid => $gameid, type => 'member' });
    if (!$member_recorded) {
        for my $player (@{ $res->{players} }) {
            $game_players->find_or_create($player);
        }
        for my $official (@{ $res->{officials} }) {
            $game_officials->find_or_create($official);
        }

        # Record the 'member' event only when both lists hold more than one
        # entry.
        my $official_count = @{ $res->{officials} };
        my $player_count   = @{ $res->{players} };
        if ($official_count > 1 && $player_count > 1) {
            $events->find_or_create({
                gameid   => $gameid,
                type     => 'member',
                datetime => DateTime->now(time_zone => 'local'),
            });
        }
    }

    # Store each scraped event that is not in the database yet, stamped with
    # the time it was first seen.
    for my $event (@{ $res->{events} }) {
        next if $events->find($event);
        $event->{datetime} = DateTime->now(time_zone => 'local');
        $events->create($event);
    }
}
|---|
| 67 | |
|---|
# Scraper callback for one row of the line-up table; $_ is the <tr> element.
#
# A row holds the home player in its first three cells (position, number,
# name) and the away player in its last three. A seven-cell row additionally
# carries the section label in its fourth cell, which sets the file-level
# $type ('starter' for '先発', otherwise 'reserve') for this row and the rows
# after it.
#
# Side effect: records name => number for both clubs in $playerhash.
# Returns two hashrefs (home entry, away entry), or nothing for three-cell
# rows and for the row whose first cell reads 'Pos'.
sub get_players {
    my @cells = $_->look_down(_tag => 'td');

    # Three-cell rows carry no player data (presumably headings).
    return if @cells == 3;

    if (@cells == 7) {
        $type = $cells[3]->as_text eq '先発' ? 'starter' : 'reserve';
    }

    # Column-heading row.
    return if $cells[0]->as_text eq 'Pos';

    my $home_club   = $game->clubid_home;
    my $away_club   = $game->clubid_away;
    my $home_number = $cells[1]->as_text;
    my $away_number = $cells[-2]->as_text;

    # Remember the shirt numbers by name so process_event can resolve them.
    $playerhash->{$home_club}->{ $cells[2]->as_text }  = $home_number;
    $playerhash->{$away_club}->{ $cells[-1]->as_text } = $away_number;

    my $home_entry = {
        gameid   => $gameid,
        clubid   => $home_club,
        position => $cells[0]->as_text,
        number   => $home_number,
        type     => $type,
    };
    my $away_entry = {
        gameid   => $gameid,
        clubid   => $away_club,
        position => $cells[-3]->as_text,
        number   => $away_number,
        type     => $type,
    };

    return ($home_entry, $away_entry);
}
|---|
| 95 | |
|---|
# Scraper callback for one row of the match-progress table; $_ is the <tr>.
#
# The second cell holds the time label: '前半N' (first half, minute N),
# '後半N' (second half, stored as N + 45), 'ハーフタイム' (half time, 45) or
# '試合終了' (full time, 90). Any other label leaves gametime undefined.
# When the first cell's trimmed text is longer than one character the event
# belongs to the home club and is read from that cell; otherwise it belongs
# to the away club and is read from the third cell.
#
# Returns a hashref with gameid, gametime and clubid, plus whatever keys
# process_event() yields for the event cell. Rows that lack the cells needed
# above return nothing, so a stray short row is skipped instead of aborting
# the scrape of the whole page.
sub get_events {
    my @cell = $_->look_down(_tag => 'td');

    # Without a second cell there is no time label to read.
    return unless @cell > 1;

    my $gametime;
    if ($cell[1]->as_text =~ /^(\D+)(\d+)?/) {
        my ($period, $minute) = ($1, $2);
        if ($period eq '前半') {
            $gametime = $minute;
        } elsif ($period eq '後半') {
            # The minute is optional in the pattern; treat a missing one as 0
            # so the addition never operates on undef.
            $gametime = ($minute || 0) + 45;
        } elsif ($period eq 'ハーフタイム') {
            $gametime = 45;
        } elsif ($period eq '試合終了') {
            $gametime = 90;
        }
    }

    my ($clubid, $needcell);
    if (length($cell[0]->as_trimmed_text) > 1) {
        $clubid   = $game->clubid_home;
        $needcell = 0;
    } else {
        $clubid   = $game->clubid_away;
        $needcell = 2;
    }

    # An away event needs the third cell; skip the row if it is missing.
    return unless defined $cell[$needcell];

    # The row-level keys come last so they win over any key of the same name
    # returned by process_event.
    my %event = (
        process_event($cell[$needcell], $clubid),
        gameid   => $gameid,
        gametime => $gametime,
        clubid   => $clubid,
    );

    return \%event;
}
|---|
| 127 | |
|---|
# Turn one event cell into the event's player/type/description fields.
#
# Parameters:
#   $elem   - the table cell element describing the event
#   $clubid - club the event belongs to; selects the name => number map in
#             $playerhash
#
# The event type is the part of the icon's src between the first '_' and the
# last '.'. For 'change' events the cell text is split on ' → ' into two
# names and the second one is resolved too; for 'other' events the cell text
# becomes the description. The text 'オウンゴール' (own goal) is reported
# with description 'own goal' and no player.
#
# Returns a list of key/value pairs (player1, player2, type, description),
# or an empty list when the cell contains no <img>. Names that cannot be
# resolved to a number produce a warning and are returned as 0.
sub process_event {
    my ($elem, $clubid) = @_;
    my ($player1, $player2, $description);
    my $img = $elem->look_down(_tag => 'img') or return ();

    # Both a missing src attribute and a src that does not match leave the
    # type empty; it is reported as 'other' below. Normalising to '' keeps
    # the string comparisons free of "uninitialized" warnings.
    my $src = $img->attr('src');
    $src = '' unless defined $src;
    my ($eventtype) = $src =~ /_(.+)\./;
    $eventtype = '' unless defined $eventtype;

    ($player1 = $elem->as_trimmed_text) =~ s{^\s+|\s+$}{}g;

    if ($eventtype eq 'change') {
        ($player1, $player2) = $player1 =~ /(.+)\s→\s(.+)/;
        # Without the arrow the match yields nothing; fall back to empty
        # names so the lookups below simply fail and get reported.
        $player1 = '' unless defined $player1;
        $player2 = '' unless defined $player2;
        $player2 = $playerhash->{$clubid}->{$player2} || '';
        warn "player2 not found:", $elem->as_text, ", clubid:", $clubid, ", gameid:", $gameid unless $player2;
    } elsif ($eventtype eq 'other') {
        $description = $elem->as_trimmed_text;
    }

    if ($player1 eq 'オウンゴール') {
        $description = 'own goal';
        $player1 = '';
    } else {
        $player1 = $playerhash->{$clubid}->{$player1} || '';
        warn "player1 not found:", $elem->as_text, ", clubid:", $clubid, ", gameid:", $gameid unless $player1;
    }

    return (
        player1     => $player1 || 0,
        player2     => $player2 || 0,
        type        => $eventtype || 'other',
        description => $description || '',
    );
}
|---|
| 155 | |
|---|
# Scraper callback for one row of the officials table; $_ is the <tr>.
#
# The first cell names the role: '主審' (referee) maps to 'referee', anything
# else to 'assistant'. The last cell holds the official's name, which is
# looked up against the stored names with their spaces removed.
#
# Returns a hashref (gameid, type, officialid), or nothing when the row has
# no <td> cells or no stored official matches the name.
sub get_officials {
    my @cell = $_->look_down(_tag => 'td');

    # Rows without data cells cannot describe an official.
    return unless @cell;

    # Kept in a lexical: the file-level $type is state that get_players
    # carries between line-up rows and must not be overwritten here.
    my $role = ($cell[0]->as_text eq '主審') ? 'referee' : 'assistant';

    my $official = $officials->search_literal(
        "replace(name, ' ','') = ?", $cell[-1]->as_text
    )->next or return;

    return {
        gameid     => $gameid,
        type       => $role,
        officialid => $official->id,
    };
}
|---|