root/websites/jwatch.jp/trunk/misc/load_schedule_from_jsgoal.pl @ 11332

Revision 11332, 3.0 kB (checked in by lopnor, 5 years ago)

websites/jwatch.jp: little more htmls.

Line 
1#!/usr/bin/perl
2use strict;
3use warnings;
4use utf8;
5
6use FindBin;
7use Web::Scraper;
8use DateTime;
9use DateTime::Format::Strptime;
10use YAML;
11use URI;
12use URI::QueryParam;
13use lib "$FindBin::Bin/../lib";
14use JWatch::ConfigLoader;
15use JWatch::Schema;
16
# Load the application configuration and connect to the database using the
# connect_info list stored under the 'Model::DBIC' configuration key.
my $config = JWatch::ConfigLoader->load;
my $schema = JWatch::Schema->connect(
    @{ $config->{'Model::DBIC'}->{connect_info} }
);
my $clubs  = $schema->resultset('Club');
my $games  = $schema->resultset('Game');
my $events = $schema->resultset('Event');

# Only scrape while at least one game is "in progress": its kickoff has
# passed, but fewer than three hours have elapsed since then.  The row count
# is requested explicitly instead of relying on the resultset object being
# numified implicitly by the comparison.
my $gamecount = $games->search_literal(
    'now() > kickoff and '.
    'now() < date_add(kickoff, interval 3 hour)'
)->count;
$gamecount > 0 or exit 0;

# Site the schedule pages and the per-game pages are fetched from.
my $baseurl = 'http://www.jsgoal.jp';

# Parser for the kickoff text shown on a per-game page.
my $strp = DateTime::Format::Strptime->new(
    pattern => '%Y年%m月%d日(%a) %H:%Mキックオフ',
    time_zone => 'Asia/Tokyo',
    locale => 'ja',
);

# Every <tr> of the schedule table is handed to schedule_row(); the values it
# returns are collected under the 'game' key.
my $scraper = scraper {
    process '//table[@id="scheduletable"]/tr', 'game[]' => \&schedule_row;
    result 'game';
};

for my $uri (qw(
    /schedule/2008/j1.html
    /schedule/2008/j2.html
)) {
    # Fall back to an empty list when the page yielded no 'game' entries, so
    # that one empty schedule page does not abort the whole run.
    my $scraped = $scraper->scrape(URI->new_abs($uri, $baseurl)) || [];

    for my $game (@{$scraped}) {
        my $found = $games->find({id => $game->{id}});
        if ($found) {
            # A score showing up for a game that had none stored yet is
            # recorded as an 'end' event stamped with the current time.
            if (! defined $found->score_home && defined $game->{score_home}) {
                $events->create({
                        gameid => $game->{id},
                        type => 'end',
                        datetime => DateTime->now(time_zone => 'Asia/Tokyo'),
                    });
            }
            $found->update($game);
        } else {
            $games->create($game);
        }
    }
}
62
# Section number taken from the most recent "term" header row; the game rows
# that follow such a header are stored with this value as their gameno.
my $gameno;

# Web::Scraper callback invoked for each <tr> of the schedule table.
#
# Argument:
#   $row - the element for one table row.
#
# Returns a hash reference with the keys id, category, gameno, kickoff,
# clubid_home, clubid_away, stadiumid, score_home and score_away for a game
# row.  Returns nothing for "term" header rows (which only update $gameno),
# for "exp" rows, for rows without the expected cells or game link, and for
# rows whose clubs are not present in the Club table.
#
# Side effects: fetches the per-game page over HTTP to read the kickoff time
# and looks both clubs up in the database.
sub schedule_row {
    my $row = shift;

    # Header row: remember the section number for the rows that follow.
    if (my $term = $row->look_down(_tag => 'td', class => 'term')) {
        ($gameno) = $term->as_text =~ m{第(\d+)節};
        return;
    }
    $row->look_down(_tag => 'td', class => 'exp') and return;

    # Cells 1..4 are dereferenced below; skip rows that do not have them
    # instead of dying on a method call on undef.
    my @cell = $row->look_down(_tag => 'td');
    return if @cell < 5;

    my $gamelink = $cell[2]->look_down(_tag => 'a') or return;
    my $gameurl  = $gamelink->attr('href') or return;

    # The game id is the numeric file name of the game page; its 5th and 6th
    # digits are stored as the category.
    my ($gameid, $category)
        = $gameurl =~ m{\A/game/\d{4}/(\d{4}(\d{2})\d+)\.html\z};
    my ($score_home, $score_away) = $cell[2]->as_text =~ m{(\d+)-(\d+)};

    # Resolve both clubs before fetching the game page, so that a row which
    # cannot be stored does not cost an HTTP request.
    my $club_home = $clubs->find({shortname => $cell[1]->as_text});
    my $club_away = $clubs->find({shortname => $cell[3]->as_text});
    unless ($club_home && $club_away) {
        warn "schedule_row: unknown club in row for $gameurl, skipping\n";
        return;
    }

    # The kickoff time is only shown on the per-game page.
    my $kickoff_text = scraper {
        process 'div.time', 'kickoff' => 'TEXT';
        result 'kickoff';
    }->scrape(URI->new_abs($gameurl, $baseurl));
    my $kickoff = defined $kickoff_text
        ? $strp->parse_datetime($kickoff_text)
        : undef;

    # Declared unconditionally: "my $x = ... if COND" has undefined behaviour
    # in Perl.
    my $stadiumid;
    if (my $stadiumlink = $cell[4]->look_down(_tag => 'a')) {
        $stadiumid = URI->new($stadiumlink->attr('href'))->query_param('s');
    }

    return {
        id => $gameid || '',
        category => $category,
        gameno => $gameno,
        kickoff => $kickoff || '',
        clubid_home => $club_home->id,
        clubid_away => $club_away->id,
        stadiumid => $stadiumid || 0,
        score_home => $score_home,
        score_away => $score_away,
    };
}
Note: See TracBrowser for help on using the browser.