root/lang/perl/DateTime-Lite/trunk/tools/timezone/parse_olson.pl @ 27549

Revision 27549, 14.7 kB (checked in by daisuke, 6 years ago)

LINKS needs to be initialized regardless

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl -w
2
3use strict;
4
5use lib './lib';
6
7use Carp::Always;
8use Data::Dumper;
9use DateTime::Lite qw(Arithmetic);
10use DateTime::Lite::OlsonDB;
11use File::Copy;
12use File::Find::Rule;
13use File::Path;
14use File::Spec;
15use Getopt::Long;
16use YAML;
17
18$Data::Dumper::Indent = 1;
19$Data::Dumper::Sortkeys = 1;
20$Data::Dumper::Terse = 1;
21
22my $VERSION = "0.07";
23
24my $INFINITY  = 100 ** 100 ** 100;
25
26my %opts;
27GetOptions( 'dir:s'     => \$opts{dir},
28            'clean'     => \$opts{clean},
29            'version:s' => \$opts{version},
30            'old'       => \$opts{old},
31            'file:s'    => \$opts{file},
32            'name:s'    => \$opts{name},
33            'help'      => \$opts{help},
34          );
35
36
37$opts{help} = 1
38    unless defined $opts{dir} && -d $opts{dir};
39
40$opts{help} = 1
41    unless defined $opts{version} || $opts{file} || $opts{name};
42
43$opts{version} ||= 'test';
44
45if ( $opts{help} )
46{
47    print <<'EOF';
48
49This script parses the Olson time zone database files and turns them
50into a set of Perl modules.  It also generates the MANIFEST and the
51DateTime::Lite::TimeZone::Catalog module, which contains a list of all the
52available time zone names.
53
54By default, it looks for files named africa, antarctica, asia,
55australasia, europe, northamerica, pacificnew, southamerica, and
56backward.  All other files are ignored.
57
58It takes the following arguments:
59
60  --dir      A directory containing Olson db files.
61
62  --version  The version of the Olson data files being used.
63             Required unless one of the debugging options is given.
64
65  --clean    Remove old generated modules (which may not be valid with
66             the latest Olson database)
67
68  --file     Parse just the file with the given name.  For debugging.
69
70  --name     Only create the specified time zone.  For debugging.
71
72  --old      Also look for files named etcetera, factory, and systemv
73
74  --help     What you are reading
75
76If the --file or --name options are specified, the MANIFEST and
77DateTime::Lite::TimeZone::Catalog files will not be generated.
78
79EOF
80
81    exit;
82}
83
84clean() if $opts{clean};
85
86my @files;
87
88if ( $opts{file} )
89{
90    @files = $opts{file};
91}
92else
93{
94    @files = qw( africa antarctica asia australasia
95                 europe northamerica pacificnew
96                 southamerica backward
97               );
98
99    push @files, qw( etcetera factory systemv )
100        if $opts{old};
101}
102
103my $man;
104unless ( $opts{name} || $opts{file} )
105{
106    copy( 'MANIFEST.base', 'MANIFEST' );
107    open $man, ">>MANIFEST" or die "Cannot write to MANIFEST: $!";
108}
109
110my ( @zones, %categories, %links );
111
112my $autogen_warning = <<"EOF";
113# This file is auto-generated by the Perl DateTime Suite time zone
114# code generator ($VERSION) This code generator comes with the
115# DateTime::Lite::TimeZone module distribution in the tools/ directory
116EOF
117
118parse_file($_) for sort @files;
119
120exit if $opts{name};
121
122clean_links();
123
124make_catalog_pm();
125
126sub clean
127{
128    for my $f ( File::Find::Rule
129                ->file
130                ->name('*.pm')
131                ->grep('This file is auto-generated' )
132                ->in('lib'),
133                File::Find::Rule
134                ->file
135                ->name('zd*.t')
136                ->in('t')
137              )
138    {
139        unlink $f or die "Cannot unlink $f: $!";
140    }
141}
142
143sub parse_file
144{
145    my $file = File::Spec->catfile( $opts{dir}, shift );
146
147    die "No such file $file\n" unless -e $file;
148
149    print "Now parsing $file\n";
150
151    my $odb = DateTime::Lite::OlsonDB->new;
152
153    $odb->parse_file($file);
154
155    %links = ( %links, $odb->links );
156
157    foreach my $zone_name ( sort $odb->zone_names )
158    {
159        if ( $opts{name} )
160        {
161            next unless $zone_name eq $opts{name};
162        }
163        print "  creating zone $zone_name\n";
164
165        push @zones, $zone_name;
166
167        my $name;
168        my @dir;
169        if ( $zone_name =~ m{/} )
170        {
171            my $category;
172            ( $category, $name ) = split /\//, $zone_name, 2;
173            push @{ $categories{$category} }, $name;
174
175            ($dir[0] = $category) =~ tr/-/_/;
176        }
177        else
178        {
179            $name = $zone_name;
180        }
181
182        (my $outfile1 = $name) =~ tr/-/_/;
183
184        (my $mod_name = $zone_name) =~ s/\//::/g;
185        $mod_name =~ tr/-/_/;
186
187        my $max_year = (localtime)[5] + 1910;
188        my $zone = $odb->expanded_zone( name => $zone_name,
189                                        expand_to_year => $max_year,
190                                      );
191
192        my $spans = serialize_spans(zone_as_spans($zone));
193
194=head1
195        $spans =~ s/-inf/DateTime::Lite::TimeZone::NEG_INFINITY/g;
196        $spans =~ s/inf/DateTime::Lite::TimeZone::INFINITY/g;
197
198        $spans =~ s/('(?:start|end)_date'\s+=>\s+)'(\d+)'/$1$2/g;
199=cut
200
201        my %generator = zone_generator($zone);
202
203        my $has_dst_changes = grep { $_->is_dst } $zone->sorted_changes;
204
205        my $from = "Generated from $file.";
206        $from .= "  Olson data version $opts{version}"
207            if defined $opts{version};
208
209        my $body = <<"EOF";
210$autogen_warning
211#
212# $from
213#
214# Do not edit this file directly.
215#
216EOF
217        $body .= Data::Dumper::Dumper({
218            olson_version => $opts{version},
219            name => $zone_name,
220            has_dst_changes => $has_dst_changes,
221            max_year => $max_year,
222            spans => $spans,
223            %generator,
224        });
225
226        my @name_pieces = split /\//, $outfile1;
227        my $filename = (pop @name_pieces) . '.dat';
228
229        my $outdir = File::Spec->catdir( qw( lib DateTime Lite TimeZone ),
230                                         @dir, @name_pieces  );
231
232        mkpath( $outdir, 1, 0755 );
233
234        my $outfile2 = File::Spec->catfile( $outdir, $filename );
235
236        open my $fh, ">$outfile2" or die "Cannot write to $outfile2: $!";
237        print $fh $body or die "Cannot write to $outfile2: $!";
238        close $fh or die "Cannot write to $outfile2: $!";
239
240        unless ( $opts{name} || $opts{file} )
241        {
242            print $man "$outfile2\n" or die "Cannot write to MANIFEST: $!"
243        }
244    }
245}
246
247sub zone_as_spans
248{
249    my $zone = shift;
250
251    my @spans;
252
253    my @changes = $zone->sorted_changes;
254
255    for ( my $x = 1; $x < @changes; $x++ )
256    {
257        my $last_total_offset = $x > 1 ? $changes[ $x - 2 ]->total_offset : undef;
258
259        my $span =
260            DateTime::Lite::OlsonDB::Change::two_changes_as_span
261                ( @changes[ $x - 1, $x ], $last_total_offset );
262
263        push @spans, $span;
264
265        if (@spans > 2)
266        {
267            die "Gap in UTC end/start datetime for " . $zone->name
268                unless $spans[-2]{utc_end} == $spans[-1]{utc_start};
269        }
270    }
271
272    unless ( $zone->infinite_rules )
273    {
274        my $last_change = $changes[-1];
275
276        my $last_observance = $last_change->observance;
277
278        if ( $last_change->utc_start_datetime )
279        {
280            push @spans, { utc_start   => $last_change->utc_start_datetime->utc_rd_as_seconds,
281                           utc_end     => $INFINITY,
282                           local_start => $last_change->local_start_datetime->utc_rd_as_seconds,
283                           local_end   => $INFINITY,
284                           short_name  => $last_change->short_name,
285                           offset      => $last_change->total_offset,
286                       is_dst      => $last_change->is_dst,
287                         };
288        }
289        # This happens with zones that have only one rule and no real changes (Pacific/Johnston)
290        else
291        {
292            my $utc_start =
293                @spans ? $spans[-1]{utc_end} : -1 * $INFINITY;
294
295            push @spans, { utc_start   => $utc_start,
296                           utc_end     => $INFINITY,
297                           local_start => $utc_start - $last_observance->total_offset,
298                           local_end   => $INFINITY,
299                           short_name  => sprintf( $last_observance->format, '' ),
300                           offset      => $last_observance->total_offset,
301                           is_dst      => 0,
302                         };
303        }
304    }
305
306    return \@spans;
307}
308
309sub serialize_spans
310{
311    my $spans = shift;
312
313    return [ map { serialize_span($_) } @$spans ];
314=head1
315    my $string = "[\n";
316    $string .= join "\n", map { serialize_span($_) } @$spans;
317    $string .= "\n]";
318
319    return $string;
320=cut
321}
322
323sub serialize_span
324{
325    my $span = shift;
326    # must correspond to constants in DT::TZ, and short_name is always last
327    my @keys = qw( utc_start utc_end local_start local_end offset is_dst short_name );
328
329    return [ map {
330        my $v = $span->{$_};
331=head1
332print "'$v' -> ";
333        if ($v eq '-inf') {
334print "(should set to NEG_INF) ";
335            $v = &DateTime::Lite::NEG_INFINITY;
336        } elsif ($v eq 'inf') {
337print "(should set to INF) ";
338            $v = &DateTime::Lite::INFINITY;
339        }
340print "'$v'\n";
341=cut
342        $v
343    } @keys ];
344=head1
345    my $string = "    [\n";
346    $string .= join ",\n", @$span{@keys};
347    $string .= ",\n'$span->{short_name}'";
348    $string .= "\n    ],";
349
350    return $string;
351=cut
352}
353
354sub zone_generator
355{
356    my $zone = shift;
357
358    return () unless $zone->infinite_rules;
359
360    my $generator = <<'EOF';
361sub _last_offset { !OFFSET }
362
363my $last_observance = !LAST_OBSERVANCE;
364sub _last_observance { $last_observance }
365
366my $rules = !RULES;
367sub _rules { $rules }
368EOF
369
370    my $last_observance = ($zone->sorted_changes)[-1]->observance;
371
372    # hack to trim size of dumped object
373    delete $last_observance->{utc_start_datetime}{locale};
374    delete $last_observance->{local_start_datetime}{locale};
375    delete $last_observance->{utc_start_datetime}{local_c};
376    delete $last_observance->{local_start_datetime}{local_c};
377    delete $last_observance->{rules};
378    delete $last_observance->{first_rule};
379
380    # This assumes that there is only one observance from end of
381    # changes til end of time, which should be guaranteed by code in
382    # OlsonDB module.
383    my $offset = $last_observance->total_offset;
384
385    my @rules = $zone->infinite_rules;
386
387=head1
388    # This is cleaner than making the above a double-quoted string
389    $generator =~ s/!RULES/Dumper \@rules/eg;
390    $generator =~ s/!LAST_OBSERVANCE/Dumper $last_observance/eg;
391    $generator =~
392        s/\$VAR1->{'local_start_datetime'}{'tz'}/bless( {
393      'name' => 'floating',
394      'offset' => 0
395    }, 'DateTime::Lite::TimeZone::Floating' )/;
396    $generator =~
397        s/\$VAR1->{'utc_start_datetime'}{'tz'}/bless( {
398      'name' => 'floating',
399      'offset' => 0
400    }, 'DateTime::Lite::TimeZone::Floating' )/;
401    $generator =~ s/!OFFSET/$offset/g;
402
403    return $generator;
404=cut
405    return (
406        rules => \@rules,
407        last_observance => $last_observance,
408        last_offset => $offset
409    );
410
411}
412
413sub clean_links
414{
415    # override some links and add others
416    %links =
417        ( %links,
418          'Etc/GMT'       => 'UTC',
419          'Etc/GMT+0'     => 'UTC',
420          'Etc/Universal' => 'UTC',
421          'Etc/UCT'       => 'UTC',
422          'Etc/UTC'       => 'UTC',
423          'Etc/Zulu'      => 'UTC',
424          'GMT0'          => 'UTC',
425          'GMT'           => 'UTC',
426          'AKST9AKDT'     => 'America/Anchorage',
427          'JST-9'         => 'Asia/Tokyo',
428        );
429
430    delete $links{UTC};
431
432    # Some links resolve to other links - chase them down until they point
433    # to a real zone.
434    while ( my @k = grep { $links{ $links{$_} } } keys %links )
435    {
436        for my $k (@k)
437        {
438            $links{$k} = $links{ $links{$k} };
439        }
440    }
441}
442
443sub make_catalog_pm
444{
445    my $links = Dumper(\%links);
446    $links =~ s/{/(/;
447    $links =~ s/}/)/;
448
449    my $zones = join "\n", sort @zones;
450    my $cat_names = join "\n", sort keys %categories;
451    my $cat = '';
452    foreach my $c ( sort keys %categories )
453    {
454        $cat .= join("\t", $c, sort @{ $categories{$c} }) . "\n";
455    }
456
457    my %countries = parse_zone_tab();
458    # hard-code this alias per request of David Cantrell on the list.
459    $countries{UK} = $countries{GB};
460
461    my $countries = '';
462    for my $c ( sort keys %countries )
463    {
464        $countries .= join("\t", lc $c, @{ $countries{$c} }) . "\n";
465#        $countries .= qq|  '\L$c' => [ qw(\n|;
466#        # We explicitly do not sort these because the order in
467#        # zones.tab is by population.
468#        $countries .= join "\n", map { "    $_" } @{ $countries{$c} };
469#        $countries .= "\n) ],\n";
470    }
471
472    my $zonecatalog = <<"EOF";
473$autogen_warning
474#
475# Do not edit this file directly.
476
477package DateTime::Lite::TimeZone::Catalog;
478
479use strict;
480
481use vars qw( \@ALL \@CATEGORY_NAMES \%CATEGORIES \%ZONES_BY_COUNTRY \%LINKS \$LOADED_CATALOG );
482
483sub OlsonVersion { '$opts{version}' }
484
485%LINKS = $links;
486
487sub load {
488    my \$mode = '';
489    while (<DATA>) {
490        chomp;
491        next unless length($_);
492
493        if (/^__(.+)__\$/) {
494            if (\$1 eq 'ZONES') {
495                \$mode = 'zone';
496            } elsif (\$1 eq 'CATEGORY_NAMES') {
497                \$mode = 'category_names';
498            } elsif (\$1 eq 'CATEGORIES') {
499                \$mode = 'categories';
500            } elsif (\$1 eq 'BY_COUNTRY') {
501                \$mode = 'by_country';
502            } elsif (\$1 eq 'END') {
503                last;
504            } else {
505                \$mode = '';
506            }
507            next;
508        }
509
510        if (\$mode eq 'zone') {
511            push \@ALL, \$_;
512        } elsif (\$mode eq 'category_names') {
513            push \@CATEGORY_NAMES, \$_;
514        } elsif (\$mode eq 'by_country') {
515            my (\$name, \@list) = split(/\\t/, \$_);
516            \$ZONES_BY_COUNTRY{\$name} = \\\@list;
517        } elsif (\$mode eq 'categories') {
518            my (\$name, \@list) = split(/\\t/, \$_);
519            \$CATEGORIES{\$name} = \\\@list;
520        }
521    }
522    \$LOADED_CATALOG = 1;
523}
524
5251;
526
527__DATA__
528__ZONES__
529$zones
530__CATEGORY_NAMES__
531$cat_names
532__CATEGORIES__
533$cat
534__BY_COUNTRY__
535$countries
536__END__
537
538=head1 NAME
539
540DateTime::Lite::TimeZone::Catalog - Provides a list of all valid time zone names
541
542=head1 SYNOPSIS
543
544See DateTime::Lite::TimeZone for API details.
545
546=head1 DESCRIPTION
547
548This module contains an enumerated list of all known system timezones,
549so that applications can easily present a list of timezones.
550
551=head1 AVAILABLE ZONES
552
553=head2 Zones by Region
554
555EOF
556
557    for my $category ( sort keys %categories )
558    {
559        $zonecatalog .= "=head3 $category\n\n";
560
561        for my $zone ( @{ $categories{$category} } )
562        {
563            $zonecatalog .= "  $category/$zone\n";
564        }
565
566        $zonecatalog .= "\n";
567    }
568
569    open my $fh, ">lib/DateTime/Lite/TimeZone/Catalog.pm" or die $!;
570    print $fh $zonecatalog or die $!;
571    close $fh or die $!;
572}
573
574
575sub parse_zone_tab
576{
577    my $file = File::Spec->catfile( $opts{dir}, 'zone.tab' );
578
579    open my $fh, "<$file" or die "Cannot read $file: $!";
580
581    my %countries;
582    while (<$fh>)
583    {
584        next if /^\#/;
585        chomp;
586
587        my ( $cc, undef, $tz, undef ) = split /\t/, $_;
588
589        push @{ $countries{$cc} }, $tz;
590    }
591
592    return %countries;
593}
594
Note: See TracBrowser for help on using the browser.