root/lang/perl/Encode/trunk/bin/ucmlint @ 1788

Revision 1788, 4.9 kB (checked in by miyagawa, 6 years ago)

make ucmlint executable

  • Property svn:executable set to *
Line 
1#!/usr/local/bin/perl
2#
3# $Id: ucmlint,v 2.1 2006/05/03 18:24:10 dankogai Exp $
4#
5
6use strict;
7our  $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
8
9use Getopt::Std;
10our %Opt;
11getopts("Dehfv", \%Opt);
12
13if ($Opt{e}){
14   eval{ require Encode; };
15   $@ and die "can't load Encode : $@";
16}
17
18$Opt{h} and help();
19@ARGV or help();
20
21sub help{
22    print <<"";
23$0 -[Dehfv] [ucm files ...]
24  -D debug mode on
25  -e test with Encode module also (requires perl 5.7.3 or higher)
26  -h shows this message
27  -f forces roundtrip check even for |[123]
28  -v verbose mode
29
30}
31
32$| = 1;
33my (%Hdr, %U2E, %E2U);
34my $in_charmap = 0;
35my $nerror = 0;
36my $nwarning = 0;
37
38sub nit($;$){
39    my ($msg, $level) = @_;
40    my $lstr;
41    if ($level == 2){
42    $lstr = 'notice';
43    }elsif ($level == 1){
44    $lstr = 'warning'; $nwarning++;
45    }else{
46    $lstr = 'error'; $nerror++;
47    }
48    print "$ARGV:$lstr in line $.: $msg\n";
49}
50
51for $ARGV (@ARGV){
52    open UCM, $ARGV or die "$ARGV:$!";
53    %Hdr = %U2E = %E2U = ();
54    $in_charmap = $nerror = $nwarning = 0;
55    $. = 0;
56    while(<UCM>){
57    chomp;
58    s/\s*#.*$//o; /^$/ and next;
59    if ($_ eq "CHARMAP"){
60        $in_charmap = 1;
61        for my $must (qw/code_set_name mb_cur_min mb_cur_max/){
62        exists $Hdr{$must} or nit "<$must> nonexistent";
63        }
64        $Hdr{mb_cur_min} > $Hdr{mb_cur_max}
65        and nit sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
66                $Hdr{mb_cur_min},$Hdr{mb_cur_max});
67        $in_charmap = 1;
68        next;
69    }
70    unless ($in_charmap){
71        my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next;
72        $Opt{D} and warn "$hkey => $hvalue";
73        if ($hkey eq "code_set_name"){ # name check
74        exists $Hdr{code_set_name}
75        and nit "Duplicate <code_set_name>: $hkey";
76        }
77        if ($hkey eq "code_set_alias"){ # alias check
78        $hvalue eq $Hdr{code_set_name}
79        and nit qq(alias "$hvalue" is already in <code_set_name>);
80        }
81        $Hdr{$hkey} = $hvalue;
82    }else{
83        my $name = $Hdr{code_set_name};
84        my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s(\S+)/o or next;
85        $Opt{v} and nit $_, 2;
86        my $uni = uniparse($unistr);
87        my $enc = encparse($encstr);
88        $fb =~ /^\|([0123])$/ or nit "malformed fallback: $fb";
89        $fb = $1;
90        $Opt{f} and $fb = 0;
91        unless ($fb == 1){ # check uni -> enc
92        if (exists $U2E{$uni}){
93            nit "dupe encode map: U$uni => $U2E{$uni} and $enc", 1;
94        }else{
95            $U2E{$uni} = $enc;
96            if ($Opt{e} and $fb != 3) {
97            my $e = hex2enc($enc);
98            my $u = hex2uni($uni);
99            my $eu = Encode::encode($name, $u);
100            $e eq $eu
101                or nit qq(encode('$name', $uni) != $enc);
102            }
103        }
104        }
105        unless ($fb == 3){  # check enc -> uni
106        if (exists $E2U{$enc}){
107            nit "dupe decode map: $enc => U$E2U{$enc} and U$uni", 1;
108        }else{
109            $E2U{$enc} = $uni;
110            if ($Opt{e} and $fb != 1) {
111            my $e = hex2enc($enc);
112            my $u = hex2uni($uni);
113            $Opt{D} and warn "$uni, $enc";
114            my $de = Encode::decode($name, $e);
115            $de eq $u
116                or nit qq(decode('$name', $enc) != $uni);
117            }
118        }
119        }
120        # warn "$uni, $enc, $fb";
121    }
122    }
123    $in_charmap or nit "Where is CHARMAP?";
124    checkRT();
125    printf ("$ARGV: %s error%s found\n",
126        ($nerror == 0 ? 'no' : $nerror),
127        ($nerror > 1 ? 's' : ''));
128}
129
130exit;
131
132sub hex2enc{
133    pack("C*", map {hex($_)} split(",", shift));
134}
135sub hex2uni{
136    join("", map { chr(hex($_)) } split(",", shift));
137}
138
139sub checkRT{
140    for my $uni (keys %E2U){
141    my $enc = $U2E{$uni} or next; # okay
142    $E2U{$U2E{$uni}} eq $uni or
143        nit "RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}";
144    }
145    for my $enc (keys %E2U){
146    my $uni =  $E2U{$enc} or next; # okay
147    $U2E{$E2U{$enc}} eq $enc or
148        nit "RT failure: $enc => U$uni => $U2E{$E2U{$enc}}";
149    }
150}
151
152
153sub uniparse{
154    my $str = shift;
155    my @u;
156    push @u, $1 while($str =~ /\G<U(.*?)>/ig);
157    for my $u (@u){
158    $u =~ /^([0-9A-Za-z]+)$/o
159        or nit "malformed Unicode character: $u";
160    }
161    return join(',', @u);
162}
163
164sub encparse{
165    my $str = shift;
166    my @e;
167    for my $e (split /\\x/io, $str){
168    $e or next; # first \x
169    $e =~ /^([0-9A-Za-z]{1,2})$/io
170        or nit "Hex $e in $str is bogus";
171    push @e, $1;
172    }
173    return join(',', @e);
174}
175
176
177
178__END__
179
180A UCM file looks like this.
181
182  #
183  # Comments
184  #
185  <code_set_name> "US-ascii" # Required
186  <code_set_alias> "ascii"   # Optional
187  <mb_cur_min> 1             # Required; usually 1
188  <mb_cur_max> 1             # Max. # of bytes/char
189  <subchar> \x3F             # Substitution char
190  #
191  CHARMAP
192  <U0000> \x00 |0 # <control>
193  <U0001> \x01 |0 # <control>
194  <U0002> \x02 |0 # <control>
195  ....
196  <U007C> \x7C |0 # VERTICAL LINE
197  <U007D> \x7D |0 # RIGHT CURLY BRACKET
198  <U007E> \x7E |0 # TILDE
199  <U007F> \x7F |0 # <control>
200  END CHARMAP
201
Note: See TracBrowser for help on using the browser.