Changeset 1572
- Timestamp:
- 11/15/07 16:03:28 (6 years ago)
- Location:
- lang/perl/Geography-AddressExtract-Japan/trunk
- Files:
-
- 4 modified
-
bin/make_maps.pl (modified) (2 diffs)
-
lib/Geography/AddressExtract/Japan.pm (modified) (2 diffs)
-
lib/Geography/AddressExtract/Japan/Regexp/Number.pm (modified) (1 diff)
-
test.pl (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/Geography-AddressExtract-Japan/trunk/bin/make_maps.pl
r1571 r1572 9 9 10 10 my $out; 11 my $ra_city = Regexp::Assemble->new; 12 my $ra_number = Regexp::Assemble->new; 13 my $ra_aza = Regexp::Assemble->new; 14 15 my $dash = '[-‐−のノ]'; 16 my $number = '(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\d+)'; 17 my $number_prefix = '[東西南北左右上下]'; 18 my $numbers = sprintf("(?:%s?%s|[a-zA-Za-zA-Z])", $number_prefix, $number); 19 my $chome = sprintf("(?:%s(?:丁目|%s))?", $number, $dash); 20 my $ban = '番地?'; 21 11 22 if (0) { 12 my $ra_city = Regexp::Assemble->new;13 14 23 my $csv = Text::CSV_PP->new({binary => 1}); 15 24 my $io = IO::File->new('./ken_all.csv', '<:encoding(shiftjis)') or die $!; … … 197 206 198 207 } 199 200 my $ra_number = Regexp::Assemble->new;201 my $ra_aza = Regexp::Assemble->new;202 203 my $dash = '[-‐−のノ]';204 my $number = '(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\d+)';205 my $number_prefix = '[東西南北左右]';206 my $numbers = sprintf("(?:%s?%s|[a-zA-Za-zA-Z])", $number_prefix, $number);207 my $chome = sprintf("(?:%s(?:丁目|%s))?", $number, $dash);208 my $ban = '番地?';209 208 210 209 $ra_number->add('\d+'); -
lang/perl/Geography-AddressExtract-Japan/trunk/lib/Geography/AddressExtract/Japan.pm
r1570 r1572 2 2 use strict; 3 3 use warnings; 4 use encoding "euc-jp"; 4 5 5 6 use Carp; … … 66 67 $opt{aza} = $2 if $2; 67 68 $opt{number} = $3 if $3; 68 push @{ $self->{addresses} }, Geography::AddressExtract::Japan::Address->new(%opt); 69 70 $self->normalize($', \%opt);#'); 71 72 push @{ $self->{addresses} }, Geography::AddressExtract::Japan::Address->new(%opt); 69 73 } 74 } 75 76 sub normalize { 77 my($self, $right, $opt) = @_; 78 79 if ($opt->{number} && $opt->{number} =~ /^([東西南北左右上下])/) { 80 my $prefix = $1; 81 if ($right =~ /^((?:番地?)?[-‐−のノ]?(?:(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\d+)|[a-zA-Za-zA-Z])号?)/) { 82 my $append = $1; 83 $opt->{aza} .= $prefix; 84 $opt->{number} =~ s/^$prefix//; 85 $opt->{number} .= $append; 86 } 87 } 88 70 89 } 71 90 72 91 sub dedupe { 73 92 my $self = shift; 93 94 return unless @{ $self->addresses }; 74 95 75 96 # sort -
lang/perl/Geography-AddressExtract-Japan/trunk/lib/Geography/AddressExtract/Japan/Regexp/Number.pm
r1571 r1572 8 8 my $re = '(?-xism:(?:(?:(?:(?:[一二三四五六七八九]?十)?[一二三四 9 9 五六七八九〇]+|\d+)(?:丁目|[-‐−のノ]))?(?:(?:[東西南北 10 左右 ]?(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\11 d+)|[a-zA-Za-zA-Z])(?:番地?(?:[-‐−のノ](?:[東 12 西南北左右]?(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇] 13 +|\d+)|[a-zA-Za-zA-Z])号?|(?:[東西南北左右]?(?: 14 (?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\d+)|[a- 15 zA-Za-zA-Z])号?)?|[-‐−のノ](?:[東西南北左右]?(?:( 16 ?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\d+)|[a-z 17 A-Za-zA-Z])号?|号)|\d+)|\d+))';10 左右上下]?(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+ 11 |\d+)|[a-zA-Za-zA-Z])(?:番地?(?:[-‐−のノ](?: 12 [東西南北左右上下]?(?:(?:[一二三四五六七八九]?十)?[一二三四五六七 13 八九〇]+|\d+)|[a-zA-Za-zA-Z])号?|(?:[東西南北左右上 14 下]?(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\d 15 +)|[a-zA-Za-zA-Z])号?)?|[-‐−のノ](?:[東西南北左右 16 上下]?(?:(?:[一二三四五六七八九]?十)?[一二三四五六七八九〇]+|\ 17 d+)|[a-zA-Za-zA-Z])号?|号)|\d+)|\d+))'; 18 18 $re =~ s/\n//g; 19 19 $re; -
lang/perl/Geography-AddressExtract-Japan/trunk/test.pl
r1571 r1572 50 50 ���Ź�����ϰ褫�餵������: ��ɩ�գƣʿ���� 51 51 (JR���������03-3492-1411. ��ͳ����, 142, �գƣʿ��, ��������ͳ������0��2 (��ͳ���ֱ����˪�βȱ���� 03-3718-1147 ... ������, 52 610, ��ɩ���, ��������Բ������������ (�;��������, 075-211-7161 ...52 610, ��ɩ���, ��������Բ�������������;��������, 075-211-7161 ... 53 53 www.tr.mufg.jp/ippan/tenpo/chiiki_list.html - 34k - �������� ����ڡ��� 54 54 … … 67 67 ���ƻ����λͤ�����ɽ�� 68 68 69 ��������ͳ���ְ��-�͡���'); 69 ��������ͳ���ְ��-�͡�����ͻԤ������Ա�ϲ�Ŵ�ر��Ԥǵ�0�ˤΤä� 70 ������������������Ծ�ɶɤǤ��� 71 72 ʡ���Ա�5������ԥ�33094-10�Ȥ����ԥ�3864��8�Ȥ����������������2-3-0921 73 �������2���λ͡�������������2�ζ��͡�������������2�ζ���͡�������); 70 74 71 75 #print Dumper($ex);
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)