| 1 | /*
|
|---|
| 2 |
|
|---|
| 3 | NTT TownPage / ZENRIN ZMap electronic data matching program (title translated from a mis-encoded Shift_JIS comment)
|
|---|
| 4 |
|
|---|
| 5 | */
|
|---|
| 6 |
|
|---|
| 7 | /**
|
|---|
| 8 |
|
|---|
| 9 | Copyright (c) 2007 Satoshi Ueyama, Yuki Akiyama
|
|---|
| 10 |
|
|---|
| 11 | Permission is hereby granted, free of charge, to any person obtaining a copy
|
|---|
| 12 | of this software and associated documentation files (the "Software"), to deal
|
|---|
| 13 | in the Software without restriction, including without limitation the rights
|
|---|
| 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|---|
| 15 | copies of the Software, and to permit persons to whom the Software is
|
|---|
| 16 | furnished to do so, subject to the following conditions:
|
|---|
| 17 |
|
|---|
| 18 | The above copyright notice and this permission notice shall be included in
|
|---|
| 19 | all copies or substantial portions of the Software.
|
|---|
| 20 |
|
|---|
| 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|---|
| 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|---|
| 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|---|
| 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|---|
| 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|---|
| 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|---|
| 27 | THE SOFTWARE.
|
|---|
| 28 |
|
|---|
| 29 | **/
|
|---|
| 30 |
|
|---|
| 31 | #include "stdafx.h"
|
|---|
| 32 | #include "Directory.h"
|
|---|
| 33 | #include "KeywordDic.h"
|
|---|
| 34 | #include "CharFilter.h"
|
|---|
| 35 | #include "compare.h"
|
|---|
| 36 | #include "LocalWordsExtractor.h"
|
|---|
| 37 |
|
|---|
| 38 | #define OPTION_NO_LOCAL 1
|
|---|
| 39 | #define OPTION_LOCALNG_3 2
|
|---|
| 40 | #define OPTION_SETTH 3
|
|---|
| 41 |
|
|---|
| 42 | struct NGTh gNGThOption;
|
|---|
| 43 |
|
|---|
| 44 | KeywordDic* loadKeywords();
|
|---|
| 45 |
|
|---|
| 46 | int KWmain(int argc, _TCHAR* argv[])
|
|---|
| 47 | {
|
|---|
| 48 | TCHAR* infile = NULL;
|
|---|
| 49 | if (argc < 2)
|
|---|
| 50 | {
|
|---|
| 51 | _tprintf(_T("specify [infile]"));
|
|---|
| 52 | return -1;
|
|---|
| 53 | }
|
|---|
| 54 | infile = argv[1];
|
|---|
| 55 |
|
|---|
| 56 | _tcprintf(_T("�����L�[���[�h��ݍ�����܂�...\n"));
|
|---|
| 57 | KeywordDic* kdic = loadKeywords();
|
|---|
| 58 | CharFilter* cfilter = new CharFilter();
|
|---|
| 59 |
|
|---|
| 60 | IReaderNameFilter* filters[2] = {kdic, cfilter};
|
|---|
| 61 |
|
|---|
| 62 | Directory* dir = Directory::fromFile(infile, 48000, filters, 2);
|
|---|
| 63 |
|
|---|
| 64 |
|
|---|
| 65 | for (int i = 0;i < dir->getCount();i++)
|
|---|
| 66 | {
|
|---|
| 67 | Site* s = dir->getAt(i);
|
|---|
| 68 |
|
|---|
| 69 | _tprintf(_T("%s, %s\n"), s->Org_KanaName, s->Filt_KanaName);
|
|---|
| 70 | }
|
|---|
| 71 |
|
|---|
| 72 | delete dir;
|
|---|
| 73 |
|
|---|
| 74 | return 0;
|
|---|
| 75 | }
|
|---|
| 76 |
|
|---|
| 77 |
|
|---|
| 78 | int identifyOption(const _TCHAR* arg)
|
|---|
| 79 | {
|
|---|
| 80 | if ( _tcscmp(arg, _T("-n")) == 0 || _tcscmp(arg, _T("-N")) == 0)
|
|---|
| 81 | return OPTION_NO_LOCAL;
|
|---|
| 82 |
|
|---|
| 83 | if ( _tcscmp(arg, _T("-3")) == 0)
|
|---|
| 84 | return OPTION_LOCALNG_3;
|
|---|
| 85 |
|
|---|
| 86 | if (arg[0] == _T('-') &&
|
|---|
| 87 | (arg[1] == _T('t') || arg[1] == _T('T')))
|
|---|
| 88 | {
|
|---|
| 89 | _stscanf(arg+2, _T("%f,%f,%f,%f,%f"), &gNGThOption.generic, &gNGThOption.lv0, &gNGThOption.lv3, &gNGThOption.mig, &gNGThOption.redev);
|
|---|
| 90 |
|
|---|
| 91 | return OPTION_SETTH;
|
|---|
| 92 | }
|
|---|
| 93 |
|
|---|
| 94 | return 0;
|
|---|
| 95 | }
|
|---|
| 96 |
|
|---|
| 97 | int _tmain(int argc, _TCHAR* argv[])
|
|---|
| 98 | {
|
|---|
| 99 | _tsetlocale(LC_ALL, _T(""));
|
|---|
| 100 | TCHAR* infile_old = NULL;
|
|---|
| 101 | TCHAR* infile_new = NULL;
|
|---|
| 102 | bool use_local_kw = true;
|
|---|
| 103 | int lw_mode = LWMODE_N4;
|
|---|
| 104 | int i;
|
|---|
| 105 |
|
|---|
| 106 | //////////////////////////////////
|
|---|
| 107 | //return KWmain(argc, argv);
|
|---|
| 108 |
|
|---|
| 109 | gNGThOption.lv0 = gNGThOption.lv3 = gNGThOption.mig = gNGThOption.redev = 0;
|
|---|
| 110 |
|
|---|
| 111 | if (argc < 3)
|
|---|
| 112 | {
|
|---|
| 113 | _tprintf(_T("specify [infile_old] [infile_new]"));
|
|---|
| 114 | return -1;
|
|---|
| 115 | }
|
|---|
| 116 | else
|
|---|
| 117 | {
|
|---|
| 118 | /*
|
|---|
| 119 | if (argc == 4)
|
|---|
| 120 | {
|
|---|
| 121 | use_local_kw = false;
|
|---|
| 122 | infile_old = argv[2];
|
|---|
| 123 | infile_new = argv[3];
|
|---|
| 124 | _tcprintf(_T("�I�v�V����: ���[�J���p�o������\n"));
|
|---|
| 125 | }
|
|---|
| 126 | else
|
|---|
| 127 | {
|
|---|
| 128 | infile_old = argv[1];
|
|---|
| 129 | infile_new = argv[2];
|
|---|
| 130 | }*/
|
|---|
| 131 | int n_options = argc - 3;
|
|---|
| 132 |
|
|---|
| 133 | infile_old = argv[1 + n_options];
|
|---|
| 134 | infile_new = argv[2 + n_options];
|
|---|
| 135 |
|
|---|
| 136 | for (i = 0;i < n_options;i++)
|
|---|
| 137 | {
|
|---|
| 138 | switch( identifyOption(argv[1+i]) )
|
|---|
| 139 | {
|
|---|
| 140 | case OPTION_NO_LOCAL:
|
|---|
| 141 | _tcprintf(_T("�I�v�V����: ���[�J���p�o������\n"));
|
|---|
| 142 | use_local_kw = false;
|
|---|
| 143 | break;
|
|---|
| 144 | case OPTION_LOCALNG_3:
|
|---|
| 145 | _tcprintf(_T("�I�v�V����: ���[�J���p�o�����o��3-Gram��p���܂�\n"));
|
|---|
| 146 | lw_mode = LWMODE_N3;
|
|---|
| 147 | break;
|
|---|
| 148 | case OPTION_SETTH:
|
|---|
| 149 | _tcprintf(_T("�I�v�V����: NGram����l��X���܂�\n") );
|
|---|
| 150 |
|
|---|
| 151 | break;
|
|---|
| 152 | }
|
|---|
| 153 | }
|
|---|
| 154 | }
|
|---|
| 155 | _tcprintf(_T("NGram����l ��:%f Lv0:%f Lv3:%f �ړ��ĊJ��:%f \n"), gNGThOption.generic, gNGThOption.lv0, gNGThOption.lv3, gNGThOption.mig, gNGThOption.redev);
|
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 | _tcprintf(_T("�����L�[���[�h��ݍ�����܂�...\n"));
|
|---|
| 159 | KeywordDic* kdic = loadKeywords();
|
|---|
| 160 | CharFilter* cfilter = new CharFilter();
|
|---|
| 161 |
|
|---|
| 162 | IReaderNameFilter* filters[2] = {kdic, cfilter};
|
|---|
| 163 |
|
|---|
| 164 | Directory* d_old = Directory::fromFile(infile_old, 45000, filters, 2);
|
|---|
| 165 | Directory* d_new = Directory::fromFile(infile_new, 45000, filters, 2);
|
|---|
| 166 |
|
|---|
| 167 | GridMap* gm_new = d_new->getGridMap();
|
|---|
| 168 | GridMap* gm_old = d_old->getGridMap();
|
|---|
| 169 | /*
|
|---|
| 170 | GMCellIterator* it = gm_new->cellIterator();
|
|---|
| 171 | for(;it->hasNext();)
|
|---|
| 172 | {
|
|---|
| 173 | long gk = it->getKey();
|
|---|
| 174 | _tcprintf(_T("%d,%d "), GridMap::getCXfromGMKey(gk), GridMap::getCYfromGMKey(gk));
|
|---|
| 175 | it->moveNext();
|
|---|
| 176 | }
|
|---|
| 177 | delete it;*/
|
|---|
| 178 |
|
|---|
| 179 |
|
|---|
| 180 |
|
|---|
| 181 | if (use_local_kw)
|
|---|
| 182 | {
|
|---|
| 183 | _tcprintf(_T("���[�J���p�o�����o���Ă��܂�\n"));
|
|---|
| 184 | LocalWordsExtractor lwe(lw_mode);
|
|---|
| 185 | lwe.doExtract(gm_new, gm_old);
|
|---|
| 186 | }
|
|---|
| 187 | else
|
|---|
| 188 | {
|
|---|
| 189 | }
|
|---|
| 190 |
|
|---|
| 191 | compareDirectories(d_old, d_new, gNGThOption);
|
|---|
| 192 |
|
|---|
| 193 | delete d_old;
|
|---|
| 194 | delete d_new;
|
|---|
| 195 |
|
|---|
| 196 | return 0;
|
|---|
| 197 | }
|
|---|
| 198 |
|
|---|
| 199 |
|
|---|
| 200 |
|
|---|
| 201 | KeywordDic* loadKeywords()
|
|---|
| 202 | {
|
|---|
| 203 | KeywordDic* kdic = new KeywordDic();
|
|---|
| 204 | kdic->load(_T("tokyo1.kwd.csv"));
|
|---|
| 205 | kdic->load(_T("station2.kwd.csv"));
|
|---|
| 206 | kdic->load(_T("other.kwd.csv"));
|
|---|
| 207 | kdic->load(_T("dpt.kwd.csv"));
|
|---|
| 208 | return kdic;
|
|---|
| 209 | } |
|---|