| 1 | #include "StdAfx.h"
|
|---|
| 2 | #include <tchar.h>
|
|---|
| 3 | #include <stdio.h>
|
|---|
| 4 | #include <stdlib.h>
|
|---|
| 5 | #include "FloorExtractor.h"
|
|---|
| 6 |
|
|---|
| 7 | static void chopT(TCHAR* s)
|
|---|
| 8 | {
|
|---|
| 9 | size_t len = _tcslen(s);
|
|---|
| 10 | if (len == 0) return;
|
|---|
| 11 |
|
|---|
| 12 | len--;
|
|---|
| 13 | for(;len >= 0;len--)
|
|---|
| 14 | {
|
|---|
| 15 | if (s[len] == _T(' ') || s[len] == _T('�@'))
|
|---|
| 16 | s[len] = _T('\0');
|
|---|
| 17 | else
|
|---|
| 18 | break;
|
|---|
| 19 | }
|
|---|
| 20 | }
|
|---|
| 21 |
|
|---|
| 22 | FloorExtractor::FloorExtractor(void)
|
|---|
| 23 | {
|
|---|
| 24 | regexA = regexB = regexC = NULL;
|
|---|
| 25 | makeRegexObj();
|
|---|
| 26 | }
|
|---|
| 27 |
|
|---|
| 28 | FloorExtractor::~FloorExtractor(void)
|
|---|
| 29 | {
|
|---|
| 30 | if (regexA != NULL)
|
|---|
| 31 | onig_free(regexA);
|
|---|
| 32 | if (regexB != NULL)
|
|---|
| 33 | onig_free(regexB);
|
|---|
| 34 | if (regexC != NULL)
|
|---|
| 35 | onig_free(regexC);
|
|---|
| 36 |
|
|---|
| 37 | if (regex_fnum != NULL)
|
|---|
| 38 | onig_free(regex_fnum);
|
|---|
| 39 |
|
|---|
| 40 | if (regex_rnum != NULL)
|
|---|
| 41 | onig_free(regex_rnum);
|
|---|
| 42 | }
|
|---|
| 43 |
|
|---|
// Pattern sources for the regular expressions built in
// FloorExtractor::makeRegexObj(), which compiles each of them with
// ONIG_ENCODING_UTF16_LE and ONIG_SYNTAX_RUBY:
//   AlphabetBldPtn -> regex_AlphaBetBld    BldNumberPtn -> regex_BldNumber
//   fnumPtn        -> regex_fnum           roomPtn      -> regex_rnum
//   patternA / patternB / patternC -> regexA / regexB / regexC
// NOTE(review): most non-ASCII characters inside these literals appear to have
// been lost in an encoding conversion (they show up as replacement
// characters), so the patterns cannot be read or trusted as shown. Restore
// them from a copy of the file in its original encoding before editing them
// -- TODO confirm.
| 44 | static UChar* AlphabetBldPtn = (UChar*)_T("[a-zA-Z��-���`-�y]{2,}[a-zAC-Z��-���`�b-�y]");
|
|---|
| 45 | static UChar* BldNumberPtn = (UChar*)_T("[a-zA-Z��-���`-�y1-9�P-�X](��|�g�E)");
|
|---|
| 46 | static UChar* fnumPtn = (UChar*)_T("(B|�n��|�`�J)?[1-9][0-9]*([�KF�e]|�J�C|�G�t|$)");
|
|---|
| 47 | static UChar* roomPtn = (UChar*)_T("([1-9a-zA-Z](^|[0-9]*([���S�ԃo]?)))|�Ǘ��l��");
|
|---|
| 48 |
|
|---|
| 49 | static UChar* patternA = (UChar* )_T("((B|�a|�n��|�`�J)[1-9�P-�X]�K?)|((B|�a|�n��|�`�J)?[1-9�P-�X][0-9�O-�X]*(�K|�J�C|�e|F|�G�t)(([1-9�P-�Xa-zA-Z��-���`-�y][0-9�O-�X]*(��|�S�E|�o��|��?)|�Ǘ��l��)?)");
|
|---|
| 50 | static UChar* patternB = (UChar* )_T("((([a-zA-Z��-���`-�y]|[1-9�P-�X][0-9�O-�X]*)[�|\\-]?)?[1-9�P-�X][0-9�O-�X]*(��|�S�E|�o��|��)");
|
|---|
| 51 | static UChar* patternC = (UChar* )_T("([1-9�P-�X][0-9�O-�X]*�G�t)|(BF|�a�e|�n�K|���n��|���Q�K|����(���n��)|(�m?�`�J$))|([1-9�P-�X]?[a-zA-Z��-���`-�y](�S�E|��))|([1-9�P-�X][0-9�O-�X]*[\\-�|][a-zA-Z��-���`-�y0-9�O-�X])|(([a-zA-Z��-���`-�y][�|\\-])?[1-9�P-�X][0-9�O-�X]+)");
|
|---|
| 52 |
|
|---|
| 53 | bool FloorExtractor::makeRegexObj()
|
|---|
| 54 | {
|
|---|
| 55 | OnigErrorInfo einfo;
|
|---|
| 56 | int r;
|
|---|
| 57 |
|
|---|
| 58 | r = onig_new(®ex_fnum, fnumPtn, (UChar*)((TCHAR*)fnumPtn + _tcslen((TCHAR*)fnumPtn)),
|
|---|
| 59 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 60 | if (r != ONIG_NORMAL)
|
|---|
| 61 | return false;
|
|---|
| 62 |
|
|---|
| 63 | r = onig_new(®ex_rnum, roomPtn, (UChar*)((TCHAR*)roomPtn + _tcslen((TCHAR*)roomPtn)),
|
|---|
| 64 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 65 | if (r != ONIG_NORMAL)
|
|---|
| 66 | return false;
|
|---|
| 67 |
|
|---|
| 68 | r = onig_new(®ex_AlphaBetBld, AlphabetBldPtn, (UChar*)((TCHAR*)AlphabetBldPtn + _tcslen((TCHAR*)AlphabetBldPtn)),
|
|---|
| 69 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 70 | if (r != ONIG_NORMAL)
|
|---|
| 71 | return false;
|
|---|
| 72 |
|
|---|
| 73 | r = onig_new(®ex_BldNumber, BldNumberPtn, (UChar*)((TCHAR*)BldNumberPtn + _tcslen((TCHAR*)BldNumberPtn)),
|
|---|
| 74 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 75 | if (r != ONIG_NORMAL)
|
|---|
| 76 | return false;
|
|---|
| 77 |
|
|---|
| 78 | r = onig_new(®exA, patternA, (UChar*)((TCHAR*)patternA + _tcslen((TCHAR*)patternA)),
|
|---|
| 79 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 80 | if (r != ONIG_NORMAL)
|
|---|
| 81 | return false;
|
|---|
| 82 |
|
|---|
| 83 | r = onig_new(®exB, patternB, (UChar*)((TCHAR*)patternB + _tcslen((TCHAR*)patternB)),
|
|---|
| 84 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 85 | if (r != ONIG_NORMAL)
|
|---|
| 86 | return false;
|
|---|
| 87 |
|
|---|
| 88 | r = onig_new(®exC, patternC, (UChar*)((TCHAR*)patternC + _tcslen((TCHAR*)patternC)),
|
|---|
| 89 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_RUBY, &einfo);
|
|---|
| 90 | if (r != ONIG_NORMAL)
|
|---|
| 91 | return false;
|
|---|
| 92 |
|
|---|
| 93 |
|
|---|
| 94 | return true;
|
|---|
| 95 | }
|
|---|
| 96 |
|
|---|
| 97 | FXRESULT* FloorExtractor::extract(const TCHAR* target)
|
|---|
| 98 | {
|
|---|
| 99 |
|
|---|
| 100 | int l = _tcslen(target);
|
|---|
| 101 |
|
|---|
| 102 | if (l == 0)
|
|---|
| 103 | return NULL;
|
|---|
| 104 |
|
|---|
| 105 | OnigRegion* rgn = onig_region_new();
|
|---|
| 106 |
|
|---|
| 107 | int o_beg, o_end, r;
|
|---|
| 108 | int b_beg, b_end;
|
|---|
| 109 |
|
|---|
| 110 | b_beg = -1;
|
|---|
| 111 | o_end = 0;
|
|---|
| 112 |
|
|---|
| 113 | // �u�g�n�s�d�k�Q�O�R���v��Q�O�R�Ǝ��Ȃ��悤�� |
|---|
| 114 | r = onig_search(regex_AlphaBetBld, (UChar*)target, (UChar*)(target+l), (UChar*)(target+o_end),
|
|---|
| 115 | (UChar*)(target+l), rgn, ONIG_OPTION_NONE);
|
|---|
| 116 | if (rgn->beg[0] != -1)
|
|---|
| 117 | {
|
|---|
| 118 | o_end = rgn->end[0]/2;
|
|---|
| 119 |
|
|---|
| 120 | // �n�C�t���������ꍇ ���u�����s�n�j�x�n�`�|�S�O�Q�v�Ȃǂ̏ꍇ�́u�`�|�S�O�Q�v���K���Ǝv���� if (o_end > 0)
|
|---|
| 121 | {
|
|---|
| 122 | TCHAR t = target[o_end];
|
|---|
| 123 | if (t == _T('�|') || t == _T('-'))
|
|---|
| 124 | o_end--;
|
|---|
| 125 |
|
|---|
| 126 | }
|
|---|
| 127 | }
|
|---|
| 128 |
|
|---|
| 129 | r = onig_search(regex_BldNumber, (UChar*)target, (UChar*)(target+l), (UChar*)(target+o_end),
|
|---|
| 130 | (UChar*)(target+l), rgn, ONIG_OPTION_NONE);
|
|---|
| 131 | if (rgn->beg[0] != -1)
|
|---|
| 132 | {
|
|---|
| 133 | b_beg = rgn->beg[0]/2;
|
|---|
| 134 | b_end = rgn->end[0]/2;
|
|---|
| 135 | }
|
|---|
| 136 |
|
|---|
| 137 | for(o_beg = -1;;)
|
|---|
| 138 | {
|
|---|
| 139 |
|
|---|
| 140 | r = onig_search(regexA, (UChar*)target, (UChar*)(target+l), (UChar*)(target+o_end),
|
|---|
| 141 | (UChar*)(target+l), rgn, ONIG_OPTION_NONE);
|
|---|
| 142 |
|
|---|
| 143 |
|
|---|
| 144 | if (rgn->beg[0] == -1)
|
|---|
| 145 | r = onig_search(regexB, (UChar*)target, (UChar*)(target+l), (UChar*)(target+o_end),
|
|---|
| 146 | (UChar*)(target+l), rgn, ONIG_OPTION_NONE);
|
|---|
| 147 | if (rgn->beg[0] == -1)
|
|---|
| 148 | r = onig_search(regexC, (UChar*)target, (UChar*)(target+l), (UChar*)(target+o_end),
|
|---|
| 149 | (UChar*)(target+l), rgn, ONIG_OPTION_NONE);
|
|---|
| 150 |
|
|---|
| 151 | if (rgn->beg[0] == -1)
|
|---|
| 152 | break;
|
|---|
| 153 |
|
|---|
| 154 | o_beg = rgn->beg[0]/2;
|
|---|
| 155 | o_end = rgn->end[0]/2;
|
|---|
| 156 | }
|
|---|
| 157 |
|
|---|
| 158 | TCHAR* matchstr = NULL;
|
|---|
| 159 | TCHAR* bldname = NULL;
|
|---|
| 160 |
|
|---|
| 161 | FXRESULT* ret = NULL;
|
|---|
| 162 |
|
|---|
| 163 | if (o_beg == -1)
|
|---|
| 164 | {
|
|---|
| 165 | //fprintf(stderr, "*NO\n");
|
|---|
| 166 | }
|
|---|
| 167 | else
|
|---|
| 168 | {
|
|---|
| 169 |
|
|---|
| 170 | ret = new FXRESULT;
|
|---|
| 171 | int mlen = o_end - o_beg;
|
|---|
| 172 | int blen = 0;
|
|---|
| 173 | int bname_beg = o_beg;
|
|---|
| 174 |
|
|---|
| 175 |
|
|---|
| 176 | if (b_beg != -1)
|
|---|
| 177 | {
|
|---|
| 178 | blen = b_end - b_beg;
|
|---|
| 179 | if (b_beg < bname_beg)
|
|---|
| 180 | bname_beg = b_beg;
|
|---|
| 181 | }
|
|---|
| 182 |
|
|---|
| 183 | matchstr = new TCHAR[mlen+blen + 1];
|
|---|
| 184 | bldname = new TCHAR[bname_beg + 1];
|
|---|
| 185 |
|
|---|
| 186 | if (b_beg != -1)
|
|---|
| 187 | _tcsncpy(matchstr, target + b_beg, blen);
|
|---|
| 188 |
|
|---|
| 189 | _tcsncpy(matchstr+blen, target + o_beg, mlen);
|
|---|
| 190 | matchstr[mlen+blen] = _T('\0');
|
|---|
| 191 |
|
|---|
| 192 | _tcsncpy(bldname, target, bname_beg+1);
|
|---|
| 193 | bldname[bname_beg] = _T('\0');
|
|---|
| 194 |
|
|---|
| 195 |
|
|---|
| 196 | //_tcprintf(_T("%s : %s "), bldname, matchstr);
|
|---|
| 197 | parseFloor(matchstr, ret);
|
|---|
| 198 |
|
|---|
| 199 | chopT(bldname);
|
|---|
| 200 |
|
|---|
| 201 | if (_tcslen(bldname) > 0)
|
|---|
| 202 | ret->bld = bldname;
|
|---|
| 203 | delete[] matchstr;
|
|---|
| 204 |
|
|---|
| 205 | if (ret->bld && ret->room && !ret->flr)
|
|---|
| 206 | {
|
|---|
| 207 | if (_tcslen(ret->room) == 3)
|
|---|
| 208 | {
|
|---|
| 209 | int num = _ttoi(ret->room);
|
|---|
| 210 | if (num >= 100 && num <= 999)
|
|---|
| 211 | {
|
|---|
| 212 | ret->flr = new TCHAR[2];
|
|---|
| 213 | ret->flr[0] = _T('0'+(num/100));
|
|---|
| 214 | ret->flr[1] = _T('\0');
|
|---|
| 215 | //_tcprintf(_T("�������o %s -> %s\n"), ret->room, ret->flr);
|
|---|
| 216 |
|
|---|
| 217 | }
|
|---|
| 218 | }
|
|---|
| 219 | }
|
|---|
| 220 |
|
|---|
| 221 | if (!ret->room && ret->flr)
|
|---|
| 222 | {
|
|---|
| 223 | if (_tcslen(ret->flr) >= 3 && ret->flr[0] != _T('-'))
|
|---|
| 224 | {
|
|---|
| 225 | ret->room = ret->flr;
|
|---|
| 226 | ret->flr = NULL;
|
|---|
| 227 | }
|
|---|
| 228 | }
|
|---|
| 229 | }
|
|---|
| 230 |
|
|---|
| 231 |
|
|---|
| 232 | onig_region_free(rgn, 1);
|
|---|
| 233 | return ret;
|
|---|
| 234 | }
|
|---|
| 235 |
|
|---|
| 236 | int FloorExtractor::getRoomNum(TCHAR* s, FXRESULT* fxr)
|
|---|
| 237 | {
|
|---|
| 238 | OnigRegion* rgn_fnum = onig_region_new();
|
|---|
| 239 |
|
|---|
| 240 | int l = _tcslen(s);
|
|---|
| 241 |
|
|---|
| 242 | int gbeg = -1;
|
|---|
| 243 | int beg = -1;
|
|---|
| 244 | int end = 0;
|
|---|
| 245 | int glen = 0;
|
|---|
| 246 | int rl = 0;
|
|---|
| 247 | for (;;)
|
|---|
| 248 | {
|
|---|
| 249 | int r = onig_search(regex_rnum, (UChar*)s, (UChar*)(s+l), (UChar*)(s+end),
|
|---|
| 250 | (UChar*)(s+l), rgn_fnum, ONIG_OPTION_NONE);
|
|---|
| 251 |
|
|---|
| 252 | if (rgn_fnum->beg[0] == -1)
|
|---|
| 253 | break;
|
|---|
| 254 |
|
|---|
| 255 | beg = rgn_fnum->beg[0]/2;
|
|---|
| 256 | end = rgn_fnum->end[0]/2;
|
|---|
| 257 | gbeg = rgn_fnum->beg[3]/2;
|
|---|
| 258 |
|
|---|
| 259 | if (gbeg != -1)
|
|---|
| 260 | glen = (rgn_fnum->end[3] - rgn_fnum->beg[3])/2;
|
|---|
| 261 | if (beg != -1)
|
|---|
| 262 | rl = end-beg;
|
|---|
| 263 | }
|
|---|
| 264 |
|
|---|
| 265 | if (beg != -1)
|
|---|
| 266 | {
|
|---|
| 267 |
|
|---|
| 268 | if (rl <= 6)
|
|---|
| 269 | {
|
|---|
| 270 | TCHAR* rtmp = new TCHAR[rl-glen+1];
|
|---|
| 271 | _tcsncpy(rtmp, s+beg, rl-glen);
|
|---|
| 272 | rtmp[rl-glen] = _T('\0');
|
|---|
| 273 |
|
|---|
| 274 | fxr->room = rtmp;
|
|---|
| 275 |
|
|---|
| 276 | }
|
|---|
| 277 | }
|
|---|
| 278 |
|
|---|
| 279 |
|
|---|
| 280 | onig_region_free(rgn_fnum, 1);
|
|---|
| 281 |
|
|---|
| 282 | return 0;
|
|---|
| 283 | }
|
|---|
| 284 |
|
|---|
| 285 | int FloorExtractor::getFloorNum(TCHAR* s, FXRESULT* fxr)
|
|---|
| 286 | {
|
|---|
| 287 | OnigRegion* rgn_fnum = onig_region_new();
|
|---|
| 288 |
|
|---|
| 289 | int l = _tcslen(s);
|
|---|
| 290 | int r = onig_search(regex_fnum, (UChar*)s, (UChar*)(s+l), (UChar*)s,
|
|---|
| 291 | (UChar*)(s+l), rgn_fnum, ONIG_OPTION_NONE);
|
|---|
| 292 |
|
|---|
| 293 | if (rgn_fnum->beg[0] != -1)
|
|---|
| 294 | {
|
|---|
| 295 | int fl = (rgn_fnum->end[0]-rgn_fnum->beg[0])/2;
|
|---|
| 296 | if (fl <= 6)
|
|---|
| 297 | {
|
|---|
| 298 | TCHAR* ftmp = new TCHAR[7];
|
|---|
| 299 | int flen = (rgn_fnum->end[2] - rgn_fnum->beg[2])/2;
|
|---|
| 300 |
|
|---|
| 301 | if (rgn_fnum->beg[1] != -1)
|
|---|
| 302 | {
|
|---|
| 303 | int blen = (rgn_fnum->end[1] - rgn_fnum->beg[1])/2;
|
|---|
| 304 | _tcsncpy(ftmp+1, s+(rgn_fnum->beg[0]/2)+blen, fl-flen-blen);
|
|---|
| 305 | ftmp[0] = _T('-');
|
|---|
| 306 | ftmp[fl-flen-blen+1] = _T('\0');
|
|---|
| 307 | }
|
|---|
| 308 | else
|
|---|
| 309 | {
|
|---|
| 310 | _tcsncpy(ftmp, s+(rgn_fnum->beg[0]/2), fl-flen);
|
|---|
| 311 | ftmp[fl-flen] = _T('\0');
|
|---|
| 312 | }
|
|---|
| 313 |
|
|---|
| 314 | TCHAR* p = &s[rgn_fnum->beg[0]/2];
|
|---|
| 315 | for (int i = 0;i < fl;i++)
|
|---|
| 316 | {
|
|---|
| 317 | p[i] = _T('*');
|
|---|
| 318 | }
|
|---|
| 319 |
|
|---|
| 320 | fxr->flr = ftmp;
|
|---|
| 321 | }
|
|---|
| 322 | }
|
|---|
| 323 |
|
|---|
| 324 | onig_region_free(rgn_fnum, 1);
|
|---|
| 325 |
|
|---|
| 326 | return 0;
|
|---|
| 327 | }
|
|---|
| 328 |
|
|---|
| 329 | void FloorExtractor::parseFloor(const TCHAR* s, FXRESULT* fxr)
|
|---|
| 330 | {
|
|---|
| 331 | TCHAR* ds = _tcsdup(s);
|
|---|
| 332 |
|
|---|
| 333 | TCHAR* p = ds;
|
|---|
| 334 | for(;*p != _T('\0');p++)
|
|---|
| 335 | {
|
|---|
| 336 | if (*p >= _T('�O') && *p <= _T('�X'))
|
|---|
| 337 | *p = _T('0')+(*p - _T('�O'));
|
|---|
| 338 | if (*p >= _T('�`') && *p <= _T('�y'))
|
|---|
| 339 | *p = _T('A')+(*p - _T('�`'));
|
|---|
| 340 | if (*p >= _T('��') && *p <= _T('��'))
|
|---|
| 341 | *p = _T('a')+(*p - _T('��'));
|
|---|
| 342 |
|
|---|
| 343 | if (*p == _T('�|'))
|
|---|
| 344 | *p = _T('-');
|
|---|
| 345 | }
|
|---|
| 346 |
|
|---|
| 347 | getFloorNum(ds, fxr);
|
|---|
| 348 | getRoomNum(ds, fxr);
|
|---|
| 349 | //_tcprintf(_T(" : %s\n"), ds);
|
|---|
| 350 |
|
|---|
| 351 | free(ds);
|
|---|
| 352 | } |
|---|