/*
 * Encode the character value "c" as a UTF-8 style byte sequence in "buf".
 *
 * Values below 0x80 are stored as a single byte; larger values use 2 to 6
 * bytes (the 5 and 6 byte forms cover values up to 31 bits).  "buf" must
 * have room for the bytes written (at most 6).  No terminator is appended.
 *
 * Returns the number of bytes written to "buf".
 */
int utf_char2bytes(int c, unsigned char* buf) {
    /* Lead-byte marker indexed by sequence length (index 0 is unused). */
    static const unsigned char lead_marker[7] = {
        0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
    };
    unsigned bits = (unsigned)c;
    int len;

    /* Pick the sequence length from the magnitude of the value. */
    if (c < 0x80) {
        len = 1;
    } else if (c < 0x800) {
        len = 2;
    } else if (c < 0x10000) {
        len = 3;
    } else if (c < 0x200000) {
        len = 4;
    } else if (c < 0x4000000) {
        len = 5;
    } else {
        len = 6; /* 31 bits */
    }

    /* Fill the continuation bytes back to front, 6 payload bits each. */
    for (int i = len - 1; i > 0; --i) {
        buf[i] = (unsigned char)(0x80 + (bits & 0x3f));
        bits >>= 6;
    }

    /* Whatever is left of the value goes into the lead byte. */
    buf[0] = (unsigned char)(lead_marker[len] + bits);
    return len;
}
| | 50 | |
| | 51 | unsigned char* ucs2utf_alloc(uint16_t *instr) { |
| | 52 | uint16_t *p = instr; |
| | 53 | int inlen = 0; |
| | 54 | |
| | 55 | while(*p++) inlen++; |
| | 56 | unsigned char* outstr = new unsigned char[inlen * 8]; |
| | 57 | unsigned char* outp = outstr; |
| | 58 | memset(outstr, 0, inlen * 8); |
| | 59 | |
| | 60 | p = instr; |
| | 61 | while (*p) outstr += utf_char2bytes(*p++, outstr); |
| | 62 | return outp; |
| | 63 | } |
| | 64 | |