Changeset 34703
- Timestamp:
- 08/02/09 07:27:53 (4 years ago)
- Files:
-
- 1 modified
Legend:
- Unmodified
- Added
- Removed
-
lang/objective-cplusplus/i3/trunk/src/os-windows/msvc/win_iconv.c
r25302 r34703 1 1 /* 2 * iconv library implemented with Win32 API.2 * iconv library using Win32 API to conversion. 3 3 * 4 4 * This file is placed in the public domain. 5 5 * 6 * Maintainer: Yukihiro Nakadaira <yukihiro.nakadaira@gmail.com>6 * Last Change: 2009-07-06 7 7 * 8 * If $WINICONV_LIBICONV_DLL environment variable was defined, win_iconv 9 * loads the specified DLL dynamically and uses it. If loading the DLL 10 * or iconv_open() failed, falls back to internal conversion. 11 * $WINICONV_LIBICONV_DLL is a comma separated list. The first loadable 12 * DLL is used. The specified DLL should have iconv_open(), 13 * iconv_close() and iconv() functions. Or these functions can be 14 * libiconv_open(), libiconv_close() and libiconv(). 8 * ENVIRONMENT VARIABLE: 9 * WINICONV_LIBICONV_DLL 10 * If $WINICONV_LIBICONV_DLL is set, win_iconv uses the DLL. If 11 * loading the DLL or iconv_open() failed, falls back to internal 12 * conversion. If a few DLL are specified as comma separated list, 13 * the first loadable DLL is used. The DLL should have iconv_open(), 14 * iconv_close() and iconv(). Or libiconv_open(), libiconv_close() 15 * and libiconv(). 16 * (only available when USE_LIBICONV_DLL is defined at compile time) 15 17 * 16 18 * Win32 API does not support strict encoding conversion for some … … 26 28 27 29 #include <windows.h> 30 #include <errno.h> 28 31 #include <string.h> 29 32 #include <stdlib.h> 30 31 #ifndef _WIN32_WCE32 #include <errno.h>33 #else34 35 #ifdef _MSC_VER36 __declspec (thread) int errno = 0;37 #elif (MINGW32_MAJOR_VERSION >= 4)38 __thread int errno = 0;39 #else40 #error "tls required"41 #endif42 43 static int* _errno(void)44 {45 return &errno;46 }47 48 #define E2BIG 7 /* Arg list too long */49 #define ENOMEM 12 /* Not enough space */50 #define EINVAL 22 /* Invalid argument */51 #define EILSEQ 42 /* Illegal byte sequence */52 #undef GetProcAddress53 #define GetProcAddress(hModule, lpProcName) GetProcAddressW(hModule, L##lpProcName)54 #endif55 33 56 34 #if 0 … … 69 47 #define UNICODE_MODE_SWAPPED 2 70 48 71 #define FLAG_USE_BOM _ENDIAN149 #define FLAG_USE_BOM 1 72 50 #define FLAG_TRANSLIT 2 /* //TRANSLIT */ 73 51 #define FLAG_IGNORE 4 /* //IGNORE (not implemented) */ 74 75 #define return_error(code) \76 do { \77 errno = code; \78 return -1; \79 } while (0)80 81 #define xstrlcpy(dst, src, size) \82 do { \83 strncpy(dst, src, size); \84 dst[size - 1] = 0; \85 } while (0)86 87 #define xstrlcpyn(dst, src, srclen, size) \88 xstrlcpy(dst, src, xmin((srclen) + 1, size))89 90 #define xmin(a, b) ((a) < (b) ? (a) : (b))91 #define xmax(a, b) ((a) > (b) ? (a) : (b))92 93 #define STATIC_STRLEN(arr) (sizeof(arr) - 1)94 52 95 53 typedef unsigned char uchar; … … 164 122 165 123 static int load_mlang(); 166 static csconv_t make_csconv(const char *name);124 static int make_csconv(const char *name, csconv_t *cv); 167 125 static int name_to_codepage(const char *name); 168 126 static uint utf16_to_ucs4(const ushort *wbuf); 169 127 static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); 170 static int is_unicode(int codepage);171 128 static int mbtowc_flags(int codepage); 172 129 static int must_use_null_useddefaultchar(int codepage); 173 static void check_utf_bom(rec_iconv_t *cd, ushort *wbuf, int *wbufsize);174 130 static char *strrstr(const char *str, const char *token); 131 static char *xstrndup(const char *s, size_t n); 132 static int seterror(int err); 175 133 176 134 #if defined(USE_LIBICONV_DLL) … … 180 138 181 139 static HMODULE hwiniconv; 182 static HMODULE hlastdll; /* keep dll loaded for efficiency (unnecessary?) */183 140 #endif 184 141 … … 722 679 if (ConvertINetString != NULL) 723 680 return TRUE; 724 h = LoadLibrary( TEXT("mlang.dll"));681 h = LoadLibrary("mlang.dll"); 725 682 if (!h) 726 683 return FALSE; … … 741 698 cd = (rec_iconv_t *)calloc(1, sizeof(rec_iconv_t)); 742 699 if (cd == NULL) 743 {744 errno = ENOMEM;745 700 return (iconv_t)(-1); 746 }747 701 748 702 #if defined(USE_LIBICONV_DLL) 703 errno = 0; 749 704 if (libiconv_iconv_open(cd, tocode, fromcode)) 750 705 return (iconv_t)cd; 751 706 #endif 752 707 708 /* reset the errno to prevent reporting wrong error code. 709 * 0 for unsorted error. */ 710 errno = 0; 753 711 if (win_iconv_open(cd, tocode, fromcode)) 754 712 return (iconv_t)cd; 755 713 756 714 free(cd); 757 errno = EINVAL; 715 758 716 return (iconv_t)(-1); 759 717 } … … 786 744 win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) 787 745 { 788 cd->from = make_csconv(fromcode); 789 cd->to = make_csconv(tocode); 790 if (cd->from.codepage == -1 || cd->to.codepage == -1) 746 if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to)) 791 747 return FALSE; 792 748 cd->iconv_close = win_iconv_close; … … 811 767 int outsize; 812 768 int wsize; 813 DWORD mode; 769 DWORD frommode; 770 DWORD tomode; 814 771 uint wc; 815 772 compat_t *cp; … … 820 777 if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL) 821 778 { 779 tomode = cd->to.mode; 822 780 outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft); 823 781 if (outsize == -1) 782 { 783 cd->to.mode = tomode; 824 784 return (size_t)(-1); 785 } 825 786 *outbuf += outsize; 826 787 *outbytesleft -= outsize; 827 788 } 828 if (is_unicode(cd->from.codepage) && (cd->from.mode & UNICODE_MODE_SWAPPED))829 cd->from.codepage ^= 1;830 789 cd->from.mode = 0; 831 790 cd->to.mode = 0; … … 835 794 while (*inbytesleft != 0) 836 795 { 837 mode = cd->from.mode; 796 frommode = cd->from.mode; 797 tomode = cd->to.mode; 838 798 wsize = MB_CHAR_MAX; 839 799 840 800 insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize); 841 801 if (insize == -1) 802 { 803 cd->from.mode = frommode; 842 804 return (size_t)(-1); 843 844 if (is_unicode(cd->from.codepage) && !(cd->from.mode & UNICODE_MODE_BOM_DONE))845 {846 check_utf_bom(cd, wbuf, &wsize);847 cd->from.mode |= UNICODE_MODE_BOM_DONE;848 805 } 849 806 … … 886 843 if (outsize == -1) 887 844 { 888 cd->from.mode = mode; 845 cd->from.mode = frommode; 846 cd->to.mode = tomode; 889 847 return (size_t)(-1); 890 848 } … … 899 857 } 900 858 901 static csconv_t902 make_csconv(const char *_name )859 static int 860 make_csconv(const char *_name, csconv_t *cv) 903 861 { 904 862 CPINFOEX cpinfoex; 905 csconv_t cv;906 863 int use_compat = TRUE; 907 864 int flag = 0; 908 char name[128];865 char *name; 909 866 char *p; 910 867 911 xstrlcpy(name, _name, sizeof(name)); 868 name = xstrndup(_name, strlen(_name)); 869 if (name == NULL) 870 return FALSE; 912 871 913 872 /* check for option "enc_name//opt1//opt2" */ … … 923 882 } 924 883 925 cv.mode = 0; 926 cv.flags = flag; 927 cv.mblen = NULL; 928 cv.flush = NULL; 929 cv.compat = NULL; 930 cv.codepage = name_to_codepage(name); 931 if (cv.codepage == 1200 || cv.codepage == 1201) 932 { 933 cv.mbtowc = utf16_mbtowc; 934 cv.wctomb = utf16_wctomb; 935 if (_stricmp(name, "UTF-16") == 0 || 936 _stricmp(name, "UTF16") == 0 || 937 _stricmp(name, "UCS-2") == 0) 938 cv.flags |= FLAG_USE_BOM_ENDIAN; 939 } 940 else if (cv.codepage == 12000 || cv.codepage == 12001) 941 { 942 cv.mbtowc = utf32_mbtowc; 943 cv.wctomb = utf32_wctomb; 884 cv->mode = 0; 885 cv->flags = flag; 886 cv->mblen = NULL; 887 cv->flush = NULL; 888 cv->compat = NULL; 889 cv->codepage = name_to_codepage(name); 890 if (cv->codepage == 1200 || cv->codepage == 1201) 891 { 892 cv->mbtowc = utf16_mbtowc; 893 cv->wctomb = utf16_wctomb; 894 if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0) 895 cv->flags |= FLAG_USE_BOM; 896 } 897 else if (cv->codepage == 12000 || cv->codepage == 12001) 898 { 899 cv->mbtowc = utf32_mbtowc; 900 cv->wctomb = utf32_wctomb; 944 901 if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0) 945 cv .flags |= FLAG_USE_BOM_ENDIAN;946 } 947 else if (cv .codepage == 65001)948 { 949 cv .mbtowc = kernel_mbtowc;950 cv .wctomb = kernel_wctomb;951 cv .mblen = utf8_mblen;952 } 953 else if ((cv .codepage == 50220 || cv.codepage == 50221 || cv.codepage == 50222) && load_mlang())954 { 955 cv .mbtowc = iso2022jp_mbtowc;956 cv .wctomb = iso2022jp_wctomb;957 cv .flush = iso2022jp_flush;958 } 959 else if (cv .codepage == 51932 && load_mlang())960 { 961 cv .mbtowc = mlang_mbtowc;962 cv .wctomb = mlang_wctomb;963 cv .mblen = eucjp_mblen;964 } 965 else if (IsValidCodePage(cv .codepage)966 && GetCPInfoEx(cv .codepage, 0, &cpinfoex) != 0)967 { 968 cv .mbtowc = kernel_mbtowc;969 cv .wctomb = kernel_wctomb;902 cv->flags |= FLAG_USE_BOM; 903 } 904 else if (cv->codepage == 65001) 905 { 906 cv->mbtowc = kernel_mbtowc; 907 cv->wctomb = kernel_wctomb; 908 cv->mblen = utf8_mblen; 909 } 910 else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang()) 911 { 912 cv->mbtowc = iso2022jp_mbtowc; 913 cv->wctomb = iso2022jp_wctomb; 914 cv->flush = iso2022jp_flush; 915 } 916 else if (cv->codepage == 51932 && load_mlang()) 917 { 918 cv->mbtowc = mlang_mbtowc; 919 cv->wctomb = mlang_wctomb; 920 cv->mblen = eucjp_mblen; 921 } 922 else if (IsValidCodePage(cv->codepage) 923 && GetCPInfoEx(cv->codepage, 0, &cpinfoex) != 0) 924 { 925 cv->mbtowc = kernel_mbtowc; 926 cv->wctomb = kernel_wctomb; 970 927 if (cpinfoex.MaxCharSize == 1) 971 cv .mblen = sbcs_mblen;928 cv->mblen = sbcs_mblen; 972 929 else if (cpinfoex.MaxCharSize == 2) 973 cv .mblen = dbcs_mblen;930 cv->mblen = dbcs_mblen; 974 931 else 975 cv .mblen = mbcs_mblen;932 cv->mblen = mbcs_mblen; 976 933 } 977 934 else 978 935 { 979 936 /* not supported */ 980 cv.codepage = -1; 981 } 937 free(name); 938 errno = EINVAL; 939 return FALSE; 940 } 941 982 942 if (use_compat) 983 943 { 984 switch (cv.codepage) 985 { 986 case 932: cv.compat = cp932_compat; break; 987 case 20932: cv.compat = cp20932_compat; break; 988 case 51932: cv.compat = cp51932_compat; break; 989 case 50220: case 50221: case 50222: cv.compat = cp5022x_compat; break; 990 } 991 } 992 return cv; 944 switch (cv->codepage) 945 { 946 case 932: cv->compat = cp932_compat; break; 947 case 20932: cv->compat = cp20932_compat; break; 948 case 51932: cv->compat = cp51932_compat; break; 949 case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break; 950 } 951 } 952 953 free(name); 954 955 return TRUE; 993 956 } 994 957 … … 1045 1008 } 1046 1009 1047 static int1048 is_unicode(int codepage)1049 {1050 return (codepage == 1200 || codepage == 1201 ||1051 codepage == 12000 || codepage == 12001 ||1052 codepage == 65000 || codepage == 65001);1053 }1054 1055 1010 /* 1056 1011 * Check if codepage is one of those for which the dwFlags parameter … … 1094 1049 } 1095 1050 1096 static void1097 check_utf_bom(rec_iconv_t *cd, ushort *wbuf, int *wbufsize)1098 {1099 /* If we have a BOM, trust it, despite what the caller said */1100 if (wbuf[0] == 0xFFFE && (cd->from.flags & FLAG_USE_BOM_ENDIAN))1101 {1102 /* swap endian: 1200 <-> 1201 or 12000 <-> 12001 */1103 cd->from.codepage ^= 1;1104 cd->from.mode |= UNICODE_MODE_SWAPPED;1105 wbuf[0] = 0xFEFF;1106 }1107 1108 /*1109 * Remove BOM.1110 * Don't do this if "to" is Unicode,1111 * except if "to" is UTF-8.1112 */1113 if (wbuf[0] == 0xFEFF && (!is_unicode(cd->to.codepage) || cd->to.codepage == 65001))1114 *wbufsize = 0;1115 }1116 1117 1051 static char * 1118 1052 strrstr(const char *str, const char *token) … … 1127 1061 } 1128 1062 1063 static char * 1064 xstrndup(const char *s, size_t n) 1065 { 1066 char *p; 1067 1068 p = malloc(n + 1); 1069 if (p == NULL) 1070 return NULL; 1071 memcpy(p, s, n); 1072 p[n] = '\0'; 1073 return p; 1074 } 1075 1076 static int 1077 seterror(int err) 1078 { 1079 errno = err; 1080 return -1; 1081 } 1082 1129 1083 #if defined(USE_LIBICONV_DLL) 1130 1084 static int 1131 libiconv_iconv_open(rec_iconv_t *cd, const char * fromcode, const char *tocode)1085 libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) 1132 1086 { 1133 1087 HMODULE hlibiconv = NULL; 1134 1088 HMODULE hmsvcrt = NULL; 1135 char dllname[_MAX_PATH];1089 char *dllname; 1136 1090 const char *p; 1137 1091 const char *e; … … 1154 1108 else if (e == NULL) 1155 1109 e = p + strlen(p); 1156 xstrlcpyn(dllname, p, e - p, sizeof(dllname)); 1110 dllname = xstrndup(p, e - p); 1111 if (dllname == NULL) 1112 return FALSE; 1157 1113 hlibiconv = LoadLibrary(dllname); 1114 free(dllname); 1158 1115 if (hlibiconv != NULL) 1159 1116 { … … 1166 1123 break; 1167 1124 } 1168 }1169 1170 if (hlastdll != NULL)1171 {1172 /* decrement reference count */1173 FreeLibrary(hlastdll);1174 hlastdll = NULL;1175 1125 } 1176 1126 … … 1195 1145 || cd->iconv == NULL || cd->_errno == NULL) 1196 1146 goto failed; 1197 1198 /* increment reference count */1199 hlastdll = LoadLibrary(dllname);1200 1147 1201 1148 cd->cd = _iconv_open(tocode, fromcode); … … 1285 1232 int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1; 1286 1233 if (bufsize < len) 1287 return _error(EINVAL);1234 return seterror(EINVAL); 1288 1235 return len; 1289 1236 } … … 1304 1251 buf[1] >= 0x30 && buf[1] <= 0x39) len = 4; 1305 1252 else 1306 return _error(EINVAL);1253 return seterror(EINVAL); 1307 1254 return len; 1308 1255 } 1309 1256 else 1310 return _error(EINVAL);1257 return seterror(EINVAL); 1311 1258 } 1312 1259 … … 1324 1271 1325 1272 if (len == 0) 1326 return _error(EILSEQ);1273 return seterror(EILSEQ); 1327 1274 else if (bufsize < len) 1328 return _error(EINVAL);1275 return seterror(EINVAL); 1329 1276 return len; 1330 1277 } … … 1338 1285 { 1339 1286 if (bufsize < 2) 1340 return _error(EINVAL);1287 return seterror(EINVAL); 1341 1288 else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) 1342 return _error(EILSEQ);1289 return seterror(EILSEQ); 1343 1290 return 2; 1344 1291 } … … 1346 1293 { 1347 1294 if (bufsize < 3) 1348 return _error(EINVAL);1295 return seterror(EINVAL); 1349 1296 else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) 1350 1297 || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) 1351 return _error(EILSEQ);1298 return seterror(EILSEQ); 1352 1299 return 3; 1353 1300 } … … 1355 1302 { 1356 1303 if (bufsize < 2) 1357 return _error(EINVAL);1304 return seterror(EINVAL); 1358 1305 else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) 1359 1306 || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) 1360 return _error(EILSEQ);1307 return seterror(EILSEQ); 1361 1308 return 2; 1362 1309 } … … 1374 1321 (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); 1375 1322 if (*wbufsize == 0) 1376 return _error(EILSEQ);1323 return seterror(EILSEQ); 1377 1324 return len; 1378 1325 } … … 1387 1334 1388 1335 if (bufsize == 0) 1389 return _error(E2BIG);1336 return seterror(E2BIG); 1390 1337 if (!must_use_null_useddefaultchar(cv->codepage)) 1391 1338 { … … 1401 1348 { 1402 1349 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) 1403 return _error(E2BIG);1404 return _error(EILSEQ);1350 return seterror(E2BIG); 1351 return seterror(EILSEQ); 1405 1352 } 1406 1353 else if (usedDefaultChar) 1407 return _error(EILSEQ);1354 return seterror(EILSEQ); 1408 1355 else if (cv->mblen(cv, buf, len) != len) /* validate result */ 1409 return _error(EILSEQ);1356 return seterror(EILSEQ); 1410 1357 return len; 1411 1358 } … … 1433 1380 (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); 1434 1381 if (hr != S_OK || insize != len) 1435 return _error(EILSEQ);1382 return seterror(EILSEQ); 1436 1383 return len; 1437 1384 } … … 1448 1395 (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); 1449 1396 if (hr != S_OK || insize != wbufsize) 1450 return _error(EILSEQ);1397 return seterror(EILSEQ); 1451 1398 else if (bufsize < tmpsize) 1452 return _error(E2BIG);1399 return seterror(E2BIG); 1453 1400 else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) 1454 return _error(EILSEQ);1401 return seterror(EILSEQ); 1455 1402 memcpy(buf, tmpbuf, tmpsize); 1456 1403 return tmpsize; … … 1460 1407 utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) 1461 1408 { 1409 int codepage = cv->codepage; 1410 1411 /* swap endian: 1200 <-> 1201 */ 1412 if (cv->mode & UNICODE_MODE_SWAPPED) 1413 codepage ^= 1; 1414 1462 1415 if (bufsize < 2) 1463 return _error(EINVAL);1464 if (c v->codepage == 1200) /* little endian */1416 return seterror(EINVAL); 1417 if (codepage == 1200) /* little endian */ 1465 1418 wbuf[0] = (buf[1] << 8) | buf[0]; 1466 else if (c v->codepage == 1201) /* big endian */1419 else if (codepage == 1201) /* big endian */ 1467 1420 wbuf[0] = (buf[0] << 8) | buf[1]; 1421 1422 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) 1423 { 1424 cv->mode |= UNICODE_MODE_BOM_DONE; 1425 if (wbuf[0] == 0xFFFE) 1426 { 1427 cv->mode |= UNICODE_MODE_SWAPPED; 1428 *wbufsize = 0; 1429 return 2; 1430 } 1431 else if (wbuf[0] == 0xFEFF) 1432 { 1433 *wbufsize = 0; 1434 return 2; 1435 } 1436 } 1437 1468 1438 if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF) 1469 return _error(EILSEQ);1439 return seterror(EILSEQ); 1470 1440 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) 1471 1441 { 1472 1442 if (bufsize < 4) 1473 return _error(EINVAL);1474 if (c v->codepage == 1200) /* little endian */1443 return seterror(EINVAL); 1444 if (codepage == 1200) /* little endian */ 1475 1445 wbuf[1] = (buf[3] << 8) | buf[2]; 1476 else if (c v->codepage == 1201) /* big endian */1446 else if (codepage == 1201) /* big endian */ 1477 1447 wbuf[1] = (buf[2] << 8) | buf[3]; 1478 1448 if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF)) 1479 return _error(EILSEQ);1449 return seterror(EILSEQ); 1480 1450 *wbufsize = 2; 1481 1451 return 4; … … 1488 1458 utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) 1489 1459 { 1460 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) 1461 { 1462 int r; 1463 1464 cv->mode |= UNICODE_MODE_BOM_DONE; 1465 if (bufsize < 2) 1466 return seterror(E2BIG); 1467 if (cv->codepage == 1200) /* little endian */ 1468 memcpy(buf, "\xFF\xFE", 2); 1469 else if (cv->codepage == 1201) /* big endian */ 1470 memcpy(buf, "\xFE\xFF", 2); 1471 1472 r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); 1473 if (r == -1) 1474 return -1; 1475 return r + 2; 1476 } 1477 1490 1478 if (bufsize < 2) 1491 return _error(E2BIG);1479 return seterror(E2BIG); 1492 1480 if (cv->codepage == 1200) /* little endian */ 1493 1481 { … … 1503 1491 { 1504 1492 if (bufsize < 4) 1505 return _error(E2BIG);1493 return seterror(E2BIG); 1506 1494 if (cv->codepage == 1200) /* little endian */ 1507 1495 { … … 1522 1510 utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) 1523 1511 { 1512 int codepage = cv->codepage; 1524 1513 uint wc; 1525 1514 1515 /* swap endian: 12000 <-> 12001 */ 1516 if (cv->mode & UNICODE_MODE_SWAPPED) 1517 codepage ^= 1; 1518 1526 1519 if (bufsize < 4) 1527 return _error(EINVAL);1528 if (c v->codepage == 12000) /* little endian */1520 return seterror(EINVAL); 1521 if (codepage == 12000) /* little endian */ 1529 1522 wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; 1530 else if (c v->codepage == 12001) /* big endian */1523 else if (codepage == 12001) /* big endian */ 1531 1524 wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; 1525 1526 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) 1527 { 1528 cv->mode |= UNICODE_MODE_BOM_DONE; 1529 if (wc == 0xFFFE0000) 1530 { 1531 cv->mode |= UNICODE_MODE_SWAPPED; 1532 *wbufsize = 0; 1533 return 4; 1534 } 1535 else if (wc == 0x0000FEFF) 1536 { 1537 *wbufsize = 0; 1538 return 4; 1539 } 1540 } 1541 1532 1542 if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc) 1533 return _error(EILSEQ);1543 return seterror(EILSEQ); 1534 1544 ucs4_to_utf16(wc, wbuf, wbufsize); 1535 1545 return 4; … … 1541 1551 uint wc; 1542 1552 1553 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) 1554 { 1555 int r; 1556 1557 cv->mode |= UNICODE_MODE_BOM_DONE; 1558 if (bufsize < 4) 1559 return seterror(E2BIG); 1560 if (cv->codepage == 12000) /* little endian */ 1561 memcpy(buf, "\xFF\xFE\x00\x00", 4); 1562 else if (cv->codepage == 12001) /* big endian */ 1563 memcpy(buf, "\x00\x00\xFE\xFF", 4); 1564 1565 r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); 1566 if (r == -1) 1567 return -1; 1568 return r + 4; 1569 } 1570 1543 1571 if (bufsize < 4) 1544 return _error(E2BIG);1572 return seterror(E2BIG); 1545 1573 wc = utf16_to_ucs4(wbuf); 1546 1574 if (cv->codepage == 12000) /* little endian */ … … 1633 1661 { 1634 1662 if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) 1635 return _error(EINVAL);1663 return seterror(EINVAL); 1636 1664 } 1637 1665 else … … 1646 1674 } 1647 1675 /* not supported escape sequence */ 1648 return _error(EILSEQ);1676 return seterror(EILSEQ); 1649 1677 } 1650 1678 else if (buf[0] == iso2022_SO_seq[0]) … … 1673 1701 len = iesc[cs].len; 1674 1702 if (bufsize < len) 1675 return _error(EINVAL);1703 return seterror(EINVAL); 1676 1704 for (i = 0; i < len; ++i) 1677 1705 if (!(buf[i] < 0x80)) 1678 return _error(EILSEQ);1706 return seterror(EILSEQ); 1679 1707 esc_len = iesc[cs].esc_len; 1680 1708 memcpy(tmp, iesc[cs].esc, esc_len); … … 1700 1728 (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); 1701 1729 if (hr != S_OK || insize != len + esc_len) 1702 return _error(EILSEQ);1730 return seterror(EILSEQ); 1703 1731 1704 1732 /* Check for conversion error. Assuming defaultChar is 0x3F. */ … … 1706 1734 if (wbuf[0] == buf[0] 1707 1735 && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) 1708 return _error(EILSEQ);1736 return seterror(EILSEQ); 1709 1737 1710 1738 /* reset the mode for informal sequence */ … … 1739 1767 (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); 1740 1768 if (hr != S_OK || insize != wbufsize) 1741 return _error(EILSEQ);1769 return seterror(EILSEQ); 1742 1770 else if (bufsize < tmpsize) 1743 return _error(E2BIG);1771 return seterror(E2BIG); 1744 1772 1745 1773 if (tmpsize == 1) … … 1761 1789 if (iesc[i].esc == NULL) 1762 1790 /* not supported escape sequence */ 1763 return _error(EILSEQ);1791 return seterror(EILSEQ); 1764 1792 } 1765 1793 … … 1776 1804 /* ascii should be converted from ascii */ 1777 1805 if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) 1778 return _error(EILSEQ);1806 return seterror(EILSEQ); 1779 1807 else if (tmpsize < esc_len + len) 1780 return _error(EILSEQ);1808 return seterror(EILSEQ); 1781 1809 1782 1810 if (cv->mode == ISO2022_MODE(cs, shift)) … … 1805 1833 1806 1834 if (bufsize < len + esc_len) 1807 return _error(E2BIG);1835 return seterror(E2BIG); 1808 1836 memcpy(buf, tmp, len + esc_len); 1809 1837 cv->mode = ISO2022_MODE(cs, shift); … … 1825 1853 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; 1826 1854 if (bufsize < esc_len) 1827 return _error(E2BIG);1855 return seterror(E2BIG); 1828 1856 1829 1857 esc_len = 0; … … 1933 1961 r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); 1934 1962 fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, stdout); 1935 if (r == (size_t)(-1) && errno != E INVAL && errno != E2BIG)1963 if (r == (size_t)(-1) && errno != E2BIG && (errno != EINVAL || feof(in))) 1936 1964 { 1937 1965 perror("conversion error");
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)