Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

762 lines
21 KiB

  1. #include "stdafx.h"
  2. #include "conveng.h"
  3. #include "convdata.tbl"
// This file contains 3 parts:
// First part: some basic service functions for Ansi char format conversion,
// distance/advance calculation, and a binary search algorithm copied from STL
// Second part: Unicode to Ansi
// Third part: Ansi to Unicode
// ****************************************************************************
// First part, Ansi char conversion functions
//
// This part does not use any of the data tables in the .tbl file
// ****************************************************************************
  14. // Binary search algorithm
  15. // Copy from STL, only very little modify
  16. template <class RandomAccessIterator, class T>
  17. RandomAccessIterator __lower_bound(RandomAccessIterator first,
  18. RandomAccessIterator last, const T& value) {
  19. INT_PTR len = last - first;
  20. INT_PTR half;
  21. RandomAccessIterator middle;
  22. while (len > 0) {
  23. half = len / 2;
  24. middle = first + half;
  25. if (*middle < value) {
  26. first = middle + 1;
  27. len = len - half - 1;
  28. } else {
  29. len = half;
  30. }
  31. }
  32. return first;
  33. }
  34. template <class RandomAccessIterator, class T>
  35. RandomAccessIterator __upper_bound(RandomAccessIterator first,
  36. RandomAccessIterator last, const T& value) {
  37. DWORD len = last - first;
  38. DWORD half;
  39. RandomAccessIterator middle;
  40. while (len > 0) {
  41. half = len / 2;
  42. middle = first + half;
  43. if (!(value < *middle)) {
  44. first = middle + 1;
  45. len = len - half - 1;
  46. } else {
  47. len = half;
  48. }
  49. }
  50. return first;
  51. }
  52. template<class T>
  53. inline ValueIn(
  54. T Value,
  55. T Low,
  56. T High)
  57. {
  58. return (Value >= Low && Value < High);
  59. }
  60. inline BOOL IsValidSurrogateLeadWord(
  61. WCHAR wchUnicode)
  62. {
  63. return ValueIn(wchUnicode, cg_wchSurrogateLeadWordLow, cg_wchSurrogateLeadWordHigh);
  64. }
  65. inline BOOL IsValidSurrogateTailWord(
  66. WCHAR wchUnicode)
  67. {
  68. return ValueIn(wchUnicode, cg_wchSurrogateTailWordLow, cg_wchSurrogateTailWordHigh);
  69. }
  70. inline BOOL IsValidQByteAnsiLeadByte(
  71. BYTE byAnsi)
  72. {
  73. return ValueIn(byAnsi, cg_byQByteAnsiLeadByteLow, cg_byQByteAnsiLeadByteHigh);
  74. }
  75. inline BOOL IsValidQByteAnsiTailByte(
  76. BYTE byAnsi)
  77. {
  78. return ValueIn(byAnsi, cg_byQByteAnsiTailByteLow, cg_byQByteAnsiTailByteHigh);
  79. }
  80. // Generate QByte Ansi. The Ansi char is in DWORD format,
  81. // in another word, it's in reverse order of GB18030 standard
  82. DWORD QByteAnsiBaseAddOffset(
  83. DWORD dwBaseAnsi, // In reverse order
  84. int nOffset)
  85. {
  86. DWORD dwAnsi = dwBaseAnsi;
  87. PBYTE pByte = (PBYTE)&dwAnsi;
  88. // dwOffset should less than 1M
  89. ASSERT (nOffset < 0x100000);
  90. nOffset += pByte[0] - 0x30;
  91. pByte[0] = 0x30 + nOffset % 10;
  92. nOffset /= 10;
  93. nOffset += pByte[1] - 0x81;
  94. pByte[1] = 0x81 + nOffset % 126;
  95. nOffset /= 126;
  96. nOffset += pByte[2] - 0x30;
  97. pByte[2] = 0x30 + nOffset % 10;
  98. nOffset /= 10;
  99. nOffset += pByte[3] - 0x81;
  100. pByte[3] = 0x81 + nOffset % 126;
  101. nOffset /= 126;
  102. ASSERT(nOffset == 0);
  103. return dwAnsi;
  104. }
  105. // Get "distance" of 2 QByte Ansi
  106. int CalcuDistanceOfQByteAnsi(
  107. DWORD dwAnsi1, // In reverse order
  108. DWORD dwAnsi2) // In reverse order
  109. {
  110. signed char* pschAnsi1 = (signed char*)&dwAnsi1;
  111. signed char* pschAnsi2 = (signed char*)&dwAnsi2;
  112. int nDistance = 0;
  113. nDistance += (pschAnsi1[0] - pschAnsi2[0]);
  114. nDistance += (pschAnsi1[1] - pschAnsi2[1])*10;
  115. nDistance += (pschAnsi1[2] - pschAnsi2[2])*1260;
  116. nDistance += (pschAnsi1[3] - pschAnsi2[3])*12600;
  117. return nDistance;
  118. }
  119. // Reverse 4 Bytes order, from DWORD format to GB format,
  120. // or GB to DWORD
  121. void ReverseQBytesOrder(
  122. PBYTE pByte)
  123. {
  124. BYTE by;
  125. by = pByte[0];
  126. pByte[0] = pByte[3];
  127. pByte[3] = by;
  128. by = pByte[1];
  129. pByte[1] = pByte[2];
  130. pByte[2] = by;
  131. return;
  132. }
  133. // ****************************************************************************
  134. // Second part, Unicode to Ansi
  135. // ****************************************************************************
// ------------------------------------------------
// Two helper functions for UnicodeToAnsi.
// Each returns an Ansi char code;
// the Ansi code is in GB standard byte order (not Word value order)
//
  141. // Unicode to double bytes Ansi char
  142. //
  143. // Return Ansi char code, 0 means fail (internal error, etc.)
  144. //
  145. WORD UnicodeToDByteAnsi(
  146. WCHAR wchUnicode)
  147. {
  148. char achAnsiBuf[4];
  149. WORD wAnsi = 0;
  150. int cLen = 0;
  151. // Code changed from GBK to GB18030, or code not compatible
  152. // from CP936 to CP54936
  153. for (int i = 0; i < sizeof(asAnsiCodeChanged)/sizeof(SAnsiCodeChanged); i++) {
  154. if (wchUnicode == asAnsiCodeChanged[i].wchUnicode) {
  155. wAnsi = asAnsiCodeChanged[i].wchAnsiNew;
  156. goto Exit;
  157. }
  158. }
  159. // Not in Changed code list, that is same with GBK, or CP936
  160. // (Most DByte Ansi char code should compatible from GBK to GB18030)
  161. cLen = WideCharToMultiByte(936,
  162. WC_COMPOSITECHECK, &wchUnicode, 1,
  163. achAnsiBuf, sizeof(achAnsiBuf)-1, NULL, NULL);
  164. if (cLen != 2) {
  165. ASSERT(cLen == 2);
  166. wAnsi = 0;
  167. } else {
  168. wAnsi = *(PWORD)achAnsiBuf;
  169. }
  170. Exit:
  171. return wAnsi;
  172. }
  173. // Unicode to quad bytes Ansi char
  174. //
  175. // Return Ansi char code
  176. // 0 means fail (interal error)
  177. //
  178. DWORD UnicodeToQByteAnsi(
  179. int nSection,
  180. int nOffset)
  181. {
  182. DWORD dwBaseAnsi;
  183. if (nSection < 0 || nSection >= sizeof(adwAnsiQBytesAreaStartValue)/sizeof(DWORD)) {
  184. ASSERT(FALSE);
  185. return 0;
  186. }
  187. dwBaseAnsi = adwAnsiQBytesAreaStartValue[nSection];
  188. // Check adwAnsiQByteAreaStartValue array is correctly
  189. #ifdef _DEBUG
  190. int ncQByteAnsiNum = 0;
  191. for (int i = 0; i < nSection; i++) {
  192. // Calcu QByte Ansi char numbers
  193. ncQByteAnsiNum += awchAnsiDQByteBound[2*i+1] - awchAnsiDQByteBound[2*i];
  194. }
  195. ASSERT(dwBaseAnsi == QByteAnsiBaseAddOffset(cg_dwQByteAnsiStart, ncQByteAnsiNum));
  196. #endif
  197. DWORD dwAnsi = QByteAnsiBaseAddOffset(dwBaseAnsi, nOffset);
  198. // Value order to standard order
  199. ReverseQBytesOrder((PBYTE)(&dwAnsi));
  200. return dwAnsi;
  201. }
// ---------------------------------------------------------
// Two functions that translate 2-byte Unicode (BMP)
// and 4-byte Unicode (Surrogate) to Ansi
  205. // 2 bytes Unicode (BMP)
  206. // Return Ansi str len, when success, should be 2 or 4;
  207. // return 0 means fail (internal error, etc.)
  208. int UnicodeToAnsi(
  209. WCHAR wchUnicode,
  210. char* pchAnsi,
  211. DWORD dwBufSize)
  212. {
  213. // Classic Unicode, not support surrogate in this function
  214. ASSERT(!IsValidSurrogateLeadWord(wchUnicode)
  215. && !IsValidSurrogateTailWord(wchUnicode));
  216. DWORD lAnsiLen = 0;
  217. const WORD* p;
  218. INT_PTR i;
  219. // ASCII, 0 - 0x7f
  220. if (wchUnicode <= 0x7f) {
  221. *pchAnsi = (char)wchUnicode;
  222. lAnsiLen = 1;
  223. goto Exit;
  224. }
  225. // BMP, 4 byte or 2 byte
  226. p = __lower_bound(awchAnsiDQByteBound, awchAnsiDQByteBound
  227. + sizeof(awchAnsiDQByteBound)/sizeof(WCHAR), wchUnicode);
  228. if (p == awchAnsiDQByteBound
  229. + sizeof(awchAnsiDQByteBound)/sizeof(WCHAR)) {
  230. p --;
  231. } else if (wchUnicode < *p) {
  232. p --;
  233. } else if (wchUnicode == *p) {
  234. } else {
  235. ASSERT(FALSE);
  236. }
  237. i = p - awchAnsiDQByteBound;
  238. ASSERT(i >= 0);
  239. // Stop when >= *(((PWORD)asAnsi2ByteArea) + i);
  240. if (i%2) { // Odd, in 2 bytes area
  241. WORD wAnsi = UnicodeToDByteAnsi(wchUnicode);
  242. if (wAnsi && dwBufSize >= 2) {
  243. *(UNALIGNED WORD*)pchAnsi = wAnsi;
  244. lAnsiLen = 2;
  245. } else {
  246. lAnsiLen = 0;
  247. }
  248. } else { // Duel, in 4 bytes area
  249. DWORD dwAnsi = UnicodeToQByteAnsi
  250. ((int)i/2, wchUnicode - awchAnsiDQByteBound[i]);
  251. if (dwAnsi && dwBufSize >= 4) {
  252. *(UNALIGNED DWORD*)pchAnsi = dwAnsi;
  253. lAnsiLen = 4;
  254. } else {
  255. lAnsiLen = 0;
  256. }
  257. }
  258. Exit:
  259. return lAnsiLen;
  260. }
  261. // 4 bytes Unicode (Surrogate)
  262. // Return Ansi str length, when success, should be 4
  263. // return 0 means fail (Buffer overflow)
  264. int SurrogateToAnsi(
  265. PCWCH pwchUnicode,
  266. PCHAR pchAnsi,
  267. DWORD dwBufSize)
  268. {
  269. ASSERT(IsValidSurrogateLeadWord(pwchUnicode[0]));
  270. ASSERT(IsValidSurrogateTailWord(pwchUnicode[1]));
  271. // dwOffset is ISO char code - 0x10000
  272. DWORD dwOffset = ((pwchUnicode[0] - cg_wchSurrogateLeadWordLow)<<10)
  273. + (pwchUnicode[1] - cg_wchSurrogateTailWordLow)
  274. + 0x10000 - 0x10000;
  275. if (dwBufSize < 4) {
  276. return 0;
  277. }
  278. *(UNALIGNED DWORD*)pchAnsi = QByteAnsiBaseAddOffset
  279. (cg_dwQByteAnsiToSurrogateStart, dwOffset);
  280. ReverseQBytesOrder((PBYTE)pchAnsi);
  281. return 4;
  282. }
  283. // API: high level service for Unicode to Ansi
  284. // return result Ansi str length (in byte)
  285. // return -1 means fail (Buffer overflow, internal error, etc.)
  286. int UnicodeStrToAnsiStr(
  287. PCWCH pwchUnicodeStr,
  288. int ncUnicodeStr, // in WCHAR
  289. PCHAR pchAnsiStrBuf,
  290. int ncAnsiStrBufSize) // in BYTE
  291. {
  292. int ncAnsiStr = 0;
  293. int ncAnsiCharSize;
  294. for (int i = 0; i < ncUnicodeStr; i++, pwchUnicodeStr++) {
  295. if (ncAnsiStr > (ncAnsiStrBufSize-4)) {
  296. // Buffer overflow
  297. break;
  298. }
  299. if (IsValidSurrogateLeadWord(pwchUnicodeStr[0])) {
  300. if ((i+1 < ncUnicodeStr)
  301. && (IsValidSurrogateTailWord(pwchUnicodeStr[1]))) {
  302. ncAnsiCharSize = SurrogateToAnsi(pwchUnicodeStr, pchAnsiStrBuf, 4);
  303. ASSERT(ncAnsiCharSize == 4);
  304. if (ncAnsiCharSize == 0) {
  305. ASSERT(FALSE);
  306. break;
  307. }
  308. ncAnsiStr += ncAnsiCharSize;
  309. pchAnsiStrBuf += ncAnsiCharSize;
  310. pwchUnicodeStr++;
  311. i++;
  312. } else {
  313. // Invalide Uncode char, skip
  314. }
  315. } else if (*pwchUnicodeStr == 0) {
  316. *pchAnsiStrBuf = 0;
  317. pchAnsiStrBuf ++;
  318. ncAnsiStr ++;
  319. } else {
  320. ncAnsiCharSize = UnicodeToAnsi(*pwchUnicodeStr, pchAnsiStrBuf, 4);
  321. if (ncAnsiCharSize == 0) {
  322. ASSERT(FALSE);
  323. break;
  324. }
  325. pchAnsiStrBuf += ncAnsiCharSize;
  326. ncAnsiStr += ncAnsiCharSize;
  327. }
  328. }
  329. if (i < ncUnicodeStr) { return -1; }
  330. return ncAnsiStr;
  331. }
  332. // ****************************************************************************
  333. // Third part, Ansi to Unicode
  334. // ****************************************************************************
  335. // Return Unicode number (number always equal to 1 when success)
  336. // return 0 if can't find corresponding Unicode
  337. // -1 means fail (internal error, etc.)
  338. int QByteAnsiToSingleUnicode(
  339. DWORD dwAnsi,
  340. PWCH pwchUnicode)
  341. {
  342. const DWORD* p;
  343. INT_PTR i;
  344. // 0x8431a439(cg_dwQByteAnsiToBMPLast) to 0x85308130 haven't Unicode corresponding
  345. // 0x85308130 to 0x90308130(cg_dwQByteAnsiToSurrogateStart) are reserved zone,
  346. // haven't Unicode corresponding
  347. if (dwAnsi > cg_dwQByteAnsiToBMPLast) {
  348. return 0;
  349. }
  350. // Invalid input value
  351. if (dwAnsi < adwAnsiQBytesAreaStartValue[0]) {
  352. return -1;
  353. }
  354. p = __lower_bound(adwAnsiQBytesAreaStartValue,
  355. adwAnsiQBytesAreaStartValue + sizeof(adwAnsiQBytesAreaStartValue)/sizeof(DWORD),
  356. dwAnsi);
  357. if (p == adwAnsiQBytesAreaStartValue
  358. + sizeof(adwAnsiQBytesAreaStartValue)/sizeof(DWORD)) {
  359. p --;
  360. } else if (dwAnsi < *p) {
  361. p --;
  362. } else if (dwAnsi == *p) {
  363. } else {
  364. ASSERT(FALSE);
  365. }
  366. i = p - adwAnsiQBytesAreaStartValue;
  367. if (i < 0) {
  368. ASSERT(i >= 0);
  369. return -1;
  370. }
  371. *pwchUnicode = awchAnsiDQByteBound[2*i] + CalcuDistanceOfQByteAnsi(dwAnsi, *p);
  372. #ifdef _DEBUG
  373. {
  374. int nAnsiCharDistance = CalcuDistanceOfQByteAnsi(dwAnsi, *p);
  375. ASSERT(nAnsiCharDistance >= 0);
  376. WCHAR wchUnicodeDbg;
  377. if ((p+1) < adwAnsiQBytesAreaStartValue
  378. + sizeof(adwAnsiQBytesAreaStartValue)/sizeof(DWORD)) {
  379. nAnsiCharDistance = CalcuDistanceOfQByteAnsi(dwAnsi, *(p+1));
  380. wchUnicodeDbg = awchAnsiDQByteBound[2*i+1] + nAnsiCharDistance;
  381. } else if ((p+1) == adwAnsiQBytesAreaStartValue
  382. + sizeof(adwAnsiQBytesAreaStartValue)/sizeof(DWORD)) {
  383. nAnsiCharDistance = CalcuDistanceOfQByteAnsi(dwAnsi, 0x8431A530);
  384. wchUnicodeDbg = 0x10000 + nAnsiCharDistance;
  385. } else {
  386. ASSERT(FALSE);
  387. }
  388. ASSERT(nAnsiCharDistance < 0);
  389. ASSERT(wchUnicodeDbg == *pwchUnicode);
  390. }
  391. #endif
  392. return 1;
  393. }
  394. // Return Unicode number (number always 2 when success)
  395. // return 0 if can't find corresponding Unicode
  396. int QByteAnsiToDoubleUnicode(
  397. DWORD dwAnsi,
  398. PWCH pwchUnicode)
  399. {
  400. int nDistance = CalcuDistanceOfQByteAnsi(dwAnsi, cg_dwQByteAnsiToSurrogateStart);
  401. ASSERT (nDistance >= 0);
  402. if (nDistance >= 0x100000) {
  403. return 0;
  404. }
  405. pwchUnicode[1] = nDistance % 0x400 + 0xDC00;
  406. pwchUnicode[0] = nDistance / 0x400 + 0xD800;
  407. return 2;
  408. }
  409. // Return Unicode number (1 or 2 when success)
  410. // return 0 if can't find corresponding Unicode
  411. // return -1 if fail (Buffer overflow, invalid GB char code input,
  412. // internal error, etc.)
  413. int QByteAnsiToUnicode(
  414. const BYTE* pbyAnsiChar,
  415. PWCH pwchUnicode,
  416. DWORD dwBufLen) // In WCHAR
  417. {
  418. DWORD dwAnsi;
  419. int nLen = -1;
  420. if ( IsValidQByteAnsiLeadByte(pbyAnsiChar[0])
  421. && IsValidQByteAnsiTailByte(pbyAnsiChar[1])
  422. && IsValidQByteAnsiLeadByte(pbyAnsiChar[2])
  423. && IsValidQByteAnsiTailByte(pbyAnsiChar[3])) {
  424. } else {
  425. return -1; // Invalid char
  426. }
  427. dwAnsi = *(UNALIGNED DWORD*)pbyAnsiChar;
  428. ReverseQBytesOrder((PBYTE)(&dwAnsi));
  429. if (dwAnsi >= cg_dwQByteAnsiToSurrogateStart) {
  430. if (dwBufLen >= 2) {
  431. nLen = QByteAnsiToDoubleUnicode(dwAnsi, pwchUnicode);
  432. }
  433. } else {
  434. if (dwBufLen >= 1) {
  435. nLen = QByteAnsiToSingleUnicode(dwAnsi, pwchUnicode);
  436. }
  437. }
  438. return nLen;
  439. }
  440. // Unicode to double bytes Ansi char
  441. // Return: Unicode char code, 0 means fail (internal error, etc.)
  442. WCHAR DByteAnsiToUnicode(
  443. const BYTE* pbyAnsi)
  444. {
  445. WORD wAnsi = *(UNALIGNED WORD*)pbyAnsi;
  446. int cLen = 1;
  447. WCHAR wchUnicode;
  448. // Code changed from GBK to GB18030, or code not compatible
  449. // from CP936 to CP54936
  450. for (int i = 0; i < sizeof(asAnsiCodeChanged)/sizeof(SAnsiCodeChanged); i++) {
  451. if (wAnsi == asAnsiCodeChanged[i].wchAnsiNew) {
  452. wchUnicode = asAnsiCodeChanged[i].wchUnicode;
  453. goto Exit;
  454. }
  455. }
  456. // Not in Changed code list, that is same with GBK, or CP936
  457. // (Most DByte Ansi char code should compatible from GBK to GB18030)
  458. cLen = MultiByteToWideChar(936, MB_PRECOMPOSED,
  459. (PCCH)pbyAnsi, 2, &wchUnicode, 1);
  460. if (cLen != 1) {
  461. wchUnicode = 0;
  462. }
  463. Exit:
  464. return wchUnicode;
  465. }
  466. // API: High level service for Ansi to Unicode
  467. // return Unicode str length (in WCHAR)
  468. // return -1 means fail (Buffer overflow, etc.)
  469. int AnsiStrToUnicodeStr(
  470. const BYTE* pbyAnsiStr,
  471. int ncAnsiStrSize, // In char
  472. PWCH pwchUnicodeBuf,
  473. int ncBufLen) // In WCHAR
  474. {
  475. int nCharLen;
  476. int ncUnicodeBuf = 0;
  477. for (int i = 0; i < ncAnsiStrSize; ) {
  478. if (ncUnicodeBuf > (ncBufLen-4)) {
  479. // Buffer overflow
  480. break;
  481. }
  482. // 1 byte Ansi char
  483. if (*pbyAnsiStr < 0x80) {
  484. *pwchUnicodeBuf = (WCHAR)*pbyAnsiStr;
  485. pwchUnicodeBuf ++;
  486. ncUnicodeBuf ++;
  487. i++;
  488. pbyAnsiStr++;
  489. // 2 byte Ansi char
  490. } else if ((i+1 < ncAnsiStrSize) && pbyAnsiStr[1] >= 0x40) {
  491. *pwchUnicodeBuf = DByteAnsiToUnicode(pbyAnsiStr);
  492. if (*pwchUnicodeBuf == 0) {
  493. *pwchUnicodeBuf = '?';
  494. }
  495. pwchUnicodeBuf ++;
  496. ncUnicodeBuf ++;
  497. i += 2;
  498. pbyAnsiStr += 2;
  499. // 4 byte Ansi char
  500. } else if ((i+3 < ncAnsiStrSize)
  501. && IsValidQByteAnsiLeadByte(pbyAnsiStr[0])
  502. && IsValidQByteAnsiTailByte(pbyAnsiStr[1])
  503. && IsValidQByteAnsiLeadByte(pbyAnsiStr[2])
  504. && IsValidQByteAnsiTailByte(pbyAnsiStr[3])) {
  505. // QByte GB char
  506. nCharLen = QByteAnsiToUnicode(pbyAnsiStr, pwchUnicodeBuf, 4);
  507. if (nCharLen < 0) {
  508. ASSERT(FALSE); // Invalid Ansi char input, or buffer overflow, etc.
  509. // Should never happen but an internal error
  510. break;
  511. } else if (nCharLen == 0) { // hasn't corresponding Unicode Char
  512. *pwchUnicodeBuf = '?';
  513. pwchUnicodeBuf ++;
  514. ncUnicodeBuf ++;
  515. } else if (nCharLen > 0) {
  516. ASSERT(nCharLen <= 2);
  517. pwchUnicodeBuf += nCharLen;
  518. ncUnicodeBuf += nCharLen;
  519. } else {
  520. ASSERT(FALSE);
  521. }
  522. i += 4;
  523. pbyAnsiStr += 4;
  524. // Invalid Ansi char
  525. } else {
  526. // Invalid
  527. i++;
  528. pbyAnsiStr++;
  529. }
  530. }
  531. if (i < ncAnsiStrSize) { return -1; }
  532. return ncUnicodeBuf;
  533. }
  534. // ******************************************************
  535. // Testing program
  536. // ******************************************************
  537. /*
  538. "\u0080", <0x81;0x30;0x81;0x30>
  539. "\u00A3", <0x81;0x30;0x84;0x35>
  540. "\u00A4", <0xA1;0xE8>
  541. "\u00A5", <0x81;0x30;0x84;0x36>
  542. "\u00A6", <0x81;0x30;0x84;0x37>
  543. "\u00A7", <0xA1;0xEC>
  544. "\u00A8", <0xA1;0xA7>
  545. "\u00A9", <0x81;0x30;0x84;0x38>
  546. "\u00AF", <0x81;0x30;0x85;0x34>
  547. "\u00B0", <0xA1;0xE3>
  548. "\u00B1", <0xA1;0xC0>
  549. "\u00B2", <0x81;0x30;0x85;0x35>
  550. {0x20AC, 0xe3a2},
  551. {0x01f9, 0xbfa8},
  552. {0x303e, 0x89a9},
  553. {0x2ff0, 0x8aa9},
  554. {0x2ff1, 0x8ba9},
  555. 50EF 836A
  556. 50F0 836B
  557. 50F1 836C
  558. 50F2 836D
  559. */
  560. #if 0
// Manual smoke test for both conversion directions.
//
// Runs UnicodeStrToAnsiStr over a fixed Unicode sample and AnsiStrToUnicodeStr
// over a fixed Ansi sample. The return values are not checked and the output
// buffers are freed right after each call, so the results can only be
// inspected from a debugger. Always returns 0.
// The trailing comment on each sample value gives the encoding the author
// expected for it.
int test (void)
{
    // Unicode input sample; the last element is a zero terminator that is
    // included in the length passed to the converter.
    const WCHAR awchUnicodeStr[] = {0x01, 0x7f, 0x80, 0x81, 0x82,
        0xa2,
        0xa3, // 0x81;0x30;0x84;0x35
        0xa4, // 0xA1;0xE8
        0xa5, // 0x81;0x30;0x84;0x36
        0xa6, // 0x81;0x30;0x84;0x37
        0xaf, // 0x81;0x30;0x85;0x34
        0xb0, // 0xA1;0xE3
        0xb1, // 0xA1;0xC0
        0xb6, // 0x81;0x30;0x85;0x39
        0xb7, // 0xA1;0xA4
        // Some normal DByte Ansi char
        0x50ef, // 0x83, 0x6A
        0x50f2, // 0x83, 0x6D
        // Some ansi char code changed in new standard
        0x20ac, // 0xa2, 0xe3
        0xE76C, // not (0xa2, 0xe3), should be some QByte char
        0x2ff0, // 0xa9, 0x8A
        0x2ff1, // 0xa9, 0x8B
        0x4723, // 0xFE, 0x80
        // Ansi char around DC00 to E000
        0xd7ff, // 0x83, 0x36, 0xC7, 0x38
        0xe76c, // 0x83, 0x36, 0xC7, 0x39
        0xE76B, // 0xA2, 0xB0
        0xffff, // 0x84, 0x31, 0xa4, 0x39,
        0x00};
    // Output buffer: 4 bytes per input WCHAR plus 5 spare bytes.
    char* pchAnsiStr = new char[sizeof(awchUnicodeStr)*2+5];
    UnicodeStrToAnsiStr(awchUnicodeStr, sizeof(awchUnicodeStr)/sizeof(WCHAR),
        pchAnsiStr, sizeof(awchUnicodeStr)*2+5);
    delete[] pchAnsiStr;
    // Ansi input sample, a mix of four-byte and two-byte chars.
    BYTE abyAnsiStr2[] = {
        0x81, 0x30, 0x81, 0x30,
        0x81, 0x30, 0x84, 0x35,
        0xA1, 0xE8,
        0x81, 0x30, 0x84, 0x36,
        0x81, 0x30, 0x84, 0x37,
        0xA1, 0xEC,
        0xA1, 0xA7,
        0x81, 0x30, 0x84, 0x38,
        0x81, 0x30, 0x85, 0x34,
        0xA1, 0xE3,
        0xA1, 0xC0,
        0x81, 0x30, 0x85, 0x35,
        // Testing D800 to DE00
        0x82, 0x35, 0x8f, 0x33, // 0x9FA6
        0x83, 0x36, 0xC7, 0x38, // 0xD7FF
        0xA2, 0xB0, // 0xE76B
        0x83, 0x36, 0xC7, 0x39, // 0xE76C
        // Testing last char in BMP
        0x84, 0x31, 0xa4, 0x39, // 0xFFFF
        // Some char code changed in new GB standard
        0xa2, 0xe3, // 0x20AC,
        0xa8, 0xbf, // 0x01f9,
        0xa9, 0x89, // 0x303e,
        0xa9, 0x8a, // 0x2ff0,
        0xa9, 0x8b, // 0x2ff1,
        0xFE, 0x9F, // 0x4dae
        0x83, 0x6A, // 50EF
        0x83, 0x6B, // 50F0
        0x83, 0x6C, // 50F1
        0x83, 0x6D // 50F2
    };
    // Output buffer: one WCHAR per input byte plus 3 spare WCHARs.
    WCHAR* pwchUnicodeStr2 = new WCHAR[sizeof(abyAnsiStr2)+3];
    AnsiStrToUnicodeStr(abyAnsiStr2, sizeof(abyAnsiStr2),
        pwchUnicodeStr2, sizeof(abyAnsiStr2)+3);
    delete[] pwchUnicodeStr2;
    return 0;
}
  631. #endif