Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1427 lines
43 KiB

  1. /*++
  2. Copyright (c) 1991-1999, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. c_is2022.c
  5. Abstract:
  6. This file contains the main functions for this module.
  7. External Routines in this file:
  8. DllEntry
  9. NlsDllCodePageTranslation
  10. Revision History:
  11. 10-30-96 JulieB Created.
  12. --*/
  13. ////////////////////////////////////////////////////////////////////////////
  14. //
  15. // 50220 ISO-2022-JP Japanese JIS X 0202-1984 with no halfwidth Katakana
  16. // 50221 ISO-2022-JP Japanese JIS X 0202-1984 with <ESC>(I for halfwidth Katakana
  17. // 50222 ISO-2022-JP Japanese JIS X 0201-1989 with <ESC>(J+SO for halfwidth Katakana
  18. // ;RFC 1468
  19. //
  20. // 50225 ISO-2022-KR Korean KSC-5601-1987 ;RFC 1557
  21. //
  22. // 50227 ISO 2022-CN Simplified Chinese ;RFC 1922:GB-2312-80
  23. // 50229 ISO 2022-CN Traditional Chinese ;RFC 1922:CNS-11643-1,CNS-11643-2
  24. //
  25. // 52936 HZ-GB2312 Simplified Chinese
  26. //
  27. ////////////////////////////////////////////////////////////////////////////
  28. //
  29. // Include Files.
  30. //
  31. #include <share.h>
  32. //
  33. // Macro Definitions.
  34. //
  //
  //  NLS_CODEPAGE maps a code page id handled by this module to an entry
  //  of NLS_CP.  The last decimal digit of the id selects the entry.
  //
  #define NLS_CODEPAGE(cp)            (NLS_CP[(cp) % 10])

  //
  //  Control bytes recognised in the encoded streams, and the lead byte
  //  that marks halfwidth Katakana in the escape-free multibyte form.
  //
  #define ESCAPE                      ((BYTE)0x1B)
  #define SHIFT_OUT                   ((BYTE)0x0E)
  #define SHIFT_IN                    ((BYTE)0x0F)
  #define LEADBYTE_HALFWIDTH          ((BYTE)0x8E)

  //
  //  Conversion modes.  Each value is also used as an index into NLS_CP.
  //
  #define MODE_HALFWIDTH_KATAKANA     0
  #define MODE_JIS_0208               1
  #define MODE_JIS_0212               2
  #define MODE_KSC_5601               5
  #define MODE_HZ                     6
  #define MODE_GB_2312                7
  #define MODE_CNS_11643_1            9
  #define MODE_CNS_11643_2            10
  #define MODE_ASCII                  11
  49. //
  50. // Global Variables.
  51. //
  52. DWORD NLS_CP[] =
  53. {
  54. 20932, // 50220 ISO-2022-JP, MODE_HALFWIDTH_KATAKANA
  55. 20932, // 50221 ISO-2022-JP, MODE_JIS_0208
  56. 20932, // 50222 ISO-2022-JP, MODE_JIS_0212
  57. 0,
  58. 0,
  59. 20949, // 50225 ISO-2022-KR, MODE_KSC_5601
  60. 20936, // 52936 HZ-GB2312, MODE_HZ
  61. 20936, // 50227 ISO-2022-CN, MODE_GB_2312
  62. 0,
  63. 20000, // 50229 ISO-2022-CN, MODE_CNS_11643_1
  64. 20000, // 50229 ISO-2022-CN, MODE_CNS_11643_2
  65. 0 // MODE_ASCII
  66. };
  67. //
  68. // Forward Declarations.
  69. //
  70. DWORD
  71. ParseMB_CP5022J(
  72. DWORD CodePage,
  73. LPSTR lpMultiByteStr,
  74. int cchMultiByte,
  75. LPSTR lpMBNoEscStr,
  76. int cchMBCount);
  77. DWORD
  78. ParseMB_CP5022_579(
  79. DWORD CodePage,
  80. LPSTR lpMultiByteStr,
  81. int cchMultiByte,
  82. LPSTR lpMBNoEscStr,
  83. int cchMBCount);
  84. DWORD
  85. ParseMB_CP52936(
  86. LPSTR lpMultiByteStr,
  87. int cchMultiByte,
  88. LPSTR lpMBNoEscStr,
  89. int cchMBCount);
  90. DWORD
  91. MBToWC_CP5022X(
  92. LPSTR lpMultiByteStr,
  93. int cchMultiByte,
  94. LPWSTR lpWideCharStr,
  95. int cchWideChar);
  96. DWORD
  97. MBToWC_CP52936(
  98. LPSTR lpMultiByteStr,
  99. int cchMultiByte,
  100. LPWSTR lpWideCharStr,
  101. int cchWideChar);
  102. //-------------------------------------------------------------------------//
  103. // DLL ENTRY POINT //
  104. //-------------------------------------------------------------------------//
  105. ////////////////////////////////////////////////////////////////////////////
  106. //
  107. // DllEntry
  108. //
  109. // DLL Entry initialization procedure.
  110. //
  111. // 10-30-96 JulieB Created.
  112. ////////////////////////////////////////////////////////////////////////////
  113. BOOL DllEntry(
  114. HANDLE hModule,
  115. DWORD dwReason,
  116. LPVOID lpRes)
  117. {
  118. switch (dwReason)
  119. {
  120. case ( DLL_THREAD_ATTACH ) :
  121. {
  122. return (TRUE);
  123. }
  124. case ( DLL_THREAD_DETACH ) :
  125. {
  126. return (TRUE);
  127. }
  128. case ( DLL_PROCESS_ATTACH ) :
  129. {
  130. return (TRUE);
  131. }
  132. case ( DLL_PROCESS_DETACH ) :
  133. {
  134. return (TRUE);
  135. }
  136. }
  137. return (FALSE);
  138. hModule;
  139. lpRes;
  140. }
  141. //-------------------------------------------------------------------------//
  142. // EXTERNAL ROUTINES //
  143. //-------------------------------------------------------------------------//
  144. ////////////////////////////////////////////////////////////////////////////
  145. //
  146. // NlsDllCodePageTranslation
  147. //
  148. // This routine is the main exported procedure for the functionality in
  149. // this DLL. All calls to this DLL must go through this function.
  150. //
  151. // 10-30-96 JulieB Created.
  152. ////////////////////////////////////////////////////////////////////////////
  153. DWORD NlsDllCodePageTranslation(
  154. DWORD CodePage,
  155. DWORD dwFlags,
  156. LPSTR lpMultiByteStr,
  157. int cchMultiByte,
  158. LPWSTR lpWideCharStr,
  159. int cchWideChar,
  160. LPCPINFO lpCPInfo)
  161. {
  162. DWORD NlsCodePage = NLS_CODEPAGE(CodePage);
  163. if (!IsValidCodePage(NlsCodePage))
  164. {
  165. SetLastError(ERROR_INVALID_PARAMETER);
  166. return (0);
  167. }
  168. switch (dwFlags)
  169. {
  170. case ( NLS_CP_CPINFO ) :
  171. {
  172. memset(lpCPInfo, 0, sizeof(CPINFO));
  173. lpCPInfo->MaxCharSize = 5;
  174. lpCPInfo->DefaultChar[0] = 0x3f;
  175. //
  176. // The lead-byte does not apply here, leave them all NULL.
  177. //
  178. return (TRUE);
  179. }
  180. case ( NLS_CP_MBTOWC ) :
  181. {
  182. if (cchMultiByte == -1)
  183. {
  184. cchMultiByte = strlen(lpMultiByteStr) + 1;
  185. }
  186. switch (CodePage)
  187. {
  188. case (50220) :
  189. case (50221) :
  190. case (50222) :
  191. case (50225) :
  192. case (50227) :
  193. case (50229) :
  194. {
  195. return (MBToWC_CP5022X( lpMultiByteStr,
  196. cchMultiByte,
  197. lpWideCharStr,
  198. cchWideChar ));
  199. }
  200. case (52936) :
  201. {
  202. return (MBToWC_CP52936( lpMultiByteStr,
  203. cchMultiByte,
  204. lpWideCharStr,
  205. cchWideChar ));
  206. }
  207. }
  208. break;
  209. }
  210. case ( NLS_CP_WCTOMB ) :
  211. {
  212. int cchMBCount;
  213. LPSTR lpMBNoEscStr;
  214. if (cchWideChar == -1)
  215. {
  216. cchWideChar = wcslen(lpWideCharStr) + 1;
  217. }
  218. lpMBNoEscStr = (LPSTR)NLS_ALLOC_MEM(cchWideChar * sizeof(WCHAR));
  219. if (lpMBNoEscStr == NULL)
  220. {
  221. SetLastError(ERROR_OUTOFMEMORY);
  222. return (0);
  223. }
  224. cchMBCount = WideCharToMultiByte( NlsCodePage,
  225. WC_NO_BEST_FIT_CHARS,
  226. lpWideCharStr,
  227. cchWideChar,
  228. lpMBNoEscStr,
  229. cchWideChar * sizeof(WCHAR),
  230. NULL,
  231. NULL );
  232. if (cchMBCount != 0)
  233. {
  234. switch (CodePage)
  235. {
  236. case (50220) :
  237. case (50221) :
  238. case (50222) :
  239. {
  240. cchMBCount = ParseMB_CP5022J( CodePage,
  241. lpMultiByteStr,
  242. cchMultiByte,
  243. lpMBNoEscStr,
  244. cchMBCount );
  245. break;
  246. }
  247. case (50225) :
  248. case (50227) :
  249. case (50229) :
  250. {
  251. cchMBCount = ParseMB_CP5022_579( CodePage,
  252. lpMultiByteStr,
  253. cchMultiByte,
  254. lpMBNoEscStr,
  255. cchMBCount );
  256. break;
  257. }
  258. case (52936) :
  259. {
  260. cchMBCount = ParseMB_CP52936( lpMultiByteStr,
  261. cchMultiByte,
  262. lpMBNoEscStr,
  263. cchMBCount );
  264. break;
  265. }
  266. }
  267. }
  268. NLS_FREE_MEM (lpMBNoEscStr);
  269. return (cchMBCount);
  270. }
  271. }
  272. //
  273. // This shouldn't happen since this function gets called by
  274. // the NLS API routines.
  275. //
  276. SetLastError(ERROR_INVALID_PARAMETER);
  277. return (0);
  278. }
  279. //-------------------------------------------------------------------------//
  280. // INTERNAL ROUTINES //
  281. //-------------------------------------------------------------------------//
  282. ////////////////////////////////////////////////////////////////////////////
  283. //
  284. // MBToWC_CP5022X
  285. //
  286. // This routine does the translations from ISO-2022 to Unicode.
  287. //
  288. ////////////////////////////////////////////////////////////////////////////
  289. DWORD MBToWC_CP5022X(
  290. LPSTR lpMultiByteStr,
  291. int cchMultiByte,
  292. LPWSTR lpWideCharStr,
  293. int cchWideChar)
  294. {
  295. int ctr, cchMBTemp = 0, cchWCCount = 0;
  296. LPSTR lpMBTempStr, lpMBNoEscStr, lpMBStrStart;
  297. WORD wMode, wModePrev, wModeSO;
  298. LPWSTR lpWCTempStr;
  299. int rc;
  300. //
  301. // Allocate a buffer of the appropriate size.
  302. // Use sizeof(WCHAR) because size could potentially double if
  303. // the buffer contains all halfwidth Katakanas.
  304. //
  305. lpMBStrStart = (LPSTR)NLS_ALLOC_MEM(cchMultiByte * sizeof(WCHAR));
  306. if (lpMBStrStart == NULL)
  307. {
  308. SetLastError(ERROR_OUTOFMEMORY);
  309. return (0);
  310. }
  311. lpWCTempStr = (LPWSTR)NLS_ALLOC_MEM(cchMultiByte * sizeof(WCHAR));
  312. if (lpWCTempStr == NULL)
  313. {
  314. SetLastError(ERROR_OUTOFMEMORY);
  315. return (0);
  316. }
  317. if (cchWideChar)
  318. {
  319. *lpWideCharStr = 0;
  320. }
  321. lpMBTempStr = lpMBNoEscStr = lpMBStrStart;
  322. wModePrev = wMode = wModeSO = MODE_ASCII;
  323. //
  324. // Remove esc sequence, then convert to Unicode.
  325. //
  326. for (ctr = 0; ctr < cchMultiByte;)
  327. {
  328. if ((BYTE)lpMultiByteStr[ctr] == ESCAPE)
  329. {
  330. wMode = wModeSO = MODE_ASCII;
  331. if (ctr >= (cchMultiByte - 2))
  332. {
  333. //
  334. // Incomplete escape sequence.
  335. //
  336. }
  337. else if ((BYTE)lpMultiByteStr[ctr + 1] == '(')
  338. {
  339. if ((BYTE)lpMultiByteStr[ctr + 2] == 'B') // <esc>(B
  340. {
  341. wMode = wModeSO = MODE_ASCII;
  342. ctr += 3;
  343. }
  344. else if ((BYTE)lpMultiByteStr[ctr + 2] == 'J') // <esc>(J
  345. {
  346. wMode = MODE_ASCII;
  347. wModeSO = MODE_HALFWIDTH_KATAKANA;
  348. ctr += 3;
  349. }
  350. else if ((BYTE)lpMultiByteStr[ctr + 2] == 'I') // <esc>(I
  351. {
  352. wMode = wModeSO = MODE_HALFWIDTH_KATAKANA;
  353. ctr += 3;
  354. }
  355. }
  356. else if ((BYTE)lpMultiByteStr[ctr + 1] == '$')
  357. {
  358. if (((BYTE)lpMultiByteStr[ctr + 2] == '@') || // <esc>$@
  359. ((BYTE)lpMultiByteStr[ctr + 2] == 'B')) // <esc>$B
  360. {
  361. wMode = wModeSO = MODE_JIS_0208;
  362. ctr += 3;
  363. }
  364. else
  365. {
  366. if (ctr >= (cchMultiByte - 3))
  367. {
  368. //
  369. // Imcomplete escape sequence.
  370. //
  371. }
  372. else if ((BYTE)lpMultiByteStr[ctr + 2] == '(')
  373. {
  374. if (((BYTE)lpMultiByteStr[ctr + 3] == '@') || // <esc>$(@
  375. ((BYTE)lpMultiByteStr[ctr + 3] == 'B')) // <esc>$(B
  376. {
  377. wMode = wModeSO = MODE_JIS_0208;
  378. ctr += 4;
  379. }
  380. else if ((BYTE)lpMultiByteStr[ctr + 3] == 'D') // <esc>$(D
  381. {
  382. wMode = wModeSO = MODE_JIS_0212;
  383. ctr += 4;
  384. }
  385. }
  386. else if ((BYTE)lpMultiByteStr[ctr + 2] == ')')
  387. {
  388. if ((BYTE)lpMultiByteStr[ctr + 3] == 'C') // <esc>$)C
  389. {
  390. wMode = wModeSO = MODE_KSC_5601;
  391. ctr += 4;
  392. }
  393. else if ((BYTE)lpMultiByteStr[ctr + 3] == 'A') // <esc>$)A
  394. {
  395. wMode = wModeSO = MODE_GB_2312;
  396. ctr += 4;
  397. }
  398. else if ((BYTE)lpMultiByteStr[ctr + 3] == 'G') // <esc>$)G
  399. {
  400. wMode = wModeSO = MODE_CNS_11643_1;
  401. ctr += 4;
  402. }
  403. }
  404. else if (((BYTE)lpMultiByteStr[ctr + 2] == '*') && // <esc>$*H
  405. ((BYTE)lpMultiByteStr[ctr + 3] == 'H'))
  406. {
  407. wMode = wModeSO = MODE_CNS_11643_2;
  408. ctr += 4;
  409. }
  410. }
  411. }
  412. else if (lpMultiByteStr[ctr + 1] == '&')
  413. {
  414. if (ctr >= (cchMultiByte - 5))
  415. {
  416. //
  417. // Incomplete escape sequence.
  418. //
  419. }
  420. else if (((BYTE)lpMultiByteStr[ctr + 2] == '@') &&
  421. ((BYTE)lpMultiByteStr[ctr + 3] == ESCAPE) &&
  422. ((BYTE)lpMultiByteStr[ctr + 4] == '$') &&
  423. ((BYTE)lpMultiByteStr[ctr + 5] == 'B'))
  424. {
  425. wMode = wModeSO = MODE_JIS_0208;
  426. ctr += 6;
  427. }
  428. }
  429. }
  430. else if ((BYTE)lpMultiByteStr[ctr] == SHIFT_OUT)
  431. {
  432. wMode = wModeSO;
  433. ctr++;
  434. }
  435. else if ((BYTE)lpMultiByteStr[ctr] == SHIFT_IN)
  436. {
  437. wMode = MODE_ASCII;
  438. ctr++;
  439. }
  440. switch (wMode)
  441. {
  442. case ( MODE_JIS_0208 ) :
  443. case ( MODE_KSC_5601 ) :
  444. case ( MODE_GB_2312 ) :
  445. case ( MODE_CNS_11643_1 ) :
  446. {
  447. //
  448. // To handle errors, we need to check:
  449. // 1. if trailbyte is there
  450. // 2. if code is valid
  451. //
  452. while (ctr < cchMultiByte && lpMultiByteStr[ctr] == SHIFT_OUT)
  453. {
  454. ctr++;
  455. }
  456. while ((ctr < (cchMultiByte - 1)) &&
  457. (lpMultiByteStr[ctr] != ESCAPE) &&
  458. (lpMultiByteStr[ctr] != SHIFT_IN))
  459. {
  460. *lpMBTempStr++ = lpMultiByteStr[ctr++] | 0x80;
  461. *lpMBTempStr++ = lpMultiByteStr[ctr++] | 0x80;
  462. cchMBTemp += 2;
  463. }
  464. break;
  465. }
  466. case ( MODE_JIS_0212 ) :
  467. case ( MODE_CNS_11643_2 ) :
  468. {
  469. while (ctr < cchMultiByte && lpMultiByteStr[ctr] == SHIFT_OUT)
  470. {
  471. ctr++;
  472. }
  473. while ((ctr < (cchMultiByte - 1)) &&
  474. (lpMultiByteStr[ctr] != ESCAPE) &&
  475. (lpMultiByteStr[ctr] != SHIFT_IN))
  476. {
  477. *lpMBTempStr++ = lpMultiByteStr[ctr++] | 0x80;
  478. *lpMBTempStr++ = lpMultiByteStr[ctr++];
  479. cchMBTemp += 2;
  480. }
  481. break;
  482. }
  483. case ( MODE_HALFWIDTH_KATAKANA ) :
  484. {
  485. while (ctr < cchMultiByte && lpMultiByteStr[ctr] == SHIFT_OUT)
  486. {
  487. ctr++;
  488. }
  489. while ((ctr < cchMultiByte) &&
  490. (lpMultiByteStr[ctr] != ESCAPE) &&
  491. (lpMultiByteStr[ctr] != SHIFT_IN))
  492. {
  493. *lpMBTempStr++ = (BYTE)0x8E;
  494. *lpMBTempStr++ = lpMultiByteStr[ctr++] | 0x80;
  495. cchMBTemp += 2;
  496. }
  497. break;
  498. }
  499. default : // MODE_ASCII
  500. {
  501. while (ctr < cchMultiByte && lpMultiByteStr[ctr] == SHIFT_IN)
  502. {
  503. ctr++;
  504. }
  505. while ((ctr < cchMultiByte) &&
  506. (lpMultiByteStr[ctr] != ESCAPE) &&
  507. (lpMultiByteStr[ctr] != SHIFT_OUT))
  508. {
  509. *lpMBTempStr++ = lpMultiByteStr[ctr++];
  510. cchMBTemp++;
  511. }
  512. }
  513. }
  514. if (cchMBTemp == 0)
  515. {
  516. break;
  517. }
  518. rc = MultiByteToWideChar( NLS_CP[wMode],
  519. 0,
  520. lpMBNoEscStr,
  521. cchMBTemp,
  522. lpWCTempStr,
  523. cchMultiByte );
  524. if (cchWideChar)
  525. {
  526. if ((cchWCCount + rc) > cchWideChar)
  527. {
  528. //
  529. // Output buffer is too small.
  530. //
  531. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  532. cchWCCount = 0;
  533. break;
  534. }
  535. else
  536. {
  537. memcpy( lpWideCharStr + cchWCCount,
  538. lpWCTempStr,
  539. rc * sizeof(WCHAR) );
  540. }
  541. }
  542. cchWCCount += rc;
  543. lpMBNoEscStr += cchMBTemp;
  544. cchMBTemp = 0;
  545. }
  546. //
  547. // Clean up memory allocations.
  548. //
  549. NLS_FREE_MEM(lpMBStrStart);
  550. NLS_FREE_MEM(lpWCTempStr);
  551. //
  552. // Return the result.
  553. //
  554. return (cchWCCount);
  555. }
  556. ////////////////////////////////////////////////////////////////////////////
  557. //
  558. // ParseMB_CP5022J
  559. //
  560. // --> ISO-2022-JP
  561. //
  562. // for 50220 : convert all halfwidth katakana to fullwidth
  563. // 50221 : use <esc>(I for halfwidth katakana
  564. // 50222 : use <esc>(J<SO> for halfwidth katakana
  565. //
  566. ////////////////////////////////////////////////////////////////////////////
  567. DWORD ParseMB_CP5022J(
  568. DWORD CodePage,
  569. LPSTR lpMultiByteStr,
  570. int cchMultiByte,
  571. LPSTR lpMBNoEscStr,
  572. int cchMBCount)
  573. {
  574. int ctr, cchMBTemp = 0;
  575. WORD wMode, wModeSO;
  576. LPSTR lpMBTempStr;
  577. static WORD HalfToFullWidthKanaTable[] =
  578. {
  579. 0xa1a3, // 0x8ea1 : Halfwidth Ideographic Period
  580. 0xa1d6, // 0x8ea2 : Halfwidth Opening Corner Bracket
  581. 0xa1d7, // 0x8ea3 : Halfwidth Closing Corner Bracket
  582. 0xa1a2, // 0x8ea4 : Halfwidth Ideographic Comma
  583. 0xa1a6, // 0x8ea5 : Halfwidth Katakana Middle Dot
  584. 0xa5f2, // 0x8ea6 : Halfwidth Katakana Wo
  585. 0xa5a1, // 0x8ea7 : Halfwidth Katakana Small A
  586. 0xa5a3, // 0x8ea8 : Halfwidth Katakana Small I
  587. 0xa5a5, // 0x8ea9 : Halfwidth Katakana Small U
  588. 0xa5a7, // 0x8eaa : Halfwidth Katakana Small E
  589. 0xa5a9, // 0x8eab : Halfwidth Katakana Small O
  590. 0xa5e3, // 0x8eac : Halfwidth Katakana Small Ya
  591. 0xa5e5, // 0x8ead : Halfwidth Katakana Small Yu
  592. 0xa5e7, // 0x8eae : Halfwidth Katakana Small Yo
  593. 0xa5c3, // 0x8eaf : Halfwidth Katakana Small Tu
  594. 0xa1bc, // 0x8eb0 : Halfwidth Katakana-Hiragana Prolonged Sound Mark
  595. 0xa5a2, // 0x8eb1 : Halfwidth Katakana A
  596. 0xa5a4, // 0x8eb2 : Halfwidth Katakana I
  597. 0xa5a6, // 0x8eb3 : Halfwidth Katakana U
  598. 0xa5a8, // 0x8eb4 : Halfwidth Katakana E
  599. 0xa5aa, // 0x8eb5 : Halfwidth Katakana O
  600. 0xa5ab, // 0x8eb6 : Halfwidth Katakana Ka
  601. 0xa5ad, // 0x8eb7 : Halfwidth Katakana Ki
  602. 0xa5af, // 0x8eb8 : Halfwidth Katakana Ku
  603. 0xa5b1, // 0x8eb9 : Halfwidth Katakana Ke
  604. 0xa5b3, // 0x8eba : Halfwidth Katakana Ko
  605. 0xa5b5, // 0x8ebb : Halfwidth Katakana Sa
  606. 0xa5b7, // 0x8ebc : Halfwidth Katakana Si
  607. 0xa5b9, // 0x8ebd : Halfwidth Katakana Su
  608. 0xa5bb, // 0x8ebe : Halfwidth Katakana Se
  609. 0xa5bd, // 0x8ebf : Halfwidth Katakana So
  610. 0xa5bf, // 0x8ec0 : Halfwidth Katakana Ta
  611. 0xa5c1, // 0x8ec1 : Halfwidth Katakana Ti
  612. 0xa5c4, // 0x8ec2 : Halfwidth Katakana Tu
  613. 0xa5c6, // 0x8ec3 : Halfwidth Katakana Te
  614. 0xa5c8, // 0x8ec4 : Halfwidth Katakana To
  615. 0xa5ca, // 0x8ec5 : Halfwidth Katakana Na
  616. 0xa5cb, // 0x8ec6 : Halfwidth Katakana Ni
  617. 0xa5cc, // 0x8ec7 : Halfwidth Katakana Nu
  618. 0xa5cd, // 0x8ec8 : Halfwidth Katakana Ne
  619. 0xa5ce, // 0x8ec9 : Halfwidth Katakana No
  620. 0xa5cf, // 0x8eca : Halfwidth Katakana Ha
  621. 0xa5d2, // 0x8ecb : Halfwidth Katakana Hi
  622. 0xa5d5, // 0x8ecc : Halfwidth Katakana Hu
  623. 0xa5d8, // 0x8ecd : Halfwidth Katakana He
  624. 0xa5db, // 0x8ece : Halfwidth Katakana Ho
  625. 0xa5de, // 0x8ecf : Halfwidth Katakana Ma
  626. 0xa5df, // 0x8ed0 : Halfwidth Katakana Mi
  627. 0xa5e0, // 0x8ed1 : Halfwidth Katakana Mu
  628. 0xa5e1, // 0x8ed2 : Halfwidth Katakana Me
  629. 0xa5e2, // 0x8ed3 : Halfwidth Katakana Mo
  630. 0xa5e4, // 0x8ed4 : Halfwidth Katakana Ya
  631. 0xa5e6, // 0x8ed5 : Halfwidth Katakana Yu
  632. 0xa5e8, // 0x8ed6 : Halfwidth Katakana Yo
  633. 0xa5e9, // 0x8ed7 : Halfwidth Katakana Ra
  634. 0xa5ea, // 0x8ed8 : Halfwidth Katakana Ri
  635. 0xa5eb, // 0x8ed9 : Halfwidth Katakana Ru
  636. 0xa5ec, // 0x8eda : Halfwidth Katakana Re
  637. 0xa5ed, // 0x8edb : Halfwidth Katakana Ro
  638. 0xa5ef, // 0x8edc : Halfwidth Katakana Wa
  639. 0xa5f3, // 0x8edd : Halfwidth Katakana N
  640. 0xa1ab, // 0x8ede : Halfwidth Katakana Voiced Sound Mark
  641. 0xa1ac // 0x8edf : Halfwidth Katakana Semi-Voiced Sound Mark
  642. };
  643. wMode = wModeSO = MODE_ASCII;
  644. //
  645. // Code page 50220 does not use halfwidth Katakana.
  646. // Convert to fullwidth.
  647. //
  648. if (CodePage == 50220)
  649. {
  650. for (ctr = 0; ctr < cchMBCount; ctr++)
  651. {
  652. WORD wFWKana;
  653. if ((BYTE)lpMBNoEscStr[ctr] == LEADBYTE_HALFWIDTH)
  654. {
  655. wFWKana = HalfToFullWidthKanaTable[(BYTE)lpMBNoEscStr[ctr + 1] - 0xA1];
  656. lpMBNoEscStr[ctr++] = HIBYTE(wFWKana);
  657. lpMBNoEscStr[ctr] = LOBYTE(wFWKana);
  658. }
  659. }
  660. }
  661. lpMBTempStr = lpMultiByteStr;
  662. for (ctr = 0; ctr < cchMBCount; ctr++)
  663. {
  664. if ((BYTE)lpMBNoEscStr[ctr] == LEADBYTE_HALFWIDTH)
  665. {
  666. //
  667. // It's halfwidth Katakana.
  668. //
  669. ctr++;
  670. if (CodePage == 50222)
  671. {
  672. if (wMode != MODE_HALFWIDTH_KATAKANA)
  673. {
  674. if (wModeSO != MODE_HALFWIDTH_KATAKANA)
  675. {
  676. if (cchMultiByte)
  677. {
  678. if (cchMBTemp < (cchMultiByte - 2))
  679. {
  680. *lpMBTempStr++ = ESCAPE;
  681. *lpMBTempStr++ = '(';
  682. *lpMBTempStr++ = 'J';
  683. }
  684. else
  685. {
  686. //
  687. // Output buffer is too small.
  688. //
  689. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  690. return (0);
  691. }
  692. }
  693. cchMBTemp += 3;
  694. wModeSO = MODE_HALFWIDTH_KATAKANA;
  695. }
  696. if (cchMultiByte)
  697. {
  698. if (cchMBTemp < cchMultiByte)
  699. {
  700. *lpMBTempStr++ = SHIFT_OUT;
  701. }
  702. else
  703. {
  704. //
  705. // Output buffer is too small.
  706. //
  707. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  708. return (0);
  709. }
  710. }
  711. cchMBTemp++;
  712. wMode = MODE_HALFWIDTH_KATAKANA;
  713. }
  714. }
  715. else // CodePage = 50221
  716. {
  717. if (wMode != MODE_HALFWIDTH_KATAKANA)
  718. {
  719. if (cchMultiByte)
  720. {
  721. if (cchMBTemp < (cchMultiByte - 2))
  722. {
  723. *lpMBTempStr++ = ESCAPE;
  724. *lpMBTempStr++ = '(';
  725. *lpMBTempStr++ = 'I';
  726. }
  727. else
  728. {
  729. //
  730. // Output buffer is too small.
  731. //
  732. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  733. return (0);
  734. }
  735. }
  736. cchMBTemp += 3;
  737. wMode = MODE_HALFWIDTH_KATAKANA;
  738. }
  739. }
  740. if (cchMultiByte)
  741. {
  742. if (cchMBTemp < cchMultiByte)
  743. {
  744. *lpMBTempStr++ = lpMBNoEscStr[ctr] & 0x7F;
  745. }
  746. else
  747. {
  748. //
  749. // Output buffer is too small.
  750. //
  751. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  752. return (0);
  753. }
  754. }
  755. cchMBTemp++;
  756. }
  757. else if (IsDBCSLeadByteEx(20932, lpMBNoEscStr[ctr]))
  758. {
  759. //
  760. // It's a double byte character.
  761. //
  762. if (lpMBNoEscStr[ctr + 1] & 0x80) // JIS X 0208
  763. {
  764. if (wMode != MODE_JIS_0208)
  765. {
  766. if (cchMultiByte)
  767. {
  768. if (cchMBTemp < (cchMultiByte - 2))
  769. {
  770. *lpMBTempStr++ = ESCAPE;
  771. *lpMBTempStr++ = '$';
  772. *lpMBTempStr++ = 'B';
  773. }
  774. else
  775. {
  776. //
  777. // Output buffer is too small.
  778. //
  779. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  780. return (0);
  781. }
  782. }
  783. cchMBTemp += 3;
  784. wMode = MODE_JIS_0208;
  785. }
  786. }
  787. else // JIS X 0212
  788. {
  789. if (wMode != MODE_JIS_0212)
  790. {
  791. if (cchMultiByte)
  792. {
  793. if (cchMBTemp < (cchMultiByte - 3))
  794. {
  795. *lpMBTempStr++ = ESCAPE;
  796. *lpMBTempStr++ = '$';
  797. *lpMBTempStr++ = '(';
  798. *lpMBTempStr++ = 'D';
  799. }
  800. else
  801. {
  802. //
  803. // Output buffer is too small.
  804. //
  805. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  806. return (0);
  807. }
  808. }
  809. cchMBTemp += 4;
  810. wMode = MODE_JIS_0212;
  811. }
  812. }
  813. if (ctr >= (cchMBCount - 1))
  814. {
  815. //
  816. // Missing trail byte.
  817. //
  818. break;
  819. }
  820. if (cchMultiByte)
  821. {
  822. if (cchMBTemp < (cchMultiByte - 1))
  823. {
  824. *lpMBTempStr++ = lpMBNoEscStr[ctr] & 0x7F;
  825. *lpMBTempStr++ = lpMBNoEscStr[ctr + 1] & 0x7F;
  826. }
  827. else
  828. {
  829. //
  830. // Output buffer is too small.
  831. //
  832. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  833. return (0);
  834. }
  835. }
  836. ctr++;
  837. cchMBTemp += 2;
  838. }
  839. else // Single byte Char
  840. {
  841. if (wMode != MODE_ASCII)
  842. {
  843. if (cchMultiByte)
  844. {
  845. if (cchMBTemp < (cchMultiByte - 2))
  846. {
  847. *lpMBTempStr++ = ESCAPE;
  848. *lpMBTempStr++ = '(';
  849. *lpMBTempStr++ = 'B';
  850. }
  851. else
  852. {
  853. //
  854. // Output buffer is too small.
  855. //
  856. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  857. return (0);
  858. }
  859. }
  860. cchMBTemp += 3;
  861. wMode = MODE_ASCII;
  862. }
  863. if (cchMultiByte)
  864. {
  865. if (cchMBTemp < cchMultiByte)
  866. {
  867. *lpMBTempStr++ = lpMBNoEscStr[ctr];
  868. }
  869. else
  870. {
  871. //
  872. // Output buffer is too small.
  873. //
  874. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  875. return (0);
  876. }
  877. }
  878. cchMBTemp++;
  879. }
  880. }
  881. if (cchMultiByte && (cchMBTemp > cchMultiByte))
  882. {
  883. //
  884. // Output buffer is too small.
  885. //
  886. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  887. return (0);
  888. }
  889. return (cchMBTemp);
  890. }
  891. ////////////////////////////////////////////////////////////////////////////
  892. //
  893. // ParseMB_CP5022_579
  894. //
  895. // KSC --> ISO-2022-KR (CP-50225)
  896. // GB --> ISO-2022-CN (CP-50227)
  897. // CNS --> ISO-2022-CN (CP-50229)
  898. //
  899. ////////////////////////////////////////////////////////////////////////////
  900. DWORD ParseMB_CP5022_579(
  901. DWORD CodePage,
  902. LPSTR lpMultiByteStr,
  903. int cchMultiByte,
  904. LPSTR lpMBNoEscStr,
  905. int cchMBCount)
  906. {
  907. int ctr, cchMBTemp = 0;
  908. WORD wMode, wModeSO, wModeCP;
  909. char EscChar;
  910. LPSTR lpMBTempStr;
  911. lpMBTempStr = lpMultiByteStr;
  912. wMode = wModeSO = MODE_ASCII;
  913. wModeCP = (WORD)(CodePage % 10);
  914. EscChar = ( wModeCP == MODE_KSC_5601 ? 'C' :
  915. (wModeCP == MODE_GB_2312 ? 'A' : 'G'));
  916. for (ctr = 0; ctr < cchMBCount; ctr++)
  917. {
  918. if (IsDBCSLeadByteEx(NLS_CODEPAGE(CodePage), lpMBNoEscStr[ctr]))
  919. {
  920. //
  921. // It's a double byte character.
  922. //
  923. if (lpMBNoEscStr[ctr + 1] & 0x80) // KSC, GB or CNS-1
  924. {
  925. if (wModeSO != wModeCP)
  926. {
  927. if (cchMultiByte)
  928. {
  929. if (cchMBTemp < (cchMultiByte - 3))
  930. {
  931. *lpMBTempStr++ = ESCAPE;
  932. *lpMBTempStr++ = '$';
  933. *lpMBTempStr++ = ')';
  934. *lpMBTempStr++ = EscChar;
  935. }
  936. else
  937. {
  938. //
  939. // Output buffer is too small.
  940. //
  941. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  942. return (0);
  943. }
  944. }
  945. cchMBTemp += 4;
  946. wModeSO = wModeCP;
  947. }
  948. }
  949. else
  950. {
  951. //
  952. // lpMBNoEscStr[ctr + 1] & 0x80 == 0 indicates CNS-2
  953. //
  954. if (wModeSO != MODE_CNS_11643_2)
  955. {
  956. if (cchMultiByte)
  957. {
  958. if (cchMBTemp < (cchMultiByte - 3))
  959. {
  960. *lpMBTempStr++ = ESCAPE;
  961. *lpMBTempStr++ = '$';
  962. *lpMBTempStr++ = '*';
  963. *lpMBTempStr++ = 'H';
  964. }
  965. else
  966. {
  967. //
  968. // Output buffer is too small.
  969. //
  970. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  971. return (0);
  972. }
  973. }
  974. cchMBTemp += 4;
  975. wModeSO = MODE_CNS_11643_2;
  976. }
  977. }
  978. if (wMode == MODE_ASCII)
  979. {
  980. if (cchMultiByte)
  981. {
  982. if (cchMBTemp < cchMultiByte)
  983. {
  984. *lpMBTempStr++ = SHIFT_OUT;
  985. }
  986. else
  987. {
  988. //
  989. // Output buffer is too small.
  990. //
  991. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  992. return (0);
  993. }
  994. }
  995. cchMBTemp++;
  996. wMode = wModeSO;
  997. }
  998. if (ctr >= (cchMBCount - 1))
  999. {
  1000. //
  1001. // Missing trail byte.
  1002. //
  1003. break;
  1004. }
  1005. if (cchMultiByte)
  1006. {
  1007. if (cchMBTemp < (cchMultiByte - 1))
  1008. {
  1009. *lpMBTempStr++ = lpMBNoEscStr[ctr] & 0x7F;
  1010. *lpMBTempStr++ = lpMBNoEscStr[ctr + 1] & 0x7F;
  1011. }
  1012. else
  1013. {
  1014. //
  1015. // Output buffer is too small.
  1016. //
  1017. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1018. return (0);
  1019. }
  1020. }
  1021. ctr++;
  1022. cchMBTemp += 2;
  1023. }
  1024. else
  1025. {
  1026. //
  1027. // It's a single byte character.
  1028. //
  1029. if (wMode != MODE_ASCII)
  1030. {
  1031. if (cchMultiByte)
  1032. {
  1033. if (cchMBTemp < cchMultiByte)
  1034. {
  1035. *lpMBTempStr++ = SHIFT_IN;
  1036. }
  1037. else
  1038. {
  1039. //
  1040. // Output buffer is too small.
  1041. //
  1042. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1043. return (0);
  1044. }
  1045. }
  1046. cchMBTemp++;
  1047. wMode = MODE_ASCII;
  1048. }
  1049. if (cchMultiByte)
  1050. {
  1051. if (cchMBTemp < cchMultiByte)
  1052. {
  1053. *lpMBTempStr++ = lpMBNoEscStr[ctr];
  1054. }
  1055. else
  1056. {
  1057. //
  1058. // Output buffer is too small.
  1059. //
  1060. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1061. return (0);
  1062. }
  1063. }
  1064. cchMBTemp++;
  1065. }
  1066. }
  1067. if (cchMultiByte && (cchMBTemp > cchMultiByte))
  1068. {
  1069. //
  1070. // Output buffer is too small.
  1071. //
  1072. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1073. return (0);
  1074. }
  1075. return (cchMBTemp);
  1076. }
  1077. ////////////////////////////////////////////////////////////////////////////
  1078. //
  1079. // ParseMB_CP52936
  1080. //
  1081. // GB-2312 --> HZ (CP-52936)
  1082. //
  1083. ////////////////////////////////////////////////////////////////////////////
  1084. DWORD ParseMB_CP52936(
  1085. LPSTR lpMultiByteStr,
  1086. int cchMultiByte,
  1087. LPSTR lpMBNoEscStr,
  1088. int cchMBCount)
  1089. {
  1090. int ctr, cchMBTemp = 0;
  1091. WORD wMode;
  1092. LPSTR lpMBTempStr;
  1093. lpMBTempStr = lpMultiByteStr;
  1094. wMode = MODE_ASCII;
  1095. for (ctr = 0; ctr < cchMBCount; ctr++)
  1096. {
  1097. if (lpMBNoEscStr[ctr] & 0x80)
  1098. {
  1099. if (wMode != MODE_HZ)
  1100. {
  1101. if (cchMultiByte)
  1102. {
  1103. if (cchMBTemp < (cchMultiByte - 1))
  1104. {
  1105. *lpMBTempStr++ = '~';
  1106. *lpMBTempStr++ = '{';
  1107. }
  1108. else
  1109. {
  1110. //
  1111. // Output buffer is too small.
  1112. //
  1113. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1114. return (0);
  1115. }
  1116. }
  1117. wMode = MODE_HZ;
  1118. cchMBTemp += 2;
  1119. }
  1120. if (ctr >= (cchMBCount - 1))
  1121. {
  1122. //
  1123. // Missing trail byte.
  1124. //
  1125. break;
  1126. }
  1127. if (cchMultiByte)
  1128. {
  1129. if (cchMBTemp < (cchMultiByte - 1))
  1130. {
  1131. *lpMBTempStr++ = lpMBNoEscStr[ctr] & 0x7F;
  1132. *lpMBTempStr++ = lpMBNoEscStr[ctr + 1] & 0x7F;
  1133. }
  1134. else
  1135. {
  1136. //
  1137. // Output buffer is too small.
  1138. //
  1139. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1140. return (0);
  1141. }
  1142. }
  1143. ctr++;
  1144. cchMBTemp += 2;
  1145. }
  1146. else
  1147. {
  1148. if (wMode != MODE_ASCII)
  1149. {
  1150. if (cchMultiByte)
  1151. {
  1152. if (cchMBTemp < (cchMultiByte - 1))
  1153. {
  1154. *lpMBTempStr++ = '~';
  1155. *lpMBTempStr++ = '}';
  1156. }
  1157. else
  1158. {
  1159. //
  1160. // Output buffer is too small.
  1161. //
  1162. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1163. return (0);
  1164. }
  1165. }
  1166. wMode = MODE_ASCII;
  1167. cchMBTemp += 2;
  1168. }
  1169. if ((BYTE)lpMBNoEscStr[ctr] == '~')
  1170. {
  1171. if (cchMultiByte)
  1172. {
  1173. if (cchMBTemp < cchMultiByte)
  1174. {
  1175. *lpMBTempStr++ = '~';
  1176. }
  1177. else
  1178. {
  1179. //
  1180. // Output buffer is too small.
  1181. //
  1182. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1183. return (0);
  1184. }
  1185. }
  1186. cchMBTemp++;
  1187. }
  1188. if (cchMultiByte)
  1189. {
  1190. if (cchMBTemp < cchMultiByte)
  1191. {
  1192. *lpMBTempStr++ = lpMBNoEscStr[ctr];
  1193. }
  1194. else
  1195. {
  1196. //
  1197. // Output buffer is too small.
  1198. //
  1199. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1200. return (0);
  1201. }
  1202. }
  1203. cchMBTemp++;
  1204. }
  1205. }
  1206. if (cchMultiByte && (cchMBTemp > cchMultiByte))
  1207. {
  1208. //
  1209. // Output buffer is too small.
  1210. //
  1211. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1212. return (0);
  1213. }
  1214. return (cchMBTemp);
  1215. }
  1216. ////////////////////////////////////////////////////////////////////////////
  1217. //
  1218. // MBToWC_CP52936
  1219. //
  1220. // HZ (CP-52936) --> Unicode
  1221. //
  1222. ////////////////////////////////////////////////////////////////////////////
  1223. DWORD MBToWC_CP52936(
  1224. LPSTR lpMultiByteStr,
  1225. int cchMultiByte,
  1226. LPWSTR lpWideCharStr,
  1227. int cchWideChar)
  1228. {
  1229. int ctr, cchMBTemp, cchWCCount;
  1230. WORD wMode;
  1231. LPSTR lpMBNoEscStr;
  1232. lpMBNoEscStr = (LPSTR)NLS_ALLOC_MEM(cchMultiByte * sizeof(WCHAR));
  1233. if (lpMBNoEscStr == NULL)
  1234. {
  1235. SetLastError(ERROR_OUTOFMEMORY);
  1236. return (0);
  1237. }
  1238. cchMBTemp = 0;
  1239. wMode = MODE_ASCII;
  1240. for (ctr = 0; ctr < cchMultiByte; ctr++)
  1241. {
  1242. if (((BYTE)lpMultiByteStr[ctr] == '~') && (ctr < (cchMultiByte - 1)))
  1243. {
  1244. if ((BYTE)lpMultiByteStr[ctr + 1] == '{')
  1245. {
  1246. wMode = MODE_HZ;
  1247. ctr += 2;
  1248. }
  1249. else if ((BYTE)lpMultiByteStr[ctr + 1] == '}')
  1250. {
  1251. wMode = MODE_ASCII;
  1252. ctr += 2;
  1253. }
  1254. else if ((BYTE)lpMultiByteStr[ctr + 1] == '~')
  1255. {
  1256. ctr++;
  1257. }
  1258. else if (((BYTE)lpMultiByteStr[ctr + 1] == '\\') &&
  1259. (ctr < (cchMultiByte - 2)) &&
  1260. (((BYTE)lpMultiByteStr[ctr + 2] == 'n') ||
  1261. ((BYTE)lpMultiByteStr[ctr + 2] == 'N' )))
  1262. {
  1263. ctr += 2;
  1264. }
  1265. }
  1266. if (wMode == MODE_HZ)
  1267. {
  1268. if (ctr < (cchMultiByte - 1))
  1269. {
  1270. lpMBNoEscStr[cchMBTemp++] = lpMultiByteStr[ctr++] | 0x80;
  1271. lpMBNoEscStr[cchMBTemp++] = lpMultiByteStr[ctr] | 0x80;
  1272. }
  1273. }
  1274. else
  1275. {
  1276. if (ctr < cchMultiByte)
  1277. {
  1278. lpMBNoEscStr[cchMBTemp++] = lpMultiByteStr[ctr];
  1279. }
  1280. }
  1281. }
  1282. cchWCCount = MultiByteToWideChar ( 20936,
  1283. 0,
  1284. lpMBNoEscStr,
  1285. cchMBTemp,
  1286. lpWideCharStr,
  1287. cchWideChar );
  1288. NLS_FREE_MEM(lpMBNoEscStr);
  1289. return (cchWCCount);
  1290. }