Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

914 lines
26 KiB

  1. // ============================================================================
  2. // Internet Character Set Conversion: Input from ISO-2022-JP
  3. // ============================================================================
  4. #include "private.h"
  5. #include "fechrcnv.h"
  6. #include "jisobj.h"
  7. #include "codepage.h"
  8. /******************************************************************************
  9. ************************** C O N S T R U C T O R **************************
  10. ******************************************************************************/
  11. CInccJisIn::CInccJisIn(UINT uCodePage, int nCodeSet) : CINetCodeConverter(uCodePage, nCodeSet)
  12. {
  13. Reset(); // initialization
  14. return ;
  15. }
  16. /******************************************************************************
  17. ******************************* R E S E T *********************************
  18. ******************************************************************************/
  19. void CInccJisIn::Reset()
  20. {
  21. m_pfnConv = ConvMain;
  22. m_pfnCleanUp = CleanUpMain;
  23. m_fShift = FALSE;
  24. m_fJapan = FALSE;
  25. m_fLeadByte = FALSE ;
  26. m_tcLeadByte = 0 ;
  27. m_nESCBytes = 0 ;
  28. m_eEscState = JIS_ASCII ;
  29. return ;
  30. }
  31. /******************************************************************************
  32. ************************* C O N V E R T C H A R *************************
  33. ******************************************************************************/
  34. HRESULT CInccJisIn::ConvertChar(UCHAR tc, int cchSrc)
  35. {
  36. BOOL fDone = (this->*m_pfnConv)(tc);
  37. if (fDone)
  38. return S_OK;
  39. else
  40. return E_FAIL;
  41. }
  42. /******************************************************************************
  43. ***************************** C L E A N U P *****************************
  44. ******************************************************************************/
  45. BOOL CInccJisIn::CleanUp()
  46. {
  47. return (this->*m_pfnCleanUp)();
  48. }
  49. /******************************************************************************
  50. **************************** C O N V M A I N ****************************
  51. ******************************************************************************/
  52. BOOL CInccJisIn::ConvMain(UCHAR tc)
  53. {
  54. BOOL fDone = TRUE;
  55. switch (tc) {
  56. case SO:
  57. m_fShift = TRUE;
  58. break;
  59. case SI:
  60. m_fShift = FALSE;
  61. m_fLeadByte = FALSE;
  62. break;
  63. default:
  64. if (m_fShift) {
  65. fDone = Output(tc | 0x80);
  66. // it may continue to convert to Unicode, so we need to know
  67. // whether current byte is a lead byte or not
  68. m_fLeadByte = ~m_fLeadByte ;
  69. } else {
  70. if (tc == ESC) {
  71. m_pfnConv = ConvEsc;
  72. m_pfnCleanUp = CleanUpEsc;
  73. m_nESCBytes++;
  74. } else {
  75. if (m_fJapan) {
  76. if (tc == '*') {
  77. m_pfnConv = ConvStar;
  78. m_pfnCleanUp = CleanUpStar;
  79. } else {
  80. m_pfnConv = ConvDoubleByte;
  81. m_pfnCleanUp = CleanUpDoubleByte;
  82. m_tcLeadByte = tc;
  83. }
  84. } else {
  85. switch ( m_eEscState )
  86. {
  87. case JIS_ASCII:
  88. fDone = Output(tc);
  89. break ;
  90. case JIS_Roman:
  91. #if 0
  92. if ( tc == 0x7e ) /* tilde in ACSII -> overline */
  93. {
  94. Output(0x81);
  95. fDone = Output(0x50);
  96. }
  97. else
  98. fDone = Output(tc);
  99. #else
  100. fDone = Output(tc);
  101. #endif
  102. break ;
  103. case JIS_Kana:
  104. fDone = Output(tc | 0x80 );
  105. break ;
  106. default:
  107. fDone = Output(tc);
  108. break ;
  109. }
  110. }
  111. }
  112. }
  113. break;
  114. }
  115. return fDone;
  116. }
  117. /******************************************************************************
  118. ************************ C L E A N U P M A I N ************************
  119. ******************************************************************************/
  120. BOOL CInccJisIn::CleanUpMain()
  121. {
  122. return TRUE;
  123. }
  124. /******************************************************************************
  125. ***************************** C O N V E S C *****************************
  126. ******************************************************************************/
  127. BOOL CInccJisIn::ConvEsc(UCHAR tc)
  128. {
  129. switch (tc) {
  130. case ISO2022_IN_CHAR:
  131. m_pfnConv = ConvIsoIn;
  132. m_pfnCleanUp = CleanUpIsoIn;
  133. m_nESCBytes++;
  134. return TRUE;
  135. case ISO2022_OUT_CHAR:
  136. m_pfnConv = ConvIsoOut;
  137. m_pfnCleanUp = CleanUpIsoOut;
  138. m_nESCBytes++;
  139. return TRUE;
  140. default:
  141. m_pfnConv = ConvMain;
  142. m_pfnCleanUp = CleanUpMain;
  143. m_nESCBytes = 0 ;
  144. (void)Output(ESC);
  145. if (SUCCEEDED(ConvertChar(tc)))
  146. return TRUE;
  147. else
  148. return FALSE;
  149. }
  150. }
  151. /******************************************************************************
  152. ************************* C L E A N U P E S C *************************
  153. ******************************************************************************/
  154. BOOL CInccJisIn::CleanUpEsc()
  155. {
  156. m_pfnConv = ConvMain;
  157. m_pfnCleanUp = CleanUpMain;
  158. return Output(ESC);
  159. }
  160. /******************************************************************************
  161. ************************** C O N V I S O I N **************************
  162. ******************************************************************************/
  163. BOOL CInccJisIn::ConvIsoIn(UCHAR tc)
  164. {
  165. switch (tc) {
  166. case ISO2022_IN_JP_CHAR1: /* 'B' */
  167. case ISO2022_IN_JP_CHAR2: /* '@' */
  168. m_pfnConv = ConvMain;
  169. m_pfnCleanUp = CleanUpMain;
  170. m_fJapan = TRUE;
  171. m_nESCBytes = 0 ;
  172. return TRUE;
  173. case ISO2022_IN_JP_CHAR3_1: /* '(' */
  174. m_pfnConv = ConvIsoInJp;
  175. m_pfnCleanUp = CleanUpIsoInJp;
  176. m_nESCBytes++ ;
  177. return TRUE;
  178. default:
  179. m_pfnConv = ConvMain;
  180. m_pfnCleanUp = CleanUpMain;
  181. m_nESCBytes = 0 ;
  182. (void)Output(ESC);
  183. (void)ConvertChar(ISO2022_IN_CHAR);
  184. if (SUCCEEDED(ConvertChar(tc)))
  185. return TRUE;
  186. else
  187. return FALSE;
  188. }
  189. }
  190. /******************************************************************************
  191. ********************** C L E A N U P I S O I N **********************
  192. ******************************************************************************/
  193. BOOL CInccJisIn::CleanUpIsoIn()
  194. {
  195. m_pfnConv = ConvMain;
  196. m_pfnCleanUp = CleanUpMain;
  197. (void)Output(ESC);
  198. (void)ConvertChar(ISO2022_IN_CHAR);
  199. return CleanUp();
  200. }
  201. /******************************************************************************
  202. *********************** C O N V I S O I N J P ***********************
  203. ******************************************************************************/
  204. BOOL CInccJisIn::ConvIsoInJp(UCHAR tc)
  205. {
  206. m_pfnConv = ConvMain;
  207. m_pfnCleanUp = CleanUpMain;
  208. m_nESCBytes = 0 ;
  209. if (tc == ISO2022_IN_JP_CHAR3_2) {
  210. m_fJapan = TRUE;
  211. return TRUE;
  212. } else {
  213. (void)Output(ESC);
  214. (void)ConvertChar(ISO2022_IN_CHAR);
  215. (void)ConvertChar(ISO2022_IN_JP_CHAR3_1);
  216. if (SUCCEEDED(ConvertChar(tc)))
  217. return TRUE;
  218. else
  219. return FALSE;
  220. }
  221. }
  222. /******************************************************************************
  223. ******************* C L E A N U P I S O I N J P *******************
  224. ******************************************************************************/
  225. BOOL CInccJisIn::CleanUpIsoInJp()
  226. {
  227. m_pfnConv = ConvMain;
  228. m_pfnCleanUp = CleanUpMain;
  229. m_nESCBytes = 0 ;
  230. (void)Output(ESC);
  231. (void)ConvertChar(ISO2022_IN_CHAR);
  232. (void)ConvertChar(ISO2022_IN_JP_CHAR3_1);
  233. return CleanUp();
  234. }
  235. /******************************************************************************
  236. ************************* C O N V I S O O U T *************************
  237. ******************************************************************************/
  238. BOOL CInccJisIn::ConvIsoOut(UCHAR tc)
  239. {
  240. m_pfnConv = ConvMain;
  241. m_pfnCleanUp = CleanUpMain;
  242. m_nESCBytes = 0 ;
  243. switch (tc) {
  244. case ISO2022_OUT_JP_CHAR1: /* B */
  245. m_fJapan = FALSE;
  246. m_eEscState = JIS_ASCII ;
  247. return TRUE;
  248. case ISO2022_OUT_JP_CHAR2: /* J */
  249. case ISO2022_OUT_JP_CHAR4: /* H */
  250. m_fJapan = FALSE;
  251. m_eEscState = JIS_Roman ;
  252. return TRUE;
  253. case ISO2022_OUT_JP_CHAR3: /* I */
  254. m_fJapan = FALSE;
  255. m_eEscState = JIS_Kana ;
  256. return TRUE;
  257. default:
  258. (void)Output(ESC);
  259. (void)ConvertChar(ISO2022_OUT_CHAR);
  260. if (SUCCEEDED(ConvertChar(tc)))
  261. return TRUE;
  262. else
  263. return FALSE;
  264. }
  265. }
  266. /******************************************************************************
  267. ********************* C L E A N U P I S O O U T *********************
  268. ******************************************************************************/
  269. BOOL CInccJisIn::CleanUpIsoOut()
  270. {
  271. m_pfnConv = ConvMain;
  272. m_pfnCleanUp = CleanUpMain;
  273. m_nESCBytes = 0 ;
  274. (void)Output(ESC);
  275. (void)ConvertChar(ISO2022_OUT_CHAR);
  276. return CleanUp();
  277. }
  278. /******************************************************************************
  279. **************************** C O N V S T A R ****************************
  280. ******************************************************************************/
  281. BOOL CInccJisIn::ConvStar(UCHAR tc)
  282. {
  283. m_pfnConv = ConvMain;
  284. m_pfnCleanUp = CleanUpMain;
  285. return Output(tc | 0x80);
  286. }
  287. /******************************************************************************
  288. ************************ C L E A N U P S T A R ************************
  289. ******************************************************************************/
  290. BOOL CInccJisIn::CleanUpStar()
  291. {
  292. m_pfnConv = ConvMain;
  293. m_pfnCleanUp = CleanUpMain;
  294. return Output('*');
  295. }
  296. /******************************************************************************
  297. ********************* C O N V D O U B L E B Y T E *********************
  298. ******************************************************************************/
  299. BOOL CInccJisIn::ConvDoubleByte(UCHAR tc)
  300. {
  301. BOOL bRet ;
  302. UCHAR tcSJisLB;
  303. UCHAR tcSJisTB;
  304. m_pfnConv = ConvMain;
  305. m_pfnCleanUp = CleanUpMain;
  306. tcSJisLB = ((m_tcLeadByte - 0x21) >> 1) + 0x81;
  307. if (tcSJisLB > 0x9f)
  308. tcSJisLB += 0x40;
  309. tcSJisTB = tc + (m_tcLeadByte & 1 ? 0x1f : 0x7d);
  310. if (tcSJisTB >= 0x7f)
  311. tcSJisTB++;
  312. (void)Output(tcSJisLB);
  313. bRet = Output(tcSJisTB);
  314. m_tcLeadByte = 0 ;
  315. return bRet ;
  316. }
  317. /******************************************************************************
  318. ***************** C L E A N U P D O U B L E B Y T E *****************
  319. ******************************************************************************/
  320. BOOL CInccJisIn::CleanUpDoubleByte()
  321. {
  322. BOOL bRet ;
  323. m_pfnConv = ConvMain;
  324. m_pfnCleanUp = CleanUpMain;
  325. bRet = Output(m_tcLeadByte);
  326. m_tcLeadByte = 0 ;
  327. return bRet ;
  328. }
  329. int CInccJisIn::GetUnconvertBytes()
  330. {
  331. if ( m_tcLeadByte || m_fLeadByte )
  332. return 1 ;
  333. else if ( m_nESCBytes )
  334. return m_nESCBytes < 4 ? m_nESCBytes : 3 ;
  335. else
  336. return 0 ;
  337. }
  338. DWORD CInccJisIn::GetConvertMode()
  339. {
  340. // 0xC42C -> 50220 ISO-2022-JP
  341. return ( m_fJapan ? 1 : 0 ) + ( m_fShift ? 2 : 0 ) | 0xC42C0000 ;
  342. }
  343. void CInccJisIn::SetConvertMode(DWORD mode)
  344. {
  345. Reset();
  346. if ( mode & 0x00000001 )
  347. m_fJapan = TRUE ;
  348. if ( mode & 0x00000002 )
  349. m_fShift = TRUE ;
  350. return ;
  351. }
  352. // ============================================================================
  353. // Internet Character Set Conversion: Output to ISO-2022-JP
  354. // ============================================================================
  // Value subtracted from a half-width kana byte to index the 0xA0-based
  // tables below (g_wVoiceMarkKana, g_wFullWKana).
  #define VOICE_MARK_OFFSET 0xA0
  // Value subtracted from a half-width kana byte to index the 0xC8-based
  // tables below (g_wMarkDEKana, g_wMarkDFKana).
  #define VOICE_MARK_DEDF_OFFSET 0xC8
  // The tables in this #if 0 section are compiled out. Their layout mirrors
  // the live tables that follow, with different (0x8xxx) code values.
  // NOTE(review): the single non-zero entry in the a0-af row of
  // g_wVoiceMarkKana sits in the 0xA9 column; the neighbouring rows suggest
  // it was meant for 0xB3 - confirm before re-enabling this section.
  #if 0 // Shift JIS Table - not used
  // this is the table used to determine whether the kana char is voiced sound markable
  // if it is, what is the combined full width kana.
  static WCHAR g_wVoiceMarkKana[48] =
  {
  /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
  /* a0-af */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x8394, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
  /* b0-bf */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x834B, 0x834D, 0x834f, 0x8351, 0x8353, 0x8355, 0x8357, 0x8359, 0x835B, 0x835D,
  /* c0-cf */ 0x835F, 0x8361, 0x8364, 0x8366, 0x8368, 0x0, 0x0, 0x0, 0x0, 0x0, 0x836F, 0x8372, 0x8375, 0x8378, 0x837B, 0x0,
  };
  // special voiced sound mark '0xde' conversion
  static WCHAR g_wMarkDEKana[16] =
  {
  /* c8-cf */ 0x0, 0x0, 0x836F, 0x8372, 0x8375, 0x8378, 0x837B, 0x0,
  };
  // special voiced sound mark '0xdf' conversion
  static WCHAR g_wMarkDFKana[16] =
  {
  /* c8-cf */ 0x0, 0x0, 0x8370, 0x8373, 0x8376, 0x8379, 0x837C, 0x0,
  };
  // this is the table used to convert half width kana to full width kana
  static WCHAR g_wFullWKana[64] =
  {
  /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
  /* a0-af */ 0x0, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342, 0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362,
  /* b0-bf */ 0x815B, 0x8341, 0x8343, 0x8345, 0x8347, 0x8349, 0x834A, 0x834C, 0x834E, 0x8350, 0x8352, 0x8354, 0x8356, 0x8358, 0x835A, 0x835C,
  /* c0-cf */ 0x835E, 0x8360, 0x8363, 0x8365, 0x8367, 0x8369, 0x836A, 0x836B, 0x836C, 0x836D, 0x836E, 0x8371, 0x8374, 0x8377, 0x837A, 0x837D,
  /* d0-df */ 0x837E, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386, 0x8388, 0x8389, 0x838A, 0x838B, 0x838C, 0x838D, 0x838F, 0x8393, 0x814A, 0x814B,
  };
  #endif
  387. // JIS Table
  388. // this is the table used to determine whether the kana char is voiced sound markable
  389. // if it is, what is the combined full width kana.
  390. static WCHAR g_wVoiceMarkKana[48] =
  391. {
  392. /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
  393. /* a0-af */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2574, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
  394. /* b0-bf */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x252c, 0x252e, 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253A, 0x253C, 0x253E,
  395. /* c0-cf */ 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2550, 0x2553, 0x2556, 0x2559, 0x255C, 0x0,
  396. };
  397. // special voiced sound mark '0xde' conversion
  398. static WCHAR g_wMarkDEKana[16] =
  399. {
  400. /* c8-cf */ 0x0, 0x0, 0x2550, 0x2553, 0x2556, 0x2559, 0x255C, 0x0,
  401. };
  402. // special voiced sound mark '0xdf' conversion
  403. static WCHAR g_wMarkDFKana[16] =
  404. {
  405. /* c8-cf */ 0x0, 0x0, 0x2551, 0x2554, 0x2557, 0x255A, 0x255D, 0x0,
  406. };
  407. // this is the table used to convert half width kana to full width kana
  408. static WCHAR g_wFullWKana[64] =
  409. {
  410. /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
  411. /* a0-af */ 0x0, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521, 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
  412. /* b0-bf */ 0x213C, 0x2522, 0x2524, 0x2526, 0x2528, 0x252A, 0x252B, 0x252D, 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253B, 0x253D,
  413. /* c0-cf */ 0x253F, 0x2541, 0x2544, 0x2546, 0x2548, 0x254A, 0x254B, 0x254C, 0x254D, 0x254E, 0x254F, 0x2552, 0x2555, 0x2558, 0x255B, 0x255E,
  414. /* d0-df */ 0x255F, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x256D, 0x256F, 0x2573, 0x212B, 0x212C,
  415. };
  416. /******************************************************************************
  417. ************************** C O N S T R U C T O R **************************
  418. ******************************************************************************/
  419. CInccJisOut::CInccJisOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR *lpFallBack) : CINetCodeConverter(uCodePage, nCodeSet)
  420. {
  421. m_dwFlag = dwFlag;
  422. // Per Office team's request, we should disable BESTFITCHARS feature for now.
  423. m_dwFlag &= ~MLCONVCHARF_NOBESTFITCHARS;
  424. m_lpFallBack = lpFallBack;
  425. Reset(); // initialization
  426. return ;
  427. }
  428. /******************************************************************************
  429. ******************************* R E S E T *********************************
  430. ******************************************************************************/
  431. void CInccJisOut::Reset()
  432. {
  433. m_fDoubleByte = FALSE;
  434. m_fKana = FALSE;
  435. m_fJapan = FALSE;
  436. m_fSaveByte = FALSE;
  437. m_tcLeadByte = 0 ;
  438. m_tcPrevByte = 0 ;
  439. m_eKanaMode = SIO_MODE ;
  440. return ;
  441. }
  442. /******************************************************************************
  443. ************************* C O N V E R T C H A R *************************
  444. ******************************************************************************/
  445. HRESULT CInccJisOut::ConvertChar(UCHAR tc, int cchSrc)
  446. {
  447. BOOL fDone = TRUE;
  448. HRESULT hr = S_OK;
  449. if (!m_fDoubleByte) {
  450. if ((tc >= 0x81 && tc <= 0x9f) || (tc >= 0xe0 && tc <= 0xfc )) {
  451. // Switch to Double Byte Code
  452. if (m_fKana) {
  453. if ( SIO_MODE == m_eKanaMode )
  454. fDone = Output(SI);
  455. else if ( (FULL_MODE == m_eKanaMode) && !(m_dwFlag & MLCONVCHARF_NOBESTFITCHARS))
  456. {
  457. fDone = KanaCleanUp();
  458. m_fJapan = TRUE; // in FULL mode, Kana are bouble byte code too.
  459. }
  460. m_fKana = FALSE;
  461. }
  462. if (!m_fJapan) {
  463. (void)Output(ESC); // ESC $ B - JIS-83
  464. (void)Output(ISO2022_IN_CHAR);
  465. fDone = Output(ISO2022_IN_JP_CHAR1);
  466. m_fJapan = TRUE;
  467. }
  468. m_fDoubleByte = TRUE;
  469. m_tcLeadByte = tc;
  470. } else if (tc >= 0xa1 && tc <= 0xdf) {
  471. // Single Byte Katakana Code
  472. if (m_fJapan) {
  473. if ( (FULL_MODE == m_eKanaMode) && !(m_dwFlag & MLCONVCHARF_NOBESTFITCHARS))
  474. m_fKana = TRUE; // no mode changing
  475. else if ( (SIO_MODE == m_eKanaMode) || (FULL_MODE == m_eKanaMode))
  476. {
  477. (void)Output(ESC); // ESC ( B - ACSII
  478. (void)Output(ISO2022_OUT_CHAR);
  479. fDone = Output(ISO2022_OUT_JP_CHAR1);
  480. }
  481. m_fJapan = FALSE;
  482. }
  483. if (!m_fKana) {
  484. switch ( m_eKanaMode )
  485. {
  486. case SIO_MODE :
  487. fDone = Output(SO);
  488. break ;
  489. case ESC_MODE :
  490. (void)Output(ESC); // ESC ( I - Kana mode
  491. (void)Output(ISO2022_OUT_CHAR);
  492. fDone = Output(ISO2022_OUT_JP_CHAR3);
  493. break ;
  494. case FULL_MODE :
  495. // Don't switch if NO_BEST_FIT_CHAR
  496. if (!(m_dwFlag & MLCONVCHARF_NOBESTFITCHARS))
  497. {
  498. (void)Output(ESC); // ESC $ B - JIS 83
  499. (void)Output(ISO2022_IN_CHAR);
  500. fDone = Output(ISO2022_IN_JP_CHAR1);
  501. }
  502. break;
  503. }
  504. m_fKana = TRUE;
  505. }
  506. if ( FULL_MODE == m_eKanaMode )
  507. {
  508. hr = ConvFullWidthKana(tc);
  509. if (SUCCEEDED(hr))
  510. fDone = TRUE;
  511. else
  512. fDone = FALSE;
  513. }
  514. else
  515. fDone = Output(tc & 0x7f);
  516. } else {
  517. // Single Byte Code
  518. if (m_fKana) {
  519. if ( SIO_MODE == m_eKanaMode )
  520. fDone = Output(SI);
  521. else {
  522. if ( FULL_MODE == m_eKanaMode )
  523. fDone = KanaCleanUp();
  524. (void)Output(ESC); // ESC ( B - ACSII
  525. (void)Output(ISO2022_OUT_CHAR);
  526. fDone = Output(ISO2022_OUT_JP_CHAR1);
  527. }
  528. m_fKana = FALSE;
  529. }
  530. if (m_fJapan) {
  531. (void)Output(ESC); // ESC ( B - ACSII
  532. (void)Output(ISO2022_OUT_CHAR);
  533. fDone = Output(ISO2022_OUT_JP_CHAR1);
  534. m_fJapan = FALSE;
  535. }
  536. fDone = Output(tc);
  537. }
  538. } else {
  539. // map extended char (0xfa40-0xfc4b) to a special range
  540. if (m_tcLeadByte >= 0xfa && m_tcLeadByte <= 0xfc && tc >= 0x40 )
  541. {
  542. WCHAR wcDBCS ;
  543. wcDBCS = ((WCHAR) m_tcLeadByte ) << 8 | tc ;
  544. if ( wcDBCS >= 0xfa40 && wcDBCS <= 0xfa5b )
  545. {
  546. if ( wcDBCS <= 0xfa49 )
  547. wcDBCS = wcDBCS - 0x0b51 ;
  548. else if ( wcDBCS >= 0xfa4a && wcDBCS <= 0xfa53 )
  549. wcDBCS = wcDBCS - 0x072f6 ;
  550. else if ( wcDBCS >= 0xfa54 && wcDBCS <= 0xfa57 )
  551. wcDBCS = wcDBCS - 0x0b5b ;
  552. else if ( wcDBCS == 0xfa58 )
  553. wcDBCS = 0x878a ;
  554. else if ( wcDBCS == 0xfa59 )
  555. wcDBCS = 0x8782 ;
  556. else if ( wcDBCS == 0xfa5a )
  557. wcDBCS = 0x8784 ;
  558. else if ( wcDBCS == 0xfa5b )
  559. wcDBCS = 0x879a ;
  560. }
  561. else if ( wcDBCS >= 0xfa5c && wcDBCS <= 0xfc4b )
  562. {
  563. if ( tc < 0x5c )
  564. wcDBCS = wcDBCS - 0x0d5f;
  565. else if ( tc >= 0x80 && tc <= 0x9B )
  566. wcDBCS = wcDBCS - 0x0d1d;
  567. else
  568. wcDBCS = wcDBCS - 0x0d1c;
  569. }
  570. tc = (UCHAR) wcDBCS ;
  571. m_tcLeadByte = (UCHAR) ( wcDBCS >> 8 ) ;
  572. }
  573. // Convert Double Byte Code
  574. m_tcLeadByte -= ((m_tcLeadByte > 0x9f) ? 0xb1 : 0x71);
  575. m_tcLeadByte = m_tcLeadByte * 2 + 1;
  576. if (tc > 0x9e) {
  577. tc -= 0x7e;
  578. m_tcLeadByte++;
  579. } else {
  580. if (tc > 0x7e)
  581. tc--;
  582. tc -= 0x1f;
  583. }
  584. (void)Output(m_tcLeadByte);
  585. fDone = Output(tc);
  586. m_fDoubleByte = FALSE;
  587. m_tcLeadByte = 0 ;
  588. }
  589. if (fDone)
  590. return hr;
  591. else
  592. return E_FAIL;
  593. }
  594. /******************************************************************************
  595. ***************************** C L E A N U P *****************************
  596. ******************************************************************************/
  597. BOOL CInccJisOut::CleanUp()
  598. {
  599. BOOL fDone = TRUE;
  600. // Discard m_byLeadByte: if (m_fDoubleByte) Output(m_byLeadByte);
  601. fDone = KanaCleanUp();
  602. if (m_fKana)
  603. {
  604. if ( SIO_MODE == m_eKanaMode )
  605. fDone = Output(SI);
  606. else if (!(m_dwFlag & MLCONVCHARF_NOBESTFITCHARS)) // FULL mode and ESC mode
  607. {
  608. (void)Output(ESC); // ESC ( B - ASCII
  609. (void)Output(ISO2022_OUT_CHAR);
  610. fDone = Output(ISO2022_OUT_JP_CHAR1);
  611. }
  612. m_fKana = FALSE ;
  613. }
  614. if (m_fJapan) {
  615. (void)Output(ESC); // ESC ( B - ASCII
  616. (void)Output(ISO2022_OUT_CHAR);
  617. fDone = Output(ISO2022_OUT_JP_CHAR1);
  618. m_fJapan = FALSE ;
  619. }
  620. return fDone;
  621. }
  622. /******************************************************************************
  623. ************************* C O N V E R T C H A R *************************
  624. ******************************************************************************/
  625. HRESULT CInccJisOut::ConvFullWidthKana(UCHAR tc)
  626. {
  627. BOOL fDone = TRUE ;
  628. int index ;
  629. WCHAR DoubleBytes ;
  630. HRESULT hr = S_OK;
  631. if (m_dwFlag & MLCONVCHARF_NOBESTFITCHARS)
  632. {
  633. UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null
  634. if (m_lpFallBack && (m_dwFlag & MLCONVCHARF_USEDEFCHAR))
  635. {
  636. // only take SBCS, no DBCS character
  637. if ( 1 != WideCharToMultiByte(CP_JPN_SJ, 0,
  638. (LPCWSTR)m_lpFallBack, 1,
  639. (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL ))
  640. szDefaultChar[0] = 0x3f;
  641. }
  642. if (m_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
  643. {
  644. char szChar[2] = {0};
  645. char szDstStr[10] = {0};
  646. WCHAR szwChar[2];
  647. int cCount;
  648. szChar[0] = tc;
  649. if (MultiByteToWideChar(CP_JPN_SJ, 0, szChar, -1, szwChar, ARRAYSIZE(szwChar)))
  650. {
  651. // Output NCR entity
  652. Output('&');
  653. Output('#');
  654. _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);
  655. cCount = lstrlenA(szDstStr);
  656. for (int i=0; i< cCount; i++)
  657. {
  658. Output(szDstStr[i]);
  659. }
  660. fDone = Output(';');
  661. }
  662. else
  663. {
  664. fDone = Output(szDefaultChar[0]); // use default char
  665. hr = S_FALSE;
  666. }
  667. }
  668. else
  669. {
  670. fDone = Output(szDefaultChar[0]); // use default char
  671. hr = S_FALSE;
  672. }
  673. }
  674. else
  675. {
  676. // voiced sound mark or semi-voiced sound mark
  677. if ( m_fSaveByte && ( tc == 0xde || tc == 0xdf ) )
  678. {
  679. if ( m_tcPrevByte >= 0x0CA && m_tcPrevByte <= 0x0CE )
  680. {
  681. index = m_tcPrevByte - VOICE_MARK_DEDF_OFFSET ;
  682. if ( tc == 0xde )
  683. DoubleBytes = g_wMarkDEKana[index] ;
  684. else
  685. DoubleBytes = g_wMarkDFKana[index] ;
  686. }
  687. else
  688. {
  689. index = m_tcPrevByte - VOICE_MARK_OFFSET ;
  690. DoubleBytes = g_wVoiceMarkKana[index] ;
  691. }
  692. Output( (UCHAR) (DoubleBytes >> 8 ));
  693. fDone = Output( (UCHAR) DoubleBytes );
  694. m_fSaveByte = FALSE ;
  695. m_tcPrevByte = '\0' ;
  696. }
  697. else
  698. {
  699. // output previous saved voice sound markable char
  700. if ( m_fSaveByte )
  701. {
  702. index = m_tcPrevByte - VOICE_MARK_OFFSET ;
  703. DoubleBytes = g_wFullWKana[index] ;
  704. Output( (UCHAR) (DoubleBytes >> 8 ) );
  705. fDone = Output( (UCHAR) DoubleBytes );
  706. m_fSaveByte = FALSE ;
  707. m_tcPrevByte = '\0' ;
  708. }
  709. // half width kana
  710. if ( tc >= 0xa1 && tc <= 0xdf )
  711. {
  712. index = tc - VOICE_MARK_OFFSET ;
  713. // check if this char can be combined with voice sound mark
  714. if ( g_wVoiceMarkKana[index] )
  715. {
  716. m_fSaveByte = TRUE ;
  717. m_tcPrevByte = tc ;
  718. }
  719. // convert half width kana to full width kana
  720. else
  721. {
  722. DoubleBytes = g_wFullWKana[index] ;
  723. Output( (UCHAR) ( DoubleBytes >> 8 ));
  724. fDone = Output( (UCHAR) DoubleBytes );
  725. }
  726. }
  727. else
  728. fDone = Output(tc);
  729. }
  730. }
  731. if (fDone)
  732. return hr;
  733. else
  734. return E_FAIL;
  735. }
  736. /******************************************************************************
  737. *************************** K A N A C L E A N U P ************************
  738. ******************************************************************************/
  739. BOOL CInccJisOut::KanaCleanUp()
  740. {
  741. BOOL fDone = TRUE;
  742. WCHAR DoubleBytes ;
  743. int index ;
  744. // output previous saved voice sound markable char
  745. if ( m_fSaveByte )
  746. {
  747. index = m_tcPrevByte - VOICE_MARK_OFFSET ;
  748. DoubleBytes = g_wFullWKana[index] ;
  749. Output( (UCHAR) ( DoubleBytes >> 8 ));
  750. fDone = Output( (UCHAR) DoubleBytes );
  751. m_fSaveByte = FALSE ;
  752. m_tcPrevByte = '\0' ;
  753. }
  754. return fDone;
  755. }
  756. int CInccJisOut::GetUnconvertBytes()
  757. {
  758. if ( m_tcLeadByte )
  759. return 1 ;
  760. else
  761. return 0 ;
  762. }
  763. DWORD CInccJisOut::GetConvertMode()
  764. {
  765. return ( m_fJapan ? 1 : 0 ) + ( m_fKana ? 2 : 0 ) ;
  766. }
  767. void CInccJisOut::SetConvertMode(DWORD mode)
  768. {
  769. Reset();
  770. if ( mode & 0x00000001 )
  771. m_fJapan = TRUE ;
  772. if ( mode & 0x00000002 )
  773. m_fKana = TRUE ;
  774. return ;
  775. }
  776. void CInccJisOut::SetKanaMode(UINT uCodePage)
  777. {
  778. switch ( uCodePage )
  779. {
  780. case CP_ISO_2022_JP_ESC:
  781. m_eKanaMode = ESC_MODE ;
  782. break ;
  783. case CP_ISO_2022_JP_SIO:
  784. m_eKanaMode = SIO_MODE ;
  785. break ;
  786. default :
  787. m_eKanaMode = FULL_MODE ;
  788. break ;
  789. }
  790. return ;
  791. }