Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

460 lines
13 KiB

  1. // ============================================================================
  2. // Internet Character Set Conversion: Input from ISO-2022-KR
  3. // ============================================================================
  4. #include "private.h"
  5. #include "fechrcnv.h"
  6. #include "kscobj.h"
  7. #include "codepage.h"
  8. /******************************************************************************
  9. ************************** C O N S T R U C T O R **************************
  10. ******************************************************************************/
  11. CInccKscIn::CInccKscIn(UINT uCodePage, int nCodeSet) : CINetCodeConverter(uCodePage, nCodeSet)
  12. {
  13. Reset(); // initialization
  14. return ;
  15. }
  16. /******************************************************************************
  17. ******************************* R E S E T *********************************
  18. ******************************************************************************/
  19. void CInccKscIn::Reset()
  20. {
  21. m_pfnConv = ConvMain;
  22. m_pfnCleanUp = CleanUpMain;
  23. m_fShift = FALSE;
  24. // bug #57570, Korean ISP DACOM only labels one designator in the
  25. // conversion of a MIME mail. To decode the other part of MIME correctly,
  26. // we need to decode the ISO document or MIME message even there is no
  27. // designator "esc ) C".
  28. m_fKorea = TRUE;
  29. m_nESCBytes = 0 ;
  30. m_fLeadByte = FALSE ;
  31. return ;
  32. }
  33. /******************************************************************************
  34. ************************* C O N V E R T C H A R *************************
  35. ******************************************************************************/
  36. HRESULT CInccKscIn::ConvertChar(UCHAR tc, int cchSrc)
  37. {
  38. BOOL fDone = (this->*m_pfnConv)(tc);
  39. if (fDone)
  40. return S_OK;
  41. else
  42. return E_FAIL;
  43. }
  44. /******************************************************************************
  45. ***************************** C L E A N U P *****************************
  46. ******************************************************************************/
  47. BOOL CInccKscIn::CleanUp()
  48. {
  49. return (this->*m_pfnCleanUp)();
  50. }
  51. /******************************************************************************
  52. **************************** C O N V M A I N ****************************
  53. ******************************************************************************/
  54. BOOL CInccKscIn::ConvMain(UCHAR tc)
  55. {
  56. BOOL fDone = TRUE;
  57. if (tc == ESC) {
  58. m_pfnConv = ConvEsc;
  59. m_pfnCleanUp = CleanUpEsc;
  60. m_nESCBytes++ ;
  61. } else {
  62. if (m_fKorea) {
  63. switch (tc) {
  64. case SO:
  65. m_fShift = TRUE;
  66. break;
  67. case SI:
  68. m_fShift = FALSE;
  69. m_fLeadByte = FALSE ;
  70. break;
  71. default:
  72. if (m_fShift) {
  73. switch (tc) {
  74. case ' ':
  75. case '\t':
  76. case '\n':
  77. fDone = Output(tc);
  78. break;
  79. default:
  80. fDone = Output(tc | 0x80);
  81. m_fLeadByte = ~m_fLeadByte ;
  82. break;
  83. }
  84. } else {
  85. fDone = Output(tc);
  86. }
  87. break;
  88. }
  89. } else {
  90. fDone = Output(tc);
  91. }
  92. }
  93. return fDone;
  94. }
  95. /******************************************************************************
  96. ************************ C L E A N U P M A I N ************************
  97. ******************************************************************************/
  98. BOOL CInccKscIn::CleanUpMain()
  99. {
  100. return TRUE;
  101. }
  102. /******************************************************************************
  103. ***************************** C O N V E S C *****************************
  104. ******************************************************************************/
  105. BOOL CInccKscIn::ConvEsc(UCHAR tc)
  106. {
  107. if (tc == ISO2022_IN_CHAR) {
  108. m_pfnConv = ConvIsoIn;
  109. m_pfnCleanUp = CleanUpIsoIn;
  110. m_nESCBytes++ ;
  111. return TRUE;
  112. } else {
  113. m_pfnConv = ConvMain;
  114. m_pfnCleanUp = CleanUpMain;
  115. m_nESCBytes = 0 ;
  116. (void)Output(ESC);
  117. if (SUCCEEDED(ConvertChar(tc)))
  118. return TRUE;
  119. else
  120. return FALSE;
  121. }
  122. }
  123. /******************************************************************************
  124. ************************* C L E A N U P E S C *************************
  125. ******************************************************************************/
  126. BOOL CInccKscIn::CleanUpEsc()
  127. {
  128. m_pfnConv = ConvMain;
  129. m_pfnCleanUp = CleanUpMain;
  130. m_nESCBytes = 0 ;
  131. return Output(ESC);
  132. }
  133. /******************************************************************************
  134. ************************** C O N V I S O I N **************************
  135. ******************************************************************************/
  136. BOOL CInccKscIn::ConvIsoIn(UCHAR tc)
  137. {
  138. if (tc == ISO2022_IN_KR_CHAR_1) {
  139. m_pfnConv = ConvIsoInKr;
  140. m_pfnCleanUp = CleanUpIsoInKr;
  141. m_nESCBytes++ ;
  142. return TRUE;
  143. } else {
  144. m_pfnConv = ConvMain;
  145. m_pfnCleanUp = CleanUpMain;
  146. m_nESCBytes = 0 ;
  147. (void)Output(ESC);
  148. (void)ConvertChar(ISO2022_IN_CHAR);
  149. if (SUCCEEDED(ConvertChar(tc)))
  150. return TRUE;
  151. else
  152. return FALSE;
  153. }
  154. }
  155. /******************************************************************************
  156. ********************** C L E A N U P I S O I N **********************
  157. ******************************************************************************/
  158. BOOL CInccKscIn::CleanUpIsoIn()
  159. {
  160. m_pfnConv = ConvMain;
  161. m_pfnCleanUp = CleanUpMain;
  162. m_nESCBytes = 0 ;
  163. (void)Output(ESC);
  164. (void)ConvertChar(ISO2022_IN_CHAR);
  165. return CleanUp();
  166. }
  167. /******************************************************************************
  168. *********************** C O N V I S O I N K R ***********************
  169. ******************************************************************************/
  170. BOOL CInccKscIn::ConvIsoInKr(UCHAR tc)
  171. {
  172. m_pfnConv = ConvMain;
  173. m_pfnCleanUp = CleanUpMain;
  174. m_nESCBytes = 0 ;
  175. if (tc == ISO2022_IN_KR_CHAR_2) {
  176. m_fKorea = TRUE;
  177. return TRUE;
  178. } else {
  179. (void)Output(ESC);
  180. (void)ConvertChar(ISO2022_IN_CHAR);
  181. (void)ConvertChar(ISO2022_IN_KR_CHAR_1);
  182. if (SUCCEEDED(ConvertChar(tc)))
  183. return TRUE;
  184. else
  185. return FALSE;
  186. }
  187. }
  188. /******************************************************************************
  189. ******************* C L E A N U P I S O I N K R *******************
  190. ******************************************************************************/
  191. BOOL CInccKscIn::CleanUpIsoInKr()
  192. {
  193. m_pfnConv = ConvMain;
  194. m_pfnCleanUp = CleanUpMain;
  195. m_nESCBytes = 0 ;
  196. (void)Output(ESC);
  197. (void)ConvertChar(ISO2022_IN_CHAR);
  198. (void)ConvertChar(ISO2022_IN_KR_CHAR_1);
  199. return CleanUp();
  200. }
  201. int CInccKscIn::GetUnconvertBytes()
  202. {
  203. if ( m_fLeadByte )
  204. return 1 ;
  205. else if ( m_nESCBytes )
  206. return m_nESCBytes < 4 ? m_nESCBytes : 3 ;
  207. else
  208. return 0 ;
  209. }
  210. DWORD CInccKscIn::GetConvertMode()
  211. {
  212. // 0xC431 -> 50225 ISO-2022-KR
  213. return ( m_fKorea ? 1 : 0 ) + ( m_fShift ? 2 : 0 ) | 0xC4310000 ;
  214. }
  215. void CInccKscIn::SetConvertMode(DWORD mode)
  216. {
  217. Reset(); // initialization
  218. if ( mode & 0x00000001 )
  219. m_fKorea = TRUE ;
  220. if ( mode & 0x00000002 )
  221. m_fShift = TRUE ;
  222. return ;
  223. }
  224. // ============================================================================
  225. // Internet Character Set Conversion: Output to ISO-2022-KSC
  226. // ============================================================================
  227. /******************************************************************************
  228. ************************** C O N S T R U C T O R **************************
  229. ******************************************************************************/
  230. CInccKscOut::CInccKscOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR *lpFallBack) : CINetCodeConverter(uCodePage, nCodeSet)
  231. {
  232. Reset(); // initialization
  233. _dwFlag = dwFlag;
  234. _lpFallBack = lpFallBack;
  235. return ;
  236. }
  237. /******************************************************************************
  238. ******************************* R E S E T *********************************
  239. ******************************************************************************/
  240. void CInccKscOut::Reset()
  241. {
  242. m_fDoubleByte = FALSE;
  243. m_fShift = FALSE;
  244. m_fKorea = FALSE;
  245. m_tcLeadByte = 0 ;
  246. return ;
  247. }
  248. /******************************************************************************
  249. ************************* C O N V E R T C H A R *************************
  250. ******************************************************************************/
  251. HRESULT CInccKscOut::ConvertChar(UCHAR tc, int cchSrc)
  252. {
  253. BOOL fDone = TRUE;
  254. HRESULT hr = S_OK;
  255. //
  256. //IE RAID #103403 weiwu 03/16/00
  257. //
  258. //Per Korean PM (sykim), we don't have to prepend iso-2022-kr designator to conversion result string
  259. //Also considering that URLMON can't handle encoded ASCII iso-2022-kr string
  260. //We now remove following code, if it triggers any compatibility issues, we should re-enable it
  261. //
  262. #if 0
  263. // put designator to the top of the document
  264. if (!m_fKorea) {
  265. (void)Output(ESC);
  266. (void)Output(ISO2022_IN_CHAR);
  267. (void)Output(ISO2022_IN_KR_CHAR_1);
  268. (void)Output(ISO2022_IN_KR_CHAR_2);
  269. m_fKorea = TRUE;
  270. }
  271. #endif
  272. if (!m_fDoubleByte) {
  273. //
  274. // We're not using IsDBCSLeadByteEx() due to perf. concern
  275. // We should assert that our hard code table match IsDBCSLeadByteEx(),
  276. // But, MLang ships with down level platforms and assert won't be valid if there is a range change
  277. //
  278. if (IS_KOR_LEADBYTE(tc)) {
  279. m_fDoubleByte = TRUE;
  280. m_tcLeadByte = tc;
  281. } else {
  282. if (m_fKorea && m_fShift) {
  283. (void)Output(SI);
  284. m_fShift = FALSE;
  285. }
  286. fDone = Output(tc);
  287. }
  288. } else {
  289. m_fDoubleByte = FALSE;
  290. if (tc > 0x40) { // Check if trail byte indicates Hangeul
  291. if (m_tcLeadByte > 0xa0 && tc > 0xa0) { // Check if it's a Wansung
  292. if (!m_fShift) {
  293. if (!m_fKorea) {
  294. (void)Output(ESC);
  295. (void)Output(ISO2022_IN_CHAR);
  296. (void)Output(ISO2022_IN_KR_CHAR_1);
  297. (void)Output(ISO2022_IN_KR_CHAR_2);
  298. m_fKorea = TRUE;
  299. }
  300. (void)Output(SO);
  301. m_fShift = TRUE;
  302. }
  303. (void)Output(m_tcLeadByte & 0x7f);
  304. fDone = Output(tc & 0x7f);
  305. } else {
  306. UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null
  307. if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
  308. {
  309. // only take SBCS, no DBCS character
  310. if ( 1 != WideCharToMultiByte(CP_KOR_5601, 0,
  311. (LPCWSTR)_lpFallBack, 1,
  312. (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL ))
  313. szDefaultChar[0] = 0x3f;
  314. }
  315. // shift out if we're in DBCS mode
  316. if (m_fKorea && m_fShift) {
  317. (void)Output(SI);
  318. m_fShift = FALSE;
  319. }
  320. if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
  321. {
  322. char szChar[2];
  323. char szDstStr[10];
  324. WCHAR szwChar[2];
  325. int cCount;
  326. szChar[0] = m_tcLeadByte;
  327. szChar[1] = tc;
  328. if (MultiByteToWideChar(CP_KOR_5601, 0, szChar, 2, szwChar, ARRAYSIZE(szwChar)))
  329. {
  330. // Output NCR entity
  331. Output('&');
  332. Output('#');
  333. _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);
  334. cCount = lstrlenA(szDstStr);
  335. for (int i=0; i< cCount; i++)
  336. {
  337. Output(szDstStr[i]);
  338. }
  339. fDone = Output(';');
  340. }
  341. else
  342. {
  343. fDone = Output(szDefaultChar[0]); // use default char
  344. hr = S_FALSE;
  345. }
  346. }
  347. else
  348. {
  349. fDone = Output(szDefaultChar[0]); // use default char
  350. hr = S_FALSE;
  351. }
  352. }
  353. } else {
  354. if (m_fKorea && m_fShift) {
  355. (void)Output(SI);
  356. m_fShift = FALSE;
  357. }
  358. (void)Output(m_tcLeadByte);
  359. fDone = Output(tc);
  360. }
  361. m_tcLeadByte = 0 ;
  362. }
  363. if (!fDone)
  364. hr = E_FAIL;
  365. return hr;
  366. }
  367. /******************************************************************************
  368. ***************************** C L E A N U P *****************************
  369. ******************************************************************************/
  370. BOOL CInccKscOut::CleanUp()
  371. {
  372. BOOL fDone = TRUE;
  373. if ( m_fShift)
  374. {
  375. fDone = Output(SI);
  376. m_fShift = FALSE;
  377. }
  378. return fDone ;
  379. }
  380. int CInccKscOut::GetUnconvertBytes()
  381. {
  382. if (m_tcLeadByte)
  383. return 1 ;
  384. else
  385. return 0 ;
  386. }
  387. DWORD CInccKscOut::GetConvertMode()
  388. {
  389. // for output, we don't need write back code page. 0xC431 -> 50225 ISO-2022-KR
  390. return ( m_fKorea ? 1 : 0 ) + ( m_fShift ? 2 : 0 ) ;
  391. }
  392. void CInccKscOut::SetConvertMode(DWORD mode)
  393. {
  394. Reset(); // initialization
  395. if ( mode & 0x00000001 )
  396. m_fKorea = TRUE ;
  397. if ( mode & 0x00000002 )
  398. m_fShift = TRUE ;
  399. return ;
  400. }