Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

389 lines
11 KiB

  1. // ============================================================================
  2. // Internet Character Set Conversion: Input from HZ-GB-2312
  3. // ============================================================================
  4. #include "private.h"
  5. #include "fechrcnv.h"
  6. #include "hzgbobj.h"
  7. #include "codepage.h"
  8. /******************************************************************************
  9. ************************** C O N S T R U C T O R **************************
  10. ******************************************************************************/
  11. CInccHzGbIn::CInccHzGbIn(UINT uCodePage, int nCodeSet) : CINetCodeConverter(uCodePage, nCodeSet)
  12. {
  13. Reset(); // initialization
  14. return ;
  15. }
  16. /******************************************************************************
  17. ******************************* R E S E T *********************************
  18. ******************************************************************************/
  19. void CInccHzGbIn::Reset()
  20. {
  21. m_pfnConv = ConvMain;
  22. m_pfnCleanUp = CleanUpMain;
  23. m_fGBMode = FALSE;
  24. m_tcLeadByte = 0 ;
  25. m_nESCBytes = 0 ;
  26. return ;
  27. }
  28. /******************************************************************************
  29. ************************* C O N V E R T C H A R *************************
  30. ******************************************************************************/
  31. HRESULT CInccHzGbIn::ConvertChar(UCHAR tc, int cchSrc)
  32. {
  33. BOOL fDone = (this->*m_pfnConv)(tc);
  34. if (fDone)
  35. return S_OK;
  36. else
  37. return E_FAIL;
  38. }
  39. /******************************************************************************
  40. ***************************** C L E A N U P *****************************
  41. ******************************************************************************/
  42. BOOL CInccHzGbIn::CleanUp()
  43. {
  44. return (this->*m_pfnCleanUp)();
  45. }
  46. /******************************************************************************
  47. **************************** C O N V M A I N ****************************
  48. ******************************************************************************/
  49. BOOL CInccHzGbIn::ConvMain(UCHAR tc)
  50. {
  51. BOOL fDone = TRUE;
  52. if (!m_fGBMode) {
  53. if (tc == '~') {
  54. m_pfnConv = ConvTilde;
  55. m_pfnCleanUp = CleanUpTilde;
  56. m_nESCBytes = 1 ;
  57. } else {
  58. fDone = Output(tc);
  59. }
  60. } else {
  61. if (tc >= 0x20 && tc <= 0x7e) {
  62. m_pfnConv = ConvDoubleByte;
  63. m_pfnCleanUp = CleanUpDoubleByte;
  64. m_tcLeadByte = tc;
  65. } else {
  66. fDone = Output(tc);
  67. }
  68. }
  69. return fDone;
  70. }
  71. /******************************************************************************
  72. ************************ C L E A N U P M A I N ************************
  73. ******************************************************************************/
  74. BOOL CInccHzGbIn::CleanUpMain()
  75. {
  76. return TRUE;
  77. }
  78. /******************************************************************************
  79. *************************** C O N V T I L D E ***************************
  80. ******************************************************************************/
  81. BOOL CInccHzGbIn::ConvTilde(UCHAR tc)
  82. {
  83. m_pfnConv = ConvMain;
  84. m_pfnCleanUp = CleanUpMain;
  85. m_nESCBytes = 0 ;
  86. switch (tc) {
  87. case '~':
  88. return Output('~');
  89. case '{':
  90. m_fGBMode = TRUE;
  91. return TRUE;
  92. case '\n':
  93. return TRUE; // Just eat it
  94. default:
  95. (void)Output('~');
  96. if (SUCCEEDED(ConvertChar(tc)))
  97. return TRUE;
  98. else
  99. return FALSE;
  100. }
  101. }
  102. /******************************************************************************
  103. *********************** C L E A N U P T I L D E ***********************
  104. ******************************************************************************/
  105. BOOL CInccHzGbIn::CleanUpTilde()
  106. {
  107. m_pfnConv = ConvMain;
  108. m_pfnCleanUp = CleanUpMain;
  109. return Output('~');
  110. }
  111. /******************************************************************************
  112. ********************* C O N V D O U B L E B Y T E *********************
  113. ******************************************************************************/
  114. BOOL CInccHzGbIn::ConvDoubleByte(UCHAR tc)
  115. {
  116. BOOL fRet ;
  117. m_pfnConv = ConvMain;
  118. m_pfnCleanUp = CleanUpMain;
  119. if (m_tcLeadByte >= 0x21 && m_tcLeadByte <= 0x77 && tc >= 0x21 && tc <= 0x7e) { // Check if GB char
  120. (void)Output(m_tcLeadByte | 0x80);
  121. fRet = Output(tc | 0x80);
  122. } else if (m_tcLeadByte == '~' && tc == '}') { // 0x7e7d
  123. m_fGBMode = FALSE;
  124. fRet = TRUE;
  125. } else if (m_tcLeadByte >= 0x78 && m_tcLeadByte <= 0x7d && tc >= 0x21 && tc <= 0x7e) { // Check if non standard extended code
  126. (void)Output((UCHAR)0xa1); // Output blank box symbol
  127. fRet = Output((UCHAR)0xf5);
  128. } else if (m_tcLeadByte == '~') {
  129. (void)Output('~'); // Output blank box symbol
  130. fRet = Output(tc);
  131. } else if (m_tcLeadByte == ' ') {
  132. fRet = Output(tc);
  133. } else if (tc == ' ') {
  134. (void)Output((UCHAR)0xa1); // Output space symbol
  135. fRet = Output((UCHAR)0xa1);
  136. } else {
  137. (void)Output(m_tcLeadByte);
  138. fRet = Output(tc);
  139. }
  140. m_tcLeadByte = 0 ;
  141. return fRet ;
  142. }
  143. /******************************************************************************
  144. ***************** C L E A N U P D O U B L E B Y T E *****************
  145. ******************************************************************************/
  146. BOOL CInccHzGbIn::CleanUpDoubleByte()
  147. {
  148. m_pfnConv = ConvMain;
  149. m_pfnCleanUp = CleanUpMain;
  150. return Output(m_tcLeadByte);
  151. }
  152. int CInccHzGbIn::GetUnconvertBytes()
  153. {
  154. if (m_tcLeadByte)
  155. return 1;
  156. else if ( m_nESCBytes )
  157. return 1;
  158. else
  159. return 0;
  160. }
  161. DWORD CInccHzGbIn::GetConvertMode()
  162. {
  163. return ( m_fGBMode ? 1 : 0 ) ;
  164. }
  165. void CInccHzGbIn::SetConvertMode(DWORD mode)
  166. {
  167. Reset(); // initialization
  168. if ( mode & 0x01 )
  169. m_fGBMode = TRUE ;
  170. else
  171. m_fGBMode = FALSE ;
  172. return ;
  173. }
  174. // ============================================================================
  175. // Internet Character Set Conversion: Output to HZ-GB-2312
  176. // ============================================================================
  177. /******************************************************************************
  178. ************************** C O N S T R U C T O R **************************
  179. ******************************************************************************/
  180. CInccHzGbOut::CInccHzGbOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR * lpFallBack) : CINetCodeConverter(uCodePage, nCodeSet)
  181. {
  182. Reset(); // initialization
  183. _dwFlag = dwFlag;
  184. _lpFallBack = lpFallBack;
  185. return ;
  186. }
  187. /******************************************************************************
  188. ******************************* R E S E T *********************************
  189. ******************************************************************************/
  190. void CInccHzGbOut::Reset()
  191. {
  192. m_fDoubleByte = FALSE;
  193. m_fGBMode = FALSE;
  194. m_tcLeadByte = 0 ;
  195. return ;
  196. }
  197. /******************************************************************************
  198. ************************* C O N V E R T C H A R *************************
  199. ******************************************************************************/
  200. HRESULT CInccHzGbOut::ConvertChar(UCHAR tc, int cchSrc)
  201. {
  202. BOOL fDone = TRUE;
  203. HRESULT hr = S_OK;
  204. if (!m_fDoubleByte)
  205. {
  206. //
  207. // We're not using IsDBCSLeadByteEx() due to perf. concern
  208. // We should assert that our hard code table match IsDBCSLeadByteEx(),
  209. // But, MLang ships with down level platforms and assert won't be valid if there is a range change
  210. //
  211. if (IS_CHS_LEADBYTE(tc))
  212. {
  213. m_fDoubleByte = TRUE;
  214. m_tcLeadByte = tc;
  215. }
  216. else
  217. {
  218. if (m_fGBMode)
  219. {
  220. Output('~');
  221. fDone = Output('}');
  222. m_fGBMode = FALSE;
  223. }
  224. // tilde should be encoded as two tildes
  225. if (tc == '~')
  226. Output('~');
  227. fDone = Output(tc);
  228. }
  229. }
  230. else
  231. {
  232. m_fDoubleByte = FALSE;
  233. // a-ehuang: Bug# 31726, send all out of range code to convert to NCR
  234. // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
  235. if ( (m_tcLeadByte < 0xa1 || m_tcLeadByte > 0xf7) || (tc < 0xa1 || tc > 0xfe) )
  236. // end-31726
  237. {
  238. UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null
  239. if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
  240. {
  241. // only take SBCS, no DBCS character
  242. if ( 1 != WideCharToMultiByte(CP_CHN_GB, 0,
  243. (LPCWSTR)_lpFallBack, 1,
  244. (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL ))
  245. szDefaultChar[0] = 0x3f;
  246. }
  247. // End Escape sequence for NCR entity output
  248. if (m_fGBMode)
  249. {
  250. Output('~');
  251. Output('}');
  252. m_fGBMode = FALSE;
  253. }
  254. if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
  255. {
  256. char szChar[2];
  257. char szDstStr[10];
  258. WCHAR szwChar[2];
  259. int cCount;
  260. szChar[0] = m_tcLeadByte;
  261. szChar[1] = tc;
  262. if (MultiByteToWideChar(CP_CHN_GB, 0, szChar, 2, szwChar, ARRAYSIZE(szwChar)))
  263. {
  264. // Output NCR entity
  265. Output('&');
  266. Output('#');
  267. _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);
  268. cCount = lstrlenA(szDstStr);
  269. for (int i=0; i< cCount; i++)
  270. {
  271. Output(szDstStr[i]);
  272. }
  273. fDone = Output(';');
  274. }
  275. else
  276. {
  277. fDone = Output(szDefaultChar[0]);
  278. hr = S_FALSE;
  279. }
  280. }
  281. else
  282. {
  283. fDone = Output(szDefaultChar[0]);
  284. hr = S_FALSE;
  285. }
  286. }
  287. else
  288. {
  289. if (!m_fGBMode)
  290. {
  291. Output('~');
  292. Output('{');
  293. m_fGBMode = TRUE;
  294. }
  295. Output(m_tcLeadByte & 0x7f);
  296. fDone = Output(tc & 0x7f);
  297. }
  298. m_tcLeadByte = 0 ;
  299. }
  300. if (!fDone)
  301. hr = E_FAIL;
  302. return hr;
  303. }
  304. /******************************************************************************
  305. ***************************** C L E A N U P *****************************
  306. ******************************************************************************/
  307. BOOL CInccHzGbOut::CleanUp()
  308. {
  309. if (!m_fGBMode) {
  310. return TRUE;
  311. } else {
  312. m_fGBMode = FALSE ;
  313. (void)Output('~');
  314. return Output('}');
  315. }
  316. }
  317. int CInccHzGbOut::GetUnconvertBytes()
  318. {
  319. if (m_tcLeadByte)
  320. return 1;
  321. else
  322. return 0;
  323. }
  324. DWORD CInccHzGbOut::GetConvertMode()
  325. {
  326. return 0 ;
  327. }
  328. void CInccHzGbOut::SetConvertMode(DWORD mode)
  329. {
  330. Reset(); // initialization
  331. return ;
  332. }