Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

282 lines
9.6 KiB

  1. #include "private.h"
  2. #include "jisobj.h"
  3. #include "eucjobj.h"
  4. #include "hzgbobj.h"
  5. #include "kscobj.h"
  6. #include "utf8obj.h"
  7. #include "utf7obj.h"
  8. #include "fechrcnv.h"
  9. #include "codepage.h"
  10. #include "ichrcnv.h"
  11. HRESULT CICharConverter::KSC5601ToEUCKR(LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
  12. {
  13. int nSize=0;
  14. int i=0;
  15. HRESULT hr = S_OK;
  16. UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null
  17. if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
  18. {
  19. // only take SBCS, no DBCS character
  20. if ( 1 != WideCharToMultiByte(CP_KOR_5601, 0,
  21. (LPCWSTR)_lpFallBack, 1,
  22. (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL ))
  23. szDefaultChar[0] = 0x3f;
  24. }
  25. while(i < *lpnSrcSize)
  26. {
  27. // Check space
  28. if (lpDestStr && (nSize > cchDest))
  29. break;
  30. // DBCS
  31. if (((UCHAR)lpSrcStr[i] >= 0x81 && (UCHAR)lpSrcStr[i] <= 0xFE) && (i+1 < *lpnSrcSize))
  32. {
  33. // UHC
  34. if (!((UCHAR)lpSrcStr[i] >= 0xA1 && (UCHAR)lpSrcStr[i] <= 0xFE &&
  35. (UCHAR)lpSrcStr[i+1] >= 0xA1 && (UCHAR)lpSrcStr[i+1] <= 0xFE))
  36. {
  37. // use NCR if flag specified
  38. if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
  39. {
  40. char szDstStr[10] = {0};
  41. WCHAR szwChar[2];
  42. int cCount;
  43. if (MultiByteToWideChar(CP_KOR_5601, 0, &lpSrcStr[i], 2, szwChar, ARRAYSIZE(szwChar)))
  44. {
  45. // Caculate NCR length
  46. _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);
  47. cCount = lstrlenA(szDstStr)+3;
  48. // Not enough space for NCR entity
  49. if (lpDestStr)
  50. {
  51. if (nSize+cCount > cchDest)
  52. break;
  53. // Output NCR entity
  54. else
  55. {
  56. *lpDestStr ++= '&';
  57. *lpDestStr ++= '#';
  58. for (int j=0; j< cCount-3; j++)
  59. *lpDestStr++=szDstStr[j];
  60. *lpDestStr ++= ';';
  61. }
  62. }
  63. nSize += cCount;
  64. }
  65. else
  66. {
  67. if (lpDestStr)
  68. {
  69. if (nSize+1 > cchDest)
  70. break;
  71. *lpDestStr++=szDefaultChar[0];
  72. }
  73. nSize++;
  74. hr = S_FALSE;
  75. }
  76. }
  77. // use default char, question mark
  78. else
  79. {
  80. if (lpDestStr)
  81. {
  82. if (nSize+1 > cchDest)
  83. break;
  84. *lpDestStr++=szDefaultChar[0];
  85. }
  86. nSize++;
  87. hr = S_FALSE;
  88. }
  89. i += 2;
  90. }
  91. // Wansung
  92. else
  93. {
  94. if (lpDestStr)
  95. {
  96. if (nSize+2 > cchDest)
  97. break;
  98. *lpDestStr++=lpSrcStr[i];
  99. *lpDestStr++=lpSrcStr[i+1];
  100. }
  101. i+=2;
  102. nSize += 2;
  103. }
  104. }
  105. // SBCS
  106. else
  107. {
  108. if (lpDestStr)
  109. {
  110. if (nSize+1 > cchDest)
  111. break;
  112. *lpDestStr++=lpSrcStr[i];
  113. }
  114. nSize++;
  115. i++;
  116. }
  117. } // End of loop
  118. if (lpnSize)
  119. *lpnSize = nSize;
  120. return hr;
  121. }
  122. /******************************************************************************
  123. ****************** C O N V E R T I N E T S T R I N G ******************
  124. ******************************************************************************/
  125. HRESULT CICharConverter::CreateINetString(BOOL fInbound, UINT uCodePage, int nCodeSet)
  126. {
  127. if (_hcins)
  128. {
  129. delete _hcins ;
  130. _hcins = NULL ;
  131. }
  132. if (fInbound) { // Inbound
  133. if (uCodePage == CP_JPN_SJ && ( nCodeSet == CP_ISO_2022_JP ||
  134. nCodeSet == CP_ISO_2022_JP_ESC || nCodeSet == CP_ISO_2022_JP_SIO ))
  135. // JIS
  136. _hcins = new CInccJisIn(uCodePage, nCodeSet);
  137. else if (uCodePage == CP_JPN_SJ && nCodeSet == CP_EUC_JP ) // EUC
  138. _hcins = new CInccEucJIn(uCodePage, nCodeSet);
  139. else if (uCodePage == CP_CHN_GB && nCodeSet == CP_CHN_HZ ) // HZ-GB
  140. _hcins = new CInccHzGbIn(uCodePage, nCodeSet);
  141. else if (uCodePage == CP_KOR_5601 && nCodeSet == CP_ISO_2022_KR )
  142. _hcins = new CInccKscIn(uCodePage, nCodeSet);
  143. else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_8 )
  144. _hcins = new CInccUTF8In(uCodePage, nCodeSet);
  145. else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_7 )
  146. _hcins = new CInccUTF7In(uCodePage, nCodeSet);
  147. } else { // Outbound
  148. if (uCodePage == CP_JPN_SJ && ( nCodeSet == CP_ISO_2022_JP ||
  149. nCodeSet == CP_ISO_2022_JP_ESC || nCodeSet == CP_ISO_2022_JP_SIO ))
  150. // JIS
  151. _hcins = new CInccJisOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
  152. else if (uCodePage == CP_JPN_SJ && nCodeSet == CP_EUC_JP ) // EUC
  153. _hcins = new CInccEucJOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
  154. else if (uCodePage == CP_CHN_GB && nCodeSet == CP_CHN_HZ ) // HZ-GB
  155. _hcins = new CInccHzGbOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
  156. else if (uCodePage == CP_KOR_5601 && nCodeSet == CP_ISO_2022_KR )
  157. _hcins = new CInccKscOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
  158. else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_8 )
  159. _hcins = new CInccUTF8Out(uCodePage, nCodeSet);
  160. else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_7 )
  161. _hcins = new CInccUTF7Out(uCodePage, nCodeSet);
  162. }
  163. // recode the dst codepage
  164. if ( _hcins )
  165. _hcins_dst = nCodeSet ;
  166. return S_OK ;
  167. }
  168. HRESULT CICharConverter::DoConvertINetString(LPDWORD lpdwMode, BOOL fInbound, UINT uCodePage, int nCodeSet,
  169. LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
  170. {
  171. HRESULT hr = S_OK;
  172. HCINS hcins = NULL;
  173. int nSize = 0 ;
  174. int cchSrc = *lpnSrcSize ;
  175. if (!lpnSize)
  176. lpnSize = &nSize;
  177. if (!uCodePage) // Get default code page if nothing speicified
  178. uCodePage = g_uACP;
  179. if (!lpSrcStr && cchSrc < 0) // Get length of lpSrcStr if not given, assuming lpSrcStr is a zero terminate string.
  180. cchSrc = lstrlenA(lpSrcStr) + 1;
  181. if (!_hcins || ( nCodeSet != _hcins_dst ) )
  182. CreateINetString(fInbound,uCodePage,nCodeSet);
  183. if (_hcins ) { // Context created, it means DBCS
  184. int nTempSize = 0 ;
  185. // restore previous mode SO/SI ESC etc.
  186. ((CINetCodeConverter*)_hcins)->SetConvertMode(*lpdwMode);
  187. // if it is a JIS output set Kana mode
  188. if (!fInbound && uCodePage == CP_JPN_SJ && ( nCodeSet == CP_ISO_2022_JP ||
  189. nCodeSet == CP_ISO_2022_JP_ESC || nCodeSet == CP_ISO_2022_JP_SIO ))
  190. // JIS
  191. ((CInccJisOut*)_hcins)->SetKanaMode(nCodeSet);
  192. if (!lpDestStr || !cchDest) // Get the converted size
  193. {
  194. hr = ((CINetCodeConverter*)_hcins)->GetStringSizeA(lpSrcStr, cchSrc, lpnSize);
  195. if (0 == fInbound)
  196. {
  197. HRESULT _hr = ((CINetCodeConverter*)_hcins)->GetStringSizeA(NULL, 0, &nTempSize);
  198. if (S_OK != _hr)
  199. hr = _hr;
  200. }
  201. }
  202. else // Perform actual converting
  203. {
  204. hr = ((CINetCodeConverter*)_hcins)->ConvertStringA(lpSrcStr, cchSrc, lpDestStr, cchDest, lpnSize);
  205. if (0 == fInbound)
  206. {
  207. HRESULT _hr = ((CINetCodeConverter*)_hcins)->ConvertStringA(NULL, 0, lpDestStr+*lpnSize, cchDest-*lpnSize, &nTempSize);
  208. if (S_OK != _hr)
  209. hr = _hr;
  210. }
  211. }
  212. *lpnSize += nTempSize;
  213. // get number of unconvetable bytes
  214. if ( lpnSrcSize && ((CINetCodeConverter*)_hcins)->GetUnconvertBytes() )
  215. *lpnSrcSize = cchSrc -((CINetCodeConverter*)_hcins)->GetUnconvertBytes();
  216. // only save current mode SO/SI ESC if we are perform actual converting
  217. // we need this if statement because for two stages plus conversion.
  218. // It will inquire the size first then convert from IWUU or UUWI.
  219. if (lpDestStr && lpdwMode )
  220. *lpdwMode = ((CINetCodeConverter*)_hcins)->GetConvertMode();
  221. // delete hcins;
  222. } else {
  223. // Internet encodings that have same encoding scheme as their family encodings
  224. switch (nCodeSet)
  225. {
  226. case CP_EUC_KR:
  227. hr = KSC5601ToEUCKR(lpSrcStr, lpnSrcSize, lpDestStr, cchDest, lpnSize);
  228. break;
  229. default:
  230. if (!lpDestStr || !cchDest) // Get the converted size
  231. *lpnSize = cchSrc ;
  232. else
  233. {
  234. *lpnSize = min(cchSrc, cchDest);
  235. if (*lpnSize)
  236. MoveMemory(lpDestStr, lpSrcStr, *lpnSize);
  237. }
  238. }
  239. }
  240. return hr;
  241. }