Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

246 lines
6.4 KiB

  1. #include "private.h"
  2. #include "convobj.h"
  3. #include "detcbase.h"
  4. #include "codepage.h"
  5. #include "detcjpn.h"
  6. #include "detckrn.h"
  7. CMLangConvertCharset::CMLangConvertCharset(void)
  8. {
  9. DllAddRef();
  10. lpCharConverter = NULL ;
  11. m_dwSrcEncoding = 0 ;
  12. m_dwDetectSrcEncoding = 0 ;
  13. m_dwDstEncoding = 0 ;
  14. m_dwMode = 0 ;
  15. return ;
  16. }
  17. CMLangConvertCharset::~CMLangConvertCharset(void)
  18. {
  19. if (lpCharConverter)
  20. delete lpCharConverter ;
  21. DllRelease();
  22. return ;
  23. }
  24. //
  25. // CMLangConvertCharset implementation
  26. //
  27. STDAPI CMLangConvertCharset::Initialize(UINT uiSrcCodePage, UINT uiDstCodePage, DWORD dwProperty)
  28. {
  29. HRESULT hr = S_OK ;
  30. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::Initialize called."));
  31. if ( m_dwSrcEncoding != uiSrcCodePage ||
  32. m_dwDstEncoding != uiDstCodePage )
  33. {
  34. m_dwSrcEncoding = uiSrcCodePage ;
  35. m_dwDstEncoding = uiDstCodePage ;
  36. if (lpCharConverter)
  37. delete lpCharConverter ;
  38. lpCharConverter = new CICharConverter ;
  39. if (!lpCharConverter)
  40. return E_FAIL ;
  41. hr = lpCharConverter->ConvertSetup(&m_dwSrcEncoding, m_dwDstEncoding);
  42. }
  43. m_dwMode = 0 ;
  44. m_dwProperty = dwProperty ;
  45. return hr ;
  46. }
  47. STDAPI CMLangConvertCharset::GetSourceCodePage(UINT *puiSrcCodePage)
  48. {
  49. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetSourceCodePage called."));
  50. if (puiSrcCodePage)
  51. {
  52. *puiSrcCodePage = m_dwSrcEncoding ;
  53. return S_OK ;
  54. }
  55. else
  56. return E_INVALIDARG ;
  57. }
  58. STDAPI CMLangConvertCharset::GetDestinationCodePage(UINT *puiDstCodePage)
  59. {
  60. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetDestinationCodePage called."));
  61. if (puiDstCodePage)
  62. {
  63. *puiDstCodePage = m_dwDstEncoding ;
  64. return S_OK ;
  65. }
  66. else
  67. return E_INVALIDARG ;
  68. }
  69. STDAPI CMLangConvertCharset::GetDeterminedSrcCodePage(UINT *puiCodePage)
  70. {
  71. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetDeterminedSrcCodePage called."));
  72. if (m_dwDetectSrcEncoding)
  73. {
  74. if (puiCodePage)
  75. {
  76. *puiCodePage = m_dwDetectSrcEncoding;
  77. return S_OK ;
  78. }
  79. else
  80. return E_INVALIDARG ;
  81. }
  82. else
  83. return S_FALSE ;
  84. }
  85. STDAPI CMLangConvertCharset::GetProperty(DWORD *pdwProperty)
  86. {
  87. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetProperty called."));
  88. if (pdwProperty)
  89. {
  90. *pdwProperty = m_dwProperty;
  91. return S_OK ;
  92. }
  93. else
  94. return E_INVALIDARG ;
  95. }
  96. STDAPI CMLangConvertCharset::DoConversion(BYTE *pSrcStr, UINT *pcSrcSize, BYTE *pDstStr, UINT *pcDstSize)
  97. {
  98. HRESULT hr ;
  99. DWORD dwMode = m_dwMode ;
  100. int nSrcSize = -1 ;
  101. int nDstSize = 0 ;
  102. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::DoConversion called."));
  103. // no converter was set up
  104. if (!lpCharConverter)
  105. return E_FAIL ;
  106. if (pcSrcSize)
  107. nSrcSize = *pcSrcSize ;
  108. if ( pSrcStr && nSrcSize == -1 ) // Get length of lpSrcStr if not given, assuming lpSrcStr is a zero terminate string.
  109. {
  110. if ( m_dwSrcEncoding == CP_UCS_2 )
  111. nSrcSize = (lstrlenW( (WCHAR*) pSrcStr) << 1 ) ;
  112. else
  113. nSrcSize = lstrlenA( (CHAR*) pSrcStr) ;
  114. }
  115. if (pcDstSize)
  116. nDstSize = *pcDstSize ;
  117. if ( m_dwSrcEncoding == CP_JP_AUTO ) // Auto Detection for Japan
  118. {
  119. CIncdJapanese DetectJapan;
  120. DWORD dwSrcEncoding ;
  121. dwSrcEncoding = DetectJapan.DetectStringA((LPSTR)pSrcStr, nSrcSize);
  122. // if dwSrcEncoding is zero means there is an ambiguity, we don't return
  123. // the detected codepage to caller, instead we defaut its codepage internally
  124. // to SJIS
  125. if (dwSrcEncoding)
  126. {
  127. m_dwDetectSrcEncoding = m_dwSrcEncoding = dwSrcEncoding ;
  128. m_dwProperty |= MLCONVCHARF_AUTODETECT ;
  129. }
  130. else
  131. dwSrcEncoding = CP_JPN_SJ;
  132. hr = lpCharConverter->ConvertSetup(&dwSrcEncoding, m_dwDstEncoding);
  133. if ( hr != S_OK )
  134. return hr ;
  135. }
  136. else if ( m_dwSrcEncoding == CP_KR_AUTO ) // Auto Detection for Korean
  137. {
  138. CIncdKorean DetectKorean;
  139. m_dwDetectSrcEncoding = m_dwSrcEncoding = DetectKorean.DetectStringA((LPSTR)pSrcStr, nSrcSize);
  140. hr = lpCharConverter->ConvertSetup(&m_dwSrcEncoding, m_dwDstEncoding);
  141. if ( hr != S_OK )
  142. return hr ;
  143. m_dwProperty |= MLCONVCHARF_AUTODETECT ;
  144. }
  145. else if ( m_dwSrcEncoding == CP_AUTO ) // General Auto Detection for all code pages
  146. {
  147. int _nSrcSize = DETECTION_MAX_LEN < nSrcSize ? DETECTION_MAX_LEN : nSrcSize;
  148. INT nScores = 1;
  149. DWORD dwSrcEncoding ;
  150. DetectEncodingInfo Encoding;
  151. if ( S_OK == _DetectInputCodepage(MLDETECTCP_HTML, 1252, (char *)pSrcStr, &_nSrcSize, &Encoding, &nScores))
  152. {
  153. m_dwDetectSrcEncoding = m_dwSrcEncoding = dwSrcEncoding = Encoding.nCodePage;
  154. m_dwProperty |= MLCONVCHARF_AUTODETECT ;
  155. }
  156. else
  157. {
  158. dwSrcEncoding = 1252;
  159. }
  160. hr = lpCharConverter->ConvertSetup(&dwSrcEncoding, m_dwDstEncoding);
  161. if ( hr != S_OK )
  162. {
  163. return hr ;
  164. }
  165. }
  166. hr = lpCharConverter->DoCodeConvert(&dwMode, (LPCSTR) pSrcStr, &nSrcSize, (LPSTR) pDstStr, &nDstSize, m_dwProperty, NULL);
  167. // return the number of bytes processed for the source.
  168. if (pcSrcSize)
  169. *pcSrcSize = lpCharConverter->_nSrcSize ;
  170. if (pcDstSize)
  171. *pcDstSize = nDstSize;
  172. if (pDstStr)
  173. m_dwMode = dwMode ;
  174. lpCharConverter->ConvertCleanUp();
  175. return hr ;
  176. }
  177. STDAPI CMLangConvertCharset::DoConversionToUnicode(CHAR *pSrcStr, UINT *pcSrcSize, WCHAR *pDstStr, UINT *pcDstSize)
  178. {
  179. HRESULT hr ;
  180. UINT nByteCountSize = (pcDstSize ? *pcDstSize * sizeof(WCHAR) : 0 ) ;
  181. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::DoConversionToUnicode called."));
  182. hr = DoConversion((BYTE*)pSrcStr,pcSrcSize,(BYTE*)pDstStr,&nByteCountSize);
  183. if (pcDstSize)
  184. *pcDstSize = nByteCountSize / sizeof(WCHAR);
  185. return hr;
  186. }
  187. STDAPI CMLangConvertCharset::DoConversionFromUnicode(WCHAR *pSrcStr, UINT *pcSrcSize, CHAR *pDstStr, UINT *pcDstSize)
  188. {
  189. HRESULT hr ;
  190. UINT nByteCountSize = (pcSrcSize ? *pcSrcSize * sizeof(WCHAR) : 0 ) ;
  191. DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::DoConversionFromUnicode called."));
  192. hr = DoConversion((BYTE*)pSrcStr,&nByteCountSize,(BYTE*)pDstStr,pcDstSize);
  193. if (pcSrcSize)
  194. *pcSrcSize = nByteCountSize / sizeof(WCHAR);
  195. return hr ;
  196. }