#include "private.h"
#include "jisobj.h"
#include "eucjobj.h"
#include "hzgbobj.h"
#include "kscobj.h"
#include "utf8obj.h"
#include "utf7obj.h"
#include "fechrcnv.h"
#include "codepage.h"
#include "ichrcnv.h"

//
// KSC5601ToEUCKR
//
// Copies a CP_KOR_5601 byte string to lpDestStr, keeping only double-byte
// characters whose lead AND trail bytes are both in 0xA1..0xFE (Wansung).
// Any other double-byte pair (lead byte 0x81..0xFE, i.e. a UHC extension
// character) is replaced by either
//   - a decimal numeric character reference "&#NNNNN;" when _dwFlag has
//     MLCONVCHARF_NCR_ENTITIZE or MLCONVCHARF_NAME_ENTITIZE set and the pair
//     converts to Unicode, or
//   - a single default character ('?' unless a single-byte fallback is
//     configured through _lpFallBack + MLCONVCHARF_USEDEFCHAR).
// Single bytes (including a lead byte that is the very last source byte) are
// copied unchanged.
//
// Parameters:
//   lpSrcStr    source bytes
//   lpnSrcSize  [in] number of source bytes; must not be NULL (read only,
//               never written by this function)
//   lpDestStr   destination buffer, or NULL to only compute the output size
//   cchDest     capacity of lpDestStr in bytes (ignored when lpDestStr is NULL)
//   lpnSize     [out, optional] number of bytes produced (or required)
//
// Returns S_OK, or S_FALSE if at least one character was replaced by the
// default character. Conversion stops silently when the next output item does
// not fit into cchDest; *lpnSize then holds what was produced so far.
//
HRESULT CICharConverter::KSC5601ToEUCKR(LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
{
    int nSize = 0;
    int i = 0;
    HRESULT hr = S_OK;
    UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null
    const int cchSrc = *lpnSrcSize;  // source length is never modified below

    if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
    {
        // only take SBCS, no DBCS character
        if (1 != WideCharToMultiByte(CP_KOR_5601, 0,
                                     (LPCWSTR)_lpFallBack, 1,
                                     (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar),
                                     NULL, NULL))
            szDefaultChar[0] = 0x3f;
    }

    while (i < cchSrc)
    {
        // Check space
        if (lpDestStr && (nSize > cchDest))
            break;

        const UCHAR bLead = (UCHAR)lpSrcStr[i];

        // DBCS (a lead byte needs a following trail byte inside the source)
        if ((bLead >= 0x81 && bLead <= 0xFE) && (i + 1 < cchSrc))
        {
            const UCHAR bTrail = (UCHAR)lpSrcStr[i + 1];

            // UHC: anything outside the 0xA1..0xFE x 0xA1..0xFE Wansung block
            if (!(bLead >= 0xA1 && bLead <= 0xFE && bTrail >= 0xA1 && bTrail <= 0xFE))
            {
                char szDstStr[10] = {0};
                int cCount = 0; // length of "&#<digits>;"; 0 means "use default char"

                // use NCR if flag specified
                if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE | MLCONVCHARF_NAME_ENTITIZE))
                {
                    WCHAR szwChar[2];

                    if (MultiByteToWideChar(CP_KOR_5601, 0, &lpSrcStr[i], 2, szwChar, ARRAYSIZE(szwChar)))
                    {
                        // Calculate NCR length: digits + '&' + '#' + ';'
                        _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);
                        cCount = lstrlenA(szDstStr) + 3;
                    }
                }

                if (cCount)
                {
                    if (lpDestStr)
                    {
                        // Not enough space for NCR entity
                        if (nSize + cCount > cchDest)
                            break;

                        // Output NCR entity
                        *lpDestStr++ = '&';
                        *lpDestStr++ = '#';
                        for (int j = 0; j < cCount - 3; j++)
                            *lpDestStr++ = szDstStr[j];
                        *lpDestStr++ = ';';
                    }
                    nSize += cCount;
                }
                else
                {
                    // use default char, question mark
                    // (entitizing not requested, or the pair has no Unicode mapping)
                    if (lpDestStr)
                    {
                        if (nSize + 1 > cchDest)
                            break;
                        *lpDestStr++ = szDefaultChar[0];
                    }
                    nSize++;
                    hr = S_FALSE;
                }
                i += 2;
            }
            // Wansung
            else
            {
                if (lpDestStr)
                {
                    if (nSize + 2 > cchDest)
                        break;
                    *lpDestStr++ = lpSrcStr[i];
                    *lpDestStr++ = lpSrcStr[i + 1];
                }
                i += 2;
                nSize += 2;
            }
        }
        // SBCS
        else
        {
            if (lpDestStr)
            {
                if (nSize + 1 > cchDest)
                    break;
                *lpDestStr++ = lpSrcStr[i];
            }
            nSize++;
            i++;
        }
    } // End of loop

    if (lpnSize)
        *lpnSize = nSize;

    return hr;
}

/******************************************************************************
******************   C O N V E R T   I N E T   S T R I N G   ******************
******************************************************************************/

//
// CreateINetString
//
// Destroys the current converter object (_hcins), if any, and creates the one
// matching the (direction, uCodePage, nCodeSet) combination:
//   CP_JPN_SJ   <-> ISO-2022-JP variants   (CInccJisIn  / CInccJisOut)
//   CP_JPN_SJ   <-> CP_EUC_JP              (CInccEucJIn / CInccEucJOut)
//   CP_CHN_GB   <-> CP_CHN_HZ              (CInccHzGbIn / CInccHzGbOut)
//   CP_KOR_5601 <-> CP_ISO_2022_KR         (CInccKscIn  / CInccKscOut)
//   CP_UCS_2    <-> CP_UTF_8 / CP_UTF_7    (CInccUTF8*  / CInccUTF7*)
// For any other combination _hcins is left NULL.
//
// When a converter was created, nCodeSet is remembered in _hcins_dst.
// Always returns S_OK, including when no converter was created.
//
HRESULT CICharConverter::CreateINetString(BOOL fInbound, UINT uCodePage, int nCodeSet)
{
    if (_hcins)
    {
        delete _hcins;
        _hcins = NULL;
    }

    if (fInbound)
    {
        // Inbound
        if (uCodePage == CP_JPN_SJ && (nCodeSet == CP_ISO_2022_JP ||
                                       nCodeSet == CP_ISO_2022_JP_ESC ||
                                       nCodeSet == CP_ISO_2022_JP_SIO))
            // JIS
            _hcins = new CInccJisIn(uCodePage, nCodeSet);
        else if (uCodePage == CP_JPN_SJ && nCodeSet == CP_EUC_JP)
            // EUC
            _hcins = new CInccEucJIn(uCodePage, nCodeSet);
        else if (uCodePage == CP_CHN_GB && nCodeSet == CP_CHN_HZ)
            // HZ-GB
            _hcins = new CInccHzGbIn(uCodePage, nCodeSet);
        else if (uCodePage == CP_KOR_5601 && nCodeSet == CP_ISO_2022_KR)
            _hcins = new CInccKscIn(uCodePage, nCodeSet);
        else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_8)
            _hcins = new CInccUTF8In(uCodePage, nCodeSet);
        else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_7)
            _hcins = new CInccUTF7In(uCodePage, nCodeSet);
    }
    else
    {
        // Outbound
        if (uCodePage == CP_JPN_SJ && (nCodeSet == CP_ISO_2022_JP ||
                                       nCodeSet == CP_ISO_2022_JP_ESC ||
                                       nCodeSet == CP_ISO_2022_JP_SIO))
            // JIS
            _hcins = new CInccJisOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        else if (uCodePage == CP_JPN_SJ && nCodeSet == CP_EUC_JP)
            // EUC
            _hcins = new CInccEucJOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        else if (uCodePage == CP_CHN_GB && nCodeSet == CP_CHN_HZ)
            // HZ-GB
            _hcins = new CInccHzGbOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        else if (uCodePage == CP_KOR_5601 && nCodeSet == CP_ISO_2022_KR)
            _hcins = new CInccKscOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_8)
            _hcins = new CInccUTF8Out(uCodePage, nCodeSet);
        else if (uCodePage == CP_UCS_2 && nCodeSet == CP_UTF_7)
            _hcins = new CInccUTF7Out(uCodePage, nCodeSet);
    }

    // record the dst codepage
    if (_hcins)
        _hcins_dst = nCodeSet;

    return S_OK;
}

//
// DoConvertINetString
//
// Converts (or, when lpDestStr is NULL or cchDest is 0, only sizes) a string
// between a Windows code page and an Internet encoding.
//
// Parameters:
//   lpdwMode    [in/out, optional] saved converter mode (SO/SI, ESC state).
//               Read before converting; written back only when an actual
//               conversion was performed. NULL is treated as mode 0
//               (NOTE(review): 0 is assumed to be the initial mode - confirm
//               against CINetCodeConverter).
//   fInbound    non-zero for inbound conversion, zero for outbound
//   uCodePage   Windows code page; 0 selects g_uACP
//   nCodeSet    Internet encoding
//   lpSrcStr    source bytes
//   lpnSrcSize  [in/out, optional] source length in bytes. NULL or a negative
//               value means lpSrcStr is zero terminated (the terminator is
//               included). On return it is reduced by the number of
//               unconverted bytes when the converter reports any.
//   lpDestStr   destination buffer, or NULL to query the required size
//   cchDest     capacity of lpDestStr in bytes
//   lpnSize     [out, optional] bytes written / required
//
// Encodings without a dedicated converter object are handled inline:
// CP_EUC_KR through KSC5601ToEUCKR, everything else as a plain byte copy.
//
HRESULT CICharConverter::DoConvertINetString(LPDWORD lpdwMode, BOOL fInbound, UINT uCodePage, int nCodeSet,
      LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
{
    HRESULT hr = S_OK;
    int nSize = 0;
    // lpnSrcSize is NULL-checked further down, so it must not be dereferenced blindly here.
    int cchSrc = lpnSrcSize ? *lpnSrcSize : -1;

    if (!lpnSize)
        lpnSize = &nSize;

    if (!uCodePage) // Get default code page if nothing specified
        uCodePage = g_uACP;

    // Get length of lpSrcStr if not given, assuming lpSrcStr is a zero terminated string.
    // (The previous test was inverted: it only measured the string when the pointer was NULL.)
    if (cchSrc < 0)
        cchSrc = lpSrcStr ? lstrlenA(lpSrcStr) + 1 : 0;

    if (!_hcins || (nCodeSet != _hcins_dst))
        CreateINetString(fInbound, uCodePage, nCodeSet);

    if (_hcins)
    {
        // Context created, it means DBCS
        CINetCodeConverter* pConverter = (CINetCodeConverter*)_hcins;
        int nTempSize = 0;

        // restore previous mode SO/SI ESC etc.
        pConverter->SetConvertMode(lpdwMode ? *lpdwMode : 0);

        // if it is a JIS output set Kana mode
        if (!fInbound && uCodePage == CP_JPN_SJ && (nCodeSet == CP_ISO_2022_JP ||
                                                    nCodeSet == CP_ISO_2022_JP_ESC ||
                                                    nCodeSet == CP_ISO_2022_JP_SIO))
            // JIS
            ((CInccJisOut*)_hcins)->SetKanaMode(nCodeSet);

        if (!lpDestStr || !cchDest) // Get the converted size
        {
            hr = pConverter->GetStringSizeA(lpSrcStr, cchSrc, lpnSize);
            if (0 == fInbound)
            {
                // Outbound: a second call with an empty source accounts for
                // whatever the converter still has to emit at end of input.
                HRESULT _hr = pConverter->GetStringSizeA(NULL, 0, &nTempSize);
                if (S_OK != _hr)
                    hr = _hr;
            }
        }
        else // Perform actual converting
        {
            hr = pConverter->ConvertStringA(lpSrcStr, cchSrc, lpDestStr, cchDest, lpnSize);
            if (0 == fInbound)
            {
                // Outbound: append the converter's end-of-input output after the converted text.
                HRESULT _hr = pConverter->ConvertStringA(NULL, 0, lpDestStr + *lpnSize, cchDest - *lpnSize, &nTempSize);
                if (S_OK != _hr)
                    hr = _hr;
            }
        }

        *lpnSize += nTempSize;

        // get number of unconvertible bytes
        if (lpnSrcSize && pConverter->GetUnconvertBytes())
            *lpnSrcSize = cchSrc - pConverter->GetUnconvertBytes();

        // only save current mode SO/SI ESC if we are performing an actual conversion.
        // we need this if statement because of two-stage-plus conversions:
        // they inquire the size first, then convert from IWUU or UUWI.
        if (lpDestStr && lpdwMode)
            *lpdwMode = pConverter->GetConvertMode();
    }
    else
    {
        // Internet encodings that have same encoding scheme as their family encodings
        switch (nCodeSet)
        {
            case CP_EUC_KR:
                // Pass the resolved length; KSC5601ToEUCKR only reads it.
                hr = KSC5601ToEUCKR(lpSrcStr, &cchSrc, lpDestStr, cchDest, lpnSize);
                break;

            default:
                if (!lpDestStr || !cchDest) // Get the converted size
                    *lpnSize = cchSrc;
                else
                {
                    *lpnSize = min(cchSrc, cchDest);
                    // Copy only a positive byte count; a negative value would
                    // become a huge size when converted for MoveMemory.
                    if (*lpnSize > 0)
                        MoveMemory(lpDestStr, lpSrcStr, *lpnSize);
                }
                break;
        }
    }

    return hr;
}