|
|
#include "private.h"
#include "jisobj.h"
#include "eucjobj.h"
#include "hzgbobj.h"
#include "kscobj.h"
#include "utf8obj.h"
#include "utf7obj.h"
#include "fechrcnv.h"
#include "codepage.h"
#include "ichrcnv.h"
// Converts a CP_KOR_5601 byte string to EUC-KR.
//
// Single bytes and double-byte pairs whose lead and trail bytes both lie in
// 0xA1..0xFE (the "Wansung" range) are copied through unchanged.  Any other
// pair whose lead byte lies in 0x81..0xFE (the "UHC" extension) is replaced
// by either a numeric character reference "&#N;" (when one of the
// MLCONVCHARF_*_ENTITIZE flags is set and the pair converts to Unicode) or
// by the default character.
//
//   lpSrcStr    source bytes
//   lpnSrcSize  points to the number of source bytes; read only, must not be NULL
//   lpDestStr   destination buffer, or NULL to only compute the output size
//   cchDest     capacity of lpDestStr in bytes
//   lpnSize     optional; receives the number of bytes produced (or required)
//
// Returns S_OK, or S_FALSE when at least one character was replaced by the
// default character.  When lpDestStr runs out of room the conversion simply
// stops; the count of bytes produced so far is still reported.
HRESULT CICharConverter::KSC5601ToEUCKR(LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
{
    HRESULT hrResult = S_OK;
    int cbOut = 0;
    int iSrc = 0;
    UCHAR rgchDefault[3] = {0x3f};  // room for a possible DBCS result + null

    // A caller-supplied fallback replaces '?' only when it maps to exactly
    // one byte in the Korean code page.
    if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
    {
        int cbFallback = WideCharToMultiByte(CP_KOR_5601, 0, (LPCWSTR)_lpFallBack, 1,
                                             (LPSTR)rgchDefault, ARRAYSIZE(rgchDefault), NULL, NULL);
        if (cbFallback != 1)
            rgchDefault[0] = 0x3f;
    }

    while (iSrc < *lpnSrcSize)
    {
        // Destination already overrun: nothing more can be written.
        if (lpDestStr && (cbOut > cchDest))
            break;

        const UCHAR bLead = (UCHAR)lpSrcStr[iSrc];
        const BOOL fHasPair = (bLead >= 0x81 && bLead <= 0xFE) && (iSrc + 1 < *lpnSrcSize);

        // ---- Single byte (also a lead byte with no trail byte left) ----
        if (!fHasPair)
        {
            if (lpDestStr)
            {
                if (cbOut + 1 > cchDest)
                    break;
                *lpDestStr++ = lpSrcStr[iSrc];
            }
            cbOut += 1;
            iSrc += 1;
            continue;
        }

        const UCHAR bTrail = (UCHAR)lpSrcStr[iSrc + 1];

        // ---- Wansung pair: identical in EUC-KR, copy both bytes ----
        if (bLead >= 0xA1 && bLead <= 0xFE && bTrail >= 0xA1 && bTrail <= 0xFE)
        {
            if (lpDestStr)
            {
                if (cbOut + 2 > cchDest)
                    break;
                *lpDestStr++ = lpSrcStr[iSrc];
                *lpDestStr++ = lpSrcStr[iSrc + 1];
            }
            cbOut += 2;
            iSrc += 2;
            continue;
        }

        // ---- UHC extension pair: not representable in EUC-KR ----
        char szDigits[10] = {0};
        WCHAR wszChar[2];
        int cchEntity = 0;  // stays 0 when the default character must be used

        if ((_dwFlag & (MLCONVCHARF_NCR_ENTITIZE | MLCONVCHARF_NAME_ENTITIZE)) &&
            MultiByteToWideChar(CP_KOR_5601, 0, &lpSrcStr[iSrc], 2, wszChar, ARRAYSIZE(wszChar)))
        {
            // Entity length = decimal digits plus "&#" and ";".
            _ultoa((unsigned long)wszChar[0], (char*)szDigits, 10);
            cchEntity = lstrlenA(szDigits) + 3;
        }

        if (cchEntity)
        {
            if (lpDestStr)
            {
                // Never emit a partial entity.
                if (cbOut + cchEntity > cchDest)
                    break;

                *lpDestStr++ = '&';
                *lpDestStr++ = '#';
                for (int iDigit = 0; iDigit < cchEntity - 3; iDigit++)
                    *lpDestStr++ = szDigits[iDigit];
                *lpDestStr++ = ';';
            }
            cbOut += cchEntity;
        }
        else
        {
            // Substitute the default character and report the lossy conversion.
            if (lpDestStr)
            {
                if (cbOut + 1 > cchDest)
                    break;
                *lpDestStr++ = rgchDefault[0];
            }
            cbOut += 1;
            hrResult = S_FALSE;
        }

        iSrc += 2;
    }

    if (lpnSize)
        *lpnSize = cbOut;

    return hrResult;
}
/******************************************************************************
******************   C O N V E R T   I N E T   S T R I N G   ******************
******************************************************************************/

// Discards any existing conversion context and creates a new one matching the
// (uCodePage, nCodeSet) pair and the conversion direction.
//
//   fInbound   non-zero selects the "In" converter, zero the "Out" converter
//   uCodePage  family code page
//   nCodeSet   Internet encoding
//
// When the pair is not one of the combinations handled below, _hcins is left
// NULL and _hcins_dst is not modified.  Always returns S_OK.
HRESULT CICharConverter::CreateINetString(BOOL fInbound, UINT uCodePage, int nCodeSet)
{
    // Drop the previous context, if any.
    if (_hcins)
    {
        delete _hcins;
        _hcins = NULL;
    }

    const BOOL fJisCodeSet = (nCodeSet == CP_ISO_2022_JP ||
                              nCodeSet == CP_ISO_2022_JP_ESC ||
                              nCodeSet == CP_ISO_2022_JP_SIO);

    if (uCodePage == CP_JPN_SJ)
    {
        if (fJisCodeSet)
        {
            // JIS
            if (fInbound)
                _hcins = new CInccJisIn(uCodePage, nCodeSet);
            else
                _hcins = new CInccJisOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        }
        else if (nCodeSet == CP_EUC_JP)
        {
            // EUC
            if (fInbound)
                _hcins = new CInccEucJIn(uCodePage, nCodeSet);
            else
                _hcins = new CInccEucJOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        }
    }
    else if (uCodePage == CP_CHN_GB)
    {
        if (nCodeSet == CP_CHN_HZ)
        {
            // HZ-GB
            if (fInbound)
                _hcins = new CInccHzGbIn(uCodePage, nCodeSet);
            else
                _hcins = new CInccHzGbOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        }
    }
    else if (uCodePage == CP_KOR_5601)
    {
        if (nCodeSet == CP_ISO_2022_KR)
        {
            if (fInbound)
                _hcins = new CInccKscIn(uCodePage, nCodeSet);
            else
                _hcins = new CInccKscOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        }
    }
    else if (uCodePage == CP_UCS_2)
    {
        if (nCodeSet == CP_UTF_8)
        {
            if (fInbound)
                _hcins = new CInccUTF8In(uCodePage, nCodeSet);
            else
                _hcins = new CInccUTF8Out(uCodePage, nCodeSet);
        }
        else if (nCodeSet == CP_UTF_7)
        {
            if (fInbound)
                _hcins = new CInccUTF7In(uCodePage, nCodeSet);
            else
                _hcins = new CInccUTF7Out(uCodePage, nCodeSet);
        }
    }

    // Record the destination code set the new context was built for.
    if (_hcins)
        _hcins_dst = nCodeSet;

    return S_OK;
}
// Converts lpSrcStr between a family code page and an Internet encoding, or
// computes the size the conversion would need.
//
//   lpdwMode    optional; conversion state (SO/SI, ESC, ...) carried between
//               calls.  Restored into the converter before converting and
//               written back only after an actual conversion.
//   fInbound    non-zero for inbound conversion, zero for outbound
//   uCodePage   family code page; 0 selects g_uACP
//   nCodeSet    Internet encoding
//   lpSrcStr    source bytes
//   lpnSrcSize  optional; points to the source length in bytes.  NULL or a
//               negative value means lpSrcStr is zero-terminated.  When the
//               converter reports unconverted bytes, receives the number of
//               bytes actually consumed.
//   lpDestStr   destination buffer; NULL (or cchDest == 0 on the converter and
//               pass-through paths) requests a size computation only
//   cchDest     capacity of lpDestStr in bytes
//   lpnSize     optional; receives the number of bytes produced or required
//
// Returns the HRESULT of the underlying converter, or S_OK on the
// pass-through path.
HRESULT CICharConverter::DoConvertINetString(LPDWORD lpdwMode, BOOL fInbound, UINT uCodePage, int nCodeSet,
      LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
{
    HRESULT hr = S_OK;
    int nSize = 0;

    // lpnSrcSize is treated as optional further down, so do not dereference
    // it blindly here; a missing length is handled like a negative one.
    int cchSrc = lpnSrcSize ? *lpnSrcSize : -1;

    if (!lpnSize)
        lpnSize = &nSize;

    if (!uCodePage) // Get default code page if nothing specified
        uCodePage = g_uACP;

    // Length not given: assume lpSrcStr is a zero-terminated string and
    // include the terminator.  A NULL source has nothing to convert.
    if (cchSrc < 0)
        cchSrc = lpSrcStr ? lstrlenA(lpSrcStr) + 1 : 0;

    // NOTE(review): the cached context is keyed on nCodeSet only; a change of
    // fInbound or uCodePage with the same nCodeSet reuses the old context.
    // Confirm that callers never switch direction on one converter instance.
    if (!_hcins || (nCodeSet != _hcins_dst))
        CreateINetString(fInbound, uCodePage, nCodeSet);

    if (_hcins)
    {
        // Context created, it means DBCS
        CINetCodeConverter* pConverter = (CINetCodeConverter*)_hcins;
        int nTempSize = 0;

        // Restore previous mode (SO/SI, ESC, etc.) when the caller tracks one.
        if (lpdwMode)
            pConverter->SetConvertMode(*lpdwMode);

        // If it is a JIS output, set Kana mode
        if (!fInbound && uCodePage == CP_JPN_SJ &&
            (nCodeSet == CP_ISO_2022_JP || nCodeSet == CP_ISO_2022_JP_ESC || nCodeSet == CP_ISO_2022_JP_SIO))
        {
            ((CInccJisOut*)_hcins)->SetKanaMode(nCodeSet);
        }

        if (!lpDestStr || !cchDest)
        {
            // Get the converted size
            hr = pConverter->GetStringSizeA(lpSrcStr, cchSrc, lpnSize);
            if (0 == fInbound)
            {
                // Outbound: a second call with a NULL source adds whatever
                // trailing bytes the converter still has to emit.
                HRESULT _hr = pConverter->GetStringSizeA(NULL, 0, &nTempSize);
                if (S_OK != _hr)
                    hr = _hr;
            }
        }
        else
        {
            // Perform actual converting
            hr = pConverter->ConvertStringA(lpSrcStr, cchSrc, lpDestStr, cchDest, lpnSize);
            if (0 == fInbound)
            {
                // Outbound: append the converter's trailing bytes after the
                // converted text, within the remaining space.
                HRESULT _hr = pConverter->ConvertStringA(NULL, 0, lpDestStr + *lpnSize, cchDest - *lpnSize, &nTempSize);
                if (S_OK != _hr)
                    hr = _hr;
            }
        }

        *lpnSize += nTempSize;

        // Get number of unconvertible bytes
        if (lpnSrcSize && pConverter->GetUnconvertBytes())
            *lpnSrcSize = cchSrc - pConverter->GetUnconvertBytes();

        // Only save the current mode (SO/SI, ESC) after an actual conversion.
        // Two-stage conversions first inquire the size and then convert; the
        // size inquiry must not disturb the caller's saved mode.
        if (lpDestStr && lpdwMode)
            *lpdwMode = pConverter->GetConvertMode();
    }
    else
    {
        // Internet encodings that have same encoding scheme as their family encodings
        switch (nCodeSet)
        {
            case CP_EUC_KR:
                // Pass the resolved length; KSC5601ToEUCKR only reads it.
                hr = KSC5601ToEUCKR(lpSrcStr, &cchSrc, lpDestStr, cchDest, lpnSize);
                break;

            default:
                if (!lpDestStr || !cchDest)
                {
                    // Get the converted size
                    *lpnSize = cchSrc;
                }
                else
                {
                    *lpnSize = min(cchSrc, cchDest);
                    // Guard against a negative count reaching MoveMemory.
                    if (*lpnSize > 0)
                        MoveMemory(lpDestStr, lpSrcStr, *lpnSize);
                }
                break;
        }
    }

    return hr;
}
|