|
|
#include "stdafx.h"
#include "Msg.h"
#include "ConvEng.h"
#include "TextFile.h"
#ifdef RTF_SUPPORT
#include "RtfParser.h"
#endif
BOOL ConvertTextFile( PBYTE pbySource, DWORD dwFileSize, PBYTE pbyTarget, DWORD dwTargetSize, BOOL fAnsiToUnicode, PINT pnTargetFileSize) { BOOL fRet = FALSE;
if (!fAnsiToUnicode && *((PWORD)pbySource) != 0xFEFF) { MsgNotUnicodeTextSourceFile(); return FALSE; }
if (fAnsiToUnicode && *((PWORD)pbySource) == 0xFEFF) { MsgNotAnsiTextSourceFile(); return FALSE; }
if (fAnsiToUnicode) { PWCH pwchTarget = (PWCH)pbyTarget; // Put Unicode text file flag
*pwchTarget = 0xFEFF; *pnTargetFileSize = 1;
// Null file
if (!dwFileSize) { fRet = TRUE; goto Exit; }
// Convert
*pnTargetFileSize += AnsiStrToUnicodeStr(pbySource, dwFileSize, pwchTarget+1, dwTargetSize-2); *pnTargetFileSize *= sizeof(WCHAR); } else { // Check and skip Uncode text file flag
if (dwFileSize < 2) { goto Exit; } PWCH pwchData = (PWCH)pbySource; if (*pwchData != 0xFEFF) { goto Exit; } pwchData++;
// Null file w/ Unicode flag only
if (dwFileSize == 2) { fRet = TRUE; goto Exit; }
// Convert
*pnTargetFileSize = UnicodeStrToAnsiStr(pwchData, dwFileSize/sizeof(WCHAR) - 1, (PCHAR)pbyTarget, dwTargetSize);
}
if (*pnTargetFileSize) { fRet = TRUE; }
Exit: return fRet; }
BOOL ConvertHtmlFile( PBYTE pbySource, DWORD dwFileSize, PBYTE pbyTarget, DWORD dwTargetSize, BOOL fAnsiToUnicode, PINT pnTargetFileSize) { BOOL fRet = FALSE;
if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget, // Reserve the last space to explicitly assign zero to the last
// character in the buffer
dwTargetSize - (fAnsiToUnicode ? sizeof(WCHAR):sizeof(char)), fAnsiToUnicode, pnTargetFileSize)) { return FALSE; } // Change charset
if (fAnsiToUnicode) { const WCHAR* const wszUnicodeCharset = L"charset=unicode"; WCHAR *pwch1, *pwch2; int nLengthIncrease;
*((PWCH)(pbyTarget+*pnTargetFileSize)) = 0; pwch1 = wcsstr((PWCH)pbyTarget, L"charset="); if (!pwch1) { // Some Html file may haven't code page flag,
// We skip charset replace step for this kind of files
fRet = TRUE; goto Exit; }
pwch2 = wcschr(pwch1, L'\"'); if (!pwch2 || (pwch2 - pwch1 >= 20)) { goto Exit; }
nLengthIncrease = (int)(wcslen(wszUnicodeCharset) - (pwch2 - pwch1));
if (*pnTargetFileSize + nLengthIncrease*sizeof(WCHAR) > dwTargetSize) { goto Exit; }
MoveMemory(pwch2 + nLengthIncrease, pwch2, pbyTarget + *pnTargetFileSize - (PBYTE)pwch2); CopyMemory(pwch1, wszUnicodeCharset, wcslen(wszUnicodeCharset)*sizeof(WCHAR)); *pnTargetFileSize += nLengthIncrease*sizeof(WCHAR);
fRet = TRUE;
} else { const CHAR* const szGBCharset = "charset=gb18030"; CHAR *pch1, *pch2; int nLengthIncrease;
*((PCHAR)(pbyTarget+*pnTargetFileSize)) = 0; pch1 = strstr((PCHAR)pbyTarget, "charset="); if (!pch1) { // Some Html file may haven't code page flag,
// We skip charset replace step for this kind of files
fRet = TRUE; goto Exit; }
pch2 = strchr(pch1, '\"'); if (!pch2 || (pch2 - pch1 >= 20)) { goto Exit; }
nLengthIncrease = (int)(strlen(szGBCharset) - (pch2 - pch1)); if (*pnTargetFileSize + nLengthIncrease > (int)dwTargetSize) { goto Exit; }
MoveMemory(pch2 + nLengthIncrease, pch2, (PCHAR)pbyTarget + *pnTargetFileSize - pch2); CopyMemory(pch1, szGBCharset, strlen(szGBCharset)*sizeof(char)); *pnTargetFileSize += nLengthIncrease*sizeof(char);
fRet = TRUE; }
Exit: return fRet; }
#ifdef XML_SUPPORT
BOOL ConvertXmlFile( PBYTE pbySource, DWORD dwFileSize, PBYTE pbyTarget, DWORD dwTargetSize, BOOL fAnsiToUnicode, PINT pnTargetFileSize) { BOOL fRet = FALSE;
if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget, // Reserve the last space to explicitly assign zero to the last
// character in the buffer
dwTargetSize - (fAnsiToUnicode ? sizeof(WCHAR):sizeof(char)), fAnsiToUnicode, pnTargetFileSize)) { return FALSE; } // Change charset
if (fAnsiToUnicode) { const WCHAR* const wszUnicodeCharset = L"UTF-16"; WCHAR *pwchEnd, *pwch1, *pwch2; int nLengthIncrease;
*((PWCH)(pbyTarget+*pnTargetFileSize)) = 0; pwch1 = wcsstr((PWCH)pbyTarget, L"<?xml"); if (!pwch1) { goto Exit; } pwchEnd = wcsstr(pwch1, L"?>"); if (!pwchEnd) { goto Exit; }
// temp set to null-terminal
*pwchEnd = 0;
pwch1 = wcsstr(pwch1, L"encoding="); if (!pwch1) { // Some Html file may haven't code page flag,
// We skip charset replace step for this kind of files
fRet = TRUE; *pwchEnd = '?'; goto Exit; }
pwch1 += wcslen(L"encoding="); WCHAR wch = *pwch1; pwch1++;
if (wch != '\"' && wch != '\'') { *pwchEnd = '?'; goto Exit; }
pwch2 = wcschr(pwch1, wch); if (!pwch2 || (pwch2 - pwch1 >= 20)) { *pwchEnd = '?'; goto Exit; }
// restore *pwch2
*pwchEnd = '?'; nLengthIncrease = (int)(wcslen(wszUnicodeCharset) - (pwch2 - pwch1));
if (*pnTargetFileSize + nLengthIncrease*sizeof(WCHAR) > dwTargetSize) { goto Exit; }
MoveMemory(pwch2 + nLengthIncrease, pwch2, pbyTarget + *pnTargetFileSize - (PBYTE)pwch2); CopyMemory(pwch1, wszUnicodeCharset, wcslen(wszUnicodeCharset)*sizeof(WCHAR)); *pnTargetFileSize += nLengthIncrease*sizeof(WCHAR);
fRet = TRUE;
} else { const char* const szGBCharset = "GB18030"; char *pchEnd, *pch1, *pch2; int nLengthIncrease;
*((PCH)(pbyTarget+*pnTargetFileSize)) = 0; pch1 = strstr((char*)pbyTarget, "<?xml"); if (!pch1) { goto Exit; } pchEnd = strstr(pch1, "?>"); if (!pchEnd) { goto Exit; }
// temp set to null-terminal
*pchEnd = 0;
pch1 = strstr(pch1, "encoding="); if (!pch1) { // Some Html file may haven't code page flag,
// We skip charset replace step for this kind of files
fRet = TRUE; *pchEnd = '?'; goto Exit; }
pch1 += strlen("encoding="); CHAR ch = *pch1; pch1++;
if (ch != '\"' && ch != '\'') { *pchEnd = '?'; goto Exit; }
pch2 = strchr(pch1, ch); if (!pch2 || (pch2 - pch1 >= 20)) { *pchEnd = '?'; goto Exit; }
// restore *pwch2
*pchEnd = '?'; nLengthIncrease = (int)(strlen(szGBCharset) - (pch2 - pch1));
if (*pnTargetFileSize + nLengthIncrease > (int)dwTargetSize) { goto Exit; }
MoveMemory(pch2 + nLengthIncrease, pch2, pbyTarget + *pnTargetFileSize - (PBYTE)pch2); CopyMemory(pch1, szGBCharset, strlen(szGBCharset)); *pnTargetFileSize += nLengthIncrease;
fRet = TRUE;
}
Exit: return fRet; } #endif
#ifdef RTF_SUPPORT
BOOL ConvertRtfFile( PBYTE pBuf, // Read buf
DWORD dwSize, // File size
PBYTE pWrite, // Write buf
DWORD dwWriteSize, BOOL fAnsiToUnicode, PINT pnTargetFileSize) { CRtfParser* pcParser; DWORD dwVersion; DWORD dwCodepage; BOOL fRet = FALSE;
pcParser = new CRtfParser(pBuf, dwSize, pWrite, dwSize*3); if (!pcParser) { MsgOverflow(); goto gotoExit; }
if (!pcParser->fRTFFile()) { MsgNotRtfSourceFile(); goto gotoExit; }
if (ecOK != pcParser->GetVersion(&dwVersion) || dwVersion != 1) { MsgNotRtfSourceFile(); goto gotoExit; } if (ecOK != pcParser->GetCodepage(&dwCodepage) || dwCodepage != 936) { MsgNotRtfSourceFile(); goto gotoExit; }
// Explain WordID by corresponding word text
if (ecOK != pcParser->Do()) { MsgNotRtfSourceFile(); goto gotoExit; }
pcParser->GetResult((PDWORD)pnTargetFileSize); fRet = TRUE;
gotoExit: if (pcParser) { delete pcParser; } return fRet; }
#endif
|