windows-server-2003/base/win32/winnls/data/dlls/tools/gb18030/gbunicnv/textfile.cpp


								#include "stdafx.h"

								#include "Msg.h"

								#include "ConvEng.h"

								#include "TextFile.h"


								#ifdef RTF_SUPPORT

								#include "RtfParser.h"

								#endif


								// Convert a plain text buffer between ANSI and Unicode (UTF-16 with a
								//  leading 0xFEFF flag).
								//
								//  pbySource        - source file content
								//  dwFileSize       - size of the source content, in bytes
								//  pbyTarget        - buffer that receives the converted text
								//  dwTargetSize     - size of the target buffer, in bytes
								//  fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
								//  pnTargetFileSize - receives the number of bytes placed in pbyTarget
								//
								// Returns TRUE on success. Returns FALSE (after showing a message) when
								//  the source encoding does not match the requested direction, and FALSE
								//  when the conversion produced no output.
								BOOL ConvertTextFile(
								    PBYTE pbySource,
								    DWORD dwFileSize,
								    PBYTE pbyTarget,
								    DWORD dwTargetSize,
								    BOOL  fAnsiToUnicode,
								    PINT  pnTargetFileSize)
								{
								    // Only look at the Unicode text file flag when the source really
								    //  holds two bytes; a 0 or 1 byte file cannot carry it.
								    BOOL fHasUnicodeFlag = FALSE;
								    if (dwFileSize >= sizeof(WORD) && *((PWORD)pbySource) == 0xFEFF) {
								        fHasUnicodeFlag = TRUE;
								    }

								    if (!fAnsiToUnicode && !fHasUnicodeFlag) {
								        MsgNotUnicodeTextSourceFile();
								        return FALSE;
								    }

								    if (fAnsiToUnicode && fHasUnicodeFlag) {
								        MsgNotAnsiTextSourceFile();
								        return FALSE;
								    }

								    if (fAnsiToUnicode) {
								        PWCH pwchTarget = (PWCH)pbyTarget;

								        // The target must at least hold the Unicode text file flag,
								        //  otherwise dwTargetSize-2 below would wrap around
								        if (dwTargetSize < sizeof(WCHAR)) {
								            return FALSE;
								        }

								        // Put Unicode text file flag
								        *pwchTarget = 0xFEFF;

								        // Null file: the output is the flag alone, reported in bytes
								        if (!dwFileSize) {
								            *pnTargetFileSize = sizeof(WCHAR);
								            return TRUE;
								        }

								        // Convert. The size is counted in WCHARs first (1 for the flag
								        //  plus the converted characters), then scaled to bytes.
								        // NOTE(review): dwTargetSize-2 is a byte count; confirm which
								        //  unit AnsiStrToUnicodeStr expects for its buffer size.
								        *pnTargetFileSize = 1;
								        *pnTargetFileSize += AnsiStrToUnicodeStr(pbySource, dwFileSize,
								            pwchTarget+1, dwTargetSize-2);
								        *pnTargetFileSize *= sizeof(WCHAR);
								    } else {
								        // The Unicode text file flag was verified above, skip it
								        PWCH pwchData = (PWCH)pbySource + 1;

								        // Null file w/ Unicode flag only: nothing to write
								        if (dwFileSize == 2) {
								            *pnTargetFileSize = 0;
								            return TRUE;
								        }

								        // Convert
								        *pnTargetFileSize = UnicodeStrToAnsiStr(pwchData,
								            dwFileSize/sizeof(WCHAR) - 1, (PCHAR)pbyTarget, dwTargetSize);
								    }

								    // NOTE(review): in the ANSI -> Unicode direction the size already
								    //  includes the flag, so this test cannot detect a conversion that
								    //  produced no characters.
								    return (*pnTargetFileSize) ? TRUE : FALSE;
								}


								// Convert an HTML file between ANSI and Unicode and rewrite the first
								//  "charset=..." declaration to name the target encoding.
								//
								//  pbySource        - source file content
								//  dwFileSize       - size of the source content, in bytes
								//  pbyTarget        - buffer that receives the converted text
								//  dwTargetSize     - size of the target buffer, in bytes
								//  fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
								//  pnTargetFileSize - receives the number of bytes placed in pbyTarget
								//
								// Returns TRUE when the text was converted and either no "charset="
								//  was present or it was replaced. Returns FALSE when the text
								//  conversion fails, when no closing '"' follows "charset=" within 20
								//  characters, or when the rewritten text does not fit in the target.
								BOOL ConvertHtmlFile(
								    PBYTE pbySource,
								    DWORD dwFileSize,
								    PBYTE pbyTarget,
								    DWORD dwTargetSize,
								    BOOL  fAnsiToUnicode,
								    PINT  pnTargetFileSize)
								{
								    // Reserve the last space to explicitly assign zero to the last
								    //  character in the buffer
								    const DWORD cbTerminator =
								        fAnsiToUnicode ? (DWORD)sizeof(WCHAR) : (DWORD)sizeof(char);

								    // A buffer smaller than the terminator would make the reserved
								    //  size below wrap around
								    if (dwTargetSize < cbTerminator) {
								        return FALSE;
								    }

								    // Start from a defined size so the terminator offset below is valid
								    //  even if the text conversion succeeds without writing the size
								    *pnTargetFileSize = 0;

								    if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
								        dwTargetSize - cbTerminator, fAnsiToUnicode, pnTargetFileSize)) {
								        return FALSE;
								    }

								    // Change charset
								    if (fAnsiToUnicode) {
								        const WCHAR* const wszUnicodeCharset = L"charset=unicode";
								        const int cchNewCharset = (int)wcslen(wszUnicodeCharset);

								        // Terminate the converted text so it can be searched as a string
								        *((PWCH)(pbyTarget+*pnTargetFileSize)) = 0;

								        WCHAR* pwchCharset = wcsstr((PWCH)pbyTarget, L"charset=");
								        if (!pwchCharset) {
								            // Some Html files have no code page flag,
								            //  the charset replace step is skipped for them
								            return TRUE;
								        }

								        // The declaration is expected to end at the next '"'
								        WCHAR* pwchQuote = wcschr(pwchCharset, L'\"');
								        if (!pwchQuote || (pwchQuote - pwchCharset >= 20)) {
								            return FALSE;
								        }

								        // Growth (or shrink, when negative) in characters and the
								        //  resulting size in bytes, computed with signed arithmetic
								        const int nLengthIncrease = cchNewCharset - (int)(pwchQuote - pwchCharset);
								        const int cbNewSize = *pnTargetFileSize + nLengthIncrease*(int)sizeof(WCHAR);

								        if (cbNewSize < 0 || (DWORD)cbNewSize > dwTargetSize) {
								            return FALSE;
								        }

								        // Shift everything from the closing quote on, then overwrite
								        //  the old declaration with the new one
								        MoveMemory(pwchQuote + nLengthIncrease, pwchQuote,
								            pbyTarget + *pnTargetFileSize - (PBYTE)pwchQuote);
								        CopyMemory(pwchCharset, wszUnicodeCharset, cchNewCharset*sizeof(WCHAR));
								        *pnTargetFileSize = cbNewSize;

								        return TRUE;
								    } else {
								        const CHAR* const szGBCharset = "charset=gb18030";
								        const int cchNewCharset = (int)strlen(szGBCharset);

								        // Terminate the converted text so it can be searched as a string
								        *((PCHAR)(pbyTarget+*pnTargetFileSize)) = 0;

								        CHAR* pchCharset = strstr((PCHAR)pbyTarget, "charset=");
								        if (!pchCharset) {
								            // Some Html files have no code page flag,
								            //  the charset replace step is skipped for them
								            return TRUE;
								        }

								        // The declaration is expected to end at the next '"'
								        CHAR* pchQuote = strchr(pchCharset, '\"');
								        if (!pchQuote || (pchQuote - pchCharset >= 20)) {
								            return FALSE;
								        }

								        // Growth (or shrink, when negative) and the resulting size, in bytes
								        const int nLengthIncrease = cchNewCharset - (int)(pchQuote - pchCharset);
								        const int cbNewSize = *pnTargetFileSize + nLengthIncrease;

								        if (cbNewSize < 0 || (DWORD)cbNewSize > dwTargetSize) {
								            return FALSE;
								        }

								        // Shift everything from the closing quote on, then overwrite
								        //  the old declaration with the new one
								        MoveMemory(pchQuote + nLengthIncrease, pchQuote,
								            (PCHAR)pbyTarget + *pnTargetFileSize - pchQuote);
								        CopyMemory(pchCharset, szGBCharset, cchNewCharset*sizeof(char));
								        *pnTargetFileSize = cbNewSize;

								        return TRUE;
								    }
								}


								#ifdef XML_SUPPORT

								// Convert an XML file between ANSI and Unicode and rewrite the value
								//  of the encoding attribute in the "<?xml ... ?>" declaration.
								//
								//  pbySource        - source file content
								//  dwFileSize       - size of the source content, in bytes
								//  pbyTarget        - buffer that receives the converted text
								//  dwTargetSize     - size of the target buffer, in bytes
								//  fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
								//  pnTargetFileSize - receives the number of bytes placed in pbyTarget
								//
								// Returns TRUE when the text was converted and the declaration either
								//  has no "encoding=" or had its value replaced. Returns FALSE when the
								//  text conversion fails, when no complete "<?xml ... ?>" declaration
								//  is found, when the encoding value is not quoted or is 20 characters
								//  or longer, or when the rewritten text does not fit in the target.
								BOOL ConvertXmlFile(
								    PBYTE pbySource,
								    DWORD dwFileSize,
								    PBYTE pbyTarget,
								    DWORD dwTargetSize,
								    BOOL  fAnsiToUnicode,
								    PINT  pnTargetFileSize)
								{
								    // Reserve the last space to explicitly assign zero to the last
								    //  character in the buffer
								    const DWORD cbTerminator =
								        fAnsiToUnicode ? (DWORD)sizeof(WCHAR) : (DWORD)sizeof(char);

								    // A buffer smaller than the terminator would make the reserved
								    //  size below wrap around
								    if (dwTargetSize < cbTerminator) {
								        return FALSE;
								    }

								    // Start from a defined size so the terminator offset below is valid
								    //  even if the text conversion succeeds without writing the size
								    *pnTargetFileSize = 0;

								    if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
								        dwTargetSize - cbTerminator, fAnsiToUnicode, pnTargetFileSize)) {
								        return FALSE;
								    }

								    // Change charset
								    if (fAnsiToUnicode) {
								        const WCHAR* const wszUnicodeCharset = L"UTF-16";
								        const int cchNewCharset = (int)wcslen(wszUnicodeCharset);
								        const size_t cchAttr = wcslen(L"encoding=");

								        // Terminate the converted text so it can be searched as a string
								        *((PWCH)(pbyTarget+*pnTargetFileSize)) = 0;

								        WCHAR* pwchDecl = wcsstr((PWCH)pbyTarget, L"<?xml");
								        if (!pwchDecl) {
								            return FALSE;
								        }
								        WCHAR* pwchDeclEnd = wcsstr(pwchDecl, L"?>");
								        if (!pwchDeclEnd) {
								            return FALSE;
								        }

								        // Temporarily null-terminate at "?>" so the searches below
								        //  cannot leave the declaration
								        *pwchDeclEnd = 0;

								        WCHAR* pwchAttr = wcsstr(pwchDecl, L"encoding=");
								        WCHAR* pwchValue = NULL;     // first character of the value
								        WCHAR* pwchValueEnd = NULL;  // closing quote of the value
								        if (pwchAttr) {
								            // The value must open with ' or " and close with the same
								            const WCHAR wchQuote = pwchAttr[cchAttr];
								            if (wchQuote == '\"' || wchQuote == '\'') {
								                pwchValue = pwchAttr + cchAttr + 1;
								                pwchValueEnd = wcschr(pwchValue, wchQuote);
								            }
								        }

								        // Restore the '?' of "?>"
								        *pwchDeclEnd = '?';

								        if (!pwchAttr) {
								            // Some XML files have no encoding attribute,
								            //  the charset replace step is skipped for them
								            return TRUE;
								        }
								        if (!pwchValueEnd || (pwchValueEnd - pwchValue >= 20)) {
								            return FALSE;
								        }

								        // Growth (or shrink, when negative) in characters and the
								        //  resulting size in bytes, computed with signed arithmetic
								        const int nLengthIncrease = cchNewCharset - (int)(pwchValueEnd - pwchValue);
								        const int cbNewSize = *pnTargetFileSize + nLengthIncrease*(int)sizeof(WCHAR);

								        if (cbNewSize < 0 || (DWORD)cbNewSize > dwTargetSize) {
								            return FALSE;
								        }

								        // Shift everything from the closing quote on, then overwrite
								        //  the old value with the new one
								        MoveMemory(pwchValueEnd + nLengthIncrease, pwchValueEnd,
								            pbyTarget + *pnTargetFileSize - (PBYTE)pwchValueEnd);
								        CopyMemory(pwchValue, wszUnicodeCharset, cchNewCharset*sizeof(WCHAR));
								        *pnTargetFileSize = cbNewSize;

								        return TRUE;
								    } else {
								        const char* const szGBCharset = "GB18030";
								        const int cchNewCharset = (int)strlen(szGBCharset);
								        const size_t cchAttr = strlen("encoding=");

								        // Terminate the converted text so it can be searched as a string
								        *((PCH)(pbyTarget+*pnTargetFileSize)) = 0;

								        char* pchDecl = strstr((char*)pbyTarget, "<?xml");
								        if (!pchDecl) {
								            return FALSE;
								        }
								        char* pchDeclEnd = strstr(pchDecl, "?>");
								        if (!pchDeclEnd) {
								            return FALSE;
								        }

								        // Temporarily null-terminate at "?>" so the searches below
								        //  cannot leave the declaration
								        *pchDeclEnd = 0;

								        char* pchAttr = strstr(pchDecl, "encoding=");
								        char* pchValue = NULL;     // first character of the value
								        char* pchValueEnd = NULL;  // closing quote of the value
								        if (pchAttr) {
								            // The value must open with ' or " and close with the same
								            const CHAR chQuote = pchAttr[cchAttr];
								            if (chQuote == '\"' || chQuote == '\'') {
								                pchValue = pchAttr + cchAttr + 1;
								                pchValueEnd = strchr(pchValue, chQuote);
								            }
								        }

								        // Restore the '?' of "?>"
								        *pchDeclEnd = '?';

								        if (!pchAttr) {
								            // Some XML files have no encoding attribute,
								            //  the charset replace step is skipped for them
								            return TRUE;
								        }
								        if (!pchValueEnd || (pchValueEnd - pchValue >= 20)) {
								            return FALSE;
								        }

								        // Growth (or shrink, when negative) and the resulting size, in bytes
								        const int nLengthIncrease = cchNewCharset - (int)(pchValueEnd - pchValue);
								        const int cbNewSize = *pnTargetFileSize + nLengthIncrease;

								        if (cbNewSize < 0 || (DWORD)cbNewSize > dwTargetSize) {
								            return FALSE;
								        }

								        // Shift everything from the closing quote on, then overwrite
								        //  the old value with the new one
								        MoveMemory(pchValueEnd + nLengthIncrease, pchValueEnd,
								            pbyTarget + *pnTargetFileSize - (PBYTE)pchValueEnd);
								        CopyMemory(pchValue, szGBCharset, cchNewCharset);
								        *pnTargetFileSize = cbNewSize;

								        return TRUE;
								    }
								}

								#endif


								#ifdef RTF_SUPPORT

								// Convert an RTF file by running it through CRtfParser.
								//
								// The source is accepted only when the parser recognizes it as RTF,
								//  reports version 1 and reports code page 936; otherwise a "not an RTF
								//  source file" message is shown and FALSE is returned.
								//
								// fAnsiToUnicode is not used by this routine.
								//
								// Returns TRUE when parsing succeeded; the result size is then stored
								//  through pnTargetFileSize by the parser's GetResult.
								BOOL ConvertRtfFile(
								    PBYTE pBuf,     // Read buf
								    DWORD dwSize,   // File size
								    PBYTE pWrite,   // Write buf
								    DWORD dwWriteSize,  // Write buf size
								    BOOL  fAnsiToUnicode,
								    PINT  pnTargetFileSize)
								{
								    DWORD dwVersion;
								    DWORD dwCodepage;
								    BOOL  fRet = FALSE;

								    // Hand the parser the real capacity of the write buffer. The size
								    //  was previously hard-coded as dwSize*3, which ignores the buffer
								    //  the caller actually supplied and can wrap for large files.
								    // NOTE(review): assumes the 4th constructor argument is the write
								    //  buffer size, mirroring the (pBuf, dwSize) pair - confirm.
								    CRtfParser* pcParser = new CRtfParser(pBuf, dwSize, pWrite, dwWriteSize);
								    if (!pcParser) {
								        MsgOverflow();
								        return FALSE;
								    }

								    // The checks run in order and stop at the first failure:
								    //  RTF signature, version 1, code page 936, then the conversion
								    //  itself (explain WordID by corresponding word text)
								    if (!pcParser->fRTFFile() ||
								        ecOK != pcParser->GetVersion(&dwVersion) ||
								        dwVersion != 1 ||
								        ecOK != pcParser->GetCodepage(&dwCodepage) ||
								        dwCodepage != 936 ||
								        ecOK != pcParser->Do()) {
								        MsgNotRtfSourceFile();
								    } else {
								        pcParser->GetResult((PDWORD)pnTargetFileSize);
								        fRet = TRUE;
								    }

								    delete pcParser;
								    return fRet;
								}


								#endif