Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

560 lines
21 KiB

//-----------------------------------------------------------------------------
//
//
// File: dsn_utf7.cpp
//
// Description:
//
// Author: Mike Swafford (MikeSwa)
//
// History:
// 10/20/98 - MikeSwa Created
//
// Copyright (C) 1998 Microsoft Corporation
//
//-----------------------------------------------------------------------------
#include "precomp.h"
//---[ CUTF7ConversionContext::chNeedsEncoding ]--------------------------------
//
//
// Description:
// Determines if a character needs to be encoded... returns its ASCII
// equivalent if not.
// Parameters:
// wch Wide character to check
// Returns:
// 0, if the character needs encoding
// The ASCII equivalent if not.
// History:
// 10/23/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
CHAR CUTF7ConversionContext::chNeedsEncoding(WCHAR wch)
{
    //NOTE - We do not want to handle UNICODE <LINE SEPARATOR> (0x2028)
    //and <PARAGRAPH SEPARATOR> (0x2029)... which should ideally be
    //converted to CRLF. We consider this a mal-formed resource: ASSERT
    //in Debug and encode as UNICODE on retail (neither value matches any
    //of the pass-through tests below, so 0 is returned for them).
    _ASSERT((0x2028 != wch) && "Malformed Resource String");
    _ASSERT((0x2029 != wch) && "Malformed Resource String");

    //Low byte of the wide character; only meaningful (and only returned)
    //when one of the pass-through tests below succeeds.
    const CHAR chAscii = (CHAR) wch & 0x00FF;

    //Characters that are a straight ASCII conversion in all cases
    //(Set D and Set O in RFC1642). Within printable ASCII this leaves out
    //'+', ':', '\' and '~', which therefore get encoded.
    if (((wch >= L'a') && (wch <= L'z')) ||
        ((wch >= L'A') && (wch <= L'Z')) ||
        ((wch >= L'0') && (wch <= L'9')) ||
        ((wch >= L'!') && (wch <= L'*')) ||
        ((wch >= L',') && (wch <= L'/')) ||
        ((wch >= L';') && (wch <= L'@')) ||
        ((wch >= L']') && (wch <= L'`')) ||
        ((wch >= L'{') && (wch <= L'}')) ||
        (wch == L' ') || (wch == L'\t') ||
        (wch == L'['))
    {
        return chAscii;
    }

    //CR and LF are written as-is for content, but must be encoded when we
    //are producing an RFC1522 subject header.
    if (!(UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState) &&
        ((wch == L'\r') || (wch == L'\n')))
    {
        return chAscii;
    }

    //Everything else needs encoding
    return 0;
}
//---[ CUTF7ConversionContext::CUTF7ConversionContext ]------------------------
//
//
// Description:
// Constructor for CUTF7ConversionContext object
// Parameters:
// IN fIsRFC1522Subject TRUE if we need to worry about converting
// to an RFC1522 Subject (defaults to FALSE)
// Returns:
// -
// History:
// 10/20/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
CUTF7ConversionContext::CUTF7ConversionContext(BOOL fIsRFC1522Subject)
{
    m_dwSignature = UTF7_CONTEXT_SIG;

    //No output has been written yet, so the current line is empty
    m_cBytesSinceCRLF = 0;

    //Begin in the initial state; the subject flag is added on top of it
    //when the caller asks for RFC1522 subject encoding.
    m_dwCurrentState = fIsRFC1522Subject
        ? (UTF7_INITIAL_STATE | UTF7_ENCODING_RFC1522_SUBJECT)
        : UTF7_INITIAL_STATE;
}
//---[ CUTF7ConversionContext::WriteChar ]-------------------------------------
//
//
// Description:
// Writes a single character to the output buffer... used by
// fUTF7EncodeBuffer and fConvertBufferTo7BitASCII. Also updates relevant
// member vars.
// Parameters:
// IN ch Character to write
// IN OUT ppbBuffer Buffer to write it to
// IN OUT pcbWritten Running total of bytes written
// Returns:
// -
// History:
// 10/26/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
inline void CUTF7ConversionContext::WriteChar(IN CHAR ch,
                                              IN OUT BYTE ** ppbBuffer,
                                              IN OUT DWORD *pcbWritten)
{
    _ASSERT(ppbBuffer);
    _ASSERT(*ppbBuffer);
    _ASSERT(pcbWritten);

    //Store the byte and advance the caller's write cursor past it.
    //No capacity check is made here; callers must ensure there is room.
    BYTE *pbDest = *ppbBuffer;
    *pbDest = (BYTE) ch;
    *ppbBuffer = pbDest + 1;

    //Account for the byte in both the caller's total and the line length
    *pcbWritten += 1;
    m_cBytesSinceCRLF += 1;

    //When encoding a subject, a line should never grow past the RFC1522 limit
    _ASSERT(!(UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState) ||
            (UTF7_RFC1522_MAX_LENGTH >= m_cBytesSinceCRLF));
}
//---[ CUTF7ConversionContext::fWriteString ]----------------------------------
//
//
// Description:
// Used by fUTF7EncodeBuffer to write a string to the output buffer.
// Updates m_cBytesSinceCRLF in the process.
// Parameters:
// IN szString String to write
// IN cbString Size (in bytes) of string
// IN cbBuffer Total size of output buffer
// IN OUT ppbBuffer Buffer to write it to
// IN OUT pcbWritten Running total of bytes written
// Returns:
// TRUE if the string was written
// FALSE if there is not enough room left in the output buffer
// History:
// 10/26/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
inline BOOL CUTF7ConversionContext::fWriteString(IN LPSTR szString, IN DWORD cbString,
                                                 IN DWORD cbBuffer,
                                                 IN OUT BYTE ** ppbBuffer,
                                                 IN OUT DWORD *pcbWritten)
{
    _ASSERT(szString);
    _ASSERT(ppbBuffer);
    _ASSERT(*ppbBuffer);
    _ASSERT(pcbWritten);

    //Space remaining in the output buffer. Callers only invoke this while
    //*pcbWritten is below cbBuffer, so the subtraction does not wrap.
    const DWORD cbAvailable = cbBuffer - *pcbWritten;
    if (cbAvailable < cbString)
    {
        //Not enough room - nothing is written and no state is touched
        return FALSE;
    }

    //Copy the whole string (all or nothing) and advance the write cursor
    memcpy(*ppbBuffer, szString, cbString);
    *ppbBuffer += cbString;

    //Account for the bytes in both the caller's total and the line length
    *pcbWritten += cbString;
    m_cBytesSinceCRLF += cbString;

    //When encoding a subject, a line should never grow past the RFC1522 limit
    _ASSERT(!(UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState) ||
            (UTF7_RFC1522_MAX_LENGTH >= m_cBytesSinceCRLF));
    return TRUE;
}
//---[ CUTF7ConversionContext::fSubjectNeedsEncoding ]-------------------------
//
//
// Description:
// Determines if a subject needs to be UTF7 encoded... or can be
// transmitted as is.
// Parameters:
// IN pbBuffer Pointer to UNICODE string buffer
// IN cbBuffer Size (in bytes) of string buffer
// Returns:
// TRUE if we need to encode the buffer
// FALSE if we do not
// History:
// 10/26/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
BOOL CUTF7ConversionContext::fSubjectNeedsEncoding(IN BYTE *pbBuffer,
                                                   IN DWORD cbBuffer)
{
    //Treat the byte buffer as a run of wide characters. cbBuffer is
    //expected to be a multiple of sizeof(WCHAR).
    const WCHAR *pwchEnd = (const WCHAR *) (pbBuffer + cbBuffer);

    for (const WCHAR *pwchCur = (const WCHAR *) pbBuffer;
         pwchCur < pwchEnd;
         ++pwchCur)
    {
        const WCHAR wchCur = *pwchCur;

        //A character can be sent as-is only if it is 7-bit and is not part
        //of a line break.
        const bool fSendAsIs = (wchCur <= 127) &&
                               (wchCur != L'\r') &&
                               (wchCur != L'\n');
        if (!fSendAsIs)
        {
            //One offending character is enough... must encode
            return TRUE;
        }
    }

    //Every character can be transmitted without encoding
    return FALSE;
}
//---[ CUTF7ConversionContext::fConvertBufferTo7BitASCII ]---------------------
//
//
// Description:
// Converts a buffer that is UNICODE containing only 7bit ASCII characters
// to an ASCII buffer.
// Parameters:
// IN pbInputBuffer Pointer to UNICODE string buffer
// IN cbInputBuffer Size (in bytes) of string buffer
// IN pbOutputBuffer Buffer to write data to
// IN cbOutputBuffer Size of buffer to write data to
// OUT pcbWritten # of bytes written to output buffer
// OUT pcbRead # of bytes read from Input buffer
// Returns:
// TRUE if entire input buffer was processed
// FALSE if buffer needs to be processed some more
// History:
// 10/26/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
BOOL CUTF7ConversionContext::fConvertBufferTo7BitASCII(
    IN PBYTE pbInputBuffer,
    IN DWORD cbInputBuffer,
    IN PBYTE pbOutputBuffer,
    IN DWORD cbOutputBuffer,
    OUT DWORD *pcbWritten,
    OUT DWORD *pcbRead)
{
    //View the input as wide characters; the output is written byte by byte
    const WCHAR *pwchCur = (const WCHAR *) pbInputBuffer;
    const WCHAR *pwchEnd = (const WCHAR *) (pbInputBuffer + cbInputBuffer);
    BYTE *pbOut = pbOutputBuffer;
    _ASSERT(pbOut);

    //Narrow one character per pass until either the output buffer is full
    //or the input is exhausted. *pcbWritten and *pcbRead are running
    //totals that the caller initializes.
    for (; (*pcbWritten < cbOutputBuffer) && (pwchCur < pwchEnd); ++pwchCur)
    {
        const WCHAR wchCur = *pwchCur;
        _ASSERT(!(0xFF80 & wchCur)); //must be only 7-bit

        //WriteChar advances pbOut and bumps *pcbWritten for us
        WriteChar((CHAR) wchCur, &pbOut, pcbWritten);
        *pcbRead += sizeof(WCHAR);
    }

    //TRUE only if we stopped because every input character was consumed
    return (pwchCur == pwchEnd);
}
//---[ CUTF7ConversionContext::fUTF7EncodeBuffer ]------------------------------
//
//
// Description:
// Converts buffer to UTF7 Encoding
//
// This function implements the main state machine for UTF7 encoding. It
// handles encoding of both RFC1522 subject encoding as well as regular
// UTF7 content-encoding.
// Parameters:
// IN pbInputBuffer Pointer to UNICODE string buffer
// IN cbInputBuffer Size (in bytes) of string buffer
// IN pbOutputBuffer Buffer to write data to
// IN cbOutputBuffer Size of buffer to write data to
// OUT pcbWritten # of bytes written to output buffer
// OUT pcbRead # of bytes read from Input buffer
// Returns:
// TRUE if entire input buffer was processed
// FALSE if buffer needs to be processed some more
// History:
// 10/26/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
BOOL CUTF7ConversionContext::fUTF7EncodeBuffer(
    IN PBYTE pbInputBuffer,
    IN DWORD cbInputBuffer,
    IN PBYTE pbOutputBuffer,
    IN DWORD cbOutputBuffer,
    OUT DWORD *pcbWritten,
    OUT DWORD *pcbRead)
{
    //Overview:
    //  Every pass through the loop below performs exactly one step. Pending
    //  work recorded in m_dwCurrentState is serviced first, in this order:
    //  flush buffered base64 chars, write the RFC1522 charset prefix, write
    //  the UTF7 closing char, write the RFC1522 suffix, fold the header.
    //  Only when nothing is pending is the input examined. Each step writes
    //  at most one character (guarded by the loop condition) or one string
    //  (guarded by fWriteString), so the output buffer is never overrun.
    //
    //  State lives in member variables, so when FALSE is returned because
    //  the output buffer is full the conversion can be resumed by a later
    //  call.
    //
    //  wszBuffer starts at the beginning of pbInputBuffer and *pcbRead is
    //  used as the matching read offset, so *pcbRead and *pcbWritten are
    //  expected to be 0 on entry (fConvertBuffer zeroes both before calling).
    LPWSTR wszBuffer = (LPWSTR) pbInputBuffer;
    WCHAR wch = L'\0';
    CHAR ch = '\0';
    BYTE *pbCurrentOut = pbOutputBuffer;
    BOOL fDone = FALSE;

    //Use loop to make sure we never exceed our buffers
    while (*pcbWritten < cbOutputBuffer)
    {
        //See if we need to handle any state that does not require reading
        //from the input buffer.
        if (UTF7_FLUSH_BUFFERS & m_dwCurrentState)
        {
            //We have converted characters buffered up... we need to write them
            //to the output buffer
            if (!m_Base64Stream.fNextValidChar(&ch))
            {
                //Nothing left to write
                m_dwCurrentState ^= UTF7_FLUSH_BUFFERS;
                continue;
            }
            WriteChar(ch, &pbCurrentOut, pcbWritten);
        }
        else if (UTF7_RFC1522_CHARSET_PENDING & m_dwCurrentState)
        {
            //We need to start with the =?charset?Q?+ stuff
            if (!fWriteString(UTF7_RFC1522_ENCODE_START,
                    sizeof(UTF7_RFC1522_ENCODE_START)-sizeof(CHAR),
                    cbOutputBuffer, &pbCurrentOut, pcbWritten))
            {
                //No room... the pending flag stays set so we retry next call
                return FALSE;
            }
            m_dwCurrentState ^= UTF7_RFC1522_CHARSET_PENDING;
            m_dwCurrentState |= (UTF7_ENCODING_WORD | UTF7_RFC1522_CURRENTLY_ENCODING);
        }
        else if (UTF7_WORD_CLOSING_PENDING & m_dwCurrentState)
        {
            //Need to write closing '-'
            m_dwCurrentState ^= UTF7_WORD_CLOSING_PENDING;
            WriteChar(UTF7_STOP_STREAM_CHAR, &pbCurrentOut, pcbWritten);
        }
        else if (UTF7_RFC1522_CLOSING_PENDING & m_dwCurrentState)
        {
            if (!fWriteString(UTF7_RFC1522_ENCODE_STOP,
                    sizeof(UTF7_RFC1522_ENCODE_STOP)-sizeof(CHAR),
                    cbOutputBuffer, &pbCurrentOut, pcbWritten))
            {
                //No room... the pending flag stays set so we retry next call
                return FALSE;
            }
            //Clear the closing flag and request a header fold in one step
            m_dwCurrentState ^= (UTF7_RFC1522_CLOSING_PENDING | UTF7_FOLD_HEADER_PENDING);
        }
        else if (UTF7_FOLD_HEADER_PENDING & m_dwCurrentState)
        {
            if (*pcbRead >= cbInputBuffer) //there is no more text to read.. we don't need to wrap
            {
                fDone = TRUE;
                m_dwCurrentState ^= UTF7_FOLD_HEADER_PENDING;
                break;
            }
            m_cBytesSinceCRLF = 0; //We're writing a CRLF now
            if (!fWriteString(UTF7_RFC1522_PHRASE_SEPARATOR,
                    sizeof(UTF7_RFC1522_PHRASE_SEPARATOR)-sizeof(CHAR),
                    cbOutputBuffer, &pbCurrentOut, pcbWritten))
            {
                //No room... the pending flag stays set so we retry next call
                return FALSE;
            }
            m_cBytesSinceCRLF = sizeof(CHAR);//set count to leading tab
            m_dwCurrentState ^= UTF7_FOLD_HEADER_PENDING;
        }
        else if (*pcbRead >= cbInputBuffer)
        {
            //We have read our entire input buffer... now we need to handle
            //any sort of cleanup.
            if (m_Base64Stream.fTerminateStream(TRUE))
            {
                _ASSERT(UTF7_ENCODING_WORD & m_dwCurrentState);
                m_dwCurrentState |= UTF7_FLUSH_BUFFERS;
            }
            else if (UTF7_ENCODING_WORD & m_dwCurrentState)
            {
                //We have already written everything to output.. but we
                //still need to write the close of the stream
                _ASSERT(!(UTF7_WORD_CLOSING_PENDING & m_dwCurrentState));
                m_dwCurrentState ^= (UTF7_ENCODING_WORD | UTF7_WORD_CLOSING_PENDING);
            }
            else if (UTF7_RFC1522_CURRENTLY_ENCODING & m_dwCurrentState)
            {
                //Need to write closing ?=
                m_dwCurrentState |= UTF7_RFC1522_CLOSING_PENDING;
            }
            else
            {
                fDone = TRUE;
                break; //We're done
            }
        }
        else //need to process more of the input buffer
        {
            wch = *wszBuffer;
            ch = chNeedsEncoding(wch);

            //Are we at the end of a RFC1522 phrase? (ch will be 0)
            if ((UTF7_RFC1522_CURRENTLY_ENCODING & m_dwCurrentState) &&
                !ch && iswspace(wch))
            {
                //reset state
                if (UTF7_ENCODING_WORD & m_dwCurrentState)
                    m_dwCurrentState |= UTF7_WORD_CLOSING_PENDING; //need to write -
                m_dwCurrentState |= UTF7_RFC1522_CLOSING_PENDING;
                m_dwCurrentState &= ~(UTF7_ENCODING_WORD |
                                      UTF7_RFC1522_CURRENTLY_ENCODING);

                //eat up any extra whitespace (the bounds check comes before
                //the next read, so we never look past the input buffer)
                do
                {
                    wszBuffer++;
                    *pcbRead += sizeof(WCHAR);
                    if (*pcbRead >= cbInputBuffer)
                        break;
                    wch = *wszBuffer;
                } while (iswspace(wch));
            }
            else if (UTF7_ENCODING_WORD & m_dwCurrentState)
            {
                if (ch) //we need to stop encoding
                {
                    //The character itself is NOT consumed here; it is
                    //written on a later pass once the flush and the closing
                    //'-' have been handled.
                    m_Base64Stream.fTerminateStream(TRUE);
                    _ASSERT(!(UTF7_WORD_CLOSING_PENDING & m_dwCurrentState));
                    m_dwCurrentState ^= (UTF7_ENCODING_WORD | UTF7_WORD_CLOSING_PENDING | UTF7_FLUSH_BUFFERS);
                }
                else if (!m_Base64Stream.fProcessWideChar(wch))
                {
                    //flush our buffers and then continue on as we were
                    //(the character is retried after the flush)
                    m_dwCurrentState |= UTF7_FLUSH_BUFFERS;
                }
                else
                {
                    //The write worked...
                    wszBuffer++;
                    *pcbRead += sizeof(WCHAR);
                }
            }
            else if (!ch)
            {
                //we need to start encoding (the character is not consumed
                //yet; it is fed to the base64 stream on a later pass)
                if ((UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState) &&
                    !(UTF7_RFC1522_CURRENTLY_ENCODING & m_dwCurrentState))
                {
                    //We need to start with the =?charset?Q?+ stuff
                    m_dwCurrentState |= UTF7_RFC1522_CHARSET_PENDING;
                }
                else
                {
                    //We are either not encoding RFC1522... or are already
                    //in the middle of a RFC1522 encoded phrase.. in this case
                    //we only need to write the '+'
                    WriteChar(UTF7_START_STREAM_CHAR, &pbCurrentOut, pcbWritten);
                    m_dwCurrentState |= UTF7_ENCODING_WORD;
                }
            }
            else
            {
                //
                // NOTE: It is not clear why we do not close out the UTF7 word
                // i.e. why we do not go into the UTF7 word closing pending state
                // like we do when we encounter an iswspace char. This means that
                // when a string is <jpn-char><0x0020><jpn-char> we will encode it
                // as =?charset?Q?+stuff, while if it is <jpn-char><0x3000><jpn-char>
                // where 0x3000 == Japanese whitespace, we will encode as:
                // =?charset?Q?+stuff<CRLF>=?charset?Q?+stuff.
                //
                // If this is "fixed" in the future (i.e. we start closing out UTF7
                // encodings when we encounter 0x0020) be aware that we rely on the
                // current (non-closing) functionality for HrWriteModifiedUnicodeString.
                //
                //we are not encoding... and character can be written normally
                WriteChar(ch, &pbCurrentOut, pcbWritten);
                wszBuffer++;
                *pcbRead += sizeof(WCHAR);

                //if it was a space... and we are doing headers... lets fold
                //the header
                if ((UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState)
                    && isspace((UCHAR)ch))
                {
                    //eat up any extra whitespace. The bounds check must come
                    //BEFORE the read: the character just consumed may have
                    //been the last one in the input, in which case wszBuffer
                    //now points one past the end of the buffer.
                    while ((*pcbRead < cbInputBuffer) && iswspace(*wszBuffer))
                    {
                        wszBuffer++;
                        *pcbRead += sizeof(WCHAR);
                    }
                    m_dwCurrentState |= UTF7_FOLD_HEADER_PENDING;
                }
            }
        }
    }

    //TRUE only if we left the loop because all input was handled; FALSE
    //means the output buffer filled up first.
    return fDone;
}
//---[ CUTF7ConversionContext::fConvertBuffer ]--------------------------------
//
//
// Description:
// Converts UNICODE string to UTF7
// Parameters:
// IN fASCII TRUE if buffer is ASCII
// IN pbInputBuffer Pointer to UNICODE string buffer
// IN cbInputBuffer Size (in bytes) of string buffer
// IN pbOutputBuffer Buffer to write data to
// IN cbOutputBuffer Size of buffer to write data to
// OUT pcbWritten # of bytes written to output buffer
// OUT pcbRead # of bytes read from Input buffer
// Returns:
// TRUE if entire input buffer was processed
// FALSE if buffer needs to be processed some more
// History:
// 10/21/98 - MikeSwa Created
//
//-----------------------------------------------------------------------------
BOOL CUTF7ConversionContext::fConvertBuffer(
    IN BOOL fASCII,
    IN PBYTE pbInputBuffer,
    IN DWORD cbInputBuffer,
    IN PBYTE pbOutputBuffer,
    IN DWORD cbOutputBuffer,
    OUT DWORD *pcbWritten,
    OUT DWORD *pcbRead)
{
    _ASSERT(pcbWritten);
    _ASSERT(pcbRead);
    _ASSERT(pbInputBuffer);
    _ASSERT(pbOutputBuffer);

    //Straight ASCII input is delegated untouched to the default implementation
    if (fASCII)
    {
        return CDefaultResourceConversionContext::fConvertBuffer(fASCII,
                    pbInputBuffer, cbInputBuffer, pbOutputBuffer, cbOutputBuffer,
                    pcbWritten, pcbRead);
    }

    //From here on the input is UNICODE... cbInputBuffer should be a multiple
    //of sizeof(WCHAR)
    _ASSERT(0 == (cbInputBuffer % sizeof(WCHAR)));

    //A subject is classified once, on the first buffer we see for it: either
    //it contains something that forces encoding, or it is plain RFC822 text.
    //The classification is remembered in the state for later calls.
    const bool fEncodingSubject =
        0 != (UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState);
    const bool fAlreadyClassified =
        0 != ((UTF7_SOME_INVALID_RFC822_CHARS | UFT7_ALL_VALID_RFC822_CHARS) &
              m_dwCurrentState);
    if (fEncodingSubject && !fAlreadyClassified)
    {
        m_dwCurrentState |= fSubjectNeedsEncoding(pbInputBuffer, cbInputBuffer)
            ? UTF7_SOME_INVALID_RFC822_CHARS
            : UFT7_ALL_VALID_RFC822_CHARS;
    }

    //Both converters treat these as running totals starting from zero
    *pcbWritten = 0;
    *pcbRead = 0;

    if (!(UFT7_ALL_VALID_RFC822_CHARS & m_dwCurrentState))
    {
        //Content, or a subject that needs it... run the UTF7 state machine
        return fUTF7EncodeBuffer(pbInputBuffer, cbInputBuffer, pbOutputBuffer,
                                 cbOutputBuffer, pcbWritten, pcbRead);
    }

    //Subject made only of valid characters... just narrow it to 7-bit ASCII
    return fConvertBufferTo7BitASCII(pbInputBuffer, cbInputBuffer, pbOutputBuffer,
                                     cbOutputBuffer, pcbWritten, pcbRead);
}