mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1435 lines
37 KiB
1435 lines
37 KiB
/**********************************************************************/
|
|
/** Microsoft Windows NT **/
|
|
/** Copyright(c) Microsoft Corp., 1994 **/
|
|
/**********************************************************************/
|
|
|
|
/*
|
|
string.cxx
|
|
|
|
This module contains a light weight string class
|
|
|
|
|
|
FILE HISTORY:
|
|
Johnl 15-Aug-1994 Created
|
|
MuraliK 27-Feb-1995 Modified to be a standalone module with buffer.
|
|
MuraliK 2-June-1995 Made into separate library
|
|
|
|
*/
|
|
|
|
#include "precomp.hxx"
|
|
|
|
|
|
//
|
|
// Normal includes only for this module to be active
|
|
//
|
|
|
|
# include <opt_time.h>
|
|
|
|
extern "C" {
|
|
# include <nt.h>
|
|
# include <ntrtl.h>
|
|
# include <nturtl.h>
|
|
# include <windows.h>
|
|
};
|
|
|
|
# include "dbgutil.h"
|
|
# include <string.hxx>
|
|
# include <auxctrs.h>
|
|
|
|
# include <tchar.h>
|
|
# include <mbstring.h>
|
|
|
|
//
|
|
// String globals
|
|
//
|
|
|
|
typedef UCHAR * ( __cdecl * PFNSTRCASE ) ( UCHAR * );
|
|
typedef INT ( __cdecl * PFNSTRNICMP ) ( const UCHAR *, const UCHAR *, size_t );
|
|
typedef INT ( __cdecl * PFNSTRICMP ) ( const UCHAR *, const UCHAR * );
|
|
typedef size_t ( __cdecl * PFNSTRLEN ) ( const UCHAR * );
|
|
typedef UCHAR * (__cdecl * PFNSTRRCHR) (const UCHAR *, UINT);
|
|
|
|
PFNSTRCASE g_pfnStrupr = _mbsupr;
|
|
PFNSTRCASE g_pfnStrlwr = _mbslwr;
|
|
PFNSTRNICMP g_pfnStrnicmp = _mbsnicmp;
|
|
PFNSTRICMP g_pfnStricmp = _mbsicmp;
|
|
PFNSTRLEN g_pfnStrlen = _mbslen;
|
|
PFNSTRRCHR g_pfnStrrchr = _mbsrchr;
|
|
|
|
BOOL g_fFavorDBCS = FALSE;
|
|
|
|
#define UTF8_HACK_KEY "System\\CurrentControlSet\\Services\\InetInfo\\Parameters"
|
|
#define UTF8_HACK_VALUE "FavorDBCS"
|
|
|
|
//
|
|
// Private Definitions
|
|
//
|
|
|
|
//
//  Extra bytes requested whenever an append has to grow the buffer, so
//  that a run of appends does not reallocate every time
//
#define STR_SLOP        128

//
//  Maps a value between zero and fifteen to its (upper case) hex digit
//
#define HEXDIGIT( nDigit )                                      \
    (TCHAR)( ((nDigit) <= 9) ? ((nDigit) + '0')                 \
                             : ((nDigit) - 10 + 'A') )

//
//  Maps a single hex digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
//  numeric value.  The argument is not validated here.
//
#define TOHEX( ch )                                             \
    ( ((ch) <= '9') ? ((ch) - '0')                              \
                    : ( ((ch) >= 'a') ? ((ch) - 'a' + 10)       \
                                      : ((ch) - 'A' + 10) ) )
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: STR::STR
|
|
|
|
SYNOPSIS: Construct a string object
|
|
|
|
ENTRY: Optional object initializer
|
|
|
|
NOTES: If the object is not valid (i.e. !IsValid()) then GetLastError
|
|
should be called.
|
|
|
|
The object is guaranteed to construct successfully if nothing
|
|
or NULL is passed as the initializer.
|
|
|
|
********************************************************************/
|
|
|
|
// Inlined in string.hxx
|
|
|
|
|
|
VOID
STR::AuxInit( const BYTE * pInit )
/*++
    Initializes the string from an optional NUL terminated initializer.

    Arguments:
        pInit - bytes of a NUL terminated string to copy, or NULL to start
                with an empty string.

    Notes:
        If the buffer cannot be resized to hold the initializer, the
        object is marked invalid through BUFFER::SetValid( FALSE ).
--*/
{
    //
    //  No initializer: the string starts out empty
    //

    if ( pInit == NULL )
    {
        m_cchLen = 0;
        *((CHAR *) QueryPtr()) = '\0';
        return;
    }

    //
    //  Size of the initializer in bytes, terminator included
    //

    INT cbInit = (::strlen( (const CHAR * ) pInit ) + 1) * sizeof(CHAR);

    if ( !Resize( cbInit ) )
    {
        BUFFER::SetValid( FALSE );
        return;
    }

    CopyMemory( QueryPtr(), pInit, cbInit );
    m_cchLen = (cbInit) / sizeof(CHAR) - 1;

} // STR::AuxInit()
|
|
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: STR::AuxAppend
|
|
|
|
SYNOPSIS: Appends the string onto this one.
|
|
|
|
ENTRY: Object to append
|
|
********************************************************************/
|
|
|
|
BOOL STR::AuxAppend( const BYTE * pStr, UINT cbStr, BOOL fAddSlop )
/*++
    Appends cbStr bytes from pStr to the end of this string and NUL
    terminates the result.

    Arguments:
        pStr     - bytes to append (must not be NULL)
        cbStr    - number of bytes to append
        fAddSlop - when the buffer has to grow, request STR_SLOP extra
                   bytes instead of only room for the terminator

    Returns:
        TRUE on success, FALSE if the buffer could not be resized.
--*/
{
    DBG_ASSERT( pStr != NULL );

    const UINT cbOld = QueryCB();

    AcIncrement( CacStringAppend );

    //
    //  QuerySize() is the requested size of the buffer, *not* the length
    //  of the string held in it.  Grow only when the appended bytes plus
    //  the terminator do not fit.
    //

    const BOOL fMustGrow = ( QuerySize() < cbOld + cbStr + sizeof(CHAR) );

    if ( fMustGrow &&
         !Resize( cbOld + cbStr + (fAddSlop ? STR_SLOP : sizeof(CHAR) )) )
    {
        return FALSE;
    }

    //
    //  Copy exactly cbStr bytes behind the current contents, then add an
    //  explicit terminator
    //

    BYTE * pbTail = (BYTE *) QueryPtr() + cbOld;

    memcpy( pbTail, pStr, cbStr );

    m_cchLen += cbStr/sizeof(CHAR);
    *((CHAR *) QueryPtr() + m_cchLen) = '\0';

    return TRUE;

} // STR::AuxAppend()
|
|
|
|
|
|
#if 0

// STR::SetLen() is inlined now; this copy is compiled out.

BOOL
STR::SetLen( IN DWORD cchLen)
/*++
    Truncates the string stored in this buffer to the specified length.

    Returns FALSE, leaving the string untouched, when cchLen does not fit
    in the current buffer.
--*/
{
    if ( cchLen < QuerySize()) {

        // NUL terminate at the requested position and record the length
        *((CHAR *) QueryPtr() + cchLen) = '\0';
        m_cchLen = cchLen;

        return ( TRUE);
    }

    // the buffer itself is not sufficient for this length
    return ( FALSE);

} // STR::SetLen()

#endif // 0
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: STR::LoadString
|
|
|
|
SYNOPSIS: Loads a string resource from this module's string table
|
|
or from the system string table
|
|
|
|
ENTRY: dwResID - System error or module string ID
|
|
lpszModuleName - name of the module from which to load.
|
|
If NULL, then load the string from system table.
|
|
|
|
********************************************************************/
|
|
|
|
BOOL STR::LoadString( IN DWORD   dwResID,
                      IN LPCTSTR lpszModuleName, // Optional
                      IN DWORD   dwLangID        // Optional
                      )
/*++
    Replaces the contents of this string with a string resource.

    When lpszModuleName is given, the string comes from that module's
    string table (LoadStringA).  When it is NULL, dwResID is looked up in
    the system message table (FormatMessageA), using dwLangID.

    Returns:
        TRUE if the text was found and copied, FALSE otherwise.
--*/
{
    INT cch;

    if ( lpszModuleName != NULL )
    {
        //
        //  String table of the named module
        //

        CHAR achRes[STR_MAX_RES_SIZE];

        cch = ::LoadStringA( GetModuleHandle( lpszModuleName),
                             dwResID,
                             (CHAR *) achRes,
                             sizeof(achRes));

        if ( cch == 0 )
        {
            return ( FALSE);
        }

        return ( Copy( (LPSTR) achRes, cch ));
    }

    //
    //  System message table.  The ANSI entry point is called directly so
    //  no Unicode conversion is needed.
    //

    BYTE * pchMsg = NULL;

    cch = ::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER |
                            FORMAT_MESSAGE_IGNORE_INSERTS  |
                            FORMAT_MESSAGE_MAX_WIDTH_MASK  |
                            FORMAT_MESSAGE_FROM_SYSTEM,
                            NULL,
                            dwResID,
                            dwLangID,
                            (LPSTR) &pchMsg,
                            1024,
                            NULL );

    if ( cch == 0 )
    {
        return ( FALSE);
    }

    BOOL fCopied = Copy( (LPCSTR) pchMsg, cch );

    //
    //  Free the buffer FormatMessage allocated
    //

    ::LocalFree( (VOID*) pchMsg );

    return ( fCopied);

} // STR::LoadString()
|
|
|
|
|
|
|
|
|
|
BOOL STR::LoadString( IN DWORD   dwResID,
                      IN HMODULE hModule
                      )
/*++
    Replaces the contents of this string with string resource dwResID
    from the string table of the module identified by hModule.

    Returns:
        TRUE if the resource was loaded and copied, FALSE otherwise.
--*/
{
    DBG_ASSERT( hModule != NULL );

    CHAR achRes[STR_MAX_RES_SIZE];

    INT  cch = ::LoadStringA( hModule,
                              dwResID,
                              (CHAR *) achRes,
                              sizeof(achRes));

    if ( cch == 0 ) {

        return ( FALSE);
    }

    return ( Copy( (LPSTR) achRes, cch ));

} // STR::LoadString()
|
|
|
|
|
|
|
|
BOOL
STR::FormatString(
    IN DWORD   dwResID,
    IN LPCTSTR apszInsertParams[],
    IN LPCTSTR lpszModuleName,
    IN DWORD   cbMaxMsg
    )
/*++
    Formats message dwResID from the message table of the named module,
    substituting the entries of apszInsertParams for the inserts, and
    stores the result in this string.

    Arguments:
        dwResID          - message id
        apszInsertParams - argument array handed to FormatMessageA
        lpszModuleName   - module whose message table is used
        cbMaxMsg         - scaled by sizeof(WCHAR) and passed as the size
                           argument of FormatMessageA

    Returns:
        TRUE if the message was formatted and copied, FALSE otherwise.
--*/
{
    LPSTR pchMsg;

    DWORD cch = ::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER |
                                  FORMAT_MESSAGE_ARGUMENT_ARRAY  |
                                  FORMAT_MESSAGE_FROM_HMODULE,
                                  GetModuleHandle( lpszModuleName ),
                                  dwResID,
                                  0,
                                  (LPSTR) &pchMsg,
                                  cbMaxMsg * sizeof(WCHAR),
                                  (va_list *) apszInsertParams );

    if ( cch == 0 )
    {
        return FALSE;
    }

    /* INTRINSA suppress = uninitialized */
    BOOL fCopied = Copy( (LPCSTR) pchMsg, cch );

    // FormatMessageA allocated the buffer; release it
    ::LocalFree( (VOID*) pchMsg );

    return fCopied;
}
|
|
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: STR::Escape
|
|
|
|
SYNOPSIS: Replaces non-ASCII characters with their hex equivalent
|
|
|
|
NOTES:
|
|
|
|
HISTORY:
|
|
Johnl 17-Aug-1994 Created
|
|
|
|
********************************************************************/
|
|
|
|
BOOL STR::Escape( VOID )
/*++
    Replaces, in place, every character that needs escaping with "%XX",
    where XX is the upper case hex value of the byte.

    Escaped are: bytes 0 through 32 and 128 through 159 (except CR and
    LF), and the characters '%', '?', '+', '&' and '#'.

    Returns:
        TRUE on success, FALSE if the buffer could not be grown.

    Notes:
        Each byte is examined as an unsigned value.  With a plain
        (signed) CHAR the 128..159 test could never be true, so those
        bytes were silently left unescaped.
--*/
{
    CHAR * pch = QueryStr();
    int    i   = 0;
    UCHAR  ch;

    DBG_ASSERT( pch );

    while ( (ch = (UCHAR) pch[i]) != '\0' )
    {
        //
        //  Escape characters that are in the non-printable range
        //  but ignore CR and LF
        //

        if ( ((ch <= 32)                     ||
              ((ch >= 128) && (ch <= 159))   ||
              (ch == '%') || (ch == '?') || (ch == '+') || (ch == '&') ||
              (ch == '#')) &&
             !(ch == '\n' || ch == '\r') )
        {
            if ( !Resize( QuerySize() + 2 * sizeof(CHAR) ))
                return FALSE;

            //
            //  Resize can change the base pointer
            //

            pch = QueryStr();

            //
            //  Insert the escape character
            //

            pch[i] = '%';

            //
            //  Open a gap for the two hex digits (memory can overlap)
            //

            /* INTRINSA suppress = uninitialized */

            ::memmove( &pch[i+3],
                       &pch[i+1],
                       (::strlen( &pch[i+1] ) + 1) * sizeof(CHAR));

            //
            //  High nibble first, then the low nibble
            //

            UINT nHigh = (UINT)(ch / 16);
            UINT nLow  = (UINT)(ch % 16);

            pch[i+1] = HEXDIGIT( nHigh );
            pch[i+2] = HEXDIGIT( nLow );

            i += 3;
        }
        else
        {
            i++;
        }
    }

    m_cchLen = ::strlen( QueryStr());  // to be safe recalc the new length
    return TRUE;

} // STR::Escape()
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: STR::EscapeSpaces
|
|
|
|
SYNOPSIS: Replaces all spaces with their hex equivalent
|
|
|
|
NOTES:
|
|
|
|
HISTORY:
|
|
Johnl 17-Aug-1994 Created
|
|
|
|
********************************************************************/
|
|
|
|
BOOL STR::EscapeSpaces( VOID )
/*++
    Replaces, in place, every space in the string with "%20".

    Returns:
        TRUE on success, FALSE if the buffer could not be grown.

    Notes:
        Whether anything was replaced is tracked with an explicit flag.
        Using the offset of the last space for that purpose missed the
        case of a single space at offset zero and left the cached length
        stale.
--*/
{
    CHAR * pch      = QueryStr();
    CHAR * pchTmp;
    int    i        = 0;
    BOOL   fChanged = FALSE;

    DBG_ASSERT( pch );

    while ( (pchTmp = strchr( pch + i, ' ' )) != NULL )
    {
        i = DIFF( pchTmp - QueryStr() );

        if ( !Resize( QuerySize() + 2 * sizeof(CHAR) ))
            return FALSE;

        //
        //  Resize can change the base pointer
        //

        pch = QueryStr();

        //
        //  Insert the escape character
        //

        pch[i] = '%';

        //
        //  Open a gap for the two hex digits (memory can overlap)
        //

        ::memmove( &pch[i+3],
                   &pch[i+1],
                   (::strlen( &pch[i+1] ) + 1) * sizeof(CHAR));

        //
        //  This routine only replaces spaces
        //

        pch[i+1] = '2';
        pch[i+2] = '0';

        fChanged = TRUE;

        //
        //  Resume the search behind the escape just written
        //

        i += 3;
    }

    if ( fChanged )
    {
        m_cchLen = ::strlen( QueryStr());  // to be safe recalc the new length
    }

    return TRUE;

} // STR::EscapeSpaces()
|
|
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: STR::Unescape
|
|
|
|
SYNOPSIS: Replaces hex escapes with the Latin-1 equivalent
|
|
|
|
NOTES: Operates on the single byte (CHAR) string; %uXXXX escapes are converted through the ANSI code page
|
|
|
|
HISTORY:
|
|
Johnl 17-Aug-1994 Created
|
|
|
|
********************************************************************/
|
|
|
|
BOOL STR::Unescape( VOID )
/*++
    Decodes, in place, the hex escapes found in the string:

        %XX      - replaced by the byte with hex value XX
        %uXXXX   - (also %UXXXX) the 16 bit value is converted with
                   WideCharToMultiByte( CP_ACP ) and replaced by the
                   resulting one or two bytes

    A '%' that is not followed by a valid escape is kept as is.

    Returns:
        Always TRUE.

    Notes:
        pDest (write position) never runs ahead of pScan (read
        position).  Until the first escape has been decoded the two are
        equal and nothing needs to be moved, but pDest must still be
        advanced over the text between two '%' characters; otherwise
        that text is overwritten by the next decoded escape.
--*/
{
    CHAR    *pScan;
    CHAR    *pDest;
    CHAR    *pNextScan;
    wchar_t  wch;
    DWORD    dwLen;
    BOOL     fChanged = FALSE;

    pDest = pScan = strchr( QueryStr(), '%');

    while (pScan)
    {
        if ( (pScan[1] == 'u' || pScan[1] == 'U') &&
             ::isxdigit( (UCHAR)pScan[2] ) &&
             ::isxdigit( (UCHAR)pScan[3] ) &&
             ::isxdigit( (UCHAR)pScan[4] ) &&
             ::isxdigit( (UCHAR)pScan[5] ) )
        {
            wch  = TOHEX(pScan[2]) * 4096 + TOHEX(pScan[3]) * 256;
            wch += TOHEX(pScan[4]) * 16   + TOHEX(pScan[5]);

            dwLen = WideCharToMultiByte( CP_ACP,
                                         0,
                                         &wch,
                                         1,
                                         (LPSTR) pDest,
                                         2,
                                         NULL,
                                         NULL );

            pDest   += dwLen;
            pScan   += 6;
            fChanged = TRUE;
        }
        else if ( ::isxdigit( (UCHAR)pScan[1] ) &&   // WinSE 4944
                  ::isxdigit( (UCHAR)pScan[2] ))
        {
            *pDest = TOHEX(pScan[1]) * 16 + TOHEX(pScan[2]);

            pDest++;
            pScan   += 3;
            fChanged = TRUE;
        }
        else   // Not an escaped char, just a '%'
        {
            if (fChanged)
                *pDest = *pScan;

            pDest++;
            pScan++;
        }

        //
        //  Carry over everything between this and the next '%'
        //

        pNextScan = strchr( pScan, '%');

        if (!pNextScan)   // That was the last '%' in the string
        {
            if (fChanged)   // pScan != pDest, so the tail has to be moved
            {
                ::memmove( pDest,
                           pScan,
                           (::strlen( pScan ) + 1) * sizeof(CHAR));  // +1 to copy '\0'
            }
        }
        else if ((dwLen = DIFF(pNextScan - pScan)) != 0)
        {
            //
            //  There is another '%' and it is not back to back with this
            //  one.  The move is only needed once something was decoded,
            //  but the write position advances in either case.
            //

            if (fChanged)
            {
                ::memmove( pDest,
                           pScan,
                           dwLen * sizeof(CHAR));
            }

            pDest += dwLen;
        }

        pScan = pNextScan;
    }

    if ( fChanged )
    {
        m_cchLen = ::strlen( QueryStr());  // for safety recalc the length
    }

    return TRUE;
}
|
|
|
|
|
|
|
|
BOOL
STR::CopyToBuffer( WCHAR * lpszBuffer,  LPDWORD lpcch) const
/*++
    Description:
        Converts the string from the ANSI code page to Unicode and stores
        it, terminator included, in the WCHAR buffer passed in.

    Arguments:
        lpszBuffer  pointer to WCHAR buffer which on return contains
                    the UNICODE version of string on success.
        lpcch       pointer to DWORD containing the size of the buffer in
                    characters.
                    If *lpcch == 0 the function returns TRUE with
                    QueryCCH() + 1 stored in *lpcch and does not touch
                    lpszBuffer.
                    On success *lpcch receives the number of characters
                    written, terminator included.
                    If the conversion fails with ERROR_INSUFFICIENT_BUFFER,
                    *lpcch receives QueryCCH() + 1.

    Returns:
        TRUE on success.
        FALSE on failure.  Use GetLastError() for further details.

    Notes:
        Only a zero return from MultiByteToWideChar is a failure.  A
        buffer larger than the converted string (which includes every
        string containing double byte characters when the buffer was
        sized with QueryCCH() + 1) is a success.

    History:
        MuraliK     11-30-94
--*/
{
    if ( lpcch == NULL) {
        SetLastError( ERROR_INVALID_PARAMETER);
        return ( FALSE);
    }

    if ( *lpcch == 0) {

        //
        //  Inquiring the size of buffer alone
        //
        *lpcch = QueryCCH() + 1;    // add one character for terminating null
        return ( TRUE);
    }

    //
    //  Copy after conversion from ANSI to Unicode
    //
    int iRet = MultiByteToWideChar( CP_ACP,   MB_PRECOMPOSED,
                                    QueryStrA(),  QueryCCH() + 1,
                                    lpszBuffer, (int )*lpcch);

    if ( iRet == 0) {

        //
        //  Error in conversion.  Report the size to retry with when the
        //  buffer was the problem.
        //
        if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
            *lpcch = QueryCCH() + 1;
        }

        return ( FALSE);
    }

    *lpcch = (DWORD ) iRet;

    return ( TRUE);

} // STR::CopyToBuffer()
|
|
|
|
|
|
BOOL
STR::CopyToBuffer( CHAR * lpszBuffer,  LPDWORD lpcch) const
/*++
    Description:
        Copies the string, terminator included, into the CHAR buffer
        passed in, provided the buffer is large enough.

    Arguments:
        lpszBuffer  pointer to CHAR buffer which on return contains
                    the string on success.
        lpcch       pointer to DWORD containing the size of the buffer.
                    On return, whether the copy succeeded or not, it
                    holds the required size: QueryCCH() + 1.

    Returns:
        TRUE on success.
        FALSE on failure.  GetLastError() is ERROR_INVALID_PARAMETER when
        lpcch is NULL, and ERROR_INSUFFICIENT_BUFFER when lpszBuffer is
        NULL or *lpcch is too small.

    History:
        MuraliK     20-Nov-1996
--*/
{
    if ( lpcch == NULL) {
        SetLastError( ERROR_INVALID_PARAMETER);
        return ( FALSE);
    }

    // room needed: the string plus its terminating null
    DWORD cchNeeded = QueryCCH() + 1;

    BOOL  fFits     = ( NULL != lpszBuffer) && ( *lpcch >= cchNeeded);

    if ( fFits) {

        DBG_ASSERT( lpszBuffer);
        CopyMemory( lpszBuffer, QueryStrA(), cchNeeded);

    } else {

        DBG_ASSERT( (NULL == lpszBuffer) || (*lpcch < cchNeeded));
        SetLastError( ERROR_INSUFFICIENT_BUFFER);
    }

    *lpcch = cchNeeded;

    return ( fFits);

} // STR::CopyToBuffer()
|
|
|
|
BOOL
STR::SafeCopy( const CHAR  * pchInit )
/*++
    Replaces the contents of this string with pchInit.  If the copy
    fails, the first byte and the length that were in place before the
    call are put back.

    Arguments:
        pchInit - NUL terminated string to copy; NULL leaves the string
                  empty.

    Returns:
        TRUE on success (including pchInit == NULL), otherwise the
        failure result of AuxAppend().
--*/
{
    CHAR  chSaved  = '\0';
    DWORD cchSaved = 0;

    //
    //  Remember what is needed to undo, then make the string empty so
    //  the append below acts as a copy
    //

    if ( QueryPtr() ) {
        chSaved      = *(QueryStr());
        cchSaved     = m_cchLen;
        *(QueryStr()) = '\0';
        m_cchLen     = 0;
    }

    if ( pchInit == NULL ) {
        return TRUE;
    }

    BOOL fAppended = AuxAppend( (const BYTE *) pchInit, ::strlen( pchInit ), FALSE );

    if ( !fAppended && QueryPtr() ) {

        // the append failed - restore the previous string
        *(QueryStr()) = chSaved;
        m_cchLen      = cchSaved;
    }

    return fAppended;
}
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: ::CollapseWhite
|
|
|
|
SYNOPSIS: Collapses white space starting at the passed pointer.
|
|
|
|
RETURNS: Returns a pointer to the next chunk of white space or the
|
|
end of the string.
|
|
|
|
NOTES: This is a Unicode only method
|
|
|
|
HISTORY:
|
|
Johnl 24-Aug-1994 Created
|
|
|
|
********************************************************************/
|
|
|
|
WCHAR * CollapseWhite( WCHAR * pch )
/*++
    Removes the run of white space (as defined by ISWHITE) that starts at
    pch by shifting the rest of the string, terminator included, down
    over it.

    Returns:
        Pointer to the next white space character after pch, or to the
        terminating null if there is none.

    Notes:
        The number of bytes moved is the length of the remaining string.
        Moving only as many bytes as there were white space characters
        left the tail of the string where it was.  The scan for the next
        white space starts at the beginning of the shifted text.
--*/
{
    LPWSTR pchStart = pch;

    while ( ISWHITE( *pch ) )
        pch++;

    if ( pch != pchStart )
    {
        //
        //  Find the terminator so the whole tail can be moved
        //

        LPWSTR pchEnd = pch;

        while ( *pchEnd )
            pchEnd++;

        ::memmove( pchStart,
                   pch,
                   (DIFF(pchEnd - pch) + 1) * sizeof(WCHAR) );
    }

    //
    //  The text now begins at pchStart
    //

    pch = pchStart;

    while ( *pch && !ISWHITE( *pch ))
        pch++;

    return pch;

} // CollapseWhite()
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
// Private constants.
|
|
//
|
|
|
|
//
//  Action codes stored in p_ActionTable and dispatched on by CanonURL()
//
#define ACTION_NOTHING              0x00000000      // emit nothing
#define ACTION_EMIT_CH              0x00010000      // emit the character
#define ACTION_EMIT_DOT_CH          0x00020000      // emit "." then the character
#define ACTION_EMIT_DOT_DOT_CH      0x00030000      // emit ".." then the character
#define ACTION_BACKUP               0x00040000      // drop the previous path element
#define ACTION_MASK                 0xFFFF0000
|
|
|
|
|
|
//
|
|
// Private globals.
|
|
//
|
|
|
|
//
//  Next state, indexed by (current state * 4) + character class.
//  State 4 is the final state.
//
INT p_StateTable[16] =
{
    //  other   "."     EOS     "\"
        0,      0,      4,      1,          // state 0
        0,      2,      4,      1,          // state 1
        0,      3,      4,      1,          // state 2
        0,      0,      4,      1           // state 3
};
|
|
|
|
|
|
|
|
//
//  Action to perform, indexed by (current state * 4) + character class.
//  The columns are, in order: other, ".", EOS, "\".
//
INT p_ActionTable[16] =
{
    // state 0
    ACTION_EMIT_CH,             ACTION_EMIT_CH,
    ACTION_EMIT_CH,             ACTION_EMIT_CH,

    // state 1
    ACTION_EMIT_CH,             ACTION_NOTHING,
    ACTION_EMIT_CH,             ACTION_NOTHING,

    // state 2
    ACTION_EMIT_DOT_CH,         ACTION_NOTHING,
    ACTION_EMIT_CH,             ACTION_NOTHING,

    // state 3
    ACTION_EMIT_DOT_DOT_CH,     ACTION_EMIT_DOT_DOT_CH,
    ACTION_BACKUP,              ACTION_BACKUP
};
|
|
|
|
// Each state owns four consecutive table entries (one per character
// class), so the entries of a state start at four times its number.
#define IndexFromState( st )    ( 4 * (st) )
|
|
|
|
|
|
// the following table provides the index for various ISA Latin1 characters
|
|
// in the incoming URL.
|
|
// It assumes that the URL is ISO Latin1 == ASCII
|
|
INT p_rgIndexForChar[] = {
|
|
|
|
2, // null char
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 thru 10
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 11 thru 20
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21 thru 30
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 31 thru 40
|
|
0, 0, 0, 0, 0, 1, 3, 0, 0, 0, // 41 thru 50 46 = '.' 47 = '/'
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 51 thru 60
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 61 thru 70
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 71 thru 80
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 81 thru 90
|
|
0, 3, 0, 0, 0, 0, 0, 0, 0, 0, // 91 thru 100 92 = '\\'
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 101 thru 110
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 111 thru 120
|
|
0, 0, 0, 0, 0, 0, 0, 0 // 121 thru 128
|
|
};
|
|
|
|
// TRUE when the top two bits of ch are 10, the form of a UTF8 trail byte
#define IS_UTF8_TRAILBYTE(ch)      (0x80 == ((ch) & 0xc0))
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: IsUTF8URL
|
|
|
|
ENTRY: pszPath - The path to sanitize.
|
|
|
|
HISTORY:
|
|
atsusk 06-Jan-1998 Created.
|
|
|
|
********************************************************************/
|
|
|
|
BOOL IsUTF8URL(CHAR * pszPath)
/*++
    Decides whether pszPath should be treated as UTF8.

    When g_fFavorDBCS is set, the path is reported as UTF8 only if it is
    not a valid string in the ANSI code page.

    Otherwise every byte with the top bit set must start a well formed
    two or three byte UTF8 sequence whose character converts to one or
    two bytes in the ANSI code page without use of the default character.
    A path without any such byte is reported as UTF8 as well.

    Returns:
        TRUE if the path is to be handled as UTF8, FALSE otherwise.
--*/
{
    if ( g_fFavorDBCS )
    {
        int cwch = MultiByteToWideChar( CP_ACP,
                                        MB_ERR_INVALID_CHARS,
                                        pszPath,
                                        -1,
                                        NULL,
                                        0 );

        return ( cwch == 0 );
    }

    for ( CHAR chLead = *pszPath++; chLead != 0; chLead = *pszPath++ )
    {
        if ( !(chLead & 0x80) )
        {
            continue;       // plain 7 bit character
        }

        wchar_t wch;
        BOOL    bDefault = FALSE;

        //
        //  Fetch the next byte, and peek at the one behind it unless the
        //  string ends first
        //

        char chTrail1 = *pszPath++;
        char chTrail2 = chTrail1 ? *pszPath : 0;

        if ( ((chLead & 0xF0) == 0xE0) &&
             IS_UTF8_TRAILBYTE(chTrail1) &&
             IS_UTF8_TRAILBYTE(chTrail2) )
        {
            // three byte case
            // 1110xxxx 10xxxxxx 10xxxxxx
            wch = (wchar_t) (((chLead & 0x0f) << 12) |
                             ((chTrail1 & 0x3f) << 6) |
                             (chTrail2 & 0x3f));
            pszPath++;
        }
        else if ( ((chLead & 0xE0) == 0xC0) &&
                  IS_UTF8_TRAILBYTE(chTrail1) )
        {
            // two byte case
            // 110xxxxx 10xxxxxx
            wch = (wchar_t) (((chLead & 0x1f) << 6) | (chTrail1 & 0x3f));
        }
        else
        {
            return FALSE;
        }

        int iLen = WideCharToMultiByte( CP_ACP,
                                        0,
                                        &wch,
                                        1,
                                        NULL,
                                        0,
                                        NULL,
                                        &bDefault );

        if ( bDefault == TRUE || iLen == 0 || iLen > 2 )
        {
            return FALSE;
        }
    }

    return TRUE;

}   // IsUTF8URL()
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
NAME: CanonURL
|
|
|
|
SYNOPSIS: Sanitizes a path by removing bogus path elements.
|
|
|
|
As expected, "/./" entries are simply removed, and
|
|
"/../" entries are removed along with the previous
|
|
path element.
|
|
|
|
To maintain compatibility with URL path semantics
|
|
additional transformations are required. All backward
|
|
slashes "\\" are converted to forward slashes. Any
|
|
repeated forward slashes (such as "///") are mapped to
|
|
single backslashes.
|
|
|
|
A state table (see the p_StateTable global at the
|
|
beginning of this file) is used to perform most of
|
|
the transformations. The table's rows are indexed
|
|
by current state, and the columns are indexed by
|
|
the current character's "class" (either slash, dot,
|
|
NULL, or other). Each entry in the table consists
|
|
of the new state tagged with an action to perform.
|
|
See the ACTION_* constants for the valid action
|
|
codes.
|
|
|
|
ENTRY: pszPath - The path to sanitize.
|
|
fIsDBCSLocale - Indicates the server is in a
|
|
locale that uses DBCS.
|
|
|
|
HISTORY:
|
|
KeithMo 07-Sep-1994 Created.
|
|
MuraliK 28-Apr-1995 Adopted this for symbolic paths
|
|
|
|
********************************************************************/
|
|
INT
|
|
CanonURL(
|
|
CHAR * pszPath,
|
|
BOOL fIsDBCSLocale
|
|
)
|
|
{
|
|
UCHAR * pszSrc;
|
|
UCHAR * pszDest;
|
|
DWORD ch;
|
|
INT index;
|
|
BOOL fDBCS = FALSE;
|
|
DWORD cchMultiByte = 0;
|
|
|
|
DBG_ASSERT( pszPath != NULL );
|
|
|
|
//
|
|
// Always look for UTF8 except when DBCS characters are detected
|
|
//
|
|
BOOL fScanForUTF8 = IsUTF8URL(pszPath);
|
|
|
|
// If fScanForUTF8 is true, this URL is UTF8. don't recognize DBCS.
|
|
if (fIsDBCSLocale && fScanForUTF8) {
|
|
fIsDBCSLocale = FALSE;
|
|
}
|
|
|
|
//
|
|
// Start our scan at the first character
|
|
//
|
|
|
|
pszSrc = pszDest = (UCHAR *) pszPath;
|
|
|
|
//
|
|
// State 0 is the initial state.
|
|
//
|
|
index = 0; // State = 0
|
|
|
|
//
|
|
// Loop until we enter state 4 (the final, accepting state).
|
|
//
|
|
|
|
do {
|
|
|
|
//
|
|
// Grab the next character from the path and compute its
|
|
// next state. While we're at it, map any forward
|
|
// slashes to backward slashes.
|
|
//
|
|
|
|
index = IndexFromState( p_StateTable[index]); // 4 = # states
|
|
ch = (DWORD ) *pszSrc++;
|
|
|
|
//
|
|
// If this is a DBCS trailing byte - skip it
|
|
//
|
|
|
|
if ( !fIsDBCSLocale )
|
|
{
|
|
index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
|
|
}
|
|
else
|
|
{
|
|
if ( fDBCS )
|
|
{
|
|
//
|
|
// If this is a 0 terminator, we need to set next
|
|
// state accordingly
|
|
//
|
|
|
|
if ( ch == 0 )
|
|
{
|
|
index += p_rgIndexForChar[ ch ];
|
|
}
|
|
|
|
//
|
|
// fDBCS == TRUE means this byte was a trail byte.
|
|
// index is implicitly set to zero.
|
|
//
|
|
fDBCS = FALSE;
|
|
}
|
|
else
|
|
{
|
|
index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
|
|
|
|
if ( IsDBCSLeadByte( (UCHAR)ch ) )
|
|
{
|
|
//
|
|
// This is a lead byte, so the next is a trail.
|
|
//
|
|
fDBCS = TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Interesting UTF8 characters always have the top bit set
|
|
//
|
|
|
|
if ( (ch & 0x80) && fScanForUTF8 )
|
|
{
|
|
wchar_t wch;
|
|
UCHAR mbstr[2];
|
|
|
|
//
|
|
// This is a UTF8 character, convert it here.
|
|
// index is implicitly set to zero.
|
|
//
|
|
if ( cchMultiByte < 2 )
|
|
{
|
|
char chTrail1;
|
|
char chTrail2;
|
|
|
|
chTrail1 = *pszSrc;
|
|
if (chTrail1) {
|
|
chTrail2 = *(pszSrc+1);
|
|
} else {
|
|
chTrail2 = 0;
|
|
}
|
|
wch = 0;
|
|
|
|
if ((ch & 0xf0) == 0xe0)
|
|
{
|
|
// handle three byte case
|
|
// 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
wch = (wchar_t) (((ch & 0x0f) << 12) |
|
|
((chTrail1 & 0x3f) << 6) |
|
|
(chTrail2 & 0x3f));
|
|
|
|
cchMultiByte = WideCharToMultiByte( CP_ACP,
|
|
0,
|
|
&wch,
|
|
1,
|
|
(LPSTR) mbstr,
|
|
2,
|
|
NULL,
|
|
NULL );
|
|
|
|
ch = mbstr[0];
|
|
pszSrc += (3 - cchMultiByte);
|
|
|
|
// WinSE 12843: Security Fix, Index should be updated for this character
|
|
index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
|
|
|
|
} else if ((ch & 0xe0) == 0xc0)
|
|
{
|
|
// handle two byte case
|
|
// 110xxxxx 10xxxxxx
|
|
|
|
wch = (wchar_t) (((ch & 0x1f) << 6) | (chTrail1 & 0x3f));
|
|
|
|
cchMultiByte = WideCharToMultiByte( CP_ACP,
|
|
0,
|
|
&wch,
|
|
1,
|
|
(LPSTR) mbstr,
|
|
2,
|
|
NULL,
|
|
NULL );
|
|
|
|
ch = mbstr[0];
|
|
pszSrc += (2 - cchMultiByte);
|
|
|
|
// WinSE 12843: Security Fix, Index should be updated for this character
|
|
index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
|
|
}
|
|
|
|
} else {
|
|
//
|
|
// get ready to emit 2nd byte of converted character
|
|
//
|
|
ch = mbstr[1];
|
|
cchMultiByte = 0;
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Perform the action associated with the state.
|
|
//
|
|
|
|
switch( p_ActionTable[index] )
|
|
{
|
|
case ACTION_EMIT_DOT_DOT_CH :
|
|
*pszDest++ = '.';
|
|
/* fall through */
|
|
|
|
case ACTION_EMIT_DOT_CH :
|
|
*pszDest++ = '.';
|
|
/* fall through */
|
|
|
|
case ACTION_EMIT_CH :
|
|
*pszDest++ = (CHAR ) ch;
|
|
/* fall through */
|
|
|
|
case ACTION_NOTHING :
|
|
break;
|
|
|
|
case ACTION_BACKUP :
|
|
if( (pszDest > ( (UCHAR *) pszPath + 1 ) ) && (*pszPath == '/'))
|
|
{
|
|
pszDest--;
|
|
DBG_ASSERT( *pszDest == '/' );
|
|
|
|
*pszDest = '\0';
|
|
pszDest = (UCHAR *) strrchr( pszPath, '/') + 1;
|
|
}
|
|
|
|
*pszDest = '\0';
|
|
break;
|
|
|
|
default :
|
|
DBG_ASSERT( !"Invalid action code in state table!" );
|
|
index = IndexFromState(0) + 2; // move to invalid state
|
|
DBG_ASSERT( p_StateTable[index] == 4);
|
|
*pszDest++ = '\0';
|
|
break;
|
|
}
|
|
|
|
} while( p_StateTable[index] != 4 );
|
|
|
|
//
|
|
// point to terminating nul
|
|
//
|
|
if (p_ActionTable[index] == ACTION_EMIT_CH) {
|
|
pszDest--;
|
|
}
|
|
|
|
DBG_ASSERT(*pszDest == '\0' && pszDest > (UCHAR*) pszPath);
|
|
|
|
return DIFF(pszDest - (UCHAR*)pszPath);
|
|
} // CanonURL()
|
|
|
|
|
|
|
|
DWORD
InitializeStringFunctions(
    VOID
    )
/*++
    Initializes the string function pointers depending on the system
    code page.  If the code page doesn't have multi-byte characters, the
    pointers are switched to the regular single byte functions.
    Otherwise they keep pointing at the more expensive multi-byte
    functions.

    Also loads g_fFavorDBCS from the registry value UTF8_HACK_VALUE under
    UTF8_HACK_KEY.  The flag is left unchanged when the key or the value
    cannot be read.

    Arguments:
        None

    Returns:
        Always ERROR_SUCCESS (0).
--*/
{
    CPINFO cpInfo;

    if ( GetCPInfo( CP_ACP, &cpInfo ) && cpInfo.MaxCharSize == 1 )
    {
        g_pfnStrlwr     = (PFNSTRCASE)  _strlwr;
        g_pfnStrupr     = (PFNSTRCASE)  _strupr;
        g_pfnStrnicmp   = (PFNSTRNICMP) _strnicmp;
        g_pfnStricmp    = (PFNSTRICMP)  _stricmp;
        g_pfnStrlen     = (PFNSTRLEN)   strlen;
        g_pfnStrrchr    = (PFNSTRRCHR)  strrchr;
    }

    //
    //  Do we need to hack for Korean?
    //

    HKEY hkeyParams;

    if ( RegOpenKeyEx( HKEY_LOCAL_MACHINE,
                       UTF8_HACK_KEY,
                       0,
                       KEY_READ,
                       &hkeyParams ) == ERROR_SUCCESS )
    {
        DWORD dwFavor = 0;
        DWORD cbFavor = sizeof( dwFavor );

        if ( RegQueryValueEx( hkeyParams,
                              UTF8_HACK_VALUE,
                              NULL,
                              NULL,
                              (LPBYTE) &dwFavor,
                              &cbFavor ) == ERROR_SUCCESS )
        {
            g_fFavorDBCS = !!dwFavor;
        }

        DBG_REQUIRE( RegCloseKey( hkeyParams ) == ERROR_SUCCESS );
    }

    return ERROR_SUCCESS;
}
|
|
|
|
UCHAR *
IISstrupr(
    UCHAR *             pszString
)
/*++
    Upper cases a string through the routine installed in g_pfnStrupr.

    Arguments:
        pszString - String to uppercase

    Returns:
        Whatever the installed routine returns
--*/
{
    DBG_ASSERT( g_pfnStrupr != NULL );

    UCHAR * pszResult = (*g_pfnStrupr)( pszString );

    return pszResult;
}
|
|
|
|
UCHAR *
IISstrlwr(
    UCHAR *             pszString
)
/*++
    Lower cases a string through the routine installed in g_pfnStrlwr.

    Arguments:
        pszString - String to lowercase

    Returns:
        Whatever the installed routine returns
--*/
{
    DBG_ASSERT( g_pfnStrlwr != NULL );

    UCHAR * pszResult = (*g_pfnStrlwr)( pszString );

    return pszResult;
}
|
|
|
|
size_t
|
|
IISstrlen(
|
|
UCHAR * pszString
|
|
)
|
|
/*++
|
|
Wrapper for strlen() call.
|
|
|
|
Arguments:
|
|
pszString - String to check
|
|
|
|
Returns:
|
|
Length of string
|
|
|
|
--*/
|
|
{
|
|
DBG_ASSERT( g_pfnStrlen != NULL );
|
|
|
|
return g_pfnStrlen( pszString );
|
|
}
|
|
|
|
INT
IISstrnicmp(
    UCHAR *             pszString1,
    UCHAR *             pszString2,
    size_t              size
)
/*++
    Compares two strings, ignoring case, through the routine installed in
    g_pfnStrnicmp.

    Arguments:
        pszString1 - String1
        pszString2 - String2
        size       - # characters to compare upto

    Returns:
        Result of the installed comparison routine; 0 if equal
--*/
{
    DBG_ASSERT( g_pfnStrnicmp != NULL );

    INT nCompare = (*g_pfnStrnicmp)( pszString1, pszString2, size );

    return nCompare;
}
|
|
|
|
|
|
INT
IISstricmp(
    UCHAR *             pszString1,
    UCHAR *             pszString2
)
/*++
    Compares two strings, ignoring case, through the routine installed in
    g_pfnStricmp.

    Arguments:
        pszString1 - String1
        pszString2 - String2

    Returns:
        Result of the installed comparison routine; 0 if equal
--*/
{
    DBG_ASSERT( g_pfnStricmp != NULL );

    INT nCompare = (*g_pfnStricmp)( pszString1, pszString2 );

    return nCompare;
}
|
|
|
|
|
|
// like strncpy, but doesn't pad the end of the string with zeroes, which
// is expensive when `source' is short and `count' is large.
//
// Copies at most `count' characters from `source' to `dest'.  When
// `source' is shorter than `count', its terminating zero is copied and
// nothing behind it is touched.  When `source' has `count' or more
// characters, exactly `count' characters are copied and `dest' is NOT
// zero terminated (same as strncpy).  Returns `dest'.
char *
IISstrncpy(
    char * dest,
    const char * source,
    size_t count)
{
    char *start = dest;

    // The loop stops right after it has copied the terminating zero, so
    // there is nothing left to append.  Writing another zero at `dest'
    // here would land one byte past the terminator, which is outside the
    // buffer when strlen(source) + 1 == count.
    while (count && (*dest++ = *source++) != '\0')    /* copy string */
        count--;

    return(start);
}
|
|
|
|
UCHAR *
IISstrrchr(
    const UCHAR *       pszString,
    UINT                c
)
/*++
    Finds the last occurrence of a character through the routine
    installed in g_pfnStrrchr.

    Arguments:
        pszString - String
        c         - Character to find.

    Returns:
        pointer to the char or NULL.
--*/
{
    DBG_ASSERT( g_pfnStrrchr != NULL );

    UCHAR * pchFound = (*g_pfnStrrchr)( pszString, c );

    return pchFound;
}
|
|
|