|
|
//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1996 - 1998.
//
// File: cgiesc.cxx
//
// Contents: WEB CGI escape & unescape classes
//
// History: 96/Jan/3 DwightKr Created
//
//----------------------------------------------------------------------------
#include <pch.cxx>
#pragma hdrstop
#include <cgiesc.hxx>
//+---------------------------------------------------------------------------
//
// Function: DecodeURLEscapes - Decode URL escapes
//
// Synopsis: Removes the escape characters from a string, converting to
// Unicode along the way.
//
// Arguments: [pIn] - string to convert
// [l] - length of string in chars, updated on return
// [pOut] - converted string
// [ulCodePage] - code page for translation
//
//----------------------------------------------------------------------------
void DecodeURLEscapes( BYTE * pIn, ULONG & l, WCHAR * pOut, ULONG ulCodePage ) { WCHAR * p2 = pOut; WCHAR c1; WCHAR c2;
XArray<BYTE> xDeferBuf; BYTE * pDefer = 0;
ULONG l2 = l;
for( ; l2; l2-- ) { BOOL fSaveAsUnicode = FALSE;
// Convert ASCII to corresponding character
// If Latin-1 character, save for MB translation, accumulate char
// If Unicode escape, flush accumulated chars and save converted char
c1 = *pIn;
//
// Spaces are escaped by converting them into plus signs.
// Convert them back.
//
if ( c1 == '+' ) { c1 = ' '; pIn++; } else if (c1 == '%') { //
// Special characters are converted to values of the format %XY
// where XY is the HEX code for the ASCII character.
//
// A percent sign is transmitted as %%.
//
if (*(pIn+1) == '%') { c1 = '%'; pIn += 2; l2--; } else if (l2 >= 3) { pIn++; c1 = (WCHAR) toupper(*pIn); c2 = (WCHAR) toupper(*(pIn+1));
if ( c1 == 'U' && l2 >= 6 ) { // Unicode escape, %uxxxx
c1 = c2; c2 = (WCHAR) toupper(*(pIn+2)); WCHAR c3 = (WCHAR) toupper(*(pIn+3)); WCHAR c4 = (WCHAR) toupper(*(pIn+4)); if ( isxdigit( c1 ) && isxdigit( c2 ) && isxdigit( c3 ) && isxdigit( c4 ) ) { c1 = ((c1 >= 'A') ? (c1-'A')+10 : c1-'0') << 12; c1 += ((c2 >= 'A') ? (c2-'A')+10 : c2-'0') << 8; c1 += ((c3 >= 'A') ? (c3-'A')+10 : c3-'0') << 4; c1 += ((c4 >= 'A') ? (c4-'A')+10 : c4-'0');
if ( pDefer ) { unsigned cchDefer = CiPtrToUint( pDefer - xDeferBuf.GetPointer() );
cchDefer = MultiByteToWideChar( ulCodePage, 0, (char *) xDeferBuf.GetPointer(), cchDefer, p2, cchDefer );
Win4Assert( cchDefer != 0 ); pDefer = 0; p2 += cchDefer; } pIn += 5; l2 -= 5; fSaveAsUnicode = TRUE; } else { c1 = '%'; } } else if ( isxdigit( c1 ) && isxdigit( c2 ) ) { c1 = ( ((c1 >= 'A') ? (c1-'A')+10 : c1-'0')*16 + ((c2 >= 'A') ? (c2-'A')+10 : c2-'0') ); pIn += 2; l2 -= 2; if ( c1 >= 0x80 && 0 == pDefer ) { // The character needs to be deferred for MBCS
// translation.
if (xDeferBuf.GetPointer() == 0) { xDeferBuf.Init( l2+1 ); } pDefer = xDeferBuf.GetPointer(); } } else c1 = '%'; } else { pIn++; if ( c1 >= 0x80 && 0 == pDefer ) { // The character needs to be deferred for MBCS
// translation.
if (xDeferBuf.GetPointer() == 0) { xDeferBuf.Init( l2+1 ); } pDefer = xDeferBuf.GetPointer(); } } } else { pIn++; }
if (! fSaveAsUnicode) { if ( c1 >= 0x80 && 0 == pDefer ) { // The character needs to be deferred for MBCS
// translation.
if (xDeferBuf.GetPointer() == 0) { xDeferBuf.Init( l2+1 ); } pDefer = xDeferBuf.GetPointer(); } } else { Win4Assert( pDefer == 0 ); }
if (pDefer) { Win4Assert( c1 < 0x100 ); *pDefer++ = (BYTE) c1; } else { *p2++ = c1; } }
if ( pDefer ) { unsigned cchDefer = CiPtrToUint( pDefer - xDeferBuf.GetPointer() );
cchDefer = MultiByteToWideChar( ulCodePage, 0, (char *) xDeferBuf.GetPointer(), cchDefer, p2, cchDefer );
Win4Assert( cchDefer != 0 ); pDefer = 0; p2 += cchDefer; } *p2 = 0; l = CiPtrToUlong( p2 - pOut ); }
//+---------------------------------------------------------------------------
//
//  Function:   DecodeEscapes - Decode escapes in place
//
//  Synopsis:   Decodes the escapes in a Unicode string, overwriting the
//              string with the result.  Forwards to the three-argument
//              overload using the same buffer for input and output.
//
//  Arguments:  [p] - string to convert; receives the converted string
//              [l] - length of string in chars, updated on return
//
//----------------------------------------------------------------------------
void DecodeEscapes( WCHAR * p, ULONG & l )
{
    WCHAR * const pwcInPlace = p;

    DecodeEscapes( pwcInPlace, l, pwcInPlace );
}
//+---------------------------------------------------------------------------
//
//  Function:   CgiHexDigitValue, private
//
//  Synopsis:   Maps an ASCII hexadecimal digit, in either case, to its
//              numeric value.
//
//  Arguments:  [wc] - character to examine
//
//  Returns:    0 through 15 for a hex digit, -1 for any other character.
//
//  Notes:      Explicit range checks are used instead of isxdigit(), whose
//              behavior is undefined for values that do not fit in an
//              unsigned char.
//
//----------------------------------------------------------------------------
static int CgiHexDigitValue( WCHAR wc )
{
    if ( wc >= L'0' && wc <= L'9' )
        return wc - L'0';

    if ( wc >= L'A' && wc <= L'F' )
        return ( wc - L'A' ) + 10;

    if ( wc >= L'a' && wc <= L'f' )
        return ( wc - L'a' ) + 10;

    return -1;
}

//+---------------------------------------------------------------------------
//
//  Function:   DecodeEscapes - Decode escapes
//
//  Synopsis:   Removes the escape characters from a Unicode string.
//
//              '+'   becomes a space
//              "%%"  becomes '%'
//              "%XY" becomes the character with hex value XY
//              A '%' not followed by a valid escape is copied as is.
//
//  Arguments:  [pIn]  - string to convert
//              [l]    - length of string in chars; on return, the number
//                       of chars written to pOut, not counting the
//                       terminating null
//              [pOut] - converted string, null terminated.  May be the
//                       same buffer as pIn: the write position never
//                       passes the read position.
//
//  Notes:      No more than [l] chars of pIn are examined.
//
//----------------------------------------------------------------------------
void DecodeEscapes( WCHAR * pIn, ULONG & l, WCHAR * pOut )
{
    WCHAR * p2 = pOut;
    ULONG l2 = l;

    for( ; l2; l2-- )
    {
        //
        //  Spaces are escaped by converting them into plus signs.
        //  Convert them back.
        //
        if ( *pIn == L'+' )
        {
            *p2++ = L' ';
            pIn++;
        }
        else if ( *pIn == L'%' )
        {
            //
            //  Special characters are converted to values of the format %XY
            //  where XY is the HEX code for the ASCII character.
            //
            //  A percent sign is transmitted as %%.
            //
            //  Only look at the following char if it is part of the input;
            //  otherwise the read is out of bounds and the extra decrement
            //  of l2 below would wrap the unsigned counter.
            //
            if ( l2 >= 2 && *(pIn+1) == L'%' )
            {
                *p2++ = L'%';
                pIn += 2;
                l2--;
            }
            else if ( l2 > 2 )
            {
                // Read both digits before writing, since pOut may alias pIn.

                int const iHigh = CgiHexDigitValue( *(pIn+1) );
                int const iLow  = CgiHexDigitValue( *(pIn+2) );

                pIn++;

                if ( iHigh >= 0 && iLow >= 0 )
                {
                    *p2++ = (WCHAR) ( iHigh*16 + iLow );
                    pIn += 2;
                    l2 -= 2;
                }
                else
                {
                    // Not a valid escape; emit the '%' and rescan what
                    // follows as ordinary characters.

                    *p2++ = L'%';
                }
            }
            else
            {
                // Too few characters remain for an escape.

                *p2++ = *pIn++;
            }
        }
        else
        {
            *p2++ = *pIn++;
        }
    }

    *p2 = 0;
    l = CiPtrToUlong( p2 - pOut );
}
//+---------------------------------------------------------------------------
//
//  Function:   DecodeHtmlNumeric - decode HTML numeric entity
//
//  Synopsis:   Looks for sequences like "&#12345;" and converts in-place
//              to a single unicode character.
//
//  Arguments:  [pIn] - null-terminated string to convert
//
//  Notes:      Only well-formed decimal entities are converted: "&#", one
//              or more decimal digits, then ';', with a value from 1
//              through 0xFFFF.  Anything else that starts with "&#"
//              (missing ';', no digits, non-digit characters, a value of
//              zero or one too large for a WCHAR) is copied unchanged.
//
//              If the string contains no '&' it is not modified.
//
//----------------------------------------------------------------------------
void DecodeHtmlNumeric( WCHAR * pIn )
{
    // Nothing before the first '&' can change, so start there.

    pIn = wcschr( pIn, L'&' );
    WCHAR * p2 = pIn;

    while ( pIn && *pIn )
    {
        if ( *pIn == L'&' && pIn[1] == L'#' )
        {
            WCHAR * pScan = pIn + 2;
            BOOL fHasDigit = ( *pScan >= L'0' && *pScan <= L'9' );
            ULONG ulValue = 0;

            while ( *pScan >= L'0' && *pScan <= L'9' )
            {
                // Stop accumulating once the value is out of range, so
                // that a long run of digits cannot overflow ulValue.

                if ( ulValue <= 0xFFFF )
                    ulValue = ulValue*10 + ( *pScan - L'0' );

                pScan++;
            }

            if ( fHasDigit &&
                 *pScan == L';' &&
                 ulValue != 0 &&
                 ulValue <= 0xFFFF )
            {
                // The write position never passes the read position, so
                // storing the result cannot clobber unread input.

                *p2++ = (WCHAR) ulValue;
                pIn = pScan + 1;
                continue;
            }

            // Malformed entity: fall through and copy it literally.
        }

        *p2++ = *pIn++;
    }

    if ( p2 )
        *p2 = 0;
}
|