You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
139 lines
3.6 KiB
139 lines
3.6 KiB
//+-------------------------------------------------------------------------
|
|
//
|
|
// Microsoft Windows
|
|
// Copyright (C) Microsoft Corporation, 1992-1998.
|
|
//
|
|
// File: XlatChar.hxx
|
|
//
|
|
// Contents: Character translation class.
|
|
//
|
|
// Classes: CXlatChar
|
|
//
|
|
// History: 01-20-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
#pragma once
|
|
|
|
#ifdef DISPLAY_INCLUDES
|
|
#pragma message( "#include <" __FILE__ ">..." )
|
|
#endif
|
|
|
|
//
|
|
// Special character equivalence classes. I suppose in theory these
|
|
// could doubly map to character classes in the regex but nearly
|
|
// every unicode symbol must be treated uniquely somewhere in the
|
|
// regex for that to happen.
|
|
//
|
|
|
|
UINT const symAny = 1; // Any single character (except BEG/END LINE)
|
|
|
|
UINT const symBeginLine = 2; // Special character indicating beginning
|
|
// of line.
|
|
|
|
UINT const symEndLine = 3; // Special character indicating end of line.
|
|
|
|
UINT const symInvalid = 4; // Guaranteed invalid
|
|
|
|
UINT const symEpsilon = 5; // Epsilon move.
|
|
|
|
UINT const symDot = 6; // '.' Don't ask. It's for DOS compliance
|
|
|
|
UINT const cSpecialCharClasses = symDot; // 'normal' character classes
|
|
// start here.
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Class: CXlatChar
|
|
//
|
|
// Purpose: Maps UniCode characters to equivalence class(es).
|
|
//
|
|
// History: 20-Jan-92 KyleP Created
|
|
//
|
|
// Notes: Equivalence classes must consist of sequential characters.
|
|
// The implementation for this class is a sorted array of
|
|
// characters. Each character marks the end of a range. The
|
|
// class for a given character is found by binary searching
|
|
// the array until you end up in the appropriate range.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
class CXlatChar
|
|
{
|
|
public:
|
|
|
|
CXlatChar( BOOLEAN fCaseSens );
|
|
|
|
CXlatChar( CXlatChar const & src );
|
|
|
|
inline ~CXlatChar();
|
|
|
|
void AddRange( WCHAR wcStart, WCHAR wcEnd );
|
|
|
|
UINT Translate( WCHAR wc ) const;
|
|
|
|
UINT TranslateRange( WCHAR wcStart, WCHAR wcEnd );
|
|
|
|
inline UINT NumClasses() const;
|
|
|
|
void Prepare();
|
|
|
|
private:
|
|
|
|
UINT _Search( WCHAR wc );
|
|
|
|
void _Realloc();
|
|
|
|
WCHAR * _pwcRangeEnd; // Character in position i is the end
|
|
// of the ith range.
|
|
UINT _cRange;
|
|
UINT _cAllocation;
|
|
|
|
UINT _iPrevRange;
|
|
|
|
BOOLEAN _fCaseSens; // TRUE if case sensitive mapping.
|
|
|
|
#if (CIDBG == 1)
|
|
BOOLEAN _fPrepared;
|
|
|
|
public:
|
|
|
|
//
|
|
// Debug methods
|
|
//
|
|
|
|
void Display() const;
|
|
|
|
#endif
|
|
};
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Member: CXlatChar::NumClasses, public
|
|
//
|
|
// Returns: The number of different equivalence classes.
|
|
//
|
|
// History: 20-Jan-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
inline UINT CXlatChar::NumClasses() const
|
|
{
|
|
return( _cRange + cSpecialCharClasses );
|
|
}
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Member: CXlatChar::~CXlatChar, public
|
|
//
|
|
// Synopsis: Destroys class.
|
|
//
|
|
// History: 20-Jan-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
inline CXlatChar::~CXlatChar()
|
|
{
|
|
delete _pwcRangeEnd;
|
|
}
|
|
|