You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
135 lines
3.6 KiB
135 lines
3.6 KiB
//+-------------------------------------------------------------------------
|
|
//
|
|
// Copyright (C) 1991, Microsoft Corporation.
|
|
//
|
|
// File: XlatChar.hxx
|
|
//
|
|
// Contents: Character translation class.
|
|
//
|
|
// Classes: CXlatChar
|
|
//
|
|
// History: 01-20-92 KyleP Created
|
|
// 03-11-97 arunk Modified for regex lib
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
#ifndef __XLATCHAR_HXX__
|
|
#define __XLATCHAR_HXX__
|
|
|
|
|
|
#include <windows.h>
|
|
|
|
//
|
|
// Special character equivalence classes. I suppose in theory these
|
|
// could doubly map to character classes in the regex but nearly
|
|
// every unicode symbol must be treated uniquely somewhere in the
|
|
// regex for that to happen.
|
|
//
|
|
|
|
UINT const symAny = 1; // Any single character (except BEG/END LINE)
|
|
|
|
UINT const symBeginLine = 2; // Special character indicating beginning
|
|
// of line.
|
|
|
|
UINT const symEndLine = 3; // Special character indicating end of line.
|
|
|
|
UINT const symInvalid = 4; // Guaranteed invalid
|
|
|
|
UINT const symEpsilon = 5; // Epsilon move.
|
|
|
|
UINT const symDot = 6; // '.' Don't ask. It's for DOS compliance
|
|
|
|
UINT const cSpecialCharClasses = symDot; // 'normal' character classes
|
|
// start here.
|
|
|
|
//
|
|
// Wide character values less than or equal to symLastValidCharacter have
|
|
// no special signifigance.
|
|
//
|
|
|
|
UINT const symLastValidChar = 0xffffffff;
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Class: CXlatChar
|
|
//
|
|
// Purpose: Maps UniCode characters to equivalence class(es).
|
|
//
|
|
// History: 20-Jan-92 KyleP Created
|
|
//
|
|
// Notes: Equivalence classes must consist of sequential characters.
|
|
// The implementation for this class is a sorted array of
|
|
// characters. Each character marks the end of a range. The
|
|
// class for a given character is found by binary searching
|
|
// the array until you end up in the appropriate range.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
class CXlatChar
|
|
{
|
|
public:
|
|
|
|
CXlatChar( bool fCaseSens );
|
|
|
|
CXlatChar( CXlatChar const & src );
|
|
|
|
inline ~CXlatChar();
|
|
|
|
void AddRange( WCHAR wcStart, WCHAR wcEnd );
|
|
|
|
UINT Translate( WCHAR wc );
|
|
|
|
UINT TranslateRange( WCHAR wcStart, WCHAR wcEnd );
|
|
|
|
inline UINT NumClasses() const;
|
|
|
|
void Prepare();
|
|
|
|
private:
|
|
|
|
UINT _Search( WCHAR wc );
|
|
|
|
void _Realloc();
|
|
|
|
WCHAR * _pwcRangeEnd; // Character in position i is the end
|
|
// of the ith range.
|
|
UINT _cRange;
|
|
UINT _cAllocation;
|
|
|
|
UINT _iPrevRange;
|
|
|
|
bool _fCaseSens; // true if case sensitive mapping.
|
|
|
|
};
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Member: CXlatChar::NumClasses, public
|
|
//
|
|
// Returns: The number of different equivalence classes.
|
|
//
|
|
// History: 20-Jan-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
inline UINT CXlatChar::NumClasses() const
|
|
{
|
|
return( _cRange + cSpecialCharClasses );
|
|
}
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Member: CXlatChar::~CXlatChar, public
|
|
//
|
|
// Synopsis: Destroys class.
|
|
//
|
|
// History: 20-Jan-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
inline CXlatChar::~CXlatChar()
|
|
{
|
|
delete _pwcRangeEnd;
|
|
}
|
|
|
|
#endif //__XLATCHAR_HXX__
|