Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

398 lines
9.6 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // Copyright (C) 1991, Microsoft Corporation.
  4. //
  5. // File: FA.hxx
  6. //
  7. // Contents: Non-deterministic finite automata
  8. //
  9. // Classes: CFA
  10. // CNFA
  11. // CDFA
  12. //
  13. // History: 20-Jan-92 KyleP Created
  14. // 19-Jun-92 KyleP Cleanup
  15. // 11-Mar-97 arunk Cleaned up for Kessel
  16. //
  17. //--------------------------------------------------------------------------
  18. #if !defined( __FA_HXX__ )
  19. #define __FA_HXX__
  20. #include <state.hxx>
  21. #include <xlatstat.hxx>
  22. #include <xlatchar.hxx>
  23. class CInternalPropertyRestriction;
  24. WCHAR const wcAnySingle = '?';
  25. WCHAR const wcAnyMultiple = '*';
  26. WCHAR const wcDOSDot = '.';
  27. WCHAR const wcRepeatZero = '*';
  28. WCHAR const wcRepeatOne = '+';
  29. WCHAR const wcRepeatZeroOrOne = '?';
  30. WCHAR const wcBeginRange = '[';
  31. WCHAR const wcEndRange = ']';
  32. WCHAR const wcInvertRange = '^';
  33. WCHAR const wcRangeSep = '-';
  34. WCHAR const wcEscape = '|';
  35. WCHAR const wcOr = ',';
  36. WCHAR const wcBeginParen = '(';
  37. WCHAR const wcEndParen = ')';
  38. WCHAR const wcBeginRepeat = '{';
  39. WCHAR const wcEndRepeat = '}';
  40. WCHAR const wcNextRepeat = ',';
  41. //
  42. // Note that these are the 'top level' special characters.
  43. // Characters *on or after* these characters may have special meaning.
  44. //
  45. WCHAR const awcSpecialRegex[] = L"?*.|";
  46. WCHAR const awcSpecialRegexReverse[] = L"?*.|+]),}";
  47. //+-------------------------------------------------------------------------
  48. //
  49. // Class: CFA
  50. //
  51. // Purpose: Base class for finite automata.
  52. //
  53. // History: 20-Jan-92 KyleP Created
  54. //
  55. //--------------------------------------------------------------------------
  56. class CFA
  57. {
  58. protected:
  59. inline CFA();
  60. CFA( CFA const & src );
  61. ~CFA();
  62. void Add( CFAState * pState );
  63. CFAState * Get( UINT iState );
  64. inline UINT Count();
  65. private:
  66. UINT _cTotal;
  67. CFAState ** _ppState;
  68. };
  69. //+-------------------------------------------------------------------------
  70. //
  71. // Class: CNFA
  72. //
  73. // Purpose: Non-deterministic finite automata.
  74. //
  75. // History: 20-Jan-92 Kylep Created
  76. //
  77. //--------------------------------------------------------------------------
  78. class CNFA
  79. {
  80. public:
  81. CNFA( WCHAR const * pwcs, bool fCaseSens );
  82. CNFA( CNFA const & src );
  83. ~CNFA();
  84. inline UINT StartState();
  85. void EpsClosure( UINT StateNum, CStateSet & ssOut );
  86. void EpsClosure( CStateSet & ssIn, CStateSet & ssOut );
  87. void Move( CStateSet & ssIn, CStateSet & ssOut, UINT symbol = symEpsilon );
  88. bool IsFinal( CStateSet & ss );
  89. inline CXlatChar & Translate();
  90. inline UINT NumStates() const;
  91. private:
  92. inline CNFAState * Get( UINT iState );
  93. void Parse( WCHAR const * wcs,
  94. UINT * iStart,
  95. UINT * iEnd,
  96. WCHAR const * * pwcsEnd = 0,
  97. WCHAR wcHalt = 0 );
  98. void ParseRepeat( WCHAR const * & wcs, unsigned & cRepeat1, unsigned & cRepeat2 );
  99. void FindCharClasses( WCHAR const * wcs );
  100. void Replicate( UINT iStart,
  101. UINT iEnd,
  102. UINT * piNewStart,
  103. UINT * piNewEnd );
  104. unsigned _iStart; // Start state
  105. unsigned _iNextState;
  106. static WCHAR * _wcsNull;
  107. CXlatChar _chars; // Wide character translator
  108. unsigned _cState; // Used during copy construction
  109. CNFAState * _pState; // State array.
  110. };
  111. //+-------------------------------------------------------------------------
  112. //
  113. // Class: CDFA
  114. //
  115. // Purpose: Deterministic finite automata.
  116. //
  117. // History: 20-Jan-92 Kylep Created
  118. //
  119. //--------------------------------------------------------------------------
  120. class CDFA : public CFA
  121. {
  122. public:
  123. CDFA( WCHAR const * pwcs, bool fCaseSens );
  124. CDFA( CDFA const & CDFA );
  125. ~CDFA();
  126. bool Recognize( WCHAR * wcs );
  127. private:
  128. void CommonCtor( );
  129. inline bool IsFinal( UINT state );
  130. inline UINT Move( UINT state, UINT sym );
  131. inline void AddTransition( UINT state, UINT sym, UINT newstate );
  132. inline bool IsComputed( UINT state );
  133. void Add( UINT state, bool fFinal );
  134. void Realloc();
  135. CNFA _nfa; // This must be the first member variable.
  136. CXlatState _xs; // Translate NFA state set to DFA state.
  137. UINT _stateStart; // Starting DFA state.
  138. UINT _cState; // Number of states
  139. UINT * _pStateTrans; // Array of state transitions.
  140. bool * _pStateFinal; // _pStateFinal[i] true if i is final state.
  141. };
  142. //+-------------------------------------------------------------------------
  143. //
  144. // Member: CFA::CFA, protected
  145. //
  146. // Synopsis: Initializes a generic finite automaton.
  147. //
  148. // History: 20-Jan-92 KyleP Created
  149. //
  150. //--------------------------------------------------------------------------
  151. inline CFA::CFA()
  152. : _cTotal( 0 ),
  153. _ppState( 0 ){
  154. }
  155. //+-------------------------------------------------------------------------
  156. //
  157. // Member: CFA::Count, protected
  158. //
  159. // Synopsis: Returns the count of states.
  160. //
  161. // History: 20-Jan-92 KyleP Created
  162. //
  163. //--------------------------------------------------------------------------
  164. inline UINT CFA::Count()
  165. {
  166. return( _cTotal );
  167. }
  168. //+-------------------------------------------------------------------------
  169. //
  170. // Member: CNFA::Get, private
  171. //
  172. // Arguments: [iState] -- Index of state.
  173. //
  174. // Returns: The appropriate state.
  175. //
  176. // History: 20-Jan-92 Kylep Created
  177. //
  178. //--------------------------------------------------------------------------
  179. inline CNFAState * CNFA::Get( UINT iState )
  180. {
  181. if ( iState > _cState )
  182. {
  183. unsigned cNewState = iState + 10;
  184. CNFAState * pNewState = new CNFAState [cNewState];
  185. for ( unsigned i = 0; i < _cState; i++ )
  186. pNewState[i].Init( _pState[i] );
  187. for ( ; i < cNewState; i++ )
  188. pNewState[i].Init(i+1);
  189. delete [] _pState;
  190. _pState = pNewState;
  191. _cState = cNewState;
  192. }
  193. return &_pState[ iState - 1 ];
  194. }
  195. //+-------------------------------------------------------------------------
  196. //
  197. // Member: CNFA::StartState, public
  198. //
  199. // Returns: The start state.
  200. //
  201. // History: 20-Jan-92 Kylep Created
  202. //
  203. //--------------------------------------------------------------------------
  204. inline UINT CNFA::StartState()
  205. {
  206. return( _iStart );
  207. }
  208. //+-------------------------------------------------------------------------
  209. //
  210. // Member: CNFA::Translate, public
  211. //
  212. // Returns: The character translator.
  213. //
  214. // History: 20-Jan-92 Kylep Created
  215. //
  216. //--------------------------------------------------------------------------
  217. inline CXlatChar & CNFA::Translate()
  218. {
  219. return( _chars );
  220. }
  221. //+-------------------------------------------------------------------------
  222. //
  223. // Member: CNFA::NumStates, public
  224. //
  225. // Returns: The count of states currently in the automata.
  226. //
  227. // History: 20-Jan-92 Kylep Created
  228. //
  229. //--------------------------------------------------------------------------
  230. inline UINT CNFA::NumStates() const
  231. {
  232. return( _iNextState );
  233. }
  234. //+-------------------------------------------------------------------------
  235. //
  236. // Member: CDFA::IsFinal, private
  237. //
  238. // Arguments: [state] -- Index of state.
  239. //
  240. // Returns: true if state [state] is final.
  241. //
  242. // History: 20-Jan-92 Kylep Created
  243. //
  244. //--------------------------------------------------------------------------
  245. inline bool CDFA::IsFinal( UINT state )
  246. {
  247. return( _pStateFinal[ state ] );
  248. }
  249. //+-------------------------------------------------------------------------
  250. //
  251. // Member: CDFA::Move, private
  252. //
  253. // Arguments: [state] -- Index of state.
  254. // [sym] -- Input symbol
  255. //
  256. // Returns: The new state reached from state [state] on an input
  257. // symbol [sym].
  258. //
  259. // History: 20-Jan-92 Kylep Created
  260. //
  261. //--------------------------------------------------------------------------
  262. inline UINT CDFA::Move( UINT state, UINT sym )
  263. {
  264. return( _pStateTrans[state * (_nfa.Translate().NumClasses() + 1) + sym] );
  265. }
  266. //+-------------------------------------------------------------------------
  267. //
  268. // Member: CDFA::AddTransition, private
  269. //
  270. // Effects: Adds a transition from state [state] on input symbol [sym]
  271. // to state [newstate].
  272. //
  273. // Arguments: [state] -- Index of state.
  274. // [sym] -- Input symbol.
  275. // [newstate] -- Index of state
  276. //
  277. // History: 20-Jan-92 Kylep Created
  278. //
  279. //--------------------------------------------------------------------------
  280. inline void CDFA::AddTransition( UINT state, UINT sym, UINT newstate )
  281. {
  282. _pStateTrans[ state * ( _nfa.Translate().NumClasses() + 1 ) + sym ] =
  283. newstate;
  284. }
  285. //+-------------------------------------------------------------------------
  286. //
  287. // Member: CDFA::IsComputed, private
  288. //
  289. // Arguments: [state] -- Index of state.
  290. //
  291. // Returns: true if the DFA contains a transition mapping for state
  292. // [state].
  293. //
  294. // History: 20-Jan-92 Kylep Created
  295. //
  296. // Notes: An uncomputed state is one for which IsFinal has not been
  297. // computed. All other transitions are automatically set to
  298. // stateUncomputed at allocation time.
  299. //
  300. //--------------------------------------------------------------------------
  301. inline bool CDFA::IsComputed( UINT state )
  302. {
  303. return ( state <= _cState &&
  304. Move( state, 0 ) != stateUndefined );
  305. }
  306. #endif // __FA_HXX__