Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

456 lines
11 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991-1998.
  5. //
  6. // File: FA.hxx
  7. //
  8. // Contents: Non-deterministic finite automata
  9. //
  10. // Classes: CFA
  11. // CNFA
  12. // CDFA
  13. //
  14. // History: 20-Jan-92 KyleP Created
  15. // 19-Jun-92 KyleP Cleanup
  16. //
  17. //--------------------------------------------------------------------------
  18. #pragma once
  19. #include <xpr.hxx>
  20. #include <state.hxx>
  21. #include <xlatstat.hxx>
  22. #include <xlatchar.hxx>
  23. #include <timlimit.hxx>
  24. class CInternalPropertyRestriction;
  25. WCHAR const wcAnySingle = '?';
  26. WCHAR const wcAnyMultiple = '*';
  27. WCHAR const wcDOSDot = '.';
  28. WCHAR const wcRepeatZero = '*';
  29. WCHAR const wcRepeatOne = '+';
  30. WCHAR const wcRepeatZeroOrOne = '?';
  31. WCHAR const wcBeginRange = '[';
  32. WCHAR const wcEndRange = ']';
  33. WCHAR const wcInvertRange = '^';
  34. WCHAR const wcRangeSep = '-';
  35. WCHAR const wcEscape = '|';
  36. WCHAR const wcOr = ',';
  37. WCHAR const wcBeginParen = '(';
  38. WCHAR const wcEndParen = ')';
  39. WCHAR const wcBeginRepeat = '{';
  40. WCHAR const wcEndRepeat = '}';
  41. WCHAR const wcNextRepeat = ',';
  42. WCHAR const wcLastValidChar = 0xFFFF;
  43. //
  44. // Note that these are the 'top level' special characters.
  45. // Characters *on or after* these characters may have special meaning.
  46. //
  47. WCHAR const awcSpecialRegex[] = L"?*.|";
  48. char const acSpecialRegex[] = "?*.|";
  49. WCHAR const awcSpecialRegexReverse[] = L"?*.|+]),}";
  50. //+-------------------------------------------------------------------------
  51. //
  52. // Class: CFA
  53. //
  54. // Purpose: Base class for finite automata.
  55. //
  56. // History: 20-Jan-92 KyleP Created
  57. //
  58. //--------------------------------------------------------------------------
  59. class CFA
  60. {
  61. protected:
  62. inline CFA();
  63. CFA( CFA const & src );
  64. ~CFA();
  65. void Add( CFAState * pState );
  66. CFAState * Get( unsigned iState );
  67. inline unsigned Count();
  68. private:
  69. unsigned _cTotal;
  70. CFAState ** _ppState;
  71. };
  72. //+-------------------------------------------------------------------------
  73. //
  74. // Class: CNFA
  75. //
  76. // Purpose: Non-deterministic finite automata.
  77. //
  78. // History: 20-Jan-92 Kylep Created
  79. //
  80. //--------------------------------------------------------------------------
  81. class CNFA
  82. {
  83. public:
  84. CNFA( WCHAR const * pwcs, BOOLEAN fCaseSens );
  85. CNFA( CNFA const & src );
  86. ~CNFA();
  87. inline unsigned StartState();
  88. void EpsClosure( unsigned StateNum, CStateSet & ssOut );
  89. void EpsClosure( CStateSet & ssIn, CStateSet & ssOut );
  90. void Move( CStateSet & ssIn, CStateSet & ssOut, unsigned symbol = symEpsilon );
  91. BOOLEAN IsFinal( CStateSet & ss );
  92. inline CXlatChar const & Translate() const;
  93. inline unsigned NumStates() const;
  94. private:
  95. inline CNFAState * Get( unsigned iState );
  96. void Parse( WCHAR const * wcs,
  97. unsigned * iStart,
  98. unsigned * iEnd,
  99. WCHAR const * * pwcsEnd = 0,
  100. WCHAR wcHalt = 0 );
  101. void ParseRepeat( WCHAR const * & wcs,
  102. unsigned & cRepeat1,
  103. unsigned & cRepeat2 );
  104. void FindCharClasses( WCHAR const * wcs );
  105. void Replicate( unsigned iStart,
  106. unsigned iEnd,
  107. unsigned * piNewStart,
  108. unsigned * piNewEnd );
  109. unsigned _iStart; // Start state
  110. unsigned _iNextState;
  111. static WCHAR * _wcsNull;
  112. CXlatChar _chars; // Wide character translator
  113. XArray<CNFAState> _aState; // State array.
  114. #if (CIDBG == 1)
  115. public:
  116. //
  117. // Debug methods.
  118. //
  119. void Display();
  120. #endif // (CIDBG == 1)
  121. };
  122. //+-------------------------------------------------------------------------
  123. //
  124. // Class: CDFA
  125. //
  126. // Purpose: Deterministic finite automata.
  127. //
  128. // History: 20-Jan-92 Kylep Created
  129. //
  130. //--------------------------------------------------------------------------
  131. class CDFA : public CFA
  132. {
  133. public:
  134. CDFA( WCHAR const * pwcs, CTimeLimit & timeLimit, BOOLEAN fCaseSens );
  135. CDFA( CDFA const & CDFA );
  136. ~CDFA();
  137. BOOLEAN Recognize( WCHAR const * wcs );
  138. private:
  139. void CommonCtor( );
  140. inline BOOLEAN IsFinal( unsigned state );
  141. inline unsigned Move( unsigned state, unsigned sym ) const;
  142. inline void AddTransition( unsigned state, unsigned sym, unsigned newstate );
  143. inline BOOLEAN IsComputed( unsigned state );
  144. void Add( unsigned state, BOOLEAN fFinal );
  145. void Realloc();
  146. # if CIDBG == 1
  147. void ValidateStateTransitions();
  148. # endif // CIDBG == 1
  149. CNFA _nfa; // This must be the first member variable.
  150. CXlatState _xs; // Translate NFA state set to DFA state.
  151. unsigned _stateStart; // Starting DFA state.
  152. unsigned _cState; // Number of states
  153. XArray<unsigned> _xStateTrans; // Array of state transitions.
  154. XArray<BOOLEAN> _xStateFinal; // _xStateFinal[i] TRUE if i is final state.
  155. CReadWriteAccess _rwa; // Locking.
  156. CTimeLimit & _timeLimit; // Execution time limit
  157. };
  158. //+-------------------------------------------------------------------------
  159. //
  160. // Class: CRegXpr (regx)
  161. //
  162. // Purpose: Performs regular expression matches on properties
  163. //
  164. // History: 15-Apr-92 KyleP Created
  165. //
  166. //--------------------------------------------------------------------------
  167. class CRegXpr : public CXpr
  168. {
  169. public:
  170. CRegXpr( CInternalPropertyRestriction * prst, CTimeLimit& timeLimit );
  171. CRegXpr( CRegXpr const & regxpr );
  172. virtual ~CRegXpr() {};
  173. virtual CXpr * Clone();
  174. virtual void SelectIndexing( CIndexStrategy & strategy );
  175. virtual BOOL IsMatch( CRetriever & obj );
  176. private:
  177. CXprPropertyValue _pxpval; // Retrieves value from database
  178. XPtr<CRestriction> _xrstContentHelper; // Use content indexing
  179. CStorageVariant _varPrefix; // Fixed prefix (for value indexing)
  180. CDFA _dfa; // Finite automata engine
  181. ULONG _ulCodePage; // Code page of system
  182. };
  183. //+-------------------------------------------------------------------------
  184. //
  185. // Member: CFA::CFA, protected
  186. //
  187. // Synopsis: Intializes a generic finite automata.
  188. //
  189. // History: 20-Jan-92 KyleP Created
  190. //
  191. //--------------------------------------------------------------------------
  192. inline CFA::CFA()
  193. : _cTotal( 0 ),
  194. _ppState( 0 )
  195. {
  196. }
  197. //+-------------------------------------------------------------------------
  198. //
  199. // Member: CFA::Count, protected
  200. //
  201. // Synopsis: Returns the count of states.
  202. //
  203. // History: 20-Jan-92 KyleP Created
  204. //
  205. //--------------------------------------------------------------------------
  206. inline unsigned CFA::Count()
  207. {
  208. return( _cTotal );
  209. }
  210. //+-------------------------------------------------------------------------
  211. //
  212. // Member: CNFA::Get, private
  213. //
  214. // Arguments: [iState] -- Index of state.
  215. //
  216. // Returns: The appropriate state.
  217. //
  218. // History: 20-Jan-92 Kylep Created
  219. //
  220. //--------------------------------------------------------------------------
  221. inline CNFAState * CNFA::Get( unsigned iState )
  222. {
  223. if ( iState > _aState.Count() )
  224. {
  225. unsigned cNewState = iState + 10;
  226. XArray<CNFAState> xState( cNewState );
  227. for ( unsigned i = 0; i < _aState.Count(); i++ )
  228. xState[i].Init( _aState[i] );
  229. for ( ; i < cNewState; i++ )
  230. xState[i].Init(i+1);
  231. _aState.Free();
  232. _aState.Set( cNewState, xState.Acquire() );
  233. }
  234. return &_aState[ iState - 1 ];
  235. }
  236. //+-------------------------------------------------------------------------
  237. //
  238. // Member: CNFA::StartState, private
  239. //
  240. // Returns: The start state.
  241. //
  242. // History: 20-Jan-92 Kylep Created
  243. //
  244. //--------------------------------------------------------------------------
  245. inline unsigned CNFA::StartState()
  246. {
  247. return( _iStart );
  248. }
  249. //+-------------------------------------------------------------------------
  250. //
  251. // Member: CNFA::Translate, private
  252. //
  253. // Returns: The character translator.
  254. //
  255. // History: 20-Jan-92 Kylep Created
  256. //
  257. //--------------------------------------------------------------------------
  258. inline CXlatChar const & CNFA::Translate() const
  259. {
  260. return( _chars );
  261. }
  262. //+-------------------------------------------------------------------------
  263. //
  264. // Member: CNFA::NumStates, public
  265. //
  266. // Returns: The count of states currently in the automata.
  267. //
  268. // History: 20-Jan-92 Kylep Created
  269. //
  270. //--------------------------------------------------------------------------
  271. inline unsigned CNFA::NumStates() const
  272. {
  273. return( _iNextState );
  274. }
  275. //+-------------------------------------------------------------------------
  276. //
  277. // Member: CDFA::IsFinal, public
  278. //
  279. // Arguments: [state] -- Index of state.
  280. //
  281. // Returns: TRUE if state [state] is final.
  282. //
  283. // History: 20-Jan-92 Kylep Created
  284. //
  285. //--------------------------------------------------------------------------
  286. inline BOOLEAN CDFA::IsFinal( unsigned state )
  287. {
  288. return( _xStateFinal[ state ] );
  289. }
  290. //+-------------------------------------------------------------------------
  291. //
  292. // Member: CDFA::Move, public
  293. //
  294. // Arguments: [state] -- Index of state.
  295. // [sym] -- Input symbol
  296. //
  297. // Returns: The new state reached from state [state] on an input
  298. // symbol [sym].
  299. //
  300. // History: 20-Jan-92 Kylep Created
  301. //
  302. // Notes: If this function is ever changed to modify data, then
  303. // you need to also investigate the locking in CDFA::Recognize.
  304. //
  305. //--------------------------------------------------------------------------
  306. inline unsigned CDFA::Move( unsigned state, unsigned sym ) const
  307. {
  308. return( _xStateTrans[state * (_nfa.Translate().NumClasses() + 1) + sym] );
  309. }
  310. //+-------------------------------------------------------------------------
  311. //
  312. // Member: CDFA::AddTransition, private
  313. //
  314. // Effects: Adds a transtion from state [state] on input symbol [sym]
  315. // to state [newstate].
  316. //
  317. // Arguments: [state] -- Index of state.
  318. // [sym] -- Input symbol.
  319. // [newstate] -- Index of state
  320. //
  321. // History: 20-Jan-92 Kylep Created
  322. //
  323. //--------------------------------------------------------------------------
  324. inline void CDFA::AddTransition( unsigned state, unsigned sym, unsigned newstate )
  325. {
  326. _xStateTrans[ state * ( _nfa.Translate().NumClasses() + 1 ) + sym ] =
  327. newstate;
  328. }
  329. //+-------------------------------------------------------------------------
  330. //
  331. // Member: CDFA::IsComputed, private
  332. //
  333. // Arguments: [state] -- Index of state.
  334. //
  335. // Returns: TRUE if the DFA contains a transition mapping for state
  336. // [state].
  337. //
  338. // History: 20-Jan-92 Kylep Created
  339. //
  340. // Notes: An uncomputed state is one for which IsFinal has not been
  341. // computed. All transitions other transitions are
  342. // automatically set to stateUncomputed at allocation time.
  343. //
  344. //--------------------------------------------------------------------------
  345. inline BOOLEAN CDFA::IsComputed( unsigned state )
  346. {
  347. return ( state <= _cState &&
  348. Move( state, 0 ) != stateUndefined );
  349. }