Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

796 lines
22 KiB

  1. /*++
  2. Copyright (c) 1989 Microsoft Corporation
  3. Module Name:
  4. match.cpp
  5. Originally name.c by Gary Kimura
  6. Abstract:
  7. The unicode name support package is for manipulating unicode strings
  8. The routines allow the caller to dissect and compare strings.
  9. This package uses the same FSRTL_COMPARISON_RESULT typedef used by name.c
  10. The following routines are provided by this package:
  11. o FsRtlDissectName - This routine takes a path name string and breaks
  12. into two parts. The first name in the string and the remainder.
  13. It also checks that the first name is valid for an NT file.
  14. o FsRtlColateNames - This routine is used to colate directories
  15. according to lexical ordering. Lexical ordering is strict unicode
  16. numerical oerdering.
  17. o FsRtlDoesNameContainsWildCards - This routine tells the caller if
  18. a string contains any wildcard characters.
  19. o FsRtlIsNameInExpression - This routine is used to compare a string
  20. against a template (possibly containing wildcards) to sees if the
  21. string is in the language denoted by the template.
  22. Author:
  23. Gary Kimura [GaryKi] 5-Feb-1990
  24. Revision History:
  25. Stefan Steiner [SSteiner] 23-Mar-2000
  26. For use with fsdump - I tried to do as little changes to the actual
  27. matching code as possible.
  28. --*/
  29. #include "stdafx.h"
  30. //
  31. // Local support routine prototypes
  32. //
  33. static BOOLEAN
  34. FsRtlIsNameInExpressionPrivate (
  35. IN const CBsString& InExpression,
  36. IN const CBsString& InName,
  37. IN BOOLEAN IgnoreCase,
  38. IN PWCH UpcaseTable
  39. );
  40. static BOOLEAN
  41. FsRtlDoesNameContainWildCards (
  42. IN PUNICODE_STRING Name
  43. )
  44. /*++
  45. Routine Description:
  46. This routine simply scans the input Name string looking for any Nt
  47. wild card characters.
  48. Arguments:
  49. Name - The string to check.
  50. Return Value:
  51. BOOLEAN - TRUE if one or more wild card characters was found.
  52. --*/
  53. {
  54. INT i;
  55. //
  56. // The wildcards include the standard FsRtl wildcard characters
  57. //
  58. LPWSTR lpsz = ::wcspbrk( Name->Buffer, L"*?\"<>" );
  59. return (lpsz == NULL) ? FALSE : TRUE;
  60. }
  61. //
  62. // The following routine is just a wrapper around
  63. // FsRtlIsNameInExpressionPrivate to make a last minute fix a bit safer.
  64. //
  65. BOOLEAN
  66. FsdRtlIsNameInExpression (
  67. IN const CBsString& Expression, // Must be uppercased
  68. IN const CBsString& Name // Must be uppercased
  69. )
  70. {
  71. BOOLEAN Result;
  72. //
  73. // Now call the main routine, remembering to free the upcased string
  74. // if we allocated one.
  75. //
  76. Result = FsRtlIsNameInExpressionPrivate( Expression,
  77. Name,
  78. FALSE,
  79. NULL );
  80. return Result;
  81. }
  82. #define MATCHES_ARRAY_SIZE 16
  83. //
  84. // Local support routine prototypes
  85. //
  86. static BOOLEAN
  87. FsRtlIsNameInExpressionPrivate (
  88. IN const CBsString& InExpression,
  89. IN const CBsString& InName,
  90. IN BOOLEAN IgnoreCase,
  91. IN PWCH UpcaseTable
  92. )
  93. /*++
  94. Routine Description:
  95. This routine compares a Dbcs name and an expression and tells the caller
  96. if the name is in the language defined by the expression. The input name
  97. cannot contain wildcards, while the expression may contain wildcards.
  98. Expression wild cards are evaluated as shown in the nondeterministic
  99. finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM.
  100. ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT
  101. S
  102. <-----<
  103. X | | e Y
  104. X * Y == (0)----->-(1)->-----(2)-----(3)
  105. S-.
  106. <-----<
  107. X | | e Y
  108. X ~* Y == (0)----->-(1)->-----(2)-----(3)
  109. X S S Y
  110. X ?? Y == (0)---(1)---(2)---(3)---(4)
  111. X . . Y
  112. X ~.~. Y == (0)---(1)----(2)------(3)---(4)
  113. | |________|
  114. | ^ |
  115. |_______________|
  116. ^EOF or .^
  117. X S-. S-. Y
  118. X ~?~? Y == (0)---(1)-----(2)-----(3)---(4)
  119. | |________|
  120. | ^ |
  121. |_______________|
  122. ^EOF or .^
  123. where S is any single character
  124. S-. is any single character except the final .
  125. e is a null character transition
  126. EOF is the end of the name string
  127. In words:
  128. * matches 0 or more characters.
  129. ? matches exactly 1 character.
  130. DOS_STAR matches 0 or more characters until encountering and matching
  131. the final . in the name.
  132. DOS_QM matches any single character, or upon encountering a period or
  133. end of name string, advances the expression to the end of the
  134. set of contiguous DOS_QMs.
  135. DOS_DOT matches either a . or zero characters beyond name string.
  136. Arguments:
  137. Expression - Supplies the input expression to check against
  138. (Caller must already upcase if passing CaseInsensitive TRUE.)
  139. Name - Supplies the input name to check for.
  140. CaseInsensitive - TRUE if Name should be Upcased before comparing.
  141. Return Value:
  142. BOOLEAN - TRUE if Name is an element in the set of strings denoted
  143. by the input Expression and FALSE otherwise.
  144. --*/
  145. {
  146. // The following code is to make use of most of this function without many
  147. // changes.
  148. UNICODE_STRING sExpression;
  149. UNICODE_STRING sName;
  150. PUNICODE_STRING Expression = &sExpression;
  151. PUNICODE_STRING Name = &sName;
  152. sExpression.Length = ( USHORT )( InExpression.GetLength() * sizeof( WCHAR ) );
  153. sExpression.MaximumLength = sExpression.Length;
  154. sExpression.Buffer = ( PWSTR )InExpression.c_str();
  155. sName.Length = ( USHORT )( InName.GetLength() * sizeof( WCHAR ) );
  156. sName.MaximumLength = sName.Length;
  157. sName.Buffer = ( PWSTR )InName.c_str();
  158. USHORT NameOffset;
  159. USHORT ExprOffset;
  160. ULONG SrcCount;
  161. ULONG DestCount;
  162. ULONG PreviousDestCount;
  163. ULONG MatchesCount;
  164. WCHAR NameChar, ExprChar;
  165. USHORT LocalBuffer[MATCHES_ARRAY_SIZE * 2];
  166. USHORT *AuxBuffer = NULL;
  167. USHORT *PreviousMatches;
  168. USHORT *CurrentMatches;
  169. USHORT MaxState;
  170. USHORT CurrentState;
  171. BOOLEAN NameFinished = FALSE;
  172. //
  173. // The idea behind the algorithm is pretty simple. We keep track of
  174. // all possible locations in the regular expression that are matching
  175. // the name. If when the name has been exhausted one of the locations
  176. // in the expression is also just exhausted, the name is in the language
  177. // defined by the regular expression.
  178. //
  179. //
  180. // If one string is empty return FALSE. If both are empty return TRUE.
  181. //
  182. if ( (Name->Length == 0) || (Expression->Length == 0) ) {
  183. return (BOOLEAN)(!(Name->Length + Expression->Length));
  184. }
  185. //
  186. // Special case by far the most common wild card search of *
  187. //
  188. if ((Expression->Length == 2) && (Expression->Buffer[0] == L'*')) {
  189. return TRUE;
  190. }
  191. ASSERT( !IgnoreCase || ARGUMENT_PRESENT(UpcaseTable) );
  192. //
  193. // Also special case expressions of the form *X. With this and the prior
  194. // case we have covered virtually all normal queries.
  195. //
  196. if (Expression->Buffer[0] == L'*') {
  197. UNICODE_STRING LocalExpression;
  198. LocalExpression = *Expression;
  199. LocalExpression.Buffer += 1;
  200. LocalExpression.Length -= 2;
  201. //
  202. // Only special case an expression with a single *
  203. //
  204. if ( !FsRtlDoesNameContainWildCards( &LocalExpression ) ) {
  205. ULONG StartingNameOffset;
  206. if (Name->Length < (USHORT)(Expression->Length - sizeof(WCHAR))) {
  207. return FALSE;
  208. }
  209. StartingNameOffset = ( Name->Length -
  210. LocalExpression.Length ) / sizeof(WCHAR);
  211. //
  212. // Do a simple memory compare if case sensitive, otherwise
  213. // we have got to check this one character at a time.
  214. //
  215. if ( !IgnoreCase ) {
  216. return (BOOLEAN) RtlEqualMemory( LocalExpression.Buffer,
  217. Name->Buffer + StartingNameOffset,
  218. LocalExpression.Length );
  219. } else {
  220. for ( ExprOffset = 0;
  221. ExprOffset < (USHORT)(LocalExpression.Length / sizeof(WCHAR));
  222. ExprOffset += 1 ) {
  223. NameChar = Name->Buffer[StartingNameOffset + ExprOffset];
  224. NameChar = UpcaseTable[NameChar];
  225. ExprChar = LocalExpression.Buffer[ExprOffset];
  226. ASSERT( ExprChar == UpcaseTable[ExprChar] );
  227. if ( NameChar != ExprChar ) {
  228. return FALSE;
  229. }
  230. }
  231. return TRUE;
  232. }
  233. }
  234. }
  235. //
  236. // Walk through the name string, picking off characters. We go one
  237. // character beyond the end because some wild cards are able to match
  238. // zero characters beyond the end of the string.
  239. //
  240. // With each new name character we determine a new set of states that
  241. // match the name so far. We use two arrays that we swap back and forth
  242. // for this purpose. One array lists the possible expression states for
  243. // all name characters up to but not including the current one, and other
  244. // array is used to build up the list of states considering the current
  245. // name character as well. The arrays are then switched and the process
  246. // repeated.
  247. //
  248. // There is not a one-to-one correspondence between state number and
  249. // offset into the expression. This is evident from the NFAs in the
  250. // initial comment to this function. State numbering is not continuous.
  251. // This allows a simple conversion between state number and expression
  252. // offset. Each character in the expression can represent one or two
  253. // states. * and DOS_STAR generate two states: ExprOffset*2 and
  254. // ExprOffset*2 + 1. All other expreesion characters can produce only
  255. // a single state. Thus ExprOffset = State/2.
  256. //
  257. //
  258. // Here is a short description of the variables involved:
  259. //
  260. // NameOffset - The offset of the current name char being processed.
  261. //
  262. // ExprOffset - The offset of the current expression char being processed.
  263. //
  264. // SrcCount - Prior match being investigated with current name char
  265. //
  266. // DestCount - Next location to put a matching assuming current name char
  267. //
  268. // NameFinished - Allows one more itteration through the Matches array
  269. // after the name is exhusted (to come *s for example)
  270. //
  271. // PreviousDestCount - This is used to prevent entry duplication, see coment
  272. //
  273. // PreviousMatches - Holds the previous set of matches (the Src array)
  274. //
  275. // CurrentMatches - Holds the current set of matches (the Dest array)
  276. //
  277. // AuxBuffer, LocalBuffer - the storage for the Matches arrays
  278. //
  279. //
  280. // Set up the initial variables
  281. //
  282. PreviousMatches = &LocalBuffer[0];
  283. CurrentMatches = &LocalBuffer[MATCHES_ARRAY_SIZE];
  284. PreviousMatches[0] = 0;
  285. MatchesCount = 1;
  286. NameOffset = 0;
  287. MaxState = (USHORT)(Expression->Length * 2);
  288. while ( !NameFinished ) {
  289. if ( NameOffset < Name->Length ) {
  290. NameChar = Name->Buffer[NameOffset / sizeof(WCHAR)];
  291. NameOffset += sizeof(WCHAR);;
  292. } else {
  293. NameFinished = TRUE;
  294. //
  295. // if we have already exhasted the expression, cool. Don't
  296. // continue.
  297. //
  298. if ( PreviousMatches[MatchesCount-1] == MaxState ) {
  299. break;
  300. }
  301. }
  302. //
  303. // Now, for each of the previous stored expression matches, see what
  304. // we can do with this name character.
  305. //
  306. SrcCount = 0;
  307. DestCount = 0;
  308. PreviousDestCount = 0;
  309. while ( SrcCount < MatchesCount ) {
  310. USHORT Length;
  311. //
  312. // We have to carry on our expression analysis as far as possible
  313. // for each character of name, so we loop here until the
  314. // expression stops matching. A clue here is that expression
  315. // cases that can match zero or more characters end with a
  316. // continue, while those that can accept only a single character
  317. // end with a break.
  318. //
  319. ExprOffset = (USHORT)((PreviousMatches[SrcCount++] + 1) / 2);
  320. Length = 0;
  321. while ( TRUE ) {
  322. if ( ExprOffset == Expression->Length ) {
  323. break;
  324. }
  325. //
  326. // The first time through the loop we don't want
  327. // to increment ExprOffset.
  328. //
  329. ExprOffset += Length;
  330. Length = sizeof(WCHAR);
  331. CurrentState = (USHORT)(ExprOffset * 2);
  332. if ( ExprOffset == Expression->Length ) {
  333. CurrentMatches[DestCount++] = MaxState;
  334. break;
  335. }
  336. ExprChar = Expression->Buffer[ExprOffset / sizeof(WCHAR)];
  337. ASSERT( !IgnoreCase || !((ExprChar >= L'a') && (ExprChar <= L'z')) );
  338. //
  339. // Before we get started, we have to check for something
  340. // really gross. We may be about to exhaust the local
  341. // space for ExpressionMatches[][], so we have to allocate
  342. // some pool if this is the case. Yuk!
  343. //
  344. if ( (DestCount >= MATCHES_ARRAY_SIZE - 2) &&
  345. (AuxBuffer == NULL) ) {
  346. ULONG ExpressionChars;
  347. ExpressionChars = Expression->Length / sizeof(WCHAR);
  348. AuxBuffer = ( USHORT *)malloc(
  349. (ExpressionChars+1) *
  350. sizeof(USHORT)*2*2 );
  351. if ( AuxBuffer == NULL ) // fix a future prefix bug
  352. throw E_OUTOFMEMORY;
  353. RtlCopyMemory( AuxBuffer,
  354. CurrentMatches,
  355. MATCHES_ARRAY_SIZE * sizeof(USHORT) );
  356. CurrentMatches = AuxBuffer;
  357. RtlCopyMemory( AuxBuffer + (ExpressionChars+1)*2,
  358. PreviousMatches,
  359. MATCHES_ARRAY_SIZE * sizeof(USHORT) );
  360. PreviousMatches = AuxBuffer + (ExpressionChars+1)*2;
  361. }
  362. //
  363. // * matches any character zero or more times.
  364. //
  365. if (ExprChar == L'*') {
  366. CurrentMatches[DestCount++] = CurrentState;
  367. CurrentMatches[DestCount++] = CurrentState + 3;
  368. continue;
  369. }
  370. //
  371. // DOS_STAR matches any character except . zero or more times.
  372. //
  373. if (ExprChar == DOS_STAR) {
  374. BOOLEAN ICanEatADot = FALSE;
  375. //
  376. // If we are at a period, determine if we are allowed to
  377. // consume it, ie. make sure it is not the last one.
  378. //
  379. if ( !NameFinished && (NameChar == '.') ) {
  380. USHORT Offset;
  381. for ( Offset = NameOffset;
  382. Offset < Name->Length;
  383. Offset += Length ) {
  384. if (Name->Buffer[Offset / sizeof(WCHAR)] == L'.') {
  385. ICanEatADot = TRUE;
  386. break;
  387. }
  388. }
  389. }
  390. if (NameFinished || (NameChar != L'.') || ICanEatADot) {
  391. CurrentMatches[DestCount++] = CurrentState;
  392. CurrentMatches[DestCount++] = CurrentState + 3;
  393. continue;
  394. } else {
  395. //
  396. // We are at a period. We can only match zero
  397. // characters (ie. the epsilon transition).
  398. //
  399. CurrentMatches[DestCount++] = CurrentState + 3;
  400. continue;
  401. }
  402. }
  403. //
  404. // The following expreesion characters all match by consuming
  405. // a character, thus force the expression, and thus state
  406. // forward.
  407. //
  408. CurrentState += (USHORT)(sizeof(WCHAR) * 2);
  409. //
  410. // DOS_QM is the most complicated. If the name is finished,
  411. // we can match zero characters. If this name is a '.', we
  412. // don't match, but look at the next expression. Otherwise
  413. // we match a single character.
  414. //
  415. if ( ExprChar == DOS_QM ) {
  416. if ( NameFinished || (NameChar == L'.') ) {
  417. continue;
  418. }
  419. CurrentMatches[DestCount++] = CurrentState;
  420. break;
  421. }
  422. //
  423. // A DOS_DOT can match either a period, or zero characters
  424. // beyond the end of name.
  425. //
  426. if (ExprChar == DOS_DOT) {
  427. if ( NameFinished ) {
  428. continue;
  429. }
  430. if (NameChar == L'.') {
  431. CurrentMatches[DestCount++] = CurrentState;
  432. break;
  433. }
  434. }
  435. //
  436. // From this point on a name character is required to even
  437. // continue, let alone make a match.
  438. //
  439. if ( NameFinished ) {
  440. break;
  441. }
  442. //
  443. // If this expression was a '?' we can match it once.
  444. //
  445. if (ExprChar == L'?') {
  446. CurrentMatches[DestCount++] = CurrentState;
  447. break;
  448. }
  449. //
  450. // Finally, check if the expression char matches the name char
  451. //
  452. if (ExprChar == (WCHAR)(IgnoreCase ?
  453. UpcaseTable[NameChar] : NameChar)) {
  454. CurrentMatches[DestCount++] = CurrentState;
  455. break;
  456. }
  457. //
  458. // The expression didn't match so go look at the next
  459. // previous match.
  460. //
  461. break;
  462. }
  463. //
  464. // Prevent duplication in the destination array.
  465. //
  466. // Each of the arrays is montonically increasing and non-
  467. // duplicating, thus we skip over any source element in the src
  468. // array if we just added the same element to the destination
  469. // array. This guarentees non-duplication in the dest. array.
  470. //
  471. if ((SrcCount < MatchesCount) &&
  472. (PreviousDestCount < DestCount) ) {
  473. while (PreviousDestCount < DestCount) {
  474. while ( PreviousMatches[SrcCount] <
  475. CurrentMatches[PreviousDestCount] ) {
  476. SrcCount += 1;
  477. }
  478. PreviousDestCount += 1;
  479. }
  480. }
  481. }
  482. //
  483. // If we found no matches in the just finished itteration, it's time
  484. // to bail.
  485. //
  486. if ( DestCount == 0 ) {
  487. if (AuxBuffer != NULL) { free( AuxBuffer ); }
  488. return FALSE;
  489. }
  490. //
  491. // Swap the meaning the two arrays
  492. //
  493. {
  494. USHORT *Tmp;
  495. Tmp = PreviousMatches;
  496. PreviousMatches = CurrentMatches;
  497. CurrentMatches = Tmp;
  498. }
  499. MatchesCount = DestCount;
  500. }
  501. CurrentState = PreviousMatches[MatchesCount-1];
  502. if (AuxBuffer != NULL) { free( AuxBuffer ); }
  503. return (BOOLEAN)(CurrentState == MaxState);
  504. }
  505. /*++
  506. Routine Description:
  507. Converts some wildcard chars to FsRtl special wildcards
  508. in order to use FsRtlIsNameInExpressionPrivate.
  509. Code originally from filefind.c in Win32 subsystem by MarkL.
  510. Arguments:
  511. Return Value:
  512. <Enter return values here>
  513. --*/
  514. VOID
  515. FsdRtlConvertWildCards(
  516. IN OUT CBsString &FileName
  517. )
  518. {
  519. //
  520. // Special case *.* to * since it is so common. Otherwise transmogrify
  521. // the input name according to the following rules:
  522. //
  523. // - Change all ? to DOS_QM
  524. // - Change all . followed by ? or * to DOS_DOT
  525. // - Change all * followed by a . into DOS_STAR
  526. //
  527. // These transmogrifications are all done in place.
  528. //
  529. if ( FileName == L"*.*") {
  530. FileName = L"*";
  531. } else {
  532. INT Index;
  533. WCHAR *NameChar;
  534. for ( Index = 0, NameChar = (WCHAR *)FileName.c_str();
  535. Index < FileName.GetLength();
  536. Index += 1, NameChar += 1) {
  537. if (Index && (*NameChar == L'.') && (*(NameChar - 1) == L'*')) {
  538. *(NameChar - 1) = DOS_STAR;
  539. }
  540. if ((*NameChar == L'?') || (*NameChar == L'*')) {
  541. if (*NameChar == L'?')
  542. {
  543. *NameChar = DOS_QM;
  544. }
  545. if (Index && *(NameChar-1) == L'.')
  546. {
  547. *(NameChar-1) = DOS_DOT;
  548. }
  549. }
  550. }
  551. if ( ( FileName.Right( 1 ) == L"." ) && *(NameChar - 1) == L'*')
  552. {
  553. *(NameChar-1) = DOS_STAR;
  554. }
  555. }
  556. }
  557.