Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

721 lines
20 KiB

  1. /*++
  2. Copyright (c) 1989 Microsoft Corporation
  3. Module Name:
  4. name.cxx
  5. Abstract:
  6. Support for pattern-matching file names versus file specs, used by
  7. FtpFindFirstFile() to form the list of found files. Lifted from
  8. ntos\fsrtl\name.c, and trimmed to fit.
  9. This module was included in the ftphelp project because of the need
  10. for binary portability to Chicago.
  11. ---
  12. The unicode name support package is for manipulating unicode strings
  13. The routines allow the caller to dissect and compare strings.
  14. This package uses the same FSRTL_COMPARISON_RESULT typedef used by name.c
  15. The following routines are provided by this package:
  16. o MyFsRtlDissectName - removed
  17. o MyFsRtlColateNames - removed
  18. o MyFsRtlDoesNameContainWildCards - This routine tells the caller if
  19. a string contains any wildcard characters.
  20. o MyFsRtlIsNameInExpression - This routine is used to compare a string
  21. against a template (possibly containing wildcards) to see if the
  22. string is in the language denoted by the template.
  23. Author:
  24. Gary Kimura [GaryKi] 5-Feb-1990
  25. Revision History:
  26. Heath Hunnicutt [t-heathh] 13-Jul-1994
  27. --*/
  28. #include <wininetp.h>
  29. #include "ftpapih.h"
  30. #include "namep.h"
  //
  // Trace level for the module
  //
  #define Dbg (0x10000000)

  //
  // Debug-only tracing. In DBG builds DavePrint(...) forwards its arguments
  // to DbgPrint when DaveDebug is non-zero; in other builds it expands to
  // NOTHING.
  //
  // The DBG form is written as "if (!flag) {} else call" rather than a bare
  // "if (flag) call" so that using DavePrint as the body of a caller's
  // if/else cannot capture the caller's "else".
  //
  #if DBG
  extern ULONG DaveDebug;
  #define DavePrint if (!DaveDebug) {} else DbgPrint
  #else
  #define DavePrint NOTHING
  #endif
  44. //
  45. // Local support routine prototypes
  46. //
  47. BOOLEAN
  48. MyFsRtlIsNameInExpressionPrivate
  49. (
  50. IN ANSI_STRING *Expression,
  51. IN ANSI_STRING *Name
  52. );
  53. BOOLEAN
  54. MyFsRtlDoesNameContainWildCards
  55. (
  56. IN LPCSTR pszName
  57. )
  58. /*++
  59. Routine Description:
  60. This routine simply scans the input Name string looking for any Nt
  61. wild card characters.
  62. Arguments:
  63. Name - The string to check.
  64. Return Value:
  65. BOOLEAN - TRUE if one or more wild card characters was found.
  66. --*/
  67. {
  68. ULONG i;
  69. USHORT Length;
  70. // PAGED_CODE();
  71. //
  72. // Check each character in the name to see if it's a wildcard
  73. // character.
  74. //
  75. Length =(unsigned short) lstrlenA( pszName );
  76. for (i = 0; i < Length; i += 1) {
  77. //
  78. // check for a wild card character
  79. //
  80. if (MyFsRtlIsAnsiCharacterWild( pszName[i] )) {
  81. //
  82. // Tell caller that this name contains wild cards
  83. //
  84. return TRUE;
  85. }
  86. }
  87. //
  88. // No wildcard characters were found, so return to our caller
  89. //
  90. return FALSE;
  91. }
  92. //
  93. // The following routine is just a wrapper around
  94. // MyFsRtlIsNameInExpressionPrivate to make a last minute fix a bit safer.
  95. //
  96. BOOLEAN
  97. MyFsRtlIsNameInExpression
  98. (
  99. IN LPCSTR pszExpression,
  100. IN LPCSTR pszName,
  101. IN BOOLEAN IgnoreCase
  102. )
  103. {
  104. BOOLEAN Result=FALSE;
  105. ANSI_STRING Expression;
  106. ANSI_STRING Name;
  107. Name.Buffer = NewString( pszName );
  108. if ( Name.Buffer==NULL )
  109. {
  110. return( FALSE );
  111. }
  112. Expression.Buffer = NewString( pszExpression );
  113. if ( Expression.Buffer==NULL )
  114. {
  115. FREE_MEMORY( Name.Buffer );
  116. return( FALSE );
  117. }
  118. if ( IgnoreCase )
  119. {
  120. strupr( Name.Buffer );
  121. strupr( Expression.Buffer );
  122. }
  123. Name.Length = (unsigned short) lstrlenA( Name.Buffer );
  124. Name.MaximumLength = Name.Length;
  125. Expression.Length = (unsigned short) lstrlenA( Expression.Buffer );
  126. Expression.MaximumLength = Expression.Length;
  127. //
  128. // Now call the main routine, remembering to free the upcased string
  129. // if we allocated one.
  130. //
  131. __try {
  132. Result = MyFsRtlIsNameInExpressionPrivate( &Expression,
  133. &Name );
  134. } __finally {
  135. FREE_MEMORY(Name.Buffer);
  136. FREE_MEMORY(Expression.Buffer);
  137. }
  138. ENDFINALLY
  139. return Result;
  140. }
  141. #define MATCHES_ARRAY_SIZE 16
  142. //
  143. // Local support routine prototypes
  144. //
  145. BOOLEAN
  146. MyFsRtlIsNameInExpressionPrivate
  147. (
  148. IN ANSI_STRING *Expression,
  149. IN ANSI_STRING *Name
  150. )
  151. /*++
  152. Routine Description:
  153. This routine compares a Dbcs name and an expression and tells the caller
  154. if the name is in the language defined by the expression. The input name
  155. cannot contain wildcards, while the expression may contain wildcards.
  156. Expression wild cards are evaluated as shown in the nondeterministic
  157. finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM.
  158. ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT
  159. S
  160. <-----<
  161. X | | e Y
  162. X * Y == (0)----->-(1)->-----(2)-----(3)
  163. S-.
  164. <-----<
  165. X | | e Y
  166. X ~* Y == (0)----->-(1)->-----(2)-----(3)
  167. X S S Y
  168. X ?? Y == (0)---(1)---(2)---(3)---(4)
  169. X . . Y
  170. X ~.~. Y == (0)---(1)----(2)------(3)---(4)
  171. | |________|
  172. | ^ |
  173. |_______________|
  174. ^EOF or .^
  175. X S-. S-. Y
  176. X ~?~? Y == (0)---(1)-----(2)-----(3)---(4)
  177. | |________|
  178. | ^ |
  179. |_______________|
  180. ^EOF or .^
  181. where S is any single character
  182. S-. is any single character except .
  183. e is a null character transition
  184. EOF is the end of the name string
  185. The last construction, ~? (the DOS question mark), can either match any
  186. single character, or upon encountering a period or end of input string,
  187. advances the expression to the end of the set of contiguous ~?s. This may
  188. seem somewhat convoluted, but is what DOS needs.
  189. Arguments:
  190. Expression - Supplies the input expression to check against
  191. (Caller must already upcase if passing CaseInsensitive TRUE.)
  192. Name - Supplies the input name to check for.
  193. CaseInsensitive - TRUE if Name should be Upcased before comparing.
  194. Return Value:
  195. BOOLEAN - TRUE if Name is an element in the set of strings denoted
  196. by the input Expression and FALSE otherwise.
  197. --*/
  198. {
  199. USHORT NameOffset;
  200. USHORT ExprOffset;
  201. ULONG SrcCount;
  202. ULONG DestCount;
  203. ULONG PreviousDestCount;
  204. ULONG MatchesCount;
  205. ULONG StartingNameOffset;
  206. CHAR NameChar, ExprChar;
  207. USHORT LocalBuffer[MATCHES_ARRAY_SIZE * 2];
  208. USHORT *AuxBuffer = NULL;
  209. USHORT *PreviousMatches;
  210. USHORT *CurrentMatches;
  211. USHORT MaxState;
  212. USHORT CurrentState;
  213. BOOLEAN NameFinished = FALSE;
  214. //
  215. // The idea behind the algorithm is pretty simple. We keep track of
  216. // all possible locations in the regular expression that are matching
  217. // the name. If when the name has been exhausted one of the locations
  218. // in the expression is also just exhausted, the name is in the language
  219. // defined by the regular expression.
  220. //
  221. // PAGED_CODE();
  222. INET_ASSERT(Name->Length != 0);
  223. INET_ASSERT(Expression->Length != 0);
  224. //
  225. // If one string is empty return FALSE. If both are empty return TRUE.
  226. //
  227. if ( (Name->Length == 0) || (Expression->Length == 0) ) {
  228. return (BOOLEAN)(!(Name->Length + Expression->Length));
  229. }
  230. //
  231. // Special case by far the most common wild card search of *, or *.*
  232. //
  233. if ((Expression->Length == 1 && Expression->Buffer[0] == '*')
  234. || (Expression->Length == 3 && memcmp(Expression->Buffer,"*.*",3)==0)) {
  235. return TRUE;
  236. }
  237. INET_ASSERT(MyFsRtlDoesNameContainWildCards(Expression->Buffer));
  238. //
  239. // Before special casing *X, we must special case *., as people tend
  240. // to use that expression incorrectly to mean "all files without
  241. // extensions." However, if this case, fails, it falls through to the
  242. // next special case, wherein files ending in dots match *.
  243. //
  244. if ( Expression->Length == 2 && memcmp(Expression->Buffer,"*.",2)==0 ) {
  245. PVOID pvDot;
  246. //
  247. // Attempt to find a dot in the name buffer. A dot would indicate
  248. // the presence of an extension.
  249. //
  250. pvDot = memchr( Name->Buffer, '.', Name->Length );
  251. //
  252. // If there is no dot, return that this name matches the expression "*."
  253. //
  254. if ( pvDot==NULL ) {
  255. return( TRUE );
  256. }
  257. }
  258. //
  259. // Also special case expressions of the form *X. With this and the prior
  260. // case we have covered virtually all normal queries.
  261. //
  262. if (Expression->Buffer[0] == '*') {
  263. //
  264. // Only special case an expression with a single *, recognized
  265. // by the fact that the tail contains no wildcards.
  266. //
  267. if ( !MyFsRtlDoesNameContainWildCards( Expression->Buffer + 1 ) ) {
  268. if (Name->Length < (USHORT)(Expression->Length - sizeof(CHAR))) {
  269. return FALSE;
  270. }
  271. //
  272. // Calculate the offset to the Name's tail.
  273. //
  274. StartingNameOffset = ( Name->Length - ( Expression->Length - 1 ) );
  275. //
  276. // Compare the tail of the expression with the name.
  277. //
  278. return( (BOOLEAN)
  279. memcmp( Expression->Buffer + 1,
  280. Name->Buffer + StartingNameOffset,
  281. Name->Length - StartingNameOffset ) == 0 );
  282. }
  283. }
  284. //
  285. // Walk through the name string, picking off characters. We go one
  286. // character beyond the end because some wild cards are able to match
  287. // zero characters beyond the end of the string.
  288. //
  289. // With each new name character we determine a new set of states that
  290. // match the name so far. We use two arrays that we swap back and forth
  291. // for this purpose. One array lists the possible expression states for
  292. // all name characters up to but not including the current one, and other
  293. // array is used to build up the list of states considering the current
  294. // name character as well. The arrays are then switched and the process
  295. // repeated.
  296. //
  297. // There is not a one-to-one correspondence between state number and
  298. // offset into the expression. This is evident from the NFAs in the
  299. // initial comment to this function. State numbering is not continuous.
  300. // This allows a simple conversion between state number and expression
  301. // offset. Each character in the expression can represent one or two
  302. // states. * and DOS_STAR generate two states: ExprOffset*2 and
  303. // ExprOffset*2 + 1. All other expreesion characters can produce only
  304. // a single state. Thus ExprOffset = State/2.
  305. //
  306. //
  307. // Here is a short description of the variables involved:
  308. //
  309. // NameOffset - The offset of the current name char being processed.
  310. //
  311. // ExprOffset - The offset of the current expression char being processed.
  312. //
  313. // SrcCount - Prior match being investigated with current name char
  314. //
  315. // DestCount - Next location to put a matching assuming current name char
  316. //
  317. // NameFinished - Allows one more itteration through the Matches array
  318. // after the name is exhusted (to come *s for example)
  319. //
  320. // PreviousDestCount - This is used to prevent entry duplication, see coment
  321. //
  322. // PreviousMatches - Holds the previous set of matches (the Src array)
  323. //
  324. // CurrentMatches - Holds the current set of matches (the Dest array)
  325. //
  326. // AuxBuffer, LocalBuffer - the storage for the Matches arrays
  327. //
  328. //
  329. // Set up the initial variables
  330. //
  331. PreviousMatches = &LocalBuffer[0];
  332. CurrentMatches = &LocalBuffer[MATCHES_ARRAY_SIZE];
  333. PreviousMatches[0] = 0;
  334. MatchesCount = 1;
  335. NameOffset = 0;
  336. MaxState = (USHORT)(Expression->Length * 2);
  337. while ( !NameFinished ) {
  338. if ( NameOffset < Name->Length ) {
  339. NameChar = Name->Buffer[ NameOffset ];
  340. NameOffset ++;
  341. } else {
  342. NameFinished = TRUE;
  343. //
  344. // if we have already exhasted the expression, cool. Don't
  345. // continue.
  346. //
  347. if ( PreviousMatches[MatchesCount-1] == MaxState ) {
  348. break;
  349. }
  350. }
  351. //
  352. // Now, for each of the previous stored expression matches, see what
  353. // we can do with this name character.
  354. //
  355. SrcCount = 0;
  356. DestCount = 0;
  357. PreviousDestCount = 0;
  358. while ( SrcCount < MatchesCount )
  359. {
  360. USHORT Length;
  361. //
  362. // We have to carry on our expression analysis as far as possible
  363. // for each character of name, so we loop here until the
  364. // expression stops matching. A clue here is that expression
  365. // cases that can match zero or more characters end with a
  366. // continue, while those that can accept only a single character
  367. // end with a break.
  368. //
  369. ExprOffset = (USHORT)((PreviousMatches[SrcCount++] + 1) / 2);
  370. Length = 0;
  371. while ( TRUE ) {
  372. INET_ASSERT(ExprOffset >= 0);
  373. INET_ASSERT(ExprOffset <= Expression->Length);
  374. if ( ExprOffset == Expression->Length ) {
  375. break;
  376. }
  377. //
  378. // The first time through the loop we don't want
  379. // to increment ExprOffset.
  380. //
  381. ExprOffset += Length;
  382. Length = sizeof(CHAR);
  383. CurrentState = (USHORT)(ExprOffset*2);
  384. if ( ExprOffset == Expression->Length ) {
  385. CurrentMatches[DestCount++] = MaxState;
  386. break;
  387. }
  388. ExprChar = Expression->Buffer[ExprOffset];
  389. //
  390. // Before we get started, we have to check for something
  391. // really gross. We may be about to exhaust the local
  392. // space for ExpressionMatches[][], so we have to allocate
  393. // some pool if this is the case. Yuk!
  394. //
  395. if ( (DestCount >= MATCHES_ARRAY_SIZE - 2) && (AuxBuffer == NULL) ) {
  396. ULONG ExpressionChars;
  397. ExpressionChars = Expression->Length / sizeof(CHAR);
  398. AuxBuffer = (USHORT *)
  399. ALLOCATE_MEMORY(LMEM_FIXED,
  400. (ExpressionChars + 1)
  401. * sizeof(USHORT) * 2 * 2
  402. );
  403. CopyMemory( AuxBuffer,
  404. CurrentMatches,
  405. MATCHES_ARRAY_SIZE * sizeof(USHORT) );
  406. CurrentMatches = AuxBuffer;
  407. CopyMemory( AuxBuffer + (ExpressionChars+1)*2,
  408. PreviousMatches,
  409. MATCHES_ARRAY_SIZE * sizeof(USHORT) );
  410. PreviousMatches = AuxBuffer + (ExpressionChars+1)*2;
  411. }
  412. //
  413. // * matches any character zero or more times.
  414. //
  415. if (ExprChar == '*') {
  416. CurrentMatches[DestCount++] = CurrentState;
  417. CurrentMatches[DestCount++] = CurrentState + 1;
  418. continue;
  419. }
  420. //
  421. // The following expreesion characters all match by consuming
  422. // a character, thus force the expression, and thus state
  423. // forward.
  424. //
  425. CurrentState += (USHORT)(sizeof(CHAR) * 2);
  426. //
  427. // A DOS_DOT can match either a period, or zero characters
  428. // beyond the end of name.
  429. //
  430. if (ExprChar == DOS_DOT) {
  431. INET_ASSERT(FALSE);
  432. if ( NameFinished ) {
  433. continue;
  434. }
  435. if (NameChar == '.') {
  436. CurrentMatches[DestCount++] = CurrentState;
  437. break;
  438. }
  439. }
  440. //
  441. // From this point on a name character is required to even
  442. // continue, let alone make a match.
  443. //
  444. if ( NameFinished ) {
  445. break;
  446. }
  447. //
  448. // If this expression was a '?' we can match it once.
  449. //
  450. if (ExprChar == '?') {
  451. CurrentMatches[DestCount++] = CurrentState;
  452. break;
  453. }
  454. //
  455. // Check if the expression char matches the name char
  456. //
  457. if ( !NameFinished && ExprChar == NameChar ) {
  458. CurrentMatches[DestCount++] = CurrentState;
  459. break;
  460. }
  461. //
  462. // The expression didn't match so go look at the next
  463. // previous match.
  464. //
  465. break;
  466. } // while() that tries to use up expression chars
  467. //
  468. // Prevent duplication in the destination array.
  469. //
  470. // Each of the arrays is montonically increasing and non-
  471. // duplicating, thus we skip over any source element in the src
  472. // array if we just added the same element to the destination
  473. // array. This guarentees non-duplication in the dest. array.
  474. //
  475. if ((SrcCount < MatchesCount) && (PreviousDestCount < DestCount) ) {
  476. while (PreviousDestCount < DestCount) {
  477. if ( PreviousMatches[SrcCount] < CurrentMatches[PreviousDestCount] ) {
  478. SrcCount += 1;
  479. }
  480. PreviousDestCount += 1;
  481. }
  482. }
  483. } // while() that uses up name chars
  484. //
  485. // If we found no matches in the just finished itteration, it's time
  486. // to bail.
  487. //
  488. if ( DestCount == 0 ) {
  489. if (AuxBuffer != NULL) {
  490. FREE_MEMORY(AuxBuffer);
  491. }
  492. return FALSE;
  493. }
  494. //
  495. // Swap the meaning the two arrays
  496. //
  497. {
  498. USHORT *Tmp;
  499. Tmp = PreviousMatches;
  500. PreviousMatches = CurrentMatches;
  501. CurrentMatches = Tmp;
  502. }
  503. MatchesCount = DestCount;
  504. }
  505. CurrentState = PreviousMatches[MatchesCount-1];
  506. if (AuxBuffer != NULL) {
  507. FREE_MEMORY(AuxBuffer);
  508. }
  509. return (BOOLEAN)(CurrentState == MaxState);
  510. }