Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

324 lines
12 KiB

  1. #define INCL_INETSRV_INCS
  2. #include "smtpinc.h"
  3. #include "wildmat.h"
  4. //---[ Description of the Wildmat standard ]-----------------------------------
  5. //
  6. // Taken from:
  7. //
  8. // INTERNET-DRAFT S. Barber
  9. // Expires: September 1, 1996 Academ Consulting Services
  10. // April 1996
  11. // Common NNTP Extensions
  12. // draft-barber-nntp-imp-03.txt
  13. //
  14. // The WILDMAT format was first developed by Rich Salz to provide
  15. // a uniform mechanism for matching patterns in the same manner
  16. // that the UNIX shell matches filenames. There are five pattern
  17. // matching operations other than a strict one-to-one match
  18. // between the pattern and the source to be checked for a match.
  19. // The first is an asterisk (*) to match any sequence of zero or
  20. // more characters. The second is a question mark (?) to match any
  21. // single character. The third specifies a specific set of
  22. // characters. The set is specified as a list of characters, or as
  23. // a range of characters where the beginning and end of the range
  24. // are separated by a minus (or dash) character, or as any
  25. // combination of lists and ranges. The dash can also be included
  26. // in the range as a character if it is the beginning or end of
  27. // the range. This set is enclosed in square brackets. The close
  28. // square bracket (]) may be used in a range if it is the first
  29. // character in the set. The fourth operation is the same as the
  30. // logical not of the third operation and is specified the same
  31. // way as the third with the addition of a caret character (^) at
  32. // the beginning of the test string just inside the open square
  33. // bracket. The final operation uses the backslash character to
  34. // invalidate the special meaning of the open square bracket ([),
  35. // the asterisk, or the question mark.
  36. //
  37. // 3.3.1 Examples
  38. //
  39. // a. [^]-] -- matches any character other than a close square bracket
  40. // or a minus sign/dash.
  41. //
  42. // b. *bdc -- matches any string that ends with the string "bdc"
  43. // including the string "bdc" (without quotes).
  44. //
  45. // c. [0-9a-zA-Z] -- matches any string containing any alphanumeric string
  46. // in English.
  47. //
  48. // d. a??d -- matches any four character string which begins
  49. // with a and ends with d.
  50. //
  51. //-----------------------------------------------------------------------------
  52. //----[ NOTES ]----------------------------------------------------------------
  53. //
  54. // This function will raise an invalid access exception if either pszText
  55. // or pszPattern is invalid or not null terminated while dereferencing the
  56. // string. If this is possible, surround the call in a try-except block.
  57. //
  58. //-----------------------------------------------------------------------------
  59. //---[ Defines ]---------------------------------------------------------------
  60. #define STACK_SIZE 256
  61. //---[ HrMatchWildmat ]--------------------------------------------------------
  62. //
  63. // Description:
  64. //
  65. // Provides support for the "Wildmat" wild-card matching standard. See
  66. // description above.
  67. //
  68. // Params:
  69. //
  70. // pszText String to test
  71. // pszPattern Pattern to test against
  72. //
  73. // Returns:
  74. //
  75. // ERROR_SUCCESS If function succeeded, and match was found
  76. // ERROR_INVALID_PARAMETER Text or pattern string is invalid
  77. //
  78. // ERROR_CAN_NOT_COMPLETE Some other error occurred.
  79. //
  80. //
  81. //-----------------------------------------------------------------------------
  82. HRESULT HrMatchWildmat(const char* pszText, const char* pszPattern)
  83. {
  84. _ASSERT(pszText != NULL && pszPattern != NULL);
  85. BOOL fCharSet = FALSE; // TRUE if currently parsing a character set in a pattern
  86. BOOL fNegated = FALSE; // TRUE if there is a '^' at the beginning of the set
  87. BOOL fInSet = FALSE; // indicates when matching of a character set has completed
  88. // used to short-circuit character set evaluation
  89. int iStackPtr = 0; // stack pointer
  90. const char* textStack[STACK_SIZE]; // stack of text pointers
  91. const char* patternStack[STACK_SIZE]; // stack of pattern pointers
  92. // If the pattern consists solely of a * then any text will match
  93. if (strcmp(pszPattern, "*") == 0)
  94. return ERROR_SUCCESS;
  95. while (TRUE)
  96. {
  97. switch (*pszPattern)
  98. {
  99. case '*':
  100. if (fCharSet)
  101. goto DEFAULT; // according to unix solution this is not an error
  102. // If there is a * at the end of the pattern then at this point we are
  103. // sure that we got a match
  104. if (pszPattern[1] == '\0')
  105. return ERROR_SUCCESS;
  106. // We could write a simpler recursive wildmat function. Here we would
  107. // recursively call wildmat. Instead, for performance reasons this
  108. // solution is iterative.
  109. // Here we save the current values of the text pointer and stack pointer
  110. // on a stack and we leave the * in the pattern, with the effect of
  111. // matching one character with the *. The next time through the while
  112. // loop, the * will still be in the pattern, thus we will try to match
  113. // the rest of the input with this *. If it turns to fail, we go back
  114. // one character.
  115. // See the comments right before the BACK label below.
  116. if (*pszText != '\0')
  117. {
  118. if (iStackPtr == STACK_SIZE)
  119. return ERROR_CAN_NOT_COMPLETE; // stack overflow
  120. textStack[iStackPtr] = pszText; // save current text pointer
  121. patternStack[iStackPtr] = pszPattern; // save current pattern pointer
  122. iStackPtr++;
  123. pszPattern--; // leave * in the input pattern and match one character
  124. }
  125. break;
  126. case '?':
  127. if (fCharSet)
  128. goto DEFAULT; // according to unix solution this is not an error
  129. if (*pszText == '\0')
  130. goto BACK;
  131. break;
  132. case '[':
  133. if (fCharSet)
  134. return ERROR_INVALID_PARAMETER;
  135. fCharSet = TRUE; // beginning a character set
  136. fNegated = FALSE; // so far we haven't seen a '^'
  137. fInSet = FALSE; // used to short-circuit the evaluation of
  138. // membership to the character set
  139. // treat '^', '-' and ']' as special cases if they are
  140. // at the beginning of the character set (also "[^-a]" and "[^]a]")
  141. if (pszPattern[1] == '^')
  142. {
  143. fNegated = TRUE;
  144. pszPattern++;
  145. }
  146. // '-' and ']' are literals if they appear at the beggining of the set
  147. if (pszPattern[1] == '-' || pszPattern[1] == ']')
  148. {
  149. fInSet = (*pszText == pszPattern[1]);
  150. pszPattern++;
  151. }
  152. break;
  153. case ']':
  154. if (fCharSet)
  155. {
  156. if ((!fNegated && !fInSet) || (fNegated && fInSet))
  157. goto BACK;
  158. fCharSet = FALSE; // this marks the end of a character set
  159. }
  160. else
  161. {
  162. if (*pszText != *pszPattern)
  163. goto BACK;
  164. }
  165. break;
  166. case '-':
  167. if (fCharSet)
  168. {
  169. unsigned char startRange = pszPattern[-1]; // we use unsigned char
  170. unsigned char endRange; // to support extended
  171. unsigned char ch; // characters
  172. if (pszPattern[1] == '\0')
  173. return ERROR_INVALID_PARAMETER;
  174. else
  175. {
  176. if (pszPattern[1] == ']') // a dash at the end of the set is
  177. fInSet = (*pszText == '-'); // treated as a literal
  178. else
  179. { // we have a range
  180. if (pszPattern[1] == '\\') // escape character, skip it
  181. {
  182. pszPattern++;
  183. if (pszPattern[1] == '\0')
  184. return ERROR_INVALID_PARAMETER;
  185. }
  186. ch = *pszText;
  187. endRange = pszPattern[1];
  188. if (startRange > endRange)
  189. return ERROR_INVALID_PARAMETER;
  190. // here is where we could need unsigned characters
  191. fInSet = (ch >= startRange && ch <= endRange);
  192. pszPattern++;
  193. break;
  194. }
  195. }
  196. }
  197. else
  198. { // outside a character set '-' has no special meaning
  199. if (*pszText != *pszPattern)
  200. goto BACK;
  201. }
  202. break;
  203. case '\0': // end of the pattern
  204. if (fCharSet)
  205. return ERROR_INVALID_PARAMETER;
  206. if (*pszText == '\0')
  207. return ERROR_SUCCESS;
  208. else
  209. goto BACK;
  210. break;
  211. default:
  212. DEFAULT:
  213. if (*pszPattern == '\\')
  214. {
  215. pszPattern++; // escape character, treat the next character as a literal
  216. if (*pszPattern == '\0')
  217. return ERROR_INVALID_PARAMETER;
  218. }
  219. if (!fCharSet)
  220. { // any other character is treated as a literal
  221. if (*pszText != *pszPattern)
  222. goto BACK;
  223. }
  224. else
  225. {
  226. // the following if takes care of the two "special" cases:
  227. // [c-a] (we don't want to accept c), and
  228. // [c-] (we want to accept c)
  229. if (!(pszPattern[1] == '-' && pszPattern[2] != ']'))
  230. fInSet = (*pszText == *pszPattern);
  231. }
  232. break;
  233. } // switch
  234. pszPattern++;
  235. if (!fCharSet)
  236. {
  237. if (*pszText != '\0')
  238. pszText++;
  239. }
  240. else
  241. { // code to short-circuit character set evaluation
  242. if (fInSet) // skip the rest of the character set
  243. {
  244. while (*pszPattern != '\0' && *pszPattern != ']')
  245. {
  246. if (*pszPattern == '\\')
  247. { // escape character, treat the next character as a literal
  248. pszPattern++;
  249. if (*pszPattern == '\0')
  250. return ERROR_INVALID_PARAMETER;
  251. }
  252. pszPattern++;
  253. }
  254. }
  255. }
  256. continue; // the continue statement is to jump to the beginning of the loop,
  257. // we could have used used goto some label but that's what continue's
  258. // are for.
  259. // This is only reached by jumping to BACK.
  260. // This is equivalent to returning from a recursive solution of wildmat.
  261. // If the stack pointer is zero then the bottommost "recursive call" failed,
  262. // otherwise we "unwind one stack frame" and resume execution of the previous
  263. // call at the top of the while loop. Notice that since "recursive calls" are
  264. // only done when we find a '*' in the pattern outside a character set, the
  265. // value of fCharSet has to be set to false.
  266. BACK:
  267. if (iStackPtr == 0) // we exhausted all possibilities
  268. return ERROR_FILE_NOT_FOUND;
  269. iStackPtr--; // try matching no characters with the '*'
  270. pszText = textStack[iStackPtr];
  271. pszPattern = patternStack[iStackPtr] + 1; // eat the '*' matching no input characters
  272. fCharSet = FALSE; // this has to be the case
  273. } // while
  274. // should never get here
  275. _ASSERT(FALSE);
  276. }