Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

324 lines
9.6 KiB

  1. //---------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation, 1997 - 1999
  3. //
  4. // regexp.c
  5. //
  6. // Simple regular expression matching.
  7. //
  8. // Author:
  9. // 06-02-97 Edward Reus Initial version.
  10. //
  11. //---------------------------------------------------------------------------
  12. #include <sysinc.h>
  13. #include <mbstring.h>
  14. #include "regexp.h"
  15. //-------------------------------------------------------------------------
  16. // MatchREi()
  17. //
  18. // Match the test string (pszString) against the specifed pattern. If they
  19. // match return TRUE, else return FALSE. This version works with ANSI
  20. // characters and is case independent.
  21. //
  22. // In this function patterns are made up from "literal" characters plus
  23. // some control characters, "*", "?". Asterix (*) is a place
  24. // holder for "zero or more" of any character. Question Mark (?) is a place
  25. // holder for "any single character". The square brackets ([]) contain a
  26. // list of matching characters, in this case "-" is used to denote a range
  27. // of characters (i.e. [A-Z] matches any alpha character), but I didn't
  28. // pub brackets in this one yet...
  29. //
  30. //-------------------------------------------------------------------------
  31. BOOL MatchREi( unsigned char *pszString,
  32. unsigned char *pszPattern )
  33. {
  34. unsigned char *pchRangeLow;
  35. while (TRUE)
  36. {
  37. // Walk throuh the pattern, matching it against the string.
  38. switch (*pszPattern)
  39. {
  40. case '*':
  41. // Match zero or more characters.
  42. pszPattern = _mbsinc(pszPattern);
  43. while (*pszString)
  44. {
  45. if (MatchREi(pszString,pszPattern))
  46. {
  47. return TRUE;
  48. }
  49. pszString = _mbsinc(pszString);
  50. }
  51. return MatchREi(pszString,pszPattern);
  52. case '?':
  53. // Match any single character.
  54. if (*pszString == 0)
  55. {
  56. // Not at end of string, so no match.
  57. return FALSE;
  58. }
  59. pszString = _mbsinc(pszString);
  60. break;
  61. #if FALSE
  62. case '[':
  63. // Match a set of characters.
  64. if (*pszString == 0)
  65. {
  66. // Syntax error, no matching close bracket "]".
  67. return FALSE;
  68. }
  69. pchRangeLow = 0;
  70. while (*pszPattern)
  71. {
  72. if (*pszPattern == ']')
  73. {
  74. // End of char set, no match found.
  75. return FALSE;
  76. }
  77. if (*pszPattern == '-')
  78. {
  79. // check a range of chars?
  80. pszPattern = _mbsinc(pszPattern);
  81. // get high limit of range:
  82. if ((*pszPattern == 0)||(*pszPattern == ']'))
  83. {
  84. // Syntax error.
  85. return FALSE;
  86. }
  87. if ( (_mbsnicoll(pszString,pchRangeLow,1) >= 0)
  88. &&(_mbsnicoll(pszString,pszPattern,1) <= 0))
  89. {
  90. // In range, go to next character.
  91. break;
  92. }
  93. }
  94. pchRangeLow = pchPattern;
  95. // See if character matches this pattern element.
  96. if (_mbsnicoll(pszString,pszPattern,1) == 0)
  97. {
  98. // Character match, go on.
  99. break;
  100. }
  101. pszPattern = _mbsinc(pszPattern);
  102. }
  103. // Have a match in the character set, skip to the end of the set.
  104. while ((*pszPattern != 0)&&(*pszPattern != ']'))
  105. {
  106. pszPattern = _mbsinc(pszPattern);
  107. }
  108. break;
  109. #endif
  110. case 0:
  111. // End of pattern, return TRUE if at end of string.
  112. return ((*pszString)? FALSE : TRUE);
  113. default:
  114. // Check for exact character match.
  115. if (_mbsnicoll(pszString,pszPattern,1))
  116. {
  117. // No match.
  118. return FALSE;
  119. }
  120. pszString = _mbsinc(pszString);
  121. break;
  122. }
  123. pszPattern = _mbsinc(pszPattern);
  124. }
  125. // Can never exit from here.
  126. }
  127. #if FALSE
  128. ... not currently used ...
  129. //-------------------------------------------------------------------------
  130. // MatchRE()
  131. //
  132. // Match the test string (pszString) against the specifed pattern. If they
  133. // match return TRUE, else return FALSE.
  134. //
  135. // In this function patterns are made up from "literal" characters plus
  136. // some control characters, "*", "?", "[" and "]". Asterix (*) is a place
  137. // holder for "zero or more" of any character. Question Mark (?) is a place
  138. // holder for "any single character". The square brackets ([]) contain a
  139. // list of matching characters, in this case "-" is used to denote a range
  140. // of characters (i.e. [a-zA-Z] matches any alpha character).
  141. //
  142. // Note: Currently there is no support for "or" (|) operator.
  143. //
  144. // Note: Ranges are simple, there is no support for dash at the begining
  145. // of a range to denote the dash itself.
  146. //-------------------------------------------------------------------------
  147. BOOL MatchRE( unsigned char *pszString,
  148. unsigned char *pszPattern )
  149. {
  150. unsigned char ch;
  151. unsigned char chPattern;
  152. unsigned char chRangeLow;
  153. while (TRUE)
  154. {
  155. // Walk throuh the pattern, matching it against the string.
  156. switch (chPattern = *pszPattern++)
  157. {
  158. case '*':
  159. // Match zero or more characters.
  160. while (*pszString)
  161. {
  162. if (MatchRE(pszString++,pszPattern))
  163. {
  164. return TRUE;
  165. }
  166. }
  167. return MatchRE(pszString,pszPattern);
  168. case '?':
  169. // Match any single character.
  170. if (*pszString++ == 0)
  171. {
  172. // Not at end of string, so no match.
  173. return FALSE;
  174. }
  175. break;
  176. case '[':
  177. // Match a set of characters.
  178. if ( (ch = *pszString++) == 0)
  179. {
  180. // Syntax error, no matching close bracket "]".
  181. return FALSE;
  182. }
  183. // ch = toupper(ch);
  184. chRangeLow = 0;
  185. while (chPattern = *pszPattern++)
  186. {
  187. if (chPattern == ']')
  188. {
  189. // End of char set, no match found.
  190. return FALSE;
  191. }
  192. if (chPattern == '-')
  193. {
  194. // check a range of chars?
  195. chPattern = *pszPattern; // get high limit of range
  196. if ((chPattern == 0)||(chPattern == ']'))
  197. {
  198. // Syntax error.
  199. return FALSE;
  200. }
  201. if ((ch >= chRangeLow)&&(ch <= chPattern))
  202. {
  203. // In range, go to next character.
  204. break;
  205. }
  206. }
  207. chRangeLow = chPattern;
  208. // See if character matches this pattern element.
  209. if (ch == chPattern)
  210. {
  211. // Character match, go on.
  212. break;
  213. }
  214. }
  215. // Have a match in the character set, skip to the end of the set.
  216. while ((chPattern)&&(chPattern != ']'))
  217. {
  218. chPattern = *pszPattern++;
  219. }
  220. break;
  221. case 0:
  222. // End of pattern, return TRUE if at end of string.
  223. return ((*pszString)? FALSE : TRUE);
  224. default:
  225. ch = *pszString++;
  226. // Check for exact character match.
  227. // Note: CASE doesn't matter...
  228. if (tolower(ch) != tolower(chPattern))
  229. {
  230. // No match.
  231. return FALSE;
  232. }
  233. break;
  234. }
  235. }
  236. // Can never exit from here.
  237. }
  238. //-------------------------------------------------------------------------
  239. // MatchREList()
  240. //
  241. // Match a string against a list (array) of RE pattens, return TRUE iff
  242. // the string matches one of the RE patterns. The list of patterns is a
  243. // NULL terminated array of pointers to RE pattern strings.
  244. //-------------------------------------------------------------------------
  245. BOOL MatchREList( unsigned char *pszString,
  246. unsigned char **ppszREList )
  247. {
  248. unsigned char *pszPattern;
  249. if (ppszREList)
  250. {
  251. pszPattern = *ppszREList;
  252. while (pszPattern)
  253. {
  254. if (MatchRE(pszString,pszPattern))
  255. {
  256. return TRUE;
  257. }
  258. pszPattern = *(++ppszREList);
  259. }
  260. }
  261. return FALSE;
  262. }
  263. //-------------------------------------------------------------------------
  264. // MatchExactList()
  265. //
  266. //-------------------------------------------------------------------------
  267. BOOL MatchExactList( unsigned char *pszString,
  268. unsigned char **ppszREList )
  269. {
  270. unsigned char *pszPattern;
  271. if (ppszREList)
  272. {
  273. pszPattern = *ppszREList;
  274. while (pszPattern)
  275. {
  276. if (!_mbsicmp(pszString,pszPattern))
  277. {
  278. return TRUE;
  279. }
  280. pszPattern = *(++ppszREList);
  281. }
  282. }
  283. return FALSE;
  284. }
  285. #endif