Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

593 lines
17 KiB

  1. /*++
  2. Copyright (c) 1989-91 Microsoft Corporation
  3. Module Name:
  4. gettoken.c
  5. Abstract:
  6. The GetToken() function, which takes a pathname and splits it into
  7. individual tokens. This function is a lexical analyzer which
  8. is called by the parsing routines of NetpPathType.
  9. Author:
  10. Danny Glasser (dannygl) 19 June 1989
  11. Notes:
  12. For efficiency, there is code here which is different for the
  13. DBCS and non-DBCS environments. This allows us to take advantage
  14. of short cuts which are not valid in the DBCS world (such as
  15. scanning a string from right to left).
  16. See the comments below for a detailed description of the behavior
  17. of this function.
  18. Revision History:
  19. 27-Sep-1991 JohnRo
  20. Changed TEXT macro usage to allow UNICODE.
  21. 06 May 1991 rfirth
  22. 32-bit version
  23. --*/
  24. #include "nticanon.h"
  25. #include "winnls.h"
  26. #define TEXT_LENGTH(s) ((sizeof(s)/sizeof(TCHAR)) - 1)
  27. static TCHAR szAUXName[] = TEXT("AUX");
  28. static TCHAR szCOMMName[] = TEXT("COMM");
  29. static TCHAR szCONName[] = TEXT("CON");
  30. static TCHAR szDEVName[] = TEXT("DEV");
  31. static TCHAR szMAILSLOTName[] = TEXT("MAILSLOT");
  32. static TCHAR szNULName[] = TEXT("NUL");
  33. static TCHAR szPIPEName[] = TEXT("PIPE");
  34. static TCHAR szPRINTName[] = TEXT("PRINT");
  35. static TCHAR szPRNName[] = TEXT("PRN");
  36. static TCHAR szQUEUESName[] = TEXT("QUEUES");
  37. static TCHAR szSEMName[] = TEXT("SEM");
  38. static TCHAR szSHAREMEMName[] = TEXT("SHAREMEM");
  39. static TCHAR szLPTName[] = TEXT("LPT");
  40. static TCHAR szCOMName[] = TEXT("COM");
  41. #define LPT_TOKEN_LEN TEXT_LENGTH(szLPTName)
  42. #define COM_TOKEN_LEN TEXT_LENGTH(szCOMName)
  43. static TCHAR szWildcards[] = TEXT("*?");
  44. static TCHAR szIllegalChars[] = ILLEGAL_CHARS;
  45. static TCHAR szNonComponentChars[] = NON_COMPONENT_CHARS ILLEGAL_CHARS;
  46. static TCHAR _text_SingleDot[] = TEXT(".");
  47. typedef struct {
  48. LPTSTR pszTokenName;
  49. DWORD cbTokenLen;
  50. DWORD flTokenType;
  51. } STRING_TOKEN;
  52. //
  53. // IMPORTANT: In order for the binary table traversal to work, the strings
  54. // in this table MUST be in lexically-sorted order. Please
  55. // bear this in mind when adding strings to the table.
  56. //
  57. STATIC STRING_TOKEN StringTokenTable[] = {
  58. szDEVName, TEXT_LENGTH(szDEVName), TOKEN_TYPE_DEV
  59. };
  60. #define NUM_STRING_TOKENS (sizeof(StringTokenTable) / sizeof(*StringTokenTable))
  61. STATIC DWORD TrailingDotsAndSpaces(LPTSTR pszToken, DWORD cbTokenLen );
  62. STATIC BOOL IsIllegalCharacter(LPTSTR pszString);
  63. DWORD
  64. GetToken(
  65. IN LPTSTR pszBegin,
  66. OUT LPTSTR* ppszEnd,
  67. OUT LPDWORD pflTokenType,
  68. IN DWORD flFlags
  69. )
  70. /*++
  71. Routine Description:
  72. GetToken attempts to locate and type the next token. It takes the
  73. beginning of the token and determines the end of the token (i.e.
  74. the beginning of the next token, so that it can be called again).
  75. It also sets the TOKEN_TYPE_* bits for all of the token types which
  76. are appropriate to the specified type.
  77. Arguments:
  78. pszBegin - A pointer to the first character in the token.
  79. ppszEnd - A pointer to the location in which to store the end of
  80. the current token (actually, the first character of the
  81. next token).
  82. pflTokenType- The place to store the token type. Token types are
  83. defined in TOKEN.H.
  84. flFlags - Flags to determine operation. Currently MBZ.
  85. Return Value:
  86. DWORD
  87. Success - 0
  88. Failure - ERROR_INVALID_PARAMETER
  89. ERROR_INVALID_NAME
  90. ERROR_FILENAME_EXCED_RANGE
  91. --*/
  92. {
  93. register TCHAR chFirstChar;
  94. register DWORD cbTokenLen;
  95. BOOL fComputernameOnly = FALSE;
  96. DWORD usNameError = 0;
  97. DWORD cbTrailingDotSpace;
  98. DWORD iLow, iHigh, iMid;
  99. LONG iCmpVal;
  100. LCID lcid = GetThreadLocale();
  101. BOOL bDBCS = (PRIMARYLANGID( LANGIDFROMLCID(lcid)) == LANG_JAPANESE) ||
  102. (PRIMARYLANGID(LANGIDFROMLCID(lcid)) == LANG_KOREAN) ||
  103. (PRIMARYLANGID(LANGIDFROMLCID(lcid)) == LANG_CHINESE);
  104. extern DWORD cbMaxPathCompLen;
  105. //
  106. // This macro is used to make sure that the error value is set only
  107. // once in the computername-only case.
  108. //
  109. #define SET_COMPUTERNAMEONLY(err) if (! fComputernameOnly) \
  110. { \
  111. fComputernameOnly = TRUE; \
  112. usNameError = err; \
  113. }
  114. if (flFlags & GTF_RESERVED) {
  115. return ERROR_INVALID_PARAMETER;
  116. }
  117. //
  118. // Initialize the token type to 0
  119. //
  120. *pflTokenType = 0;
  121. //
  122. // Store the first character
  123. //
  124. chFirstChar = *pszBegin;
  125. //
  126. // Return immediately if the string is a null string
  127. //
  128. if (chFirstChar == TCHAR_EOS) {
  129. *ppszEnd = pszBegin;
  130. *pflTokenType = TOKEN_TYPE_EOS;
  131. #ifdef DEVDEBUG
  132. DbgPrint("GetToken - returning TOKEN_TYPE_EOS\n");
  133. #endif
  134. return 0;
  135. }
  136. //
  137. // Handle single-character, non-component tokens
  138. //
  139. if ((chFirstChar == TCHAR_BACKSLASH) || (chFirstChar == TCHAR_FWDSLASH)) {
  140. *pflTokenType = TOKEN_TYPE_SLASH;
  141. } else if (chFirstChar == TCHAR_COLON) {
  142. *pflTokenType = TOKEN_TYPE_COLON;
  143. }
  144. //
  145. // If we get here and the token type is non-zero, we have a single
  146. // character token. We set <ppszEnd> and return 0.
  147. //
  148. if (*pflTokenType) {
  149. *ppszEnd = pszBegin + 1;
  150. #ifdef DEVDEBUG
  151. DbgPrint("GetToken - *pflTokenType=%x\n", *pflTokenType);
  152. #endif
  153. return 0;
  154. }
  155. //
  156. // If we get here, the token is a component, find the end of the
  157. // component by looking for the first character in the string which
  158. // isn't a valid component character.
  159. //
  160. // IMPORTANT: There are certain names which are not valid component
  161. // names but which may be valid computernames. If we hit
  162. // such a name, we set the <fComputernameOnly> flag. Later
  163. // on, we check to see if the name is a valid computername.
  164. // If it is, we allow it; otherwise, we return an error.
  165. //
  166. cbTokenLen = STRCSPN(pszBegin, szNonComponentChars);
  167. //
  168. // We return an error if the first character is not a valid component
  169. // character, if the component is too long, or if the first
  170. // non-component character in the string is an illegal character.
  171. //
  172. if (cbTokenLen == 0) {
  173. #ifdef DEVDEBUG
  174. DbgPrint("GetToken - returning ERROR_INVALID_NAME (token len = 0)\n");
  175. #endif
  176. return ERROR_INVALID_NAME;
  177. }
  178. if (cbTokenLen > cbMaxPathCompLen) {
  179. SET_COMPUTERNAMEONLY(ERROR_FILENAME_EXCED_RANGE);
  180. }
  181. if (IsIllegalCharacter(pszBegin + cbTokenLen)) {
  182. #ifdef DEVDEBUG
  183. DbgPrint("GetToken - returning ERROR_INVALID_NAME (illegal char)\n");
  184. #endif
  185. return ERROR_INVALID_NAME;
  186. }
  187. //
  188. // Now we need to determine where the trailing dots and spaces begin,
  189. // and make sure that the component name contains something other
  190. // than dots and spaces, unless it's "." or ".."
  191. //
  192. // NOTE: If there are not trailing dots or spaces, <cbTrailingDotSpace>
  193. // is set to <cbTokenLen>.
  194. //
  195. cbTrailingDotSpace = TrailingDotsAndSpaces(pszBegin, cbTokenLen );
  196. //
  197. // See if the token has only trailing dots and spaces
  198. //
  199. if (cbTrailingDotSpace == 0) {
  200. //
  201. // Return an error if the length of the token is greater than 2.
  202. //
  203. if (cbTokenLen > 2) {
  204. SET_COMPUTERNAMEONLY(ERROR_INVALID_NAME);
  205. }
  206. //
  207. // Return an error if the first character is not a dot or if the
  208. // token length is 2 and the second character is not a dot.
  209. //
  210. if ((chFirstChar != TCHAR_DOT) || ((cbTokenLen == 2) && (pszBegin[1] != TCHAR_DOT))) {
  211. SET_COMPUTERNAMEONLY(ERROR_INVALID_NAME);
  212. }
  213. //
  214. // Now we're OK, since the token is either "." or ".."
  215. //
  216. }
  217. //
  218. // WE HAVE A VALID COMPONENT
  219. //
  220. *pflTokenType = TOKEN_TYPE_COMPONENT;
  221. //
  222. // Now we determine if this token matches any of the component-based
  223. // types.
  224. //
  225. //
  226. // Is it a drive?
  227. //
  228. if (IS_DRIVE(chFirstChar) && (cbTokenLen == 1)) {
  229. *pflTokenType |= TOKEN_TYPE_DRIVE;
  230. }
  231. //
  232. // Is it "." or ".." ?
  233. //
  234. // Since we've already validated this string, we know that if it
  235. // contains nothing but dots and spaces it must be one of these
  236. // two.
  237. //
  238. if (cbTrailingDotSpace == 0) {
  239. *pflTokenType |= cbTokenLen == 1 ? TOKEN_TYPE_DOT : TOKEN_TYPE_DOTDOT;
  240. }
  241. //
  242. // If the 8.3 flag is specified, we also have to check that the
  243. // component is in 8.3 format. We determine this as follows:
  244. //
  245. // Find the first dot in the token (or the end of the token).
  246. // Verify that at least 1 and at most 8 characters precede it.
  247. // Verify that at most 3 characters follow it.
  248. // Verify that none of the characters which follow it are dots.
  249. //
  250. // The exceptions to this are "." and "..". Therefore, we don't check
  251. // this until after we've already determined that this component is
  252. // neither of those.
  253. //
  254. if ((cbTrailingDotSpace != 0) && (flFlags & GTF_8_DOT_3)) {
  255. DWORD cbFirstDot;
  256. BOOL fNoDot;
  257. cbFirstDot = STRCSPN(pszBegin, _text_SingleDot);
  258. if (fNoDot = cbFirstDot >= cbTokenLen) {
  259. cbFirstDot = cbTokenLen;
  260. }
  261. if (cbFirstDot == 0
  262. || cbFirstDot > 8
  263. || cbTokenLen - cbFirstDot > 4
  264. || (! fNoDot && STRCSPN(pszBegin + cbFirstDot + 1, _text_SingleDot)
  265. < cbTokenLen - (cbFirstDot + 1))) {
  266. SET_COMPUTERNAMEONLY(ERROR_INVALID_NAME);
  267. }
  268. if( bDBCS ) {
  269. //
  270. // In case of MBCS, We also need to check the string is valid in MBCS
  271. // because Unicode character count is not eqaul MBCS byte count
  272. CHAR szCharToken[13]; // 8 + 3 + dot + null
  273. int cbConverted = 0;
  274. BOOL bDefaultUsed = FALSE;
  275. // Convert Unicode string to Mbcs.
  276. cbConverted = WideCharToMultiByte( CP_OEMCP, 0,
  277. pszBegin, -1,
  278. szCharToken, sizeof(szCharToken),
  279. NULL, &bDefaultUsed );
  280. // If the converted langth is larger than the buffer, or the WideChar string
  281. // contains some character that is can not be repesented by MultiByte code page,
  282. // set error.
  283. if( cbConverted == FALSE || bDefaultUsed == TRUE ) {
  284. SET_COMPUTERNAMEONLY(ERROR_INVALID_NAME);
  285. } else {
  286. cbConverted -= 1; // Remove NULL;
  287. cbFirstDot = strcspn(szCharToken, ".");
  288. if (fNoDot = cbFirstDot >= (DWORD)cbConverted) {
  289. cbFirstDot = cbConverted;
  290. }
  291. if (cbFirstDot == 0
  292. || cbFirstDot > 8
  293. || cbConverted - cbFirstDot > 4
  294. || (! fNoDot && strcspn(szCharToken + cbFirstDot + 1, ".")
  295. < cbConverted - (cbFirstDot + 1))) {
  296. SET_COMPUTERNAMEONLY(ERROR_INVALID_NAME);
  297. }
  298. }
  299. }
  300. }
  301. //
  302. // Does it contain wildcards?
  303. //
  304. // If so, set the appropriate flag(s).
  305. //
  306. // If not, it may be a valid computername.
  307. //
  308. if (STRCSPN(pszBegin, szWildcards) < cbTokenLen) {
  309. *pflTokenType |= TOKEN_TYPE_WILDCARD;
  310. //
  311. // Special case the single '*' token
  312. //
  313. if (cbTokenLen == 1 && chFirstChar == TCHAR_STAR) {
  314. *pflTokenType |= TOKEN_TYPE_WILDONE;
  315. }
  316. } else {
  317. if( cbTokenLen <= MAX_PATH ) {
  318. *pflTokenType |= TOKEN_TYPE_COMPUTERNAME;
  319. }
  320. }
  321. //
  322. // IMPORTANT: Now we've determined if the token is a valid computername.
  323. // If the <fComputernameOnly> flag is set and it's a valid
  324. // computername, then we turn off all other bits. If it's
  325. // not a valid computername, we return the stored error.
  326. // If the flag isn't set, we continue with the component name
  327. // processing.
  328. //
  329. if (fComputernameOnly) {
  330. if (*pflTokenType & TOKEN_TYPE_COMPUTERNAME) {
  331. *pflTokenType = TOKEN_TYPE_COMPUTERNAME;
  332. } else {
  333. #ifdef DEVDEBUG
  334. DbgPrint("GetToken - returning usNameError (%u)\n", usNameError);
  335. #endif
  336. return usNameError;
  337. }
  338. } else {
  339. //
  340. // Is this an LPT[1-9] token?
  341. //
  342. if (STRNICMP(pszBegin, szLPTName, LPT_TOKEN_LEN) == 0
  343. && IS_NON_ZERO_DIGIT(pszBegin[LPT_TOKEN_LEN])
  344. && cbTrailingDotSpace == LPT_TOKEN_LEN + 1) {
  345. *pflTokenType |= TOKEN_TYPE_LPT;
  346. }
  347. //
  348. // Is this an COM[1-9] token?
  349. //
  350. if (STRNICMP(pszBegin, szCOMName, COM_TOKEN_LEN) == 0
  351. && IS_NON_ZERO_DIGIT(pszBegin[COM_TOKEN_LEN])
  352. && cbTrailingDotSpace == COM_TOKEN_LEN + 1) {
  353. *pflTokenType |= TOKEN_TYPE_COM;
  354. }
  355. //
  356. // The remainder of the component-based token types are determined
  357. // by string comparisons. In order to speed things up, we store
  358. // these strings in sorted order and do a binary search on them,
  359. // which reduces the worst-case number of comparisons from N to
  360. // log N (where N is the number of strings).
  361. //
  362. iLow = (ULONG)-1;
  363. iHigh = NUM_STRING_TOKENS;
  364. while (iHigh - iLow > 1) {
  365. iMid = (iLow + iHigh) / 2;
  366. //
  367. // We do the comparison up to the length of the longer of the
  368. // two strings. This guarantees us a valid non-zero value for
  369. // iCmpVal if they don't match. It also means that they won't
  370. // match unless they're the same length.
  371. //
  372. iCmpVal = STRNICMP(pszBegin,
  373. StringTokenTable[iMid].pszTokenName,
  374. max(StringTokenTable[iMid].cbTokenLen,
  375. cbTrailingDotSpace) );
  376. if (iCmpVal < 0) {
  377. iHigh = iMid;
  378. } else if (iCmpVal > 0) {
  379. iLow = iMid;
  380. } else {
  381. //
  382. // We have a match!
  383. //
  384. *pflTokenType |= StringTokenTable[iMid].flTokenType;
  385. //
  386. // We can only match one, so don't bother continuing
  387. //
  388. break;
  389. }
  390. }
  391. }
  392. //
  393. // We're done; set the end pointer and return with success
  394. //
  395. *ppszEnd = pszBegin + cbTokenLen;
  396. #ifdef DEVDEBUG
  397. DbgPrint("GetToken - returning success\n");
  398. #endif
  399. return 0;
  400. }
  401. STATIC DWORD TrailingDotsAndSpaces(LPTSTR pszToken, DWORD cbTokenLen )
  402. {
  403. LPTSTR pszDotSpace = pszToken + cbTokenLen - 1;
  404. //
  405. // Scan the token until we reach the beginning or we find a
  406. // non-dot/space.
  407. //
  408. while (pszDotSpace >= pszToken
  409. && (*pszDotSpace == TCHAR_DOT || *pszDotSpace == TCHAR_SPACE)) {
  410. pszDotSpace--;
  411. }
  412. //
  413. // Increment pszDotSpace so that it points to the beginning of
  414. // the trailing dots and spaces (or one past the end of the token
  415. // if there are no trailing dots or spaces).
  416. //
  417. pszDotSpace++;
  418. //
  419. // Return the index of the first trailing dot or space (or the length
  420. // of the token if there were none).
  421. //
  422. return (DWORD)(pszDotSpace - pszToken);
  423. }
  424. STATIC BOOL IsIllegalCharacter(LPTSTR pszString)
  425. {
  426. // TCHAR chTemp;
  427. // BOOL fRetVal;
  428. //
  429. // Return FALSE immediately for a null character
  430. //
  431. if (*pszString == TCHAR_EOS) {
  432. return FALSE;
  433. }
  434. //
  435. // If the character is a single-byte character, we can simply see if
  436. // it's illegal by calling strchrf() on the illegal character array.
  437. // If it's a double-byte character, we have to do it the slower way
  438. // (with strcspnf).
  439. //
  440. // if (!IS_LEAD_BYTE(*pszString)) {
  441. return (STRCHR(szIllegalChars, *pszString) != NULL);
  442. // } else {
  443. //
  444. // //
  445. // // We set the character after the double-byte character to the
  446. // // null character, to speed things up.
  447. // //
  448. //
  449. // chTemp = pszString[2];
  450. // pszString[2] = TCHAR_EOS;
  451. // fRetVal = STRCSPN(pszString, szIllegalChars) == 0;
  452. // pszString[2] = chTemp;
  453. //
  454. // return fRetVal;
  455. // }
  456. }