Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

574 lines
18 KiB

  1. /****************************** Module Header ******************************\
  2. * Module Name: wstrings.c
  3. *
  4. * Copyright (c) 1985 - 1999, Microsoft Corporation
  5. *
  6. * History:
  7. * 03-20-91 IanJa Created
  8. \***************************************************************************/
  9. #include "precomp.h"
  10. #pragma hdrstop
  /*
   * LATER: these should be in a public header file!
   *
   * Assorted defines used to support the standard Windows ANSI code page
   * (now known as code page 1252 and officially registered by IBM).
   * This is intended only for the PDK release. Subsequent releases will
   * use the NLSAPI and Unicode.
   *
   * Every expansion is wrapped in parentheses so that each constant behaves
   * as a single operand wherever it is used (for example as the operand of
   * sizeof or next to a postfix operator).
   */
  #define LATIN_CAPITAL_LETTER_A_GRAVE    ((WCHAR)0xc0)
  #define LATIN_CAPITAL_LETTER_THORN      ((WCHAR)0xde)
  #define LATIN_SMALL_LETTER_SHARP_S      ((WCHAR)0xdf)
  #define LATIN_SMALL_LETTER_Y_DIAERESIS  ((WCHAR)0xff)
  #define DIVISION_SIGN                   ((WCHAR)0xf7)
  #define MULTIPLICATION_SIGN             ((WCHAR)0xd7)

  /*
   * Temporary defines to support Unicode block 1 (0x0000 - 0x00ff).
   *
   * WCTOA(wch)            keeps only the low 8 bits of wch.
   * IS_UNICODE_BLK1(wch)  is non-zero when wch, converted to int, is <= 0x00ff.
   */
  #define WCTOA(wch)              ((wch) & 0xff)
  #define IS_UNICODE_BLK1(wch)    ((int)(wch) <= 0x00ff)
  28. /***************************************************************************\
  29. * CharLowerW (API)
  30. *
  31. * Convert either a single character or an entire string to lower case. The
  32. * two cases are differentiated by checking the high-word of pwsz. If it is
  33. * 0 then we just convert the low-word of pwsz.
  34. *
  35. * History:
  36. * 06-24-91 GregoryW Created. Supports Unicode equivalent of code
  37. * page 1252 (simple zero extension). This is for
  38. * the PDK release only. After the PDK this routine
  39. * will be modified to use the NLSAPI.
  40. * 02-11-93 IanJa Modified to use NLS API.
  41. \***************************************************************************/
  42. FUNCLOG1(LOG_GENERAL, LPWSTR, WINAPI, CharLowerW, LPWSTR, pwsz)
  43. LPWSTR WINAPI CharLowerW(
  44. LPWSTR pwsz)
  45. {
  46. /*
  47. * Early out for NULL string or '\0'
  48. */
  49. if (pwsz == NULL) {
  50. return pwsz;
  51. }
  52. if (!IS_PTR(pwsz)) {
  53. if (!LCMapStringW(
  54. LOCALE_USER_DEFAULT,
  55. LCMAP_LOWERCASE,
  56. (LPWSTR)&pwsz,
  57. 1,
  58. (LPWSTR)&pwsz,
  59. 1
  60. )) {
  61. /*
  62. * We don't expect LCMapString to fail! The caller is not expecting
  63. * failure, CharLowerW does not have a failure indicator, so we do
  64. * nothing.
  65. */
  66. RIPMSG1(RIP_WARNING, "CharLowerW(%#p): LCMapString failed\n", pwsz);
  67. }
  68. return pwsz;
  69. }
  70. /*
  71. * pwsz is a null-terminated string
  72. */
  73. CharLowerBuffW(pwsz, wcslen(pwsz)+1);
  74. return pwsz;
  75. }
  76. /***************************************************************************\
  77. * CharUpperW (API)
  78. *
  79. * Convert either a single character or an entire string to upper case. The
  80. * two cases are differentiated by checking the high-word of pwsz. If it is
  81. * 0 then we just convert the low-word of pwsz.
  82. *
  83. * History:
  84. * 06-24-91 GregoryW Created. Supports Unicode equivalent of code
  85. * page 1252 (simple zero extension). This is for
  86. * the PDK release only. After the PDK this routine
  87. * will be modified to use the NLSAPI.
  88. * 02-11-93 IanJa Modified to use NLS API.
  89. \***************************************************************************/
  90. FUNCLOG1(LOG_GENERAL, LPWSTR, WINAPI, CharUpperW, LPWSTR, pwsz)
  91. LPWSTR WINAPI CharUpperW(
  92. LPWSTR pwsz)
  93. {
  94. /*
  95. * Early out for NULL string or '\0'
  96. */
  97. if (pwsz == NULL) {
  98. return pwsz;
  99. }
  100. if (!IS_PTR(pwsz)) {
  101. if (!LCMapStringW(
  102. LOCALE_USER_DEFAULT,
  103. LCMAP_UPPERCASE,
  104. (LPWSTR)&pwsz,
  105. 1,
  106. (LPWSTR)&pwsz,
  107. 1
  108. )) {
  109. /*
  110. * We don't expect LCMapString to fail! The caller is not expecting
  111. * failure, CharLowerW does not have a failure indicator, so we do
  112. * nothing.
  113. */
  114. RIPMSG1(RIP_WARNING, "CharUpperW(%#p): LCMapString failed", pwsz);
  115. }
  116. return pwsz;
  117. }
  118. /*
  119. * pwsz is a null-terminated string
  120. */
  121. CharUpperBuffW(pwsz, wcslen(pwsz)+1);
  122. return pwsz;
  123. }
  124. /***************************************************************************\
  125. * CharNextW (API)
  126. *
  127. * Move to next character in string unless already at '\0' terminator
  128. *
  129. * History:
  130. * 06-24-91 GregoryW Created. This routine will not work for non-spacing
  131. * characters!! This version is only intended for
  132. * limited use in the PDK release.
  133. * 02-20-92 GregoryW Modified to work with combining marks (formerly known
  134. * as non-spacing).
  135. * 09-21-93 JulieB Added ALPHA to combining mark code.
  136. \***************************************************************************/
  137. FUNCLOG1(LOG_GENERAL, LPWSTR, WINAPI, CharNextW, LPCWSTR, lpwCurrentChar)
  138. LPWSTR WINAPI CharNextW(
  139. LPCWSTR lpwCurrentChar)
  140. {
  141. WORD ctype3info;
  142. if (*lpwCurrentChar) {
  143. //
  144. // Examine each code element. Skip all combining elements.
  145. //
  146. while (*(++lpwCurrentChar)) {
  147. if (!GetStringTypeW(
  148. CT_CTYPE3,
  149. lpwCurrentChar,
  150. 1,
  151. &ctype3info)) {
  152. /*
  153. * GetStringTypeW failed! The caller is not expecting failure,
  154. * CharNextW does not have a failure indicator, so just return
  155. * a pointer to the character we couldn't analyze.
  156. */
  157. RIPMSG2(RIP_WARNING, "CharNextW failed, L'\\x%.4x' at %#p",
  158. *lpwCurrentChar, lpwCurrentChar);
  159. break;
  160. }
  161. if (!((ctype3info & C3_NONSPACING) && (!(ctype3info & C3_ALPHA)))) {
  162. break;
  163. }
  164. }
  165. }
  166. return (LPWSTR)lpwCurrentChar;
  167. }
  168. /***************************************************************************\
  169. * CharPrevW (API)
  170. *
  171. * Move to previous character in string, unless already at start
  172. *
  173. * History:
  174. * 06-24-91 GregoryW Created. This routine will not work for non-spacing
  175. * characters!! This version is only intended for
  176. * limited use in the PDK release.
  177. * 02-20-92 GregoryW Modified to work with combining marks (formerly
  178. * known as non-spacing).
  179. * 09-21-93 JulieB Added ALPHA to combining mark code.
  180. * 12-06-93 JulieB Fixed combining mark code.
  181. \***************************************************************************/
  182. FUNCLOG2(LOG_GENERAL, LPWSTR, WINAPI, CharPrevW, LPCWSTR, lpwStart, LPCWSTR, lpwCurrentChar)
  183. LPWSTR WINAPI CharPrevW(
  184. LPCWSTR lpwStart,
  185. LPCWSTR lpwCurrentChar)
  186. {
  187. WORD ctype3info;
  188. LPWSTR lpwValidChar = (LPWSTR)lpwCurrentChar;
  189. if (lpwCurrentChar > lpwStart) {
  190. //
  191. // Examine each code element. Skip all combining elements.
  192. //
  193. while (lpwCurrentChar-- > lpwStart) {
  194. if (!GetStringTypeW(
  195. CT_CTYPE3,
  196. lpwCurrentChar,
  197. 1,
  198. &ctype3info)) {
  199. /*
  200. * GetStringTypeW failed! The caller is not expecting failure,
  201. * CharPrevW does not have a failure indicator, so just return
  202. * a pointer to the character we couldn't analyze.
  203. */
  204. RIPMSG2(RIP_WARNING, "CharPrevW failed, L'\\x%.4x' at %#p",
  205. *lpwCurrentChar, lpwCurrentChar);
  206. break;
  207. }
  208. if (!((ctype3info & C3_NONSPACING) && (!(ctype3info & C3_ALPHA)))) {
  209. lpwValidChar = (LPWSTR)lpwCurrentChar;
  210. break; // found non-combining code element
  211. }
  212. }
  213. /*
  214. * We need to always go back one character. If we've looped through
  215. * the entire buffer and haven't found a "legitimate" character, just
  216. * step back. See bug #27649.
  217. */
  218. if(lpwCurrentChar < lpwStart){
  219. --lpwValidChar;
  220. UserAssert(lpwValidChar >= lpwStart);
  221. }
  222. }
  223. return (LPWSTR)lpwValidChar;
  224. }
  225. /***************************************************************************\
  226. * CharLowerBuffW (API)
  227. *
  228. * History:
  229. * 06-24-91 GregoryW Created. This version only supports Unicode
  230. * block 1 (0x0000 - 0x00ff). All other code points
  231. * are copied verbatim. This version is intended
  232. * only for the PDK release.
  233. * 02-11-93 IanJa Modified to use NLS API.
  234. \***************************************************************************/
  235. FUNCLOG2(LOG_GENERAL, DWORD, WINAPI, CharLowerBuffW, LPWSTR, pwsz, DWORD, cwch)
  236. DWORD WINAPI CharLowerBuffW(
  237. LPWSTR pwsz,
  238. DWORD cwch)
  239. {
  240. int cwchT;
  241. DWORD i;
  242. if (cwch == 0) {
  243. return 0;
  244. }
  245. cwchT = LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_LOWERCASE,
  246. pwsz, cwch, pwsz, cwch);
  247. if (cwchT != 0) {
  248. return cwchT;
  249. }
  250. /*
  251. * LCMapString failed! The caller is not expecting failure,
  252. * CharLowerBuffW does not have a failure indicator, so we
  253. * convert the buffer to lower case as best we can.
  254. */
  255. RIPMSG1(RIP_WARNING, "CharLowerBuffW(%ls) failed", pwsz);
  256. for (i=0; i < cwch; i++) {
  257. if (IS_UNICODE_BLK1(pwsz[i]) && IsCharUpperA((char)pwsz[i])) {
  258. pwsz[i] += 'a'-'A';
  259. }
  260. }
  261. return cwch;
  262. }
  263. /***************************************************************************\
  264. * CharUpperBuffW (API)
  265. *
  266. * History:
  267. * 06-24-91 GregoryW Created. This version only supports Unicode
  268. * block 1 (0x0000 - 0x00ff). All other code points
  269. * are copied verbatim. This version is intended
  270. * only for the PDK release.
  271. * 02-11-93 IanJa Modified to use NLS API.
  272. \***************************************************************************/
  273. FUNCLOG2(LOG_GENERAL, DWORD, WINAPI, CharUpperBuffW, LPWSTR, pwsz, DWORD, cwch)
  274. DWORD WINAPI CharUpperBuffW(
  275. LPWSTR pwsz,
  276. DWORD cwch)
  277. {
  278. int cwchT;
  279. DWORD i;
  280. if (cwch == 0) {
  281. return 0;
  282. }
  283. cwchT = LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_UPPERCASE,
  284. pwsz, cwch, pwsz, cwch);
  285. if (cwchT != 0) {
  286. return cwchT;
  287. }
  288. /*
  289. * LCMapString failed! The caller is not expecting failure,
  290. * CharUpperBuffW does not have a failure indicator, so we
  291. * convert the buffer to upper case as best we can.
  292. */
  293. RIPMSG1(RIP_WARNING, "CharUpperBuffW(%ls) failed", pwsz);
  294. for (i=0; i < cwch; i++) {
  295. if (IS_UNICODE_BLK1(pwsz[i]) &&
  296. IsCharLowerA((char)pwsz[i]) &&
  297. (pwsz[i] != LATIN_SMALL_LETTER_SHARP_S) &&
  298. (pwsz[i] != LATIN_SMALL_LETTER_Y_DIAERESIS)) {
  299. pwsz[i] += (WCHAR)('A'-'a');
  300. }
  301. }
  302. return cwch;
  303. }
  304. /***************************************************************************\
  305. * IsCharLowerW (API)
  306. *
  307. * History:
  308. * 06-24-91 GregoryW Created. This version only supports Unicode
  309. * block 1 (0x0000 - 0x00ff). FALSE is returned
  310. * for all other code points. This version is intended
  311. * only for the PDK release.
  312. * 02-20-92 GregoryW Modified to use NLS API.
  313. \***************************************************************************/
  314. FUNCLOG1(LOG_GENERAL, BOOL, WINAPI, IsCharLowerW, WCHAR, wChar)
  315. BOOL WINAPI IsCharLowerW(
  316. WCHAR wChar)
  317. {
  318. WORD ctype1info;
  319. if (GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) {
  320. if (ctype1info & C1_LOWER) {
  321. return TRUE;
  322. } else {
  323. return FALSE;
  324. }
  325. }
  326. /*
  327. * GetStringTypeW failed! The caller is not expecting
  328. * failure, IsCharLowerW does not have a failure indicator, so we
  329. * determine the case as best we can.
  330. */
  331. RIPMSG1(RIP_WARNING, "IsCharLowerW(L'\\x%.4lx') failed", wChar);
  332. if (IS_UNICODE_BLK1(wChar)) {
  333. return IsCharLowerA((CHAR)wChar);
  334. } else {
  335. return FALSE;
  336. }
  337. }
  338. /***************************************************************************\
  339. * IsCharUpperW (API)
  340. *
  341. * History:
  342. * 06-24-91 GregoryW Created. This version only supports Unicode
  343. * block 1 (0x0000 - 0x00ff). FALSE is returned
  344. * for all other code points. This version is intended
  345. * only for the PDK release.
  346. * 02-20-92 GregoryW Modified to use NLS API.
  347. \***************************************************************************/
  348. FUNCLOG1(LOG_GENERAL, BOOL, WINAPI, IsCharUpperW, WCHAR, wChar)
  349. BOOL WINAPI IsCharUpperW(
  350. WCHAR wChar)
  351. {
  352. WORD ctype1info;
  353. if (GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) {
  354. if (ctype1info & C1_UPPER) {
  355. return TRUE;
  356. } else {
  357. return FALSE;
  358. }
  359. }
  360. /*
  361. * GetStringTypeW failed! The caller is not expecting
  362. * failure, IsCharLowerW does not have a failure indicator, so we
  363. * determine the case as best we can.
  364. */
  365. RIPMSG1(RIP_WARNING, "IsCharUpper(L'\\x%.4lx') failed", wChar);
  366. if (IS_UNICODE_BLK1(wChar)) {
  367. return IsCharUpperA((CHAR)wChar);
  368. } else {
  369. return FALSE;
  370. }
  371. }
  372. /***************************************************************************\
  373. * IsCharAlphaNumericW (API)
  374. *
  375. * Returns TRUE if character is alphabetical or numerical, otherwise FALSE
  376. *
  377. * History:
  378. * 06-24-91 GregoryW Created. This version only supports Unicode
  379. * block 1 (0x0000 - 0x00ff).
  380. * This version is intended only for the PDK release.
  381. * 02-20-92 GregoryW Modified to use NLS API.
  382. \***************************************************************************/
  383. FUNCLOG1(LOG_GENERAL, BOOL, WINAPI, IsCharAlphaNumericW, WCHAR, wChar)
  384. BOOL WINAPI IsCharAlphaNumericW(
  385. WCHAR wChar)
  386. {
  387. WORD ctype1info;
  388. if (!GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) {
  389. //
  390. // GetStringTypeW returned an error! IsCharAlphaNumericW has no
  391. // provision for returning an error... The best we can do is to
  392. // return FALSE
  393. //
  394. UserAssert(FALSE);
  395. return FALSE;
  396. }
  397. //
  398. // LATER 20 Feb 92 GregoryW
  399. // We may need to check ctype 3 info if we want to check for
  400. // digits other than ASCII '0'-'9' (such as Lao digits or
  401. // Tibetan digits, etc.).
  402. //
  403. #ifdef FE_SB // IsCharAlphaNumericW()
  404. if (ctype1info & C1_ALPHA) {
  405. WORD ctype3info = 0;
  406. /*
  407. * We don't want to return TRUE for halfwidth katakana.
  408. * Katakana is linguistic character (C1_ALPHA), but it is not
  409. * alphabet character.
  410. */
  411. if (!GetStringTypeW(CT_CTYPE3, &wChar, 1, &ctype3info)) {
  412. UserAssert(FALSE);
  413. /*
  414. * Assume, it is alphabet character, because it has
  415. * C1_ALPHA attribute.
  416. */
  417. return TRUE;
  418. }
  419. if (ctype3info & (C3_KATAKANA|C3_HIRAGANA)) {
  420. /*
  421. * This is 'Katakana'.
  422. */
  423. return FALSE;
  424. } else {
  425. return TRUE;
  426. }
  427. } else if (ctype1info & C1_DIGIT) {
  428. return TRUE;
  429. } else {
  430. return FALSE;
  431. }
  432. #else
  433. if ((ctype1info & C1_ALPHA) || (ctype1info & C1_DIGIT)) {
  434. return TRUE;
  435. } else {
  436. return FALSE;
  437. }
  438. #endif // FE_SB
  439. }
  440. /***************************************************************************\
  441. * IsCharAlphaW (API)
  442. *
  443. * Returns TRUE if character is alphabetical, otherwise FALSE
  444. *
  445. * History:
  446. * 06-24-91 GregoryW Created. This version only supports Unicode
  447. * block 1 (0x0000 - 0x00ff).
  448. * This version is intended only for the PDK release.
  449. * 02-20-92 GregoryW Modified to use NLS API.
  450. \***************************************************************************/
  451. FUNCLOG1(LOG_GENERAL, BOOL, WINAPI, IsCharAlphaW, WCHAR, wChar)
  452. BOOL WINAPI IsCharAlphaW(
  453. WCHAR wChar)
  454. {
  455. WORD ctype1info;
  456. if (!GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) {
  457. //
  458. // GetStringTypeW returned an error! IsCharAlphaW has no
  459. // provision for returning an error... The best we can do
  460. // is to return FALSE
  461. //
  462. UserAssert(FALSE);
  463. return FALSE;
  464. }
  465. if (ctype1info & C1_ALPHA) {
  466. #ifdef FE_SB // IsCharAlphaA()
  467. WORD ctype3info = 0;
  468. /*
  469. * We don't want to return TRUE for halfwidth katakana.
  470. * Katakana is linguistic character (C1_ALPHA), but it is not
  471. * alphabet character.
  472. */
  473. if (!GetStringTypeW(CT_CTYPE3, &wChar, 1, &ctype3info)) {
  474. UserAssert(FALSE);
  475. /*
  476. * Assume, it is alphabet character, because it has
  477. * C1_ALPHA attribute.
  478. */
  479. return TRUE;
  480. }
  481. if (ctype3info & (C3_KATAKANA|C3_HIRAGANA)) {
  482. /*
  483. * This is 'Katakana'.
  484. */
  485. return FALSE;
  486. } else {
  487. return TRUE;
  488. }
  489. #else
  490. return TRUE;
  491. #endif // FE_SB
  492. } else {
  493. return FALSE;
  494. }
  495. }