Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3745 lines
154 KiB

  1. /*++
  2. Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. string.c
  5. Abstract:
  6. This file contains functions that deal with characters and strings.
  7. APIs found in this file:
  8. CompareStringW
  9. GetStringTypeExW
  10. GetStringTypeW
  11. Revision History:
  12. 05-31-91 JulieB Created.
  13. --*/
  14. //
  15. // Include Files.
  16. //
  17. #include "nls.h"
  18. #include "jamo.h"
  19. //
  20. // Constant Declarations.
  21. //
  //
  //  State Table.
  //
  //  Each state occupies its own bit, so several states can be held in one
  //  mask: CompareStringW combines them with |, tests them with &, and
  //  clears them through REMOVE_STATE.
  //
  #define STATE_DW              0x0001    // normal diacritic weight state
  #define STATE_REVERSE_DW      0x0002    // reverse diacritic weight state
  #define STATE_CW              0x0004    // case weight state
  #define STATE_JAMO_WEIGHT     0x0008    // jamo weight state
  //
  //  Invalid weight values.
  //
  //  CMP_INVALID_FAREAST and CMP_INVALID_UW are what GET_FAREAST_WEIGHT
  //  hands back when a far east special character has no usable preceding
  //  character to take its weight from.
  //
  #define CMP_INVALID_WEIGHT    0xFFFFFFFF    // full DWORD weight
  #define CMP_INVALID_FAREAST   0xFFFF0000    // far east special weight
  #define CMP_INVALID_UW        0xFFFF        // unicode weight only
  35. //
  36. // Forward Declarations.
  37. //
  38. int
  39. LongCompareStringW(
  40. PLOC_HASH pHashN,
  41. DWORD dwCmpFlags,
  42. LPCWSTR lpString1,
  43. int cchCount1,
  44. LPCWSTR lpString2,
  45. int cchCount2,
  46. BOOL fModify);
  47. int
  48. FindJamoDifference(
  49. PLOC_HASH pHashN,
  50. LPCWSTR* ppString1,
  51. int* ctr1,
  52. int cchCount1,
  53. DWORD* pWeight1,
  54. LPCWSTR* ppString2,
  55. int* ctr2,
  56. int cchCount2,
  57. DWORD* pWeight2,
  58. LPCWSTR* pLastJamo,
  59. WORD* uw1,
  60. WORD* uw2,
  61. int* pState,
  62. int* WhichJamo,
  63. BOOL fModify);
  64. //-------------------------------------------------------------------------//
  65. // INTERNAL MACROS //
  66. //-------------------------------------------------------------------------//
  ////////////////////////////////////////////////////////////////////////////
  //
  //  NOT_END_STRING
  //
  //  Checks to see if the search has NOT yet reached the end of the string.
  //  It evaluates to TRUE if the counter is not at zero (counting
  //  backwards) and the null termination has not been reached.  The null
  //  terminator only ends the string when -2 was passed in the count
  //  parameter; CompareStringW passes -2 to LongCompareStringW when its
  //  caller supplied a count of -1 or less.
  //
  //  ct     - counter for the string (counts down to zero)
  //  ptr    - pointer to the current character
  //  cchIn  - count given for the string, or -2 as described above
  //
  //  Every argument is parenthesized in the expansion, so expressions such
  //  as "flags & 2" or "a ? b : c" may be passed safely.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define NOT_END_STRING(ct, ptr, cchIn)                                     \
      (((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  AT_STRING_END
  //
  //  Checks to see if the pointer is at the end of the string.
  //  It evaluates to TRUE if the counter is zero or if the null termination
  //  has been reached (the terminator only counts when -2 was passed in the
  //  count parameter).  This is the logical negation of NOT_END_STRING.
  //
  //  ct     - counter for the string (counts down to zero)
  //  ptr    - pointer to the current character
  //  cchIn  - count given for the string, or -2 if the null terminator
  //           marks the end
  //
  //  Every argument is parenthesized in the expansion, so expressions may
  //  be passed safely.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define AT_STRING_END(ct, ptr, cchIn)                                      \
      (((ct) == 0) || ((*(ptr) == 0) && ((cchIn) == -2)))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  REMOVE_STATE
  //
  //  Removes the given state(s) from the state table by clearing the
  //  corresponding bit(s) in the State variable of the enclosing function.
  //  This should only be called when the state should not be entered for
  //  the remainder of the comparison.
  //
  //  value  - one STATE_* flag, or several flags combined with |
  //
  //  The argument is parenthesized before it is complemented, so a
  //  combination such as (STATE_DW | STATE_CW) clears every listed bit.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define REMOVE_STATE(value)    (State &= ~(value))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  POINTER_FIXUP
  //
  //  Steps both strings forward by one character.
  //
  //  While a string is being read out of an expansion temporary buffer
  //  (pSaveN is non-NULL), its expansion count cExpCharN is decremented
  //  first.  When that count reaches zero the string pointer is switched
  //  back to the saved position and pSaveN is reset to NULL, so the
  //  advance that follows moves past the saved position.
  //
  //  Operates directly on the following variables of the enclosing
  //  function: pString1, pSave1, cExpChar1, pString2, pSave2, cExpChar2.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define POINTER_FIXUP()                                                    \
  {                                                                          \
      /*                                                                     \
       *  String 1: leave the expansion buffer once it is used up,          \
       *  then advance.                                                      \
       */                                                                    \
      if (pSave1 != NULL)                                                    \
      {                                                                      \
          cExpChar1 -= 1;                                                    \
          if (cExpChar1 == 0)                                                \
          {                                                                  \
              pString1 = pSave1;                                             \
              pSave1 = NULL;                                                 \
          }                                                                  \
      }                                                                      \
      pString1 += 1;                                                         \
                                                                             \
      /*                                                                     \
       *  String 2: same treatment.                                          \
       */                                                                    \
      if (pSave2 != NULL)                                                    \
      {                                                                      \
          cExpChar2 -= 1;                                                    \
          if (cExpChar2 == 0)                                                \
          {                                                                  \
              pString2 = pSave2;                                             \
              pSave2 = NULL;                                                 \
          }                                                                  \
      }                                                                      \
      pString2 += 1;                                                         \
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  //  SCAN_LONGER_STRING
  //
  //  Scans the remainder of the longer string and then RETURNS from the
  //  enclosing function - control never falls out of the bottom of this
  //  macro.
  //
  //  ct     - counter for the longer string (modifiable lvalue)
  //  ptr    - pointer into the longer string (modifiable lvalue)
  //  cchIn  - count given for the string, or -2 if the null terminator
  //           marks the end
  //  ret    - value to return as soon as a character that must not be
  //           ignored is found
  //
  //  The following variables of the enclosing function are used:
  //      pHashN, Weight1 (overwritten as scratch), fIgnoreDiacritic,
  //      fIgnoreSymbol, WhichDiacritic, WhichCase, WhichExtra, WhichJamo,
  //      WhichPunct1, WhichPunct2.
  //
  //  Macro arguments are parenthesized wherever they are used, so an
  //  lvalue expression such as *pCount or *ppString may be passed.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define SCAN_LONGER_STRING( ct,                                            \
                              ptr,                                           \
                              cchIn,                                         \
                              ret )                                          \
  {                                                                          \
      /*                                                                     \
       *  Search through the rest of the longer string to make sure         \
       *  all characters are not to be ignored.  If a character is found    \
       *  that should not be ignored, return the given return value         \
       *  immediately.                                                       \
       *                                                                     \
       *  The only exception to this is when a nonspace mark is found.  If  \
       *  another DW difference has been found earlier, then use that.      \
       */                                                                    \
      while (NOT_END_STRING((ct), (ptr), (cchIn)))                           \
      {                                                                      \
          Weight1 = GET_DWORD_WEIGHT(pHashN, *(ptr));                        \
          switch (GET_SCRIPT_MEMBER(&Weight1))                               \
          {                                                                  \
              case ( UNSORTABLE ):                                           \
              {                                                              \
                  /* always ignored */                                       \
                  break;                                                     \
              }                                                              \
              case ( NONSPACE_MARK ):                                        \
              {                                                              \
                  if ((!fIgnoreDiacritic) && (!WhichDiacritic))              \
                  {                                                          \
                      return (ret);                                          \
                  }                                                          \
                  break;                                                     \
              }                                                              \
              case ( PUNCTUATION ) :                                         \
              case ( SYMBOL_1 ) :                                            \
              case ( SYMBOL_2 ) :                                            \
              case ( SYMBOL_3 ) :                                            \
              case ( SYMBOL_4 ) :                                            \
              case ( SYMBOL_5 ) :                                            \
              {                                                              \
                  if (!fIgnoreSymbol)                                        \
                  {                                                          \
                      return (ret);                                          \
                  }                                                          \
                  break;                                                     \
              }                                                              \
              case ( EXPANSION ) :                                           \
              case ( FAREAST_SPECIAL ) :                                     \
              case ( JAMO_SPECIAL ) :                                        \
              case ( EXTENSION_A ) :                                         \
              default :                                                      \
              {                                                              \
                  /* never ignored */                                        \
                  return (ret);                                              \
              }                                                              \
          }                                                                  \
                                                                             \
          /*                                                                 \
           *  Advance pointer and decrement counter.                         \
           */                                                                \
          (ptr)++;                                                           \
          (ct)--;                                                            \
      }                                                                      \
                                                                             \
      /*                                                                     \
       *  Need to check diacritic, case, extra, jamo, and special weights   \
       *  for the final return value.  Still could be equal if the longer   \
       *  part of the string contained only characters to be ignored.       \
       *                                                                     \
       *  NOTE:  The following checks MUST REMAIN IN THIS ORDER:            \
       *         Diacritic, Case, Extra, Jamo, Punctuation.                 \
       */                                                                    \
      if (WhichDiacritic)                                                    \
      {                                                                      \
          return (WhichDiacritic);                                           \
      }                                                                      \
      if (WhichCase)                                                         \
      {                                                                      \
          return (WhichCase);                                                \
      }                                                                      \
      if (WhichExtra)                                                        \
      {                                                                      \
          /* weights 4 and 5 are skipped when diacritics are ignored */      \
          if (!fIgnoreDiacritic)                                             \
          {                                                                  \
              if (GET_WT_FOUR(&WhichExtra))                                  \
              {                                                              \
                  return (GET_WT_FOUR(&WhichExtra));                         \
              }                                                              \
              if (GET_WT_FIVE(&WhichExtra))                                  \
              {                                                              \
                  return (GET_WT_FIVE(&WhichExtra));                         \
              }                                                              \
          }                                                                  \
          if (GET_WT_SIX(&WhichExtra))                                       \
          {                                                                  \
              return (GET_WT_SIX(&WhichExtra));                              \
          }                                                                  \
          if (GET_WT_SEVEN(&WhichExtra))                                     \
          {                                                                  \
              return (GET_WT_SEVEN(&WhichExtra));                            \
          }                                                                  \
      }                                                                      \
      if (WhichJamo)                                                         \
      {                                                                      \
          return (WhichJamo);                                                \
      }                                                                      \
      if (WhichPunct1)                                                       \
      {                                                                      \
          return (WhichPunct1);                                              \
      }                                                                      \
      if (WhichPunct2)                                                       \
      {                                                                      \
          return (WhichPunct2);                                              \
      }                                                                      \
                                                                             \
      return (CSTR_EQUAL);                                                   \
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  //  QUICK_SCAN_LONGER_STRING
  //
  //  Scans the remainder of the longer string and then RETURNS from the
  //  enclosing function - control never falls out of the bottom of this
  //  macro.  Assumes that both strings are null-terminated, and applies
  //  no ignore flags: only unsortable characters, and nonspace marks once
  //  a diacritic difference has already been recorded, are skipped.
  //
  //  ptr    - pointer into the longer string (modifiable lvalue)
  //  ret    - value to return as soon as a character that must not be
  //           ignored is found
  //
  //  The following variables of the enclosing function are used:
  //      pHashN, WhichDiacritic, WhichCase, WhichExtra, WhichJamo,
  //      WhichPunct1, WhichPunct2.
  //
  //  The ptr argument is parenthesized wherever it is used, so an lvalue
  //  expression such as *ppString may be passed.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define QUICK_SCAN_LONGER_STRING( ptr,                                     \
                                    ret )                                    \
  {                                                                          \
      /*                                                                     \
       *  Search through the rest of the longer string to make sure         \
       *  all characters are not to be ignored.  If a character is found    \
       *  that should not be ignored, return the given return value         \
       *  immediately.                                                       \
       *                                                                     \
       *  The only exception to this is when a nonspace mark is found.  If  \
       *  another DW difference has been found earlier, then use that.      \
       */                                                                    \
      while (*(ptr) != 0)                                                    \
      {                                                                      \
          switch (GET_SCRIPT_MEMBER(&(pHashN->pSortkey[*(ptr)])))            \
          {                                                                  \
              case ( UNSORTABLE ):                                           \
              {                                                              \
                  /* always ignored */                                       \
                  break;                                                     \
              }                                                              \
              case ( NONSPACE_MARK ):                                        \
              {                                                              \
                  if (!WhichDiacritic)                                       \
                  {                                                          \
                      return (ret);                                          \
                  }                                                          \
                  break;                                                     \
              }                                                              \
              default :                                                      \
              {                                                              \
                  /* never ignored */                                        \
                  return (ret);                                              \
              }                                                              \
          }                                                                  \
                                                                             \
          /*                                                                 \
           *  Advance pointer.                                               \
           */                                                                \
          (ptr)++;                                                           \
      }                                                                      \
                                                                             \
      /*                                                                     \
       *  Need to check diacritic, case, extra, jamo, and special weights   \
       *  for the final return value.  Still could be equal if the longer   \
       *  part of the string contained only unsortable characters.          \
       *                                                                     \
       *  NOTE:  The following checks MUST REMAIN IN THIS ORDER:            \
       *         Diacritic, Case, Extra, Jamo, Punctuation.                 \
       */                                                                    \
      if (WhichDiacritic)                                                    \
      {                                                                      \
          return (WhichDiacritic);                                           \
      }                                                                      \
      if (WhichCase)                                                         \
      {                                                                      \
          return (WhichCase);                                                \
      }                                                                      \
      if (WhichExtra)                                                        \
      {                                                                      \
          if (GET_WT_FOUR(&WhichExtra))                                      \
          {                                                                  \
              return (GET_WT_FOUR(&WhichExtra));                             \
          }                                                                  \
          if (GET_WT_FIVE(&WhichExtra))                                      \
          {                                                                  \
              return (GET_WT_FIVE(&WhichExtra));                             \
          }                                                                  \
          if (GET_WT_SIX(&WhichExtra))                                       \
          {                                                                  \
              return (GET_WT_SIX(&WhichExtra));                              \
          }                                                                  \
          if (GET_WT_SEVEN(&WhichExtra))                                     \
          {                                                                  \
              return (GET_WT_SEVEN(&WhichExtra));                            \
          }                                                                  \
      }                                                                      \
      if (WhichJamo)                                                         \
      {                                                                      \
          return (WhichJamo);                                                \
      }                                                                      \
      if (WhichPunct1)                                                       \
      {                                                                      \
          return (WhichPunct1);                                              \
      }                                                                      \
      if (WhichPunct2)                                                       \
      {                                                                      \
          return (WhichPunct2);                                              \
      }                                                                      \
                                                                             \
      return (CSTR_EQUAL);                                                   \
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  //  GET_FAREAST_WEIGHT
  //
  //  Returns the weight for the far east special case in "wt".  This
  //  currently includes the Cho-on, the Repeat, and the Kana characters.
  //
  //  wt       - in/out: full weight of the current character (lvalue)
  //  uw       - out: unicode weight to compare (lvalue)
  //  mask     - mask applied to the weight of each preceding character
  //  pBegin   - pointer to the beginning of the string
  //  pCur     - pointer to the current character
  //  ExtraWt  - out: extra weights 4, 5, 6 and 7 (lvalue)
  //  fModify  - passed through to MAKE_UNICODE_WT / GET_UNICODE_MOD
  //
  //  For Repeat and Cho-On the string is walked backwards from pCur,
  //  skipping unsortable characters, nonspace marks and other Repeat /
  //  Cho-On characters, to find the character that supplies the weight.
  //  If no such character exists, or an expansion character is hit first,
  //  the result is the invalid weight: uw = CMP_INVALID_UW,
  //  wt = CMP_INVALID_FAREAST, ExtraWt = 0.
  //
  //  The variable pHashN of the enclosing function is used.  The macro
  //  declares locals named ct, PrevSM, PrevAW, PrevCW, AW, CW and PrevWt,
  //  so arguments must not refer to caller variables with those names.
  //
  //  Macro arguments are parenthesized wherever they are used.
  //
  //  08-19-93    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define GET_FAREAST_WEIGHT( wt,                                            \
                              uw,                                            \
                              mask,                                          \
                              pBegin,                                        \
                              pCur,                                          \
                              ExtraWt,                                       \
                              fModify )                                      \
  {                                                                          \
      int ct;                       /* loop counter */                       \
      BYTE PrevSM;                  /* previous script member value */       \
      BYTE PrevAW;                  /* previous alphanumeric value */        \
      BYTE PrevCW;                  /* previous case value */                \
      BYTE AW;                      /* alphanumeric value */                 \
      BYTE CW;                      /* case value */                         \
      DWORD PrevWt;                 /* previous weight */                    \
                                                                             \
                                                                             \
      /*                                                                     \
       *  Get the alphanumeric weight and the case weight of the            \
       *  current code point.                                                \
       */                                                                    \
      AW = GET_ALPHA_NUMERIC(&(wt));                                         \
      CW = GET_CASE(&(wt));                                                  \
      (ExtraWt) = (DWORD)0;                                                  \
                                                                             \
      /*                                                                     \
       *  Special case Repeat and Cho-On.                                    \
       *    AW = 0  => Repeat                                                \
       *    AW = 1  => Cho-On                                                \
       *    AW = 2+ => Kana                                                  \
       */                                                                    \
      if (AW <= MAX_SPECIAL_AW)                                              \
      {                                                                      \
          /*                                                                 \
           *  If the script member of the previous character is             \
           *  invalid, then give the special character an                   \
           *  invalid weight (highest possible weight) so that it           \
           *  will sort AFTER everything else.                              \
           */                                                                \
          ct = 1;                                                            \
          PrevWt = CMP_INVALID_FAREAST;                                      \
          while (((pCur) - ct) >= (pBegin))                                  \
          {                                                                  \
              PrevWt = GET_DWORD_WEIGHT(pHashN, *((pCur) - ct));             \
              PrevWt &= (mask);                                              \
              PrevSM = GET_SCRIPT_MEMBER(&PrevWt);                           \
              if (PrevSM < FAREAST_SPECIAL)                                  \
              {                                                              \
                  if (PrevSM == EXPANSION)                                   \
                  {                                                          \
                      /* stop here with the invalid weight */                \
                      PrevWt = CMP_INVALID_FAREAST;                          \
                  }                                                          \
                  else                                                       \
                  {                                                          \
                      /*                                                     \
                       *  UNSORTABLE or NONSPACE_MARK.                       \
                       *                                                     \
                       *  Just ignore these, since we only care about the   \
                       *  previous UW value.                                 \
                       */                                                    \
                      PrevWt = CMP_INVALID_FAREAST;                          \
                      ct++;                                                  \
                      continue;                                              \
                  }                                                          \
              }                                                              \
              else if (PrevSM == FAREAST_SPECIAL)                            \
              {                                                              \
                  PrevAW = GET_ALPHA_NUMERIC(&PrevWt);                       \
                  if (PrevAW <= MAX_SPECIAL_AW)                              \
                  {                                                          \
                      /*                                                     \
                       *  Handle case where two special chars follow        \
                       *  each other.  Keep going back in the string.       \
                       */                                                    \
                      PrevWt = CMP_INVALID_FAREAST;                          \
                      ct++;                                                  \
                      continue;                                              \
                  }                                                          \
                                                                             \
                  UNICODE_WT(&PrevWt) =                                      \
                      MAKE_UNICODE_WT(KANA, PrevAW, (fModify));              \
                                                                             \
                  /*                                                         \
                   *  Only build weights 4, 5, 6, and 7 if the              \
                   *  previous character is KANA.                            \
                   *                                                         \
                   *  Always:                                                \
                   *    4W = previous CW  &  ISOLATE_SMALL                   \
                   *    6W = previous CW  &  ISOLATE_KANA                    \
                   */                                                        \
                  PrevCW = GET_CASE(&PrevWt);                                \
                  GET_WT_FOUR(&(ExtraWt)) = PrevCW & ISOLATE_SMALL;          \
                  GET_WT_SIX(&(ExtraWt))  = PrevCW & ISOLATE_KANA;           \
                                                                             \
                  if (AW == AW_REPEAT)                                       \
                  {                                                          \
                      /*                                                     \
                       *  Repeat:                                            \
                       *    UW = previous UW                                 \
                       *    5W = WT_FIVE_REPEAT                              \
                       *    7W = previous CW  &  ISOLATE_WIDTH               \
                       */                                                    \
                      (uw) = UNICODE_WT(&PrevWt);                            \
                      GET_WT_FIVE(&(ExtraWt))  = WT_FIVE_REPEAT;             \
                      GET_WT_SEVEN(&(ExtraWt)) = PrevCW & ISOLATE_WIDTH;     \
                  }                                                          \
                  else                                                       \
                  {                                                          \
                      /*                                                     \
                       *  Cho-On:                                            \
                       *    UW = previous UW  &  CHO_ON_UW_MASK              \
                       *    5W = WT_FIVE_CHO_ON                              \
                       *    7W = current  CW  &  ISOLATE_WIDTH               \
                       */                                                    \
                      (uw) = UNICODE_WT(&PrevWt) & CHO_ON_UW_MASK;           \
                      GET_WT_FIVE(&(ExtraWt))  = WT_FIVE_CHO_ON;             \
                      GET_WT_SEVEN(&(ExtraWt)) = CW & ISOLATE_WIDTH;         \
                  }                                                          \
              }                                                              \
              else                                                           \
              {                                                              \
                  (uw) = GET_UNICODE_MOD(&PrevWt, (fModify));                \
              }                                                              \
                                                                             \
              break;                                                         \
          }                                                                  \
      }                                                                      \
      else                                                                   \
      {                                                                      \
          /*                                                                 \
           *  Kana:                                                          \
           *    SM = KANA                                                    \
           *    AW = current AW                                              \
           *    4W = current CW  &  ISOLATE_SMALL                            \
           *    5W = WT_FIVE_KANA                                            \
           *    6W = current CW  &  ISOLATE_KANA                             \
           *    7W = current CW  &  ISOLATE_WIDTH                            \
           */                                                                \
          (uw) = MAKE_UNICODE_WT(KANA, AW, (fModify));                       \
          GET_WT_FOUR(&(ExtraWt))  = CW & ISOLATE_SMALL;                     \
          GET_WT_FIVE(&(ExtraWt))  = WT_FIVE_KANA;                           \
          GET_WT_SIX(&(ExtraWt))   = CW & ISOLATE_KANA;                      \
          GET_WT_SEVEN(&(ExtraWt)) = CW & ISOLATE_WIDTH;                     \
      }                                                                      \
                                                                             \
      /*                                                                     \
       *  Get the weight for the far east special case and store it in wt.  \
       *  PrevWt is only examined on the Repeat / Cho-On path, where it     \
       *  has always been assigned.                                          \
       */                                                                    \
      if ((AW > MAX_SPECIAL_AW) || (PrevWt != CMP_INVALID_FAREAST))          \
      {                                                                      \
          /*                                                                 \
           *  Always:                                                        \
           *    DW = current DW                                              \
           *    CW = minimum CW                                              \
           */                                                                \
          UNICODE_WT(&(wt)) = (uw);                                          \
          CASE_WT(&(wt)) = MIN_CW;                                           \
      }                                                                      \
      else                                                                   \
      {                                                                      \
          (uw) = CMP_INVALID_UW;                                             \
          (wt) = CMP_INVALID_FAREAST;                                        \
          (ExtraWt) = 0;                                                     \
      }                                                                      \
  }
  536. //-------------------------------------------------------------------------//
  537. // API ROUTINES //
  538. //-------------------------------------------------------------------------//
  539. ////////////////////////////////////////////////////////////////////////////
  540. //
  541. // CompareStringW
  542. //
  543. // Compares two wide character strings of the same locale according to the
  544. // supplied locale handle.
  545. //
  546. // 05-31-91 JulieB Created.
  547. ////////////////////////////////////////////////////////////////////////////
  548. int WINAPI CompareStringW(
  549. LCID Locale,
  550. DWORD dwCmpFlags,
  551. LPCWSTR lpString1,
  552. int cchCount1,
  553. LPCWSTR lpString2,
  554. int cchCount2)
  555. {
  556. register LPWSTR pString1; // ptr to go thru string 1
  557. register LPWSTR pString2; // ptr to go thru string 2
  558. PLOC_HASH pHashN; // ptr to LOC hash node
  559. BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
  560. BOOL fModify; // flag to use modified script member weights
  561. DWORD State; // state table
  562. DWORD Mask; // mask for weights
  563. DWORD Weight1; // full weight of char - string 1
  564. DWORD Weight2; // full weight of char - string 2
  565. int JamoFlag = FALSE;
  566. LPCWSTR pLastJamo = lpString1;
  567. int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
  568. int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
  569. int WhichJamo; // XW for Jamo
  570. int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
  571. int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
  572. LPWSTR pSave1; // ptr to saved pString1
  573. LPWSTR pSave2; // ptr to saved pString2
  574. int cExpChar1, cExpChar2; // ct of expansions in tmp
  575. DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
  576. DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
  577. //
  578. // Invalid Parameter Check:
  579. // - validate LCID
  580. // - either string is null
  581. //
  582. VALIDATE_LANGUAGE(Locale, pHashN, 0, TRUE);
  583. if ((pHashN == NULL) ||
  584. (lpString1 == NULL) || (lpString2 == NULL))
  585. {
  586. SetLastError(ERROR_INVALID_PARAMETER);
  587. return (0);
  588. }
  589. //
  590. // Make sure the appropriate sorting tables are available. If not,
  591. // return an error.
  592. //
  593. if ((pHashN->pSortkey == NULL) ||
  594. (pHashN->IfIdeographFailure == TRUE))
  595. {
  596. KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
  597. SetLastError(ERROR_FILE_NOT_FOUND);
  598. return (0);
  599. }
  600. //
  601. // Call longer compare string if any of the following is true:
  602. // - compression locale
  603. // - either count is not -1
  604. // - dwCmpFlags is not 0 or ignore case (see NOTE below)
  605. // - locale is Korean - script member weight adjustment needed
  606. //
  607. // NOTE: If the value of NORM_IGNORECASE ever changes, this
  608. // code should check for:
  609. // ( (dwCmpFlags != 0) && (dwCmpFlags != NORM_IGNORECASE) )
  610. // Since NORM_IGNORECASE is equal to 1, we can optimize this
  611. // by checking for > 1.
  612. //
  613. dwCmpFlags &= (~LOCALE_USE_CP_ACP);
  614. fModify = IS_KOREAN(Locale);
  615. if ( (pHashN->IfCompression) ||
  616. (cchCount1 > -1) || (cchCount2 > -1) ||
  617. (dwCmpFlags > NORM_IGNORECASE) ||
  618. (fModify == TRUE) )
  619. {
  620. return (LongCompareStringW( pHashN,
  621. dwCmpFlags,
  622. lpString1,
  623. ((cchCount1 <= -1) ? -2 : cchCount1),
  624. lpString2,
  625. ((cchCount2 <= -1) ? -2 : cchCount2),
  626. fModify ));
  627. }
  628. //
  629. // Initialize string pointers.
  630. //
  631. pString1 = (LPWSTR)lpString1;
  632. pString2 = (LPWSTR)lpString2;
  633. //
  634. // Do a wchar by wchar compare.
  635. //
  636. while (TRUE)
  637. {
  638. //
  639. // See if characters are equal.
  640. // If characters are equal, increment pointers and continue
  641. // string compare.
  642. //
  643. // NOTE: Loop is unrolled 8 times for performance.
  644. //
  645. if ((*pString1 != *pString2) || (*pString1 == 0))
  646. {
  647. break;
  648. }
  649. pString1++;
  650. pString2++;
  651. if ((*pString1 != *pString2) || (*pString1 == 0))
  652. {
  653. break;
  654. }
  655. pString1++;
  656. pString2++;
  657. if ((*pString1 != *pString2) || (*pString1 == 0))
  658. {
  659. break;
  660. }
  661. pString1++;
  662. pString2++;
  663. if ((*pString1 != *pString2) || (*pString1 == 0))
  664. {
  665. break;
  666. }
  667. pString1++;
  668. pString2++;
  669. if ((*pString1 != *pString2) || (*pString1 == 0))
  670. {
  671. break;
  672. }
  673. pString1++;
  674. pString2++;
  675. if ((*pString1 != *pString2) || (*pString1 == 0))
  676. {
  677. break;
  678. }
  679. pString1++;
  680. pString2++;
  681. if ((*pString1 != *pString2) || (*pString1 == 0))
  682. {
  683. break;
  684. }
  685. pString1++;
  686. pString2++;
  687. if ((*pString1 != *pString2) || (*pString1 == 0))
  688. {
  689. break;
  690. }
  691. pString1++;
  692. pString2++;
  693. }
  694. //
  695. // If strings are both at null terminators, return equal.
  696. //
  697. if (*pString1 == *pString2)
  698. {
  699. return (CSTR_EQUAL);
  700. }
  701. //
  702. // Initialize flags, pointers, and counters.
  703. //
  704. fIgnorePunct = FALSE;
  705. WhichDiacritic = 0;
  706. WhichCase = 0;
  707. WhichJamo = 0;
  708. WhichPunct1 = 0;
  709. WhichPunct2 = 0;
  710. pSave1 = NULL;
  711. pSave2 = NULL;
  712. ExtraWt1 = (DWORD)0;
  713. WhichExtra = (DWORD)0;
  714. //
  715. // Switch on the different flag options. This will speed up
  716. // the comparisons of two strings that are different.
  717. //
  718. // The only two possibilities in this optimized section are
  719. // no flags and the ignore case flag.
  720. //
  721. if (dwCmpFlags == 0)
  722. {
  723. Mask = CMP_MASKOFF_NONE;
  724. }
  725. else
  726. {
  727. Mask = CMP_MASKOFF_CW;
  728. }
  729. State = (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
  730. State |= (STATE_CW | STATE_JAMO_WEIGHT);
  731. //
  732. // Compare each character's sortkey weight in the two strings.
  733. //
  734. while ((*pString1 != 0) && (*pString2 != 0))
  735. {
  736. Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
  737. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  738. Weight1 &= Mask;
  739. Weight2 &= Mask;
  740. if (Weight1 != Weight2)
  741. {
  742. BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
  743. BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
  744. WORD uw1 = GET_UNICODE_SM(&Weight1, sm1); // unicode weight 1
  745. WORD uw2 = GET_UNICODE_SM(&Weight2, sm2); // unicode weight 2
  746. BYTE dw1; // diacritic weight 1
  747. BYTE dw2; // diacritic weight 2
  748. BOOL fContinue; // flag to continue loop
  749. DWORD Wt; // temp weight holder
  750. WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
  751. WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
  752. //
  753. // If Unicode Weights are different and no special cases,
  754. // then we're done. Otherwise, we need to do extra checking.
  755. //
  756. // Must check ENTIRE string for any possibility of Unicode Weight
  757. // differences. As soon as a Unicode Weight difference is found,
  758. // then we're done. If no UW difference is found, then the
  759. // first Diacritic Weight difference is used. If no DW difference
  760. // is found, then use the first Case Difference. If no CW
  761. // difference is found, then use the first Extra Weight
  762. // difference. If no XW difference is found, then use the first
  763. // Special Weight difference.
  764. //
  765. if ((uw1 != uw2) ||
  766. (sm1 == FAREAST_SPECIAL) ||
  767. (sm1 == EXTENSION_A))
  768. {
  769. //
  770. // Initialize the continue flag.
  771. //
  772. fContinue = FALSE;
  773. //
  774. // Check for Unsortable characters and skip them.
  775. // This needs to be outside the switch statement. If EITHER
  776. // character is unsortable, must skip it and start over.
  777. //
  778. if (sm1 == UNSORTABLE)
  779. {
  780. pString1++;
  781. fContinue = TRUE;
  782. }
  783. if (sm2 == UNSORTABLE)
  784. {
  785. pString2++;
  786. fContinue = TRUE;
  787. }
  788. if (fContinue)
  789. {
  790. continue;
  791. }
  792. //
  793. // Switch on the script member of string 1 and take care
  794. // of any special cases.
  795. //
  796. switch (sm1)
  797. {
  798. case ( NONSPACE_MARK ) :
  799. {
  800. //
  801. // Nonspace only - look at diacritic weight only.
  802. //
  803. if ((WhichDiacritic == 0) ||
  804. (State & STATE_REVERSE_DW))
  805. {
  806. WhichDiacritic = CSTR_GREATER_THAN;
  807. //
  808. // Remove state from state machine.
  809. //
  810. REMOVE_STATE(STATE_DW);
  811. }
  812. //
  813. // Adjust pointer and set flags.
  814. //
  815. pString1++;
  816. fContinue = TRUE;
  817. break;
  818. }
  819. case ( PUNCTUATION ) :
  820. {
  821. //
  822. // If the ignore punctuation flag is set, then skip
  823. // over the punctuation.
  824. //
  825. if (fIgnorePunct)
  826. {
  827. pString1++;
  828. fContinue = TRUE;
  829. }
  830. else if (sm2 != PUNCTUATION)
  831. {
  832. //
  833. // The character in the second string is
  834. // NOT punctuation.
  835. //
  836. if (WhichPunct2)
  837. {
  838. //
  839. // Set WP 2 to show that string 2 is smaller,
  840. // since a punctuation char had already been
  841. // found at an earlier position in string 2.
  842. //
  843. // Set the Ignore Punctuation flag so we just
  844. // skip over any other punctuation chars in
  845. // the string.
  846. //
  847. WhichPunct2 = CSTR_GREATER_THAN;
  848. fIgnorePunct = TRUE;
  849. }
  850. else
  851. {
  852. //
  853. // Set WP 1 to show that string 2 is smaller,
  854. // and that string 1 has had a punctuation
  855. // char - since no punctuation chars have
  856. // been found in string 2.
  857. //
  858. WhichPunct1 = CSTR_GREATER_THAN;
  859. }
  860. //
  861. // Advance pointer 1, and set flag to true.
  862. //
  863. pString1++;
  864. fContinue = TRUE;
  865. }
  866. //
  867. // Do NOT want to advance the pointer in string 1 if
  868. // string 2 is also a punctuation char. This will
  869. // be done later.
  870. //
  871. break;
  872. }
  873. case ( EXPANSION ) :
  874. {
  875. //
  876. // Save pointer in pString1 so that it can be
  877. // restored.
  878. //
  879. if (pSave1 == NULL)
  880. {
  881. pSave1 = pString1;
  882. }
  883. pString1 = pTmpBuf1;
  884. //
  885. // Expand character into temporary buffer.
  886. //
  887. pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
  888. pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
  889. //
  890. // Set cExpChar1 to the number of expansion characters
  891. // stored.
  892. //
  893. cExpChar1 = MAX_TBL_EXPANSION;
  894. fContinue = TRUE;
  895. break;
  896. }
  897. case ( FAREAST_SPECIAL ) :
  898. {
  899. if (sm2 != EXPANSION)
  900. {
  901. //
  902. // Get the weight for the far east special case
  903. // and store it in Weight1.
  904. //
  905. GET_FAREAST_WEIGHT( Weight1,
  906. uw1,
  907. Mask,
  908. lpString1,
  909. pString1,
  910. ExtraWt1,
  911. FALSE );
  912. if (sm2 != FAREAST_SPECIAL)
  913. {
  914. //
  915. // The character in the second string is
  916. // NOT a fareast special char.
  917. //
  918. // Set each of weights 4, 5, 6, and 7 to show
  919. // that string 2 is smaller (if not already set).
  920. //
  921. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  922. (GET_WT_FOUR(&ExtraWt1) != 0))
  923. {
  924. GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
  925. }
  926. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  927. (GET_WT_FIVE(&ExtraWt1) != 0))
  928. {
  929. GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
  930. }
  931. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  932. (GET_WT_SIX(&ExtraWt1) != 0))
  933. {
  934. GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
  935. }
  936. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  937. (GET_WT_SEVEN(&ExtraWt1) != 0))
  938. {
  939. GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
  940. }
  941. }
  942. }
  943. break;
  944. }
  945. case ( JAMO_SPECIAL ) :
  946. {
  947. int ctr1; // dummy variables for FindJamoDifference
  948. LPWSTR pStr1 = pString1;
  949. LPWSTR pStr2 = pString2;
  950. //
  951. // Set the JamoFlag so we don't handle it again.
  952. //
  953. JamoFlag = TRUE;
  954. fContinue = FindJamoDifference(
  955. pHashN,
  956. &pStr1, &ctr1, -2, &Weight1,
  957. &pStr2, &ctr1, -2, &Weight2,
  958. &pLastJamo,
  959. &uw1, &uw2,
  960. &State,
  961. &WhichJamo,
  962. fModify );
  963. if (WhichJamo)
  964. {
  965. return (WhichJamo);
  966. }
  967. pString1 = pStr1;
  968. pString2 = pStr2;
  969. break;
  970. }
  971. case ( EXTENSION_A ) :
  972. {
  973. //
  974. // Compare the weights.
  975. //
  976. if (Weight1 == Weight2)
  977. {
  978. //
  979. // Adjust pointers and set flag.
  980. //
  981. pString1++; pString2++;
  982. fContinue = TRUE;
  983. }
  984. else
  985. {
  986. //
  987. // Get the actual UW to compare.
  988. //
  989. if (sm2 == EXTENSION_A)
  990. {
  991. //
  992. // Set the UW values to be the AW and DW since
  993. // both strings contain an extension A char.
  994. //
  995. uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
  996. GET_DIACRITIC(&Weight1),
  997. FALSE );
  998. uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
  999. GET_DIACRITIC(&Weight2),
  1000. FALSE );
  1001. }
  1002. else
  1003. {
  1004. //
  1005. // Only string1 contains an extension A char,
  1006. // so set the UW value to be the first UW
  1007. // value for extension A (default values):
  1008. // SM_EXT_A, AW_EXT_A
  1009. //
  1010. uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  1011. }
  1012. }
  1013. break;
  1014. }
  1015. case ( UNSORTABLE ) :
  1016. {
  1017. //
  1018. // Fill out the case statement so the compiler
  1019. // will use a jump table.
  1020. //
  1021. break;
  1022. }
  1023. }
  1024. //
  1025. // Switch on the script member of string 2 and take care
  1026. // of any special cases.
  1027. //
  1028. switch (sm2)
  1029. {
  1030. case ( NONSPACE_MARK ) :
  1031. {
  1032. //
  1033. // Nonspace only - look at diacritic weight only.
  1034. //
  1035. if ((WhichDiacritic == 0) ||
  1036. (State & STATE_REVERSE_DW))
  1037. {
  1038. WhichDiacritic = CSTR_LESS_THAN;
  1039. //
  1040. // Remove state from state machine.
  1041. //
  1042. REMOVE_STATE(STATE_DW);
  1043. }
  1044. //
  1045. // Adjust pointer and set flags.
  1046. //
  1047. pString2++;
  1048. fContinue = TRUE;
  1049. break;
  1050. }
  1051. case ( PUNCTUATION ) :
  1052. {
  1053. //
  1054. // If the ignore punctuation flag is set, then skip
  1055. // over the punctuation.
  1056. //
  1057. if (fIgnorePunct)
  1058. {
  1059. //
  1060. // Pointer 2 will be advanced after if-else
  1061. // statement.
  1062. //
  1063. ;
  1064. }
  1065. else if (sm1 != PUNCTUATION)
  1066. {
  1067. //
  1068. // The character in the first string is
  1069. // NOT punctuation.
  1070. //
  1071. if (WhichPunct1)
  1072. {
  1073. //
  1074. // Set WP 1 to show that string 1 is smaller,
  1075. // since a punctuation char had already
  1076. // been found at an earlier position in
  1077. // string 1.
  1078. //
  1079. // Set the Ignore Punctuation flag so we just
  1080. // skip over any other punctuation in the
  1081. // string.
  1082. //
  1083. WhichPunct1 = CSTR_LESS_THAN;
  1084. fIgnorePunct = TRUE;
  1085. }
  1086. else
  1087. {
  1088. //
  1089. // Set WP 2 to show that string 1 is smaller,
  1090. // and that string 2 has had a punctuation
  1091. // char - since no punctuation chars have
  1092. // been found in string 1.
  1093. //
  1094. WhichPunct2 = CSTR_LESS_THAN;
  1095. }
  1096. //
  1097. // Pointer 2 will be advanced after if-else
  1098. // statement.
  1099. //
  1100. }
  1101. else
  1102. {
  1103. //
  1104. // Both code points are punctuation.
  1105. //
  1106. // See if either of the strings has encountered
  1107. // punctuation chars previous to this.
  1108. //
  1109. if (WhichPunct1)
  1110. {
  1111. //
  1112. // String 1 has had a punctuation char, so
  1113. // it should be the smaller string (since
  1114. // both have punctuation chars).
  1115. //
  1116. WhichPunct1 = CSTR_LESS_THAN;
  1117. }
  1118. else if (WhichPunct2)
  1119. {
  1120. //
  1121. // String 2 has had a punctuation char, so
  1122. // it should be the smaller string (since
  1123. // both have punctuation chars).
  1124. //
  1125. WhichPunct2 = CSTR_GREATER_THAN;
  1126. }
  1127. else
  1128. {
  1129. //
  1130. // Position is the same, so compare the
  1131. // special weights. Set WhichPunct1 to
  1132. // the smaller special weight.
  1133. //
  1134. WhichPunct1 = (((GET_ALPHA_NUMERIC(&Weight1) <
  1135. GET_ALPHA_NUMERIC(&Weight2)))
  1136. ? CSTR_LESS_THAN
  1137. : CSTR_GREATER_THAN);
  1138. }
  1139. //
  1140. // Set the Ignore Punctuation flag so we just
  1141. // skip over any other punctuation in the string.
  1142. //
  1143. fIgnorePunct = TRUE;
  1144. //
  1145. // Advance pointer 1. Pointer 2 will be
  1146. // advanced after if-else statement.
  1147. //
  1148. pString1++;
  1149. }
  1150. //
  1151. // Advance pointer 2 and set flag to true.
  1152. //
  1153. pString2++;
  1154. fContinue = TRUE;
  1155. break;
  1156. }
  1157. case ( EXPANSION ) :
  1158. {
  1159. //
  1160. // Save pointer in pString2 so that it can be
  1161. // restored.
  1162. //
  1163. if (pSave2 == NULL)
  1164. {
  1165. pSave2 = pString2;
  1166. }
  1167. pString2 = pTmpBuf2;
  1168. //
  1169. // Expand character into temporary buffer.
  1170. //
  1171. pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
  1172. pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
  1173. //
  1174. // Set cExpChar2 to the number of expansion characters
  1175. // stored.
  1176. //
  1177. cExpChar2 = MAX_TBL_EXPANSION;
  1178. fContinue = TRUE;
  1179. break;
  1180. }
  1181. case ( FAREAST_SPECIAL ) :
  1182. {
  1183. if (sm1 != EXPANSION)
  1184. {
  1185. //
  1186. // Get the weight for the far east special case
  1187. // and store it in Weight2.
  1188. //
  1189. GET_FAREAST_WEIGHT( Weight2,
  1190. uw2,
  1191. Mask,
  1192. lpString2,
  1193. pString2,
  1194. ExtraWt2,
  1195. FALSE );
  1196. if (sm1 != FAREAST_SPECIAL)
  1197. {
  1198. //
  1199. // The character in the first string is
  1200. // NOT a fareast special char.
  1201. //
  1202. // Set each of weights 4, 5, 6, and 7 to show
  1203. // that string 1 is smaller (if not already set).
  1204. //
  1205. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  1206. (GET_WT_FOUR(&ExtraWt2) != 0))
  1207. {
  1208. GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
  1209. }
  1210. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  1211. (GET_WT_FIVE(&ExtraWt2) != 0))
  1212. {
  1213. GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
  1214. }
  1215. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  1216. (GET_WT_SIX(&ExtraWt2) != 0))
  1217. {
  1218. GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
  1219. }
  1220. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  1221. (GET_WT_SEVEN(&ExtraWt2) != 0))
  1222. {
  1223. GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
  1224. }
  1225. }
  1226. else
  1227. {
  1228. //
  1229. // Characters in both strings are fareast
  1230. // special chars.
  1231. //
  1232. // Set each of weights 4, 5, 6, and 7
  1233. // appropriately (if not already set).
  1234. //
  1235. if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
  1236. ( GET_WT_FOUR(&ExtraWt1) !=
  1237. GET_WT_FOUR(&ExtraWt2) ) )
  1238. {
  1239. GET_WT_FOUR(&WhichExtra) =
  1240. ( GET_WT_FOUR(&ExtraWt1) <
  1241. GET_WT_FOUR(&ExtraWt2) )
  1242. ? CSTR_LESS_THAN
  1243. : CSTR_GREATER_THAN;
  1244. }
  1245. if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
  1246. ( GET_WT_FIVE(&ExtraWt1) !=
  1247. GET_WT_FIVE(&ExtraWt2) ) )
  1248. {
  1249. GET_WT_FIVE(&WhichExtra) =
  1250. ( GET_WT_FIVE(&ExtraWt1) <
  1251. GET_WT_FIVE(&ExtraWt2) )
  1252. ? CSTR_LESS_THAN
  1253. : CSTR_GREATER_THAN;
  1254. }
  1255. if ( (GET_WT_SIX(&WhichExtra) == 0) &&
  1256. ( GET_WT_SIX(&ExtraWt1) !=
  1257. GET_WT_SIX(&ExtraWt2) ) )
  1258. {
  1259. GET_WT_SIX(&WhichExtra) =
  1260. ( GET_WT_SIX(&ExtraWt1) <
  1261. GET_WT_SIX(&ExtraWt2) )
  1262. ? CSTR_LESS_THAN
  1263. : CSTR_GREATER_THAN;
  1264. }
  1265. if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
  1266. ( GET_WT_SEVEN(&ExtraWt1) !=
  1267. GET_WT_SEVEN(&ExtraWt2) ) )
  1268. {
  1269. GET_WT_SEVEN(&WhichExtra) =
  1270. ( GET_WT_SEVEN(&ExtraWt1) <
  1271. GET_WT_SEVEN(&ExtraWt2) )
  1272. ? CSTR_LESS_THAN
  1273. : CSTR_GREATER_THAN;
  1274. }
  1275. }
  1276. }
  1277. break;
  1278. }
  1279. case ( JAMO_SPECIAL ) :
  1280. {
  1281. if (!JamoFlag)
  1282. {
  1283. int ctr1, ctr2; // dummy variables for FindJamoDifference
  1284. LPWSTR pStr1 = pString1;
  1285. LPWSTR pStr2 = pString2;
  1286. //
  1287. // Set the JamoFlag so we don't handle it again.
  1288. //
  1289. JamoFlag = TRUE;
  1290. fContinue = FindJamoDifference(
  1291. pHashN,
  1292. &pStr1, &ctr1, -2, &Weight1,
  1293. &pStr2, &ctr2, -2, &Weight2,
  1294. &pLastJamo,
  1295. &uw1, &uw2,
  1296. &State,
  1297. &WhichJamo,
  1298. fModify );
  1299. if (WhichJamo)
  1300. {
  1301. return (WhichJamo);
  1302. }
  1303. pString1 = pStr1;
  1304. pString2 = pStr2;
  1305. }
  1306. else
  1307. {
  1308. JamoFlag = FALSE;
  1309. }
  1310. break;
  1311. }
  1312. case ( EXTENSION_A ) :
  1313. {
  1314. //
  1315. // If sm1 is an extension A character, then
  1316. // both sm1 and sm2 have been handled. We should
  1317. // only get here when either sm1 is not an
  1318. // extension A character or the two extension A
  1319. // characters are different.
  1320. //
  1321. if (sm1 != EXTENSION_A)
  1322. {
  1323. //
  1324. // Get the actual UW to compare.
  1325. //
  1326. // Only string2 contains an extension A char,
  1327. // so set the UW value to be the first UW
  1328. // value for extension A (default values):
  1329. // SM_EXT_A, AW_EXT_A
  1330. //
  1331. uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  1332. }
  1333. //
  1334. // We should then fall through to the comparison
  1335. // of the Unicode weights.
  1336. //
  1337. break;
  1338. }
  1339. case ( UNSORTABLE ) :
  1340. {
  1341. //
  1342. // Fill out the case statement so the compiler
  1343. // will use a jump table.
  1344. //
  1345. break;
  1346. }
  1347. }
  1348. //
  1349. // See if the comparison should start again.
  1350. //
  1351. if (fContinue)
  1352. {
  1353. continue;
  1354. }
  1355. //
  1356. // We're not supposed to drop down into the state table if
  1357. // unicode weights are different, so stop comparison and
  1358. // return result of unicode weight comparison.
  1359. //
  1360. if (uw1 != uw2)
  1361. {
  1362. return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
  1363. }
  1364. }
  1365. //
  1366. // For each state in the state table, do the appropriate
  1367. // comparisons. (UW1 == UW2)
  1368. //
  1369. if (State & (STATE_DW | STATE_REVERSE_DW))
  1370. {
  1371. //
  1372. // Get the diacritic weights.
  1373. //
  1374. dw1 = GET_DIACRITIC(&Weight1);
  1375. dw2 = GET_DIACRITIC(&Weight2);
  1376. if (dw1 != dw2)
  1377. {
  1378. //
  1379. // Look ahead to see if diacritic follows a
  1380. // minimum diacritic weight. If so, get the
  1381. // diacritic weight of the nonspace mark.
  1382. //
  1383. while (*(pString1 + 1) != 0)
  1384. {
  1385. Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
  1386. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  1387. {
  1388. dw1 += GET_DIACRITIC(&Wt);
  1389. pString1++;
  1390. }
  1391. else
  1392. {
  1393. break;
  1394. }
  1395. }
  1396. while (*(pString2 + 1) != 0)
  1397. {
  1398. Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
  1399. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  1400. {
  1401. dw2 += GET_DIACRITIC(&Wt);
  1402. pString2++;
  1403. }
  1404. else
  1405. {
  1406. break;
  1407. }
  1408. }
  1409. //
  1410. // Save which string has the smaller diacritic
  1411. // weight if the diacritic weights are still
  1412. // different.
  1413. //
  1414. if (dw1 != dw2)
  1415. {
  1416. WhichDiacritic = (dw1 < dw2)
  1417. ? CSTR_LESS_THAN
  1418. : CSTR_GREATER_THAN;
  1419. //
  1420. // Remove state from state machine.
  1421. //
  1422. REMOVE_STATE(STATE_DW);
  1423. }
  1424. }
  1425. }
  1426. if (State & STATE_CW)
  1427. {
  1428. //
  1429. // Get the case weights.
  1430. //
  1431. if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
  1432. {
  1433. //
  1434. // Save which string has the smaller case weight.
  1435. //
  1436. WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
  1437. ? CSTR_LESS_THAN
  1438. : CSTR_GREATER_THAN;
  1439. //
  1440. // Remove state from state machine.
  1441. //
  1442. REMOVE_STATE(STATE_CW);
  1443. }
  1444. }
  1445. }
  1446. //
  1447. // Fixup the pointers.
  1448. //
  1449. POINTER_FIXUP();
  1450. }
  1451. //
  1452. // If the end of BOTH strings has been reached, then the unicode
  1453. // weights match exactly. Check the diacritic, case and special
  1454. // weights. If all are zero, then return success. Otherwise,
  1455. // return the result of the weight difference.
  1456. //
  1457. // NOTE: The following checks MUST REMAIN IN THIS ORDER:
  1458. // Diacritic, Case, Punctuation.
  1459. //
  1460. if (*pString1 == 0)
  1461. {
  1462. if (*pString2 == 0)
  1463. {
  1464. if (WhichDiacritic)
  1465. {
  1466. return (WhichDiacritic);
  1467. }
  1468. if (WhichCase)
  1469. {
  1470. return (WhichCase);
  1471. }
  1472. if (WhichExtra)
  1473. {
  1474. if (GET_WT_FOUR(&WhichExtra))
  1475. {
  1476. return (GET_WT_FOUR(&WhichExtra));
  1477. }
  1478. if (GET_WT_FIVE(&WhichExtra))
  1479. {
  1480. return (GET_WT_FIVE(&WhichExtra));
  1481. }
  1482. if (GET_WT_SIX(&WhichExtra))
  1483. {
  1484. return (GET_WT_SIX(&WhichExtra));
  1485. }
  1486. if (GET_WT_SEVEN(&WhichExtra))
  1487. {
  1488. return (GET_WT_SEVEN(&WhichExtra));
  1489. }
  1490. }
  1491. if (WhichPunct1)
  1492. {
  1493. return (WhichPunct1);
  1494. }
  1495. if (WhichPunct2)
  1496. {
  1497. return (WhichPunct2);
  1498. }
  1499. return (CSTR_EQUAL);
  1500. }
  1501. else
  1502. {
  1503. //
  1504. // String 2 is longer.
  1505. //
  1506. pString1 = pString2;
  1507. }
  1508. }
  1509. //
  1510. // Scan to the end of the longer string.
  1511. //
  1512. QUICK_SCAN_LONGER_STRING( pString1,
  1513. ((*pString2 == 0)
  1514. ? CSTR_GREATER_THAN
  1515. : CSTR_LESS_THAN) );
  1516. }
  1517. ////////////////////////////////////////////////////////////////////////////
  1518. //
  1519. // GetStringTypeExW
  1520. //
  1521. // Returns character type information about a particular Unicode string.
  1522. //
  1523. // 01-18-94 JulieB Created.
  1524. ////////////////////////////////////////////////////////////////////////////
  1525. BOOL WINAPI GetStringTypeExW(
  1526. LCID Locale,
  1527. DWORD dwInfoType,
  1528. LPCWSTR lpSrcStr,
  1529. int cchSrc,
  1530. LPWORD lpCharType)
  1531. {
  1532. PLOC_HASH pHashN; // ptr to LOC hash node
  1533. //
  1534. // Invalid Parameter Check:
  1535. // - Validate LCID
  1536. //
  1537. VALIDATE_LOCALE(Locale, pHashN, FALSE);
  1538. if (pHashN == NULL)
  1539. {
  1540. SetLastError(ERROR_INVALID_PARAMETER);
  1541. return (0);
  1542. }
  1543. //
  1544. // Return the result of GetStringTypeW.
  1545. //
  1546. return (GetStringTypeW( dwInfoType,
  1547. lpSrcStr,
  1548. cchSrc,
  1549. lpCharType ));
  1550. }
  1551. ////////////////////////////////////////////////////////////////////////////
  1552. //
  1553. // GetStringTypeW
  1554. //
  1555. // Returns character type information about a particular Unicode string.
  1556. //
  1557. // NOTE: The number of parameters is different from GetStringTypeA.
  1558. // The 16-bit OLE product shipped GetStringTypeA with the wrong
  1559. // parameters (ported from Chicago) and now we must support it.
  1560. //
  1561. // Use GetStringTypeEx to get the same set of parameters between
  1562. // the A and W version.
  1563. //
  1564. // 05-31-91 JulieB Created.
  1565. ////////////////////////////////////////////////////////////////////////////
  1566. BOOL WINAPI GetStringTypeW(
  1567. DWORD dwInfoType,
  1568. LPCWSTR lpSrcStr,
  1569. int cchSrc,
  1570. LPWORD lpCharType)
  1571. {
  1572. int Ctr; // loop counter
  1573. //
  1574. // Invalid Parameter Check:
  1575. // - lpSrcStr NULL
  1576. // - cchSrc is 0
  1577. // - lpCharType NULL
  1578. // - same buffer - src and destination
  1579. // - (flags will be checked in switch statement below)
  1580. //
  1581. if ( (lpSrcStr == NULL) || (cchSrc == 0) ||
  1582. (lpCharType == NULL) || (lpSrcStr == lpCharType) )
  1583. {
  1584. SetLastError(ERROR_INVALID_PARAMETER);
  1585. return (FALSE);
  1586. }
  1587. //
  1588. // If cchSrc is -1, then the source string is null terminated and we
  1589. // need to get the length of the source string. Add one to the
  1590. // length to include the null termination.
  1591. // (This will always be at least 1.)
  1592. //
  1593. if (cchSrc <= -1)
  1594. {
  1595. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  1596. }
  1597. //
  1598. // Make sure the ctype table is mapped in.
  1599. //
  1600. if (GetCTypeFileInfo())
  1601. {
  1602. SetLastError(ERROR_FILE_NOT_FOUND);
  1603. return (FALSE);
  1604. }
  1605. //
  1606. // Return the appropriate information in the lpCharType parameter
  1607. // based on the dwInfoType parameter.
  1608. //
  1609. switch (dwInfoType)
  1610. {
  1611. case ( CT_CTYPE1 ) :
  1612. {
  1613. //
  1614. // Return the ctype 1 information for the string.
  1615. //
  1616. for (Ctr = 0; Ctr < cchSrc; Ctr++)
  1617. {
  1618. lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType1);
  1619. }
  1620. break;
  1621. }
  1622. case ( CT_CTYPE2 ) :
  1623. {
  1624. //
  1625. // Return the ctype 2 information.
  1626. //
  1627. for (Ctr = 0; Ctr < cchSrc; Ctr++)
  1628. {
  1629. lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType2);
  1630. }
  1631. break;
  1632. }
  1633. case ( CT_CTYPE3 ) :
  1634. {
  1635. //
  1636. // Return the ctype 3 information.
  1637. //
  1638. for (Ctr = 0; Ctr < cchSrc; Ctr++)
  1639. {
  1640. lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType3);
  1641. }
  1642. break;
  1643. }
  1644. default :
  1645. {
  1646. //
  1647. // Invalid flag parameter, so return failure.
  1648. //
  1649. SetLastError(ERROR_INVALID_FLAGS);
  1650. return (FALSE);
  1651. }
  1652. }
  1653. //
  1654. // Return success.
  1655. //
  1656. return (TRUE);
  1657. }
  1658. //-------------------------------------------------------------------------//
  1659. // INTERNAL ROUTINES //
  1660. //-------------------------------------------------------------------------//
  1661. ////////////////////////////////////////////////////////////////////////////
  1662. //
  1663. // LongCompareStringW
  1664. //
  1665. // Compares two wide character strings of the same locale according to the
  1666. // supplied locale handle.
  1667. //
  1668. // 05-31-91 JulieB Created.
  1669. ////////////////////////////////////////////////////////////////////////////
  1670. int LongCompareStringW(
  1671. PLOC_HASH pHashN,
  1672. DWORD dwCmpFlags,
  1673. LPCWSTR lpString1,
  1674. int cchCount1,
  1675. LPCWSTR lpString2,
  1676. int cchCount2,
  1677. BOOL fModify)
  1678. {
  1679. int ctr1 = cchCount1; // loop counter for string 1
  1680. int ctr2 = cchCount2; // loop counter for string 2
  1681. register LPWSTR pString1; // ptr to go thru string 1
  1682. register LPWSTR pString2; // ptr to go thru string 2
  1683. BOOL IfCompress; // if compression in locale
  1684. BOOL IfDblCompress1; // if double compression in string 1
  1685. BOOL IfDblCompress2; // if double compression in string 2
  1686. BOOL fEnd1; // if at end of string 1
  1687. BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
  1688. BOOL fIgnoreDiacritic; // flag to ignore diacritics
  1689. BOOL fIgnoreSymbol; // flag to ignore symbols
  1690. BOOL fStringSort; // flag to use string sort
  1691. DWORD State; // state table
  1692. DWORD Mask; // mask for weights
  1693. DWORD Weight1; // full weight of char - string 1
  1694. DWORD Weight2; // full weight of char - string 2
  1695. int JamoFlag = FALSE;
  1696. LPCWSTR pLastJamo = lpString1;
  1697. int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
  1698. int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
  1699. int WhichJamo; // XW for Jamo
  1700. int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
  1701. int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
  1702. LPWSTR pSave1; // ptr to saved pString1
  1703. LPWSTR pSave2; // ptr to saved pString2
  1704. int cExpChar1, cExpChar2; // ct of expansions in tmp
  1705. DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
  1706. DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
  1707. //
  1708. // Initialize string pointers.
  1709. //
  1710. pString1 = (LPWSTR)lpString1;
  1711. pString2 = (LPWSTR)lpString2;
  1712. //
  1713. // Invalid Flags Check:
  1714. // - invalid flags
  1715. //
  1716. if (dwCmpFlags & CS_INVALID_FLAG)
  1717. {
  1718. SetLastError(ERROR_INVALID_FLAGS);
  1719. return (0);
  1720. }
  1721. //
  1722. // See if we should stop on the null terminator regardless of the
  1723. // count values. The original count values are stored in ctr1 and ctr2
  1724. // above, so it's ok to set these here.
  1725. //
  1726. if (dwCmpFlags & NORM_STOP_ON_NULL)
  1727. {
  1728. cchCount1 = cchCount2 = -2;
  1729. }
  1730. //
  1731. // Check if compression in the given locale. If not, then
  1732. // try a wchar by wchar compare. If strings are equal, this
  1733. // will be quick.
  1734. //
  1735. if ((IfCompress = pHashN->IfCompression) == FALSE)
  1736. {
  1737. //
  1738. // Compare each wide character in the two strings.
  1739. //
  1740. while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
  1741. NOT_END_STRING(ctr2, pString2, cchCount2) )
  1742. {
  1743. //
  1744. // See if characters are equal.
  1745. //
  1746. if (*pString1 == *pString2)
  1747. {
  1748. //
  1749. // Characters are equal, so increment pointers,
  1750. // decrement counters, and continue string compare.
  1751. //
  1752. pString1++;
  1753. pString2++;
  1754. ctr1--;
  1755. ctr2--;
  1756. }
  1757. else
  1758. {
  1759. //
  1760. // Difference was found. Fall into the sortkey
  1761. // check below.
  1762. //
  1763. break;
  1764. }
  1765. }
  1766. //
  1767. // If the end of BOTH strings has been reached, then the strings
  1768. // match exactly. Return success.
  1769. //
  1770. if ( AT_STRING_END(ctr1, pString1, cchCount1) &&
  1771. AT_STRING_END(ctr2, pString2, cchCount2) )
  1772. {
  1773. return (CSTR_EQUAL);
  1774. }
  1775. }
  1776. //
  1777. // Initialize flags, pointers, and counters.
  1778. //
  1779. fIgnorePunct = dwCmpFlags & NORM_IGNORESYMBOLS;
  1780. fIgnoreDiacritic = dwCmpFlags & NORM_IGNORENONSPACE;
  1781. fIgnoreSymbol = fIgnorePunct;
  1782. fStringSort = dwCmpFlags & SORT_STRINGSORT;
  1783. WhichDiacritic = 0;
  1784. WhichCase = 0;
  1785. WhichJamo = 0;
  1786. WhichPunct1 = 0;
  1787. WhichPunct2 = 0;
  1788. pSave1 = NULL;
  1789. pSave2 = NULL;
  1790. ExtraWt1 = (DWORD)0;
  1791. WhichExtra = (DWORD)0;
  1792. //
  1793. // Set the weights to be invalid. This flags whether or not to
  1794. // recompute the weights next time through the loop. It also flags
  1795. // whether or not to start over (continue) in the loop.
  1796. //
  1797. Weight1 = CMP_INVALID_WEIGHT;
  1798. Weight2 = CMP_INVALID_WEIGHT;
  1799. //
  1800. // Switch on the different flag options. This will speed up
  1801. // the comparisons of two strings that are different.
  1802. //
  1803. State = STATE_CW | STATE_JAMO_WEIGHT;
  1804. switch (dwCmpFlags & (NORM_IGNORECASE | NORM_IGNORENONSPACE))
  1805. {
  1806. case ( 0 ) :
  1807. {
  1808. Mask = CMP_MASKOFF_NONE;
  1809. State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
  1810. break;
  1811. }
  1812. case ( NORM_IGNORECASE ) :
  1813. {
  1814. Mask = CMP_MASKOFF_CW;
  1815. State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
  1816. break;
  1817. }
  1818. case ( NORM_IGNORENONSPACE ) :
  1819. {
  1820. Mask = CMP_MASKOFF_DW;
  1821. break;
  1822. }
  1823. case ( NORM_IGNORECASE | NORM_IGNORENONSPACE ) :
  1824. {
  1825. Mask = CMP_MASKOFF_DW_CW;
  1826. break;
  1827. }
  1828. }
  1829. switch (dwCmpFlags & (NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH))
  1830. {
  1831. case ( 0 ) :
  1832. {
  1833. break;
  1834. }
  1835. case ( NORM_IGNOREKANATYPE ) :
  1836. {
  1837. Mask &= CMP_MASKOFF_KANA;
  1838. break;
  1839. }
  1840. case ( NORM_IGNOREWIDTH ) :
  1841. {
  1842. Mask &= CMP_MASKOFF_WIDTH;
  1843. if (dwCmpFlags & NORM_IGNORECASE)
  1844. {
  1845. REMOVE_STATE(STATE_CW);
  1846. }
  1847. break;
  1848. }
  1849. case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1850. {
  1851. Mask &= CMP_MASKOFF_KANA_WIDTH;
  1852. if (dwCmpFlags & NORM_IGNORECASE)
  1853. {
  1854. REMOVE_STATE(STATE_CW);
  1855. }
  1856. break;
  1857. }
  1858. }
  1859. //
  1860. // Compare each character's sortkey weight in the two strings.
  1861. //
  1862. while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
  1863. NOT_END_STRING(ctr2, pString2, cchCount2) )
  1864. {
  1865. if (Weight1 == CMP_INVALID_WEIGHT)
  1866. {
  1867. Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
  1868. Weight1 &= Mask;
  1869. }
  1870. if (Weight2 == CMP_INVALID_WEIGHT)
  1871. {
  1872. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  1873. Weight2 &= Mask;
  1874. }
  1875. //
  1876. // If compression locale, then need to check for compression
  1877. // characters even if the weights are equal. If it's not a
  1878. // compression locale, then we don't need to check anything
  1879. // if the weights are equal.
  1880. //
  1881. if ( (IfCompress) &&
  1882. (GET_COMPRESSION(&Weight1) || GET_COMPRESSION(&Weight2)) )
  1883. {
  1884. int ctr; // loop counter
  1885. PCOMPRESS_3 pComp3; // ptr to compress 3 table
  1886. PCOMPRESS_2 pComp2; // ptr to compress 2 table
  1887. int If1; // if compression found in string 1
  1888. int If2; // if compression found in string 2
  1889. int CompVal; // compression value
  1890. int IfEnd1; // if exists 1 more char in string 1
  1891. int IfEnd2; // if exists 1 more char in string 2
  1892. //
  1893. // Check for compression in the weights.
  1894. //
  1895. If1 = GET_COMPRESSION(&Weight1);
  1896. If2 = GET_COMPRESSION(&Weight2);
  1897. CompVal = ((If1 > If2) ? If1 : If2);
  1898. IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
  1899. IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
  1900. if (pHashN->IfDblCompression == FALSE)
  1901. {
  1902. //
  1903. // NO double compression, so don't check for it.
  1904. //
  1905. switch (CompVal)
  1906. {
  1907. //
  1908. // Check for 3 characters compressing to 1.
  1909. //
  1910. case ( COMPRESS_3_MASK ) :
  1911. {
  1912. //
  1913. // Check character in string 1 and string 2.
  1914. //
  1915. if ( ((If1) && (!IfEnd1) &&
  1916. !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1)) ||
  1917. ((If2) && (!IfEnd2) &&
  1918. !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2)) )
  1919. {
  1920. ctr = pHashN->pCompHdr->Num3;
  1921. pComp3 = pHashN->pCompress3;
  1922. for (; ctr > 0; ctr--, pComp3++)
  1923. {
  1924. //
  1925. // Check character in string 1.
  1926. //
  1927. if ( (If1) && (!IfEnd1) &&
  1928. !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) &&
  1929. (pComp3->UCP1 == *pString1) &&
  1930. (pComp3->UCP2 == *(pString1 + 1)) &&
  1931. (pComp3->UCP3 == *(pString1 + 2)) )
  1932. {
  1933. //
  1934. // Found compression for string 1.
  1935. // Get new weight and mask it.
  1936. // Increment pointer and decrement counter.
  1937. //
  1938. Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  1939. Weight1 &= Mask;
  1940. pString1 += 2;
  1941. ctr1 -= 2;
  1942. //
  1943. // Set boolean for string 1 - search is
  1944. // complete.
  1945. //
  1946. If1 = 0;
  1947. //
  1948. // Break out of loop if both searches are
  1949. // done.
  1950. //
  1951. if (If2 == 0)
  1952. {
  1953. break;
  1954. }
  1955. }
  1956. //
  1957. // Check character in string 2.
  1958. //
  1959. if ( (If2) && (!IfEnd2) &&
  1960. !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) &&
  1961. (pComp3->UCP1 == *pString2) &&
  1962. (pComp3->UCP2 == *(pString2 + 1)) &&
  1963. (pComp3->UCP3 == *(pString2 + 2)) )
  1964. {
  1965. //
  1966. // Found compression for string 2.
  1967. // Get new weight and mask it.
  1968. // Increment pointer and decrement counter.
  1969. //
  1970. Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  1971. Weight2 &= Mask;
  1972. pString2 += 2;
  1973. ctr2 -= 2;
  1974. //
  1975. // Set boolean for string 2 - search is
  1976. // complete.
  1977. //
  1978. If2 = 0;
  1979. //
  1980. // Break out of loop if both searches are
  1981. // done.
  1982. //
  1983. if (If1 == 0)
  1984. {
  1985. break;
  1986. }
  1987. }
  1988. }
  1989. if (ctr > 0)
  1990. {
  1991. break;
  1992. }
  1993. }
  1994. //
  1995. // Fall through if not found.
  1996. //
  1997. }
  1998. //
  1999. // Check for 2 characters compressing to 1.
  2000. //
  2001. case ( COMPRESS_2_MASK ) :
  2002. {
  2003. //
  2004. // Check character in string 1 and string 2.
  2005. //
  2006. if ( ((If1) && (!IfEnd1)) ||
  2007. ((If2) && (!IfEnd2)) )
  2008. {
  2009. ctr = pHashN->pCompHdr->Num2;
  2010. pComp2 = pHashN->pCompress2;
  2011. for (; ((ctr > 0) && (If1 || If2)); ctr--, pComp2++)
  2012. {
  2013. //
  2014. // Check character in string 1.
  2015. //
  2016. if ( (If1) &&
  2017. (!IfEnd1) &&
  2018. (pComp2->UCP1 == *pString1) &&
  2019. (pComp2->UCP2 == *(pString1 + 1)) )
  2020. {
  2021. //
  2022. // Found compression for string 1.
  2023. // Get new weight and mask it.
  2024. // Increment pointer and decrement counter.
  2025. //
  2026. Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2027. Weight1 &= Mask;
  2028. pString1++;
  2029. ctr1--;
  2030. //
  2031. // Set boolean for string 1 - search is
  2032. // complete.
  2033. //
  2034. If1 = 0;
  2035. //
  2036. // Break out of loop if both searches are
  2037. // done.
  2038. //
  2039. if (If2 == 0)
  2040. {
  2041. break;
  2042. }
  2043. }
  2044. //
  2045. // Check character in string 2.
  2046. //
  2047. if ( (If2) &&
  2048. (!IfEnd2) &&
  2049. (pComp2->UCP1 == *pString2) &&
  2050. (pComp2->UCP2 == *(pString2 + 1)) )
  2051. {
  2052. //
  2053. // Found compression for string 2.
  2054. // Get new weight and mask it.
  2055. // Increment pointer and decrement counter.
  2056. //
  2057. Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2058. Weight2 &= Mask;
  2059. pString2++;
  2060. ctr2--;
  2061. //
  2062. // Set boolean for string 2 - search is
  2063. // complete.
  2064. //
  2065. If2 = 0;
  2066. //
  2067. // Break out of loop if both searches are
  2068. // done.
  2069. //
  2070. if (If1 == 0)
  2071. {
  2072. break;
  2073. }
  2074. }
  2075. }
  2076. if (ctr > 0)
  2077. {
  2078. break;
  2079. }
  2080. }
  2081. }
  2082. }
  2083. }
  2084. else if (!IfEnd1 && !IfEnd2)
  2085. {
  2086. //
  2087. // Double Compression exists, so must check for it.
  2088. //
  2089. if (IfDblCompress1 =
  2090. ((GET_DWORD_WEIGHT(pHashN, *pString1) & CMP_MASKOFF_CW) ==
  2091. (GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)) & CMP_MASKOFF_CW)))
  2092. {
  2093. //
  2094. // Advance past the first code point to get to the
  2095. // compression character.
  2096. //
  2097. pString1++;
  2098. ctr1--;
  2099. IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
  2100. }
  2101. if (IfDblCompress2 =
  2102. ((GET_DWORD_WEIGHT(pHashN, *pString2) & CMP_MASKOFF_CW) ==
  2103. (GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)) & CMP_MASKOFF_CW)))
  2104. {
  2105. //
  2106. // Advance past the first code point to get to the
  2107. // compression character.
  2108. //
  2109. pString2++;
  2110. ctr2--;
  2111. IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
  2112. }
  2113. switch (CompVal)
  2114. {
  2115. //
  2116. // Check for 3 characters compressing to 1.
  2117. //
  2118. case ( COMPRESS_3_MASK ) :
  2119. {
  2120. //
  2121. // Check character in string 1.
  2122. //
  2123. if ( (If1) && (!IfEnd1) &&
  2124. !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) )
  2125. {
  2126. ctr = pHashN->pCompHdr->Num3;
  2127. pComp3 = pHashN->pCompress3;
  2128. for (; ctr > 0; ctr--, pComp3++)
  2129. {
  2130. //
  2131. // Check character in string 1.
  2132. //
  2133. if ( (pComp3->UCP1 == *pString1) &&
  2134. (pComp3->UCP2 == *(pString1 + 1)) &&
  2135. (pComp3->UCP3 == *(pString1 + 2)) )
  2136. {
  2137. //
  2138. // Found compression for string 1.
  2139. // Get new weight and mask it.
  2140. // Increment pointer and decrement counter.
  2141. //
  2142. Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  2143. Weight1 &= Mask;
  2144. if (!IfDblCompress1)
  2145. {
  2146. pString1 += 2;
  2147. ctr1 -= 2;
  2148. }
  2149. //
  2150. // Set boolean for string 1 - search is
  2151. // complete.
  2152. //
  2153. If1 = 0;
  2154. break;
  2155. }
  2156. }
  2157. }
  2158. //
  2159. // Check character in string 2.
  2160. //
  2161. if ( (If2) && (!IfEnd2) &&
  2162. !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) )
  2163. {
  2164. ctr = pHashN->pCompHdr->Num3;
  2165. pComp3 = pHashN->pCompress3;
  2166. for (; ctr > 0; ctr--, pComp3++)
  2167. {
  2168. //
  2169. // Check character in string 2.
  2170. //
  2171. if ( (pComp3->UCP1 == *pString2) &&
  2172. (pComp3->UCP2 == *(pString2 + 1)) &&
  2173. (pComp3->UCP3 == *(pString2 + 2)) )
  2174. {
  2175. //
  2176. // Found compression for string 2.
  2177. // Get new weight and mask it.
  2178. // Increment pointer and decrement counter.
  2179. //
  2180. Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  2181. Weight2 &= Mask;
  2182. if (!IfDblCompress2)
  2183. {
  2184. pString2 += 2;
  2185. ctr2 -= 2;
  2186. }
  2187. //
  2188. // Set boolean for string 2 - search is
  2189. // complete.
  2190. //
  2191. If2 = 0;
  2192. break;
  2193. }
  2194. }
  2195. }
  2196. //
  2197. // Fall through if not found.
  2198. //
  2199. if ((If1 == 0) && (If2 == 0))
  2200. {
  2201. break;
  2202. }
  2203. }
  2204. //
  2205. // Check for 2 characters compressing to 1.
  2206. //
  2207. case ( COMPRESS_2_MASK ) :
  2208. {
  2209. //
  2210. // Check character in string 1.
  2211. //
  2212. if ((If1) && (!IfEnd1))
  2213. {
  2214. ctr = pHashN->pCompHdr->Num2;
  2215. pComp2 = pHashN->pCompress2;
  2216. for (; ctr > 0; ctr--, pComp2++)
  2217. {
  2218. //
  2219. // Check character in string 1.
  2220. //
  2221. if ((pComp2->UCP1 == *pString1) &&
  2222. (pComp2->UCP2 == *(pString1 + 1)))
  2223. {
  2224. //
  2225. // Found compression for string 1.
  2226. // Get new weight and mask it.
  2227. // Increment pointer and decrement counter.
  2228. //
  2229. Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2230. Weight1 &= Mask;
  2231. if (!IfDblCompress1)
  2232. {
  2233. pString1++;
  2234. ctr1--;
  2235. }
  2236. //
  2237. // Set boolean for string 1 - search is
  2238. // complete.
  2239. //
  2240. If1 = 0;
  2241. break;
  2242. }
  2243. }
  2244. }
  2245. //
  2246. // Check character in string 2.
  2247. //
  2248. if ((If2) && (!IfEnd2))
  2249. {
  2250. ctr = pHashN->pCompHdr->Num2;
  2251. pComp2 = pHashN->pCompress2;
  2252. for (; ctr > 0; ctr--, pComp2++)
  2253. {
  2254. //
  2255. // Check character in string 2.
  2256. //
  2257. if ((pComp2->UCP1 == *pString2) &&
  2258. (pComp2->UCP2 == *(pString2 + 1)))
  2259. {
  2260. //
  2261. // Found compression for string 2.
  2262. // Get new weight and mask it.
  2263. // Increment pointer and decrement counter.
  2264. //
  2265. Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2266. Weight2 &= Mask;
  2267. if (!IfDblCompress2)
  2268. {
  2269. pString2++;
  2270. ctr2--;
  2271. }
  2272. //
  2273. // Set boolean for string 2 - search is
  2274. // complete.
  2275. //
  2276. If2 = 0;
  2277. break;
  2278. }
  2279. }
  2280. }
  2281. }
  2282. }
  2283. //
  2284. // Reset the pointer back to the beginning of the double
  2285. // compression. Pointer fixup at the end will advance
  2286. // them correctly.
  2287. //
  2288. // If double compression, we advanced the pointer at
  2289. // the beginning of the switch statement. If double
  2290. // compression character was actually found, the pointer
  2291. // was NOT advanced. We now want to decrement the pointer
  2292. // to put it back to where it was.
  2293. //
  2294. // The next time through, the pointer will be pointing to
  2295. // the regular compression part of the string.
  2296. //
  2297. if (IfDblCompress1)
  2298. {
  2299. pString1--;
  2300. ctr1++;
  2301. }
  2302. if (IfDblCompress2)
  2303. {
  2304. pString2--;
  2305. ctr2++;
  2306. }
  2307. }
  2308. }
  2309. //
  2310. // Check the weights again.
  2311. //
  2312. if ((Weight1 != Weight2) ||
  2313. (GET_SCRIPT_MEMBER(&Weight1) == EXTENSION_A))
  2314. {
  2315. //
  2316. // Weights are still not equal, even after compression
  2317. // check, so compare the different weights.
  2318. //
  2319. BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
  2320. BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
  2321. WORD uw1 = GET_UNICODE_SM_MOD(&Weight1, sm1, fModify); // unicode weight 1
  2322. WORD uw2 = GET_UNICODE_SM_MOD(&Weight2, sm2, fModify); // unicode weight 2
  2323. BYTE dw1; // diacritic weight 1
  2324. BYTE dw2; // diacritic weight 2
  2325. DWORD Wt; // temp weight holder
  2326. WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
  2327. WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
  2328. //
  2329. // If Unicode Weights are different and no special cases,
  2330. // then we're done. Otherwise, we need to do extra checking.
  2331. //
  2332. // Must check ENTIRE string for any possibility of Unicode Weight
  2333. // differences. As soon as a Unicode Weight difference is found,
  2334. // then we're done. If no UW difference is found, then the
  2335. // first Diacritic Weight difference is used. If no DW difference
  2336. // is found, then use the first Case Difference. If no CW
  2337. // difference is found, then use the first Extra Weight
  2338. // difference. If no XW difference is found, then use the first
  2339. // Special Weight difference.
  2340. //
  2341. if ((uw1 != uw2) ||
  2342. ((sm1 <= SYMBOL_5) && (sm1 >= FAREAST_SPECIAL)))
  2343. {
  2344. //
  2345. // Check for Unsortable characters and skip them.
  2346. // This needs to be outside the switch statement. If EITHER
  2347. // character is unsortable, must skip it and start over.
  2348. //
  2349. if (sm1 == UNSORTABLE)
  2350. {
  2351. pString1++;
  2352. ctr1--;
  2353. Weight1 = CMP_INVALID_WEIGHT;
  2354. }
  2355. if (sm2 == UNSORTABLE)
  2356. {
  2357. pString2++;
  2358. ctr2--;
  2359. Weight2 = CMP_INVALID_WEIGHT;
  2360. }
  2361. //
  2362. // Check for Ignore Nonspace and Ignore Symbol. If
  2363. // Ignore Nonspace is set and either character is a
  2364. // nonspace mark only, then we need to advance the
  2365. // pointer to skip over the character and continue.
  2366. // If Ignore Symbol is set and either character is a
  2367. // punctuation char, then we need to advance the
  2368. // pointer to skip over the character and continue.
  2369. //
  2370. // This step is necessary so that a string with a
  2371. // nonspace mark and a punctuation char following one
  2372. // another are properly ignored when one or both of
  2373. // the ignore flags is set.
  2374. //
  2375. if (fIgnoreDiacritic)
  2376. {
  2377. if (sm1 == NONSPACE_MARK)
  2378. {
  2379. pString1++;
  2380. ctr1--;
  2381. Weight1 = CMP_INVALID_WEIGHT;
  2382. }
  2383. if (sm2 == NONSPACE_MARK)
  2384. {
  2385. pString2++;
  2386. ctr2--;
  2387. Weight2 = CMP_INVALID_WEIGHT;
  2388. }
  2389. }
  2390. if (fIgnoreSymbol)
  2391. {
  2392. if (sm1 == PUNCTUATION)
  2393. {
  2394. pString1++;
  2395. ctr1--;
  2396. Weight1 = CMP_INVALID_WEIGHT;
  2397. }
  2398. if (sm2 == PUNCTUATION)
  2399. {
  2400. pString2++;
  2401. ctr2--;
  2402. Weight2 = CMP_INVALID_WEIGHT;
  2403. }
  2404. }
  2405. if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
  2406. {
  2407. continue;
  2408. }
  2409. //
  2410. // Switch on the script member of string 1 and take care
  2411. // of any special cases.
  2412. //
  2413. switch (sm1)
  2414. {
  2415. case ( NONSPACE_MARK ) :
  2416. {
  2417. //
  2418. // Nonspace only - look at diacritic weight only.
  2419. //
  2420. if (!fIgnoreDiacritic)
  2421. {
  2422. if ((WhichDiacritic == 0) ||
  2423. (State & STATE_REVERSE_DW))
  2424. {
  2425. WhichDiacritic = CSTR_GREATER_THAN;
  2426. //
  2427. // Remove state from state machine.
  2428. //
  2429. REMOVE_STATE(STATE_DW);
  2430. }
  2431. }
  2432. //
  2433. // Adjust pointer and counter and set flags.
  2434. //
  2435. pString1++;
  2436. ctr1--;
  2437. Weight1 = CMP_INVALID_WEIGHT;
  2438. break;
  2439. }
  2440. case ( SYMBOL_1 ) :
  2441. case ( SYMBOL_2 ) :
  2442. case ( SYMBOL_3 ) :
  2443. case ( SYMBOL_4 ) :
  2444. case ( SYMBOL_5 ) :
  2445. {
  2446. //
  2447. // If the ignore symbol flag is set, then skip over
  2448. // the symbol.
  2449. //
  2450. if (fIgnoreSymbol)
  2451. {
  2452. pString1++;
  2453. ctr1--;
  2454. Weight1 = CMP_INVALID_WEIGHT;
  2455. }
  2456. break;
  2457. }
  2458. case ( PUNCTUATION ) :
  2459. {
  2460. //
  2461. // If the ignore punctuation flag is set, then skip
  2462. // over the punctuation char.
  2463. //
  2464. if (fIgnorePunct)
  2465. {
  2466. pString1++;
  2467. ctr1--;
  2468. Weight1 = CMP_INVALID_WEIGHT;
  2469. }
  2470. else if (!fStringSort)
  2471. {
  2472. //
  2473. // Use WORD sort method.
  2474. //
  2475. if (sm2 != PUNCTUATION)
  2476. {
  2477. //
  2478. // The character in the second string is
  2479. // NOT punctuation.
  2480. //
  2481. if (WhichPunct2)
  2482. {
  2483. //
  2484. // Set WP 2 to show that string 2 is
  2485. // smaller, since a punctuation char had
  2486. // already been found at an earlier
  2487. // position in string 2.
  2488. //
  2489. // Set the Ignore Punctuation flag so we
  2490. // just skip over any other punctuation
  2491. // chars in the string.
  2492. //
  2493. WhichPunct2 = CSTR_GREATER_THAN;
  2494. fIgnorePunct = TRUE;
  2495. }
  2496. else
  2497. {
  2498. //
  2499. // Set WP 1 to show that string 2 is
  2500. // smaller, and that string 1 has had
  2501. // a punctuation char - since no
  2502. // punctuation chars have been found
  2503. // in string 2.
  2504. //
  2505. WhichPunct1 = CSTR_GREATER_THAN;
  2506. }
  2507. //
  2508. // Advance pointer 1 and decrement counter 1.
  2509. //
  2510. pString1++;
  2511. ctr1--;
  2512. Weight1 = CMP_INVALID_WEIGHT;
  2513. }
  2514. //
  2515. // Do NOT want to advance the pointer in string 1
  2516. // if string 2 is also a punctuation char. This
  2517. // will be done later.
  2518. //
  2519. }
  2520. break;
  2521. }
  2522. case ( EXPANSION ) :
  2523. {
  2524. //
  2525. // Save pointer in pString1 so that it can be
  2526. // restored.
  2527. //
  2528. if (pSave1 == NULL)
  2529. {
  2530. pSave1 = pString1;
  2531. }
  2532. pString1 = pTmpBuf1;
  2533. //
  2534. // Add one to counter so that subtraction doesn't end
  2535. // comparison prematurely.
  2536. //
  2537. ctr1++;
  2538. //
  2539. // Expand character into temporary buffer.
  2540. //
  2541. pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
  2542. pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
  2543. //
  2544. // Set cExpChar1 to the number of expansion characters
  2545. // stored.
  2546. //
  2547. cExpChar1 = MAX_TBL_EXPANSION;
  2548. Weight1 = CMP_INVALID_WEIGHT;
  2549. break;
  2550. }
  2551. case ( FAREAST_SPECIAL ) :
  2552. {
  2553. if (sm2 != EXPANSION)
  2554. {
  2555. //
  2556. // Get the weight for the far east special case
  2557. // and store it in Weight1.
  2558. //
  2559. GET_FAREAST_WEIGHT( Weight1,
  2560. uw1,
  2561. Mask,
  2562. lpString1,
  2563. pString1,
  2564. ExtraWt1,
  2565. fModify );
  2566. if (sm2 != FAREAST_SPECIAL)
  2567. {
  2568. //
  2569. // The character in the second string is
  2570. // NOT a fareast special char.
  2571. //
  2572. // Set each of weights 4, 5, 6, and 7 to show
  2573. // that string 2 is smaller (if not already set).
  2574. //
  2575. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  2576. (GET_WT_FOUR(&ExtraWt1) != 0))
  2577. {
  2578. GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
  2579. }
  2580. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  2581. (GET_WT_FIVE(&ExtraWt1) != 0))
  2582. {
  2583. GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
  2584. }
  2585. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  2586. (GET_WT_SIX(&ExtraWt1) != 0))
  2587. {
  2588. GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
  2589. }
  2590. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  2591. (GET_WT_SEVEN(&ExtraWt1) != 0))
  2592. {
  2593. GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
  2594. }
  2595. }
  2596. }
  2597. break;
  2598. }
  2599. case ( JAMO_SPECIAL ) :
  2600. {
  2601. LPWSTR pStr1 = pString1;
  2602. LPWSTR pStr2 = pString2;
  2603. //
  2604. // Set the JamoFlag so we don't handle it again.
  2605. //
  2606. JamoFlag = TRUE;
  2607. FindJamoDifference(
  2608. pHashN,
  2609. &pStr1, &ctr1, cchCount1, &Weight1,
  2610. &pStr2, &ctr2, cchCount2, &Weight2,
  2611. &pLastJamo,
  2612. &uw1, &uw2,
  2613. &State,
  2614. &WhichJamo,
  2615. fModify );
  2616. if (WhichJamo)
  2617. {
  2618. return (WhichJamo);
  2619. }
  2620. pString1 = pStr1;
  2621. pString2 = pStr2;
  2622. break;
  2623. }
  2624. case ( EXTENSION_A ) :
  2625. {
  2626. //
  2627. // Get the full weight in case DW got masked.
  2628. //
  2629. Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
  2630. if (sm2 == EXTENSION_A)
  2631. {
  2632. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  2633. }
  2634. //
  2635. // Compare the weights.
  2636. //
  2637. if (Weight1 == Weight2)
  2638. {
  2639. //
  2640. // Adjust pointers and counters and set flags.
  2641. //
  2642. pString1++; pString2++;
  2643. ctr1--; ctr2--;
  2644. Weight1 = CMP_INVALID_WEIGHT;
  2645. Weight2 = CMP_INVALID_WEIGHT;
  2646. }
  2647. else
  2648. {
  2649. //
  2650. // Get the actual UW to compare.
  2651. //
  2652. if (sm2 == EXTENSION_A)
  2653. {
  2654. //
  2655. // Set the UW values to be the AW and DW since
  2656. // both strings contain an extension A char.
  2657. //
  2658. uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
  2659. GET_DIACRITIC(&Weight1),
  2660. FALSE );
  2661. uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
  2662. GET_DIACRITIC(&Weight2),
  2663. FALSE );
  2664. }
  2665. else
  2666. {
  2667. //
  2668. // Only string1 contains an extension A char,
  2669. // so set the UW value to be the first UW
  2670. // value for extension A (default values):
  2671. // SM_EXT_A, AW_EXT_A
  2672. //
  2673. uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  2674. }
  2675. }
  2676. break;
  2677. }
  2678. case ( UNSORTABLE ) :
  2679. {
  2680. //
  2681. // Fill out the case statement so the compiler
  2682. // will use a jump table.
  2683. //
  2684. break;
  2685. }
  2686. }
  2687. //
  2688. // Switch on the script member of string 2 and take care
  2689. // of any special cases.
  2690. //
  2691. switch (sm2)
  2692. {
  2693. case ( NONSPACE_MARK ) :
  2694. {
  2695. //
  2696. // Nonspace only - look at diacritic weight only.
  2697. //
  2698. if (!fIgnoreDiacritic)
  2699. {
  2700. if ((WhichDiacritic == 0) ||
  2701. (State & STATE_REVERSE_DW))
  2702. {
  2703. WhichDiacritic = CSTR_LESS_THAN;
  2704. //
  2705. // Remove state from state machine.
  2706. //
  2707. REMOVE_STATE(STATE_DW);
  2708. }
  2709. }
  2710. //
  2711. // Adjust pointer and counter and set flags.
  2712. //
  2713. pString2++;
  2714. ctr2--;
  2715. Weight2 = CMP_INVALID_WEIGHT;
  2716. break;
  2717. }
  2718. case ( SYMBOL_1 ) :
  2719. case ( SYMBOL_2 ) :
  2720. case ( SYMBOL_3 ) :
  2721. case ( SYMBOL_4 ) :
  2722. case ( SYMBOL_5 ) :
  2723. {
  2724. //
  2725. // If the ignore symbol flag is set, then skip over
  2726. // the symbol.
  2727. //
  2728. if (fIgnoreSymbol)
  2729. {
  2730. pString2++;
  2731. ctr2--;
  2732. Weight2 = CMP_INVALID_WEIGHT;
  2733. }
  2734. break;
  2735. }
  2736. case ( PUNCTUATION ) :
  2737. {
  2738. //
  2739. // If the ignore punctuation flag is set, then
  2740. // skip over the punctuation char.
  2741. //
  2742. if (fIgnorePunct)
  2743. {
  2744. //
  2745. // Advance pointer 2 and decrement counter 2.
  2746. //
  2747. pString2++;
  2748. ctr2--;
  2749. Weight2 = CMP_INVALID_WEIGHT;
  2750. }
  2751. else if (!fStringSort)
  2752. {
  2753. //
  2754. // Use WORD sort method.
  2755. //
  2756. if (sm1 != PUNCTUATION)
  2757. {
  2758. //
  2759. // The character in the first string is
  2760. // NOT punctuation.
  2761. //
  2762. if (WhichPunct1)
  2763. {
  2764. //
  2765. // Set WP 1 to show that string 1 is
  2766. // smaller, since a punctuation char had
  2767. // already been found at an earlier
  2768. // position in string 1.
  2769. //
  2770. // Set the Ignore Punctuation flag so we
  2771. // just skip over any other punctuation
  2772. // chars in the string.
  2773. //
  2774. WhichPunct1 = CSTR_LESS_THAN;
  2775. fIgnorePunct = TRUE;
  2776. }
  2777. else
  2778. {
  2779. //
  2780. // Set WP 2 to show that string 1 is
  2781. // smaller, and that string 2 has had
  2782. // a punctuation char - since no
  2783. // punctuation chars have been found
  2784. // in string 1.
  2785. //
  2786. WhichPunct2 = CSTR_LESS_THAN;
  2787. }
  2788. //
  2789. // Pointer 2 and counter 2 will be updated
  2790. // after if-else statement.
  2791. //
  2792. }
  2793. else
  2794. {
  2795. //
  2796. // Both code points are punctuation chars.
  2797. //
  2798. // See if either of the strings has encountered
  2799. // punctuation chars previous to this.
  2800. //
  2801. if (WhichPunct1)
  2802. {
  2803. //
  2804. // String 1 has had a punctuation char, so
  2805. // it should be the smaller string (since
  2806. // both have punctuation chars).
  2807. //
  2808. WhichPunct1 = CSTR_LESS_THAN;
  2809. }
  2810. else if (WhichPunct2)
  2811. {
  2812. //
  2813. // String 2 has had a punctuation char, so
  2814. // it should be the smaller string (since
  2815. // both have punctuation chars).
  2816. //
  2817. WhichPunct2 = CSTR_GREATER_THAN;
  2818. }
  2819. else
  2820. {
  2821. BYTE aw1 = GET_ALPHA_NUMERIC(&Weight1);
  2822. BYTE aw2 = GET_ALPHA_NUMERIC(&Weight2);
  2823. if (aw1 == aw2)
  2824. {
  2825. BYTE cw1 = GET_CASE(&Weight1);
  2826. BYTE cw2 = GET_CASE(&Weight2);
  2827. if (cw1 < cw2)
  2828. {
  2829. WhichPunct1 = CSTR_LESS_THAN;
  2830. } else if (cw1 > cw2)
  2831. {
  2832. WhichPunct1 = CSTR_GREATER_THAN;
  2833. }
  2834. } else
  2835. {
  2836. //
  2837. // Position is the same, so compare the
  2838. // special weights. Set WhichPunct1 to
  2839. // the smaller special weight.
  2840. //
  2841. WhichPunct1 = (aw1 < aw2
  2842. ? CSTR_LESS_THAN
  2843. : CSTR_GREATER_THAN);
  2844. }
  2845. }
  2846. //
  2847. // Set the Ignore Punctuation flag.
  2848. //
  2849. fIgnorePunct = TRUE;
  2850. //
  2851. // Advance pointer 1 and decrement counter 1.
  2852. // Pointer 2 and counter 2 will be updated
  2853. // after if-else statement.
  2854. //
  2855. pString1++;
  2856. ctr1--;
  2857. Weight1 = CMP_INVALID_WEIGHT;
  2858. }
  2859. //
  2860. // Advance pointer 2 and decrement counter 2.
  2861. //
  2862. pString2++;
  2863. ctr2--;
  2864. Weight2 = CMP_INVALID_WEIGHT;
  2865. }
  2866. break;
  2867. }
  2868. case ( EXPANSION ) :
  2869. {
  2870. //
  2871. // Save pointer in pString1 so that it can be restored.
  2872. //
  2873. if (pSave2 == NULL)
  2874. {
  2875. pSave2 = pString2;
  2876. }
  2877. pString2 = pTmpBuf2;
  2878. //
  2879. // Add one to counter so that subtraction doesn't end
  2880. // comparison prematurely.
  2881. //
  2882. ctr2++;
  2883. //
  2884. // Expand character into temporary buffer.
  2885. //
  2886. pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
  2887. pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
  2888. //
  2889. // Set cExpChar2 to the number of expansion characters
  2890. // stored.
  2891. //
  2892. cExpChar2 = MAX_TBL_EXPANSION;
  2893. Weight2 = CMP_INVALID_WEIGHT;
  2894. break;
  2895. }
  2896. case ( FAREAST_SPECIAL ) :
  2897. {
  2898. if (sm1 != EXPANSION)
  2899. {
  2900. //
  2901. // Get the weight for the far east special case
  2902. // and store it in Weight2.
  2903. //
  2904. GET_FAREAST_WEIGHT( Weight2,
  2905. uw2,
  2906. Mask,
  2907. lpString2,
  2908. pString2,
  2909. ExtraWt2,
  2910. fModify );
  2911. if (sm1 != FAREAST_SPECIAL)
  2912. {
  2913. //
  2914. // The character in the first string is
  2915. // NOT a fareast special char.
  2916. //
  2917. // Set each of weights 4, 5, 6, and 7 to show
  2918. // that string 1 is smaller (if not already set).
  2919. //
  2920. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  2921. (GET_WT_FOUR(&ExtraWt2) != 0))
  2922. {
  2923. GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
  2924. }
  2925. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  2926. (GET_WT_FIVE(&ExtraWt2) != 0))
  2927. {
  2928. GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
  2929. }
  2930. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  2931. (GET_WT_SIX(&ExtraWt2) != 0))
  2932. {
  2933. GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
  2934. }
  2935. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  2936. (GET_WT_SEVEN(&ExtraWt2) != 0))
  2937. {
  2938. GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
  2939. }
  2940. }
  2941. else
  2942. {
  2943. //
  2944. // Characters in both strings are fareast
  2945. // special chars.
  2946. //
  2947. // Set each of weights 4, 5, 6, and 7
  2948. // appropriately (if not already set).
  2949. //
  2950. if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
  2951. ( GET_WT_FOUR(&ExtraWt1) !=
  2952. GET_WT_FOUR(&ExtraWt2) ) )
  2953. {
  2954. GET_WT_FOUR(&WhichExtra) =
  2955. ( GET_WT_FOUR(&ExtraWt1) <
  2956. GET_WT_FOUR(&ExtraWt2) )
  2957. ? CSTR_LESS_THAN
  2958. : CSTR_GREATER_THAN;
  2959. }
  2960. if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
  2961. ( GET_WT_FIVE(&ExtraWt1) !=
  2962. GET_WT_FIVE(&ExtraWt2) ) )
  2963. {
  2964. GET_WT_FIVE(&WhichExtra) =
  2965. ( GET_WT_FIVE(&ExtraWt1) <
  2966. GET_WT_FIVE(&ExtraWt2) )
  2967. ? CSTR_LESS_THAN
  2968. : CSTR_GREATER_THAN;
  2969. }
  2970. if ( (GET_WT_SIX(&WhichExtra) == 0) &&
  2971. ( GET_WT_SIX(&ExtraWt1) !=
  2972. GET_WT_SIX(&ExtraWt2) ) )
  2973. {
  2974. GET_WT_SIX(&WhichExtra) =
  2975. ( GET_WT_SIX(&ExtraWt1) <
  2976. GET_WT_SIX(&ExtraWt2) )
  2977. ? CSTR_LESS_THAN
  2978. : CSTR_GREATER_THAN;
  2979. }
  2980. if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
  2981. ( GET_WT_SEVEN(&ExtraWt1) !=
  2982. GET_WT_SEVEN(&ExtraWt2) ) )
  2983. {
  2984. GET_WT_SEVEN(&WhichExtra) =
  2985. ( GET_WT_SEVEN(&ExtraWt1) <
  2986. GET_WT_SEVEN(&ExtraWt2) )
  2987. ? CSTR_LESS_THAN
  2988. : CSTR_GREATER_THAN;
  2989. }
  2990. }
  2991. }
  2992. break;
  2993. }
  2994. case ( JAMO_SPECIAL ) :
  2995. {
  2996. if (!JamoFlag)
  2997. {
  2998. LPWSTR pStr1 = pString1;
  2999. LPWSTR pStr2 = pString2;
  3000. FindJamoDifference(
  3001. pHashN,
  3002. &pStr1, &ctr1, cchCount1, &Weight1,
  3003. &pStr2, &ctr2, cchCount2, &Weight2,
  3004. &pLastJamo,
  3005. &uw1, &uw2,
  3006. &State,
  3007. &WhichJamo,
  3008. fModify );
  3009. if (WhichJamo)
  3010. {
  3011. return (WhichJamo);
  3012. }
  3013. pString1 = pStr1;
  3014. pString2 = pStr2;
  3015. }
  3016. else
  3017. {
  3018. //
  3019. // Reset the Jamo flag.
  3020. //
  3021. JamoFlag = FALSE;
  3022. }
  3023. break;
  3024. }
  3025. case ( EXTENSION_A ) :
  3026. {
  3027. //
  3028. // If sm1 is an extension A character, then
  3029. // both sm1 and sm2 have been handled. We should
  3030. // only get here when either sm1 is not an
  3031. // extension A character or the two extension A
  3032. // characters are different.
  3033. //
  3034. if (sm1 != EXTENSION_A)
  3035. {
  3036. //
  3037. // Get the full weight in case DW got masked.
  3038. // Also, get the actual UW to compare.
  3039. //
  3040. // Only string2 contains an extension A char,
  3041. // so set the UW value to be the first UW
  3042. // value for extension A (default values):
  3043. // SM_EXT_A, AW_EXT_A
  3044. //
  3045. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  3046. uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  3047. }
  3048. //
  3049. // We should then fall through to the comparison
  3050. // of the Unicode weights.
  3051. //
  3052. break;
  3053. }
  3054. case ( UNSORTABLE ) :
  3055. {
  3056. //
  3057. // Fill out the case statement so the compiler
  3058. // will use a jump table.
  3059. //
  3060. break;
  3061. }
  3062. }
  3063. //
  3064. // See if the comparison should start again.
  3065. //
  3066. if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
  3067. {
  3068. //
  3069. // Check to see if we're modifying the script value.
  3070. // If so, then we need to reset the fareast weight
  3071. // (if applicable) so that it doesn't get modified
  3072. // again.
  3073. //
  3074. if (fModify == TRUE)
  3075. {
  3076. if (sm1 == FAREAST_SPECIAL)
  3077. {
  3078. Weight1 = CMP_INVALID_WEIGHT;
  3079. }
  3080. else if (sm2 == FAREAST_SPECIAL)
  3081. {
  3082. Weight2 = CMP_INVALID_WEIGHT;
  3083. }
  3084. }
  3085. continue;
  3086. }
  3087. //
  3088. // We're not supposed to drop down into the state table if
  3089. // the unicode weights are different, so stop comparison
  3090. // and return result of unicode weight comparison.
  3091. //
  3092. if (uw1 != uw2)
  3093. {
  3094. return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
  3095. }
  3096. }
  3097. //
  3098. // For each state in the state table, do the appropriate
  3099. // comparisons.
  3100. //
  3101. if (State & (STATE_DW | STATE_REVERSE_DW))
  3102. {
  3103. //
  3104. // Get the diacritic weights.
  3105. //
  3106. dw1 = GET_DIACRITIC(&Weight1);
  3107. dw2 = GET_DIACRITIC(&Weight2);
  3108. if (dw1 != dw2)
  3109. {
  3110. //
  3111. // Look ahead to see if diacritic follows a
  3112. // minimum diacritic weight. If so, get the
  3113. // diacritic weight of the nonspace mark.
  3114. //
  3115. while (!AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1))
  3116. {
  3117. Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
  3118. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  3119. {
  3120. dw1 += GET_DIACRITIC(&Wt);
  3121. pString1++;
  3122. ctr1--;
  3123. }
  3124. else
  3125. {
  3126. break;
  3127. }
  3128. }
  3129. while (!AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2))
  3130. {
  3131. Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
  3132. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  3133. {
  3134. dw2 += GET_DIACRITIC(&Wt);
  3135. pString2++;
  3136. ctr2--;
  3137. }
  3138. else
  3139. {
  3140. break;
  3141. }
  3142. }
  3143. //
  3144. // Save which string has the smaller diacritic
  3145. // weight if the diacritic weights are still
  3146. // different.
  3147. //
  3148. if (dw1 != dw2)
  3149. {
  3150. WhichDiacritic = (dw1 < dw2)
  3151. ? CSTR_LESS_THAN
  3152. : CSTR_GREATER_THAN;
  3153. //
  3154. // Remove state from state machine.
  3155. //
  3156. REMOVE_STATE(STATE_DW);
  3157. }
  3158. }
  3159. }
  3160. if (State & STATE_CW)
  3161. {
  3162. //
  3163. // Get the case weights.
  3164. //
  3165. if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
  3166. {
  3167. //
  3168. // Save which string has the smaller case weight.
  3169. //
  3170. WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
  3171. ? CSTR_LESS_THAN
  3172. : CSTR_GREATER_THAN;
  3173. //
  3174. // Remove state from state machine.
  3175. //
  3176. REMOVE_STATE(STATE_CW);
  3177. }
  3178. }
  3179. }
  3180. //
  3181. // Fixup the pointers and counters.
  3182. //
  3183. POINTER_FIXUP();
  3184. ctr1--;
  3185. ctr2--;
  3186. //
  3187. // Reset the weights to be invalid.
  3188. //
  3189. Weight1 = CMP_INVALID_WEIGHT;
  3190. Weight2 = CMP_INVALID_WEIGHT;
  3191. }
  3192. //
  3193. // If the end of BOTH strings has been reached, then the unicode
  3194. // weights match exactly. Check the diacritic, case and special
  3195. // weights. If all are zero, then return success. Otherwise,
  3196. // return the result of the weight difference.
  3197. //
  3198. // NOTE: The following checks MUST REMAIN IN THIS ORDER:
  3199. // Diacritic, Case, Punctuation.
  3200. //
  3201. if (AT_STRING_END(ctr1, pString1, cchCount1))
  3202. {
  3203. if (AT_STRING_END(ctr2, pString2, cchCount2))
  3204. {
  3205. if (WhichDiacritic)
  3206. {
  3207. return (WhichDiacritic);
  3208. }
  3209. if (WhichCase)
  3210. {
  3211. return (WhichCase);
  3212. }
  3213. if (WhichExtra)
  3214. {
  3215. if (!fIgnoreDiacritic)
  3216. {
  3217. if (GET_WT_FOUR(&WhichExtra))
  3218. {
  3219. return (GET_WT_FOUR(&WhichExtra));
  3220. }
  3221. if (GET_WT_FIVE(&WhichExtra))
  3222. {
  3223. return (GET_WT_FIVE(&WhichExtra));
  3224. }
  3225. }
  3226. if (GET_WT_SIX(&WhichExtra))
  3227. {
  3228. return (GET_WT_SIX(&WhichExtra));
  3229. }
  3230. if (GET_WT_SEVEN(&WhichExtra))
  3231. {
  3232. return (GET_WT_SEVEN(&WhichExtra));
  3233. }
  3234. }
  3235. if (WhichPunct1)
  3236. {
  3237. return (WhichPunct1);
  3238. }
  3239. if (WhichPunct2)
  3240. {
  3241. return (WhichPunct2);
  3242. }
  3243. return (CSTR_EQUAL);
  3244. }
  3245. else
  3246. {
  3247. //
  3248. // String 2 is longer.
  3249. //
  3250. pString1 = pString2;
  3251. ctr1 = ctr2;
  3252. cchCount1 = cchCount2;
  3253. fEnd1 = CSTR_LESS_THAN;
  3254. }
  3255. }
  3256. else
  3257. {
  3258. fEnd1 = CSTR_GREATER_THAN;
  3259. }
  3260. //
  3261. // Scan to the end of the longer string.
  3262. //
  3263. SCAN_LONGER_STRING( ctr1,
  3264. pString1,
  3265. cchCount1,
  3266. fEnd1 );
  3267. }
  3268. ////////////////////////////////////////////////////////////////////////////
  3269. //
  3270. // FindJamoDifference
  3271. //
  3272. ////////////////////////////////////////////////////////////////////////////
  3273. int FindJamoDifference(
  3274. PLOC_HASH pHashN,
  3275. LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1,
  3276. LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2,
  3277. LPCWSTR* pLastJamo,
  3278. WORD* uw1,
  3279. WORD* uw2,
  3280. int* pState,
  3281. int* WhichJamo,
  3282. BOOL fModify)
  3283. {
  3284. int bRestart = 0; // if string compare should restart again
  3285. int oldHangulsFound1 = 0; // # of valid old Hangul Jamo compositions found
  3286. int oldHangulsFound2 = 0; // # of valid old Hangul Jamo compositions found
  3287. WORD UW;
  3288. BYTE JamoWeight1[3]; // extra weight for first old Hangul composition
  3289. BYTE JamoWeight2[3]; // extra weight for second old Hangul composition
  3290. //
  3291. // Roll back to the first Jamo. We know that these Jamos in both strings
  3292. // should be equal, so we can decrement both strings at once.
  3293. //
  3294. while ((*ppString1 > *pLastJamo) && IsJamo(*(*ppString1 - 1)))
  3295. {
  3296. (*ppString1)--; (*ppString2)--; (*ctr1)++; (*ctr2)++;
  3297. }
  3298. //
  3299. // Now we are at the beginning of two groups of Jamo characters.
  3300. // Compare Jamo unit (either a single Jamo or a valid old Hangul Jamo
  3301. // composition) until we run out Jamo units in either strings.
  3302. // We also exit when we reach the ends of either string.
  3303. //
  3304. // while (NOT_END_STRING(*ctr1, *ppString1, cchCount1) &&
  3305. // NOT_END_STRING(*ctr2, *ppString2, cchCount2))
  3306. //
  3307. for (;;)
  3308. {
  3309. if (IsJamo(**ppString1))
  3310. {
  3311. if (IsLeadingJamo(**ppString1))
  3312. {
  3313. if ((oldHangulsFound1 = MapOldHangulSortKey( pHashN,
  3314. *ppString1,
  3315. *ctr1,
  3316. &UW,
  3317. JamoWeight1,
  3318. fModify )) > 0)
  3319. {
  3320. *uw1 = UW;
  3321. //
  3322. // Mark *pWeight1 so that it is not CMP_INVALID_WEIGHT.
  3323. // 0202 is the DW/CW.
  3324. //
  3325. *pWeight1 = ((DWORD)UW | 0x02020000);
  3326. //
  3327. // We always increment ppString1/ctr1 at the end of the
  3328. // loop, so we need to subtract 1 here.
  3329. //
  3330. *ppString1 += (oldHangulsFound1 - 1);
  3331. *ctr1 -= (oldHangulsFound1 - 1);
  3332. }
  3333. }
  3334. if (oldHangulsFound1 == 0)
  3335. {
  3336. //
  3337. // No valid old Hangul compositions are found. Get the UW
  3338. // for the Jamo instead.
  3339. //
  3340. *pWeight1 = GET_DWORD_WEIGHT(pHashN, **ppString1);
  3341. //
  3342. // The SMs in PSORTKEY for Jamos are not really SMs. They
  3343. // are all 4 (for JAMO_SPECIAL).
  3344. // Here we get the real Jamo Unicode weight. The actual SM
  3345. // is stored in DW.
  3346. //
  3347. *uw1 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight1),
  3348. GET_ALPHA_NUMERIC(pWeight1),
  3349. fModify );
  3350. ((PSORTKEY)pWeight1)->Diacritic = MIN_DW;
  3351. }
  3352. }
  3353. if (IsJamo(**ppString2))
  3354. {
  3355. if (IsLeadingJamo(**ppString2))
  3356. {
  3357. if ((oldHangulsFound2 = MapOldHangulSortKey( pHashN,
  3358. *ppString2,
  3359. *ctr2,
  3360. &UW,
  3361. JamoWeight2,
  3362. fModify )) > 0)
  3363. {
  3364. *uw2 = UW;
  3365. *pWeight2 = ((DWORD)UW | 0x02020000);
  3366. *ppString2 += (oldHangulsFound2 - 1);
  3367. *ctr2 -= (oldHangulsFound2 - 1);
  3368. }
  3369. }
  3370. if (oldHangulsFound2 == 0)
  3371. {
  3372. *pWeight2 = GET_DWORD_WEIGHT(pHashN, **ppString2);
  3373. *uw2 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight2),
  3374. GET_ALPHA_NUMERIC(pWeight2),
  3375. fModify );
  3376. ((PSORTKEY)pWeight2)->Diacritic = MIN_DW;
  3377. }
  3378. }
  3379. //
  3380. // See if either weight is invalid.
  3381. // A weight can be invalid when the character is not a Jamo.
  3382. //
  3383. if (*pWeight1 == CMP_INVALID_WEIGHT)
  3384. {
  3385. //
  3386. // The current character is not a Jamo. Set the Weight to
  3387. // be CMP_INVALID_WEIGHT, so that the string comparision can
  3388. // restart within the loop of CompareString().
  3389. //
  3390. *pWeight1 = CMP_INVALID_WEIGHT;
  3391. bRestart = 1;
  3392. goto FindJamoDifferenceExit;
  3393. }
  3394. if (*pWeight2 == CMP_INVALID_WEIGHT)
  3395. {
  3396. //
  3397. // The current character is not a Jamo. Set the Weight to
  3398. // be CMP_INVALID_WEIGHT, so that the string comparision can
  3399. // restart within the loop of CompareString().
  3400. //
  3401. *pWeight2 = CMP_INVALID_WEIGHT;
  3402. bRestart = 1;
  3403. goto FindJamoDifferenceExit;
  3404. }
  3405. if (*uw1 != *uw2)
  3406. {
  3407. //
  3408. // Found differences in Unicode weight. We can stop the
  3409. // processing now.
  3410. //
  3411. goto FindJamoDifferenceExit;
  3412. }
  3413. //
  3414. // When we get here, we know that we have the same Unicode Weight.
  3415. // Check if we need to record the WhichJamo.
  3416. //
  3417. if ((*pState & STATE_JAMO_WEIGHT) &&
  3418. ((oldHangulsFound1 > 0) || (oldHangulsFound2 > 0)))
  3419. {
  3420. if ((oldHangulsFound1 > 0) && (oldHangulsFound2 > 0))
  3421. {
  3422. *WhichJamo = (int)memcmp( JamoWeight1,
  3423. JamoWeight2,
  3424. sizeof(JamoWeight1) ) + 2;
  3425. }
  3426. else if (oldHangulsFound1 > 0)
  3427. {
  3428. *WhichJamo = CSTR_GREATER_THAN;
  3429. }
  3430. else
  3431. {
  3432. *WhichJamo = CSTR_LESS_THAN;
  3433. }
  3434. *pState &= ~STATE_JAMO_WEIGHT;
  3435. oldHangulsFound1 = oldHangulsFound2 = 0;
  3436. }
  3437. (*ppString1)++; (*ctr1)--;
  3438. (*ppString2)++; (*ctr2)--;
  3439. if (AT_STRING_END(*ctr1, *ppString1, cchCount1) ||
  3440. AT_STRING_END(*ctr2, *ppString2, cchCount2))
  3441. {
  3442. break;
  3443. }
  3444. *pWeight1 = *pWeight2 = CMP_INVALID_WEIGHT;
  3445. }
  3446. //
  3447. // If we drop out of the while loop because we reach the end of strings,
  3448. // decrement the pointers by one because loops in CompareString() will
  3449. // increase the pointers at the end of the loop.
  3450. //
  3451. // If we drop out of the while loop because the goto's in it, we are
  3452. // already off by one.
  3453. //
  3454. if (AT_STRING_END(*ctr1, *ppString1, cchCount1))
  3455. {
  3456. (*ppString1)--; (*ctr1)++;
  3457. }
  3458. if (AT_STRING_END(*ctr2, *ppString2, cchCount2))
  3459. {
  3460. (*ppString2)--; (*ctr2)++;
  3461. }
  3462. FindJamoDifferenceExit:
  3463. *pLastJamo = *ppString1;
  3464. return (bRestart);
  3465. }