Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3746 lines
158 KiB

  1. /*++
  2. Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. string.c
  5. Abstract:
  6. This file contains functions that deal with characters and strings.
  7. APIs found in this file:
  8. CompareStringW
  9. GetStringTypeExW
  10. GetStringTypeW
  11. Revision History:
  12. 05-31-91 JulieB Created.
  13. --*/
  14. //
  15. // Include Files.
  16. //
  17. #include "nls.h"
  18. #include "nlssafe.h"
  19. #include "jamo.h"
  20. //
  21. // Constant Declarations.
  22. //
  //
  //  State Table.
  //
  //  Each state is a distinct bit, so the states can be OR'ed together
  //  into one state mask and cleared individually (see REMOVE_STATE).
  //
  #define STATE_DW                  0x01    // normal diacritic weight state
  #define STATE_REVERSE_DW          0x02    // reverse diacritic weight state
  #define STATE_CW                  0x04    // case weight state
  #define STATE_JAMO_WEIGHT         0x08    // jamo weight state
  //
  //  Invalid weight values.
  //
  //  CMP_INVALID_FAREAST and CMP_INVALID_UW are the full weight and the
  //  unicode weight that GET_FAREAST_WEIGHT assigns to a far east special
  //  character when no usable previous character is found.
  //
  #define CMP_INVALID_WEIGHT        0xFFFFFFFF
  #define CMP_INVALID_FAREAST       0xFFFF0000
  #define CMP_INVALID_UW            0xFFFF
  36. //
  37. // Forward Declarations.
  38. //
  39. int
  40. LongCompareStringW(
  41. PLOC_HASH pHashN,
  42. DWORD dwCmpFlags,
  43. LPCWSTR lpString1,
  44. int cchCount1,
  45. LPCWSTR lpString2,
  46. int cchCount2,
  47. BOOL fModify);
  48. int
  49. FindJamoDifference(
  50. PLOC_HASH pHashN,
  51. LPCWSTR* ppString1,
  52. int* ctr1,
  53. int cchCount1,
  54. DWORD* pWeight1,
  55. LPCWSTR* ppString2,
  56. int* ctr2,
  57. int cchCount2,
  58. DWORD* pWeight2,
  59. LPCWSTR* pLastJamo,
  60. WORD* uw1,
  61. WORD* uw2,
  62. int* pState,
  63. int* WhichJamo,
  64. BOOL fModify);
  65. //-------------------------------------------------------------------------//
  66. // INTERNAL MACROS //
  67. //-------------------------------------------------------------------------//
  ////////////////////////////////////////////////////////////////////////////
  //
  //  NOT_END_STRING
  //
  //  Checks to see if the search has NOT yet reached the end of the string.
  //  It evaluates to TRUE if the counter is not at zero (counting backwards)
  //  and the null termination has not been reached.  The null terminator is
  //  only treated as the end of the string if -2 was passed in the count
  //  parameter.
  //
  //    ct     - remaining character counter
  //    ptr    - pointer to the current character
  //    cchIn  - count that was passed in (-2 => null terminated string)
  //
  //  Every argument is parenthesized in the expansion so that expression
  //  arguments (e.g. "a & b") keep their intended precedence.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define NOT_END_STRING(ct, ptr, cchIn)                                     \
      (((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  AT_STRING_END
  //
  //  Checks to see if the pointer is at the end of the string.
  //  It evaluates to TRUE if the counter is zero or if the null termination
  //  has been reached (if -2 was passed in the count parameter).
  //
  //    ct     - remaining character counter
  //    ptr    - pointer to the current character
  //    cchIn  - count that was passed in (-2 => null terminated string)
  //
  //  Every argument is parenthesized in the expansion so that expression
  //  arguments (e.g. "a & b") keep their intended precedence.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define AT_STRING_END(ct, ptr, cchIn)                                      \
      (((ct) == 0) || ((*(ptr) == 0) && ((cchIn) == -2)))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  REMOVE_STATE
  //
  //  Removes the given state(s) from the state table.  This should only be
  //  called when the state should not be entered for the remainder of the
  //  comparison.  It clears the corresponding bit(s) in the variable
  //  "State", which must be in scope where the macro is used.
  //
  //    value  - one STATE_* bit, or several OR'ed together
  //
  //  The argument is parenthesized so that an OR'ed combination is
  //  complemented as a whole.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define REMOVE_STATE(value)            (State &= ~(value))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  POINTER_FIXUP
  //
  //  Steps both string pointers forward by one character.  Before doing so,
  //  any string that is currently being read out of an expansion temporary
  //  buffer (its pSaveN is non-NULL) has its expansion counter decremented;
  //  when that counter hits zero the saved pointer is restored and pSaveN
  //  is reset to NULL.
  //
  //  Uses pString1/2, pSave1/2 and cExpChar1/2 from the enclosing scope.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define POINTER_FIXUP() \
  { \
      /* \
       *  String 1: leave the expansion buffer once it is used up. \
       */ \
      if (pSave1 != NULL) \
      { \
          cExpChar1--; \
          if (cExpChar1 == 0) \
          { \
              pString1 = pSave1; \
              pSave1 = NULL; \
          } \
      } \
      \
      /* \
       *  String 2: leave the expansion buffer once it is used up. \
       */ \
      if (pSave2 != NULL) \
      { \
          cExpChar2--; \
          if (cExpChar2 == 0) \
          { \
              pString2 = pSave2; \
              pSave2 = NULL; \
          } \
      } \
      \
      /* \
       *  Move both strings on to the next character. \
       */ \
      ++pString1; \
      ++pString2; \
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  //  SCAN_LONGER_STRING
  //
  //  Scans the rest of the longer string for diacritic, case, and special
  //  weights.  This macro always RETURNS from the enclosing function.
  //
  //    ct     - remaining character counter for the string (modified)
  //    ptr    - pointer into the string being scanned (modified)
  //    cchIn  - count that was passed in (-2 => null terminated string)
  //    ret    - value returned if a character that may not be ignored is
  //             found
  //
  //  Uses the following names from the enclosing scope: pHashN, Weight1,
  //  fIgnoreDiacritic, fIgnoreSymbol, WhichDiacritic, WhichCase,
  //  WhichExtra, WhichJamo, WhichPunct1 and WhichPunct2.
  //
  //  The macro arguments are parenthesized wherever they are used.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define SCAN_LONGER_STRING( ct, ptr, cchIn, ret ) \
  { \
      /* \
       *  Search through the rest of the longer string to make sure \
       *  all characters are not to be ignored.  If find a character that \
       *  should not be ignored, return the given return value immediately. \
       * \
       *  The only exception to this is when a nonspace mark is found.  If \
       *  another DW difference has been found earlier, then use that. \
       */ \
      while (NOT_END_STRING((ct), (ptr), (cchIn))) \
      { \
          Weight1 = GET_DWORD_WEIGHT(pHashN, *(ptr)); \
          switch (GET_SCRIPT_MEMBER(&Weight1)) \
          { \
              case ( UNSORTABLE ): \
              { \
                  break; \
              } \
              case ( NONSPACE_MARK ): \
              { \
                  if ((!fIgnoreDiacritic) && (!WhichDiacritic)) \
                  { \
                      return (ret); \
                  } \
                  break; \
              } \
              case ( PUNCTUATION ) : \
              case ( SYMBOL_1 ) : \
              case ( SYMBOL_2 ) : \
              case ( SYMBOL_3 ) : \
              case ( SYMBOL_4 ) : \
              case ( SYMBOL_5 ) : \
              { \
                  if (!fIgnoreSymbol) \
                  { \
                      return (ret); \
                  } \
                  break; \
              } \
              case ( EXPANSION ) : \
              case ( FAREAST_SPECIAL ) : \
              case ( JAMO_SPECIAL ) : \
              case ( EXTENSION_A ) : \
              default : \
              { \
                  return (ret); \
              } \
          } \
          \
          /* \
           *  Advance pointer and decrement counter. \
           */ \
          (ptr)++; \
          (ct)--; \
      } \
      \
      /* \
       *  Need to check diacritic, case, extra, jamo, and special weights \
       *  for final return value.  Still could be equal if the longer part \
       *  of the string contained only characters to be ignored. \
       * \
       *  NOTE:  The following checks MUST REMAIN IN THIS ORDER: \
       *         Diacritic, Case, Extra, Jamo, Punctuation. \
       */ \
      if (WhichDiacritic) \
      { \
          return (WhichDiacritic); \
      } \
      if (WhichCase) \
      { \
          return (WhichCase); \
      } \
      if (WhichExtra) \
      { \
          /* \
           *  Weights 4 and 5 are skipped when diacritics are ignored. \
           */ \
          if (!fIgnoreDiacritic) \
          { \
              if (GET_WT_FOUR(&WhichExtra)) \
              { \
                  return (GET_WT_FOUR(&WhichExtra)); \
              } \
              if (GET_WT_FIVE(&WhichExtra)) \
              { \
                  return (GET_WT_FIVE(&WhichExtra)); \
              } \
          } \
          if (GET_WT_SIX(&WhichExtra)) \
          { \
              return (GET_WT_SIX(&WhichExtra)); \
          } \
          if (GET_WT_SEVEN(&WhichExtra)) \
          { \
              return (GET_WT_SEVEN(&WhichExtra)); \
          } \
      } \
      if (WhichJamo) \
      { \
          return (WhichJamo); \
      } \
      if (WhichPunct1) \
      { \
          return (WhichPunct1); \
      } \
      if (WhichPunct2) \
      { \
          return (WhichPunct2); \
      } \
      \
      return (CSTR_EQUAL); \
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  //  QUICK_SCAN_LONGER_STRING
  //
  //  Scans the rest of the longer string for diacritic, case, and special
  //  weights.  Assumes that both strings are null-terminated.  This macro
  //  always RETURNS from the enclosing function.
  //
  //    ptr    - pointer into the string being scanned (modified)
  //    ret    - value returned if a character that may not be ignored is
  //             found
  //
  //  Uses the following names from the enclosing scope: pHashN,
  //  WhichDiacritic, WhichCase, WhichExtra, WhichJamo, WhichPunct1 and
  //  WhichPunct2.
  //
  //  The macro arguments are parenthesized wherever they are used.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define QUICK_SCAN_LONGER_STRING( ptr, ret ) \
  { \
      /* \
       *  Search through the rest of the longer string to make sure \
       *  all characters are not to be ignored.  If find a character that \
       *  should not be ignored, return the given return value immediately. \
       * \
       *  The only exception to this is when a nonspace mark is found.  If \
       *  another DW difference has been found earlier, then use that. \
       */ \
      while (*(ptr) != 0) \
      { \
          switch (GET_SCRIPT_MEMBER(&(pHashN->pSortkey[*(ptr)]))) \
          { \
              case ( UNSORTABLE ): \
              { \
                  break; \
              } \
              case ( NONSPACE_MARK ): \
              { \
                  if (!WhichDiacritic) \
                  { \
                      return (ret); \
                  } \
                  break; \
              } \
              default : \
              { \
                  return (ret); \
              } \
          } \
          \
          /* \
           *  Advance pointer. \
           */ \
          (ptr)++; \
      } \
      \
      /* \
       *  Need to check diacritic, case, extra, jamo, and special weights \
       *  for final return value.  Still could be equal if the longer part \
       *  of the string contained only unsortable characters. \
       * \
       *  NOTE:  The following checks MUST REMAIN IN THIS ORDER: \
       *         Diacritic, Case, Extra, Jamo, Punctuation. \
       */ \
      if (WhichDiacritic) \
      { \
          return (WhichDiacritic); \
      } \
      if (WhichCase) \
      { \
          return (WhichCase); \
      } \
      if (WhichExtra) \
      { \
          if (GET_WT_FOUR(&WhichExtra)) \
          { \
              return (GET_WT_FOUR(&WhichExtra)); \
          } \
          if (GET_WT_FIVE(&WhichExtra)) \
          { \
              return (GET_WT_FIVE(&WhichExtra)); \
          } \
          if (GET_WT_SIX(&WhichExtra)) \
          { \
              return (GET_WT_SIX(&WhichExtra)); \
          } \
          if (GET_WT_SEVEN(&WhichExtra)) \
          { \
              return (GET_WT_SEVEN(&WhichExtra)); \
          } \
      } \
      if (WhichJamo) \
      { \
          return (WhichJamo); \
      } \
      if (WhichPunct1) \
      { \
          return (WhichPunct1); \
      } \
      if (WhichPunct2) \
      { \
          return (WhichPunct2); \
      } \
      \
      return (CSTR_EQUAL); \
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  //  GET_FAREAST_WEIGHT
  //
  //  Returns the weight for the far east special case in "wt".  This
  //  currently includes the Cho-on, the Repeat, and the Kana characters.
  //
  //    wt       - in/out: full weight of the current character
  //    uw       - out: unicode weight to compare
  //    mask     - mask applied to the weight of each previous character
  //    pBegin   - pointer to the beginning of the string
  //    pCur     - pointer to the current character
  //    ExtraWt  - out: extra weights (4, 5, 6, 7)
  //    fModify  - passed through to MAKE_UNICODE_WT / GET_UNICODE_MOD
  //
  //  Uses pHashN from the enclosing scope.  The expansion declares the
  //  block-local names ct, PrevSM, PrevAW, PrevCW, AW, CW and PrevWt, so
  //  the arguments must not be expressions that use those names.
  //
  //  The macro arguments are parenthesized wherever they are used.
  //
  //  08-19-93    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////

  #define GET_FAREAST_WEIGHT( wt, uw, mask, pBegin, pCur, ExtraWt, fModify ) \
  { \
      int ct;                       /* loop counter */ \
      BYTE PrevSM;                  /* previous script member value */ \
      BYTE PrevAW;                  /* previous alphanumeric value */ \
      BYTE PrevCW;                  /* previous case value */ \
      BYTE AW;                      /* alphanumeric value */ \
      BYTE CW;                      /* case value */ \
      DWORD PrevWt;                 /* previous weight */ \
      \
      \
      /* \
       *  Get the alphanumeric weight and the case weight of the \
       *  current code point. \
       */ \
      AW = GET_ALPHA_NUMERIC(&(wt)); \
      CW = GET_CASE(&(wt)); \
      (ExtraWt) = (DWORD)0; \
      \
      /* \
       *  Special case Repeat and Cho-On. \
       *    AW = 0  => Repeat \
       *    AW = 1  => Cho-On \
       *    AW = 2+ => Kana \
       */ \
      if (AW <= MAX_SPECIAL_AW) \
      { \
          /* \
           *  If the script member of the previous character is \
           *  invalid, then give the special character an \
           *  invalid weight (highest possible weight) so that it \
           *  will sort AFTER everything else. \
           */ \
          ct = 1; \
          PrevWt = CMP_INVALID_FAREAST; \
          while (((pCur) - ct) >= (pBegin)) \
          { \
              PrevWt = GET_DWORD_WEIGHT(pHashN, *((pCur) - ct)); \
              PrevWt &= (mask); \
              PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \
              if (PrevSM < FAREAST_SPECIAL) \
              { \
                  if (PrevSM == EXPANSION) \
                  { \
                      /* \
                       *  EXPANSION.  Mark the weight invalid and stop \
                       *  looking back (falls out to the break below). \
                       */ \
                      PrevWt = CMP_INVALID_FAREAST; \
                  } \
                  else \
                  { \
                      /* \
                       *  UNSORTABLE or NONSPACE_MARK. \
                       * \
                       *  Just ignore these, since we only care about the \
                       *  previous UW value. \
                       */ \
                      PrevWt = CMP_INVALID_FAREAST; \
                      ct++; \
                      continue; \
                  } \
              } \
              else if (PrevSM == FAREAST_SPECIAL) \
              { \
                  PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \
                  if (PrevAW <= MAX_SPECIAL_AW) \
                  { \
                      /* \
                       *  Handle case where two special chars follow \
                       *  each other.  Keep going back in the string. \
                       */ \
                      PrevWt = CMP_INVALID_FAREAST; \
                      ct++; \
                      continue; \
                  } \
                  \
                  UNICODE_WT(&PrevWt) = \
                      MAKE_UNICODE_WT(KANA, PrevAW, fModify); \
                  \
                  /* \
                   *  Only build weights 4, 5, 6, and 7 if the \
                   *  previous character is KANA. \
                   * \
                   *  Always: \
                   *    4W = previous CW  &  ISOLATE_SMALL \
                   *    6W = previous CW  &  ISOLATE_KANA \
                   */ \
                  PrevCW = GET_CASE(&PrevWt); \
                  GET_WT_FOUR(&(ExtraWt)) = PrevCW & ISOLATE_SMALL; \
                  GET_WT_SIX(&(ExtraWt)) = PrevCW & ISOLATE_KANA; \
                  \
                  if (AW == AW_REPEAT) \
                  { \
                      /* \
                       *  Repeat: \
                       *    UW = previous UW \
                       *    5W = WT_FIVE_REPEAT \
                       *    7W = previous CW  &  ISOLATE_WIDTH \
                       */ \
                      (uw) = UNICODE_WT(&PrevWt); \
                      GET_WT_FIVE(&(ExtraWt)) = WT_FIVE_REPEAT; \
                      GET_WT_SEVEN(&(ExtraWt)) = PrevCW & ISOLATE_WIDTH; \
                  } \
                  else \
                  { \
                      /* \
                       *  Cho-On: \
                       *    UW = previous UW  &  CHO_ON_UW_MASK \
                       *    5W = WT_FIVE_CHO_ON \
                       *    7W = current CW  &  ISOLATE_WIDTH \
                       */ \
                      (uw) = UNICODE_WT(&PrevWt) & CHO_ON_UW_MASK; \
                      GET_WT_FIVE(&(ExtraWt)) = WT_FIVE_CHO_ON; \
                      GET_WT_SEVEN(&(ExtraWt)) = CW & ISOLATE_WIDTH; \
                  } \
              } \
              else \
              { \
                  (uw) = GET_UNICODE_MOD(&PrevWt, fModify); \
              } \
              \
              break; \
          } \
      } \
      else \
      { \
          /* \
           *  Kana: \
           *    SM = KANA \
           *    AW = current AW \
           *    4W = current CW  &  ISOLATE_SMALL \
           *    5W = WT_FIVE_KANA \
           *    6W = current CW  &  ISOLATE_KANA \
           *    7W = current CW  &  ISOLATE_WIDTH \
           */ \
          (uw) = MAKE_UNICODE_WT(KANA, AW, fModify); \
          GET_WT_FOUR(&(ExtraWt)) = CW & ISOLATE_SMALL; \
          GET_WT_FIVE(&(ExtraWt)) = WT_FIVE_KANA; \
          GET_WT_SIX(&(ExtraWt)) = CW & ISOLATE_KANA; \
          GET_WT_SEVEN(&(ExtraWt)) = CW & ISOLATE_WIDTH; \
      } \
      \
      /* \
       *  Get the weight for the far east special case and store it in wt. \
       *  PrevWt is only read when AW <= MAX_SPECIAL_AW, in which case it \
       *  was initialized above. \
       */ \
      if ((AW > MAX_SPECIAL_AW) || (PrevWt != CMP_INVALID_FAREAST)) \
      { \
          /* \
           *  Always: \
           *    DW = current DW \
           *    CW = minimum CW \
           */ \
          UNICODE_WT(&(wt)) = (uw); \
          CASE_WT(&(wt)) = MIN_CW; \
      } \
      else \
      { \
          (uw) = CMP_INVALID_UW; \
          (wt) = CMP_INVALID_FAREAST; \
          (ExtraWt) = 0; \
      } \
  }
  537. //-------------------------------------------------------------------------//
  538. // API ROUTINES //
  539. //-------------------------------------------------------------------------//
  540. ////////////////////////////////////////////////////////////////////////////
  541. //
  542. // CompareStringW
  543. //
  544. // Compares two wide character strings of the same locale according to the
  545. // supplied locale handle.
  546. //
  547. // 05-31-91 JulieB Created.
  548. ////////////////////////////////////////////////////////////////////////////
  549. int WINAPI CompareStringW(
  550. LCID Locale,
  551. DWORD dwCmpFlags,
  552. LPCWSTR lpString1,
  553. int cchCount1,
  554. LPCWSTR lpString2,
  555. int cchCount2)
  556. {
  557. register LPWSTR pString1; // ptr to go thru string 1
  558. register LPWSTR pString2; // ptr to go thru string 2
  559. PLOC_HASH pHashN; // ptr to LOC hash node
  560. BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
  561. BOOL fModify; // flag to use modified script member weights
  562. DWORD State; // state table
  563. DWORD Mask; // mask for weights
  564. DWORD Weight1; // full weight of char - string 1
  565. DWORD Weight2; // full weight of char - string 2
  566. int JamoFlag = FALSE;
  567. LPCWSTR pLastJamo = lpString1;
  568. int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
  569. int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
  570. int WhichJamo; // XW for Jamo
  571. int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
  572. int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
  573. LPWSTR pSave1; // ptr to saved pString1
  574. LPWSTR pSave2; // ptr to saved pString2
  575. int cExpChar1, cExpChar2; // ct of expansions in tmp
  576. DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
  577. DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
  578. //
  579. // Invalid Parameter Check:
  580. // - validate LCID
  581. // - either string is null
  582. //
  583. VALIDATE_LANGUAGE(Locale, pHashN, 0, TRUE);
  584. if ((pHashN == NULL) ||
  585. (lpString1 == NULL) || (lpString2 == NULL))
  586. {
  587. SetLastError(ERROR_INVALID_PARAMETER);
  588. return (0);
  589. }
  590. //
  591. // Make sure the appropriate sorting tables are available. If not,
  592. // return an error.
  593. //
  594. if ((pHashN->pSortkey == NULL) ||
  595. (pHashN->IfIdeographFailure == TRUE))
  596. {
  597. KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
  598. SetLastError(ERROR_FILE_NOT_FOUND);
  599. return (0);
  600. }
  601. //
  602. // Call longer compare string if any of the following is true:
  603. // - compression locale
  604. // - either count is not -1
  605. // - dwCmpFlags is not 0 or ignore case (see NOTE below)
  606. // - locale is Korean - script member weight adjustment needed
  607. //
  608. // NOTE: If the value of NORM_IGNORECASE ever changes, this
  609. // code should check for:
  610. // ( (dwCmpFlags != 0) && (dwCmpFlags != NORM_IGNORECASE) )
  611. // Since NORM_IGNORECASE is equal to 1, we can optimize this
  612. // by checking for > 1.
  613. //
  614. dwCmpFlags &= (~LOCALE_USE_CP_ACP);
  615. fModify = IS_KOREAN(Locale);
  616. if ( (pHashN->IfCompression) ||
  617. (cchCount1 > -1) || (cchCount2 > -1) ||
  618. (dwCmpFlags > NORM_IGNORECASE) ||
  619. (fModify == TRUE) )
  620. {
  621. return (LongCompareStringW( pHashN,
  622. dwCmpFlags,
  623. lpString1,
  624. ((cchCount1 <= -1) ? -2 : cchCount1),
  625. lpString2,
  626. ((cchCount2 <= -1) ? -2 : cchCount2),
  627. fModify ));
  628. }
  629. //
  630. // Initialize string pointers.
  631. //
  632. pString1 = (LPWSTR)lpString1;
  633. pString2 = (LPWSTR)lpString2;
  634. //
  635. // Do a wchar by wchar compare.
  636. //
  637. while (TRUE)
  638. {
  639. //
  640. // See if characters are equal.
  641. // If characters are equal, increment pointers and continue
  642. // string compare.
  643. //
  644. // NOTE: Loop is unrolled 8 times for performance.
  645. //
  646. if ((*pString1 != *pString2) || (*pString1 == 0))
  647. {
  648. break;
  649. }
  650. pString1++;
  651. pString2++;
  652. if ((*pString1 != *pString2) || (*pString1 == 0))
  653. {
  654. break;
  655. }
  656. pString1++;
  657. pString2++;
  658. if ((*pString1 != *pString2) || (*pString1 == 0))
  659. {
  660. break;
  661. }
  662. pString1++;
  663. pString2++;
  664. if ((*pString1 != *pString2) || (*pString1 == 0))
  665. {
  666. break;
  667. }
  668. pString1++;
  669. pString2++;
  670. if ((*pString1 != *pString2) || (*pString1 == 0))
  671. {
  672. break;
  673. }
  674. pString1++;
  675. pString2++;
  676. if ((*pString1 != *pString2) || (*pString1 == 0))
  677. {
  678. break;
  679. }
  680. pString1++;
  681. pString2++;
  682. if ((*pString1 != *pString2) || (*pString1 == 0))
  683. {
  684. break;
  685. }
  686. pString1++;
  687. pString2++;
  688. if ((*pString1 != *pString2) || (*pString1 == 0))
  689. {
  690. break;
  691. }
  692. pString1++;
  693. pString2++;
  694. }
  695. //
  696. // If strings are both at null terminators, return equal.
  697. //
  698. if (*pString1 == *pString2)
  699. {
  700. return (CSTR_EQUAL);
  701. }
  702. //
  703. // Initialize flags, pointers, and counters.
  704. //
  705. fIgnorePunct = FALSE;
  706. WhichDiacritic = 0;
  707. WhichCase = 0;
  708. WhichJamo = 0;
  709. WhichPunct1 = 0;
  710. WhichPunct2 = 0;
  711. pSave1 = NULL;
  712. pSave2 = NULL;
  713. ExtraWt1 = (DWORD)0;
  714. WhichExtra = (DWORD)0;
  715. //
  716. // Switch on the different flag options. This will speed up
  717. // the comparisons of two strings that are different.
  718. //
  719. // The only two possibilities in this optimized section are
  720. // no flags and the ignore case flag.
  721. //
  722. if (dwCmpFlags == 0)
  723. {
  724. Mask = CMP_MASKOFF_NONE;
  725. }
  726. else
  727. {
  728. Mask = CMP_MASKOFF_CW;
  729. }
  730. State = (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
  731. State |= (STATE_CW | STATE_JAMO_WEIGHT);
  732. //
  733. // Compare each character's sortkey weight in the two strings.
  734. //
  735. while ((*pString1 != 0) && (*pString2 != 0))
  736. {
  737. Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
  738. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  739. Weight1 &= Mask;
  740. Weight2 &= Mask;
  741. if (Weight1 != Weight2)
  742. {
  743. BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
  744. BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
  745. WORD uw1 = GET_UNICODE_SM(&Weight1, sm1); // unicode weight 1
  746. WORD uw2 = GET_UNICODE_SM(&Weight2, sm2); // unicode weight 2
  747. BYTE dw1; // diacritic weight 1
  748. BYTE dw2; // diacritic weight 2
  749. BOOL fContinue; // flag to continue loop
  750. DWORD Wt; // temp weight holder
  751. WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
  752. WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
  753. //
  754. // If Unicode Weights are different and no special cases,
  755. // then we're done. Otherwise, we need to do extra checking.
  756. //
  757. // Must check ENTIRE string for any possibility of Unicode Weight
  758. // differences. As soon as a Unicode Weight difference is found,
  759. // then we're done. If no UW difference is found, then the
  760. // first Diacritic Weight difference is used. If no DW difference
  761. // is found, then use the first Case Difference. If no CW
  762. // difference is found, then use the first Extra Weight
  763. // difference. If no XW difference is found, then use the first
  764. // Special Weight difference.
  765. //
  766. if ((uw1 != uw2) ||
  767. (sm1 == FAREAST_SPECIAL) ||
  768. (sm1 == EXTENSION_A))
  769. {
  770. //
  771. // Initialize the continue flag.
  772. //
  773. fContinue = FALSE;
  774. //
  775. // Check for Unsortable characters and skip them.
  776. // This needs to be outside the switch statement. If EITHER
  777. // character is unsortable, must skip it and start over.
  778. //
  779. if (sm1 == UNSORTABLE)
  780. {
  781. pString1++;
  782. fContinue = TRUE;
  783. }
  784. if (sm2 == UNSORTABLE)
  785. {
  786. pString2++;
  787. fContinue = TRUE;
  788. }
  789. if (fContinue)
  790. {
  791. continue;
  792. }
  793. //
  794. // Switch on the script member of string 1 and take care
  795. // of any special cases.
  796. //
  797. switch (sm1)
  798. {
  799. case ( NONSPACE_MARK ) :
  800. {
  801. //
  802. // Nonspace only - look at diacritic weight only.
  803. //
  804. if ((WhichDiacritic == 0) ||
  805. (State & STATE_REVERSE_DW))
  806. {
  807. WhichDiacritic = CSTR_GREATER_THAN;
  808. //
  809. // Remove state from state machine.
  810. //
  811. REMOVE_STATE(STATE_DW);
  812. }
  813. //
  814. // Adjust pointer and set flags.
  815. //
  816. pString1++;
  817. fContinue = TRUE;
  818. break;
  819. }
  820. case ( PUNCTUATION ) :
  821. {
  822. //
  823. // If the ignore punctuation flag is set, then skip
  824. // over the punctuation.
  825. //
  826. if (fIgnorePunct)
  827. {
  828. pString1++;
  829. fContinue = TRUE;
  830. }
  831. else if (sm2 != PUNCTUATION)
  832. {
  833. //
  834. // The character in the second string is
  835. // NOT punctuation.
  836. //
  837. if (WhichPunct2)
  838. {
  839. //
  840. // Set WP 2 to show that string 2 is smaller,
  841. // since a punctuation char had already been
  842. // found at an earlier position in string 2.
  843. //
  844. // Set the Ignore Punctuation flag so we just
  845. // skip over any other punctuation chars in
  846. // the string.
  847. //
  848. WhichPunct2 = CSTR_GREATER_THAN;
  849. fIgnorePunct = TRUE;
  850. }
  851. else
  852. {
  853. //
  854. // Set WP 1 to show that string 2 is smaller,
  855. // and that string 1 has had a punctuation
  856. // char - since no punctuation chars have
  857. // been found in string 2.
  858. //
  859. WhichPunct1 = CSTR_GREATER_THAN;
  860. }
  861. //
  862. // Advance pointer 1, and set flag to true.
  863. //
  864. pString1++;
  865. fContinue = TRUE;
  866. }
  867. //
  868. // Do NOT want to advance the pointer in string 1 if
  869. // string 2 is also a punctuation char. This will
  870. // be done later.
  871. //
  872. break;
  873. }
  874. case ( EXPANSION ) :
  875. {
  876. //
  877. // Save pointer in pString1 so that it can be
  878. // restored.
  879. //
  880. if (pSave1 == NULL)
  881. {
  882. pSave1 = pString1;
  883. }
  884. pString1 = pTmpBuf1;
  885. //
  886. // Expand character into temporary buffer.
  887. //
  888. pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
  889. pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
  890. //
  891. // Set cExpChar1 to the number of expansion characters
  892. // stored.
  893. //
  894. cExpChar1 = MAX_TBL_EXPANSION;
  895. fContinue = TRUE;
  896. break;
  897. }
  898. case ( FAREAST_SPECIAL ) :
  899. {
  900. if (sm2 != EXPANSION)
  901. {
  902. //
  903. // Get the weight for the far east special case
  904. // and store it in Weight1.
  905. //
  906. GET_FAREAST_WEIGHT( Weight1,
  907. uw1,
  908. Mask,
  909. lpString1,
  910. pString1,
  911. ExtraWt1,
  912. FALSE );
  913. if (sm2 != FAREAST_SPECIAL)
  914. {
  915. //
  916. // The character in the second string is
  917. // NOT a fareast special char.
  918. //
  919. // Set each of weights 4, 5, 6, and 7 to show
  920. // that string 2 is smaller (if not already set).
  921. //
  922. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  923. (GET_WT_FOUR(&ExtraWt1) != 0))
  924. {
  925. GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
  926. }
  927. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  928. (GET_WT_FIVE(&ExtraWt1) != 0))
  929. {
  930. GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
  931. }
  932. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  933. (GET_WT_SIX(&ExtraWt1) != 0))
  934. {
  935. GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
  936. }
  937. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  938. (GET_WT_SEVEN(&ExtraWt1) != 0))
  939. {
  940. GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
  941. }
  942. }
  943. }
  944. break;
  945. }
  946. case ( JAMO_SPECIAL ) :
  947. {
  948. int ctr1; // dummy variables for FindJamoDifference
  949. LPWSTR pStr1 = pString1;
  950. LPWSTR pStr2 = pString2;
  951. //
  952. // Set the JamoFlag so we don't handle it again.
  953. //
  954. JamoFlag = TRUE;
  955. fContinue = FindJamoDifference(
  956. pHashN,
  957. &pStr1, &ctr1, -2, &Weight1,
  958. &pStr2, &ctr1, -2, &Weight2,
  959. &pLastJamo,
  960. &uw1, &uw2,
  961. &State,
  962. &WhichJamo,
  963. fModify );
  964. if (WhichJamo)
  965. {
  966. return (WhichJamo);
  967. }
  968. pString1 = pStr1;
  969. pString2 = pStr2;
  970. break;
  971. }
  972. case ( EXTENSION_A ) :
  973. {
  974. //
  975. // Compare the weights.
  976. //
  977. if (Weight1 == Weight2)
  978. {
  979. //
  980. // Adjust pointers and set flag.
  981. //
  982. pString1++; pString2++;
  983. fContinue = TRUE;
  984. }
  985. else
  986. {
  987. //
  988. // Get the actual UW to compare.
  989. //
  990. if (sm2 == EXTENSION_A)
  991. {
  992. //
  993. // Set the UW values to be the AW and DW since
  994. // both strings contain an extension A char.
  995. //
  996. uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
  997. GET_DIACRITIC(&Weight1),
  998. FALSE );
  999. uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
  1000. GET_DIACRITIC(&Weight2),
  1001. FALSE );
  1002. }
  1003. else
  1004. {
  1005. //
  1006. // Only string1 contains an extension A char,
  1007. // so set the UW value to be the first UW
  1008. // value for extension A (default values):
  1009. // SM_EXT_A, AW_EXT_A
  1010. //
  1011. uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  1012. }
  1013. }
  1014. break;
  1015. }
  1016. case ( UNSORTABLE ) :
  1017. {
  1018. //
  1019. // Fill out the case statement so the compiler
  1020. // will use a jump table.
  1021. //
  1022. break;
  1023. }
  1024. }
  1025. //
  1026. // Switch on the script member of string 2 and take care
  1027. // of any special cases.
  1028. //
  1029. switch (sm2)
  1030. {
  1031. case ( NONSPACE_MARK ) :
  1032. {
  1033. //
  1034. // Nonspace only - look at diacritic weight only.
  1035. //
  1036. if ((WhichDiacritic == 0) ||
  1037. (State & STATE_REVERSE_DW))
  1038. {
  1039. WhichDiacritic = CSTR_LESS_THAN;
  1040. //
  1041. // Remove state from state machine.
  1042. //
  1043. REMOVE_STATE(STATE_DW);
  1044. }
  1045. //
  1046. // Adjust pointer and set flags.
  1047. //
  1048. pString2++;
  1049. fContinue = TRUE;
  1050. break;
  1051. }
  1052. case ( PUNCTUATION ) :
  1053. {
  1054. //
  1055. // If the ignore punctuation flag is set, then skip
  1056. // over the punctuation.
  1057. //
  1058. if (fIgnorePunct)
  1059. {
  1060. //
  1061. // Pointer 2 will be advanced after if-else
  1062. // statement.
  1063. //
  1064. ;
  1065. }
  1066. else if (sm1 != PUNCTUATION)
  1067. {
  1068. //
  1069. // The character in the first string is
  1070. // NOT punctuation.
  1071. //
  1072. if (WhichPunct1)
  1073. {
  1074. //
  1075. // Set WP 1 to show that string 1 is smaller,
  1076. // since a punctuation char had already
  1077. // been found at an earlier position in
  1078. // string 1.
  1079. //
  1080. // Set the Ignore Punctuation flag so we just
  1081. // skip over any other punctuation in the
  1082. // string.
  1083. //
  1084. WhichPunct1 = CSTR_LESS_THAN;
  1085. fIgnorePunct = TRUE;
  1086. }
  1087. else
  1088. {
  1089. //
  1090. // Set WP 2 to show that string 1 is smaller,
  1091. // and that string 2 has had a punctuation
  1092. // char - since no punctuation chars have
  1093. // been found in string 1.
  1094. //
  1095. WhichPunct2 = CSTR_LESS_THAN;
  1096. }
  1097. //
  1098. // Pointer 2 will be advanced after if-else
  1099. // statement.
  1100. //
  1101. }
  1102. else
  1103. {
  1104. //
  1105. // Both code points are punctuation.
  1106. //
  1107. // See if either of the strings has encountered
  1108. // punctuation chars previous to this.
  1109. //
  1110. if (WhichPunct1)
  1111. {
  1112. //
  1113. // String 1 has had a punctuation char, so
  1114. // it should be the smaller string (since
  1115. // both have punctuation chars).
  1116. //
  1117. WhichPunct1 = CSTR_LESS_THAN;
  1118. }
  1119. else if (WhichPunct2)
  1120. {
  1121. //
  1122. // String 2 has had a punctuation char, so
  1123. // it should be the smaller string (since
  1124. // both have punctuation chars).
  1125. //
  1126. WhichPunct2 = CSTR_GREATER_THAN;
  1127. }
  1128. else
  1129. {
  1130. //
  1131. // Position is the same, so compare the
  1132. // special weights. Set WhichPunct1 to
  1133. // the smaller special weight.
  1134. //
  1135. WhichPunct1 = (((GET_ALPHA_NUMERIC(&Weight1) <
  1136. GET_ALPHA_NUMERIC(&Weight2)))
  1137. ? CSTR_LESS_THAN
  1138. : CSTR_GREATER_THAN);
  1139. }
  1140. //
  1141. // Set the Ignore Punctuation flag so we just
  1142. // skip over any other punctuation in the string.
  1143. //
  1144. fIgnorePunct = TRUE;
  1145. //
  1146. // Advance pointer 1. Pointer 2 will be
  1147. // advanced after if-else statement.
  1148. //
  1149. pString1++;
  1150. }
  1151. //
  1152. // Advance pointer 2 and set flag to true.
  1153. //
  1154. pString2++;
  1155. fContinue = TRUE;
  1156. break;
  1157. }
  1158. case ( EXPANSION ) :
  1159. {
  1160. //
  1161. // Save pointer in pString2 so that it can be
  1162. // restored.
  1163. //
  1164. if (pSave2 == NULL)
  1165. {
  1166. pSave2 = pString2;
  1167. }
  1168. pString2 = pTmpBuf2;
  1169. //
  1170. // Expand character into temporary buffer.
  1171. //
  1172. pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
  1173. pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
  1174. //
  1175. // Set cExpChar2 to the number of expansion characters
  1176. // stored.
  1177. //
  1178. cExpChar2 = MAX_TBL_EXPANSION;
  1179. fContinue = TRUE;
  1180. break;
  1181. }
  1182. case ( FAREAST_SPECIAL ) :
  1183. {
  1184. if (sm1 != EXPANSION)
  1185. {
  1186. //
  1187. // Get the weight for the far east special case
  1188. // and store it in Weight2.
  1189. //
  1190. GET_FAREAST_WEIGHT( Weight2,
  1191. uw2,
  1192. Mask,
  1193. lpString2,
  1194. pString2,
  1195. ExtraWt2,
  1196. FALSE );
  1197. if (sm1 != FAREAST_SPECIAL)
  1198. {
  1199. //
  1200. // The character in the first string is
  1201. // NOT a fareast special char.
  1202. //
  1203. // Set each of weights 4, 5, 6, and 7 to show
  1204. // that string 1 is smaller (if not already set).
  1205. //
  1206. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  1207. (GET_WT_FOUR(&ExtraWt2) != 0))
  1208. {
  1209. GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
  1210. }
  1211. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  1212. (GET_WT_FIVE(&ExtraWt2) != 0))
  1213. {
  1214. GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
  1215. }
  1216. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  1217. (GET_WT_SIX(&ExtraWt2) != 0))
  1218. {
  1219. GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
  1220. }
  1221. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  1222. (GET_WT_SEVEN(&ExtraWt2) != 0))
  1223. {
  1224. GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
  1225. }
  1226. }
  1227. else
  1228. {
  1229. //
  1230. // Characters in both strings are fareast
  1231. // special chars.
  1232. //
  1233. // Set each of weights 4, 5, 6, and 7
  1234. // appropriately (if not already set).
  1235. //
  1236. if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
  1237. ( GET_WT_FOUR(&ExtraWt1) !=
  1238. GET_WT_FOUR(&ExtraWt2) ) )
  1239. {
  1240. GET_WT_FOUR(&WhichExtra) =
  1241. ( GET_WT_FOUR(&ExtraWt1) <
  1242. GET_WT_FOUR(&ExtraWt2) )
  1243. ? CSTR_LESS_THAN
  1244. : CSTR_GREATER_THAN;
  1245. }
  1246. if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
  1247. ( GET_WT_FIVE(&ExtraWt1) !=
  1248. GET_WT_FIVE(&ExtraWt2) ) )
  1249. {
  1250. GET_WT_FIVE(&WhichExtra) =
  1251. ( GET_WT_FIVE(&ExtraWt1) <
  1252. GET_WT_FIVE(&ExtraWt2) )
  1253. ? CSTR_LESS_THAN
  1254. : CSTR_GREATER_THAN;
  1255. }
  1256. if ( (GET_WT_SIX(&WhichExtra) == 0) &&
  1257. ( GET_WT_SIX(&ExtraWt1) !=
  1258. GET_WT_SIX(&ExtraWt2) ) )
  1259. {
  1260. GET_WT_SIX(&WhichExtra) =
  1261. ( GET_WT_SIX(&ExtraWt1) <
  1262. GET_WT_SIX(&ExtraWt2) )
  1263. ? CSTR_LESS_THAN
  1264. : CSTR_GREATER_THAN;
  1265. }
  1266. if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
  1267. ( GET_WT_SEVEN(&ExtraWt1) !=
  1268. GET_WT_SEVEN(&ExtraWt2) ) )
  1269. {
  1270. GET_WT_SEVEN(&WhichExtra) =
  1271. ( GET_WT_SEVEN(&ExtraWt1) <
  1272. GET_WT_SEVEN(&ExtraWt2) )
  1273. ? CSTR_LESS_THAN
  1274. : CSTR_GREATER_THAN;
  1275. }
  1276. }
  1277. }
  1278. break;
  1279. }
  1280. case ( JAMO_SPECIAL ) :
  1281. {
  1282. if (!JamoFlag)
  1283. {
  1284. int ctr1, ctr2; // dummy variables for FindJamoDifference
  1285. LPWSTR pStr1 = pString1;
  1286. LPWSTR pStr2 = pString2;
  1287. //
  1288. // Set the JamoFlag so we don't handle it again.
  1289. //
  1290. JamoFlag = TRUE;
  1291. fContinue = FindJamoDifference(
  1292. pHashN,
  1293. &pStr1, &ctr1, -2, &Weight1,
  1294. &pStr2, &ctr2, -2, &Weight2,
  1295. &pLastJamo,
  1296. &uw1, &uw2,
  1297. &State,
  1298. &WhichJamo,
  1299. fModify );
  1300. if (WhichJamo)
  1301. {
  1302. return (WhichJamo);
  1303. }
  1304. pString1 = pStr1;
  1305. pString2 = pStr2;
  1306. }
  1307. else
  1308. {
  1309. JamoFlag = FALSE;
  1310. }
  1311. break;
  1312. }
  1313. case ( EXTENSION_A ) :
  1314. {
  1315. //
  1316. // If sm1 is an extension A character, then
  1317. // both sm1 and sm2 have been handled. We should
  1318. // only get here when either sm1 is not an
  1319. // extension A character or the two extension A
  1320. // characters are different.
  1321. //
  1322. if (sm1 != EXTENSION_A)
  1323. {
  1324. //
  1325. // Get the actual UW to compare.
  1326. //
  1327. // Only string2 contains an extension A char,
  1328. // so set the UW value to be the first UW
  1329. // value for extension A (default values):
  1330. // SM_EXT_A, AW_EXT_A
  1331. //
  1332. uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  1333. }
  1334. //
  1335. // We should then fall through to the comparison
  1336. // of the Unicode weights.
  1337. //
  1338. break;
  1339. }
  1340. case ( UNSORTABLE ) :
  1341. {
  1342. //
  1343. // Fill out the case statement so the compiler
  1344. // will use a jump table.
  1345. //
  1346. break;
  1347. }
  1348. }
  1349. //
  1350. // See if the comparison should start again.
  1351. //
  1352. if (fContinue)
  1353. {
  1354. continue;
  1355. }
  1356. //
  1357. // We're not supposed to drop down into the state table if
  1358. // unicode weights are different, so stop comparison and
  1359. // return result of unicode weight comparison.
  1360. //
  1361. if (uw1 != uw2)
  1362. {
  1363. return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
  1364. }
  1365. }
  1366. //
  1367. // For each state in the state table, do the appropriate
  1368. // comparisons. (UW1 == UW2)
  1369. //
  1370. if (State & (STATE_DW | STATE_REVERSE_DW))
  1371. {
  1372. //
  1373. // Get the diacritic weights.
  1374. //
  1375. dw1 = GET_DIACRITIC(&Weight1);
  1376. dw2 = GET_DIACRITIC(&Weight2);
  1377. if (dw1 != dw2)
  1378. {
  1379. //
  1380. // Look ahead to see if diacritic follows a
  1381. // minimum diacritic weight. If so, get the
  1382. // diacritic weight of the nonspace mark.
  1383. //
  1384. while (*(pString1 + 1) != 0)
  1385. {
  1386. Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
  1387. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  1388. {
  1389. dw1 += GET_DIACRITIC(&Wt);
  1390. pString1++;
  1391. }
  1392. else
  1393. {
  1394. break;
  1395. }
  1396. }
  1397. while (*(pString2 + 1) != 0)
  1398. {
  1399. Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
  1400. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  1401. {
  1402. dw2 += GET_DIACRITIC(&Wt);
  1403. pString2++;
  1404. }
  1405. else
  1406. {
  1407. break;
  1408. }
  1409. }
  1410. //
  1411. // Save which string has the smaller diacritic
  1412. // weight if the diacritic weights are still
  1413. // different.
  1414. //
  1415. if (dw1 != dw2)
  1416. {
  1417. WhichDiacritic = (dw1 < dw2)
  1418. ? CSTR_LESS_THAN
  1419. : CSTR_GREATER_THAN;
  1420. //
  1421. // Remove state from state machine.
  1422. //
  1423. REMOVE_STATE(STATE_DW);
  1424. }
  1425. }
  1426. }
  1427. if (State & STATE_CW)
  1428. {
  1429. //
  1430. // Get the case weights.
  1431. //
  1432. if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
  1433. {
  1434. //
  1435. // Save which string has the smaller case weight.
  1436. //
  1437. WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
  1438. ? CSTR_LESS_THAN
  1439. : CSTR_GREATER_THAN;
  1440. //
  1441. // Remove state from state machine.
  1442. //
  1443. REMOVE_STATE(STATE_CW);
  1444. }
  1445. }
  1446. }
  1447. //
  1448. // Fixup the pointers.
  1449. //
  1450. POINTER_FIXUP();
  1451. }
  1452. //
  1453. // If the end of BOTH strings has been reached, then the unicode
  1454. // weights match exactly. Check the diacritic, case and special
  1455. // weights. If all are zero, then return success. Otherwise,
  1456. // return the result of the weight difference.
  1457. //
  1458. // NOTE: The following checks MUST REMAIN IN THIS ORDER:
  1459. // Diacritic, Case, Punctuation.
  1460. //
  1461. if (*pString1 == 0)
  1462. {
  1463. if (*pString2 == 0)
  1464. {
  1465. if (WhichDiacritic)
  1466. {
  1467. return (WhichDiacritic);
  1468. }
  1469. if (WhichCase)
  1470. {
  1471. return (WhichCase);
  1472. }
  1473. if (WhichExtra)
  1474. {
  1475. if (GET_WT_FOUR(&WhichExtra))
  1476. {
  1477. return (GET_WT_FOUR(&WhichExtra));
  1478. }
  1479. if (GET_WT_FIVE(&WhichExtra))
  1480. {
  1481. return (GET_WT_FIVE(&WhichExtra));
  1482. }
  1483. if (GET_WT_SIX(&WhichExtra))
  1484. {
  1485. return (GET_WT_SIX(&WhichExtra));
  1486. }
  1487. if (GET_WT_SEVEN(&WhichExtra))
  1488. {
  1489. return (GET_WT_SEVEN(&WhichExtra));
  1490. }
  1491. }
  1492. if (WhichPunct1)
  1493. {
  1494. return (WhichPunct1);
  1495. }
  1496. if (WhichPunct2)
  1497. {
  1498. return (WhichPunct2);
  1499. }
  1500. return (CSTR_EQUAL);
  1501. }
  1502. else
  1503. {
  1504. //
  1505. // String 2 is longer.
  1506. //
  1507. pString1 = pString2;
  1508. }
  1509. }
  1510. //
  1511. // Scan to the end of the longer string.
  1512. //
  1513. QUICK_SCAN_LONGER_STRING( pString1,
  1514. ((*pString2 == 0)
  1515. ? CSTR_GREATER_THAN
  1516. : CSTR_LESS_THAN) );
  1517. }
  1518. ////////////////////////////////////////////////////////////////////////////
  1519. //
  1520. // GetStringTypeExW
  1521. //
  1522. // Returns character type information about a particular Unicode string.
  1523. //
  1524. // 01-18-94 JulieB Created.
  1525. ////////////////////////////////////////////////////////////////////////////
  1526. BOOL WINAPI GetStringTypeExW(
  1527. LCID Locale,
  1528. DWORD dwInfoType,
  1529. LPCWSTR lpSrcStr,
  1530. int cchSrc,
  1531. LPWORD lpCharType)
  1532. {
  1533. PLOC_HASH pHashN; // ptr to LOC hash node
  1534. //
  1535. // Invalid Parameter Check:
  1536. // - Validate LCID
  1537. //
  1538. VALIDATE_LOCALE(Locale, pHashN, FALSE);
  1539. if (pHashN == NULL)
  1540. {
  1541. SetLastError(ERROR_INVALID_PARAMETER);
  1542. return (0);
  1543. }
  1544. //
  1545. // Return the result of GetStringTypeW.
  1546. //
  1547. return (GetStringTypeW( dwInfoType,
  1548. lpSrcStr,
  1549. cchSrc,
  1550. lpCharType ));
  1551. }
  1552. ////////////////////////////////////////////////////////////////////////////
  1553. //
  1554. // GetStringTypeW
  1555. //
  1556. // Returns character type information about a particular Unicode string.
  1557. //
  1558. // NOTE: The number of parameters is different from GetStringTypeA.
  1559. // The 16-bit OLE product shipped GetStringTypeA with the wrong
  1560. // parameters (ported from Chicago) and now we must support it.
  1561. //
  1562. // Use GetStringTypeEx to get the same set of parameters between
  1563. // the A and W version.
  1564. //
  1565. // 05-31-91 JulieB Created.
  1566. ////////////////////////////////////////////////////////////////////////////
  1567. BOOL WINAPI GetStringTypeW(
  1568. DWORD dwInfoType,
  1569. LPCWSTR lpSrcStr,
  1570. int cchSrc,
  1571. LPWORD lpCharType)
  1572. {
  1573. int Ctr; // loop counter
  1574. //
  1575. // Invalid Parameter Check:
  1576. // - lpSrcStr NULL
  1577. // - cchSrc is 0
  1578. // - lpCharType NULL
  1579. // - same buffer - src and destination
  1580. // - (flags will be checked in switch statement below)
  1581. //
  1582. if ( (lpSrcStr == NULL) || (cchSrc == 0) ||
  1583. (lpCharType == NULL) || (lpSrcStr == lpCharType) )
  1584. {
  1585. SetLastError(ERROR_INVALID_PARAMETER);
  1586. return (FALSE);
  1587. }
  1588. //
  1589. // If cchSrc is -1, then the source string is null terminated and we
  1590. // need to get the length of the source string. Add one to the
  1591. // length to include the null termination.
  1592. // (This will always be at least 1.)
  1593. //
  1594. if (cchSrc <= -1)
  1595. {
  1596. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  1597. }
  1598. //
  1599. // Make sure the ctype table is mapped in.
  1600. //
  1601. if (GetCTypeFileInfo())
  1602. {
  1603. SetLastError(ERROR_FILE_NOT_FOUND);
  1604. return (FALSE);
  1605. }
  1606. //
  1607. // Return the appropriate information in the lpCharType parameter
  1608. // based on the dwInfoType parameter.
  1609. //
  1610. switch (dwInfoType)
  1611. {
  1612. case ( CT_CTYPE1 ) :
  1613. {
  1614. //
  1615. // Return the ctype 1 information for the string.
  1616. //
  1617. for (Ctr = 0; Ctr < cchSrc; Ctr++)
  1618. {
  1619. lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType1);
  1620. }
  1621. break;
  1622. }
  1623. case ( CT_CTYPE2 ) :
  1624. {
  1625. //
  1626. // Return the ctype 2 information.
  1627. //
  1628. for (Ctr = 0; Ctr < cchSrc; Ctr++)
  1629. {
  1630. lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType2);
  1631. }
  1632. break;
  1633. }
  1634. case ( CT_CTYPE3 ) :
  1635. {
  1636. //
  1637. // Return the ctype 3 information.
  1638. //
  1639. for (Ctr = 0; Ctr < cchSrc; Ctr++)
  1640. {
  1641. lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType3);
  1642. }
  1643. break;
  1644. }
  1645. default :
  1646. {
  1647. //
  1648. // Invalid flag parameter, so return failure.
  1649. //
  1650. SetLastError(ERROR_INVALID_FLAGS);
  1651. return (FALSE);
  1652. }
  1653. }
  1654. //
  1655. // Return success.
  1656. //
  1657. return (TRUE);
  1658. }
  1659. //-------------------------------------------------------------------------//
  1660. // INTERNAL ROUTINES //
  1661. //-------------------------------------------------------------------------//
  1662. ////////////////////////////////////////////////////////////////////////////
  1663. //
  1664. // LongCompareStringW
  1665. //
  1666. // Compares two wide character strings of the same locale according to the
  1667. // supplied locale handle.
  1668. //
  1669. // 05-31-91 JulieB Created.
  1670. ////////////////////////////////////////////////////////////////////////////
  1671. int LongCompareStringW(
  1672. PLOC_HASH pHashN,
  1673. DWORD dwCmpFlags,
  1674. LPCWSTR lpString1,
  1675. int cchCount1,
  1676. LPCWSTR lpString2,
  1677. int cchCount2,
  1678. BOOL fModify)
  1679. {
  1680. int ctr1 = cchCount1; // loop counter for string 1
  1681. int ctr2 = cchCount2; // loop counter for string 2
  1682. register LPWSTR pString1; // ptr to go thru string 1
  1683. register LPWSTR pString2; // ptr to go thru string 2
  1684. BOOL IfCompress; // if compression in locale
  1685. BOOL IfDblCompress1; // if double compression in string 1
  1686. BOOL IfDblCompress2; // if double compression in string 2
  1687. BOOL fEnd1; // if at end of string 1
  1688. BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
  1689. BOOL fIgnoreDiacritic; // flag to ignore diacritics
  1690. BOOL fIgnoreSymbol; // flag to ignore symbols
  1691. BOOL fStringSort; // flag to use string sort
  1692. DWORD State; // state table
  1693. DWORD Mask; // mask for weights
  1694. DWORD Weight1; // full weight of char - string 1
  1695. DWORD Weight2; // full weight of char - string 2
  1696. int JamoFlag = FALSE;
  1697. LPCWSTR pLastJamo = lpString1;
  1698. int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
  1699. int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
  1700. int WhichJamo; // XW for Jamo
  1701. int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
  1702. int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
  1703. LPWSTR pSave1; // ptr to saved pString1
  1704. LPWSTR pSave2; // ptr to saved pString2
  1705. int cExpChar1, cExpChar2; // ct of expansions in tmp
  1706. DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
  1707. DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
  1708. //
  1709. // Initialize string pointers.
  1710. //
  1711. pString1 = (LPWSTR)lpString1;
  1712. pString2 = (LPWSTR)lpString2;
  1713. //
  1714. // Invalid Flags Check:
  1715. // - invalid flags
  1716. //
  1717. if (dwCmpFlags & CS_INVALID_FLAG)
  1718. {
  1719. SetLastError(ERROR_INVALID_FLAGS);
  1720. return (0);
  1721. }
  1722. //
  1723. // See if we should stop on the null terminator regardless of the
  1724. // count values. The original count values are stored in ctr1 and ctr2
  1725. // above, so it's ok to set these here.
  1726. //
  1727. if (dwCmpFlags & NORM_STOP_ON_NULL)
  1728. {
  1729. cchCount1 = cchCount2 = -2;
  1730. }
  1731. //
  1732. // Check if compression in the given locale. If not, then
  1733. // try a wchar by wchar compare. If strings are equal, this
  1734. // will be quick.
  1735. //
  1736. if ((IfCompress = pHashN->IfCompression) == FALSE)
  1737. {
  1738. //
  1739. // Compare each wide character in the two strings.
  1740. //
  1741. while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
  1742. NOT_END_STRING(ctr2, pString2, cchCount2) )
  1743. {
  1744. //
  1745. // See if characters are equal.
  1746. //
  1747. if (*pString1 == *pString2)
  1748. {
  1749. //
  1750. // Characters are equal, so increment pointers,
  1751. // decrement counters, and continue string compare.
  1752. //
  1753. pString1++;
  1754. pString2++;
  1755. ctr1--;
  1756. ctr2--;
  1757. }
  1758. else
  1759. {
  1760. //
  1761. // Difference was found. Fall into the sortkey
  1762. // check below.
  1763. //
  1764. break;
  1765. }
  1766. }
  1767. //
  1768. // If the end of BOTH strings has been reached, then the strings
  1769. // match exactly. Return success.
  1770. //
  1771. if ( AT_STRING_END(ctr1, pString1, cchCount1) &&
  1772. AT_STRING_END(ctr2, pString2, cchCount2) )
  1773. {
  1774. return (CSTR_EQUAL);
  1775. }
  1776. }
  1777. //
  1778. // Initialize flags, pointers, and counters.
  1779. //
  1780. fIgnorePunct = dwCmpFlags & NORM_IGNORESYMBOLS;
  1781. fIgnoreDiacritic = dwCmpFlags & NORM_IGNORENONSPACE;
  1782. fIgnoreSymbol = fIgnorePunct;
  1783. fStringSort = dwCmpFlags & SORT_STRINGSORT;
  1784. WhichDiacritic = 0;
  1785. WhichCase = 0;
  1786. WhichJamo = 0;
  1787. WhichPunct1 = 0;
  1788. WhichPunct2 = 0;
  1789. pSave1 = NULL;
  1790. pSave2 = NULL;
  1791. ExtraWt1 = (DWORD)0;
  1792. WhichExtra = (DWORD)0;
  1793. //
  1794. // Set the weights to be invalid. This flags whether or not to
  1795. // recompute the weights next time through the loop. It also flags
  1796. // whether or not to start over (continue) in the loop.
  1797. //
  1798. Weight1 = CMP_INVALID_WEIGHT;
  1799. Weight2 = CMP_INVALID_WEIGHT;
  1800. //
  1801. // Switch on the different flag options. This will speed up
  1802. // the comparisons of two strings that are different.
  1803. //
  1804. State = STATE_CW | STATE_JAMO_WEIGHT;
  1805. switch (dwCmpFlags & (NORM_IGNORECASE | NORM_IGNORENONSPACE))
  1806. {
  1807. case ( 0 ) :
  1808. {
  1809. Mask = CMP_MASKOFF_NONE;
  1810. State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
  1811. break;
  1812. }
  1813. case ( NORM_IGNORECASE ) :
  1814. {
  1815. Mask = CMP_MASKOFF_CW;
  1816. State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
  1817. break;
  1818. }
  1819. case ( NORM_IGNORENONSPACE ) :
  1820. {
  1821. Mask = CMP_MASKOFF_DW;
  1822. break;
  1823. }
  1824. case ( NORM_IGNORECASE | NORM_IGNORENONSPACE ) :
  1825. {
  1826. Mask = CMP_MASKOFF_DW_CW;
  1827. break;
  1828. }
  1829. }
  1830. switch (dwCmpFlags & (NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH))
  1831. {
  1832. case ( 0 ) :
  1833. {
  1834. break;
  1835. }
  1836. case ( NORM_IGNOREKANATYPE ) :
  1837. {
  1838. Mask &= CMP_MASKOFF_KANA;
  1839. break;
  1840. }
  1841. case ( NORM_IGNOREWIDTH ) :
  1842. {
  1843. Mask &= CMP_MASKOFF_WIDTH;
  1844. if (dwCmpFlags & NORM_IGNORECASE)
  1845. {
  1846. REMOVE_STATE(STATE_CW);
  1847. }
  1848. break;
  1849. }
  1850. case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1851. {
  1852. Mask &= CMP_MASKOFF_KANA_WIDTH;
  1853. if (dwCmpFlags & NORM_IGNORECASE)
  1854. {
  1855. REMOVE_STATE(STATE_CW);
  1856. }
  1857. break;
  1858. }
  1859. }
  1860. //
  1861. // Compare each character's sortkey weight in the two strings.
  1862. //
  1863. while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
  1864. NOT_END_STRING(ctr2, pString2, cchCount2) )
  1865. {
  1866. if (Weight1 == CMP_INVALID_WEIGHT)
  1867. {
  1868. Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
  1869. Weight1 &= Mask;
  1870. }
  1871. if (Weight2 == CMP_INVALID_WEIGHT)
  1872. {
  1873. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  1874. Weight2 &= Mask;
  1875. }
  1876. //
  1877. // If compression locale, then need to check for compression
  1878. // characters even if the weights are equal. If it's not a
  1879. // compression locale, then we don't need to check anything
  1880. // if the weights are equal.
  1881. //
  1882. if ( (IfCompress) &&
  1883. (GET_COMPRESSION(&Weight1) || GET_COMPRESSION(&Weight2)) )
  1884. {
  1885. int ctr; // loop counter
  1886. PCOMPRESS_3 pComp3; // ptr to compress 3 table
  1887. PCOMPRESS_2 pComp2; // ptr to compress 2 table
  1888. int If1; // if compression found in string 1
  1889. int If2; // if compression found in string 2
  1890. int CompVal; // compression value
  1891. int IfEnd1; // if exists 1 more char in string 1
  1892. int IfEnd2; // if exists 1 more char in string 2
  1893. //
  1894. // Check for compression in the weights.
  1895. //
  1896. If1 = GET_COMPRESSION(&Weight1);
  1897. If2 = GET_COMPRESSION(&Weight2);
  1898. CompVal = ((If1 > If2) ? If1 : If2);
  1899. IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
  1900. IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
  1901. if (pHashN->IfDblCompression == FALSE)
  1902. {
  1903. //
  1904. // NO double compression, so don't check for it.
  1905. //
  1906. switch (CompVal)
  1907. {
  1908. //
  1909. // Check for 3 characters compressing to 1.
  1910. //
  1911. case ( COMPRESS_3_MASK ) :
  1912. {
  1913. //
  1914. // Check character in string 1 and string 2.
  1915. //
  1916. if ( ((If1) && (!IfEnd1) &&
  1917. !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1)) ||
  1918. ((If2) && (!IfEnd2) &&
  1919. !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2)) )
  1920. {
  1921. ctr = pHashN->pCompHdr->Num3;
  1922. pComp3 = pHashN->pCompress3;
  1923. for (; ctr > 0; ctr--, pComp3++)
  1924. {
  1925. //
  1926. // Check character in string 1.
  1927. //
  1928. if ( (If1) && (!IfEnd1) &&
  1929. !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) &&
  1930. (pComp3->UCP1 == *pString1) &&
  1931. (pComp3->UCP2 == *(pString1 + 1)) &&
  1932. (pComp3->UCP3 == *(pString1 + 2)) )
  1933. {
  1934. //
  1935. // Found compression for string 1.
  1936. // Get new weight and mask it.
  1937. // Increment pointer and decrement counter.
  1938. //
  1939. Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  1940. Weight1 &= Mask;
  1941. pString1 += 2;
  1942. ctr1 -= 2;
  1943. //
  1944. // Set boolean for string 1 - search is
  1945. // complete.
  1946. //
  1947. If1 = 0;
  1948. //
  1949. // Break out of loop if both searches are
  1950. // done.
  1951. //
  1952. if (If2 == 0)
  1953. {
  1954. break;
  1955. }
  1956. }
  1957. //
  1958. // Check character in string 2.
  1959. //
  1960. if ( (If2) && (!IfEnd2) &&
  1961. !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) &&
  1962. (pComp3->UCP1 == *pString2) &&
  1963. (pComp3->UCP2 == *(pString2 + 1)) &&
  1964. (pComp3->UCP3 == *(pString2 + 2)) )
  1965. {
  1966. //
  1967. // Found compression for string 2.
  1968. // Get new weight and mask it.
  1969. // Increment pointer and decrement counter.
  1970. //
  1971. Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  1972. Weight2 &= Mask;
  1973. pString2 += 2;
  1974. ctr2 -= 2;
  1975. //
  1976. // Set boolean for string 2 - search is
  1977. // complete.
  1978. //
  1979. If2 = 0;
  1980. //
  1981. // Break out of loop if both searches are
  1982. // done.
  1983. //
  1984. if (If1 == 0)
  1985. {
  1986. break;
  1987. }
  1988. }
  1989. }
  1990. if (ctr > 0)
  1991. {
  1992. break;
  1993. }
  1994. }
  1995. //
  1996. // Fall through if not found.
  1997. //
  1998. }
  1999. //
  2000. // Check for 2 characters compressing to 1.
  2001. //
  2002. case ( COMPRESS_2_MASK ) :
  2003. {
  2004. //
  2005. // Check character in string 1 and string 2.
  2006. //
  2007. if ( ((If1) && (!IfEnd1)) ||
  2008. ((If2) && (!IfEnd2)) )
  2009. {
  2010. ctr = pHashN->pCompHdr->Num2;
  2011. pComp2 = pHashN->pCompress2;
  2012. for (; ((ctr > 0) && (If1 || If2)); ctr--, pComp2++)
  2013. {
  2014. //
  2015. // Check character in string 1.
  2016. //
  2017. if ( (If1) &&
  2018. (!IfEnd1) &&
  2019. (pComp2->UCP1 == *pString1) &&
  2020. (pComp2->UCP2 == *(pString1 + 1)) )
  2021. {
  2022. //
  2023. // Found compression for string 1.
  2024. // Get new weight and mask it.
  2025. // Increment pointer and decrement counter.
  2026. //
  2027. Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2028. Weight1 &= Mask;
  2029. pString1++;
  2030. ctr1--;
  2031. //
  2032. // Set boolean for string 1 - search is
  2033. // complete.
  2034. //
  2035. If1 = 0;
  2036. //
  2037. // Break out of loop if both searches are
  2038. // done.
  2039. //
  2040. if (If2 == 0)
  2041. {
  2042. break;
  2043. }
  2044. }
  2045. //
  2046. // Check character in string 2.
  2047. //
  2048. if ( (If2) &&
  2049. (!IfEnd2) &&
  2050. (pComp2->UCP1 == *pString2) &&
  2051. (pComp2->UCP2 == *(pString2 + 1)) )
  2052. {
  2053. //
  2054. // Found compression for string 2.
  2055. // Get new weight and mask it.
  2056. // Increment pointer and decrement counter.
  2057. //
  2058. Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2059. Weight2 &= Mask;
  2060. pString2++;
  2061. ctr2--;
  2062. //
  2063. // Set boolean for string 2 - search is
  2064. // complete.
  2065. //
  2066. If2 = 0;
  2067. //
  2068. // Break out of loop if both searches are
  2069. // done.
  2070. //
  2071. if (If1 == 0)
  2072. {
  2073. break;
  2074. }
  2075. }
  2076. }
  2077. if (ctr > 0)
  2078. {
  2079. break;
  2080. }
  2081. }
  2082. }
  2083. }
  2084. }
  2085. else if (!IfEnd1 && !IfEnd2)
  2086. {
  2087. //
  2088. // Double Compression exists, so must check for it.
  2089. //
  2090. if (IfDblCompress1 =
  2091. ((GET_DWORD_WEIGHT(pHashN, *pString1) & CMP_MASKOFF_CW) ==
  2092. (GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)) & CMP_MASKOFF_CW)))
  2093. {
  2094. //
  2095. // Advance past the first code point to get to the
  2096. // compression character.
  2097. //
  2098. pString1++;
  2099. ctr1--;
  2100. IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
  2101. }
  2102. if (IfDblCompress2 =
  2103. ((GET_DWORD_WEIGHT(pHashN, *pString2) & CMP_MASKOFF_CW) ==
  2104. (GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)) & CMP_MASKOFF_CW)))
  2105. {
  2106. //
  2107. // Advance past the first code point to get to the
  2108. // compression character.
  2109. //
  2110. pString2++;
  2111. ctr2--;
  2112. IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
  2113. }
  2114. switch (CompVal)
  2115. {
  2116. //
  2117. // Check for 3 characters compressing to 1.
  2118. //
  2119. case ( COMPRESS_3_MASK ) :
  2120. {
  2121. //
  2122. // Check character in string 1.
  2123. //
  2124. if ( (If1) && (!IfEnd1) &&
  2125. !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) )
  2126. {
  2127. ctr = pHashN->pCompHdr->Num3;
  2128. pComp3 = pHashN->pCompress3;
  2129. for (; ctr > 0; ctr--, pComp3++)
  2130. {
  2131. //
  2132. // Check character in string 1.
  2133. //
  2134. if ( (pComp3->UCP1 == *pString1) &&
  2135. (pComp3->UCP2 == *(pString1 + 1)) &&
  2136. (pComp3->UCP3 == *(pString1 + 2)) )
  2137. {
  2138. //
  2139. // Found compression for string 1.
  2140. // Get new weight and mask it.
  2141. // Increment pointer and decrement counter.
  2142. //
  2143. Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  2144. Weight1 &= Mask;
  2145. if (!IfDblCompress1)
  2146. {
  2147. pString1 += 2;
  2148. ctr1 -= 2;
  2149. }
  2150. //
  2151. // Set boolean for string 1 - search is
  2152. // complete.
  2153. //
  2154. If1 = 0;
  2155. break;
  2156. }
  2157. }
  2158. }
  2159. //
  2160. // Check character in string 2.
  2161. //
  2162. if ( (If2) && (!IfEnd2) &&
  2163. !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) )
  2164. {
  2165. ctr = pHashN->pCompHdr->Num3;
  2166. pComp3 = pHashN->pCompress3;
  2167. for (; ctr > 0; ctr--, pComp3++)
  2168. {
  2169. //
  2170. // Check character in string 2.
  2171. //
  2172. if ( (pComp3->UCP1 == *pString2) &&
  2173. (pComp3->UCP2 == *(pString2 + 1)) &&
  2174. (pComp3->UCP3 == *(pString2 + 2)) )
  2175. {
  2176. //
  2177. // Found compression for string 2.
  2178. // Get new weight and mask it.
  2179. // Increment pointer and decrement counter.
  2180. //
  2181. Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
  2182. Weight2 &= Mask;
  2183. if (!IfDblCompress2)
  2184. {
  2185. pString2 += 2;
  2186. ctr2 -= 2;
  2187. }
  2188. //
  2189. // Set boolean for string 2 - search is
  2190. // complete.
  2191. //
  2192. If2 = 0;
  2193. break;
  2194. }
  2195. }
  2196. }
  2197. //
  2198. // Fall through if not found.
  2199. //
  2200. if ((If1 == 0) && (If2 == 0))
  2201. {
  2202. break;
  2203. }
  2204. }
  2205. //
  2206. // Check for 2 characters compressing to 1.
  2207. //
  2208. case ( COMPRESS_2_MASK ) :
  2209. {
  2210. //
  2211. // Check character in string 1.
  2212. //
  2213. if ((If1) && (!IfEnd1))
  2214. {
  2215. ctr = pHashN->pCompHdr->Num2;
  2216. pComp2 = pHashN->pCompress2;
  2217. for (; ctr > 0; ctr--, pComp2++)
  2218. {
  2219. //
  2220. // Check character in string 1.
  2221. //
  2222. if ((pComp2->UCP1 == *pString1) &&
  2223. (pComp2->UCP2 == *(pString1 + 1)))
  2224. {
  2225. //
  2226. // Found compression for string 1.
  2227. // Get new weight and mask it.
  2228. // Increment pointer and decrement counter.
  2229. //
  2230. Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2231. Weight1 &= Mask;
  2232. if (!IfDblCompress1)
  2233. {
  2234. pString1++;
  2235. ctr1--;
  2236. }
  2237. //
  2238. // Set boolean for string 1 - search is
  2239. // complete.
  2240. //
  2241. If1 = 0;
  2242. break;
  2243. }
  2244. }
  2245. }
  2246. //
  2247. // Check character in string 2.
  2248. //
  2249. if ((If2) && (!IfEnd2))
  2250. {
  2251. ctr = pHashN->pCompHdr->Num2;
  2252. pComp2 = pHashN->pCompress2;
  2253. for (; ctr > 0; ctr--, pComp2++)
  2254. {
  2255. //
  2256. // Check character in string 2.
  2257. //
  2258. if ((pComp2->UCP1 == *pString2) &&
  2259. (pComp2->UCP2 == *(pString2 + 1)))
  2260. {
  2261. //
  2262. // Found compression for string 2.
  2263. // Get new weight and mask it.
  2264. // Increment pointer and decrement counter.
  2265. //
  2266. Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
  2267. Weight2 &= Mask;
  2268. if (!IfDblCompress2)
  2269. {
  2270. pString2++;
  2271. ctr2--;
  2272. }
  2273. //
  2274. // Set boolean for string 2 - search is
  2275. // complete.
  2276. //
  2277. If2 = 0;
  2278. break;
  2279. }
  2280. }
  2281. }
  2282. }
  2283. }
  2284. //
  2285. // Reset the pointer back to the beginning of the double
  2286. // compression. Pointer fixup at the end will advance
  2287. // them correctly.
  2288. //
  2289. // If double compression, we advanced the pointer at
  2290. // the beginning of the switch statement. If double
  2291. // compression character was actually found, the pointer
  2292. // was NOT advanced. We now want to decrement the pointer
  2293. // to put it back to where it was.
  2294. //
  2295. // The next time through, the pointer will be pointing to
  2296. // the regular compression part of the string.
  2297. //
  2298. if (IfDblCompress1)
  2299. {
  2300. pString1--;
  2301. ctr1++;
  2302. }
  2303. if (IfDblCompress2)
  2304. {
  2305. pString2--;
  2306. ctr2++;
  2307. }
  2308. }
  2309. }
  2310. //
  2311. // Check the weights again.
  2312. //
  2313. if ((Weight1 != Weight2) ||
  2314. (GET_SCRIPT_MEMBER(&Weight1) == EXTENSION_A))
  2315. {
  2316. //
  2317. // Weights are still not equal, even after compression
  2318. // check, so compare the different weights.
  2319. //
  2320. BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
  2321. BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
  2322. WORD uw1 = GET_UNICODE_SM_MOD(&Weight1, sm1, fModify); // unicode weight 1
  2323. WORD uw2 = GET_UNICODE_SM_MOD(&Weight2, sm2, fModify); // unicode weight 2
  2324. BYTE dw1; // diacritic weight 1
  2325. BYTE dw2; // diacritic weight 2
  2326. DWORD Wt; // temp weight holder
  2327. WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
  2328. WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
  2329. //
  2330. // If Unicode Weights are different and no special cases,
  2331. // then we're done. Otherwise, we need to do extra checking.
  2332. //
  2333. // Must check ENTIRE string for any possibility of Unicode Weight
  2334. // differences. As soon as a Unicode Weight difference is found,
  2335. // then we're done. If no UW difference is found, then the
  2336. // first Diacritic Weight difference is used. If no DW difference
  2337. // is found, then use the first Case Difference. If no CW
  2338. // difference is found, then use the first Extra Weight
  2339. // difference. If no XW difference is found, then use the first
  2340. // Special Weight difference.
  2341. //
  2342. if ((uw1 != uw2) ||
  2343. ((sm1 <= SYMBOL_5) && (sm1 >= FAREAST_SPECIAL)))
  2344. {
  2345. //
  2346. // Check for Unsortable characters and skip them.
  2347. // This needs to be outside the switch statement. If EITHER
  2348. // character is unsortable, must skip it and start over.
  2349. //
  2350. if (sm1 == UNSORTABLE)
  2351. {
  2352. pString1++;
  2353. ctr1--;
  2354. Weight1 = CMP_INVALID_WEIGHT;
  2355. }
  2356. if (sm2 == UNSORTABLE)
  2357. {
  2358. pString2++;
  2359. ctr2--;
  2360. Weight2 = CMP_INVALID_WEIGHT;
  2361. }
  2362. //
  2363. // Check for Ignore Nonspace and Ignore Symbol. If
  2364. // Ignore Nonspace is set and either character is a
  2365. // nonspace mark only, then we need to advance the
  2366. // pointer to skip over the character and continue.
  2367. // If Ignore Symbol is set and either character is a
  2368. // punctuation char, then we need to advance the
  2369. // pointer to skip over the character and continue.
  2370. //
  2371. // This step is necessary so that a string with a
  2372. // nonspace mark and a punctuation char following one
  2373. // another are properly ignored when one or both of
  2374. // the ignore flags is set.
  2375. //
  2376. if (fIgnoreDiacritic)
  2377. {
  2378. if (sm1 == NONSPACE_MARK)
  2379. {
  2380. pString1++;
  2381. ctr1--;
  2382. Weight1 = CMP_INVALID_WEIGHT;
  2383. }
  2384. if (sm2 == NONSPACE_MARK)
  2385. {
  2386. pString2++;
  2387. ctr2--;
  2388. Weight2 = CMP_INVALID_WEIGHT;
  2389. }
  2390. }
  2391. if (fIgnoreSymbol)
  2392. {
  2393. if (sm1 == PUNCTUATION)
  2394. {
  2395. pString1++;
  2396. ctr1--;
  2397. Weight1 = CMP_INVALID_WEIGHT;
  2398. }
  2399. if (sm2 == PUNCTUATION)
  2400. {
  2401. pString2++;
  2402. ctr2--;
  2403. Weight2 = CMP_INVALID_WEIGHT;
  2404. }
  2405. }
  2406. if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
  2407. {
  2408. continue;
  2409. }
  2410. //
  2411. // Switch on the script member of string 1 and take care
  2412. // of any special cases.
  2413. //
  2414. switch (sm1)
  2415. {
  2416. case ( NONSPACE_MARK ) :
  2417. {
  2418. //
  2419. // Nonspace only - look at diacritic weight only.
  2420. //
  2421. if (!fIgnoreDiacritic)
  2422. {
  2423. if ((WhichDiacritic == 0) ||
  2424. (State & STATE_REVERSE_DW))
  2425. {
  2426. WhichDiacritic = CSTR_GREATER_THAN;
  2427. //
  2428. // Remove state from state machine.
  2429. //
  2430. REMOVE_STATE(STATE_DW);
  2431. }
  2432. }
  2433. //
  2434. // Adjust pointer and counter and set flags.
  2435. //
  2436. pString1++;
  2437. ctr1--;
  2438. Weight1 = CMP_INVALID_WEIGHT;
  2439. break;
  2440. }
  2441. case ( SYMBOL_1 ) :
  2442. case ( SYMBOL_2 ) :
  2443. case ( SYMBOL_3 ) :
  2444. case ( SYMBOL_4 ) :
  2445. case ( SYMBOL_5 ) :
  2446. {
  2447. //
  2448. // If the ignore symbol flag is set, then skip over
  2449. // the symbol.
  2450. //
  2451. if (fIgnoreSymbol)
  2452. {
  2453. pString1++;
  2454. ctr1--;
  2455. Weight1 = CMP_INVALID_WEIGHT;
  2456. }
  2457. break;
  2458. }
  2459. case ( PUNCTUATION ) :
  2460. {
  2461. //
  2462. // If the ignore punctuation flag is set, then skip
  2463. // over the punctuation char.
  2464. //
  2465. if (fIgnorePunct)
  2466. {
  2467. pString1++;
  2468. ctr1--;
  2469. Weight1 = CMP_INVALID_WEIGHT;
  2470. }
  2471. else if (!fStringSort)
  2472. {
  2473. //
  2474. // Use WORD sort method.
  2475. //
  2476. if (sm2 != PUNCTUATION)
  2477. {
  2478. //
  2479. // The character in the second string is
  2480. // NOT punctuation.
  2481. //
  2482. if (WhichPunct2)
  2483. {
  2484. //
  2485. // Set WP 2 to show that string 2 is
  2486. // smaller, since a punctuation char had
  2487. // already been found at an earlier
  2488. // position in string 2.
  2489. //
  2490. // Set the Ignore Punctuation flag so we
  2491. // just skip over any other punctuation
  2492. // chars in the string.
  2493. //
  2494. WhichPunct2 = CSTR_GREATER_THAN;
  2495. fIgnorePunct = TRUE;
  2496. }
  2497. else
  2498. {
  2499. //
  2500. // Set WP 1 to show that string 2 is
  2501. // smaller, and that string 1 has had
  2502. // a punctuation char - since no
  2503. // punctuation chars have been found
  2504. // in string 2.
  2505. //
  2506. WhichPunct1 = CSTR_GREATER_THAN;
  2507. }
  2508. //
  2509. // Advance pointer 1 and decrement counter 1.
  2510. //
  2511. pString1++;
  2512. ctr1--;
  2513. Weight1 = CMP_INVALID_WEIGHT;
  2514. }
  2515. //
  2516. // Do NOT want to advance the pointer in string 1
  2517. // if string 2 is also a punctuation char. This
  2518. // will be done later.
  2519. //
  2520. }
  2521. break;
  2522. }
  2523. case ( EXPANSION ) :
  2524. {
  2525. //
  2526. // Save pointer in pString1 so that it can be
  2527. // restored.
  2528. //
  2529. if (pSave1 == NULL)
  2530. {
  2531. pSave1 = pString1;
  2532. }
  2533. pString1 = pTmpBuf1;
  2534. //
  2535. // Add one to counter so that subtraction doesn't end
  2536. // comparison prematurely.
  2537. //
  2538. ctr1++;
  2539. //
  2540. // Expand character into temporary buffer.
  2541. //
  2542. pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
  2543. pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
  2544. //
  2545. // Set cExpChar1 to the number of expansion characters
  2546. // stored.
  2547. //
  2548. cExpChar1 = MAX_TBL_EXPANSION;
  2549. Weight1 = CMP_INVALID_WEIGHT;
  2550. break;
  2551. }
  2552. case ( FAREAST_SPECIAL ) :
  2553. {
  2554. if (sm2 != EXPANSION)
  2555. {
  2556. //
  2557. // Get the weight for the far east special case
  2558. // and store it in Weight1.
  2559. //
  2560. GET_FAREAST_WEIGHT( Weight1,
  2561. uw1,
  2562. Mask,
  2563. lpString1,
  2564. pString1,
  2565. ExtraWt1,
  2566. fModify );
  2567. if (sm2 != FAREAST_SPECIAL)
  2568. {
  2569. //
  2570. // The character in the second string is
  2571. // NOT a fareast special char.
  2572. //
  2573. // Set each of weights 4, 5, 6, and 7 to show
  2574. // that string 2 is smaller (if not already set).
  2575. //
  2576. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  2577. (GET_WT_FOUR(&ExtraWt1) != 0))
  2578. {
  2579. GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
  2580. }
  2581. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  2582. (GET_WT_FIVE(&ExtraWt1) != 0))
  2583. {
  2584. GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
  2585. }
  2586. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  2587. (GET_WT_SIX(&ExtraWt1) != 0))
  2588. {
  2589. GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
  2590. }
  2591. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  2592. (GET_WT_SEVEN(&ExtraWt1) != 0))
  2593. {
  2594. GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
  2595. }
  2596. }
  2597. }
  2598. break;
  2599. }
  2600. case ( JAMO_SPECIAL ) :
  2601. {
  2602. LPWSTR pStr1 = pString1;
  2603. LPWSTR pStr2 = pString2;
  2604. //
  2605. // Set the JamoFlag so we don't handle it again.
  2606. //
  2607. JamoFlag = TRUE;
  2608. FindJamoDifference(
  2609. pHashN,
  2610. &pStr1, &ctr1, cchCount1, &Weight1,
  2611. &pStr2, &ctr2, cchCount2, &Weight2,
  2612. &pLastJamo,
  2613. &uw1, &uw2,
  2614. &State,
  2615. &WhichJamo,
  2616. fModify );
  2617. if (WhichJamo)
  2618. {
  2619. return (WhichJamo);
  2620. }
  2621. pString1 = pStr1;
  2622. pString2 = pStr2;
  2623. break;
  2624. }
  2625. case ( EXTENSION_A ) :
  2626. {
  2627. //
  2628. // Get the full weight in case DW got masked.
  2629. //
  2630. Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
  2631. if (sm2 == EXTENSION_A)
  2632. {
  2633. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  2634. }
  2635. //
  2636. // Compare the weights.
  2637. //
  2638. if (Weight1 == Weight2)
  2639. {
  2640. //
  2641. // Adjust pointers and counters and set flags.
  2642. //
  2643. pString1++; pString2++;
  2644. ctr1--; ctr2--;
  2645. Weight1 = CMP_INVALID_WEIGHT;
  2646. Weight2 = CMP_INVALID_WEIGHT;
  2647. }
  2648. else
  2649. {
  2650. //
  2651. // Get the actual UW to compare.
  2652. //
  2653. if (sm2 == EXTENSION_A)
  2654. {
  2655. //
  2656. // Set the UW values to be the AW and DW since
  2657. // both strings contain an extension A char.
  2658. //
  2659. uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
  2660. GET_DIACRITIC(&Weight1),
  2661. FALSE );
  2662. uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
  2663. GET_DIACRITIC(&Weight2),
  2664. FALSE );
  2665. }
  2666. else
  2667. {
  2668. //
  2669. // Only string1 contains an extension A char,
  2670. // so set the UW value to be the first UW
  2671. // value for extension A (default values):
  2672. // SM_EXT_A, AW_EXT_A
  2673. //
  2674. uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  2675. }
  2676. }
  2677. break;
  2678. }
  2679. case ( UNSORTABLE ) :
  2680. {
  2681. //
  2682. // Fill out the case statement so the compiler
  2683. // will use a jump table.
  2684. //
  2685. break;
  2686. }
  2687. }
  2688. //
  2689. // Switch on the script member of string 2 and take care
  2690. // of any special cases.
  2691. //
  2692. switch (sm2)
  2693. {
  2694. case ( NONSPACE_MARK ) :
  2695. {
  2696. //
  2697. // Nonspace only - look at diacritic weight only.
  2698. //
  2699. if (!fIgnoreDiacritic)
  2700. {
  2701. if ((WhichDiacritic == 0) ||
  2702. (State & STATE_REVERSE_DW))
  2703. {
  2704. WhichDiacritic = CSTR_LESS_THAN;
  2705. //
  2706. // Remove state from state machine.
  2707. //
  2708. REMOVE_STATE(STATE_DW);
  2709. }
  2710. }
  2711. //
  2712. // Adjust pointer and counter and set flags.
  2713. //
  2714. pString2++;
  2715. ctr2--;
  2716. Weight2 = CMP_INVALID_WEIGHT;
  2717. break;
  2718. }
  2719. case ( SYMBOL_1 ) :
  2720. case ( SYMBOL_2 ) :
  2721. case ( SYMBOL_3 ) :
  2722. case ( SYMBOL_4 ) :
  2723. case ( SYMBOL_5 ) :
  2724. {
  2725. //
  2726. // If the ignore symbol flag is set, then skip over
  2727. // the symbol.
  2728. //
  2729. if (fIgnoreSymbol)
  2730. {
  2731. pString2++;
  2732. ctr2--;
  2733. Weight2 = CMP_INVALID_WEIGHT;
  2734. }
  2735. break;
  2736. }
  2737. case ( PUNCTUATION ) :
  2738. {
  2739. //
  2740. // If the ignore punctuation flag is set, then
  2741. // skip over the punctuation char.
  2742. //
  2743. if (fIgnorePunct)
  2744. {
  2745. //
  2746. // Advance pointer 2 and decrement counter 2.
  2747. //
  2748. pString2++;
  2749. ctr2--;
  2750. Weight2 = CMP_INVALID_WEIGHT;
  2751. }
  2752. else if (!fStringSort)
  2753. {
  2754. //
  2755. // Use WORD sort method.
  2756. //
  2757. if (sm1 != PUNCTUATION)
  2758. {
  2759. //
  2760. // The character in the first string is
  2761. // NOT punctuation.
  2762. //
  2763. if (WhichPunct1)
  2764. {
  2765. //
  2766. // Set WP 1 to show that string 1 is
  2767. // smaller, since a punctuation char had
  2768. // already been found at an earlier
  2769. // position in string 1.
  2770. //
  2771. // Set the Ignore Punctuation flag so we
  2772. // just skip over any other punctuation
  2773. // chars in the string.
  2774. //
  2775. WhichPunct1 = CSTR_LESS_THAN;
  2776. fIgnorePunct = TRUE;
  2777. }
  2778. else
  2779. {
  2780. //
  2781. // Set WP 2 to show that string 1 is
  2782. // smaller, and that string 2 has had
  2783. // a punctuation char - since no
  2784. // punctuation chars have been found
  2785. // in string 1.
  2786. //
  2787. WhichPunct2 = CSTR_LESS_THAN;
  2788. }
  2789. //
  2790. // Pointer 2 and counter 2 will be updated
  2791. // after if-else statement.
  2792. //
  2793. }
  2794. else
  2795. {
  2796. //
  2797. // Both code points are punctuation chars.
  2798. //
  2799. // See if either of the strings has encountered
  2800. // punctuation chars previous to this.
  2801. //
  2802. if (WhichPunct1)
  2803. {
  2804. //
  2805. // String 1 has had a punctuation char, so
  2806. // it should be the smaller string (since
  2807. // both have punctuation chars).
  2808. //
  2809. WhichPunct1 = CSTR_LESS_THAN;
  2810. }
  2811. else if (WhichPunct2)
  2812. {
  2813. //
  2814. // String 2 has had a punctuation char, so
  2815. // it should be the smaller string (since
  2816. // both have punctuation chars).
  2817. //
  2818. WhichPunct2 = CSTR_GREATER_THAN;
  2819. }
  2820. else
  2821. {
  2822. BYTE aw1 = GET_ALPHA_NUMERIC(&Weight1);
  2823. BYTE aw2 = GET_ALPHA_NUMERIC(&Weight2);
  2824. if (aw1 == aw2)
  2825. {
  2826. BYTE cw1 = GET_CASE(&Weight1);
  2827. BYTE cw2 = GET_CASE(&Weight2);
  2828. if (cw1 < cw2)
  2829. {
  2830. WhichPunct1 = CSTR_LESS_THAN;
  2831. } else if (cw1 > cw2)
  2832. {
  2833. WhichPunct1 = CSTR_GREATER_THAN;
  2834. }
  2835. } else
  2836. {
  2837. //
  2838. // Position is the same, so compare the
  2839. // special weights. Set WhichPunct1 to
  2840. // the smaller special weight.
  2841. //
  2842. WhichPunct1 = (aw1 < aw2
  2843. ? CSTR_LESS_THAN
  2844. : CSTR_GREATER_THAN);
  2845. }
  2846. }
  2847. //
  2848. // Set the Ignore Punctuation flag.
  2849. //
  2850. fIgnorePunct = TRUE;
  2851. //
  2852. // Advance pointer 1 and decrement counter 1.
  2853. // Pointer 2 and counter 2 will be updated
  2854. // after if-else statement.
  2855. //
  2856. pString1++;
  2857. ctr1--;
  2858. Weight1 = CMP_INVALID_WEIGHT;
  2859. }
  2860. //
  2861. // Advance pointer 2 and decrement counter 2.
  2862. //
  2863. pString2++;
  2864. ctr2--;
  2865. Weight2 = CMP_INVALID_WEIGHT;
  2866. }
  2867. break;
  2868. }
  2869. case ( EXPANSION ) :
  2870. {
  2871. //
  2872. // Save pointer in pString2 so that it can be restored.
  2873. //
  2874. if (pSave2 == NULL)
  2875. {
  2876. pSave2 = pString2;
  2877. }
  2878. pString2 = pTmpBuf2;
  2879. //
  2880. // Add one to counter so that subtraction doesn't end
  2881. // comparison prematurely.
  2882. //
  2883. ctr2++;
  2884. //
  2885. // Expand character into temporary buffer.
  2886. //
  2887. pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
  2888. pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
  2889. //
  2890. // Set cExpChar2 to the number of expansion characters
  2891. // stored.
  2892. //
  2893. cExpChar2 = MAX_TBL_EXPANSION;
  2894. Weight2 = CMP_INVALID_WEIGHT;
  2895. break;
  2896. }
  2897. case ( FAREAST_SPECIAL ) :
  2898. {
  2899. if (sm1 != EXPANSION)
  2900. {
  2901. //
  2902. // Get the weight for the far east special case
  2903. // and store it in Weight2.
  2904. //
  2905. GET_FAREAST_WEIGHT( Weight2,
  2906. uw2,
  2907. Mask,
  2908. lpString2,
  2909. pString2,
  2910. ExtraWt2,
  2911. fModify );
  2912. if (sm1 != FAREAST_SPECIAL)
  2913. {
  2914. //
  2915. // The character in the first string is
  2916. // NOT a fareast special char.
  2917. //
  2918. // Set each of weights 4, 5, 6, and 7 to show
  2919. // that string 1 is smaller (if not already set).
  2920. //
  2921. if ((GET_WT_FOUR(&WhichExtra) == 0) &&
  2922. (GET_WT_FOUR(&ExtraWt2) != 0))
  2923. {
  2924. GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
  2925. }
  2926. if ((GET_WT_FIVE(&WhichExtra) == 0) &&
  2927. (GET_WT_FIVE(&ExtraWt2) != 0))
  2928. {
  2929. GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
  2930. }
  2931. if ((GET_WT_SIX(&WhichExtra) == 0) &&
  2932. (GET_WT_SIX(&ExtraWt2) != 0))
  2933. {
  2934. GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
  2935. }
  2936. if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
  2937. (GET_WT_SEVEN(&ExtraWt2) != 0))
  2938. {
  2939. GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
  2940. }
  2941. }
  2942. else
  2943. {
  2944. //
  2945. // Characters in both strings are fareast
  2946. // special chars.
  2947. //
  2948. // Set each of weights 4, 5, 6, and 7
  2949. // appropriately (if not already set).
  2950. //
  2951. if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
  2952. ( GET_WT_FOUR(&ExtraWt1) !=
  2953. GET_WT_FOUR(&ExtraWt2) ) )
  2954. {
  2955. GET_WT_FOUR(&WhichExtra) =
  2956. ( GET_WT_FOUR(&ExtraWt1) <
  2957. GET_WT_FOUR(&ExtraWt2) )
  2958. ? CSTR_LESS_THAN
  2959. : CSTR_GREATER_THAN;
  2960. }
  2961. if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
  2962. ( GET_WT_FIVE(&ExtraWt1) !=
  2963. GET_WT_FIVE(&ExtraWt2) ) )
  2964. {
  2965. GET_WT_FIVE(&WhichExtra) =
  2966. ( GET_WT_FIVE(&ExtraWt1) <
  2967. GET_WT_FIVE(&ExtraWt2) )
  2968. ? CSTR_LESS_THAN
  2969. : CSTR_GREATER_THAN;
  2970. }
  2971. if ( (GET_WT_SIX(&WhichExtra) == 0) &&
  2972. ( GET_WT_SIX(&ExtraWt1) !=
  2973. GET_WT_SIX(&ExtraWt2) ) )
  2974. {
  2975. GET_WT_SIX(&WhichExtra) =
  2976. ( GET_WT_SIX(&ExtraWt1) <
  2977. GET_WT_SIX(&ExtraWt2) )
  2978. ? CSTR_LESS_THAN
  2979. : CSTR_GREATER_THAN;
  2980. }
  2981. if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
  2982. ( GET_WT_SEVEN(&ExtraWt1) !=
  2983. GET_WT_SEVEN(&ExtraWt2) ) )
  2984. {
  2985. GET_WT_SEVEN(&WhichExtra) =
  2986. ( GET_WT_SEVEN(&ExtraWt1) <
  2987. GET_WT_SEVEN(&ExtraWt2) )
  2988. ? CSTR_LESS_THAN
  2989. : CSTR_GREATER_THAN;
  2990. }
  2991. }
  2992. }
  2993. break;
  2994. }
  2995. case ( JAMO_SPECIAL ) :
  2996. {
  2997. if (!JamoFlag)
  2998. {
  2999. LPWSTR pStr1 = pString1;
  3000. LPWSTR pStr2 = pString2;
  3001. FindJamoDifference(
  3002. pHashN,
  3003. &pStr1, &ctr1, cchCount1, &Weight1,
  3004. &pStr2, &ctr2, cchCount2, &Weight2,
  3005. &pLastJamo,
  3006. &uw1, &uw2,
  3007. &State,
  3008. &WhichJamo,
  3009. fModify );
  3010. if (WhichJamo)
  3011. {
  3012. return (WhichJamo);
  3013. }
  3014. pString1 = pStr1;
  3015. pString2 = pStr2;
  3016. }
  3017. else
  3018. {
  3019. //
  3020. // Reset the Jamo flag.
  3021. //
  3022. JamoFlag = FALSE;
  3023. }
  3024. break;
  3025. }
  3026. case ( EXTENSION_A ) :
  3027. {
  3028. //
  3029. // If sm1 is an extension A character, then
  3030. // both sm1 and sm2 have been handled. We should
  3031. // only get here when either sm1 is not an
  3032. // extension A character or the two extension A
  3033. // characters are different.
  3034. //
  3035. if (sm1 != EXTENSION_A)
  3036. {
  3037. //
  3038. // Get the full weight in case DW got masked.
  3039. // Also, get the actual UW to compare.
  3040. //
  3041. // Only string2 contains an extension A char,
  3042. // so set the UW value to be the first UW
  3043. // value for extension A (default values):
  3044. // SM_EXT_A, AW_EXT_A
  3045. //
  3046. Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
  3047. uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
  3048. }
  3049. //
  3050. // We should then fall through to the comparison
  3051. // of the Unicode weights.
  3052. //
  3053. break;
  3054. }
  3055. case ( UNSORTABLE ) :
  3056. {
  3057. //
  3058. // Fill out the case statement so the compiler
  3059. // will use a jump table.
  3060. //
  3061. break;
  3062. }
  3063. }
  3064. //
  3065. // See if the comparison should start again.
  3066. //
  3067. if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
  3068. {
  3069. //
  3070. // Check to see if we're modifying the script value.
  3071. // If so, then we need to reset the fareast weight
  3072. // (if applicable) so that it doesn't get modified
  3073. // again.
  3074. //
  3075. if (fModify == TRUE)
  3076. {
  3077. if (sm1 == FAREAST_SPECIAL)
  3078. {
  3079. Weight1 = CMP_INVALID_WEIGHT;
  3080. }
  3081. else if (sm2 == FAREAST_SPECIAL)
  3082. {
  3083. Weight2 = CMP_INVALID_WEIGHT;
  3084. }
  3085. }
  3086. continue;
  3087. }
  3088. //
  3089. // We're not supposed to drop down into the state table if
  3090. // the unicode weights are different, so stop comparison
  3091. // and return result of unicode weight comparison.
  3092. //
  3093. if (uw1 != uw2)
  3094. {
  3095. return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
  3096. }
  3097. }
  3098. //
  3099. // For each state in the state table, do the appropriate
  3100. // comparisons.
  3101. //
  3102. if (State & (STATE_DW | STATE_REVERSE_DW))
  3103. {
  3104. //
  3105. // Get the diacritic weights.
  3106. //
  3107. dw1 = GET_DIACRITIC(&Weight1);
  3108. dw2 = GET_DIACRITIC(&Weight2);
  3109. if (dw1 != dw2)
  3110. {
  3111. //
  3112. // Look ahead to see if diacritic follows a
  3113. // minimum diacritic weight. If so, get the
  3114. // diacritic weight of the nonspace mark.
  3115. //
  3116. while (!AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1))
  3117. {
  3118. Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
  3119. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  3120. {
  3121. dw1 += GET_DIACRITIC(&Wt);
  3122. pString1++;
  3123. ctr1--;
  3124. }
  3125. else
  3126. {
  3127. break;
  3128. }
  3129. }
  3130. while (!AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2))
  3131. {
  3132. Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
  3133. if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
  3134. {
  3135. dw2 += GET_DIACRITIC(&Wt);
  3136. pString2++;
  3137. ctr2--;
  3138. }
  3139. else
  3140. {
  3141. break;
  3142. }
  3143. }
  3144. //
  3145. // Save which string has the smaller diacritic
  3146. // weight if the diacritic weights are still
  3147. // different.
  3148. //
  3149. if (dw1 != dw2)
  3150. {
  3151. WhichDiacritic = (dw1 < dw2)
  3152. ? CSTR_LESS_THAN
  3153. : CSTR_GREATER_THAN;
  3154. //
  3155. // Remove state from state machine.
  3156. //
  3157. REMOVE_STATE(STATE_DW);
  3158. }
  3159. }
  3160. }
  3161. if (State & STATE_CW)
  3162. {
  3163. //
  3164. // Get the case weights.
  3165. //
  3166. if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
  3167. {
  3168. //
  3169. // Save which string has the smaller case weight.
  3170. //
  3171. WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
  3172. ? CSTR_LESS_THAN
  3173. : CSTR_GREATER_THAN;
  3174. //
  3175. // Remove state from state machine.
  3176. //
  3177. REMOVE_STATE(STATE_CW);
  3178. }
  3179. }
  3180. }
  3181. //
  3182. // Fixup the pointers and counters.
  3183. //
  3184. POINTER_FIXUP();
  3185. ctr1--;
  3186. ctr2--;
  3187. //
  3188. // Reset the weights to be invalid.
  3189. //
  3190. Weight1 = CMP_INVALID_WEIGHT;
  3191. Weight2 = CMP_INVALID_WEIGHT;
  3192. }
  3193. //
  3194. // If the end of BOTH strings has been reached, then the unicode
  3195. // weights match exactly. Check the diacritic, case and special
  3196. // weights. If all are zero, then return success. Otherwise,
  3197. // return the result of the weight difference.
  3198. //
  3199. // NOTE: The following checks MUST REMAIN IN THIS ORDER:
  3200. // Diacritic, Case, Punctuation.
  3201. //
  3202. if (AT_STRING_END(ctr1, pString1, cchCount1))
  3203. {
  3204. if (AT_STRING_END(ctr2, pString2, cchCount2))
  3205. {
  3206. if (WhichDiacritic)
  3207. {
  3208. return (WhichDiacritic);
  3209. }
  3210. if (WhichCase)
  3211. {
  3212. return (WhichCase);
  3213. }
  3214. if (WhichExtra)
  3215. {
  3216. if (!fIgnoreDiacritic)
  3217. {
  3218. if (GET_WT_FOUR(&WhichExtra))
  3219. {
  3220. return (GET_WT_FOUR(&WhichExtra));
  3221. }
  3222. if (GET_WT_FIVE(&WhichExtra))
  3223. {
  3224. return (GET_WT_FIVE(&WhichExtra));
  3225. }
  3226. }
  3227. if (GET_WT_SIX(&WhichExtra))
  3228. {
  3229. return (GET_WT_SIX(&WhichExtra));
  3230. }
  3231. if (GET_WT_SEVEN(&WhichExtra))
  3232. {
  3233. return (GET_WT_SEVEN(&WhichExtra));
  3234. }
  3235. }
  3236. if (WhichPunct1)
  3237. {
  3238. return (WhichPunct1);
  3239. }
  3240. if (WhichPunct2)
  3241. {
  3242. return (WhichPunct2);
  3243. }
  3244. return (CSTR_EQUAL);
  3245. }
  3246. else
  3247. {
  3248. //
  3249. // String 2 is longer.
  3250. //
  3251. pString1 = pString2;
  3252. ctr1 = ctr2;
  3253. cchCount1 = cchCount2;
  3254. fEnd1 = CSTR_LESS_THAN;
  3255. }
  3256. }
  3257. else
  3258. {
  3259. fEnd1 = CSTR_GREATER_THAN;
  3260. }
  3261. //
  3262. // Scan to the end of the longer string.
  3263. //
  3264. SCAN_LONGER_STRING( ctr1,
  3265. pString1,
  3266. cchCount1,
  3267. fEnd1 );
  3268. }
  3269. ////////////////////////////////////////////////////////////////////////////
  3270. //
  3271. // FindJamoDifference
  3272. //
  3273. ////////////////////////////////////////////////////////////////////////////
  3274. int FindJamoDifference(
  3275. PLOC_HASH pHashN,
  3276. LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1,
  3277. LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2,
  3278. LPCWSTR* pLastJamo,
  3279. WORD* uw1,
  3280. WORD* uw2,
  3281. int* pState,
  3282. int* WhichJamo,
  3283. BOOL fModify)
  3284. {
  3285. int bRestart = 0; // if string compare should restart again
  3286. int oldHangulsFound1 = 0; // # of valid old Hangul Jamo compositions found
  3287. int oldHangulsFound2 = 0; // # of valid old Hangul Jamo compositions found
  3288. WORD UW;
  3289. BYTE JamoWeight1[3]; // extra weight for first old Hangul composition
  3290. BYTE JamoWeight2[3]; // extra weight for second old Hangul composition
  3291. //
  3292. // Roll back to the first Jamo. We know that these Jamos in both strings
  3293. // should be equal, so we can decrement both strings at once.
  3294. //
  3295. while ((*ppString1 > *pLastJamo) && IsJamo(*(*ppString1 - 1)))
  3296. {
  3297. (*ppString1)--; (*ppString2)--; (*ctr1)++; (*ctr2)++;
  3298. }
  3299. //
  3300. // Now we are at the beginning of two groups of Jamo characters.
  3301. // Compare Jamo unit (either a single Jamo or a valid old Hangul Jamo
  3302. // composition) until we run out Jamo units in either strings.
  3303. // We also exit when we reach the ends of either string.
  3304. //
  3305. // while (NOT_END_STRING(*ctr1, *ppString1, cchCount1) &&
  3306. // NOT_END_STRING(*ctr2, *ppString2, cchCount2))
  3307. //
  3308. for (;;)
  3309. {
  3310. if (IsJamo(**ppString1))
  3311. {
  3312. if (IsLeadingJamo(**ppString1))
  3313. {
  3314. if ((oldHangulsFound1 = MapOldHangulSortKey( pHashN,
  3315. *ppString1,
  3316. *ctr1,
  3317. &UW,
  3318. JamoWeight1,
  3319. fModify )) > 0)
  3320. {
  3321. *uw1 = UW;
  3322. //
  3323. // Mark *pWeight1 so that it is not CMP_INVALID_WEIGHT.
  3324. // 0202 is the DW/CW.
  3325. //
  3326. *pWeight1 = ((DWORD)UW | 0x02020000);
  3327. //
  3328. // We always increment ppString1/ctr1 at the end of the
  3329. // loop, so we need to subtract 1 here.
  3330. //
  3331. *ppString1 += (oldHangulsFound1 - 1);
  3332. *ctr1 -= (oldHangulsFound1 - 1);
  3333. }
  3334. }
  3335. if (oldHangulsFound1 == 0)
  3336. {
  3337. //
  3338. // No valid old Hangul compositions are found. Get the UW
  3339. // for the Jamo instead.
  3340. //
  3341. *pWeight1 = GET_DWORD_WEIGHT(pHashN, **ppString1);
  3342. //
  3343. // The SMs in PSORTKEY for Jamos are not really SMs. They
  3344. // are all 4 (for JAMO_SPECIAL).
  3345. // Here we get the real Jamo Unicode weight. The actual SM
  3346. // is stored in DW.
  3347. //
  3348. *uw1 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight1),
  3349. GET_ALPHA_NUMERIC(pWeight1),
  3350. fModify );
  3351. ((PSORTKEY)pWeight1)->Diacritic = MIN_DW;
  3352. }
  3353. }
  3354. if (IsJamo(**ppString2))
  3355. {
  3356. if (IsLeadingJamo(**ppString2))
  3357. {
  3358. if ((oldHangulsFound2 = MapOldHangulSortKey( pHashN,
  3359. *ppString2,
  3360. *ctr2,
  3361. &UW,
  3362. JamoWeight2,
  3363. fModify )) > 0)
  3364. {
  3365. *uw2 = UW;
  3366. *pWeight2 = ((DWORD)UW | 0x02020000);
  3367. *ppString2 += (oldHangulsFound2 - 1);
  3368. *ctr2 -= (oldHangulsFound2 - 1);
  3369. }
  3370. }
  3371. if (oldHangulsFound2 == 0)
  3372. {
  3373. *pWeight2 = GET_DWORD_WEIGHT(pHashN, **ppString2);
  3374. *uw2 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight2),
  3375. GET_ALPHA_NUMERIC(pWeight2),
  3376. fModify );
  3377. ((PSORTKEY)pWeight2)->Diacritic = MIN_DW;
  3378. }
  3379. }
  3380. //
  3381. // See if either weight is invalid.
  3382. // A weight can be invalid when the character is not a Jamo.
  3383. //
  3384. if (*pWeight1 == CMP_INVALID_WEIGHT)
  3385. {
  3386. //
  3387. // The current character is not a Jamo. Set the Weight to
  3388. // be CMP_INVALID_WEIGHT, so that the string comparision can
  3389. // restart within the loop of CompareString().
  3390. //
  3391. *pWeight1 = CMP_INVALID_WEIGHT;
  3392. bRestart = 1;
  3393. goto FindJamoDifferenceExit;
  3394. }
  3395. if (*pWeight2 == CMP_INVALID_WEIGHT)
  3396. {
  3397. //
  3398. // The current character is not a Jamo. Set the Weight to
  3399. // be CMP_INVALID_WEIGHT, so that the string comparision can
  3400. // restart within the loop of CompareString().
  3401. //
  3402. *pWeight2 = CMP_INVALID_WEIGHT;
  3403. bRestart = 1;
  3404. goto FindJamoDifferenceExit;
  3405. }
  3406. if (*uw1 != *uw2)
  3407. {
  3408. //
  3409. // Found differences in Unicode weight. We can stop the
  3410. // processing now.
  3411. //
  3412. goto FindJamoDifferenceExit;
  3413. }
  3414. //
  3415. // When we get here, we know that we have the same Unicode Weight.
  3416. // Check if we need to record the WhichJamo.
  3417. //
  3418. if ((*pState & STATE_JAMO_WEIGHT) &&
  3419. ((oldHangulsFound1 > 0) || (oldHangulsFound2 > 0)))
  3420. {
  3421. if ((oldHangulsFound1 > 0) && (oldHangulsFound2 > 0))
  3422. {
  3423. *WhichJamo = (int)memcmp( JamoWeight1,
  3424. JamoWeight2,
  3425. sizeof(JamoWeight1) ) + 2;
  3426. }
  3427. else if (oldHangulsFound1 > 0)
  3428. {
  3429. *WhichJamo = CSTR_GREATER_THAN;
  3430. }
  3431. else
  3432. {
  3433. *WhichJamo = CSTR_LESS_THAN;
  3434. }
  3435. *pState &= ~STATE_JAMO_WEIGHT;
  3436. oldHangulsFound1 = oldHangulsFound2 = 0;
  3437. }
  3438. (*ppString1)++; (*ctr1)--;
  3439. (*ppString2)++; (*ctr2)--;
  3440. if (AT_STRING_END(*ctr1, *ppString1, cchCount1) ||
  3441. AT_STRING_END(*ctr2, *ppString2, cchCount2))
  3442. {
  3443. break;
  3444. }
  3445. *pWeight1 = *pWeight2 = CMP_INVALID_WEIGHT;
  3446. }
  3447. //
  3448. // If we drop out of the while loop because we reach the end of strings,
  3449. // decrement the pointers by one because loops in CompareString() will
  3450. // increase the pointers at the end of the loop.
  3451. //
  3452. // If we drop out of the while loop because the goto's in it, we are
  3453. // already off by one.
  3454. //
  3455. if (AT_STRING_END(*ctr1, *ppString1, cchCount1))
  3456. {
  3457. (*ppString1)--; (*ctr1)++;
  3458. }
  3459. if (AT_STRING_END(*ctr2, *ppString2, cchCount2))
  3460. {
  3461. (*ppString2)--; (*ctr2)++;
  3462. }
  3463. FindJamoDifferenceExit:
  3464. *pLastJamo = *ppString1;
  3465. return (bRestart);
  3466. }