Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3769 lines
132 KiB

  1. /*++
  2. Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. map.c
  5. Abstract:
  6. This file contains functions that deal with map tables.
  7. APIs found in this file:
  8. FoldStringW
  9. LCMapStringW
  10. Revision History:
  11. 05-31-91 JulieB Created.
  12. --*/
  13. //
  14. // Include Files.
  15. //
  16. #include "nls.h"
  17. #include "jamo.h"
//
//  Constant Declarations.
//

//
//  Invalid weight value.
//
#define MAP_INVALID_UW 0xffff

//
//  Number of bytes in each weight.
//
//  Note: Total number of bytes is limited by MAX_WEIGHTS definition.
//        The padding is needed if SW is not on a WORD boundary.
//
//  NOTE(review): assuming the weights are laid out in the order listed
//  here, UW + DW + CW + XW = 14 bytes, which is already WORD aligned and
//  would explain why NUM_BYTES_PADDING is 0 - confirm against the sortkey
//  builder before changing any of these values.
//
#define NUM_BYTES_UW 8
#define NUM_BYTES_DW 1
#define NUM_BYTES_CW 1
#define NUM_BYTES_XW 4
#define NUM_BYTES_PADDING 0
#define NUM_BYTES_SW 4

//
//  Flags to drop the 3rd weight (CW).
//  Either flag being set in the caller's flags selects this behavior;
//  the mask is simply the union of the two NORM_ flags.
//
#define NORM_DROP_CW (NORM_IGNORECASE | NORM_IGNOREWIDTH)
  41. //
  42. // XW Values for FE Special Weights.
  43. //
  44. BYTE pXWDrop[] = // values to drop from XW
  45. {
  46. 0xc6, // weight 4
  47. 0x03, // weight 5
  48. 0xe4, // weight 6
  49. 0xc5 // weight 7
  50. };
  51. BYTE pXWSeparator[] = // separator values for XW
  52. {
  53. 0xff, // weight 4
  54. 0x02, // weight 5
  55. 0xff, // weight 6
  56. 0xff // weight 7
  57. };
  58. //
  59. // Forward Declarations.
  60. //
  61. int
  62. FoldCZone(
  63. LPCWSTR pSrc,
  64. int cchSrc,
  65. LPWSTR pDest,
  66. int cchDest);
  67. int
  68. FoldDigits(
  69. LPCWSTR pSrc,
  70. int cchSrc,
  71. LPWSTR pDest,
  72. int cchDest);
  73. int
  74. FoldCZone_Digits(
  75. LPCWSTR pSrc,
  76. int cchSrc,
  77. LPWSTR pDest,
  78. int cchDest);
  79. int FoldLigatures(
  80. LPCWSTR pSrc,
  81. int cchSrc,
  82. LPWSTR pDest,
  83. int cchDest);
  84. int
  85. FoldPreComposed(
  86. LPCWSTR pSrc,
  87. int cchSrc,
  88. LPWSTR pDest,
  89. int cchDest);
  90. int
  91. FoldComposite(
  92. LPCWSTR pSrc,
  93. int cchSrc,
  94. LPWSTR pDest,
  95. int cchDest);
  96. int
  97. MapCase(
  98. PLOC_HASH pHashN,
  99. LPCWSTR pSrc,
  100. int cchSrc,
  101. LPWSTR pDest,
  102. int cchDest,
  103. PCASE pCaseTbl);
  104. int
  105. MapSortKey(
  106. PLOC_HASH pHashN,
  107. DWORD dwFlags,
  108. LPCWSTR pSrc,
  109. int cchSrc,
  110. LPBYTE pDest,
  111. int cchDest,
  112. BOOL fModify);
  113. int
  114. MapNormalization(
  115. PLOC_HASH pHashN,
  116. DWORD dwFlags,
  117. LPCWSTR pSrc,
  118. int cchSrc,
  119. LPWSTR pDest,
  120. int cchDest);
  121. int
  122. MapKanaWidth(
  123. PLOC_HASH pHashN,
  124. DWORD dwFlags,
  125. LPCWSTR pSrc,
  126. int cchSrc,
  127. LPWSTR pDest,
  128. int cchDest);
  129. int
  130. MapHalfKana(
  131. LPCWSTR pSrc,
  132. int cchSrc,
  133. LPWSTR pDest,
  134. int cchDest,
  135. PKANA pKana,
  136. PCASE pCase);
  137. int
  138. MapFullKana(
  139. LPCWSTR pSrc,
  140. int cchSrc,
  141. LPWSTR pDest,
  142. int cchDest,
  143. PKANA pKana,
  144. PCASE pCase);
  145. int
  146. MapTraditionalSimplified(
  147. PLOC_HASH pHashN,
  148. DWORD dwFlags,
  149. LPCWSTR pSrc,
  150. int cchSrc,
  151. LPWSTR pDest,
  152. int cchDest,
  153. PCHINESE pChinese);
  154. //-------------------------------------------------------------------------//
  155. // API ROUTINES //
  156. //-------------------------------------------------------------------------//
  157. ////////////////////////////////////////////////////////////////////////////
  158. //
  159. // FoldStringW
  160. //
  161. // Maps one wide character string to another performing the specified
  162. // translation. This mapping routine only takes flags that are locale
  163. // independent.
  164. //
  165. // 05-31-91 JulieB Created.
  166. ////////////////////////////////////////////////////////////////////////////
  167. int WINAPI FoldStringW(
  168. DWORD dwMapFlags,
  169. LPCWSTR lpSrcStr,
  170. int cchSrc,
  171. LPWSTR lpDestStr,
  172. int cchDest)
  173. {
  174. int Count = 0; // word count
  175. //
  176. // Invalid Parameter Check:
  177. // - length of src string is 0
  178. // - either buffer size is negative (except cchSrc == -1)
  179. // - src string is NULL
  180. // - length of dest string is NOT zero AND dest string is NULL
  181. // - same buffer - src = destination
  182. //
  183. // - flags are checked in switch statement below
  184. //
  185. if ((cchSrc == 0) || (cchDest < 0) ||
  186. (lpSrcStr == NULL) ||
  187. ((cchDest != 0) && (lpDestStr == NULL)) ||
  188. (lpSrcStr == lpDestStr))
  189. {
  190. SetLastError(ERROR_INVALID_PARAMETER);
  191. return (0);
  192. }
  193. //
  194. // If cchSrc is -1, then the source string is null terminated and we
  195. // need to get the length of the source string. Add one to the
  196. // length to include the null termination.
  197. // (This will always be at least 1.)
  198. //
  199. if (cchSrc <= -1)
  200. {
  201. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  202. }
  203. //
  204. // Map the string based on the given flags.
  205. //
  206. switch (dwMapFlags)
  207. {
  208. case ( MAP_FOLDCZONE ) :
  209. {
  210. //
  211. // Map the string to fold the Compatibility Zone.
  212. //
  213. Count = FoldCZone( lpSrcStr,
  214. cchSrc,
  215. lpDestStr,
  216. cchDest );
  217. break;
  218. }
  219. case ( MAP_FOLDDIGITS ) :
  220. {
  221. //
  222. // Map the string to fold the Ascii Digits.
  223. //
  224. Count = FoldDigits( lpSrcStr,
  225. cchSrc,
  226. lpDestStr,
  227. cchDest );
  228. break;
  229. }
  230. case ( MAP_EXPAND_LIGATURES ) :
  231. {
  232. //
  233. // Map the string to expand all Ligatures.
  234. //
  235. Count = FoldLigatures( lpSrcStr,
  236. cchSrc,
  237. lpDestStr,
  238. cchDest );
  239. break;
  240. }
  241. case ( MAP_PRECOMPOSED ) :
  242. {
  243. //
  244. // Map the string to compress all composite forms of
  245. // characters to their precomposed form.
  246. //
  247. Count = FoldPreComposed( lpSrcStr,
  248. cchSrc,
  249. lpDestStr,
  250. cchDest );
  251. break;
  252. }
  253. case ( MAP_COMPOSITE ) :
  254. {
  255. //
  256. // Map the string to expand out all precomposed characters
  257. // to their composite form.
  258. //
  259. Count = FoldComposite( lpSrcStr,
  260. cchSrc,
  261. lpDestStr,
  262. cchDest );
  263. break;
  264. }
  265. case ( MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  266. {
  267. //
  268. // Map the string to fold the Compatibility Zone and fold the
  269. // Ascii Digits.
  270. //
  271. Count = FoldCZone_Digits( lpSrcStr,
  272. cchSrc,
  273. lpDestStr,
  274. cchDest );
  275. break;
  276. }
  277. case ( MAP_EXPAND_LIGATURES | MAP_FOLDCZONE ) :
  278. {
  279. //
  280. // Map the string to expand the ligatures and fold the
  281. // Compatibility Zone.
  282. //
  283. Count = FoldLigatures( lpSrcStr,
  284. cchSrc,
  285. lpDestStr,
  286. cchDest );
  287. Count = FoldCZone( lpDestStr,
  288. Count,
  289. lpDestStr,
  290. cchDest );
  291. break;
  292. }
  293. case ( MAP_EXPAND_LIGATURES | MAP_FOLDDIGITS ) :
  294. {
  295. //
  296. // Map the string to expand the ligatures and fold the
  297. // Ascii Digits.
  298. //
  299. Count = FoldLigatures( lpSrcStr,
  300. cchSrc,
  301. lpDestStr,
  302. cchDest );
  303. Count = FoldDigits( lpDestStr,
  304. Count,
  305. lpDestStr,
  306. cchDest );
  307. break;
  308. }
  309. case ( MAP_EXPAND_LIGATURES | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  310. {
  311. //
  312. // Map the string to expand the ligatures, fold the
  313. // Compatibility Zone and fold the Ascii Digits.
  314. //
  315. Count = FoldLigatures( lpSrcStr,
  316. cchSrc,
  317. lpDestStr,
  318. cchDest );
  319. Count = FoldCZone_Digits( lpDestStr,
  320. Count,
  321. lpDestStr,
  322. cchDest );
  323. break;
  324. }
  325. case ( MAP_PRECOMPOSED | MAP_FOLDCZONE ) :
  326. {
  327. //
  328. // Map the string to convert to precomposed forms and to
  329. // fold the Compatibility Zone.
  330. //
  331. Count = FoldPreComposed( lpSrcStr,
  332. cchSrc,
  333. lpDestStr,
  334. cchDest );
  335. Count = FoldCZone( lpDestStr,
  336. Count,
  337. lpDestStr,
  338. cchDest );
  339. break;
  340. }
  341. case ( MAP_PRECOMPOSED | MAP_FOLDDIGITS ) :
  342. {
  343. //
  344. // Map the string to convert to precomposed forms and to
  345. // fold the Ascii Digits.
  346. //
  347. Count = FoldPreComposed( lpSrcStr,
  348. cchSrc,
  349. lpDestStr,
  350. cchDest );
  351. Count = FoldDigits( lpDestStr,
  352. Count,
  353. lpDestStr,
  354. cchDest );
  355. break;
  356. }
  357. case ( MAP_PRECOMPOSED | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  358. {
  359. //
  360. // Map the string to convert to precomposed forms,
  361. // fold the Compatibility Zone, and fold the Ascii Digits.
  362. //
  363. Count = FoldPreComposed( lpSrcStr,
  364. cchSrc,
  365. lpDestStr,
  366. cchDest );
  367. Count = FoldCZone_Digits( lpDestStr,
  368. Count,
  369. lpDestStr,
  370. cchDest );
  371. break;
  372. }
  373. case ( MAP_COMPOSITE | MAP_FOLDCZONE ) :
  374. {
  375. //
  376. // Map the string to convert to composite forms and to
  377. // fold the Compatibility Zone.
  378. //
  379. Count = FoldComposite( lpSrcStr,
  380. cchSrc,
  381. lpDestStr,
  382. cchDest );
  383. Count = FoldCZone( lpDestStr,
  384. Count,
  385. lpDestStr,
  386. cchDest );
  387. break;
  388. }
  389. case ( MAP_COMPOSITE | MAP_FOLDDIGITS ) :
  390. {
  391. //
  392. // Map the string to convert to composite forms and to
  393. // fold the Ascii Digits.
  394. //
  395. Count = FoldComposite( lpSrcStr,
  396. cchSrc,
  397. lpDestStr,
  398. cchDest );
  399. Count = FoldDigits( lpDestStr,
  400. Count,
  401. lpDestStr,
  402. cchDest );
  403. break;
  404. }
  405. case ( MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  406. {
  407. //
  408. // Map the string to convert to composite forms,
  409. // fold the Compatibility Zone, and fold the Ascii Digits.
  410. //
  411. Count = FoldComposite( lpSrcStr,
  412. cchSrc,
  413. lpDestStr,
  414. cchDest );
  415. Count = FoldCZone_Digits( lpDestStr,
  416. Count,
  417. lpDestStr,
  418. cchDest );
  419. break;
  420. }
  421. default :
  422. {
  423. SetLastError(ERROR_INVALID_FLAGS);
  424. return (0);
  425. }
  426. }
  427. //
  428. // Return the number of characters written to the buffer.
  429. // Or, if cchDest == 0, then return the number of characters
  430. // that would have been written to the buffer.
  431. //
  432. return (Count);
  433. }
  434. ////////////////////////////////////////////////////////////////////////////
  435. //
  436. // LCMapStringW
  437. //
  438. // Maps one wide character string to another performing the specified
  439. // translation. This mapping routine only takes flags that are locale
  440. // dependent.
  441. //
  442. // 05-31-91 JulieB Created.
  443. // 07-26-93 JulieB Added new flags for NT-J.
  444. ////////////////////////////////////////////////////////////////////////////
  445. int WINAPI LCMapStringW(
  446. LCID Locale,
  447. DWORD dwMapFlags,
  448. LPCWSTR lpSrcStr,
  449. int cchSrc,
  450. LPWSTR lpDestStr,
  451. int cchDest)
  452. {
  453. PLOC_HASH pHashN; // ptr to LOC hash node
  454. int Count = 0; // word count or byte count
  455. int ctr; // loop counter
  456. //
  457. // Invalid Parameter Check:
  458. // - validate LCID
  459. // - length of src string is 0
  460. // - destination buffer size is negative
  461. // - src string is NULL
  462. // - length of dest string is NOT zero AND dest string is NULL
  463. // - same buffer - src = destination
  464. // if not UPPER or LOWER or
  465. // UPPER or LOWER used with Japanese flags
  466. //
  467. VALIDATE_LANGUAGE(Locale, pHashN, dwMapFlags & LCMAP_LINGUISTIC_CASING, TRUE);
  468. if ( (pHashN == NULL) ||
  469. (cchSrc == 0) || (cchDest < 0) || (lpSrcStr == NULL) ||
  470. ((cchDest != 0) && (lpDestStr == NULL)) ||
  471. ((lpSrcStr == lpDestStr) &&
  472. ((!(dwMapFlags & (LCMAP_UPPERCASE | LCMAP_LOWERCASE))) ||
  473. (dwMapFlags & (LCMAP_HIRAGANA | LCMAP_KATAKANA |
  474. LCMAP_HALFWIDTH | LCMAP_FULLWIDTH)))) )
  475. {
  476. SetLastError(ERROR_INVALID_PARAMETER);
  477. return (0);
  478. }
  479. //
  480. // Invalid Flags Check:
  481. // - flags other than valid ones or 0
  482. // - (any NORM_ flag) AND (any LCMAP_ flag except byterev and sortkey)
  483. // - (NORM_ flags for sortkey) AND (NOT LCMAP_SORTKEY)
  484. // - more than one of lower, upper, sortkey
  485. // - more than one of hiragana, katakana, sortkey
  486. // - more than one of half width, full width, sortkey
  487. // - more than one of traditional, simplified, sortkey
  488. // - (LINGUISTIC flag) AND (NOT LCMAP_UPPER OR LCMAP_LOWER)
  489. //
  490. dwMapFlags &= (~LOCALE_USE_CP_ACP);
  491. if ( (dwMapFlags & LCMS_INVALID_FLAG) || (dwMapFlags == 0) ||
  492. ((dwMapFlags & (NORM_ALL | SORT_STRINGSORT)) &&
  493. (dwMapFlags & LCMAP_NO_NORM)) ||
  494. ((dwMapFlags & NORM_SORTKEY_ONLY) &&
  495. (!(dwMapFlags & LCMAP_SORTKEY))) ||
  496. (MORE_THAN_ONE(dwMapFlags, LCMS1_SINGLE_FLAG)) ||
  497. (MORE_THAN_ONE(dwMapFlags, LCMS2_SINGLE_FLAG)) ||
  498. (MORE_THAN_ONE(dwMapFlags, LCMS3_SINGLE_FLAG)) ||
  499. (MORE_THAN_ONE(dwMapFlags, LCMS4_SINGLE_FLAG)) ||
  500. ((dwMapFlags & LCMAP_LINGUISTIC_CASING) &&
  501. (!(dwMapFlags & (LCMAP_UPPERCASE | LCMAP_LOWERCASE)))) )
  502. {
  503. SetLastError(ERROR_INVALID_FLAGS);
  504. return (0);
  505. }
  506. //
  507. // If cchSrc is -1, then the source string is null terminated and we
  508. // need to get the length of the source string. Add one to the
  509. // length to include the null termination.
  510. // (This will always be at least 1.)
  511. //
  512. if (cchSrc <= -1)
  513. {
  514. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  515. }
  516. //
  517. // Map the string based on the given flags.
  518. //
  519. if (dwMapFlags & LCMAP_SORTKEY)
  520. {
  521. //
  522. // Map the string to its sortkey.
  523. //
  524. // NOTE: This returns the number of BYTES, instead of the
  525. // number of wide characters (words).
  526. //
  527. Count = MapSortKey( pHashN,
  528. dwMapFlags,
  529. lpSrcStr,
  530. cchSrc,
  531. (LPBYTE)lpDestStr,
  532. cchDest,
  533. IS_KOREAN(Locale) );
  534. }
  535. else
  536. {
  537. switch (dwMapFlags & ~(LCMAP_BYTEREV | LCMAP_LINGUISTIC_CASING))
  538. {
  539. case ( LCMAP_LOWERCASE ) :
  540. {
  541. //
  542. // Map the string to Lower Case.
  543. //
  544. Count = MapCase( pHashN,
  545. lpSrcStr,
  546. cchSrc,
  547. lpDestStr,
  548. cchDest,
  549. (dwMapFlags & LCMAP_LINGUISTIC_CASING)
  550. ? pHashN->pLowerLinguist
  551. : pHashN->pLowerCase );
  552. break;
  553. }
  554. case ( LCMAP_UPPERCASE ) :
  555. {
  556. //
  557. // Map the string to Upper Case.
  558. //
  559. Count = MapCase( pHashN,
  560. lpSrcStr,
  561. cchSrc,
  562. lpDestStr,
  563. cchDest,
  564. (dwMapFlags & LCMAP_LINGUISTIC_CASING)
  565. ? pHashN->pUpperLinguist
  566. : pHashN->pUpperCase );
  567. break;
  568. }
  569. case ( NORM_IGNORENONSPACE ) :
  570. case ( NORM_IGNORESYMBOLS ) :
  571. case ( NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS ) :
  572. {
  573. //
  574. // Map the string to strip out nonspace marks and/or symbols.
  575. //
  576. Count = MapNormalization( pHashN,
  577. dwMapFlags & ~LCMAP_BYTEREV,
  578. lpSrcStr,
  579. cchSrc,
  580. lpDestStr,
  581. cchDest );
  582. break;
  583. }
  584. case ( LCMAP_TRADITIONAL_CHINESE ) :
  585. case ( LCMAP_TRADITIONAL_CHINESE | LCMAP_LOWERCASE ) :
  586. case ( LCMAP_TRADITIONAL_CHINESE | LCMAP_UPPERCASE) :
  587. {
  588. //
  589. // Map the string to Traditional Chinese.
  590. //
  591. Count = MapTraditionalSimplified( pHashN,
  592. dwMapFlags & ~LCMAP_BYTEREV,
  593. lpSrcStr,
  594. cchSrc,
  595. lpDestStr,
  596. cchDest,
  597. pTblPtrs->pTraditional );
  598. break;
  599. }
  600. case ( LCMAP_SIMPLIFIED_CHINESE ) :
  601. case ( LCMAP_SIMPLIFIED_CHINESE | LCMAP_LOWERCASE ) :
  602. case ( LCMAP_SIMPLIFIED_CHINESE | LCMAP_UPPERCASE ) :
  603. {
  604. //
  605. // Map the string to Simplified Chinese.
  606. //
  607. Count = MapTraditionalSimplified( pHashN,
  608. dwMapFlags & ~LCMAP_BYTEREV,
  609. lpSrcStr,
  610. cchSrc,
  611. lpDestStr,
  612. cchDest,
  613. pTblPtrs->pSimplified );
  614. break;
  615. }
  616. default :
  617. {
  618. //
  619. // Make sure the Chinese flags are not used with the
  620. // Japanese flags.
  621. //
  622. if (dwMapFlags &
  623. (LCMAP_TRADITIONAL_CHINESE | LCMAP_SIMPLIFIED_CHINESE))
  624. {
  625. SetLastError(ERROR_INVALID_FLAGS);
  626. return (0);
  627. }
  628. //
  629. // The only flags not yet handled are the variations
  630. // containing the Kana and/or Width flags.
  631. // This handles all variations for:
  632. // LCMAP_HIRAGANA
  633. // LCMAP_KATAKANA
  634. // LCMAP_HALFWIDTH
  635. // LCMAP_FULLWIDTH
  636. //
  637. // Allow LCMAP_LOWERCASE and LCMAP_UPPERCASE
  638. // in combination with the kana and width flags.
  639. //
  640. Count = MapKanaWidth( pHashN,
  641. dwMapFlags & ~LCMAP_BYTEREV,
  642. lpSrcStr,
  643. cchSrc,
  644. lpDestStr,
  645. cchDest );
  646. break;
  647. }
  648. }
  649. }
  650. //
  651. // Always check LCMAP_BYTEREV last and do it in place.
  652. // LCMAP_BYTEREV may be used in combination with any other flag
  653. // (except ignore case without sortkey) or by itself.
  654. //
  655. if (dwMapFlags & LCMAP_BYTEREV)
  656. {
  657. //
  658. // Reverse the bytes of each word in the string.
  659. //
  660. if (dwMapFlags == LCMAP_BYTEREV)
  661. {
  662. //
  663. // Byte Reversal flag is used by itself.
  664. //
  665. // Make sure that the size of the destination buffer is
  666. // larger than zero. If it is zero, return the size of
  667. // the source string only. Do NOT touch lpDestStr.
  668. //
  669. if (cchDest != 0)
  670. {
  671. //
  672. // Flag is used by itself. Reverse the bytes from
  673. // the source string and store them in the destination
  674. // string.
  675. //
  676. if (cchSrc > cchDest)
  677. {
  678. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  679. return (0);
  680. }
  681. for (ctr = 0; ctr < cchSrc; ctr++)
  682. {
  683. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpSrcStr[ctr]),
  684. LOBYTE(lpSrcStr[ctr]) );
  685. }
  686. }
  687. //
  688. // Return the size of the source string.
  689. //
  690. Count = cchSrc;
  691. }
  692. else
  693. {
  694. //
  695. // Make sure that the size of the destination buffer is
  696. // larger than zero. If it is zero, return the count and
  697. // do NOT touch lpDestStr.
  698. //
  699. if (cchDest != 0)
  700. {
  701. //
  702. // Check for sortkey flag.
  703. //
  704. if (dwMapFlags & LCMAP_SORTKEY)
  705. {
  706. //
  707. // Sortkey flag is also set, so 'Count' contains the
  708. // number of BYTES instead of the number of words.
  709. //
  710. // Reverse the bytes in place in the destination string.
  711. // No need to check the size of the destination buffer
  712. // here - it's been done elsewhere.
  713. //
  714. for (ctr = 0; ctr < Count / 2; ctr++)
  715. {
  716. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpDestStr[ctr]),
  717. LOBYTE(lpDestStr[ctr]) );
  718. }
  719. }
  720. else
  721. {
  722. //
  723. // Flag is used in combination with another flag.
  724. // Reverse the bytes in place in the destination string.
  725. // No need to check the size of the destination buffer
  726. // here - it's been done elsewhere.
  727. //
  728. for (ctr = 0; ctr < Count; ctr++)
  729. {
  730. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpDestStr[ctr]),
  731. LOBYTE(lpDestStr[ctr]) );
  732. }
  733. }
  734. }
  735. }
  736. }
  737. //
  738. // Return the number of characters (or number of bytes for sortkey)
  739. // written to the buffer.
  740. //
  741. return (Count);
  742. }
  743. //-------------------------------------------------------------------------//
  744. // INTERNAL ROUTINES //
  745. //-------------------------------------------------------------------------//
  746. ////////////////////////////////////////////////////////////////////////////
  747. //
  748. // FoldCZone
  749. //
  750. // Stores the compatibility zone values for the given string in the
  751. // destination buffer, and returns the number of wide characters
  752. // written to the buffer.
  753. //
  754. // 02-01-93 JulieB Created.
  755. ////////////////////////////////////////////////////////////////////////////
  756. int FoldCZone(
  757. LPCWSTR pSrc,
  758. int cchSrc,
  759. LPWSTR pDest,
  760. int cchDest)
  761. {
  762. int ctr; // loop counter
  763. //
  764. // If the destination value is zero, then just return the
  765. // length of the source string. Do NOT touch pDest.
  766. //
  767. if (cchDest == 0)
  768. {
  769. return (cchSrc);
  770. }
  771. //
  772. // If cchSrc is greater than cchDest, then the destination buffer
  773. // is too small to hold the new string. Return an error.
  774. //
  775. if (cchSrc > cchDest)
  776. {
  777. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  778. return (0);
  779. }
  780. //
  781. // Fold the Compatibility Zone and store it in the destination string.
  782. //
  783. for (ctr = 0; ctr < cchSrc; ctr++)
  784. {
  785. pDest[ctr] = GET_FOLD_CZONE(pTblPtrs->pCZone, pSrc[ctr]);
  786. }
  787. //
  788. // Return the number of wide characters written.
  789. //
  790. return (ctr);
  791. }
  792. ////////////////////////////////////////////////////////////////////////////
  793. //
  794. // FoldDigits
  795. //
  796. // Stores the ascii digits values for the given string in the
  797. // destination buffer, and returns the number of wide characters
  798. // written to the buffer.
  799. //
  800. // 02-01-93 JulieB Created.
  801. ////////////////////////////////////////////////////////////////////////////
  802. int FoldDigits(
  803. LPCWSTR pSrc,
  804. int cchSrc,
  805. LPWSTR pDest,
  806. int cchDest)
  807. {
  808. int ctr; // loop counter
  809. //
  810. // If the destination value is zero, then just return the
  811. // length of the source string. Do NOT touch pDest.
  812. //
  813. if (cchDest == 0)
  814. {
  815. return (cchSrc);
  816. }
  817. //
  818. // If cchSrc is greater than cchDest, then the destination buffer
  819. // is too small to hold the new string. Return an error.
  820. //
  821. if (cchSrc > cchDest)
  822. {
  823. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  824. return (0);
  825. }
  826. //
  827. // Fold the Ascii Digits and store it in the destination string.
  828. //
  829. for (ctr = 0; ctr < cchSrc; ctr++)
  830. {
  831. pDest[ctr] = GET_ASCII_DIGITS(pTblPtrs->pADigit, pSrc[ctr]);
  832. }
  833. //
  834. // Return the number of wide characters written.
  835. //
  836. return (ctr);
  837. }
  838. ////////////////////////////////////////////////////////////////////////////
  839. //
  840. // FoldCZone_Digits
  841. //
  842. // Stores the compatibility zone and ascii digits values for the given
  843. // string in the destination buffer, and returns the number of wide
  844. // characters written to the buffer.
  845. //
  846. // 02-01-93 JulieB Created.
  847. ////////////////////////////////////////////////////////////////////////////
  848. int FoldCZone_Digits(
  849. LPCWSTR pSrc,
  850. int cchSrc,
  851. LPWSTR pDest,
  852. int cchDest)
  853. {
  854. int ctr; // loop counter
  855. //
  856. // If the destination value is zero, then just return the
  857. // length of the source string. Do NOT touch pDest.
  858. //
  859. if (cchDest == 0)
  860. {
  861. return (cchSrc);
  862. }
  863. //
  864. // If cchSrc is greater than cchDest, then the destination buffer
  865. // is too small to hold the new string. Return an error.
  866. //
  867. if (cchSrc > cchDest)
  868. {
  869. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  870. return (0);
  871. }
  872. //
  873. // Fold the compatibility zone and the ascii digits values and store
  874. // it in the destination string.
  875. //
  876. for (ctr = 0; ctr < cchSrc; ctr++)
  877. {
  878. pDest[ctr] = GET_FOLD_CZONE(pTblPtrs->pCZone, pSrc[ctr]);
  879. pDest[ctr] = GET_ASCII_DIGITS(pTblPtrs->pADigit, pDest[ctr]);
  880. }
  881. //
  882. // Return the number of wide characters written.
  883. //
  884. return (ctr);
  885. }
  886. ////////////////////////////////////////////////////////////////////////////
  887. //
  888. // FoldLigatures
  889. //
  890. // Stores the expanded ligature values for the given string in the
  891. // destination buffer, and returns the number of wide characters
  892. // written to the buffer.
  893. //
  894. // 10-15-96 JulieB Created.
  895. ////////////////////////////////////////////////////////////////////////////
  896. int FoldLigatures(
  897. LPCWSTR pSrc,
  898. int cchSrc,
  899. LPWSTR pDest,
  900. int cchDest)
  901. {
  902. int ctr = 0; // source char counter
  903. int ctr2 = 0; // destination char counter
  904. DWORD Weight; // sort weight - used for expansions
  905. //
  906. // Make sure the default sorting table is available. If not,
  907. // return an error.
  908. //
  909. if (pTblPtrs->pDefaultSortkey == NULL)
  910. {
  911. KdPrint(("NLSAPI: No Default Sorting Table Loaded.\n"));
  912. SetLastError(ERROR_FILE_NOT_FOUND);
  913. return (0);
  914. }
  915. //
  916. // If the destination value is zero, then just return the
  917. // length of the string that would be returned. Do NOT touch pDest.
  918. //
  919. if (cchDest == 0)
  920. {
  921. //
  922. // Convert the source string to expand all ligatures and calculate
  923. // the number of characters that would have been written to a
  924. // destination buffer.
  925. //
  926. while (ctr < cchSrc)
  927. {
  928. Weight = MAKE_SORTKEY_DWORD((pTblPtrs->pDefaultSortkey)[pSrc[ctr]]);
  929. if (GET_SCRIPT_MEMBER(&Weight) == EXPANSION)
  930. {
  931. do
  932. {
  933. ctr2++;
  934. Weight = MAKE_SORTKEY_DWORD(
  935. (pTblPtrs->pDefaultSortkey)[GET_EXPANSION_2(&Weight)]);
  936. } while (GET_SCRIPT_MEMBER(&Weight) == EXPANSION);
  937. ctr2++;
  938. }
  939. else
  940. {
  941. ctr2++;
  942. }
  943. ctr++;
  944. }
  945. }
  946. else
  947. {
  948. //
  949. // Convert the source string to expand all ligatures and store
  950. // the result in the destination buffer.
  951. //
  952. while ((ctr < cchSrc) && (ctr2 < cchDest))
  953. {
  954. Weight = MAKE_SORTKEY_DWORD((pTblPtrs->pDefaultSortkey)[pSrc[ctr]]);
  955. if (GET_SCRIPT_MEMBER(&Weight) == EXPANSION)
  956. {
  957. do
  958. {
  959. if ((ctr2 + 1) < cchDest)
  960. {
  961. pDest[ctr2] = GET_EXPANSION_1(&Weight);
  962. pDest[ctr2 + 1] = GET_EXPANSION_2(&Weight);
  963. ctr2++;
  964. }
  965. else
  966. {
  967. ctr2++;
  968. break;
  969. }
  970. Weight = MAKE_SORTKEY_DWORD(
  971. (pTblPtrs->pDefaultSortkey)[pDest[ctr2]]);
  972. } while (GET_SCRIPT_MEMBER(&Weight) == EXPANSION);
  973. if (ctr2 >= cchDest)
  974. {
  975. break;
  976. }
  977. ctr2++;
  978. }
  979. else
  980. {
  981. pDest[ctr2] = pSrc[ctr];
  982. ctr2++;
  983. }
  984. ctr++;
  985. }
  986. }
  987. //
  988. // Make sure destination buffer was large enough.
  989. //
  990. if (ctr < cchSrc)
  991. {
  992. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  993. return (0);
  994. }
  995. //
  996. // Return the number of wide characters written.
  997. //
  998. return (ctr2);
  999. }
  1000. ////////////////////////////////////////////////////////////////////////////
  1001. //
  1002. // FoldPreComposed
  1003. //
  1004. // Stores the precomposed values for the given string in the
  1005. // destination buffer, and returns the number of wide characters
  1006. // written to the buffer.
  1007. //
  1008. // 02-01-93 JulieB Created.
  1009. ////////////////////////////////////////////////////////////////////////////
  1010. int FoldPreComposed(
  1011. LPCWSTR pSrc,
  1012. int cchSrc,
  1013. LPWSTR pDest,
  1014. int cchDest)
  1015. {
  1016. int ctr = 0; // source char counter
  1017. int ctr2 = 0; // destination char counter
  1018. WCHAR wch = 0; // wchar holder
  1019. //
  1020. // Make sure the default sorting table is available. If not,
  1021. // return an error.
  1022. //
  1023. if (pTblPtrs->pDefaultSortkey == NULL)
  1024. {
  1025. KdPrint(("NLSAPI: No Default Sorting Table Loaded.\n"));
  1026. SetLastError(ERROR_FILE_NOT_FOUND);
  1027. return (0);
  1028. }
  1029. //
  1030. // If the destination value is zero, then just return the
  1031. // length of the string that would be returned. Do NOT touch pDest.
  1032. //
  1033. if (cchDest == 0)
  1034. {
  1035. //
  1036. // Convert the source string to precomposed and calculate the
  1037. // number of characters that would have been written to a
  1038. // destination buffer.
  1039. //
  1040. while (ctr < cchSrc)
  1041. {
  1042. if ((ctr2 != 0) &&
  1043. (IS_NONSPACE_ONLY(pTblPtrs->pDefaultSortkey, pSrc[ctr])))
  1044. {
  1045. //
  1046. // Composite form. Write the precomposed form.
  1047. //
  1048. // If the precomposed character is written to the buffer,
  1049. // do NOT increment the destination pointer or the
  1050. // character count (the precomposed character was
  1051. // written over the previous character).
  1052. //
  1053. if (wch)
  1054. {
  1055. if ((wch = GetPreComposedChar(pSrc[ctr], wch)) == 0)
  1056. {
  1057. //
  1058. // No translation for composite form, so just
  1059. // increment the destination counter.
  1060. //
  1061. ctr2++;
  1062. }
  1063. }
  1064. else
  1065. {
  1066. if ((wch = GetPreComposedChar( pSrc[ctr],
  1067. pSrc[ctr - 1] )) == 0)
  1068. {
  1069. //
  1070. // No translation for composite form, so just
  1071. // increment the destination counter.
  1072. //
  1073. ctr2++;
  1074. }
  1075. }
  1076. }
  1077. else
  1078. {
  1079. //
  1080. // Not part of a composite character, so just
  1081. // increment the destination counter.
  1082. //
  1083. wch = 0;
  1084. ctr2++;
  1085. }
  1086. ctr++;
  1087. }
  1088. }
  1089. else
  1090. {
  1091. //
  1092. // Convert the source string to precomposed and store it in the
  1093. // destination string.
  1094. //
  1095. while ((ctr < cchSrc) && (ctr2 < cchDest))
  1096. {
  1097. if ((ctr2 != 0) &&
  1098. (IS_NONSPACE_ONLY(pTblPtrs->pDefaultSortkey, pSrc[ctr])))
  1099. {
  1100. //
  1101. // Composite form. Write the precomposed form.
  1102. //
  1103. // If the precomposed character is written to the buffer,
  1104. // do NOT increment the destination pointer or the
  1105. // character count (the precomposed character was
  1106. // written over the previous character).
  1107. //
  1108. wch = pDest[ctr2 - 1];
  1109. if ((pDest[ctr2 - 1] =
  1110. GetPreComposedChar( pSrc[ctr],
  1111. pDest[ctr2 - 1] )) == 0)
  1112. {
  1113. //
  1114. // No translation for composite form, so must
  1115. // rewrite the base character and write the
  1116. // composite character.
  1117. //
  1118. pDest[ctr2 - 1] = wch;
  1119. pDest[ctr2] = pSrc[ctr];
  1120. ctr2++;
  1121. }
  1122. }
  1123. else
  1124. {
  1125. //
  1126. // Not part of a composite character, so just write
  1127. // the character to the destination string.
  1128. //
  1129. pDest[ctr2] = pSrc[ctr];
  1130. ctr2++;
  1131. }
  1132. ctr++;
  1133. }
  1134. }
  1135. //
  1136. // Make sure destination buffer was large enough.
  1137. //
  1138. if (ctr < cchSrc)
  1139. {
  1140. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1141. return (0);
  1142. }
  1143. //
  1144. // Return the number of wide characters written.
  1145. //
  1146. return (ctr2);
  1147. }
  1148. ////////////////////////////////////////////////////////////////////////////
  1149. //
  1150. // FoldComposite
  1151. //
  1152. // Stores the composite values for the given string in the
  1153. // destination buffer, and returns the number of wide characters
  1154. // written to the buffer.
  1155. //
  1156. // 02-01-93 JulieB Created.
  1157. ////////////////////////////////////////////////////////////////////////////
  1158. int FoldComposite(
  1159. LPCWSTR pSrc,
  1160. int cchSrc,
  1161. LPWSTR pDest,
  1162. int cchDest)
  1163. {
  1164. int ctr = 0; // source char counter
  1165. int ctr2 = 0; // destination char counter
  1166. LPWSTR pEndDest; // ptr to end of destination string
  1167. WCHAR pTmp[MAX_COMPOSITE]; // tmp buffer for composite chars
  1168. //
  1169. // If the destination value is zero, then just return the
  1170. // length of the string that would be returned. Do NOT touch pDest.
  1171. //
  1172. if (cchDest == 0)
  1173. {
  1174. //
  1175. // Get the end of the tmp buffer.
  1176. //
  1177. pEndDest = (LPWSTR)pTmp + MAX_COMPOSITE;
  1178. //
  1179. // Convert the source string to precomposed and calculate the
  1180. // number of characters that would have been written to a
  1181. // destination buffer.
  1182. //
  1183. while (ctr < cchSrc)
  1184. {
  1185. //
  1186. // Write the character to the destination string.
  1187. //
  1188. *pTmp = pSrc[ctr];
  1189. //
  1190. // See if it needs to be expanded to its composite form.
  1191. //
  1192. // If no composite form is found, the routine returns 1 for
  1193. // the base character. Simply increment by the return value.
  1194. //
  1195. ctr2 += InsertCompositeForm(pTmp, pEndDest);
  1196. //
  1197. // Increment the source string counter.
  1198. //
  1199. ctr++;
  1200. }
  1201. }
  1202. else
  1203. {
  1204. //
  1205. // Get the end of the destination string.
  1206. //
  1207. pEndDest = (LPWSTR)pDest + cchDest;
  1208. //
  1209. // Convert the source string to precomposed and store it in the
  1210. // destination string.
  1211. //
  1212. while ((ctr < cchSrc) && (ctr2 < cchDest))
  1213. {
  1214. //
  1215. // Write the character to the destination string.
  1216. //
  1217. pDest[ctr2] = pSrc[ctr];
  1218. //
  1219. // See if it needs to be expanded to its composite form.
  1220. //
  1221. // If no composite form is found, the routine returns 1 for
  1222. // the base character. Simply increment by the return value.
  1223. //
  1224. ctr2 += InsertCompositeForm(&(pDest[ctr2]), pEndDest);
  1225. //
  1226. // Increment the source string counter.
  1227. //
  1228. ctr++;
  1229. }
  1230. }
  1231. //
  1232. // Make sure destination buffer was large enough.
  1233. //
  1234. if (ctr < cchSrc)
  1235. {
  1236. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1237. return (0);
  1238. }
  1239. //
  1240. // Return the number of wide characters written.
  1241. //
  1242. return (ctr2);
  1243. }
  1244. ////////////////////////////////////////////////////////////////////////////
  1245. //
  1246. // MapCase
  1247. //
  1248. // Stores the lower or upper case values for the given string in the
  1249. // destination buffer, and returns the number of wide characters written to
  1250. // the buffer.
  1251. //
  1252. // 05-31-91 JulieB Created.
  1253. ////////////////////////////////////////////////////////////////////////////
  1254. int MapCase(
  1255. PLOC_HASH pHashN,
  1256. LPCWSTR pSrc,
  1257. int cchSrc,
  1258. LPWSTR pDest,
  1259. int cchDest,
  1260. PCASE pCaseTbl)
  1261. {
  1262. int ctr; // loop counter
  1263. //
  1264. // If the destination value is zero, then just return the
  1265. // length of the source string. Do NOT touch pDest.
  1266. //
  1267. if (cchDest == 0)
  1268. {
  1269. return (cchSrc);
  1270. }
  1271. //
  1272. // If cchSrc is greater than cchDest, then the destination buffer
  1273. // is too small to hold the lower or upper case string. Return an
  1274. // error.
  1275. //
  1276. if (cchSrc > cchDest)
  1277. {
  1278. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1279. return (0);
  1280. }
  1281. //
  1282. // Lower or Upper case the source string and store it in the
  1283. // destination string.
  1284. //
  1285. for (ctr = 0; ctr < cchSrc; ctr++)
  1286. {
  1287. pDest[ctr] = GET_LOWER_UPPER_CASE(pCaseTbl, pSrc[ctr]);
  1288. }
  1289. //
  1290. // Return the number of wide characters written.
  1291. //
  1292. return (ctr);
  1293. }
  1294. ////////////////////////////////////////////////////////////////////////////
  1295. //
  1296. // SPECIAL_CASE_HANDLER
  1297. //
  1298. // Handles all of the special cases for each character. This includes only
  1299. // the valid values less than or equal to MAX_SPECIAL_CASE.
  1300. //
  1301. // DEFINED AS A MACRO.
  1302. //
  1303. // 11-04-92 JulieB Created.
  1304. ////////////////////////////////////////////////////////////////////////////
  1305. #define EXTRA_WEIGHT_POS(WtNum) (*(pPosXW + (WtNum * WeightLen)))
  1306. #define SPECIAL_CASE_HANDLER( SM, \
  1307. pWeight, \
  1308. pSortkey, \
  1309. pExpand, \
  1310. Position, \
  1311. fStringSort, \
  1312. fIgnoreSymbols, \
  1313. pCur, \
  1314. pBegin, \
  1315. fModify ) \
  1316. { \
  1317. PSORTKEY pExpWt; /* weight of 1 expansion char */ \
  1318. BYTE AW; /* alphanumeric weight */ \
  1319. BYTE XW; /* case weight value with extra bits */ \
  1320. DWORD PrevWt; /* previous weight */ \
  1321. BYTE PrevSM; /* previous script member */ \
  1322. BYTE PrevAW; /* previuos alphanumeric weight */ \
  1323. BYTE PrevCW; /* previuos case weight */ \
  1324. LPWSTR pPrev; /* ptr to previous char */ \
  1325. \
  1326. \
  1327. switch (SM) \
  1328. { \
  1329. case ( UNSORTABLE ) : \
  1330. { \
  1331. /* \
  1332. * Character is unsortable, so skip it. \
  1333. */ \
  1334. break; \
  1335. } \
  1336. case ( NONSPACE_MARK ) : \
  1337. { \
  1338. /* \
  1339. * Character is a nonspace mark, so only store \
  1340. * the diacritic weight. \
  1341. */ \
  1342. if (pPosDW > pDW) \
  1343. { \
  1344. (*(pPosDW - 1)) += GET_DIACRITIC(pWeight); \
  1345. } \
  1346. else \
  1347. { \
  1348. *pPosDW = GET_DIACRITIC(pWeight); \
  1349. pPosDW++; \
  1350. } \
  1351. \
  1352. break; \
  1353. } \
  1354. case ( EXPANSION ) : \
  1355. { \
  1356. /* \
  1357. * Expansion character - one character has 2 \
  1358. * different weights. Store each weight separately. \
  1359. */ \
  1360. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP1]); \
  1361. *pPosUW = GET_UNICODE_MOD(pExpWt, fModify); \
  1362. *pPosDW = GET_DIACRITIC(pExpWt); \
  1363. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1364. pPosUW++; \
  1365. pPosDW++; \
  1366. pPosCW++; \
  1367. \
  1368. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP2]); \
  1369. while (GET_SCRIPT_MEMBER(pExpWt) == EXPANSION) \
  1370. { \
  1371. pWeight = pExpWt; \
  1372. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP1]); \
  1373. *pPosUW = GET_UNICODE_MOD(pExpWt, fModify); \
  1374. *pPosDW = GET_DIACRITIC(pExpWt); \
  1375. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1376. pPosUW++; \
  1377. pPosDW++; \
  1378. pPosCW++; \
  1379. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP2]); \
  1380. } \
  1381. *pPosUW = GET_UNICODE_MOD(pExpWt, fModify); \
  1382. *pPosDW = GET_DIACRITIC(pExpWt); \
  1383. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1384. pPosUW++; \
  1385. pPosDW++; \
  1386. pPosCW++; \
  1387. \
  1388. break; \
  1389. } \
  1390. case ( PUNCTUATION ) : \
  1391. { \
  1392. if (!fStringSort) \
  1393. { \
  1394. /* \
  1395. * Word Sort Method. \
  1396. * \
  1397. * Character is punctuation, so only store the special \
  1398. * weight. \
  1399. */ \
  1400. *((LPBYTE)pPosSW) = HIBYTE(GET_POSITION_SW(Position)); \
  1401. *(((LPBYTE)pPosSW) + 1) = LOBYTE(GET_POSITION_SW(Position)); \
  1402. pPosSW++; \
  1403. *pPosSW = GET_SPECIAL_WEIGHT(pWeight); \
  1404. pPosSW++; \
  1405. \
  1406. break; \
  1407. } \
  1408. \
  1409. /* \
  1410. * If using STRING sort method, treat punctuation the same \
  1411. * as symbol. So, FALL THROUGH to the symbol cases. \
  1412. */ \
  1413. } \
  1414. case ( SYMBOL_1 ) : \
  1415. case ( SYMBOL_2 ) : \
  1416. case ( SYMBOL_3 ) : \
  1417. case ( SYMBOL_4 ) : \
  1418. case ( SYMBOL_5 ) : \
  1419. { \
  1420. /* \
  1421. * Character is a symbol. \
  1422. * Store the Unicode weights ONLY if the NORM_IGNORESYMBOLS \
  1423. * flag is NOT set. \
  1424. */ \
  1425. if (!fIgnoreSymbols) \
  1426. { \
  1427. *pPosUW = GET_UNICODE_MOD(pWeight, fModify); \
  1428. *pPosDW = GET_DIACRITIC(pWeight); \
  1429. *pPosCW = GET_CASE(pWeight) & CaseMask; \
  1430. pPosUW++; \
  1431. pPosDW++; \
  1432. pPosCW++; \
  1433. } \
  1434. \
  1435. break; \
  1436. } \
  1437. case ( FAREAST_SPECIAL ) : \
  1438. { \
  1439. /* \
  1440. * Get the alphanumeric weight and the case weight of the \
  1441. * current code point. \
  1442. */ \
  1443. AW = GET_ALPHA_NUMERIC(pWeight); \
  1444. XW = (GET_CASE(pWeight) & CaseMask) | CASE_XW_MASK; \
  1445. \
  1446. /* \
  1447. * Special case Repeat and Cho-On. \
  1448. * AW = 0 => Repeat \
  1449. * AW = 1 => Cho-On \
  1450. * AW = 2+ => Kana \
  1451. */ \
  1452. if (AW <= MAX_SPECIAL_AW) \
  1453. { \
  1454. /* \
  1455. * If the script member of the previous character is \
  1456. * invalid, then give the special character an \
  1457. * invalid weight (highest possible weight) so that it \
  1458. * will sort AFTER everything else. \
  1459. */ \
  1460. pPrev = pCur - 1; \
  1461. *pPosUW = MAP_INVALID_UW; \
  1462. while (pPrev >= pBegin) \
  1463. { \
  1464. PrevWt = GET_DWORD_WEIGHT(pHashN, *pPrev); \
  1465. PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \
  1466. if (PrevSM < FAREAST_SPECIAL) \
  1467. { \
  1468. if (PrevSM != EXPANSION) \
  1469. { \
  1470. /* \
  1471. * UNSORTABLE or NONSPACE_MARK. \
  1472. * \
  1473. * Just ignore these, since we only care \
  1474. * about the previous UW value. \
  1475. */ \
  1476. pPrev--; \
  1477. continue; \
  1478. } \
  1479. } \
  1480. else if (PrevSM == FAREAST_SPECIAL) \
  1481. { \
  1482. PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \
  1483. if (PrevAW <= MAX_SPECIAL_AW) \
  1484. { \
  1485. /* \
  1486. * Handle case where two special chars follow \
  1487. * each other. Keep going back in the string. \
  1488. */ \
  1489. pPrev--; \
  1490. continue; \
  1491. } \
  1492. \
  1493. *pPosUW = MAKE_UNICODE_WT(KANA, PrevAW, fModify); \
  1494. \
  1495. /* \
  1496. * Only build weights 4, 5, 6, and 7 if the \
  1497. * previous character is KANA. \
  1498. * \
  1499. * Always: \
  1500. * 4W = previous CW & ISOLATE_SMALL \
  1501. * 6W = previous CW & ISOLATE_KANA \
  1502. * \
  1503. */ \
  1504. PrevCW = (GET_CASE(&PrevWt) & CaseMask) | \
  1505. CASE_XW_MASK; \
  1506. \
  1507. EXTRA_WEIGHT_POS(0) = PrevCW & ISOLATE_SMALL; \
  1508. EXTRA_WEIGHT_POS(2) = PrevCW & ISOLATE_KANA; \
  1509. \
  1510. if (AW == AW_REPEAT) \
  1511. { \
  1512. /* \
  1513. * Repeat: \
  1514. * UW = previous UW (set above) \
  1515. * 5W = WT_FIVE_REPEAT \
  1516. * 7W = previous CW & ISOLATE_WIDTH \
  1517. */ \
  1518. EXTRA_WEIGHT_POS(1) = WT_FIVE_REPEAT; \
  1519. EXTRA_WEIGHT_POS(3) = PrevCW & ISOLATE_WIDTH; \
  1520. } \
  1521. else \
  1522. { \
  1523. /* \
  1524. * Cho-On: \
  1525. * UW = previous UW & CHO_ON_UW_MASK \
  1526. * 5W = WT_FIVE_CHO_ON \
  1527. * 7W = current CW & ISOLATE_WIDTH \
  1528. */ \
  1529. *pPosUW &= CHO_ON_UW_MASK; \
  1530. EXTRA_WEIGHT_POS(1) = WT_FIVE_CHO_ON; \
  1531. EXTRA_WEIGHT_POS(3) = XW & ISOLATE_WIDTH; \
  1532. } \
  1533. \
  1534. pPosXW++; \
  1535. } \
  1536. else \
  1537. { \
  1538. *pPosUW = GET_UNICODE_MOD(&PrevWt, fModify); \
  1539. } \
  1540. \
  1541. break; \
  1542. } \
  1543. \
  1544. /* \
  1545. * Make sure there is a valid UW. If not, quit out \
  1546. * of switch case. \
  1547. */ \
  1548. if (*pPosUW == MAP_INVALID_UW) \
  1549. { \
  1550. pPosUW++; \
  1551. break; \
  1552. } \
  1553. } \
  1554. else \
  1555. { \
  1556. /* \
  1557. * Kana: \
  1558. * SM = KANA \
  1559. * AW = current AW \
  1560. * 4W = current CW & ISOLATE_SMALL \
  1561. * 5W = WT_FIVE_KANA \
  1562. * 6W = current CW & ISOLATE_KANA \
  1563. * 7W = current CW & ISOLATE_WIDTH \
  1564. */ \
  1565. *pPosUW = MAKE_UNICODE_WT(KANA, AW, fModify); \
  1566. EXTRA_WEIGHT_POS(0) = XW & ISOLATE_SMALL; \
  1567. EXTRA_WEIGHT_POS(1) = WT_FIVE_KANA; \
  1568. EXTRA_WEIGHT_POS(2) = XW & ISOLATE_KANA; \
  1569. EXTRA_WEIGHT_POS(3) = XW & ISOLATE_WIDTH; \
  1570. \
  1571. pPosXW++; \
  1572. } \
  1573. \
  1574. /* \
  1575. * Always: \
  1576. * DW = current DW \
  1577. * CW = minimum CW \
  1578. */ \
  1579. *pPosDW = GET_DIACRITIC(pWeight); \
  1580. *pPosCW = MIN_CW; \
  1581. \
  1582. pPosUW++; \
  1583. pPosDW++; \
  1584. pPosCW++; \
  1585. \
  1586. break; \
  1587. } \
  1588. case ( JAMO_SPECIAL ) : \
  1589. { \
  1590. /* \
  1591. * See if it's a leading Jamo. \
  1592. */ \
  1593. if (IsLeadingJamo(*pPos)) \
  1594. { \
  1595. int OldHangulCount; /* number of old Hangul found */ \
  1596. WORD JamoUW; \
  1597. BYTE JamoXW[3]; \
  1598. \
  1599. /* \
  1600. * If the characters beginning from pPos are a valid old \
  1601. * Hangul composition, create the sortkey according to \
  1602. * the old Hangul rule. \
  1603. */ \
  1604. if ((OldHangulCount = \
  1605. MapOldHangulSortKey( pHashN, \
  1606. pPos, \
  1607. cchSrc - PosCtr, \
  1608. &JamoUW, \
  1609. JamoXW, \
  1610. fModify )) > 0) \
  1611. { \
  1612. *pPosUW = JamoUW; \
  1613. pPosUW++; \
  1614. *pPosUW = MAKE_UNICODE_WT(SM_UW_XW, JamoXW[0], FALSE); \
  1615. pPosUW++; \
  1616. *pPosUW = MAKE_UNICODE_WT(SM_UW_XW, JamoXW[1], FALSE); \
  1617. pPosUW++; \
  1618. *pPosUW = MAKE_UNICODE_WT(SM_UW_XW, JamoXW[2], FALSE); \
  1619. pPosUW++; \
  1620. \
  1621. *pPosDW = MIN_DW; \
  1622. *pPosCW = MIN_CW; \
  1623. pPosDW++; \
  1624. pPosCW++; \
  1625. \
  1626. /* \
  1627. * Decrement OldHangulCount because the for loop will \
  1628. * increase PosCtr and pPos as well. \
  1629. */ \
  1630. OldHangulCount--; \
  1631. PosCtr += OldHangulCount; \
  1632. pPos += OldHangulCount; \
  1633. \
  1634. break; \
  1635. } \
  1636. } \
  1637. \
  1638. /* \
  1639. * Otherwise, fall back to the normal behavior. \
  1640. * \
  1641. * No special case on character, so store the various \
  1642. * weights for the character. \
  1643. */ \
  1644. \
  1645. /* \
  1646. * We store the real script member in the diacritic weight \
  1647. * since both the diacritic weight and case weight are not \
  1648. * used in Korean. \
  1649. */ \
  1650. *pPosUW = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight), \
  1651. GET_ALPHA_NUMERIC(pWeight), \
  1652. fModify ); \
  1653. *pPosDW = MIN_DW; \
  1654. *pPosCW = GET_CASE(pWeight); \
  1655. pPosUW++; \
  1656. pPosDW++; \
  1657. pPosCW++; \
  1658. \
  1659. break; \
  1660. } \
  1661. case ( EXTENSION_A ) : \
  1662. { \
  1663. /* \
  1664. * UW = SM_EXT_A, AW_EXT_A, AW, DW \
  1665. * DW = miniumum DW \
  1666. * CW = minimum CW \
  1667. */ \
  1668. *pPosUW = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); \
  1669. pPosUW++; \
  1670. \
  1671. *pPosUW = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(pWeight), \
  1672. GET_DIACRITIC(pWeight), \
  1673. FALSE ); \
  1674. pPosUW++; \
  1675. \
  1676. *pPosDW = MIN_DW; \
  1677. *pPosCW = MIN_CW; \
  1678. pPosDW++; \
  1679. pPosCW++; \
  1680. \
  1681. break; \
  1682. } \
  1683. } \
  1684. }
  1685. ////////////////////////////////////////////////////////////////////////////
  1686. //
  1687. // MapSortKey
  1688. //
  1689. // Stores the sortkey weights for the given string in the destination
  1690. // buffer and returns the number of BYTES written to the buffer.
  1691. //
  1692. // 11-04-92 JulieB Created.
  1693. ////////////////////////////////////////////////////////////////////////////
  1694. int MapSortKey(
  1695. PLOC_HASH pHashN,
  1696. DWORD dwFlags,
  1697. LPCWSTR pSrc,
  1698. int cchSrc,
  1699. LPBYTE pDest,
  1700. int cbDest,
  1701. BOOL fModify)
  1702. {
  1703. register int WeightLen; // length of one set of weights
  1704. LPWSTR pUW; // ptr to Unicode Weights
  1705. LPBYTE pDW; // ptr to Diacritic Weights
  1706. LPBYTE pCW; // ptr to Case Weights
  1707. LPBYTE pXW; // ptr to Extra Weights
  1708. LPWSTR pSW; // ptr to Special Weights
  1709. LPWSTR pPosUW; // ptr to position in pUW buffer
  1710. LPBYTE pPosDW; // ptr to position in pDW buffer
  1711. LPBYTE pPosCW; // ptr to position in pCW buffer
  1712. LPBYTE pPosXW; // ptr to position in pXW buffer
  1713. LPWSTR pPosSW; // ptr to position in pSW buffer
  1714. PSORTKEY pWeight; // ptr to weight of character
  1715. BYTE SM; // script member value
  1716. BYTE CaseMask; // mask for case weight
  1717. int PosCtr; // position counter in string
  1718. LPWSTR pPos; // ptr to position in string
  1719. LPBYTE pTmp; // ptr to go through UW, XW, and SW
  1720. LPBYTE pPosTmp; // ptr to tmp position in XW
  1721. PCOMPRESS_2 pComp2; // ptr to compression 2 list
  1722. PCOMPRESS_3 pComp3; // ptr to compression 3 list
  1723. WORD pBuffer[MAX_SORTKEY_BUF_LEN]; // buffer to hold weights
  1724. int ctr; // loop counter
  1725. BOOL IfDblCompress; // if double compress possibility
  1726. BOOL fStringSort; // if using string sort method
  1727. BOOL fIgnoreSymbols; // if ignore symbols flag is set
  1728. //
  1729. // Make sure the sorting information is available in the system.
  1730. //
  1731. if ((pHashN->pSortkey == NULL) ||
  1732. (pHashN->IfIdeographFailure == TRUE))
  1733. {
  1734. KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
  1735. SetLastError(ERROR_FILE_NOT_FOUND);
  1736. return (0);
  1737. }
  1738. //
  1739. // See if the length of the string is too large for the static
  1740. // buffer. If so, allocate a buffer that is large enough.
  1741. //
  1742. if (cchSrc > MAX_SMALL_BUF_LEN)
  1743. {
  1744. //
  1745. // Allocate buffer to hold all of the weights.
  1746. // (cchSrc) * (max # of expansions) * (# of weights)
  1747. //
  1748. WeightLen = cchSrc * MAX_EXPANSION;
  1749. if ((pUW = (LPWSTR)NLS_ALLOC_MEM( WeightLen * MAX_WEIGHTS *
  1750. sizeof(WCHAR) )) == NULL)
  1751. {
  1752. SetLastError(ERROR_OUTOFMEMORY);
  1753. return (0);
  1754. }
  1755. }
  1756. else
  1757. {
  1758. WeightLen = MAX_SMALL_BUF_LEN * MAX_EXPANSION;
  1759. pUW = (LPWSTR)pBuffer;
  1760. }
  1761. //
  1762. // Set the case weight mask based on the given flags.
  1763. // If none or all of the ignore case flags are set, then
  1764. // just leave the mask as 0xff.
  1765. //
  1766. CaseMask = 0xff;
  1767. switch (dwFlags & NORM_ALL_CASE)
  1768. {
  1769. case ( NORM_IGNORECASE ) :
  1770. {
  1771. CaseMask &= CASE_UPPER_MASK;
  1772. break;
  1773. }
  1774. case ( NORM_IGNOREKANATYPE ) :
  1775. {
  1776. CaseMask &= CASE_KANA_MASK;
  1777. break;
  1778. }
  1779. case ( NORM_IGNOREWIDTH ) :
  1780. {
  1781. CaseMask &= CASE_WIDTH_MASK;
  1782. break;
  1783. }
  1784. case ( NORM_IGNORECASE | NORM_IGNOREKANATYPE ) :
  1785. {
  1786. CaseMask &= (CASE_UPPER_MASK & CASE_KANA_MASK);
  1787. break;
  1788. }
  1789. case ( NORM_IGNORECASE | NORM_IGNOREWIDTH ) :
  1790. {
  1791. CaseMask &= (CASE_UPPER_MASK & CASE_WIDTH_MASK);
  1792. break;
  1793. }
  1794. case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1795. {
  1796. CaseMask &= (CASE_KANA_MASK & CASE_WIDTH_MASK);
  1797. break;
  1798. }
  1799. case ( NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1800. {
  1801. CaseMask &= (CASE_UPPER_MASK & CASE_KANA_MASK & CASE_WIDTH_MASK);
  1802. break;
  1803. }
  1804. }
  1805. //
  1806. // Set pointers to positions of weights in buffer.
  1807. //
  1808. // UW => 4 word length (extension A and Jamo need extra words)
  1809. // DW => byte length
  1810. // CW => byte length
  1811. // XW => 4 byte length (4 weights, 1 byte each) FE Special
  1812. // SW => dword length (2 words each)
  1813. //
  1814. // Note: SW must start on a WORD boundary, so XW needs to be padded
  1815. // appropriately.
  1816. //
  1817. pDW = (LPBYTE)(pUW + (WeightLen * (NUM_BYTES_UW / sizeof(WCHAR))));
  1818. pCW = (LPBYTE)(pDW + (WeightLen * NUM_BYTES_DW));
  1819. pXW = (LPBYTE)(pCW + (WeightLen * NUM_BYTES_CW));
  1820. pSW = (LPWSTR)(pXW + (WeightLen * (NUM_BYTES_XW + NUM_BYTES_PADDING)));
  1821. pPosUW = pUW;
  1822. pPosDW = pDW;
  1823. pPosCW = pCW;
  1824. pPosXW = pXW;
  1825. pPosSW = pSW;
  1826. //
  1827. // Initialize flags and loop values.
  1828. //
  1829. fStringSort = dwFlags & SORT_STRINGSORT;
  1830. fIgnoreSymbols = dwFlags & NORM_IGNORESYMBOLS;
  1831. pPos = (LPWSTR)pSrc;
  1832. PosCtr = 1;
  1833. //
  1834. // Check if given locale has compressions.
  1835. //
  1836. if (pHashN->IfCompression == FALSE)
  1837. {
  1838. //
  1839. // Go through string, code point by code point.
  1840. //
  1841. // No compressions exist in the given locale, so
  1842. // DO NOT check for them.
  1843. //
  1844. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1845. {
  1846. //
  1847. // Get weights.
  1848. //
  1849. pWeight = &((pHashN->pSortkey)[*pPos]);
  1850. SM = GET_SCRIPT_MEMBER(pWeight);
  1851. if (SM > MAX_SPECIAL_CASE)
  1852. {
  1853. //
  1854. // No special case on character, so store the
  1855. // various weights for the character.
  1856. //
  1857. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  1858. *pPosDW = GET_DIACRITIC(pWeight);
  1859. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1860. pPosUW++;
  1861. pPosDW++;
  1862. pPosCW++;
  1863. }
  1864. else
  1865. {
  1866. SPECIAL_CASE_HANDLER( SM,
  1867. pWeight,
  1868. pHashN->pSortkey,
  1869. pTblPtrs->pExpansion,
  1870. pPosUW - pUW + 1,
  1871. fStringSort,
  1872. fIgnoreSymbols,
  1873. pPos,
  1874. (LPWSTR)pSrc,
  1875. fModify );
  1876. }
  1877. }
  1878. }
  1879. else if (pHashN->IfDblCompression == FALSE)
  1880. {
  1881. //
  1882. // Go through string, code point by code point.
  1883. //
  1884. // Compressions DO exist in the given locale, so
  1885. // check for them.
  1886. //
  1887. // No double compressions exist in the given locale,
  1888. // so DO NOT check for them.
  1889. //
  1890. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1891. {
  1892. //
  1893. // Get weights.
  1894. //
  1895. pWeight = &((pHashN->pSortkey)[*pPos]);
  1896. SM = GET_SCRIPT_MEMBER(pWeight);
  1897. if (SM > MAX_SPECIAL_CASE)
  1898. {
  1899. //
  1900. // No special case on character, but must check for
  1901. // compression characters.
  1902. //
  1903. switch (GET_COMPRESSION(pWeight))
  1904. {
  1905. case ( COMPRESS_3_MASK ) :
  1906. {
  1907. if ((PosCtr + 2) <= cchSrc)
  1908. {
  1909. ctr = pHashN->pCompHdr->Num3;
  1910. pComp3 = pHashN->pCompress3;
  1911. for (; ctr > 0; ctr--, pComp3++)
  1912. {
  1913. if ((pComp3->UCP1 == *pPos) &&
  1914. (pComp3->UCP2 == *(pPos + 1)) &&
  1915. (pComp3->UCP3 == *(pPos + 2)))
  1916. {
  1917. pWeight = &(pComp3->Weights);
  1918. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  1919. *pPosDW = GET_DIACRITIC(pWeight);
  1920. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1921. pPosUW++;
  1922. pPosDW++;
  1923. pPosCW++;
  1924. //
  1925. // Add only two to source, since one
  1926. // will be added by "for" structure.
  1927. //
  1928. pPos += 2;
  1929. PosCtr += 2;
  1930. break;
  1931. }
  1932. }
  1933. if (ctr > 0)
  1934. {
  1935. break;
  1936. }
  1937. }
  1938. //
  1939. // Fall through if not found.
  1940. //
  1941. }
  1942. case ( COMPRESS_2_MASK ) :
  1943. {
  1944. if ((PosCtr + 1) <= cchSrc)
  1945. {
  1946. ctr = pHashN->pCompHdr->Num2;
  1947. pComp2 = pHashN->pCompress2;
  1948. for (; ctr > 0; ctr--, pComp2++)
  1949. {
  1950. if ((pComp2->UCP1 == *pPos) &&
  1951. (pComp2->UCP2 == *(pPos + 1)))
  1952. {
  1953. pWeight = &(pComp2->Weights);
  1954. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  1955. *pPosDW = GET_DIACRITIC(pWeight);
  1956. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1957. pPosUW++;
  1958. pPosDW++;
  1959. pPosCW++;
  1960. //
  1961. // Add only one to source, since one
  1962. // will be added by "for" structure.
  1963. //
  1964. pPos++;
  1965. PosCtr++;
  1966. break;
  1967. }
  1968. }
  1969. if (ctr > 0)
  1970. {
  1971. break;
  1972. }
  1973. }
  1974. //
  1975. // Fall through if not found.
  1976. //
  1977. }
  1978. default :
  1979. {
  1980. //
  1981. // No possible compression for character, so store
  1982. // the various weights for the character.
  1983. //
  1984. *pPosUW = GET_UNICODE_SM_MOD(pWeight, SM, fModify);
  1985. *pPosDW = GET_DIACRITIC(pWeight);
  1986. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1987. pPosUW++;
  1988. pPosDW++;
  1989. pPosCW++;
  1990. }
  1991. }
  1992. }
  1993. else
  1994. {
  1995. SPECIAL_CASE_HANDLER( SM,
  1996. pWeight,
  1997. pHashN->pSortkey,
  1998. pTblPtrs->pExpansion,
  1999. pPosUW - pUW + 1,
  2000. fStringSort,
  2001. fIgnoreSymbols,
  2002. pPos,
  2003. (LPWSTR)pSrc,
  2004. fModify );
  2005. }
  2006. }
  2007. }
  2008. else
  2009. {
  2010. //
  2011. // Go through string, code point by code point.
  2012. //
  2013. // Compressions DO exist in the given locale, so
  2014. // check for them.
  2015. //
  2016. // Double Compressions also exist in the given locale,
  2017. // so check for them.
  2018. //
  2019. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  2020. {
  2021. //
  2022. // Get weights.
  2023. //
  2024. pWeight = &((pHashN->pSortkey)[*pPos]);
  2025. SM = GET_SCRIPT_MEMBER(pWeight);
  2026. if (SM > MAX_SPECIAL_CASE)
  2027. {
  2028. //
  2029. // No special case on character, but must check for
  2030. // compression characters and double compression
  2031. // characters.
  2032. //
  2033. IfDblCompress =
  2034. (((PosCtr + 1) <= cchSrc) &&
  2035. ((GET_DWORD_WEIGHT(pHashN, *pPos) & CMP_MASKOFF_CW) ==
  2036. (GET_DWORD_WEIGHT(pHashN, *(pPos + 1)) & CMP_MASKOFF_CW)))
  2037. ? 1
  2038. : 0;
  2039. switch (GET_COMPRESSION(pWeight))
  2040. {
  2041. case ( COMPRESS_3_MASK ) :
  2042. {
  2043. if (IfDblCompress)
  2044. {
  2045. if ((PosCtr + 3) <= cchSrc)
  2046. {
  2047. ctr = pHashN->pCompHdr->Num3;
  2048. pComp3 = pHashN->pCompress3;
  2049. for (; ctr > 0; ctr--, pComp3++)
  2050. {
  2051. if ((pComp3->UCP1 == *(pPos + 1)) &&
  2052. (pComp3->UCP2 == *(pPos + 2)) &&
  2053. (pComp3->UCP3 == *(pPos + 3)))
  2054. {
  2055. pWeight = &(pComp3->Weights);
  2056. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2057. *pPosDW = GET_DIACRITIC(pWeight);
  2058. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2059. *(pPosUW + 1) = *pPosUW;
  2060. *(pPosDW + 1) = *pPosDW;
  2061. *(pPosCW + 1) = *pPosCW;
  2062. pPosUW += 2;
  2063. pPosDW += 2;
  2064. pPosCW += 2;
  2065. //
  2066. // Add only three to source, since one
  2067. // will be added by "for" structure.
  2068. //
  2069. pPos += 3;
  2070. PosCtr += 3;
  2071. break;
  2072. }
  2073. }
  2074. if (ctr > 0)
  2075. {
  2076. break;
  2077. }
  2078. }
  2079. }
  2080. //
  2081. // Fall through if not found.
  2082. //
  2083. if ((PosCtr + 2) <= cchSrc)
  2084. {
  2085. ctr = pHashN->pCompHdr->Num3;
  2086. pComp3 = pHashN->pCompress3;
  2087. for (; ctr > 0; ctr--, pComp3++)
  2088. {
  2089. if ((pComp3->UCP1 == *pPos) &&
  2090. (pComp3->UCP2 == *(pPos + 1)) &&
  2091. (pComp3->UCP3 == *(pPos + 2)))
  2092. {
  2093. pWeight = &(pComp3->Weights);
  2094. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2095. *pPosDW = GET_DIACRITIC(pWeight);
  2096. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2097. pPosUW++;
  2098. pPosDW++;
  2099. pPosCW++;
  2100. //
  2101. // Add only two to source, since one
  2102. // will be added by "for" structure.
  2103. //
  2104. pPos += 2;
  2105. PosCtr += 2;
  2106. break;
  2107. }
  2108. }
  2109. if (ctr > 0)
  2110. {
  2111. break;
  2112. }
  2113. }
  2114. //
  2115. // Fall through if not found.
  2116. //
  2117. }
  2118. case ( COMPRESS_2_MASK ) :
  2119. {
  2120. if (IfDblCompress)
  2121. {
  2122. if ((PosCtr + 2) <= cchSrc)
  2123. {
  2124. ctr = pHashN->pCompHdr->Num2;
  2125. pComp2 = pHashN->pCompress2;
  2126. for (; ctr > 0; ctr--, pComp2++)
  2127. {
  2128. if ((pComp2->UCP1 == *(pPos + 1)) &&
  2129. (pComp2->UCP2 == *(pPos + 2)))
  2130. {
  2131. pWeight = &(pComp2->Weights);
  2132. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2133. *pPosDW = GET_DIACRITIC(pWeight);
  2134. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2135. *(pPosUW + 1) = *pPosUW;
  2136. *(pPosDW + 1) = *pPosDW;
  2137. *(pPosCW + 1) = *pPosCW;
  2138. pPosUW += 2;
  2139. pPosDW += 2;
  2140. pPosCW += 2;
  2141. //
  2142. // Add only two to source, since one
  2143. // will be added by "for" structure.
  2144. //
  2145. pPos += 2;
  2146. PosCtr += 2;
  2147. break;
  2148. }
  2149. }
  2150. if (ctr > 0)
  2151. {
  2152. break;
  2153. }
  2154. }
  2155. }
  2156. //
  2157. // Fall through if not found.
  2158. //
  2159. if ((PosCtr + 1) <= cchSrc)
  2160. {
  2161. ctr = pHashN->pCompHdr->Num2;
  2162. pComp2 = pHashN->pCompress2;
  2163. for (; ctr > 0; ctr--, pComp2++)
  2164. {
  2165. if ((pComp2->UCP1 == *pPos) &&
  2166. (pComp2->UCP2 == *(pPos + 1)))
  2167. {
  2168. pWeight = &(pComp2->Weights);
  2169. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2170. *pPosDW = GET_DIACRITIC(pWeight);
  2171. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2172. pPosUW++;
  2173. pPosDW++;
  2174. pPosCW++;
  2175. //
  2176. // Add only one to source, since one
  2177. // will be added by "for" structure.
  2178. //
  2179. pPos++;
  2180. PosCtr++;
  2181. break;
  2182. }
  2183. }
  2184. if (ctr > 0)
  2185. {
  2186. break;
  2187. }
  2188. }
  2189. //
  2190. // Fall through if not found.
  2191. //
  2192. }
  2193. default :
  2194. {
  2195. //
  2196. // No possible compression for character, so store
  2197. // the various weights for the character.
  2198. //
  2199. *pPosUW = GET_UNICODE_SM_MOD(pWeight, SM, fModify);
  2200. *pPosDW = GET_DIACRITIC(pWeight);
  2201. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2202. pPosUW++;
  2203. pPosDW++;
  2204. pPosCW++;
  2205. }
  2206. }
  2207. }
  2208. else
  2209. {
  2210. SPECIAL_CASE_HANDLER( SM,
  2211. pWeight,
  2212. pHashN->pSortkey,
  2213. pTblPtrs->pExpansion,
  2214. pPosUW - pUW + 1,
  2215. fStringSort,
  2216. fIgnoreSymbols,
  2217. pPos,
  2218. (LPWSTR)pSrc,
  2219. fModify );
  2220. }
  2221. }
  2222. }
  2223. //
  2224. // Store the final sortkey weights in the destination buffer.
  2225. //
  2226. // PosCtr will be a BYTE count.
  2227. //
  2228. PosCtr = 0;
  2229. //
  2230. // If the destination value is zero, then just return the
  2231. // length of the string that would be returned. Do NOT touch pDest.
  2232. //
  2233. if (cbDest == 0)
  2234. {
  2235. //
  2236. // Count the Unicode Weights.
  2237. //
  2238. PosCtr += (int)((LPBYTE)pPosUW - (LPBYTE)pUW);
  2239. //
  2240. // Count the Separator.
  2241. //
  2242. PosCtr++;
  2243. //
  2244. // Count the Diacritic Weights.
  2245. //
  2246. // - Eliminate minimum DW.
  2247. // - Count the number of diacritic weights.
  2248. //
  2249. if (!(dwFlags & NORM_IGNORENONSPACE))
  2250. {
  2251. pPosDW--;
  2252. if (pHashN->IfReverseDW == TRUE)
  2253. {
  2254. //
  2255. // Reverse diacritics:
  2256. // - remove diacritics from left to right.
  2257. // - count diacritics from right to left.
  2258. //
  2259. while ((pDW <= pPosDW) && (*pDW <= MIN_DW))
  2260. {
  2261. pDW++;
  2262. }
  2263. PosCtr += (int)(pPosDW - pDW + 1);
  2264. }
  2265. else
  2266. {
  2267. //
  2268. // Regular diacritics:
  2269. // - remove diacritics from right to left.
  2270. // - count diacritics from left to right.
  2271. //
  2272. while ((pPosDW >= pDW) && (*pPosDW <= MIN_DW))
  2273. {
  2274. pPosDW--;
  2275. }
  2276. PosCtr += (int)(pPosDW - pDW + 1);
  2277. }
  2278. }
  2279. //
  2280. // Count the Separator.
  2281. //
  2282. PosCtr++;
  2283. //
  2284. // Count the Case Weights.
  2285. //
  2286. // - Eliminate minimum CW.
  2287. // - Count the number of case weights.
  2288. //
  2289. if ((dwFlags & NORM_DROP_CW) != NORM_DROP_CW)
  2290. {
  2291. pPosCW--;
  2292. while ((pPosCW >= pCW) && (*pPosCW <= MIN_CW))
  2293. {
  2294. pPosCW--;
  2295. }
  2296. PosCtr += (int)(pPosCW - pCW + 1);
  2297. }
  2298. //
  2299. // Count the Separator.
  2300. //
  2301. PosCtr++;
  2302. //
  2303. // Count the Extra Weights for Far East Special.
  2304. //
  2305. // - Eliminate unnecessary XW.
  2306. // - Count the number of extra weights and separators.
  2307. //
  2308. if (pXW < pPosXW)
  2309. {
  2310. if (dwFlags & NORM_IGNORENONSPACE)
  2311. {
  2312. //
  2313. // Ignore 4W and 5W. Must count separators for
  2314. // 4W and 5W, though.
  2315. //
  2316. PosCtr += 2;
  2317. ctr = 2;
  2318. }
  2319. else
  2320. {
  2321. ctr = 0;
  2322. }
  2323. pPosXW--;
  2324. for (; ctr < NUM_BYTES_XW; ctr++)
  2325. {
  2326. pTmp = pXW + (WeightLen * ctr);
  2327. pPosTmp = pPosXW + (WeightLen * ctr);
  2328. while ((pPosTmp >= pTmp) && (*pPosTmp == pXWDrop[ctr]))
  2329. {
  2330. pPosTmp--;
  2331. }
  2332. PosCtr += (int)(pPosTmp - pTmp + 1);
  2333. //
  2334. // Count the Separator.
  2335. //
  2336. PosCtr++;
  2337. }
  2338. }
  2339. //
  2340. // Count the Separator.
  2341. //
  2342. PosCtr++;
  2343. //
  2344. // Count the Special Weights.
  2345. //
  2346. if (!fIgnoreSymbols)
  2347. {
  2348. PosCtr += (int)((LPBYTE)pPosSW - (LPBYTE)pSW);
  2349. }
  2350. //
  2351. // Count the Terminator.
  2352. //
  2353. PosCtr++;
  2354. }
  2355. else
  2356. {
  2357. //
  2358. // Store the Unicode Weights in the destination buffer.
  2359. //
  2360. // - Make sure destination buffer is large enough.
  2361. // - Copy unicode weights to destination buffer.
  2362. //
  2363. // NOTE: cbDest is the number of BYTES.
  2364. // Also, must add one to length for separator.
  2365. //
  2366. if (cbDest < (((LPBYTE)pPosUW - (LPBYTE)pUW) + 1))
  2367. {
  2368. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2369. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2370. return (0);
  2371. }
  2372. pTmp = (LPBYTE)pUW;
  2373. while (pTmp < (LPBYTE)pPosUW)
  2374. {
  2375. //
  2376. // Copy Unicode weight to destination buffer.
  2377. //
  2378. // NOTE: Unicode Weight is stored in the data file as
  2379. // Alphanumeric Weight, Script Member
  2380. // so that the WORD value will be read correctly.
  2381. //
  2382. pDest[PosCtr] = *(pTmp + 1);
  2383. pDest[PosCtr + 1] = *pTmp;
  2384. PosCtr += 2;
  2385. pTmp += 2;
  2386. }
  2387. //
  2388. // Copy Separator to destination buffer.
  2389. //
  2390. // Destination buffer is large enough to hold the separator,
  2391. // since it was checked with the Unicode weights above.
  2392. //
  2393. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2394. PosCtr++;
  2395. //
  2396. // Store the Diacritic Weights in the destination buffer.
  2397. //
  2398. // - Eliminate minimum DW.
  2399. // - Make sure destination buffer is large enough.
  2400. // - Copy diacritic weights to destination buffer.
  2401. //
  2402. if (!(dwFlags & NORM_IGNORENONSPACE))
  2403. {
  2404. pPosDW--;
  2405. if (pHashN->IfReverseDW == TRUE)
  2406. {
  2407. //
  2408. // Reverse diacritics:
  2409. // - remove diacritics from left to right.
  2410. // - store diacritics from right to left.
  2411. //
  2412. while ((pDW <= pPosDW) && (*pDW <= MIN_DW))
  2413. {
  2414. pDW++;
  2415. }
  2416. if ((cbDest - PosCtr) <= (pPosDW - pDW + 1))
  2417. {
  2418. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2419. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2420. return (0);
  2421. }
  2422. while (pPosDW >= pDW)
  2423. {
  2424. pDest[PosCtr] = *pPosDW;
  2425. PosCtr++;
  2426. pPosDW--;
  2427. }
  2428. }
  2429. else
  2430. {
  2431. //
  2432. // Regular diacritics:
  2433. // - remove diacritics from right to left.
  2434. // - store diacritics from left to right.
  2435. //
  2436. while ((pPosDW >= pDW) && (*pPosDW <= MIN_DW))
  2437. {
  2438. pPosDW--;
  2439. }
  2440. if ((cbDest - PosCtr) <= (pPosDW - pDW + 1))
  2441. {
  2442. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2443. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2444. return (0);
  2445. }
  2446. while (pDW <= pPosDW)
  2447. {
  2448. pDest[PosCtr] = *pDW;
  2449. PosCtr++;
  2450. pDW++;
  2451. }
  2452. }
  2453. }
  2454. //
  2455. // Copy Separator to destination buffer if the destination
  2456. // buffer is large enough.
  2457. //
  2458. if (PosCtr == cbDest)
  2459. {
  2460. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2461. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2462. return (0);
  2463. }
  2464. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2465. PosCtr++;
  2466. //
  2467. // Store the Case Weights in the destination buffer.
  2468. //
  2469. // - Eliminate minimum CW.
  2470. // - Make sure destination buffer is large enough.
  2471. // - Copy case weights to destination buffer.
  2472. //
  2473. if ((dwFlags & NORM_DROP_CW) != NORM_DROP_CW)
  2474. {
  2475. pPosCW--;
  2476. while ((pPosCW >= pCW) && (*pPosCW <= MIN_CW))
  2477. {
  2478. pPosCW--;
  2479. }
  2480. if ((cbDest - PosCtr) <= (pPosCW - pCW + 1))
  2481. {
  2482. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2483. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2484. return (0);
  2485. }
  2486. while (pCW <= pPosCW)
  2487. {
  2488. pDest[PosCtr] = *pCW;
  2489. PosCtr++;
  2490. pCW++;
  2491. }
  2492. }
  2493. //
  2494. // Copy Separator to destination buffer if the destination
  2495. // buffer is large enough.
  2496. //
  2497. if (PosCtr == cbDest)
  2498. {
  2499. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2500. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2501. return (0);
  2502. }
  2503. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2504. PosCtr++;
  2505. //
  2506. // Store the Extra Weights in the destination buffer for
  2507. // Far East Special.
  2508. //
  2509. // - Eliminate unnecessary XW.
  2510. // - Make sure destination buffer is large enough.
  2511. // - Copy extra weights to destination buffer.
  2512. //
  2513. if (pXW < pPosXW)
  2514. {
  2515. if (dwFlags & NORM_IGNORENONSPACE)
  2516. {
  2517. //
  2518. // Ignore 4W and 5W. Must count separators for
  2519. // 4W and 5W, though.
  2520. //
  2521. if ((cbDest - PosCtr) <= 2)
  2522. {
  2523. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2524. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2525. return (0);
  2526. }
  2527. pDest[PosCtr] = pXWSeparator[0];
  2528. pDest[PosCtr + 1] = pXWSeparator[1];
  2529. PosCtr += 2;
  2530. ctr = 2;
  2531. }
  2532. else
  2533. {
  2534. ctr = 0;
  2535. }
  2536. pPosXW--;
  2537. for (; ctr < NUM_BYTES_XW; ctr++)
  2538. {
  2539. pTmp = pXW + (WeightLen * ctr);
  2540. pPosTmp = pPosXW + (WeightLen * ctr);
  2541. while ((pPosTmp >= pTmp) && (*pPosTmp == pXWDrop[ctr]))
  2542. {
  2543. pPosTmp--;
  2544. }
  2545. if ((cbDest - PosCtr) <= (pPosTmp - pTmp + 1))
  2546. {
  2547. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2548. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2549. return (0);
  2550. }
  2551. while (pTmp <= pPosTmp)
  2552. {
  2553. pDest[PosCtr] = *pTmp;
  2554. PosCtr++;
  2555. pTmp++;
  2556. }
  2557. //
  2558. // Copy Separator to destination buffer.
  2559. //
  2560. pDest[PosCtr] = pXWSeparator[ctr];
  2561. PosCtr++;
  2562. }
  2563. }
  2564. //
  2565. // Copy Separator to destination buffer if the destination
  2566. // buffer is large enough.
  2567. //
  2568. if (PosCtr == cbDest)
  2569. {
  2570. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2571. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2572. return (0);
  2573. }
  2574. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2575. PosCtr++;
  2576. //
  2577. // Store the Special Weights in the destination buffer.
  2578. //
  2579. // - Make sure destination buffer is large enough.
  2580. // - Copy special weights to destination buffer.
  2581. //
  2582. if (!fIgnoreSymbols)
  2583. {
  2584. if ((cbDest - PosCtr) <= (((LPBYTE)pPosSW - (LPBYTE)pSW)))
  2585. {
  2586. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2587. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2588. return (0);
  2589. }
  2590. pTmp = (LPBYTE)pSW;
  2591. while (pTmp < (LPBYTE)pPosSW)
  2592. {
  2593. pDest[PosCtr] = *pTmp;
  2594. pDest[PosCtr + 1] = *(pTmp + 1);
  2595. //
  2596. // NOTE: Special Weight is stored in the data file as
  2597. // Weight, Script
  2598. // so that the WORD value will be read correctly.
  2599. //
  2600. pDest[PosCtr + 2] = *(pTmp + 3);
  2601. pDest[PosCtr + 3] = *(pTmp + 2);
  2602. PosCtr += 4;
  2603. pTmp += 4;
  2604. }
  2605. }
  2606. //
  2607. // Copy Terminator to destination buffer if the destination
  2608. // buffer is large enough.
  2609. //
  2610. if (PosCtr == cbDest)
  2611. {
  2612. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2613. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2614. return (0);
  2615. }
  2616. pDest[PosCtr] = SORTKEY_TERMINATOR;
  2617. PosCtr++;
  2618. }
  2619. //
  2620. // Free the buffer used for the weights, if one was allocated.
  2621. //
  2622. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2623. //
  2624. // Return number of BYTES written to destination buffer.
  2625. //
  2626. return (PosCtr);
  2627. }
  2628. ////////////////////////////////////////////////////////////////////////////
  2629. //
  2630. // MapNormalization
  2631. //
  2632. // Stores the result of the normalization for the given string in the
  2633. // destination buffer, and returns the number of wide characters written
  2634. // to the buffer.
  2635. //
  2636. // 11-04-92 JulieB Created.
  2637. ////////////////////////////////////////////////////////////////////////////
  2638. int MapNormalization(
  2639. PLOC_HASH pHashN,
  2640. DWORD dwFlags,
  2641. LPCWSTR pSrc,
  2642. int cchSrc,
  2643. LPWSTR pDest,
  2644. int cchDest)
  2645. {
  2646. int ctr; // source char counter
  2647. int ctr2 = 0; // destination char counter
  2648. //
  2649. // Make sure the ctype table is available in the system.
  2650. //
  2651. if (GetCTypeFileInfo())
  2652. {
  2653. SetLastError(ERROR_FILE_NOT_FOUND);
  2654. return (0);
  2655. }
  2656. //
  2657. // Make sure the sorting information is available in the system.
  2658. //
  2659. if ((pHashN->pSortkey == NULL) ||
  2660. (pHashN->IfIdeographFailure == TRUE))
  2661. {
  2662. KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
  2663. SetLastError(ERROR_FILE_NOT_FOUND);
  2664. return (0);
  2665. }
  2666. //
  2667. // Normalize based on the flags.
  2668. //
  2669. switch (dwFlags)
  2670. {
  2671. case ( NORM_IGNORENONSPACE ) :
  2672. {
  2673. //
  2674. // If the destination value is zero, then only return
  2675. // the count of characters. Do NOT touch pDest.
  2676. //
  2677. if (cchDest == 0)
  2678. {
  2679. //
  2680. // Count the number of characters that would be written
  2681. // to the destination buffer.
  2682. //
  2683. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2684. {
  2685. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2686. {
  2687. //
  2688. // Not a nonspacing character, so just write the
  2689. // character to the destination string.
  2690. //
  2691. ctr2++;
  2692. }
  2693. else if (!(IS_NONSPACE_ONLY(pHashN->pSortkey, pSrc[ctr])))
  2694. {
  2695. //
  2696. // PreComposed Form. Write the base character only.
  2697. //
  2698. ctr2++;
  2699. }
  2700. //
  2701. // Else - nonspace character only, so don't write
  2702. // anything.
  2703. //
  2704. }
  2705. }
  2706. else
  2707. {
  2708. //
  2709. // Store the normalized string in the destination string.
  2710. //
  2711. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2712. ctr++)
  2713. {
  2714. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2715. {
  2716. //
  2717. // Not a nonspacing character, so just write the
  2718. // character to the destination string.
  2719. //
  2720. pDest[ctr2] = pSrc[ctr];
  2721. ctr2++;
  2722. }
  2723. else if (!(IS_NONSPACE_ONLY(pHashN->pSortkey, pSrc[ctr])))
  2724. {
  2725. //
  2726. // PreComposed Form. Write the base character only.
  2727. //
  2728. GET_BASE_CHAR(pSrc[ctr], pDest[ctr2]);
  2729. if (pDest[ctr2] == 0)
  2730. {
  2731. //
  2732. // No translation for precomposed character,
  2733. // so must write the precomposed character.
  2734. //
  2735. pDest[ctr2] = pSrc[ctr];
  2736. }
  2737. ctr2++;
  2738. }
  2739. //
  2740. // Else - nonspace character only, so don't write
  2741. // anything.
  2742. //
  2743. }
  2744. }
  2745. break;
  2746. }
  2747. case ( NORM_IGNORESYMBOLS ) :
  2748. {
  2749. //
  2750. // If the destination value is zero, then only return
  2751. // the count of characters. Do NOT touch pDest.
  2752. //
  2753. if (cchDest == 0)
  2754. {
  2755. //
  2756. // Count the number of characters that would be written
  2757. // to the destination buffer.
  2758. //
  2759. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2760. {
  2761. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2762. {
  2763. //
  2764. // Not a symbol, so write the character.
  2765. //
  2766. ctr2++;
  2767. }
  2768. }
  2769. }
  2770. else
  2771. {
  2772. //
  2773. // Store the normalized string in the destination string.
  2774. //
  2775. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2776. ctr++)
  2777. {
  2778. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2779. {
  2780. //
  2781. // Not a symbol, so write the character.
  2782. //
  2783. pDest[ctr2] = pSrc[ctr];
  2784. ctr2++;
  2785. }
  2786. }
  2787. }
  2788. break;
  2789. }
  2790. case ( NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS ) :
  2791. {
  2792. //
  2793. // If the destination value is zero, then only return
  2794. // the count of characters. Do NOT touch pDest.
  2795. //
  2796. if (cchDest == 0)
  2797. {
  2798. //
  2799. // Count the number of characters that would be written
  2800. // to the destination buffer.
  2801. //
  2802. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2803. {
  2804. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2805. {
  2806. //
  2807. // Not a symbol, so check for nonspace.
  2808. //
  2809. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2810. {
  2811. //
  2812. // Not a nonspacing character, so just write the
  2813. // character to the destination string.
  2814. //
  2815. ctr2++;
  2816. }
  2817. else if (!(IS_NONSPACE_ONLY( pHashN->pSortkey,
  2818. pSrc[ctr] )))
  2819. {
  2820. //
  2821. // PreComposed Form. Write the base character
  2822. // only.
  2823. //
  2824. ctr2++;
  2825. }
  2826. //
  2827. // Else - nonspace character only, so don't write
  2828. // anything.
  2829. //
  2830. }
  2831. }
  2832. }
  2833. else
  2834. {
  2835. //
  2836. // Store the normalized string in the destination string.
  2837. //
  2838. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2839. ctr++)
  2840. {
  2841. //
  2842. // Check for symbol and nonspace.
  2843. //
  2844. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2845. {
  2846. //
  2847. // Not a symbol, so check for nonspace.
  2848. //
  2849. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2850. {
  2851. //
  2852. // Not a nonspacing character, so just write the
  2853. // character to the destination string.
  2854. //
  2855. pDest[ctr2] = pSrc[ctr];
  2856. ctr2++;
  2857. }
  2858. else if (!(IS_NONSPACE_ONLY( pHashN->pSortkey,
  2859. pSrc[ctr] )))
  2860. {
  2861. //
  2862. // PreComposed Form. Write the base character
  2863. // only.
  2864. //
  2865. GET_BASE_CHAR(pSrc[ctr], pDest[ctr2]);
  2866. if (pDest[ctr2] == 0)
  2867. {
  2868. //
  2869. // No translation for precomposed character,
  2870. // so must write the precomposed character.
  2871. //
  2872. pDest[ctr2] = pSrc[ctr];
  2873. }
  2874. ctr2++;
  2875. }
  2876. //
  2877. // Else - nonspace character only, so don't write
  2878. // anything.
  2879. //
  2880. }
  2881. }
  2882. }
  2883. break;
  2884. }
  2885. }
  2886. //
  2887. // Return the number of wide characters written.
  2888. //
  2889. return (ctr2);
  2890. }
  2891. ////////////////////////////////////////////////////////////////////////////
  2892. //
  2893. // MapKanaWidth
  2894. //
  2895. // Stores the result of the Kana, Width, and/or Casing mappings for the
  2896. // given string in the destination buffer, and returns the number of wide
  2897. // characters written to the buffer.
  2898. //
  2899. // 07-26-93 JulieB Created.
  2900. ////////////////////////////////////////////////////////////////////////////
  2901. int MapKanaWidth(
  2902. PLOC_HASH pHashN,
  2903. DWORD dwFlags,
  2904. LPCWSTR pSrc,
  2905. int cchSrc,
  2906. LPWSTR pDest,
  2907. int cchDest)
  2908. {
  2909. int ctr; // loop counter
  2910. PCASE pCase; // ptr to case table (if case flag is set)
  2911. //
  2912. // See if lower or upper case flags are present.
  2913. //
  2914. if (dwFlags & LCMAP_LOWERCASE)
  2915. {
  2916. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  2917. ? pHashN->pLowerLinguist
  2918. : pHashN->pLowerCase;
  2919. }
  2920. else if (dwFlags & LCMAP_UPPERCASE)
  2921. {
  2922. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  2923. ? pHashN->pUpperLinguist
  2924. : pHashN->pUpperCase;
  2925. }
  2926. else
  2927. {
  2928. pCase = NULL;
  2929. }
  2930. //
  2931. // Remove lower, upper, and linguistic casing flags.
  2932. //
  2933. dwFlags &= ~(LCMAP_LOWERCASE | LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING);
  2934. //
  2935. // Map the string based on the given flags.
  2936. //
  2937. switch (dwFlags)
  2938. {
  2939. case ( LCMAP_HIRAGANA ) :
  2940. case ( LCMAP_KATAKANA ) :
  2941. {
  2942. //
  2943. // If the destination value is zero, then just return the
  2944. // length of the source string. Do NOT touch pDest.
  2945. //
  2946. if (cchDest == 0)
  2947. {
  2948. return (cchSrc);
  2949. }
  2950. //
  2951. // If cchSrc is greater than cchDest, then the destination
  2952. // buffer is too small to hold the string. Return an error.
  2953. //
  2954. if (cchSrc > cchDest)
  2955. {
  2956. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2957. return (0);
  2958. }
  2959. if (dwFlags == LCMAP_HIRAGANA)
  2960. {
  2961. //
  2962. // Map all Katakana full width to Hiragana full width.
  2963. // Katakana half width will remain Katakana half width.
  2964. //
  2965. if (pCase)
  2966. {
  2967. for (ctr = 0; ctr < cchSrc; ctr++)
  2968. {
  2969. pDest[ctr] = GET_KANA(pTblPtrs->pHiragana, pSrc[ctr]);
  2970. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  2971. }
  2972. }
  2973. else
  2974. {
  2975. for (ctr = 0; ctr < cchSrc; ctr++)
  2976. {
  2977. pDest[ctr] = GET_KANA(pTblPtrs->pHiragana, pSrc[ctr]);
  2978. }
  2979. }
  2980. }
  2981. else
  2982. {
  2983. //
  2984. // Map all Hiragana full width to Katakana full width.
  2985. // Hiragana half width does not exist.
  2986. //
  2987. if (pCase)
  2988. {
  2989. for (ctr = 0; ctr < cchSrc; ctr++)
  2990. {
  2991. pDest[ctr] = GET_KANA(pTblPtrs->pKatakana, pSrc[ctr]);
  2992. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  2993. }
  2994. }
  2995. else
  2996. {
  2997. for (ctr = 0; ctr < cchSrc; ctr++)
  2998. {
  2999. pDest[ctr] = GET_KANA(pTblPtrs->pKatakana, pSrc[ctr]);
  3000. }
  3001. }
  3002. }
  3003. //
  3004. // Return the number of characters mapped.
  3005. //
  3006. return (cchSrc);
  3007. break;
  3008. }
  3009. case ( LCMAP_HALFWIDTH ) :
  3010. {
  3011. //
  3012. // Map all chars to half width.
  3013. //
  3014. return (MapHalfKana( pSrc,
  3015. cchSrc,
  3016. pDest,
  3017. cchDest,
  3018. NULL,
  3019. pCase ));
  3020. break;
  3021. }
  3022. case ( LCMAP_FULLWIDTH ) :
  3023. {
  3024. //
  3025. // Map all chars to full width.
  3026. //
  3027. return (MapFullKana( pSrc,
  3028. cchSrc,
  3029. pDest,
  3030. cchDest,
  3031. NULL,
  3032. pCase ));
  3033. break;
  3034. }
  3035. case ( LCMAP_HIRAGANA | LCMAP_HALFWIDTH ) :
  3036. {
  3037. //
  3038. // This combination of flags is strange, because
  3039. // Hiragana is only full width. So, the Hiragana flag
  3040. // is the most important. Full width Katakana will be
  3041. // mapped to full width Hiragana, not half width
  3042. // Katakana.
  3043. //
  3044. // Map to Hiragana, then Half Width.
  3045. //
  3046. return (MapHalfKana( pSrc,
  3047. cchSrc,
  3048. pDest,
  3049. cchDest,
  3050. pTblPtrs->pHiragana,
  3051. pCase ));
  3052. break;
  3053. }
  3054. case ( LCMAP_HIRAGANA | LCMAP_FULLWIDTH ) :
  3055. {
  3056. //
  3057. // Since Hiragana is only FULL width, the mapping to
  3058. // width must be done first to convert all half width
  3059. // Katakana to full width Katakana before trying to
  3060. // map to Hiragana.
  3061. //
  3062. // Map to Full Width, then Hiragana.
  3063. //
  3064. return (MapFullKana( pSrc,
  3065. cchSrc,
  3066. pDest,
  3067. cchDest,
  3068. pTblPtrs->pHiragana,
  3069. pCase ));
  3070. break;
  3071. }
  3072. case ( LCMAP_KATAKANA | LCMAP_HALFWIDTH ) :
  3073. {
  3074. //
  3075. // Since Hiragana is only FULL width, the mapping to
  3076. // Katakana must be done first to convert all Hiragana
  3077. // to Katakana before trying to map to half width.
  3078. //
  3079. // Map to Katakana, then Half Width.
  3080. //
  3081. return (MapHalfKana( pSrc,
  3082. cchSrc,
  3083. pDest,
  3084. cchDest,
  3085. pTblPtrs->pKatakana,
  3086. pCase ));
  3087. break;
  3088. }
  3089. case ( LCMAP_KATAKANA | LCMAP_FULLWIDTH ) :
  3090. {
  3091. //
  3092. // Since Hiragana is only FULL width, it doesn't matter
  3093. // which way the mapping is done for this combination.
  3094. //
  3095. // Map to Full Width, then Katakana.
  3096. //
  3097. return (MapFullKana( pSrc,
  3098. cchSrc,
  3099. pDest,
  3100. cchDest,
  3101. pTblPtrs->pKatakana,
  3102. pCase ));
  3103. break;
  3104. }
  3105. default :
  3106. {
  3107. //
  3108. // Return error.
  3109. //
  3110. return (0);
  3111. }
  3112. }
  3113. }
  3114. ////////////////////////////////////////////////////////////////////////////
  3115. //
  3116. // MapHalfKana
  3117. //
  3118. // Stores the result of the half width and Kana mapping for the given string
  3119. // in the destination buffer, and returns the number of wide characters
  3120. // written to the buffer.
  3121. //
  3122. // This first converts the precomposed characters to their composite forms,
  3123. // and then maps all characters to their half width forms. This handles the
  3124. // case where the full width precomposed form should map to TWO half width
  3125. // code points (composite form). The half width precomposed forms do not
  3126. // exist in Unicode.
  3127. //
  3128. // 11-04-93 JulieB Created.
  3129. ////////////////////////////////////////////////////////////////////////////
  3130. int MapHalfKana(
  3131. LPCWSTR pSrc,
  3132. int cchSrc,
  3133. LPWSTR pDest,
  3134. int cchDest,
  3135. PKANA pKana,
  3136. PCASE pCase)
  3137. {
  3138. int Count; // count of characters written
  3139. int ctr = 0; // loop counter
  3140. int ct; // loop counter
  3141. LPWSTR pBuf; // ptr to destination buffer
  3142. LPWSTR pEndBuf; // ptr to end of destination buffer
  3143. LPWSTR pPosDest; // ptr to position in destination buffer
  3144. LPWSTR *ppIncr; // points to ptr to increment
  3145. WCHAR pTmp[MAX_COMPOSITE]; // ptr to temporary buffer
  3146. LPWSTR pEndTmp; // ptr to end of temporary buffer
  3147. //
  3148. // Initialize the destination pointers.
  3149. //
  3150. pEndTmp = pTmp + MAX_COMPOSITE;
  3151. if (cchDest == 0)
  3152. {
  3153. //
  3154. // Do not touch the pDest pointer. Use the pTmp buffer and
  3155. // initialize the end pointer.
  3156. //
  3157. pBuf = pTmp;
  3158. pEndBuf = pEndTmp;
  3159. //
  3160. // This is a bogus pointer and will never be touched. It just
  3161. // increments this pointer into oblivion.
  3162. //
  3163. pDest = pBuf;
  3164. ppIncr = &pDest;
  3165. }
  3166. else
  3167. {
  3168. //
  3169. // Initialize the pointers. Use the pDest buffer.
  3170. //
  3171. pBuf = pDest;
  3172. pEndBuf = pBuf + cchDest;
  3173. ppIncr = &pBuf;
  3174. }
  3175. //
  3176. // Search through the source string. Convert all precomposed
  3177. // forms to their composite form before converting to half width.
  3178. //
  3179. while ((ctr < cchSrc) && (pBuf < pEndBuf))
  3180. {
  3181. //
  3182. // Get the character to convert. If we need to convert to
  3183. // kana, do it.
  3184. //
  3185. if (pKana)
  3186. {
  3187. *pTmp = GET_KANA(pKana, pSrc[ctr]);
  3188. }
  3189. else
  3190. {
  3191. *pTmp = pSrc[ctr];
  3192. }
  3193. //
  3194. // Convert to its composite form (if exists).
  3195. //
  3196. // NOTE: Must use the tmp buffer in case the destination buffer
  3197. // isn't large enough to hold the composite form.
  3198. //
  3199. Count = InsertCompositeForm(pTmp, pEndTmp);
  3200. //
  3201. // Convert to half width (if exists) and case (if appropriate).
  3202. //
  3203. pPosDest = pTmp;
  3204. if (pCase)
  3205. {
  3206. for (ct = Count; ct > 0; ct--)
  3207. {
  3208. *pPosDest = GET_HALF_WIDTH(pTblPtrs->pHalfWidth, *pPosDest);
  3209. *pPosDest = GET_LOWER_UPPER_CASE(pCase, *pPosDest);
  3210. pPosDest++;
  3211. }
  3212. }
  3213. else
  3214. {
  3215. for (ct = Count; ct > 0; ct--)
  3216. {
  3217. *pPosDest = GET_HALF_WIDTH(pTblPtrs->pHalfWidth, *pPosDest);
  3218. pPosDest++;
  3219. }
  3220. }
  3221. //
  3222. // Convert back to its precomposed form (if exists).
  3223. //
  3224. if (Count > 1)
  3225. {
  3226. //
  3227. // Get the precomposed form.
  3228. //
  3229. // ct is the number of code points used from the
  3230. // composite form.
  3231. //
  3232. ct = InsertPreComposedForm(pTmp, pPosDest, pBuf);
  3233. if (ct > 1)
  3234. {
  3235. //
  3236. // Precomposed form was found. Need to make sure all
  3237. // of the composite chars were used.
  3238. //
  3239. if (ct == Count)
  3240. {
  3241. //
  3242. // All composite chars were used. Increment by 1.
  3243. //
  3244. (*ppIncr)++;
  3245. }
  3246. else
  3247. {
  3248. //
  3249. // Not all composite chars were used. Need to copy
  3250. // the rest of the composite chars from the tmp buffer
  3251. // to the destination buffer.
  3252. //
  3253. (*ppIncr)++;
  3254. Count -= ct;
  3255. if (pBuf + Count > pEndBuf)
  3256. {
  3257. break;
  3258. }
  3259. RtlMoveMemory(pBuf, pTmp + ct, Count * sizeof(WCHAR));
  3260. (*ppIncr) += Count;
  3261. }
  3262. }
  3263. else
  3264. {
  3265. //
  3266. // Precomposed form was NOT found. Need to copy the
  3267. // composite form from the tmp buffer to the destination
  3268. // buffer.
  3269. //
  3270. if (pBuf + Count > pEndBuf)
  3271. {
  3272. break;
  3273. }
  3274. RtlMoveMemory(pBuf, pTmp, Count * sizeof(WCHAR));
  3275. (*ppIncr) += Count;
  3276. }
  3277. }
  3278. else
  3279. {
  3280. //
  3281. // Only one character (no composite form), so just copy it
  3282. // from the tmp buffer to the destination buffer.
  3283. //
  3284. *pBuf = *pTmp;
  3285. (*ppIncr)++;
  3286. }
  3287. ctr++;
  3288. }
  3289. //
  3290. // Return the appropriate number of characters.
  3291. //
  3292. if (cchDest == 0)
  3293. {
  3294. //
  3295. // Return the number of characters written to the buffer.
  3296. //
  3297. return ((int)((*ppIncr) - pTmp));
  3298. }
  3299. else
  3300. {
  3301. //
  3302. // Make sure the given buffer was large enough to hold the
  3303. // mapping.
  3304. //
  3305. if (ctr < cchSrc)
  3306. {
  3307. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3308. return (0);
  3309. }
  3310. //
  3311. // Return the number of characters written to the buffer.
  3312. //
  3313. return ((int)((*ppIncr) - pDest));
  3314. }
  3315. }
  3316. ////////////////////////////////////////////////////////////////////////////
  3317. //
  3318. // MapFullKana
  3319. //
  3320. // Stores the result of the full width and Kana mapping for the given string
  3321. // in the destination buffer, and returns the number of wide characters
  3322. // written to the buffer.
  3323. //
  3324. // This first converts the characters to full width, and then maps all
  3325. // composite characters to their precomposed forms. This handles the case
  3326. // where the half width composite form (TWO code points) should map to a
  3327. // full width precomposed form (ONE full width code point). The half
  3328. // width precomposed forms do not exist in Unicode and we need the full
  3329. // width precomposed forms to round trip with the TWO half width code
  3330. // points.
  3331. //
  3332. // 11-04-93 JulieB Created.
  3333. ////////////////////////////////////////////////////////////////////////////
  3334. int MapFullKana(
  3335. LPCWSTR pSrc,
  3336. int cchSrc,
  3337. LPWSTR pDest,
  3338. int cchDest,
  3339. PKANA pKana,
  3340. PCASE pCase)
  3341. {
  3342. int Count; // count of characters
  3343. LPWSTR pPosSrc; // ptr to position in source buffer
  3344. LPWSTR pEndSrc; // ptr to end of source buffer
  3345. LPWSTR pBuf; // ptr to destination buffer
  3346. LPWSTR pEndBuf; // ptr to end of destination buffer
  3347. LPWSTR *ppIncr; // points to ptr to increment
  3348. WCHAR pTmp[MAX_COMPOSITE]; // ptr to temporary buffer
  3349. //
  3350. // Initialize source string pointers.
  3351. //
  3352. pPosSrc = (LPWSTR)pSrc;
  3353. pEndSrc = pPosSrc + cchSrc;
  3354. //
  3355. // Initialize the destination pointers.
  3356. //
  3357. if (cchDest == 0)
  3358. {
  3359. //
  3360. // Do not touch the pDest pointer. Use the pTmp buffer and
  3361. // initialize the end pointer.
  3362. //
  3363. pBuf = pTmp;
  3364. pEndBuf = pTmp + MAX_COMPOSITE;
  3365. //
  3366. // This is a bogus pointer and will never be touched. It just
  3367. // increments this pointer into oblivion.
  3368. //
  3369. pDest = pBuf;
  3370. ppIncr = &pDest;
  3371. }
  3372. else
  3373. {
  3374. //
  3375. // Initialize the pointers. Use the pDest buffer.
  3376. //
  3377. pBuf = pDest;
  3378. pEndBuf = pBuf + cchDest;
  3379. ppIncr = &pBuf;
  3380. }
  3381. //
  3382. // Search through the source string. Convert all composite
  3383. // forms to their precomposed form before converting to full width.
  3384. //
  3385. while ((pPosSrc < pEndSrc) && (pBuf < pEndBuf))
  3386. {
  3387. //
  3388. // Convert a composite form to its full width precomposed
  3389. // form (if exists). Also, convert to case if necessary.
  3390. //
  3391. Count = InsertFullWidthPreComposedForm( pPosSrc,
  3392. pEndSrc,
  3393. pBuf,
  3394. pCase );
  3395. pPosSrc += Count;
  3396. //
  3397. // Convert to kana if necessary.
  3398. //
  3399. if (pKana)
  3400. {
  3401. *pBuf = GET_KANA(pKana, *pBuf);
  3402. }
  3403. //
  3404. // Increment the destination pointer.
  3405. //
  3406. (*ppIncr)++;
  3407. }
  3408. //
  3409. // Return the appropriate number of characters.
  3410. //
  3411. if (cchDest == 0)
  3412. {
  3413. //
  3414. // Return the number of characters written to the buffer.
  3415. //
  3416. return ((int)((*ppIncr) - pTmp));
  3417. }
  3418. else
  3419. {
  3420. //
  3421. // Make sure the given buffer was large enough to hold the
  3422. // mapping.
  3423. //
  3424. if (pPosSrc < pEndSrc)
  3425. {
  3426. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3427. return (0);
  3428. }
  3429. //
  3430. // Return the number of characters written to the buffer.
  3431. //
  3432. return ((int)((*ppIncr) - pDest));
  3433. }
  3434. }
  3435. ////////////////////////////////////////////////////////////////////////////
  3436. //
  3437. // MapTraditionalSimplified
  3438. //
  3439. // Stores the appropriate Traditional or Simplified Chinese values in the
  3440. // destination buffer, and returns the number of wide characters
  3441. // written to the buffer.
  3442. //
  3443. // 05-07-96 JulieB Created.
  3444. ////////////////////////////////////////////////////////////////////////////
  3445. int MapTraditionalSimplified(
  3446. PLOC_HASH pHashN,
  3447. DWORD dwFlags,
  3448. LPCWSTR pSrc,
  3449. int cchSrc,
  3450. LPWSTR pDest,
  3451. int cchDest,
  3452. PCHINESE pChinese)
  3453. {
  3454. int ctr; // loop counter
  3455. PCASE pCase; // ptr to case table (if case flag is set)
  3456. //
  3457. // If the destination value is zero, then just return the
  3458. // length of the source string. Do NOT touch pDest.
  3459. //
  3460. if (cchDest == 0)
  3461. {
  3462. return (cchSrc);
  3463. }
  3464. //
  3465. // If cchSrc is greater than cchDest, then the destination buffer
  3466. // is too small to hold the new string. Return an error.
  3467. //
  3468. if (cchSrc > cchDest)
  3469. {
  3470. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3471. return (0);
  3472. }
  3473. //
  3474. // See if lower or upper case flags are present.
  3475. //
  3476. if (dwFlags & LCMAP_LOWERCASE)
  3477. {
  3478. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  3479. ? pHashN->pLowerLinguist
  3480. : pHashN->pLowerCase;
  3481. }
  3482. else if (dwFlags & LCMAP_UPPERCASE)
  3483. {
  3484. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  3485. ? pHashN->pUpperLinguist
  3486. : pHashN->pUpperCase;
  3487. }
  3488. else
  3489. {
  3490. pCase = NULL;
  3491. }
  3492. //
  3493. // Map to Traditional/Simplified and store it in the destination string.
  3494. // Also map the case, if appropriate.
  3495. //
  3496. if (pCase)
  3497. {
  3498. for (ctr = 0; ctr < cchSrc; ctr++)
  3499. {
  3500. pDest[ctr] = GET_CHINESE(pChinese, pSrc[ctr]);
  3501. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  3502. }
  3503. }
  3504. else
  3505. {
  3506. for (ctr = 0; ctr < cchSrc; ctr++)
  3507. {
  3508. pDest[ctr] = GET_CHINESE(pChinese, pSrc[ctr]);
  3509. }
  3510. }
  3511. //
  3512. // Return the number of wide characters written.
  3513. //
  3514. return (ctr);
  3515. }