Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3770 lines
136 KiB

  1. /*++
  2. Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. map.c
  5. Abstract:
  6. This file contains functions that deal with map tables.
  7. APIs found in this file:
  8. FoldStringW
  9. LCMapStringW
  10. Revision History:
  11. 05-31-91 JulieB Created.
  12. --*/
  13. //
  14. // Include Files.
  15. //
  16. #include "nls.h"
  17. #include "nlssafe.h"
  18. #include "jamo.h"
  19. //
  20. // Constant Declarations.
  21. //
  22. //
  23. // Invalid weight value.
  24. //
  25. #define MAP_INVALID_UW 0xffff
  26. //
  27. // Number of bytes in each weight.
  28. //
  29. // Note: Total number of bytes is limited by MAX_WEIGHTS definition.
  30. // The padding is needed if SW is not on a WORD boundary.
  31. //
  32. #define NUM_BYTES_UW 8
  33. #define NUM_BYTES_DW 1
  34. #define NUM_BYTES_CW 1
  35. #define NUM_BYTES_XW 4
  36. #define NUM_BYTES_PADDING 0
  37. #define NUM_BYTES_SW 4
  38. //
  39. // Flags to drop the 3rd weight (CW).
  40. //
  41. #define NORM_DROP_CW (NORM_IGNORECASE | NORM_IGNOREWIDTH)
  42. //
  43. // XW Values for FE Special Weights.
  44. //
  45. BYTE pXWDrop[] = // values to drop from XW
  46. {
  47. 0xc6, // weight 4
  48. 0x03, // weight 5
  49. 0xe4, // weight 6
  50. 0xc5 // weight 7
  51. };
  52. BYTE pXWSeparator[] = // separator values for XW
  53. {
  54. 0xff, // weight 4
  55. 0x02, // weight 5
  56. 0xff, // weight 6
  57. 0xff // weight 7
  58. };
  59. //
  60. // Forward Declarations.
  61. //
  62. int
  63. FoldCZone(
  64. LPCWSTR pSrc,
  65. int cchSrc,
  66. LPWSTR pDest,
  67. int cchDest);
  68. int
  69. FoldDigits(
  70. LPCWSTR pSrc,
  71. int cchSrc,
  72. LPWSTR pDest,
  73. int cchDest);
  74. int
  75. FoldCZone_Digits(
  76. LPCWSTR pSrc,
  77. int cchSrc,
  78. LPWSTR pDest,
  79. int cchDest);
  80. int FoldLigatures(
  81. LPCWSTR pSrc,
  82. int cchSrc,
  83. LPWSTR pDest,
  84. int cchDest);
  85. int
  86. FoldPreComposed(
  87. LPCWSTR pSrc,
  88. int cchSrc,
  89. LPWSTR pDest,
  90. int cchDest);
  91. int
  92. FoldComposite(
  93. LPCWSTR pSrc,
  94. int cchSrc,
  95. LPWSTR pDest,
  96. int cchDest);
  97. int
  98. MapCase(
  99. PLOC_HASH pHashN,
  100. LPCWSTR pSrc,
  101. int cchSrc,
  102. LPWSTR pDest,
  103. int cchDest,
  104. PCASE pCaseTbl);
  105. int
  106. MapSortKey(
  107. PLOC_HASH pHashN,
  108. DWORD dwFlags,
  109. LPCWSTR pSrc,
  110. int cchSrc,
  111. LPBYTE pDest,
  112. int cchDest,
  113. BOOL fModify);
  114. int
  115. MapNormalization(
  116. PLOC_HASH pHashN,
  117. DWORD dwFlags,
  118. LPCWSTR pSrc,
  119. int cchSrc,
  120. LPWSTR pDest,
  121. int cchDest);
  122. int
  123. MapKanaWidth(
  124. PLOC_HASH pHashN,
  125. DWORD dwFlags,
  126. LPCWSTR pSrc,
  127. int cchSrc,
  128. LPWSTR pDest,
  129. int cchDest);
  130. int
  131. MapHalfKana(
  132. LPCWSTR pSrc,
  133. int cchSrc,
  134. LPWSTR pDest,
  135. int cchDest,
  136. PKANA pKana,
  137. PCASE pCase);
  138. int
  139. MapFullKana(
  140. LPCWSTR pSrc,
  141. int cchSrc,
  142. LPWSTR pDest,
  143. int cchDest,
  144. PKANA pKana,
  145. PCASE pCase);
  146. int
  147. MapTraditionalSimplified(
  148. PLOC_HASH pHashN,
  149. DWORD dwFlags,
  150. LPCWSTR pSrc,
  151. int cchSrc,
  152. LPWSTR pDest,
  153. int cchDest,
  154. PCHINESE pChinese);
  155. //-------------------------------------------------------------------------//
  156. // API ROUTINES //
  157. //-------------------------------------------------------------------------//
  158. ////////////////////////////////////////////////////////////////////////////
  159. //
  160. // FoldStringW
  161. //
  162. // Maps one wide character string to another performing the specified
  163. // translation. This mapping routine only takes flags that are locale
  164. // independent.
  165. //
  166. // 05-31-91 JulieB Created.
  167. ////////////////////////////////////////////////////////////////////////////
  168. int WINAPI FoldStringW(
  169. DWORD dwMapFlags,
  170. LPCWSTR lpSrcStr,
  171. int cchSrc,
  172. LPWSTR lpDestStr,
  173. int cchDest)
  174. {
  175. int Count = 0; // word count
  176. //
  177. // Invalid Parameter Check:
  178. // - length of src string is 0
  179. // - either buffer size is negative (except cchSrc == -1)
  180. // - src string is NULL
  181. // - length of dest string is NOT zero AND dest string is NULL
  182. // - same buffer - src = destination
  183. //
  184. // - flags are checked in switch statement below
  185. //
  186. if ((cchSrc == 0) || (cchDest < 0) ||
  187. (lpSrcStr == NULL) ||
  188. ((cchDest != 0) && (lpDestStr == NULL)) ||
  189. (lpSrcStr == lpDestStr))
  190. {
  191. SetLastError(ERROR_INVALID_PARAMETER);
  192. return (0);
  193. }
  194. //
  195. // If cchSrc is -1, then the source string is null terminated and we
  196. // need to get the length of the source string. Add one to the
  197. // length to include the null termination.
  198. // (This will always be at least 1.)
  199. //
  200. if (cchSrc <= -1)
  201. {
  202. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  203. }
  204. //
  205. // Map the string based on the given flags.
  206. //
  207. switch (dwMapFlags)
  208. {
  209. case ( MAP_FOLDCZONE ) :
  210. {
  211. //
  212. // Map the string to fold the Compatibility Zone.
  213. //
  214. Count = FoldCZone( lpSrcStr,
  215. cchSrc,
  216. lpDestStr,
  217. cchDest );
  218. break;
  219. }
  220. case ( MAP_FOLDDIGITS ) :
  221. {
  222. //
  223. // Map the string to fold the Ascii Digits.
  224. //
  225. Count = FoldDigits( lpSrcStr,
  226. cchSrc,
  227. lpDestStr,
  228. cchDest );
  229. break;
  230. }
  231. case ( MAP_EXPAND_LIGATURES ) :
  232. {
  233. //
  234. // Map the string to expand all Ligatures.
  235. //
  236. Count = FoldLigatures( lpSrcStr,
  237. cchSrc,
  238. lpDestStr,
  239. cchDest );
  240. break;
  241. }
  242. case ( MAP_PRECOMPOSED ) :
  243. {
  244. //
  245. // Map the string to compress all composite forms of
  246. // characters to their precomposed form.
  247. //
  248. Count = FoldPreComposed( lpSrcStr,
  249. cchSrc,
  250. lpDestStr,
  251. cchDest );
  252. break;
  253. }
  254. case ( MAP_COMPOSITE ) :
  255. {
  256. //
  257. // Map the string to expand out all precomposed characters
  258. // to their composite form.
  259. //
  260. Count = FoldComposite( lpSrcStr,
  261. cchSrc,
  262. lpDestStr,
  263. cchDest );
  264. break;
  265. }
  266. case ( MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  267. {
  268. //
  269. // Map the string to fold the Compatibility Zone and fold the
  270. // Ascii Digits.
  271. //
  272. Count = FoldCZone_Digits( lpSrcStr,
  273. cchSrc,
  274. lpDestStr,
  275. cchDest );
  276. break;
  277. }
  278. case ( MAP_EXPAND_LIGATURES | MAP_FOLDCZONE ) :
  279. {
  280. //
  281. // Map the string to expand the ligatures and fold the
  282. // Compatibility Zone.
  283. //
  284. Count = FoldLigatures( lpSrcStr,
  285. cchSrc,
  286. lpDestStr,
  287. cchDest );
  288. Count = FoldCZone( lpDestStr,
  289. Count,
  290. lpDestStr,
  291. cchDest );
  292. break;
  293. }
  294. case ( MAP_EXPAND_LIGATURES | MAP_FOLDDIGITS ) :
  295. {
  296. //
  297. // Map the string to expand the ligatures and fold the
  298. // Ascii Digits.
  299. //
  300. Count = FoldLigatures( lpSrcStr,
  301. cchSrc,
  302. lpDestStr,
  303. cchDest );
  304. Count = FoldDigits( lpDestStr,
  305. Count,
  306. lpDestStr,
  307. cchDest );
  308. break;
  309. }
  310. case ( MAP_EXPAND_LIGATURES | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  311. {
  312. //
  313. // Map the string to expand the ligatures, fold the
  314. // Compatibility Zone and fold the Ascii Digits.
  315. //
  316. Count = FoldLigatures( lpSrcStr,
  317. cchSrc,
  318. lpDestStr,
  319. cchDest );
  320. Count = FoldCZone_Digits( lpDestStr,
  321. Count,
  322. lpDestStr,
  323. cchDest );
  324. break;
  325. }
  326. case ( MAP_PRECOMPOSED | MAP_FOLDCZONE ) :
  327. {
  328. //
  329. // Map the string to convert to precomposed forms and to
  330. // fold the Compatibility Zone.
  331. //
  332. Count = FoldPreComposed( lpSrcStr,
  333. cchSrc,
  334. lpDestStr,
  335. cchDest );
  336. Count = FoldCZone( lpDestStr,
  337. Count,
  338. lpDestStr,
  339. cchDest );
  340. break;
  341. }
  342. case ( MAP_PRECOMPOSED | MAP_FOLDDIGITS ) :
  343. {
  344. //
  345. // Map the string to convert to precomposed forms and to
  346. // fold the Ascii Digits.
  347. //
  348. Count = FoldPreComposed( lpSrcStr,
  349. cchSrc,
  350. lpDestStr,
  351. cchDest );
  352. Count = FoldDigits( lpDestStr,
  353. Count,
  354. lpDestStr,
  355. cchDest );
  356. break;
  357. }
  358. case ( MAP_PRECOMPOSED | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  359. {
  360. //
  361. // Map the string to convert to precomposed forms,
  362. // fold the Compatibility Zone, and fold the Ascii Digits.
  363. //
  364. Count = FoldPreComposed( lpSrcStr,
  365. cchSrc,
  366. lpDestStr,
  367. cchDest );
  368. Count = FoldCZone_Digits( lpDestStr,
  369. Count,
  370. lpDestStr,
  371. cchDest );
  372. break;
  373. }
  374. case ( MAP_COMPOSITE | MAP_FOLDCZONE ) :
  375. {
  376. //
  377. // Map the string to convert to composite forms and to
  378. // fold the Compatibility Zone.
  379. //
  380. Count = FoldComposite( lpSrcStr,
  381. cchSrc,
  382. lpDestStr,
  383. cchDest );
  384. Count = FoldCZone( lpDestStr,
  385. Count,
  386. lpDestStr,
  387. cchDest );
  388. break;
  389. }
  390. case ( MAP_COMPOSITE | MAP_FOLDDIGITS ) :
  391. {
  392. //
  393. // Map the string to convert to composite forms and to
  394. // fold the Ascii Digits.
  395. //
  396. Count = FoldComposite( lpSrcStr,
  397. cchSrc,
  398. lpDestStr,
  399. cchDest );
  400. Count = FoldDigits( lpDestStr,
  401. Count,
  402. lpDestStr,
  403. cchDest );
  404. break;
  405. }
  406. case ( MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  407. {
  408. //
  409. // Map the string to convert to composite forms,
  410. // fold the Compatibility Zone, and fold the Ascii Digits.
  411. //
  412. Count = FoldComposite( lpSrcStr,
  413. cchSrc,
  414. lpDestStr,
  415. cchDest );
  416. Count = FoldCZone_Digits( lpDestStr,
  417. Count,
  418. lpDestStr,
  419. cchDest );
  420. break;
  421. }
  422. default :
  423. {
  424. SetLastError(ERROR_INVALID_FLAGS);
  425. return (0);
  426. }
  427. }
  428. //
  429. // Return the number of characters written to the buffer.
  430. // Or, if cchDest == 0, then return the number of characters
  431. // that would have been written to the buffer.
  432. //
  433. return (Count);
  434. }
  435. ////////////////////////////////////////////////////////////////////////////
  436. //
  437. // LCMapStringW
  438. //
  439. // Maps one wide character string to another performing the specified
  440. // translation. This mapping routine only takes flags that are locale
  441. // dependent.
  442. //
  443. // 05-31-91 JulieB Created.
  444. // 07-26-93 JulieB Added new flags for NT-J.
  445. ////////////////////////////////////////////////////////////////////////////
  446. int WINAPI LCMapStringW(
  447. LCID Locale,
  448. DWORD dwMapFlags,
  449. LPCWSTR lpSrcStr,
  450. int cchSrc,
  451. LPWSTR lpDestStr,
  452. int cchDest)
  453. {
  454. PLOC_HASH pHashN; // ptr to LOC hash node
  455. int Count = 0; // word count or byte count
  456. int ctr; // loop counter
  457. //
  458. // Invalid Parameter Check:
  459. // - validate LCID
  460. // - length of src string is 0
  461. // - destination buffer size is negative
  462. // - src string is NULL
  463. // - length of dest string is NOT zero AND dest string is NULL
  464. // - same buffer - src = destination
  465. // if not UPPER or LOWER or
  466. // UPPER or LOWER used with Japanese flags
  467. //
  468. VALIDATE_LANGUAGE(Locale, pHashN, dwMapFlags & LCMAP_LINGUISTIC_CASING, TRUE);
  469. if ( (pHashN == NULL) ||
  470. (cchSrc == 0) || (cchDest < 0) || (lpSrcStr == NULL) ||
  471. ((cchDest != 0) && (lpDestStr == NULL)) ||
  472. ((lpSrcStr == lpDestStr) &&
  473. ((!(dwMapFlags & (LCMAP_UPPERCASE | LCMAP_LOWERCASE))) ||
  474. (dwMapFlags & (LCMAP_HIRAGANA | LCMAP_KATAKANA |
  475. LCMAP_HALFWIDTH | LCMAP_FULLWIDTH)))) )
  476. {
  477. SetLastError(ERROR_INVALID_PARAMETER);
  478. return (0);
  479. }
  480. //
  481. // Invalid Flags Check:
  482. // - flags other than valid ones or 0
  483. // - (any NORM_ flag) AND (any LCMAP_ flag except byterev and sortkey)
  484. // - (NORM_ flags for sortkey) AND (NOT LCMAP_SORTKEY)
  485. // - more than one of lower, upper, sortkey
  486. // - more than one of hiragana, katakana, sortkey
  487. // - more than one of half width, full width, sortkey
  488. // - more than one of traditional, simplified, sortkey
  489. // - (LINGUISTIC flag) AND (NOT LCMAP_UPPER OR LCMAP_LOWER)
  490. //
  491. dwMapFlags &= (~LOCALE_USE_CP_ACP);
  492. if ( (dwMapFlags & LCMS_INVALID_FLAG) || (dwMapFlags == 0) ||
  493. ((dwMapFlags & (NORM_ALL | SORT_STRINGSORT)) &&
  494. (dwMapFlags & LCMAP_NO_NORM)) ||
  495. ((dwMapFlags & NORM_SORTKEY_ONLY) &&
  496. (!(dwMapFlags & LCMAP_SORTKEY))) ||
  497. (MORE_THAN_ONE(dwMapFlags, LCMS1_SINGLE_FLAG)) ||
  498. (MORE_THAN_ONE(dwMapFlags, LCMS2_SINGLE_FLAG)) ||
  499. (MORE_THAN_ONE(dwMapFlags, LCMS3_SINGLE_FLAG)) ||
  500. (MORE_THAN_ONE(dwMapFlags, LCMS4_SINGLE_FLAG)) ||
  501. ((dwMapFlags & LCMAP_LINGUISTIC_CASING) &&
  502. (!(dwMapFlags & (LCMAP_UPPERCASE | LCMAP_LOWERCASE)))) )
  503. {
  504. SetLastError(ERROR_INVALID_FLAGS);
  505. return (0);
  506. }
  507. //
  508. // If cchSrc is -1, then the source string is null terminated and we
  509. // need to get the length of the source string. Add one to the
  510. // length to include the null termination.
  511. // (This will always be at least 1.)
  512. //
  513. if (cchSrc <= -1)
  514. {
  515. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  516. }
  517. //
  518. // Map the string based on the given flags.
  519. //
  520. if (dwMapFlags & LCMAP_SORTKEY)
  521. {
  522. //
  523. // Map the string to its sortkey.
  524. //
  525. // NOTE: This returns the number of BYTES, instead of the
  526. // number of wide characters (words).
  527. //
  528. Count = MapSortKey( pHashN,
  529. dwMapFlags,
  530. lpSrcStr,
  531. cchSrc,
  532. (LPBYTE)lpDestStr,
  533. cchDest,
  534. IS_KOREAN(Locale) );
  535. }
  536. else
  537. {
  538. switch (dwMapFlags & ~(LCMAP_BYTEREV | LCMAP_LINGUISTIC_CASING))
  539. {
  540. case ( LCMAP_LOWERCASE ) :
  541. {
  542. //
  543. // Map the string to Lower Case.
  544. //
  545. Count = MapCase( pHashN,
  546. lpSrcStr,
  547. cchSrc,
  548. lpDestStr,
  549. cchDest,
  550. (dwMapFlags & LCMAP_LINGUISTIC_CASING)
  551. ? pHashN->pLowerLinguist
  552. : pHashN->pLowerCase );
  553. break;
  554. }
  555. case ( LCMAP_UPPERCASE ) :
  556. {
  557. //
  558. // Map the string to Upper Case.
  559. //
  560. Count = MapCase( pHashN,
  561. lpSrcStr,
  562. cchSrc,
  563. lpDestStr,
  564. cchDest,
  565. (dwMapFlags & LCMAP_LINGUISTIC_CASING)
  566. ? pHashN->pUpperLinguist
  567. : pHashN->pUpperCase );
  568. break;
  569. }
  570. case ( NORM_IGNORENONSPACE ) :
  571. case ( NORM_IGNORESYMBOLS ) :
  572. case ( NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS ) :
  573. {
  574. //
  575. // Map the string to strip out nonspace marks and/or symbols.
  576. //
  577. Count = MapNormalization( pHashN,
  578. dwMapFlags & ~LCMAP_BYTEREV,
  579. lpSrcStr,
  580. cchSrc,
  581. lpDestStr,
  582. cchDest );
  583. break;
  584. }
  585. case ( LCMAP_TRADITIONAL_CHINESE ) :
  586. case ( LCMAP_TRADITIONAL_CHINESE | LCMAP_LOWERCASE ) :
  587. case ( LCMAP_TRADITIONAL_CHINESE | LCMAP_UPPERCASE) :
  588. {
  589. //
  590. // Map the string to Traditional Chinese.
  591. //
  592. Count = MapTraditionalSimplified( pHashN,
  593. dwMapFlags & ~LCMAP_BYTEREV,
  594. lpSrcStr,
  595. cchSrc,
  596. lpDestStr,
  597. cchDest,
  598. pTblPtrs->pTraditional );
  599. break;
  600. }
  601. case ( LCMAP_SIMPLIFIED_CHINESE ) :
  602. case ( LCMAP_SIMPLIFIED_CHINESE | LCMAP_LOWERCASE ) :
  603. case ( LCMAP_SIMPLIFIED_CHINESE | LCMAP_UPPERCASE ) :
  604. {
  605. //
  606. // Map the string to Simplified Chinese.
  607. //
  608. Count = MapTraditionalSimplified( pHashN,
  609. dwMapFlags & ~LCMAP_BYTEREV,
  610. lpSrcStr,
  611. cchSrc,
  612. lpDestStr,
  613. cchDest,
  614. pTblPtrs->pSimplified );
  615. break;
  616. }
  617. default :
  618. {
  619. //
  620. // Make sure the Chinese flags are not used with the
  621. // Japanese flags.
  622. //
  623. if (dwMapFlags &
  624. (LCMAP_TRADITIONAL_CHINESE | LCMAP_SIMPLIFIED_CHINESE))
  625. {
  626. SetLastError(ERROR_INVALID_FLAGS);
  627. return (0);
  628. }
  629. //
  630. // The only flags not yet handled are the variations
  631. // containing the Kana and/or Width flags.
  632. // This handles all variations for:
  633. // LCMAP_HIRAGANA
  634. // LCMAP_KATAKANA
  635. // LCMAP_HALFWIDTH
  636. // LCMAP_FULLWIDTH
  637. //
  638. // Allow LCMAP_LOWERCASE and LCMAP_UPPERCASE
  639. // in combination with the kana and width flags.
  640. //
  641. Count = MapKanaWidth( pHashN,
  642. dwMapFlags & ~LCMAP_BYTEREV,
  643. lpSrcStr,
  644. cchSrc,
  645. lpDestStr,
  646. cchDest );
  647. break;
  648. }
  649. }
  650. }
  651. //
  652. // Always check LCMAP_BYTEREV last and do it in place.
  653. // LCMAP_BYTEREV may be used in combination with any other flag
  654. // (except ignore case without sortkey) or by itself.
  655. //
  656. if (dwMapFlags & LCMAP_BYTEREV)
  657. {
  658. //
  659. // Reverse the bytes of each word in the string.
  660. //
  661. if (dwMapFlags == LCMAP_BYTEREV)
  662. {
  663. //
  664. // Byte Reversal flag is used by itself.
  665. //
  666. // Make sure that the size of the destination buffer is
  667. // larger than zero. If it is zero, return the size of
  668. // the source string only. Do NOT touch lpDestStr.
  669. //
  670. if (cchDest != 0)
  671. {
  672. //
  673. // Flag is used by itself. Reverse the bytes from
  674. // the source string and store them in the destination
  675. // string.
  676. //
  677. if (cchSrc > cchDest)
  678. {
  679. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  680. return (0);
  681. }
  682. for (ctr = 0; ctr < cchSrc; ctr++)
  683. {
  684. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpSrcStr[ctr]),
  685. LOBYTE(lpSrcStr[ctr]) );
  686. }
  687. }
  688. //
  689. // Return the size of the source string.
  690. //
  691. Count = cchSrc;
  692. }
  693. else
  694. {
  695. //
  696. // Make sure that the size of the destination buffer is
  697. // larger than zero. If it is zero, return the count and
  698. // do NOT touch lpDestStr.
  699. //
  700. if (cchDest != 0)
  701. {
  702. //
  703. // Check for sortkey flag.
  704. //
  705. if (dwMapFlags & LCMAP_SORTKEY)
  706. {
  707. //
  708. // Sortkey flag is also set, so 'Count' contains the
  709. // number of BYTES instead of the number of words.
  710. //
  711. // Reverse the bytes in place in the destination string.
  712. // No need to check the size of the destination buffer
  713. // here - it's been done elsewhere.
  714. //
  715. for (ctr = 0; ctr < Count / 2; ctr++)
  716. {
  717. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpDestStr[ctr]),
  718. LOBYTE(lpDestStr[ctr]) );
  719. }
  720. }
  721. else
  722. {
  723. //
  724. // Flag is used in combination with another flag.
  725. // Reverse the bytes in place in the destination string.
  726. // No need to check the size of the destination buffer
  727. // here - it's been done elsewhere.
  728. //
  729. for (ctr = 0; ctr < Count; ctr++)
  730. {
  731. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpDestStr[ctr]),
  732. LOBYTE(lpDestStr[ctr]) );
  733. }
  734. }
  735. }
  736. }
  737. }
  738. //
  739. // Return the number of characters (or number of bytes for sortkey)
  740. // written to the buffer.
  741. //
  742. return (Count);
  743. }
  744. //-------------------------------------------------------------------------//
  745. // INTERNAL ROUTINES //
  746. //-------------------------------------------------------------------------//
  747. ////////////////////////////////////////////////////////////////////////////
  748. //
  749. // FoldCZone
  750. //
  751. // Stores the compatibility zone values for the given string in the
  752. // destination buffer, and returns the number of wide characters
  753. // written to the buffer.
  754. //
  755. // 02-01-93 JulieB Created.
  756. ////////////////////////////////////////////////////////////////////////////
  757. int FoldCZone(
  758. LPCWSTR pSrc,
  759. int cchSrc,
  760. LPWSTR pDest,
  761. int cchDest)
  762. {
  763. int ctr; // loop counter
  764. //
  765. // If the destination value is zero, then just return the
  766. // length of the source string. Do NOT touch pDest.
  767. //
  768. if (cchDest == 0)
  769. {
  770. return (cchSrc);
  771. }
  772. //
  773. // If cchSrc is greater than cchDest, then the destination buffer
  774. // is too small to hold the new string. Return an error.
  775. //
  776. if (cchSrc > cchDest)
  777. {
  778. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  779. return (0);
  780. }
  781. //
  782. // Fold the Compatibility Zone and store it in the destination string.
  783. //
  784. for (ctr = 0; ctr < cchSrc; ctr++)
  785. {
  786. pDest[ctr] = GET_FOLD_CZONE(pTblPtrs->pCZone, pSrc[ctr]);
  787. }
  788. //
  789. // Return the number of wide characters written.
  790. //
  791. return (ctr);
  792. }
  793. ////////////////////////////////////////////////////////////////////////////
  794. //
  795. // FoldDigits
  796. //
  797. // Stores the ascii digits values for the given string in the
  798. // destination buffer, and returns the number of wide characters
  799. // written to the buffer.
  800. //
  801. // 02-01-93 JulieB Created.
  802. ////////////////////////////////////////////////////////////////////////////
  803. int FoldDigits(
  804. LPCWSTR pSrc,
  805. int cchSrc,
  806. LPWSTR pDest,
  807. int cchDest)
  808. {
  809. int ctr; // loop counter
  810. //
  811. // If the destination value is zero, then just return the
  812. // length of the source string. Do NOT touch pDest.
  813. //
  814. if (cchDest == 0)
  815. {
  816. return (cchSrc);
  817. }
  818. //
  819. // If cchSrc is greater than cchDest, then the destination buffer
  820. // is too small to hold the new string. Return an error.
  821. //
  822. if (cchSrc > cchDest)
  823. {
  824. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  825. return (0);
  826. }
  827. //
  828. // Fold the Ascii Digits and store it in the destination string.
  829. //
  830. for (ctr = 0; ctr < cchSrc; ctr++)
  831. {
  832. pDest[ctr] = GET_ASCII_DIGITS(pTblPtrs->pADigit, pSrc[ctr]);
  833. }
  834. //
  835. // Return the number of wide characters written.
  836. //
  837. return (ctr);
  838. }
  839. ////////////////////////////////////////////////////////////////////////////
  840. //
  841. // FoldCZone_Digits
  842. //
  843. // Stores the compatibility zone and ascii digits values for the given
  844. // string in the destination buffer, and returns the number of wide
  845. // characters written to the buffer.
  846. //
  847. // 02-01-93 JulieB Created.
  848. ////////////////////////////////////////////////////////////////////////////
  849. int FoldCZone_Digits(
  850. LPCWSTR pSrc,
  851. int cchSrc,
  852. LPWSTR pDest,
  853. int cchDest)
  854. {
  855. int ctr; // loop counter
  856. //
  857. // If the destination value is zero, then just return the
  858. // length of the source string. Do NOT touch pDest.
  859. //
  860. if (cchDest == 0)
  861. {
  862. return (cchSrc);
  863. }
  864. //
  865. // If cchSrc is greater than cchDest, then the destination buffer
  866. // is too small to hold the new string. Return an error.
  867. //
  868. if (cchSrc > cchDest)
  869. {
  870. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  871. return (0);
  872. }
  873. //
  874. // Fold the compatibility zone and the ascii digits values and store
  875. // it in the destination string.
  876. //
  877. for (ctr = 0; ctr < cchSrc; ctr++)
  878. {
  879. pDest[ctr] = GET_FOLD_CZONE(pTblPtrs->pCZone, pSrc[ctr]);
  880. pDest[ctr] = GET_ASCII_DIGITS(pTblPtrs->pADigit, pDest[ctr]);
  881. }
  882. //
  883. // Return the number of wide characters written.
  884. //
  885. return (ctr);
  886. }
  887. ////////////////////////////////////////////////////////////////////////////
  888. //
  889. // FoldLigatures
  890. //
  891. // Stores the expanded ligature values for the given string in the
  892. // destination buffer, and returns the number of wide characters
  893. // written to the buffer.
  894. //
  895. // 10-15-96 JulieB Created.
  896. ////////////////////////////////////////////////////////////////////////////
  897. int FoldLigatures(
  898. LPCWSTR pSrc,
  899. int cchSrc,
  900. LPWSTR pDest,
  901. int cchDest)
  902. {
  903. int ctr = 0; // source char counter
  904. int ctr2 = 0; // destination char counter
  905. DWORD Weight; // sort weight - used for expansions
  906. //
  907. // Make sure the default sorting table is available. If not,
  908. // return an error.
  909. //
  910. if (pTblPtrs->pDefaultSortkey == NULL)
  911. {
  912. KdPrint(("NLSAPI: No Default Sorting Table Loaded.\n"));
  913. SetLastError(ERROR_FILE_NOT_FOUND);
  914. return (0);
  915. }
  916. //
  917. // If the destination value is zero, then just return the
  918. // length of the string that would be returned. Do NOT touch pDest.
  919. //
  920. if (cchDest == 0)
  921. {
  922. //
  923. // Convert the source string to expand all ligatures and calculate
  924. // the number of characters that would have been written to a
  925. // destination buffer.
  926. //
  927. while (ctr < cchSrc)
  928. {
  929. Weight = MAKE_SORTKEY_DWORD((pTblPtrs->pDefaultSortkey)[pSrc[ctr]]);
  930. if (GET_SCRIPT_MEMBER(&Weight) == EXPANSION)
  931. {
  932. do
  933. {
  934. ctr2++;
  935. Weight = MAKE_SORTKEY_DWORD(
  936. (pTblPtrs->pDefaultSortkey)[GET_EXPANSION_2(&Weight)]);
  937. } while (GET_SCRIPT_MEMBER(&Weight) == EXPANSION);
  938. ctr2++;
  939. }
  940. else
  941. {
  942. ctr2++;
  943. }
  944. ctr++;
  945. }
  946. }
  947. else
  948. {
  949. //
  950. // Convert the source string to expand all ligatures and store
  951. // the result in the destination buffer.
  952. //
  953. while ((ctr < cchSrc) && (ctr2 < cchDest))
  954. {
  955. Weight = MAKE_SORTKEY_DWORD((pTblPtrs->pDefaultSortkey)[pSrc[ctr]]);
  956. if (GET_SCRIPT_MEMBER(&Weight) == EXPANSION)
  957. {
  958. do
  959. {
  960. if ((ctr2 + 1) < cchDest)
  961. {
  962. pDest[ctr2] = GET_EXPANSION_1(&Weight);
  963. pDest[ctr2 + 1] = GET_EXPANSION_2(&Weight);
  964. ctr2++;
  965. }
  966. else
  967. {
  968. ctr2++;
  969. break;
  970. }
  971. Weight = MAKE_SORTKEY_DWORD(
  972. (pTblPtrs->pDefaultSortkey)[pDest[ctr2]]);
  973. } while (GET_SCRIPT_MEMBER(&Weight) == EXPANSION);
  974. if (ctr2 >= cchDest)
  975. {
  976. break;
  977. }
  978. ctr2++;
  979. }
  980. else
  981. {
  982. pDest[ctr2] = pSrc[ctr];
  983. ctr2++;
  984. }
  985. ctr++;
  986. }
  987. }
  988. //
  989. // Make sure destination buffer was large enough.
  990. //
  991. if (ctr < cchSrc)
  992. {
  993. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  994. return (0);
  995. }
  996. //
  997. // Return the number of wide characters written.
  998. //
  999. return (ctr2);
  1000. }
  1001. ////////////////////////////////////////////////////////////////////////////
  1002. //
  1003. // FoldPreComposed
  1004. //
  1005. // Stores the precomposed values for the given string in the
  1006. // destination buffer, and returns the number of wide characters
  1007. // written to the buffer.
  1008. //
  1009. // 02-01-93 JulieB Created.
  1010. ////////////////////////////////////////////////////////////////////////////
  1011. int FoldPreComposed(
  1012. LPCWSTR pSrc,
  1013. int cchSrc,
  1014. LPWSTR pDest,
  1015. int cchDest)
  1016. {
  1017. int ctr = 0; // source char counter
  1018. int ctr2 = 0; // destination char counter
  1019. WCHAR wch = 0; // wchar holder
  1020. //
  1021. // Make sure the default sorting table is available. If not,
  1022. // return an error.
  1023. //
  1024. if (pTblPtrs->pDefaultSortkey == NULL)
  1025. {
  1026. KdPrint(("NLSAPI: No Default Sorting Table Loaded.\n"));
  1027. SetLastError(ERROR_FILE_NOT_FOUND);
  1028. return (0);
  1029. }
  1030. //
  1031. // If the destination value is zero, then just return the
  1032. // length of the string that would be returned. Do NOT touch pDest.
  1033. //
  1034. if (cchDest == 0)
  1035. {
  1036. //
  1037. // Convert the source string to precomposed and calculate the
  1038. // number of characters that would have been written to a
  1039. // destination buffer.
  1040. //
  1041. while (ctr < cchSrc)
  1042. {
  1043. if ((ctr2 != 0) &&
  1044. (IS_NONSPACE_ONLY(pTblPtrs->pDefaultSortkey, pSrc[ctr])))
  1045. {
  1046. //
  1047. // Composite form. Write the precomposed form.
  1048. //
  1049. // If the precomposed character is written to the buffer,
  1050. // do NOT increment the destination pointer or the
  1051. // character count (the precomposed character was
  1052. // written over the previous character).
  1053. //
  1054. if (wch)
  1055. {
  1056. if ((wch = GetPreComposedChar(pSrc[ctr], wch)) == 0)
  1057. {
  1058. //
  1059. // No translation for composite form, so just
  1060. // increment the destination counter.
  1061. //
  1062. ctr2++;
  1063. }
  1064. }
  1065. else
  1066. {
  1067. if ((wch = GetPreComposedChar( pSrc[ctr],
  1068. pSrc[ctr - 1] )) == 0)
  1069. {
  1070. //
  1071. // No translation for composite form, so just
  1072. // increment the destination counter.
  1073. //
  1074. ctr2++;
  1075. }
  1076. }
  1077. }
  1078. else
  1079. {
  1080. //
  1081. // Not part of a composite character, so just
  1082. // increment the destination counter.
  1083. //
  1084. wch = 0;
  1085. ctr2++;
  1086. }
  1087. ctr++;
  1088. }
  1089. }
  1090. else
  1091. {
  1092. //
  1093. // Convert the source string to precomposed and store it in the
  1094. // destination string.
  1095. //
  1096. while ((ctr < cchSrc) && (ctr2 < cchDest))
  1097. {
  1098. if ((ctr2 != 0) &&
  1099. (IS_NONSPACE_ONLY(pTblPtrs->pDefaultSortkey, pSrc[ctr])))
  1100. {
  1101. //
  1102. // Composite form. Write the precomposed form.
  1103. //
  1104. // If the precomposed character is written to the buffer,
  1105. // do NOT increment the destination pointer or the
  1106. // character count (the precomposed character was
  1107. // written over the previous character).
  1108. //
  1109. wch = pDest[ctr2 - 1];
  1110. if ((pDest[ctr2 - 1] =
  1111. GetPreComposedChar( pSrc[ctr],
  1112. pDest[ctr2 - 1] )) == 0)
  1113. {
  1114. //
  1115. // No translation for composite form, so must
  1116. // rewrite the base character and write the
  1117. // composite character.
  1118. //
  1119. pDest[ctr2 - 1] = wch;
  1120. pDest[ctr2] = pSrc[ctr];
  1121. ctr2++;
  1122. }
  1123. }
  1124. else
  1125. {
  1126. //
  1127. // Not part of a composite character, so just write
  1128. // the character to the destination string.
  1129. //
  1130. pDest[ctr2] = pSrc[ctr];
  1131. ctr2++;
  1132. }
  1133. ctr++;
  1134. }
  1135. }
  1136. //
  1137. // Make sure destination buffer was large enough.
  1138. //
  1139. if (ctr < cchSrc)
  1140. {
  1141. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1142. return (0);
  1143. }
  1144. //
  1145. // Return the number of wide characters written.
  1146. //
  1147. return (ctr2);
  1148. }
  1149. ////////////////////////////////////////////////////////////////////////////
  1150. //
  1151. // FoldComposite
  1152. //
  1153. // Stores the composite values for the given string in the
  1154. // destination buffer, and returns the number of wide characters
  1155. // written to the buffer.
  1156. //
  1157. // 02-01-93 JulieB Created.
  1158. ////////////////////////////////////////////////////////////////////////////
  1159. int FoldComposite(
  1160. LPCWSTR pSrc,
  1161. int cchSrc,
  1162. LPWSTR pDest,
  1163. int cchDest)
  1164. {
  1165. int ctr = 0; // source char counter
  1166. int ctr2 = 0; // destination char counter
  1167. LPWSTR pEndDest; // ptr to end of destination string
  1168. WCHAR pTmp[MAX_COMPOSITE]; // tmp buffer for composite chars
  1169. //
  1170. // If the destination value is zero, then just return the
  1171. // length of the string that would be returned. Do NOT touch pDest.
  1172. //
  1173. if (cchDest == 0)
  1174. {
  1175. //
  1176. // Get the end of the tmp buffer.
  1177. //
  1178. pEndDest = (LPWSTR)pTmp + MAX_COMPOSITE;
  1179. //
  1180. // Convert the source string to precomposed and calculate the
  1181. // number of characters that would have been written to a
  1182. // destination buffer.
  1183. //
  1184. while (ctr < cchSrc)
  1185. {
  1186. //
  1187. // Write the character to the destination string.
  1188. //
  1189. *pTmp = pSrc[ctr];
  1190. //
  1191. // See if it needs to be expanded to its composite form.
  1192. //
  1193. // If no composite form is found, the routine returns 1 for
  1194. // the base character. Simply increment by the return value.
  1195. //
  1196. ctr2 += InsertCompositeForm(pTmp, pEndDest);
  1197. //
  1198. // Increment the source string counter.
  1199. //
  1200. ctr++;
  1201. }
  1202. }
  1203. else
  1204. {
  1205. //
  1206. // Get the end of the destination string.
  1207. //
  1208. pEndDest = (LPWSTR)pDest + cchDest;
  1209. //
  1210. // Convert the source string to precomposed and store it in the
  1211. // destination string.
  1212. //
  1213. while ((ctr < cchSrc) && (ctr2 < cchDest))
  1214. {
  1215. //
  1216. // Write the character to the destination string.
  1217. //
  1218. pDest[ctr2] = pSrc[ctr];
  1219. //
  1220. // See if it needs to be expanded to its composite form.
  1221. //
  1222. // If no composite form is found, the routine returns 1 for
  1223. // the base character. Simply increment by the return value.
  1224. //
  1225. ctr2 += InsertCompositeForm(&(pDest[ctr2]), pEndDest);
  1226. //
  1227. // Increment the source string counter.
  1228. //
  1229. ctr++;
  1230. }
  1231. }
  1232. //
  1233. // Make sure destination buffer was large enough.
  1234. //
  1235. if (ctr < cchSrc)
  1236. {
  1237. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1238. return (0);
  1239. }
  1240. //
  1241. // Return the number of wide characters written.
  1242. //
  1243. return (ctr2);
  1244. }
  1245. ////////////////////////////////////////////////////////////////////////////
  1246. //
  1247. // MapCase
  1248. //
  1249. // Stores the lower or upper case values for the given string in the
  1250. // destination buffer, and returns the number of wide characters written to
  1251. // the buffer.
  1252. //
  1253. // 05-31-91 JulieB Created.
  1254. ////////////////////////////////////////////////////////////////////////////
  1255. int MapCase(
  1256. PLOC_HASH pHashN,
  1257. LPCWSTR pSrc,
  1258. int cchSrc,
  1259. LPWSTR pDest,
  1260. int cchDest,
  1261. PCASE pCaseTbl)
  1262. {
  1263. int ctr; // loop counter
  1264. //
  1265. // If the destination value is zero, then just return the
  1266. // length of the source string. Do NOT touch pDest.
  1267. //
  1268. if (cchDest == 0)
  1269. {
  1270. return (cchSrc);
  1271. }
  1272. //
  1273. // If cchSrc is greater than cchDest, then the destination buffer
  1274. // is too small to hold the lower or upper case string. Return an
  1275. // error.
  1276. //
  1277. if (cchSrc > cchDest)
  1278. {
  1279. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1280. return (0);
  1281. }
  1282. //
  1283. // Lower or Upper case the source string and store it in the
  1284. // destination string.
  1285. //
  1286. for (ctr = 0; ctr < cchSrc; ctr++)
  1287. {
  1288. pDest[ctr] = GET_LOWER_UPPER_CASE(pCaseTbl, pSrc[ctr]);
  1289. }
  1290. //
  1291. // Return the number of wide characters written.
  1292. //
  1293. return (ctr);
  1294. }
  1295. ////////////////////////////////////////////////////////////////////////////
  1296. //
  1297. // SPECIAL_CASE_HANDLER
  1298. //
  1299. // Handles all of the special cases for each character. This includes only
  1300. // the valid values less than or equal to MAX_SPECIAL_CASE.
  1301. //
  1302. // DEFINED AS A MACRO.
  1303. //
  1304. // 11-04-92 JulieB Created.
  1305. ////////////////////////////////////////////////////////////////////////////
  1306. #define EXTRA_WEIGHT_POS(WtNum) (*(pPosXW + (WtNum * WeightLen)))
  1307. #define SPECIAL_CASE_HANDLER( SM, \
  1308. pWeight, \
  1309. pSortkey, \
  1310. pExpand, \
  1311. Position, \
  1312. fStringSort, \
  1313. fIgnoreSymbols, \
  1314. pCur, \
  1315. pBegin, \
  1316. fModify ) \
  1317. { \
  1318. PSORTKEY pExpWt; /* weight of 1 expansion char */ \
  1319. BYTE AW; /* alphanumeric weight */ \
  1320. BYTE XW; /* case weight value with extra bits */ \
  1321. DWORD PrevWt; /* previous weight */ \
  1322. BYTE PrevSM; /* previous script member */ \
  1323. BYTE PrevAW; /* previuos alphanumeric weight */ \
  1324. BYTE PrevCW; /* previuos case weight */ \
  1325. LPWSTR pPrev; /* ptr to previous char */ \
  1326. \
  1327. \
  1328. switch (SM) \
  1329. { \
  1330. case ( UNSORTABLE ) : \
  1331. { \
  1332. /* \
  1333. * Character is unsortable, so skip it. \
  1334. */ \
  1335. break; \
  1336. } \
  1337. case ( NONSPACE_MARK ) : \
  1338. { \
  1339. /* \
  1340. * Character is a nonspace mark, so only store \
  1341. * the diacritic weight. \
  1342. */ \
  1343. if (pPosDW > pDW) \
  1344. { \
  1345. (*(pPosDW - 1)) += GET_DIACRITIC(pWeight); \
  1346. } \
  1347. else \
  1348. { \
  1349. *pPosDW = GET_DIACRITIC(pWeight); \
  1350. pPosDW++; \
  1351. } \
  1352. \
  1353. break; \
  1354. } \
  1355. case ( EXPANSION ) : \
  1356. { \
  1357. /* \
  1358. * Expansion character - one character has 2 \
  1359. * different weights. Store each weight separately. \
  1360. */ \
  1361. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP1]); \
  1362. *pPosUW = GET_UNICODE_MOD(pExpWt, fModify); \
  1363. *pPosDW = GET_DIACRITIC(pExpWt); \
  1364. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1365. pPosUW++; \
  1366. pPosDW++; \
  1367. pPosCW++; \
  1368. \
  1369. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP2]); \
  1370. while (GET_SCRIPT_MEMBER(pExpWt) == EXPANSION) \
  1371. { \
  1372. pWeight = pExpWt; \
  1373. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP1]); \
  1374. *pPosUW = GET_UNICODE_MOD(pExpWt, fModify); \
  1375. *pPosDW = GET_DIACRITIC(pExpWt); \
  1376. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1377. pPosUW++; \
  1378. pPosDW++; \
  1379. pPosCW++; \
  1380. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP2]); \
  1381. } \
  1382. *pPosUW = GET_UNICODE_MOD(pExpWt, fModify); \
  1383. *pPosDW = GET_DIACRITIC(pExpWt); \
  1384. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1385. pPosUW++; \
  1386. pPosDW++; \
  1387. pPosCW++; \
  1388. \
  1389. break; \
  1390. } \
  1391. case ( PUNCTUATION ) : \
  1392. { \
  1393. if (!fStringSort) \
  1394. { \
  1395. /* \
  1396. * Word Sort Method. \
  1397. * \
  1398. * Character is punctuation, so only store the special \
  1399. * weight. \
  1400. */ \
  1401. *((LPBYTE)pPosSW) = HIBYTE(GET_POSITION_SW(Position)); \
  1402. *(((LPBYTE)pPosSW) + 1) = LOBYTE(GET_POSITION_SW(Position)); \
  1403. pPosSW++; \
  1404. *pPosSW = GET_SPECIAL_WEIGHT(pWeight); \
  1405. pPosSW++; \
  1406. \
  1407. break; \
  1408. } \
  1409. \
  1410. /* \
  1411. * If using STRING sort method, treat punctuation the same \
  1412. * as symbol. So, FALL THROUGH to the symbol cases. \
  1413. */ \
  1414. } \
  1415. case ( SYMBOL_1 ) : \
  1416. case ( SYMBOL_2 ) : \
  1417. case ( SYMBOL_3 ) : \
  1418. case ( SYMBOL_4 ) : \
  1419. case ( SYMBOL_5 ) : \
  1420. { \
  1421. /* \
  1422. * Character is a symbol. \
  1423. * Store the Unicode weights ONLY if the NORM_IGNORESYMBOLS \
  1424. * flag is NOT set. \
  1425. */ \
  1426. if (!fIgnoreSymbols) \
  1427. { \
  1428. *pPosUW = GET_UNICODE_MOD(pWeight, fModify); \
  1429. *pPosDW = GET_DIACRITIC(pWeight); \
  1430. *pPosCW = GET_CASE(pWeight) & CaseMask; \
  1431. pPosUW++; \
  1432. pPosDW++; \
  1433. pPosCW++; \
  1434. } \
  1435. \
  1436. break; \
  1437. } \
  1438. case ( FAREAST_SPECIAL ) : \
  1439. { \
  1440. /* \
  1441. * Get the alphanumeric weight and the case weight of the \
  1442. * current code point. \
  1443. */ \
  1444. AW = GET_ALPHA_NUMERIC(pWeight); \
  1445. XW = (GET_CASE(pWeight) & CaseMask) | CASE_XW_MASK; \
  1446. \
  1447. /* \
  1448. * Special case Repeat and Cho-On. \
  1449. * AW = 0 => Repeat \
  1450. * AW = 1 => Cho-On \
  1451. * AW = 2+ => Kana \
  1452. */ \
  1453. if (AW <= MAX_SPECIAL_AW) \
  1454. { \
  1455. /* \
  1456. * If the script member of the previous character is \
  1457. * invalid, then give the special character an \
  1458. * invalid weight (highest possible weight) so that it \
  1459. * will sort AFTER everything else. \
  1460. */ \
  1461. pPrev = pCur - 1; \
  1462. *pPosUW = MAP_INVALID_UW; \
  1463. while (pPrev >= pBegin) \
  1464. { \
  1465. PrevWt = GET_DWORD_WEIGHT(pHashN, *pPrev); \
  1466. PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \
  1467. if (PrevSM < FAREAST_SPECIAL) \
  1468. { \
  1469. if (PrevSM != EXPANSION) \
  1470. { \
  1471. /* \
  1472. * UNSORTABLE or NONSPACE_MARK. \
  1473. * \
  1474. * Just ignore these, since we only care \
  1475. * about the previous UW value. \
  1476. */ \
  1477. pPrev--; \
  1478. continue; \
  1479. } \
  1480. } \
  1481. else if (PrevSM == FAREAST_SPECIAL) \
  1482. { \
  1483. PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \
  1484. if (PrevAW <= MAX_SPECIAL_AW) \
  1485. { \
  1486. /* \
  1487. * Handle case where two special chars follow \
  1488. * each other. Keep going back in the string. \
  1489. */ \
  1490. pPrev--; \
  1491. continue; \
  1492. } \
  1493. \
  1494. *pPosUW = MAKE_UNICODE_WT(KANA, PrevAW, fModify); \
  1495. \
  1496. /* \
  1497. * Only build weights 4, 5, 6, and 7 if the \
  1498. * previous character is KANA. \
  1499. * \
  1500. * Always: \
  1501. * 4W = previous CW & ISOLATE_SMALL \
  1502. * 6W = previous CW & ISOLATE_KANA \
  1503. * \
  1504. */ \
  1505. PrevCW = (GET_CASE(&PrevWt) & CaseMask) | \
  1506. CASE_XW_MASK; \
  1507. \
  1508. EXTRA_WEIGHT_POS(0) = PrevCW & ISOLATE_SMALL; \
  1509. EXTRA_WEIGHT_POS(2) = PrevCW & ISOLATE_KANA; \
  1510. \
  1511. if (AW == AW_REPEAT) \
  1512. { \
  1513. /* \
  1514. * Repeat: \
  1515. * UW = previous UW (set above) \
  1516. * 5W = WT_FIVE_REPEAT \
  1517. * 7W = previous CW & ISOLATE_WIDTH \
  1518. */ \
  1519. EXTRA_WEIGHT_POS(1) = WT_FIVE_REPEAT; \
  1520. EXTRA_WEIGHT_POS(3) = PrevCW & ISOLATE_WIDTH; \
  1521. } \
  1522. else \
  1523. { \
  1524. /* \
  1525. * Cho-On: \
  1526. * UW = previous UW & CHO_ON_UW_MASK \
  1527. * 5W = WT_FIVE_CHO_ON \
  1528. * 7W = current CW & ISOLATE_WIDTH \
  1529. */ \
  1530. *pPosUW &= CHO_ON_UW_MASK; \
  1531. EXTRA_WEIGHT_POS(1) = WT_FIVE_CHO_ON; \
  1532. EXTRA_WEIGHT_POS(3) = XW & ISOLATE_WIDTH; \
  1533. } \
  1534. \
  1535. pPosXW++; \
  1536. } \
  1537. else \
  1538. { \
  1539. *pPosUW = GET_UNICODE_MOD(&PrevWt, fModify); \
  1540. } \
  1541. \
  1542. break; \
  1543. } \
  1544. \
  1545. /* \
  1546. * Make sure there is a valid UW. If not, quit out \
  1547. * of switch case. \
  1548. */ \
  1549. if (*pPosUW == MAP_INVALID_UW) \
  1550. { \
  1551. pPosUW++; \
  1552. break; \
  1553. } \
  1554. } \
  1555. else \
  1556. { \
  1557. /* \
  1558. * Kana: \
  1559. * SM = KANA \
  1560. * AW = current AW \
  1561. * 4W = current CW & ISOLATE_SMALL \
  1562. * 5W = WT_FIVE_KANA \
  1563. * 6W = current CW & ISOLATE_KANA \
  1564. * 7W = current CW & ISOLATE_WIDTH \
  1565. */ \
  1566. *pPosUW = MAKE_UNICODE_WT(KANA, AW, fModify); \
  1567. EXTRA_WEIGHT_POS(0) = XW & ISOLATE_SMALL; \
  1568. EXTRA_WEIGHT_POS(1) = WT_FIVE_KANA; \
  1569. EXTRA_WEIGHT_POS(2) = XW & ISOLATE_KANA; \
  1570. EXTRA_WEIGHT_POS(3) = XW & ISOLATE_WIDTH; \
  1571. \
  1572. pPosXW++; \
  1573. } \
  1574. \
  1575. /* \
  1576. * Always: \
  1577. * DW = current DW \
  1578. * CW = minimum CW \
  1579. */ \
  1580. *pPosDW = GET_DIACRITIC(pWeight); \
  1581. *pPosCW = MIN_CW; \
  1582. \
  1583. pPosUW++; \
  1584. pPosDW++; \
  1585. pPosCW++; \
  1586. \
  1587. break; \
  1588. } \
  1589. case ( JAMO_SPECIAL ) : \
  1590. { \
  1591. /* \
  1592. * See if it's a leading Jamo. \
  1593. */ \
  1594. if (IsLeadingJamo(*pPos)) \
  1595. { \
  1596. int OldHangulCount; /* number of old Hangul found */ \
  1597. WORD JamoUW; \
  1598. BYTE JamoXW[3]; \
  1599. \
  1600. /* \
  1601. * If the characters beginning from pPos are a valid old \
  1602. * Hangul composition, create the sortkey according to \
  1603. * the old Hangul rule. \
  1604. */ \
  1605. if ((OldHangulCount = \
  1606. MapOldHangulSortKey( pHashN, \
  1607. pPos, \
  1608. cchSrc - PosCtr, \
  1609. &JamoUW, \
  1610. JamoXW, \
  1611. fModify )) > 0) \
  1612. { \
  1613. *pPosUW = JamoUW; \
  1614. pPosUW++; \
  1615. *pPosUW = MAKE_UNICODE_WT(SM_UW_XW, JamoXW[0], FALSE); \
  1616. pPosUW++; \
  1617. *pPosUW = MAKE_UNICODE_WT(SM_UW_XW, JamoXW[1], FALSE); \
  1618. pPosUW++; \
  1619. *pPosUW = MAKE_UNICODE_WT(SM_UW_XW, JamoXW[2], FALSE); \
  1620. pPosUW++; \
  1621. \
  1622. *pPosDW = MIN_DW; \
  1623. *pPosCW = MIN_CW; \
  1624. pPosDW++; \
  1625. pPosCW++; \
  1626. \
  1627. /* \
  1628. * Decrement OldHangulCount because the for loop will \
  1629. * increase PosCtr and pPos as well. \
  1630. */ \
  1631. OldHangulCount--; \
  1632. PosCtr += OldHangulCount; \
  1633. pPos += OldHangulCount; \
  1634. \
  1635. break; \
  1636. } \
  1637. } \
  1638. \
  1639. /* \
  1640. * Otherwise, fall back to the normal behavior. \
  1641. * \
  1642. * No special case on character, so store the various \
  1643. * weights for the character. \
  1644. */ \
  1645. \
  1646. /* \
  1647. * We store the real script member in the diacritic weight \
  1648. * since both the diacritic weight and case weight are not \
  1649. * used in Korean. \
  1650. */ \
  1651. *pPosUW = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight), \
  1652. GET_ALPHA_NUMERIC(pWeight), \
  1653. fModify ); \
  1654. *pPosDW = MIN_DW; \
  1655. *pPosCW = GET_CASE(pWeight); \
  1656. pPosUW++; \
  1657. pPosDW++; \
  1658. pPosCW++; \
  1659. \
  1660. break; \
  1661. } \
  1662. case ( EXTENSION_A ) : \
  1663. { \
  1664. /* \
  1665. * UW = SM_EXT_A, AW_EXT_A, AW, DW \
  1666. * DW = miniumum DW \
  1667. * CW = minimum CW \
  1668. */ \
  1669. *pPosUW = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); \
  1670. pPosUW++; \
  1671. \
  1672. *pPosUW = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(pWeight), \
  1673. GET_DIACRITIC(pWeight), \
  1674. FALSE ); \
  1675. pPosUW++; \
  1676. \
  1677. *pPosDW = MIN_DW; \
  1678. *pPosCW = MIN_CW; \
  1679. pPosDW++; \
  1680. pPosCW++; \
  1681. \
  1682. break; \
  1683. } \
  1684. } \
  1685. }
  1686. ////////////////////////////////////////////////////////////////////////////
  1687. //
  1688. // MapSortKey
  1689. //
  1690. // Stores the sortkey weights for the given string in the destination
  1691. // buffer and returns the number of BYTES written to the buffer.
  1692. //
  1693. // 11-04-92 JulieB Created.
  1694. ////////////////////////////////////////////////////////////////////////////
  1695. int MapSortKey(
  1696. PLOC_HASH pHashN,
  1697. DWORD dwFlags,
  1698. LPCWSTR pSrc,
  1699. int cchSrc,
  1700. LPBYTE pDest,
  1701. int cbDest,
  1702. BOOL fModify)
  1703. {
  1704. register int WeightLen; // length of one set of weights
  1705. LPWSTR pUW; // ptr to Unicode Weights
  1706. LPBYTE pDW; // ptr to Diacritic Weights
  1707. LPBYTE pCW; // ptr to Case Weights
  1708. LPBYTE pXW; // ptr to Extra Weights
  1709. LPWSTR pSW; // ptr to Special Weights
  1710. LPWSTR pPosUW; // ptr to position in pUW buffer
  1711. LPBYTE pPosDW; // ptr to position in pDW buffer
  1712. LPBYTE pPosCW; // ptr to position in pCW buffer
  1713. LPBYTE pPosXW; // ptr to position in pXW buffer
  1714. LPWSTR pPosSW; // ptr to position in pSW buffer
  1715. PSORTKEY pWeight; // ptr to weight of character
  1716. BYTE SM; // script member value
  1717. BYTE CaseMask; // mask for case weight
  1718. int PosCtr; // position counter in string
  1719. LPWSTR pPos; // ptr to position in string
  1720. LPBYTE pTmp; // ptr to go through UW, XW, and SW
  1721. LPBYTE pPosTmp; // ptr to tmp position in XW
  1722. PCOMPRESS_2 pComp2; // ptr to compression 2 list
  1723. PCOMPRESS_3 pComp3; // ptr to compression 3 list
  1724. WORD pBuffer[MAX_SORTKEY_BUF_LEN]; // buffer to hold weights
  1725. int ctr; // loop counter
  1726. BOOL IfDblCompress; // if double compress possibility
  1727. BOOL fStringSort; // if using string sort method
  1728. BOOL fIgnoreSymbols; // if ignore symbols flag is set
  1729. //
  1730. // Make sure the sorting information is available in the system.
  1731. //
  1732. if ((pHashN->pSortkey == NULL) ||
  1733. (pHashN->IfIdeographFailure == TRUE))
  1734. {
  1735. KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
  1736. SetLastError(ERROR_FILE_NOT_FOUND);
  1737. return (0);
  1738. }
  1739. //
  1740. // See if the length of the string is too large for the static
  1741. // buffer. If so, allocate a buffer that is large enough.
  1742. //
  1743. if (cchSrc > MAX_SMALL_BUF_LEN)
  1744. {
  1745. //
  1746. // Allocate buffer to hold all of the weights.
  1747. // (cchSrc) * (max # of expansions) * (# of weights)
  1748. //
  1749. WeightLen = cchSrc * MAX_EXPANSION;
  1750. if ((pUW = (LPWSTR)NLS_ALLOC_MEM( WeightLen * MAX_WEIGHTS *
  1751. sizeof(WCHAR) )) == NULL)
  1752. {
  1753. SetLastError(ERROR_OUTOFMEMORY);
  1754. return (0);
  1755. }
  1756. }
  1757. else
  1758. {
  1759. WeightLen = MAX_SMALL_BUF_LEN * MAX_EXPANSION;
  1760. pUW = (LPWSTR)pBuffer;
  1761. }
  1762. //
  1763. // Set the case weight mask based on the given flags.
  1764. // If none or all of the ignore case flags are set, then
  1765. // just leave the mask as 0xff.
  1766. //
  1767. CaseMask = 0xff;
  1768. switch (dwFlags & NORM_ALL_CASE)
  1769. {
  1770. case ( NORM_IGNORECASE ) :
  1771. {
  1772. CaseMask &= CASE_UPPER_MASK;
  1773. break;
  1774. }
  1775. case ( NORM_IGNOREKANATYPE ) :
  1776. {
  1777. CaseMask &= CASE_KANA_MASK;
  1778. break;
  1779. }
  1780. case ( NORM_IGNOREWIDTH ) :
  1781. {
  1782. CaseMask &= CASE_WIDTH_MASK;
  1783. break;
  1784. }
  1785. case ( NORM_IGNORECASE | NORM_IGNOREKANATYPE ) :
  1786. {
  1787. CaseMask &= (CASE_UPPER_MASK & CASE_KANA_MASK);
  1788. break;
  1789. }
  1790. case ( NORM_IGNORECASE | NORM_IGNOREWIDTH ) :
  1791. {
  1792. CaseMask &= (CASE_UPPER_MASK & CASE_WIDTH_MASK);
  1793. break;
  1794. }
  1795. case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1796. {
  1797. CaseMask &= (CASE_KANA_MASK & CASE_WIDTH_MASK);
  1798. break;
  1799. }
  1800. case ( NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1801. {
  1802. CaseMask &= (CASE_UPPER_MASK & CASE_KANA_MASK & CASE_WIDTH_MASK);
  1803. break;
  1804. }
  1805. }
  1806. //
  1807. // Set pointers to positions of weights in buffer.
  1808. //
  1809. // UW => 4 word length (extension A and Jamo need extra words)
  1810. // DW => byte length
  1811. // CW => byte length
  1812. // XW => 4 byte length (4 weights, 1 byte each) FE Special
  1813. // SW => dword length (2 words each)
  1814. //
  1815. // Note: SW must start on a WORD boundary, so XW needs to be padded
  1816. // appropriately.
  1817. //
  1818. pDW = (LPBYTE)(pUW + (WeightLen * (NUM_BYTES_UW / sizeof(WCHAR))));
  1819. pCW = (LPBYTE)(pDW + (WeightLen * NUM_BYTES_DW));
  1820. pXW = (LPBYTE)(pCW + (WeightLen * NUM_BYTES_CW));
  1821. pSW = (LPWSTR)(pXW + (WeightLen * (NUM_BYTES_XW + NUM_BYTES_PADDING)));
  1822. pPosUW = pUW;
  1823. pPosDW = pDW;
  1824. pPosCW = pCW;
  1825. pPosXW = pXW;
  1826. pPosSW = pSW;
  1827. //
  1828. // Initialize flags and loop values.
  1829. //
  1830. fStringSort = dwFlags & SORT_STRINGSORT;
  1831. fIgnoreSymbols = dwFlags & NORM_IGNORESYMBOLS;
  1832. pPos = (LPWSTR)pSrc;
  1833. PosCtr = 1;
  1834. //
  1835. // Check if given locale has compressions.
  1836. //
  1837. if (pHashN->IfCompression == FALSE)
  1838. {
  1839. //
  1840. // Go through string, code point by code point.
  1841. //
  1842. // No compressions exist in the given locale, so
  1843. // DO NOT check for them.
  1844. //
  1845. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1846. {
  1847. //
  1848. // Get weights.
  1849. //
  1850. pWeight = &((pHashN->pSortkey)[*pPos]);
  1851. SM = GET_SCRIPT_MEMBER(pWeight);
  1852. if (SM > MAX_SPECIAL_CASE)
  1853. {
  1854. //
  1855. // No special case on character, so store the
  1856. // various weights for the character.
  1857. //
  1858. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  1859. *pPosDW = GET_DIACRITIC(pWeight);
  1860. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1861. pPosUW++;
  1862. pPosDW++;
  1863. pPosCW++;
  1864. }
  1865. else
  1866. {
  1867. SPECIAL_CASE_HANDLER( SM,
  1868. pWeight,
  1869. pHashN->pSortkey,
  1870. pTblPtrs->pExpansion,
  1871. pPosUW - pUW + 1,
  1872. fStringSort,
  1873. fIgnoreSymbols,
  1874. pPos,
  1875. (LPWSTR)pSrc,
  1876. fModify );
  1877. }
  1878. }
  1879. }
  1880. else if (pHashN->IfDblCompression == FALSE)
  1881. {
  1882. //
  1883. // Go through string, code point by code point.
  1884. //
  1885. // Compressions DO exist in the given locale, so
  1886. // check for them.
  1887. //
  1888. // No double compressions exist in the given locale,
  1889. // so DO NOT check for them.
  1890. //
  1891. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1892. {
  1893. //
  1894. // Get weights.
  1895. //
  1896. pWeight = &((pHashN->pSortkey)[*pPos]);
  1897. SM = GET_SCRIPT_MEMBER(pWeight);
  1898. if (SM > MAX_SPECIAL_CASE)
  1899. {
  1900. //
  1901. // No special case on character, but must check for
  1902. // compression characters.
  1903. //
  1904. switch (GET_COMPRESSION(pWeight))
  1905. {
  1906. case ( COMPRESS_3_MASK ) :
  1907. {
  1908. if ((PosCtr + 2) <= cchSrc)
  1909. {
  1910. ctr = pHashN->pCompHdr->Num3;
  1911. pComp3 = pHashN->pCompress3;
  1912. for (; ctr > 0; ctr--, pComp3++)
  1913. {
  1914. if ((pComp3->UCP1 == *pPos) &&
  1915. (pComp3->UCP2 == *(pPos + 1)) &&
  1916. (pComp3->UCP3 == *(pPos + 2)))
  1917. {
  1918. pWeight = &(pComp3->Weights);
  1919. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  1920. *pPosDW = GET_DIACRITIC(pWeight);
  1921. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1922. pPosUW++;
  1923. pPosDW++;
  1924. pPosCW++;
  1925. //
  1926. // Add only two to source, since one
  1927. // will be added by "for" structure.
  1928. //
  1929. pPos += 2;
  1930. PosCtr += 2;
  1931. break;
  1932. }
  1933. }
  1934. if (ctr > 0)
  1935. {
  1936. break;
  1937. }
  1938. }
  1939. //
  1940. // Fall through if not found.
  1941. //
  1942. }
  1943. case ( COMPRESS_2_MASK ) :
  1944. {
  1945. if ((PosCtr + 1) <= cchSrc)
  1946. {
  1947. ctr = pHashN->pCompHdr->Num2;
  1948. pComp2 = pHashN->pCompress2;
  1949. for (; ctr > 0; ctr--, pComp2++)
  1950. {
  1951. if ((pComp2->UCP1 == *pPos) &&
  1952. (pComp2->UCP2 == *(pPos + 1)))
  1953. {
  1954. pWeight = &(pComp2->Weights);
  1955. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  1956. *pPosDW = GET_DIACRITIC(pWeight);
  1957. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1958. pPosUW++;
  1959. pPosDW++;
  1960. pPosCW++;
  1961. //
  1962. // Add only one to source, since one
  1963. // will be added by "for" structure.
  1964. //
  1965. pPos++;
  1966. PosCtr++;
  1967. break;
  1968. }
  1969. }
  1970. if (ctr > 0)
  1971. {
  1972. break;
  1973. }
  1974. }
  1975. //
  1976. // Fall through if not found.
  1977. //
  1978. }
  1979. default :
  1980. {
  1981. //
  1982. // No possible compression for character, so store
  1983. // the various weights for the character.
  1984. //
  1985. *pPosUW = GET_UNICODE_SM_MOD(pWeight, SM, fModify);
  1986. *pPosDW = GET_DIACRITIC(pWeight);
  1987. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1988. pPosUW++;
  1989. pPosDW++;
  1990. pPosCW++;
  1991. }
  1992. }
  1993. }
  1994. else
  1995. {
  1996. SPECIAL_CASE_HANDLER( SM,
  1997. pWeight,
  1998. pHashN->pSortkey,
  1999. pTblPtrs->pExpansion,
  2000. pPosUW - pUW + 1,
  2001. fStringSort,
  2002. fIgnoreSymbols,
  2003. pPos,
  2004. (LPWSTR)pSrc,
  2005. fModify );
  2006. }
  2007. }
  2008. }
  2009. else
  2010. {
  2011. //
  2012. // Go through string, code point by code point.
  2013. //
  2014. // Compressions DO exist in the given locale, so
  2015. // check for them.
  2016. //
  2017. // Double Compressions also exist in the given locale,
  2018. // so check for them.
  2019. //
  2020. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  2021. {
  2022. //
  2023. // Get weights.
  2024. //
  2025. pWeight = &((pHashN->pSortkey)[*pPos]);
  2026. SM = GET_SCRIPT_MEMBER(pWeight);
  2027. if (SM > MAX_SPECIAL_CASE)
  2028. {
  2029. //
  2030. // No special case on character, but must check for
  2031. // compression characters and double compression
  2032. // characters.
  2033. //
  2034. IfDblCompress =
  2035. (((PosCtr + 1) <= cchSrc) &&
  2036. ((GET_DWORD_WEIGHT(pHashN, *pPos) & CMP_MASKOFF_CW) ==
  2037. (GET_DWORD_WEIGHT(pHashN, *(pPos + 1)) & CMP_MASKOFF_CW)))
  2038. ? 1
  2039. : 0;
  2040. switch (GET_COMPRESSION(pWeight))
  2041. {
  2042. case ( COMPRESS_3_MASK ) :
  2043. {
  2044. if (IfDblCompress)
  2045. {
  2046. if ((PosCtr + 3) <= cchSrc)
  2047. {
  2048. ctr = pHashN->pCompHdr->Num3;
  2049. pComp3 = pHashN->pCompress3;
  2050. for (; ctr > 0; ctr--, pComp3++)
  2051. {
  2052. if ((pComp3->UCP1 == *(pPos + 1)) &&
  2053. (pComp3->UCP2 == *(pPos + 2)) &&
  2054. (pComp3->UCP3 == *(pPos + 3)))
  2055. {
  2056. pWeight = &(pComp3->Weights);
  2057. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2058. *pPosDW = GET_DIACRITIC(pWeight);
  2059. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2060. *(pPosUW + 1) = *pPosUW;
  2061. *(pPosDW + 1) = *pPosDW;
  2062. *(pPosCW + 1) = *pPosCW;
  2063. pPosUW += 2;
  2064. pPosDW += 2;
  2065. pPosCW += 2;
  2066. //
  2067. // Add only three to source, since one
  2068. // will be added by "for" structure.
  2069. //
  2070. pPos += 3;
  2071. PosCtr += 3;
  2072. break;
  2073. }
  2074. }
  2075. if (ctr > 0)
  2076. {
  2077. break;
  2078. }
  2079. }
  2080. }
  2081. //
  2082. // Fall through if not found.
  2083. //
  2084. if ((PosCtr + 2) <= cchSrc)
  2085. {
  2086. ctr = pHashN->pCompHdr->Num3;
  2087. pComp3 = pHashN->pCompress3;
  2088. for (; ctr > 0; ctr--, pComp3++)
  2089. {
  2090. if ((pComp3->UCP1 == *pPos) &&
  2091. (pComp3->UCP2 == *(pPos + 1)) &&
  2092. (pComp3->UCP3 == *(pPos + 2)))
  2093. {
  2094. pWeight = &(pComp3->Weights);
  2095. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2096. *pPosDW = GET_DIACRITIC(pWeight);
  2097. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2098. pPosUW++;
  2099. pPosDW++;
  2100. pPosCW++;
  2101. //
  2102. // Add only two to source, since one
  2103. // will be added by "for" structure.
  2104. //
  2105. pPos += 2;
  2106. PosCtr += 2;
  2107. break;
  2108. }
  2109. }
  2110. if (ctr > 0)
  2111. {
  2112. break;
  2113. }
  2114. }
  2115. //
  2116. // Fall through if not found.
  2117. //
  2118. }
  2119. case ( COMPRESS_2_MASK ) :
  2120. {
  2121. if (IfDblCompress)
  2122. {
  2123. if ((PosCtr + 2) <= cchSrc)
  2124. {
  2125. ctr = pHashN->pCompHdr->Num2;
  2126. pComp2 = pHashN->pCompress2;
  2127. for (; ctr > 0; ctr--, pComp2++)
  2128. {
  2129. if ((pComp2->UCP1 == *(pPos + 1)) &&
  2130. (pComp2->UCP2 == *(pPos + 2)))
  2131. {
  2132. pWeight = &(pComp2->Weights);
  2133. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2134. *pPosDW = GET_DIACRITIC(pWeight);
  2135. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2136. *(pPosUW + 1) = *pPosUW;
  2137. *(pPosDW + 1) = *pPosDW;
  2138. *(pPosCW + 1) = *pPosCW;
  2139. pPosUW += 2;
  2140. pPosDW += 2;
  2141. pPosCW += 2;
  2142. //
  2143. // Add only two to source, since one
  2144. // will be added by "for" structure.
  2145. //
  2146. pPos += 2;
  2147. PosCtr += 2;
  2148. break;
  2149. }
  2150. }
  2151. if (ctr > 0)
  2152. {
  2153. break;
  2154. }
  2155. }
  2156. }
  2157. //
  2158. // Fall through if not found.
  2159. //
  2160. if ((PosCtr + 1) <= cchSrc)
  2161. {
  2162. ctr = pHashN->pCompHdr->Num2;
  2163. pComp2 = pHashN->pCompress2;
  2164. for (; ctr > 0; ctr--, pComp2++)
  2165. {
  2166. if ((pComp2->UCP1 == *pPos) &&
  2167. (pComp2->UCP2 == *(pPos + 1)))
  2168. {
  2169. pWeight = &(pComp2->Weights);
  2170. *pPosUW = GET_UNICODE_MOD(pWeight, fModify);
  2171. *pPosDW = GET_DIACRITIC(pWeight);
  2172. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2173. pPosUW++;
  2174. pPosDW++;
  2175. pPosCW++;
  2176. //
  2177. // Add only one to source, since one
  2178. // will be added by "for" structure.
  2179. //
  2180. pPos++;
  2181. PosCtr++;
  2182. break;
  2183. }
  2184. }
  2185. if (ctr > 0)
  2186. {
  2187. break;
  2188. }
  2189. }
  2190. //
  2191. // Fall through if not found.
  2192. //
  2193. }
  2194. default :
  2195. {
  2196. //
  2197. // No possible compression for character, so store
  2198. // the various weights for the character.
  2199. //
  2200. *pPosUW = GET_UNICODE_SM_MOD(pWeight, SM, fModify);
  2201. *pPosDW = GET_DIACRITIC(pWeight);
  2202. *pPosCW = GET_CASE(pWeight) & CaseMask;
  2203. pPosUW++;
  2204. pPosDW++;
  2205. pPosCW++;
  2206. }
  2207. }
  2208. }
  2209. else
  2210. {
  2211. SPECIAL_CASE_HANDLER( SM,
  2212. pWeight,
  2213. pHashN->pSortkey,
  2214. pTblPtrs->pExpansion,
  2215. pPosUW - pUW + 1,
  2216. fStringSort,
  2217. fIgnoreSymbols,
  2218. pPos,
  2219. (LPWSTR)pSrc,
  2220. fModify );
  2221. }
  2222. }
  2223. }
  2224. //
  2225. // Store the final sortkey weights in the destination buffer.
  2226. //
  2227. // PosCtr will be a BYTE count.
  2228. //
  2229. PosCtr = 0;
  2230. //
  2231. // If the destination value is zero, then just return the
  2232. // length of the string that would be returned. Do NOT touch pDest.
  2233. //
  2234. if (cbDest == 0)
  2235. {
  2236. //
  2237. // Count the Unicode Weights.
  2238. //
  2239. PosCtr += (int)((LPBYTE)pPosUW - (LPBYTE)pUW);
  2240. //
  2241. // Count the Separator.
  2242. //
  2243. PosCtr++;
  2244. //
  2245. // Count the Diacritic Weights.
  2246. //
  2247. // - Eliminate minimum DW.
  2248. // - Count the number of diacritic weights.
  2249. //
  2250. if (!(dwFlags & NORM_IGNORENONSPACE))
  2251. {
  2252. pPosDW--;
  2253. if (pHashN->IfReverseDW == TRUE)
  2254. {
  2255. //
  2256. // Reverse diacritics:
  2257. // - remove diacritics from left to right.
  2258. // - count diacritics from right to left.
  2259. //
  2260. while ((pDW <= pPosDW) && (*pDW <= MIN_DW))
  2261. {
  2262. pDW++;
  2263. }
  2264. PosCtr += (int)(pPosDW - pDW + 1);
  2265. }
  2266. else
  2267. {
  2268. //
  2269. // Regular diacritics:
  2270. // - remove diacritics from right to left.
  2271. // - count diacritics from left to right.
  2272. //
  2273. while ((pPosDW >= pDW) && (*pPosDW <= MIN_DW))
  2274. {
  2275. pPosDW--;
  2276. }
  2277. PosCtr += (int)(pPosDW - pDW + 1);
  2278. }
  2279. }
  2280. //
  2281. // Count the Separator.
  2282. //
  2283. PosCtr++;
  2284. //
  2285. // Count the Case Weights.
  2286. //
  2287. // - Eliminate minimum CW.
  2288. // - Count the number of case weights.
  2289. //
  2290. if ((dwFlags & NORM_DROP_CW) != NORM_DROP_CW)
  2291. {
  2292. pPosCW--;
  2293. while ((pPosCW >= pCW) && (*pPosCW <= MIN_CW))
  2294. {
  2295. pPosCW--;
  2296. }
  2297. PosCtr += (int)(pPosCW - pCW + 1);
  2298. }
  2299. //
  2300. // Count the Separator.
  2301. //
  2302. PosCtr++;
  2303. //
  2304. // Count the Extra Weights for Far East Special.
  2305. //
  2306. // - Eliminate unnecessary XW.
  2307. // - Count the number of extra weights and separators.
  2308. //
  2309. if (pXW < pPosXW)
  2310. {
  2311. if (dwFlags & NORM_IGNORENONSPACE)
  2312. {
  2313. //
  2314. // Ignore 4W and 5W. Must count separators for
  2315. // 4W and 5W, though.
  2316. //
  2317. PosCtr += 2;
  2318. ctr = 2;
  2319. }
  2320. else
  2321. {
  2322. ctr = 0;
  2323. }
  2324. pPosXW--;
  2325. for (; ctr < NUM_BYTES_XW; ctr++)
  2326. {
  2327. pTmp = pXW + (WeightLen * ctr);
  2328. pPosTmp = pPosXW + (WeightLen * ctr);
  2329. while ((pPosTmp >= pTmp) && (*pPosTmp == pXWDrop[ctr]))
  2330. {
  2331. pPosTmp--;
  2332. }
  2333. PosCtr += (int)(pPosTmp - pTmp + 1);
  2334. //
  2335. // Count the Separator.
  2336. //
  2337. PosCtr++;
  2338. }
  2339. }
  2340. //
  2341. // Count the Separator.
  2342. //
  2343. PosCtr++;
  2344. //
  2345. // Count the Special Weights.
  2346. //
  2347. if (!fIgnoreSymbols)
  2348. {
  2349. PosCtr += (int)((LPBYTE)pPosSW - (LPBYTE)pSW);
  2350. }
  2351. //
  2352. // Count the Terminator.
  2353. //
  2354. PosCtr++;
  2355. }
  2356. else
  2357. {
  2358. //
  2359. // Store the Unicode Weights in the destination buffer.
  2360. //
  2361. // - Make sure destination buffer is large enough.
  2362. // - Copy unicode weights to destination buffer.
  2363. //
  2364. // NOTE: cbDest is the number of BYTES.
  2365. // Also, must add one to length for separator.
  2366. //
  2367. if (cbDest < (((LPBYTE)pPosUW - (LPBYTE)pUW) + 1))
  2368. {
  2369. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2370. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2371. return (0);
  2372. }
  2373. pTmp = (LPBYTE)pUW;
  2374. while (pTmp < (LPBYTE)pPosUW)
  2375. {
  2376. //
  2377. // Copy Unicode weight to destination buffer.
  2378. //
  2379. // NOTE: Unicode Weight is stored in the data file as
  2380. // Alphanumeric Weight, Script Member
  2381. // so that the WORD value will be read correctly.
  2382. //
  2383. pDest[PosCtr] = *(pTmp + 1);
  2384. pDest[PosCtr + 1] = *pTmp;
  2385. PosCtr += 2;
  2386. pTmp += 2;
  2387. }
  2388. //
  2389. // Copy Separator to destination buffer.
  2390. //
  2391. // Destination buffer is large enough to hold the separator,
  2392. // since it was checked with the Unicode weights above.
  2393. //
  2394. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2395. PosCtr++;
  2396. //
  2397. // Store the Diacritic Weights in the destination buffer.
  2398. //
  2399. // - Eliminate minimum DW.
  2400. // - Make sure destination buffer is large enough.
  2401. // - Copy diacritic weights to destination buffer.
  2402. //
  2403. if (!(dwFlags & NORM_IGNORENONSPACE))
  2404. {
  2405. pPosDW--;
  2406. if (pHashN->IfReverseDW == TRUE)
  2407. {
  2408. //
  2409. // Reverse diacritics:
  2410. // - remove diacritics from left to right.
  2411. // - store diacritics from right to left.
  2412. //
  2413. while ((pDW <= pPosDW) && (*pDW <= MIN_DW))
  2414. {
  2415. pDW++;
  2416. }
  2417. if ((cbDest - PosCtr) <= (pPosDW - pDW + 1))
  2418. {
  2419. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2420. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2421. return (0);
  2422. }
  2423. while (pPosDW >= pDW)
  2424. {
  2425. pDest[PosCtr] = *pPosDW;
  2426. PosCtr++;
  2427. pPosDW--;
  2428. }
  2429. }
  2430. else
  2431. {
  2432. //
  2433. // Regular diacritics:
  2434. // - remove diacritics from right to left.
  2435. // - store diacritics from left to right.
  2436. //
  2437. while ((pPosDW >= pDW) && (*pPosDW <= MIN_DW))
  2438. {
  2439. pPosDW--;
  2440. }
  2441. if ((cbDest - PosCtr) <= (pPosDW - pDW + 1))
  2442. {
  2443. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2444. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2445. return (0);
  2446. }
  2447. while (pDW <= pPosDW)
  2448. {
  2449. pDest[PosCtr] = *pDW;
  2450. PosCtr++;
  2451. pDW++;
  2452. }
  2453. }
  2454. }
  2455. //
  2456. // Copy Separator to destination buffer if the destination
  2457. // buffer is large enough.
  2458. //
  2459. if (PosCtr == cbDest)
  2460. {
  2461. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2462. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2463. return (0);
  2464. }
  2465. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2466. PosCtr++;
  2467. //
  2468. // Store the Case Weights in the destination buffer.
  2469. //
  2470. // - Eliminate minimum CW.
  2471. // - Make sure destination buffer is large enough.
  2472. // - Copy case weights to destination buffer.
  2473. //
  2474. if ((dwFlags & NORM_DROP_CW) != NORM_DROP_CW)
  2475. {
  2476. pPosCW--;
  2477. while ((pPosCW >= pCW) && (*pPosCW <= MIN_CW))
  2478. {
  2479. pPosCW--;
  2480. }
  2481. if ((cbDest - PosCtr) <= (pPosCW - pCW + 1))
  2482. {
  2483. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2484. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2485. return (0);
  2486. }
  2487. while (pCW <= pPosCW)
  2488. {
  2489. pDest[PosCtr] = *pCW;
  2490. PosCtr++;
  2491. pCW++;
  2492. }
  2493. }
  2494. //
  2495. // Copy Separator to destination buffer if the destination
  2496. // buffer is large enough.
  2497. //
  2498. if (PosCtr == cbDest)
  2499. {
  2500. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2501. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2502. return (0);
  2503. }
  2504. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2505. PosCtr++;
  2506. //
  2507. // Store the Extra Weights in the destination buffer for
  2508. // Far East Special.
  2509. //
  2510. // - Eliminate unnecessary XW.
  2511. // - Make sure destination buffer is large enough.
  2512. // - Copy extra weights to destination buffer.
  2513. //
  2514. if (pXW < pPosXW)
  2515. {
  2516. if (dwFlags & NORM_IGNORENONSPACE)
  2517. {
  2518. //
  2519. // Ignore 4W and 5W. Must count separators for
  2520. // 4W and 5W, though.
  2521. //
  2522. if ((cbDest - PosCtr) <= 2)
  2523. {
  2524. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2525. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2526. return (0);
  2527. }
  2528. pDest[PosCtr] = pXWSeparator[0];
  2529. pDest[PosCtr + 1] = pXWSeparator[1];
  2530. PosCtr += 2;
  2531. ctr = 2;
  2532. }
  2533. else
  2534. {
  2535. ctr = 0;
  2536. }
  2537. pPosXW--;
  2538. for (; ctr < NUM_BYTES_XW; ctr++)
  2539. {
  2540. pTmp = pXW + (WeightLen * ctr);
  2541. pPosTmp = pPosXW + (WeightLen * ctr);
  2542. while ((pPosTmp >= pTmp) && (*pPosTmp == pXWDrop[ctr]))
  2543. {
  2544. pPosTmp--;
  2545. }
  2546. if ((cbDest - PosCtr) <= (pPosTmp - pTmp + 1))
  2547. {
  2548. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2549. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2550. return (0);
  2551. }
  2552. while (pTmp <= pPosTmp)
  2553. {
  2554. pDest[PosCtr] = *pTmp;
  2555. PosCtr++;
  2556. pTmp++;
  2557. }
  2558. //
  2559. // Copy Separator to destination buffer.
  2560. //
  2561. pDest[PosCtr] = pXWSeparator[ctr];
  2562. PosCtr++;
  2563. }
  2564. }
  2565. //
  2566. // Copy Separator to destination buffer if the destination
  2567. // buffer is large enough.
  2568. //
  2569. if (PosCtr == cbDest)
  2570. {
  2571. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2572. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2573. return (0);
  2574. }
  2575. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2576. PosCtr++;
  2577. //
  2578. // Store the Special Weights in the destination buffer.
  2579. //
  2580. // - Make sure destination buffer is large enough.
  2581. // - Copy special weights to destination buffer.
  2582. //
  2583. if (!fIgnoreSymbols)
  2584. {
  2585. if ((cbDest - PosCtr) <= (((LPBYTE)pPosSW - (LPBYTE)pSW)))
  2586. {
  2587. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2588. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2589. return (0);
  2590. }
  2591. pTmp = (LPBYTE)pSW;
  2592. while (pTmp < (LPBYTE)pPosSW)
  2593. {
  2594. pDest[PosCtr] = *pTmp;
  2595. pDest[PosCtr + 1] = *(pTmp + 1);
  2596. //
  2597. // NOTE: Special Weight is stored in the data file as
  2598. // Weight, Script
  2599. // so that the WORD value will be read correctly.
  2600. //
  2601. pDest[PosCtr + 2] = *(pTmp + 3);
  2602. pDest[PosCtr + 3] = *(pTmp + 2);
  2603. PosCtr += 4;
  2604. pTmp += 4;
  2605. }
  2606. }
  2607. //
  2608. // Copy Terminator to destination buffer if the destination
  2609. // buffer is large enough.
  2610. //
  2611. if (PosCtr == cbDest)
  2612. {
  2613. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2614. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2615. return (0);
  2616. }
  2617. pDest[PosCtr] = SORTKEY_TERMINATOR;
  2618. PosCtr++;
  2619. }
  2620. //
  2621. // Free the buffer used for the weights, if one was allocated.
  2622. //
  2623. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2624. //
  2625. // Return number of BYTES written to destination buffer.
  2626. //
  2627. return (PosCtr);
  2628. }
  2629. ////////////////////////////////////////////////////////////////////////////
  2630. //
  2631. // MapNormalization
  2632. //
  2633. // Stores the result of the normalization for the given string in the
  2634. // destination buffer, and returns the number of wide characters written
  2635. // to the buffer.
  2636. //
  2637. // 11-04-92 JulieB Created.
  2638. ////////////////////////////////////////////////////////////////////////////
  2639. int MapNormalization(
  2640. PLOC_HASH pHashN,
  2641. DWORD dwFlags,
  2642. LPCWSTR pSrc,
  2643. int cchSrc,
  2644. LPWSTR pDest,
  2645. int cchDest)
  2646. {
  2647. int ctr; // source char counter
  2648. int ctr2 = 0; // destination char counter
  2649. //
  2650. // Make sure the ctype table is available in the system.
  2651. //
  2652. if (GetCTypeFileInfo())
  2653. {
  2654. SetLastError(ERROR_FILE_NOT_FOUND);
  2655. return (0);
  2656. }
  2657. //
  2658. // Make sure the sorting information is available in the system.
  2659. //
  2660. if ((pHashN->pSortkey == NULL) ||
  2661. (pHashN->IfIdeographFailure == TRUE))
  2662. {
  2663. KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
  2664. SetLastError(ERROR_FILE_NOT_FOUND);
  2665. return (0);
  2666. }
  2667. //
  2668. // Normalize based on the flags.
  2669. //
  2670. switch (dwFlags)
  2671. {
  2672. case ( NORM_IGNORENONSPACE ) :
  2673. {
  2674. //
  2675. // If the destination value is zero, then only return
  2676. // the count of characters. Do NOT touch pDest.
  2677. //
  2678. if (cchDest == 0)
  2679. {
  2680. //
  2681. // Count the number of characters that would be written
  2682. // to the destination buffer.
  2683. //
  2684. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2685. {
  2686. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2687. {
  2688. //
  2689. // Not a nonspacing character, so just write the
  2690. // character to the destination string.
  2691. //
  2692. ctr2++;
  2693. }
  2694. else if (!(IS_NONSPACE_ONLY(pHashN->pSortkey, pSrc[ctr])))
  2695. {
  2696. //
  2697. // PreComposed Form. Write the base character only.
  2698. //
  2699. ctr2++;
  2700. }
  2701. //
  2702. // Else - nonspace character only, so don't write
  2703. // anything.
  2704. //
  2705. }
  2706. }
  2707. else
  2708. {
  2709. //
  2710. // Store the normalized string in the destination string.
  2711. //
  2712. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2713. ctr++)
  2714. {
  2715. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2716. {
  2717. //
  2718. // Not a nonspacing character, so just write the
  2719. // character to the destination string.
  2720. //
  2721. pDest[ctr2] = pSrc[ctr];
  2722. ctr2++;
  2723. }
  2724. else if (!(IS_NONSPACE_ONLY(pHashN->pSortkey, pSrc[ctr])))
  2725. {
  2726. //
  2727. // PreComposed Form. Write the base character only.
  2728. //
  2729. GET_BASE_CHAR(pSrc[ctr], pDest[ctr2]);
  2730. if (pDest[ctr2] == 0)
  2731. {
  2732. //
  2733. // No translation for precomposed character,
  2734. // so must write the precomposed character.
  2735. //
  2736. pDest[ctr2] = pSrc[ctr];
  2737. }
  2738. ctr2++;
  2739. }
  2740. //
  2741. // Else - nonspace character only, so don't write
  2742. // anything.
  2743. //
  2744. }
  2745. }
  2746. break;
  2747. }
  2748. case ( NORM_IGNORESYMBOLS ) :
  2749. {
  2750. //
  2751. // If the destination value is zero, then only return
  2752. // the count of characters. Do NOT touch pDest.
  2753. //
  2754. if (cchDest == 0)
  2755. {
  2756. //
  2757. // Count the number of characters that would be written
  2758. // to the destination buffer.
  2759. //
  2760. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2761. {
  2762. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2763. {
  2764. //
  2765. // Not a symbol, so write the character.
  2766. //
  2767. ctr2++;
  2768. }
  2769. }
  2770. }
  2771. else
  2772. {
  2773. //
  2774. // Store the normalized string in the destination string.
  2775. //
  2776. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2777. ctr++)
  2778. {
  2779. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2780. {
  2781. //
  2782. // Not a symbol, so write the character.
  2783. //
  2784. pDest[ctr2] = pSrc[ctr];
  2785. ctr2++;
  2786. }
  2787. }
  2788. }
  2789. break;
  2790. }
  2791. case ( NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS ) :
  2792. {
  2793. //
  2794. // If the destination value is zero, then only return
  2795. // the count of characters. Do NOT touch pDest.
  2796. //
  2797. if (cchDest == 0)
  2798. {
  2799. //
  2800. // Count the number of characters that would be written
  2801. // to the destination buffer.
  2802. //
  2803. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2804. {
  2805. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2806. {
  2807. //
  2808. // Not a symbol, so check for nonspace.
  2809. //
  2810. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2811. {
  2812. //
  2813. // Not a nonspacing character, so just write the
  2814. // character to the destination string.
  2815. //
  2816. ctr2++;
  2817. }
  2818. else if (!(IS_NONSPACE_ONLY( pHashN->pSortkey,
  2819. pSrc[ctr] )))
  2820. {
  2821. //
  2822. // PreComposed Form. Write the base character
  2823. // only.
  2824. //
  2825. ctr2++;
  2826. }
  2827. //
  2828. // Else - nonspace character only, so don't write
  2829. // anything.
  2830. //
  2831. }
  2832. }
  2833. }
  2834. else
  2835. {
  2836. //
  2837. // Store the normalized string in the destination string.
  2838. //
  2839. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2840. ctr++)
  2841. {
  2842. //
  2843. // Check for symbol and nonspace.
  2844. //
  2845. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2846. {
  2847. //
  2848. // Not a symbol, so check for nonspace.
  2849. //
  2850. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2851. {
  2852. //
  2853. // Not a nonspacing character, so just write the
  2854. // character to the destination string.
  2855. //
  2856. pDest[ctr2] = pSrc[ctr];
  2857. ctr2++;
  2858. }
  2859. else if (!(IS_NONSPACE_ONLY( pHashN->pSortkey,
  2860. pSrc[ctr] )))
  2861. {
  2862. //
  2863. // PreComposed Form. Write the base character
  2864. // only.
  2865. //
  2866. GET_BASE_CHAR(pSrc[ctr], pDest[ctr2]);
  2867. if (pDest[ctr2] == 0)
  2868. {
  2869. //
  2870. // No translation for precomposed character,
  2871. // so must write the precomposed character.
  2872. //
  2873. pDest[ctr2] = pSrc[ctr];
  2874. }
  2875. ctr2++;
  2876. }
  2877. //
  2878. // Else - nonspace character only, so don't write
  2879. // anything.
  2880. //
  2881. }
  2882. }
  2883. }
  2884. break;
  2885. }
  2886. }
  2887. //
  2888. // Return the number of wide characters written.
  2889. //
  2890. return (ctr2);
  2891. }
  2892. ////////////////////////////////////////////////////////////////////////////
  2893. //
  2894. // MapKanaWidth
  2895. //
  2896. // Stores the result of the Kana, Width, and/or Casing mappings for the
  2897. // given string in the destination buffer, and returns the number of wide
  2898. // characters written to the buffer.
  2899. //
  2900. // 07-26-93 JulieB Created.
  2901. ////////////////////////////////////////////////////////////////////////////
  2902. int MapKanaWidth(
  2903. PLOC_HASH pHashN,
  2904. DWORD dwFlags,
  2905. LPCWSTR pSrc,
  2906. int cchSrc,
  2907. LPWSTR pDest,
  2908. int cchDest)
  2909. {
  2910. int ctr; // loop counter
  2911. PCASE pCase; // ptr to case table (if case flag is set)
  2912. //
  2913. // See if lower or upper case flags are present.
  2914. //
  2915. if (dwFlags & LCMAP_LOWERCASE)
  2916. {
  2917. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  2918. ? pHashN->pLowerLinguist
  2919. : pHashN->pLowerCase;
  2920. }
  2921. else if (dwFlags & LCMAP_UPPERCASE)
  2922. {
  2923. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  2924. ? pHashN->pUpperLinguist
  2925. : pHashN->pUpperCase;
  2926. }
  2927. else
  2928. {
  2929. pCase = NULL;
  2930. }
  2931. //
  2932. // Remove lower, upper, and linguistic casing flags.
  2933. //
  2934. dwFlags &= ~(LCMAP_LOWERCASE | LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING);
  2935. //
  2936. // Map the string based on the given flags.
  2937. //
  2938. switch (dwFlags)
  2939. {
  2940. case ( LCMAP_HIRAGANA ) :
  2941. case ( LCMAP_KATAKANA ) :
  2942. {
  2943. //
  2944. // If the destination value is zero, then just return the
  2945. // length of the source string. Do NOT touch pDest.
  2946. //
  2947. if (cchDest == 0)
  2948. {
  2949. return (cchSrc);
  2950. }
  2951. //
  2952. // If cchSrc is greater than cchDest, then the destination
  2953. // buffer is too small to hold the string. Return an error.
  2954. //
  2955. if (cchSrc > cchDest)
  2956. {
  2957. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2958. return (0);
  2959. }
  2960. if (dwFlags == LCMAP_HIRAGANA)
  2961. {
  2962. //
  2963. // Map all Katakana full width to Hiragana full width.
  2964. // Katakana half width will remain Katakana half width.
  2965. //
  2966. if (pCase)
  2967. {
  2968. for (ctr = 0; ctr < cchSrc; ctr++)
  2969. {
  2970. pDest[ctr] = GET_KANA(pTblPtrs->pHiragana, pSrc[ctr]);
  2971. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  2972. }
  2973. }
  2974. else
  2975. {
  2976. for (ctr = 0; ctr < cchSrc; ctr++)
  2977. {
  2978. pDest[ctr] = GET_KANA(pTblPtrs->pHiragana, pSrc[ctr]);
  2979. }
  2980. }
  2981. }
  2982. else
  2983. {
  2984. //
  2985. // Map all Hiragana full width to Katakana full width.
  2986. // Hiragana half width does not exist.
  2987. //
  2988. if (pCase)
  2989. {
  2990. for (ctr = 0; ctr < cchSrc; ctr++)
  2991. {
  2992. pDest[ctr] = GET_KANA(pTblPtrs->pKatakana, pSrc[ctr]);
  2993. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  2994. }
  2995. }
  2996. else
  2997. {
  2998. for (ctr = 0; ctr < cchSrc; ctr++)
  2999. {
  3000. pDest[ctr] = GET_KANA(pTblPtrs->pKatakana, pSrc[ctr]);
  3001. }
  3002. }
  3003. }
  3004. //
  3005. // Return the number of characters mapped.
  3006. //
  3007. return (cchSrc);
  3008. break;
  3009. }
  3010. case ( LCMAP_HALFWIDTH ) :
  3011. {
  3012. //
  3013. // Map all chars to half width.
  3014. //
  3015. return (MapHalfKana( pSrc,
  3016. cchSrc,
  3017. pDest,
  3018. cchDest,
  3019. NULL,
  3020. pCase ));
  3021. break;
  3022. }
  3023. case ( LCMAP_FULLWIDTH ) :
  3024. {
  3025. //
  3026. // Map all chars to full width.
  3027. //
  3028. return (MapFullKana( pSrc,
  3029. cchSrc,
  3030. pDest,
  3031. cchDest,
  3032. NULL,
  3033. pCase ));
  3034. break;
  3035. }
  3036. case ( LCMAP_HIRAGANA | LCMAP_HALFWIDTH ) :
  3037. {
  3038. //
  3039. // This combination of flags is strange, because
  3040. // Hiragana is only full width. So, the Hiragana flag
  3041. // is the most important. Full width Katakana will be
  3042. // mapped to full width Hiragana, not half width
  3043. // Katakana.
  3044. //
  3045. // Map to Hiragana, then Half Width.
  3046. //
  3047. return (MapHalfKana( pSrc,
  3048. cchSrc,
  3049. pDest,
  3050. cchDest,
  3051. pTblPtrs->pHiragana,
  3052. pCase ));
  3053. break;
  3054. }
  3055. case ( LCMAP_HIRAGANA | LCMAP_FULLWIDTH ) :
  3056. {
  3057. //
  3058. // Since Hiragana is only FULL width, the mapping to
  3059. // width must be done first to convert all half width
  3060. // Katakana to full width Katakana before trying to
  3061. // map to Hiragana.
  3062. //
  3063. // Map to Full Width, then Hiragana.
  3064. //
  3065. return (MapFullKana( pSrc,
  3066. cchSrc,
  3067. pDest,
  3068. cchDest,
  3069. pTblPtrs->pHiragana,
  3070. pCase ));
  3071. break;
  3072. }
  3073. case ( LCMAP_KATAKANA | LCMAP_HALFWIDTH ) :
  3074. {
  3075. //
  3076. // Since Hiragana is only FULL width, the mapping to
  3077. // Katakana must be done first to convert all Hiragana
  3078. // to Katakana before trying to map to half width.
  3079. //
  3080. // Map to Katakana, then Half Width.
  3081. //
  3082. return (MapHalfKana( pSrc,
  3083. cchSrc,
  3084. pDest,
  3085. cchDest,
  3086. pTblPtrs->pKatakana,
  3087. pCase ));
  3088. break;
  3089. }
  3090. case ( LCMAP_KATAKANA | LCMAP_FULLWIDTH ) :
  3091. {
  3092. //
  3093. // Since Hiragana is only FULL width, it doesn't matter
  3094. // which way the mapping is done for this combination.
  3095. //
  3096. // Map to Full Width, then Katakana.
  3097. //
  3098. return (MapFullKana( pSrc,
  3099. cchSrc,
  3100. pDest,
  3101. cchDest,
  3102. pTblPtrs->pKatakana,
  3103. pCase ));
  3104. break;
  3105. }
  3106. default :
  3107. {
  3108. //
  3109. // Return error.
  3110. //
  3111. return (0);
  3112. }
  3113. }
  3114. }
  3115. ////////////////////////////////////////////////////////////////////////////
  3116. //
  3117. // MapHalfKana
  3118. //
  3119. // Stores the result of the half width and Kana mapping for the given string
  3120. // in the destination buffer, and returns the number of wide characters
  3121. // written to the buffer.
  3122. //
  3123. // This first converts the precomposed characters to their composite forms,
  3124. // and then maps all characters to their half width forms. This handles the
  3125. // case where the full width precomposed form should map to TWO half width
  3126. // code points (composite form). The half width precomposed forms do not
  3127. // exist in Unicode.
  3128. //
  3129. // 11-04-93 JulieB Created.
  3130. ////////////////////////////////////////////////////////////////////////////
  3131. int MapHalfKana(
  3132. LPCWSTR pSrc,
  3133. int cchSrc,
  3134. LPWSTR pDest,
  3135. int cchDest,
  3136. PKANA pKana,
  3137. PCASE pCase)
  3138. {
  3139. int Count; // count of characters written
  3140. int ctr = 0; // loop counter
  3141. int ct; // loop counter
  3142. LPWSTR pBuf; // ptr to destination buffer
  3143. LPWSTR pEndBuf; // ptr to end of destination buffer
  3144. LPWSTR pPosDest; // ptr to position in destination buffer
  3145. LPWSTR *ppIncr; // points to ptr to increment
  3146. WCHAR pTmp[MAX_COMPOSITE]; // ptr to temporary buffer
  3147. LPWSTR pEndTmp; // ptr to end of temporary buffer
  3148. //
  3149. // Initialize the destination pointers.
  3150. //
  3151. pEndTmp = pTmp + MAX_COMPOSITE;
  3152. if (cchDest == 0)
  3153. {
  3154. //
  3155. // Do not touch the pDest pointer. Use the pTmp buffer and
  3156. // initialize the end pointer.
  3157. //
  3158. pBuf = pTmp;
  3159. pEndBuf = pEndTmp;
  3160. //
  3161. // This is a bogus pointer and will never be touched. It just
  3162. // increments this pointer into oblivion.
  3163. //
  3164. pDest = pBuf;
  3165. ppIncr = &pDest;
  3166. }
  3167. else
  3168. {
  3169. //
  3170. // Initialize the pointers. Use the pDest buffer.
  3171. //
  3172. pBuf = pDest;
  3173. pEndBuf = pBuf + cchDest;
  3174. ppIncr = &pBuf;
  3175. }
  3176. //
  3177. // Search through the source string. Convert all precomposed
  3178. // forms to their composite form before converting to half width.
  3179. //
  3180. while ((ctr < cchSrc) && (pBuf < pEndBuf))
  3181. {
  3182. //
  3183. // Get the character to convert. If we need to convert to
  3184. // kana, do it.
  3185. //
  3186. if (pKana)
  3187. {
  3188. *pTmp = GET_KANA(pKana, pSrc[ctr]);
  3189. }
  3190. else
  3191. {
  3192. *pTmp = pSrc[ctr];
  3193. }
  3194. //
  3195. // Convert to its composite form (if exists).
  3196. //
  3197. // NOTE: Must use the tmp buffer in case the destination buffer
  3198. // isn't large enough to hold the composite form.
  3199. //
  3200. Count = InsertCompositeForm(pTmp, pEndTmp);
  3201. //
  3202. // Convert to half width (if exists) and case (if appropriate).
  3203. //
  3204. pPosDest = pTmp;
  3205. if (pCase)
  3206. {
  3207. for (ct = Count; ct > 0; ct--)
  3208. {
  3209. *pPosDest = GET_HALF_WIDTH(pTblPtrs->pHalfWidth, *pPosDest);
  3210. *pPosDest = GET_LOWER_UPPER_CASE(pCase, *pPosDest);
  3211. pPosDest++;
  3212. }
  3213. }
  3214. else
  3215. {
  3216. for (ct = Count; ct > 0; ct--)
  3217. {
  3218. *pPosDest = GET_HALF_WIDTH(pTblPtrs->pHalfWidth, *pPosDest);
  3219. pPosDest++;
  3220. }
  3221. }
  3222. //
  3223. // Convert back to its precomposed form (if exists).
  3224. //
  3225. if (Count > 1)
  3226. {
  3227. //
  3228. // Get the precomposed form.
  3229. //
  3230. // ct is the number of code points used from the
  3231. // composite form.
  3232. //
  3233. ct = InsertPreComposedForm(pTmp, pPosDest, pBuf);
  3234. if (ct > 1)
  3235. {
  3236. //
  3237. // Precomposed form was found. Need to make sure all
  3238. // of the composite chars were used.
  3239. //
  3240. if (ct == Count)
  3241. {
  3242. //
  3243. // All composite chars were used. Increment by 1.
  3244. //
  3245. (*ppIncr)++;
  3246. }
  3247. else
  3248. {
  3249. //
  3250. // Not all composite chars were used. Need to copy
  3251. // the rest of the composite chars from the tmp buffer
  3252. // to the destination buffer.
  3253. //
  3254. (*ppIncr)++;
  3255. Count -= ct;
  3256. if (pBuf + Count > pEndBuf)
  3257. {
  3258. break;
  3259. }
  3260. RtlMoveMemory(pBuf, pTmp + ct, Count * sizeof(WCHAR));
  3261. (*ppIncr) += Count;
  3262. }
  3263. }
  3264. else
  3265. {
  3266. //
  3267. // Precomposed form was NOT found. Need to copy the
  3268. // composite form from the tmp buffer to the destination
  3269. // buffer.
  3270. //
  3271. if (pBuf + Count > pEndBuf)
  3272. {
  3273. break;
  3274. }
  3275. RtlMoveMemory(pBuf, pTmp, Count * sizeof(WCHAR));
  3276. (*ppIncr) += Count;
  3277. }
  3278. }
  3279. else
  3280. {
  3281. //
  3282. // Only one character (no composite form), so just copy it
  3283. // from the tmp buffer to the destination buffer.
  3284. //
  3285. *pBuf = *pTmp;
  3286. (*ppIncr)++;
  3287. }
  3288. ctr++;
  3289. }
  3290. //
  3291. // Return the appropriate number of characters.
  3292. //
  3293. if (cchDest == 0)
  3294. {
  3295. //
  3296. // Return the number of characters written to the buffer.
  3297. //
  3298. return ((int)((*ppIncr) - pTmp));
  3299. }
  3300. else
  3301. {
  3302. //
  3303. // Make sure the given buffer was large enough to hold the
  3304. // mapping.
  3305. //
  3306. if (ctr < cchSrc)
  3307. {
  3308. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3309. return (0);
  3310. }
  3311. //
  3312. // Return the number of characters written to the buffer.
  3313. //
  3314. return ((int)((*ppIncr) - pDest));
  3315. }
  3316. }
  3317. ////////////////////////////////////////////////////////////////////////////
  3318. //
  3319. // MapFullKana
  3320. //
  3321. // Stores the result of the full width and Kana mapping for the given string
  3322. // in the destination buffer, and returns the number of wide characters
  3323. // written to the buffer.
  3324. //
  3325. // This first converts the characters to full width, and then maps all
  3326. // composite characters to their precomposed forms. This handles the case
  3327. // where the half width composite form (TWO code points) should map to a
  3328. // full width precomposed form (ONE full width code point). The half
  3329. // width precomposed forms do not exist in Unicode and we need the full
  3330. // width precomposed forms to round trip with the TWO half width code
  3331. // points.
  3332. //
  3333. // 11-04-93 JulieB Created.
  3334. ////////////////////////////////////////////////////////////////////////////
  3335. int MapFullKana(
  3336. LPCWSTR pSrc,
  3337. int cchSrc,
  3338. LPWSTR pDest,
  3339. int cchDest,
  3340. PKANA pKana,
  3341. PCASE pCase)
  3342. {
  3343. int Count; // count of characters
  3344. LPWSTR pPosSrc; // ptr to position in source buffer
  3345. LPWSTR pEndSrc; // ptr to end of source buffer
  3346. LPWSTR pBuf; // ptr to destination buffer
  3347. LPWSTR pEndBuf; // ptr to end of destination buffer
  3348. LPWSTR *ppIncr; // points to ptr to increment
  3349. WCHAR pTmp[MAX_COMPOSITE]; // ptr to temporary buffer
  3350. //
  3351. // Initialize source string pointers.
  3352. //
  3353. pPosSrc = (LPWSTR)pSrc;
  3354. pEndSrc = pPosSrc + cchSrc;
  3355. //
  3356. // Initialize the destination pointers.
  3357. //
  3358. if (cchDest == 0)
  3359. {
  3360. //
  3361. // Do not touch the pDest pointer. Use the pTmp buffer and
  3362. // initialize the end pointer.
  3363. //
  3364. pBuf = pTmp;
  3365. pEndBuf = pTmp + MAX_COMPOSITE;
  3366. //
  3367. // This is a bogus pointer and will never be touched. It just
  3368. // increments this pointer into oblivion.
  3369. //
  3370. pDest = pBuf;
  3371. ppIncr = &pDest;
  3372. }
  3373. else
  3374. {
  3375. //
  3376. // Initialize the pointers. Use the pDest buffer.
  3377. //
  3378. pBuf = pDest;
  3379. pEndBuf = pBuf + cchDest;
  3380. ppIncr = &pBuf;
  3381. }
  3382. //
  3383. // Search through the source string. Convert all composite
  3384. // forms to their precomposed form before converting to full width.
  3385. //
  3386. while ((pPosSrc < pEndSrc) && (pBuf < pEndBuf))
  3387. {
  3388. //
  3389. // Convert a composite form to its full width precomposed
  3390. // form (if exists). Also, convert to case if necessary.
  3391. //
  3392. Count = InsertFullWidthPreComposedForm( pPosSrc,
  3393. pEndSrc,
  3394. pBuf,
  3395. pCase );
  3396. pPosSrc += Count;
  3397. //
  3398. // Convert to kana if necessary.
  3399. //
  3400. if (pKana)
  3401. {
  3402. *pBuf = GET_KANA(pKana, *pBuf);
  3403. }
  3404. //
  3405. // Increment the destination pointer.
  3406. //
  3407. (*ppIncr)++;
  3408. }
  3409. //
  3410. // Return the appropriate number of characters.
  3411. //
  3412. if (cchDest == 0)
  3413. {
  3414. //
  3415. // Return the number of characters written to the buffer.
  3416. //
  3417. return ((int)((*ppIncr) - pTmp));
  3418. }
  3419. else
  3420. {
  3421. //
  3422. // Make sure the given buffer was large enough to hold the
  3423. // mapping.
  3424. //
  3425. if (pPosSrc < pEndSrc)
  3426. {
  3427. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3428. return (0);
  3429. }
  3430. //
  3431. // Return the number of characters written to the buffer.
  3432. //
  3433. return ((int)((*ppIncr) - pDest));
  3434. }
  3435. }
  3436. ////////////////////////////////////////////////////////////////////////////
  3437. //
  3438. // MapTraditionalSimplified
  3439. //
  3440. // Stores the appropriate Traditional or Simplified Chinese values in the
  3441. // destination buffer, and returns the number of wide characters
  3442. // written to the buffer.
  3443. //
  3444. // 05-07-96 JulieB Created.
  3445. ////////////////////////////////////////////////////////////////////////////
  3446. int MapTraditionalSimplified(
  3447. PLOC_HASH pHashN,
  3448. DWORD dwFlags,
  3449. LPCWSTR pSrc,
  3450. int cchSrc,
  3451. LPWSTR pDest,
  3452. int cchDest,
  3453. PCHINESE pChinese)
  3454. {
  3455. int ctr; // loop counter
  3456. PCASE pCase; // ptr to case table (if case flag is set)
  3457. //
  3458. // If the destination value is zero, then just return the
  3459. // length of the source string. Do NOT touch pDest.
  3460. //
  3461. if (cchDest == 0)
  3462. {
  3463. return (cchSrc);
  3464. }
  3465. //
  3466. // If cchSrc is greater than cchDest, then the destination buffer
  3467. // is too small to hold the new string. Return an error.
  3468. //
  3469. if (cchSrc > cchDest)
  3470. {
  3471. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3472. return (0);
  3473. }
  3474. //
  3475. // See if lower or upper case flags are present.
  3476. //
  3477. if (dwFlags & LCMAP_LOWERCASE)
  3478. {
  3479. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  3480. ? pHashN->pLowerLinguist
  3481. : pHashN->pLowerCase;
  3482. }
  3483. else if (dwFlags & LCMAP_UPPERCASE)
  3484. {
  3485. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  3486. ? pHashN->pUpperLinguist
  3487. : pHashN->pUpperCase;
  3488. }
  3489. else
  3490. {
  3491. pCase = NULL;
  3492. }
  3493. //
  3494. // Map to Traditional/Simplified and store it in the destination string.
  3495. // Also map the case, if appropriate.
  3496. //
  3497. if (pCase)
  3498. {
  3499. for (ctr = 0; ctr < cchSrc; ctr++)
  3500. {
  3501. pDest[ctr] = GET_CHINESE(pChinese, pSrc[ctr]);
  3502. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  3503. }
  3504. }
  3505. else
  3506. {
  3507. for (ctr = 0; ctr < cchSrc; ctr++)
  3508. {
  3509. pDest[ctr] = GET_CHINESE(pChinese, pSrc[ctr]);
  3510. }
  3511. }
  3512. //
  3513. // Return the number of wide characters written.
  3514. //
  3515. return (ctr);
  3516. }