Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3436 lines
116 KiB

4 years ago
  1. /*++
  2. Copyright (c) 1991-1996, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. map.c
  5. Abstract:
  6. This file contains functions that deal with map tables.
  7. APIs found in this file:
  8. FoldStringW
  9. LCMapStringW
  10. Revision History:
  11. 05-31-91 JulieB Created.
  12. --*/
  13. //
  14. // Include Files.
  15. //
  16. #include "nls.h"
  17. //
  18. // Constant Declarations.
  19. //
  //
  //  Marker for an invalid weight value.
  //
  #define MAP_INVALID_UW      0xffff

  //
  //  Size, in bytes, of each kind of weight.
  //
  #define NUM_BYTES_UW        2
  #define NUM_BYTES_DW        1
  #define NUM_BYTES_CW        1
  #define NUM_BYTES_XW        4
  #define NUM_BYTES_SW        4

  //
  //  Flag combination that drops the 3rd weight (CW).
  //
  #define NORM_DROP_CW        (NORM_IGNORECASE | NORM_IGNOREWIDTH)
  36. //
  37. // XW Values.
  38. //
  39. BYTE pXWDrop[] = // values to drop from XW
  40. {
  41. 0xc6, // weight 4
  42. 0x03, // weight 5
  43. 0xe4, // weight 6
  44. 0xc5 // weight 7
  45. };
  46. BYTE pXWSeparator[] = // separator values for XW
  47. {
  48. 0xff, // weight 4
  49. 0x02, // weight 5
  50. 0xff, // weight 6
  51. 0xff // weight 7
  52. };
  53. //
  54. // Forward Declarations.
  55. //
  56. int
  57. FoldCZone(
  58. LPCWSTR pSrc,
  59. int cchSrc,
  60. LPWSTR pDest,
  61. int cchDest);
  62. int
  63. FoldDigits(
  64. LPCWSTR pSrc,
  65. int cchSrc,
  66. LPWSTR pDest,
  67. int cchDest);
  68. int
  69. FoldPreComposed(
  70. LPCWSTR pSrc,
  71. int cchSrc,
  72. LPWSTR pDest,
  73. int cchDest);
  74. int
  75. FoldComposite(
  76. LPCWSTR pSrc,
  77. int cchSrc,
  78. LPWSTR pDest,
  79. int cchDest);
  80. int
  81. FoldCZone_Digits(
  82. LPCWSTR pSrc,
  83. int cchSrc,
  84. LPWSTR pDest,
  85. int cchDest);
  86. int
  87. MapCase(
  88. PLOC_HASH pHashN,
  89. LPCWSTR pSrc,
  90. int cchSrc,
  91. LPWSTR pDest,
  92. int cchDest,
  93. PCASE pCaseTbl);
  94. int
  95. MapSortKey(
  96. PLOC_HASH pHashN,
  97. DWORD dwFlags,
  98. LPCWSTR pSrc,
  99. int cchSrc,
  100. LPBYTE pDest,
  101. int cchDest);
  102. int
  103. MapNormalization(
  104. PLOC_HASH pHashN,
  105. DWORD dwFlags,
  106. LPCWSTR pSrc,
  107. int cchSrc,
  108. LPWSTR pDest,
  109. int cchDest);
  110. int
  111. MapKanaWidth(
  112. PLOC_HASH pHashN,
  113. DWORD dwFlags,
  114. LPCWSTR pSrc,
  115. int cchSrc,
  116. LPWSTR pDest,
  117. int cchDest);
  118. int
  119. MapHalfKana(
  120. LPCWSTR pSrc,
  121. int cchSrc,
  122. LPWSTR pDest,
  123. int cchDest,
  124. PKANA pKana,
  125. PCASE pCase);
  126. int
  127. MapFullKana(
  128. LPCWSTR pSrc,
  129. int cchSrc,
  130. LPWSTR pDest,
  131. int cchDest,
  132. PKANA pKana,
  133. PCASE pCase);
  134. int
  135. MapTraditionalSimplified(
  136. PLOC_HASH pHashN,
  137. DWORD dwFlags,
  138. LPCWSTR pSrc,
  139. int cchSrc,
  140. LPWSTR pDest,
  141. int cchDest,
  142. PCHINESE pChinese);
  143. //-------------------------------------------------------------------------//
  144. // API ROUTINES //
  145. //-------------------------------------------------------------------------//
  146. ////////////////////////////////////////////////////////////////////////////
  147. //
  148. // FoldStringW
  149. //
  150. // Maps one wide character string to another performing the specified
  151. // translation. This mapping routine only takes flags that are locale
  152. // independent.
  153. //
  154. // 05-31-91 JulieB Created.
  155. ////////////////////////////////////////////////////////////////////////////
  156. int WINAPI FoldStringW(
  157. DWORD dwMapFlags,
  158. LPCWSTR lpSrcStr,
  159. int cchSrc,
  160. LPWSTR lpDestStr,
  161. int cchDest)
  162. {
  163. int Count = 0; // word count
  164. //
  165. // Invalid Parameter Check:
  166. // - length of src string is 0
  167. // - either buffer size is negative (except cchSrc == -1)
  168. // - src string is NULL
  169. // - length of dest string is NOT zero AND dest string is NULL
  170. // - same buffer - src = destination
  171. //
  172. // - flags are checked in switch statement below
  173. //
  174. if ((cchSrc == 0) || (cchDest < 0) ||
  175. (lpSrcStr == NULL) ||
  176. ((cchDest != 0) && (lpDestStr == NULL)) ||
  177. (lpSrcStr == lpDestStr))
  178. {
  179. SetLastError(ERROR_INVALID_PARAMETER);
  180. return (0);
  181. }
  182. //
  183. // If cchSrc is -1, then the source string is null terminated and we
  184. // need to get the length of the source string. Add one to the
  185. // length to include the null termination.
  186. // (This will always be at least 1.)
  187. //
  188. if (cchSrc <= -1)
  189. {
  190. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  191. }
  192. //
  193. // Map the string based on the given flags.
  194. //
  195. switch (dwMapFlags)
  196. {
  197. case ( MAP_FOLDCZONE ) :
  198. {
  199. //
  200. // Map the string to fold the Compatibility Zone.
  201. //
  202. Count = FoldCZone( lpSrcStr,
  203. cchSrc,
  204. lpDestStr,
  205. cchDest );
  206. break;
  207. }
  208. case ( MAP_FOLDDIGITS ) :
  209. {
  210. //
  211. // Map the string to fold the Ascii Digits.
  212. //
  213. Count = FoldDigits( lpSrcStr,
  214. cchSrc,
  215. lpDestStr,
  216. cchDest );
  217. break;
  218. }
  219. case ( MAP_PRECOMPOSED ) :
  220. {
  221. //
  222. // Map the string to compress all composite forms of
  223. // characters to their precomposed form.
  224. //
  225. Count = FoldPreComposed( lpSrcStr,
  226. cchSrc,
  227. lpDestStr,
  228. cchDest );
  229. break;
  230. }
  231. case ( MAP_COMPOSITE ) :
  232. {
  233. //
  234. // Map the string to expand out all precomposed characters
  235. // to their composite form.
  236. //
  237. Count = FoldComposite( lpSrcStr,
  238. cchSrc,
  239. lpDestStr,
  240. cchDest );
  241. break;
  242. }
  243. case ( MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  244. {
  245. //
  246. // Map the string to fold the Compatibility Zone and fold the
  247. // Ascii Digits.
  248. //
  249. Count = FoldCZone_Digits( lpSrcStr,
  250. cchSrc,
  251. lpDestStr,
  252. cchDest );
  253. break;
  254. }
  255. case ( MAP_PRECOMPOSED | MAP_FOLDCZONE ) :
  256. {
  257. //
  258. // Map the string to convert to precomposed forms and to
  259. // fold the Compatibility Zone.
  260. //
  261. Count = FoldPreComposed( lpSrcStr,
  262. cchSrc,
  263. lpDestStr,
  264. cchDest );
  265. Count = FoldCZone( lpDestStr,
  266. Count,
  267. lpDestStr,
  268. cchDest );
  269. break;
  270. }
  271. case ( MAP_PRECOMPOSED | MAP_FOLDDIGITS ) :
  272. {
  273. //
  274. // Map the string to convert to precomposed forms and to
  275. // fold the Ascii Digits.
  276. //
  277. Count = FoldPreComposed( lpSrcStr,
  278. cchSrc,
  279. lpDestStr,
  280. cchDest );
  281. Count = FoldDigits( lpDestStr,
  282. Count,
  283. lpDestStr,
  284. cchDest );
  285. break;
  286. }
  287. case ( MAP_PRECOMPOSED | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  288. {
  289. //
  290. // Map the string to convert to precomposed forms,
  291. // fold the Compatibility Zone, and fold the Ascii Digits.
  292. //
  293. Count = FoldPreComposed( lpSrcStr,
  294. cchSrc,
  295. lpDestStr,
  296. cchDest );
  297. Count = FoldCZone_Digits( lpDestStr,
  298. Count,
  299. lpDestStr,
  300. cchDest );
  301. break;
  302. }
  303. case ( MAP_COMPOSITE | MAP_FOLDCZONE ) :
  304. {
  305. //
  306. // Map the string to convert to composite forms and to
  307. // fold the Compatibility Zone.
  308. //
  309. Count = FoldComposite( lpSrcStr,
  310. cchSrc,
  311. lpDestStr,
  312. cchDest );
  313. Count = FoldCZone( lpDestStr,
  314. Count,
  315. lpDestStr,
  316. cchDest );
  317. break;
  318. }
  319. case ( MAP_COMPOSITE | MAP_FOLDDIGITS ) :
  320. {
  321. //
  322. // Map the string to convert to composite forms and to
  323. // fold the Ascii Digits.
  324. //
  325. Count = FoldComposite( lpSrcStr,
  326. cchSrc,
  327. lpDestStr,
  328. cchDest );
  329. Count = FoldDigits( lpDestStr,
  330. Count,
  331. lpDestStr,
  332. cchDest );
  333. break;
  334. }
  335. case ( MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS ) :
  336. {
  337. //
  338. // Map the string to convert to composite forms,
  339. // fold the Compatibility Zone, and fold the Ascii Digits.
  340. //
  341. Count = FoldComposite( lpSrcStr,
  342. cchSrc,
  343. lpDestStr,
  344. cchDest );
  345. Count = FoldCZone_Digits( lpDestStr,
  346. Count,
  347. lpDestStr,
  348. cchDest );
  349. break;
  350. }
  351. default :
  352. {
  353. SetLastError(ERROR_INVALID_FLAGS);
  354. return (0);
  355. }
  356. }
  357. //
  358. // Return the number of characters written to the buffer.
  359. // Or, if cchDest == 0, then return the number of characters
  360. // that would have been written to the buffer.
  361. //
  362. return (Count);
  363. }
  364. ////////////////////////////////////////////////////////////////////////////
  365. //
  366. // LCMapStringW
  367. //
  368. // Maps one wide character string to another performing the specified
  369. // translation. This mapping routine only takes flags that are locale
  370. // dependent.
  371. //
  372. // 05-31-91 JulieB Created.
  373. // 07-26-93 JulieB Added new flags for NT-J.
  374. ////////////////////////////////////////////////////////////////////////////
  375. int WINAPI LCMapStringW(
  376. LCID Locale,
  377. DWORD dwMapFlags,
  378. LPCWSTR lpSrcStr,
  379. int cchSrc,
  380. LPWSTR lpDestStr,
  381. int cchDest)
  382. {
  383. PLOC_HASH pHashN; // ptr to LOC hash node
  384. int Count = 0; // word count or byte count
  385. int ctr; // loop counter
  386. //
  387. // Invalid Parameter Check:
  388. // - validate LCID
  389. // - length of src string is 0
  390. // - destination buffer size is negative
  391. // - src string is NULL
  392. // - length of dest string is NOT zero AND dest string is NULL
  393. // - same buffer - src = destination if not UPPER or LOWER only
  394. //
  395. VALIDATE_LANGUAGE(Locale, pHashN, dwMapFlags & LCMAP_LINGUISTIC_CASING);
  396. if ( (pHashN == NULL) ||
  397. (cchSrc == 0) || (cchDest < 0) || (lpSrcStr == NULL) ||
  398. ((cchDest != 0) && (lpDestStr == NULL)) ||
  399. ((lpSrcStr == lpDestStr) &&
  400. ((!(dwMapFlags & (LCMAP_UPPERCASE | LCMAP_LOWERCASE))) ||
  401. (dwMapFlags & (LCMAP_HIRAGANA | LCMAP_KATAKANA |
  402. LCMAP_HALFWIDTH | LCMAP_FULLWIDTH)))) )
  403. {
  404. SetLastError(ERROR_INVALID_PARAMETER);
  405. return (0);
  406. }
  407. //
  408. // Invalid Flags Check:
  409. // - flags other than valid ones or 0
  410. // - (any NORM_ flag) AND (any LCMAP_ flag except byterev and sortkey)
  411. // - (NORM_ flags for sortkey) AND (NOT LCMAP_SORTKEY)
  412. // - more than one of lower, upper, sortkey
  413. // - more than one of hiragana, katakana, sortkey
  414. // - more than one of half width, full width, sortkey
  415. // - more than one of traditional, simplified, sortkey
  416. // - (LINGUISTIC flag) AND (NOT LCMAP_UPPER OR LCMAP_LOWER)
  417. //
  418. dwMapFlags &= (~LOCALE_USE_CP_ACP);
  419. if ( (dwMapFlags & LCMS_INVALID_FLAG) || (dwMapFlags == 0) ||
  420. ((dwMapFlags & (NORM_ALL | SORT_STRINGSORT)) &&
  421. (dwMapFlags & LCMAP_NO_NORM)) ||
  422. ((dwMapFlags & NORM_SORTKEY_ONLY) &&
  423. (!(dwMapFlags & LCMAP_SORTKEY))) ||
  424. (MORE_THAN_ONE(dwMapFlags, LCMS1_SINGLE_FLAG)) ||
  425. (MORE_THAN_ONE(dwMapFlags, LCMS2_SINGLE_FLAG)) ||
  426. (MORE_THAN_ONE(dwMapFlags, LCMS3_SINGLE_FLAG)) ||
  427. (MORE_THAN_ONE(dwMapFlags, LCMS4_SINGLE_FLAG)) ||
  428. ((dwMapFlags & LCMAP_LINGUISTIC_CASING) &&
  429. (!(dwMapFlags & (LCMAP_UPPERCASE | LCMAP_LOWERCASE)))) )
  430. {
  431. SetLastError(ERROR_INVALID_FLAGS);
  432. return (0);
  433. }
  434. //
  435. // If cchSrc is -1, then the source string is null terminated and we
  436. // need to get the length of the source string. Add one to the
  437. // length to include the null termination.
  438. // (This will always be at least 1.)
  439. //
  440. if (cchSrc <= -1)
  441. {
  442. cchSrc = NlsStrLenW(lpSrcStr) + 1;
  443. }
  444. //
  445. // Map the string based on the given flags.
  446. //
  447. if (dwMapFlags & LCMAP_SORTKEY)
  448. {
  449. //
  450. // Map the string to its sortkey.
  451. //
  452. // NOTE: This returns the number of BYTES, instead of the
  453. // number of wide characters (words).
  454. //
  455. Count = MapSortKey( pHashN,
  456. dwMapFlags,
  457. lpSrcStr,
  458. cchSrc,
  459. (LPBYTE)lpDestStr,
  460. cchDest );
  461. }
  462. else
  463. {
  464. switch (dwMapFlags & ~(LCMAP_BYTEREV | LCMAP_LINGUISTIC_CASING))
  465. {
  466. case ( LCMAP_LOWERCASE ) :
  467. {
  468. //
  469. // Map the string to Lower Case.
  470. //
  471. Count = MapCase( pHashN,
  472. lpSrcStr,
  473. cchSrc,
  474. lpDestStr,
  475. cchDest,
  476. (dwMapFlags & LCMAP_LINGUISTIC_CASING)
  477. ? pHashN->pLowerLinguist
  478. : pHashN->pLowerCase );
  479. break;
  480. }
  481. case ( LCMAP_UPPERCASE ) :
  482. {
  483. //
  484. // Map the string to Upper Case.
  485. //
  486. Count = MapCase( pHashN,
  487. lpSrcStr,
  488. cchSrc,
  489. lpDestStr,
  490. cchDest,
  491. (dwMapFlags & LCMAP_LINGUISTIC_CASING)
  492. ? pHashN->pUpperLinguist
  493. : pHashN->pUpperCase );
  494. break;
  495. }
  496. case ( NORM_IGNORENONSPACE ) :
  497. case ( NORM_IGNORESYMBOLS ) :
  498. case ( NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS ) :
  499. {
  500. //
  501. // Map the string to strip out nonspace marks and/or symbols.
  502. //
  503. Count = MapNormalization( pHashN,
  504. dwMapFlags & ~LCMAP_BYTEREV,
  505. lpSrcStr,
  506. cchSrc,
  507. lpDestStr,
  508. cchDest );
  509. break;
  510. }
  511. case ( LCMAP_TRADITIONAL_CHINESE ) :
  512. case ( LCMAP_TRADITIONAL_CHINESE | LCMAP_LOWERCASE ) :
  513. case ( LCMAP_TRADITIONAL_CHINESE | LCMAP_UPPERCASE) :
  514. {
  515. //
  516. // Map the string to Traditional Chinese.
  517. //
  518. Count = MapTraditionalSimplified( pHashN,
  519. dwMapFlags & ~LCMAP_BYTEREV,
  520. lpSrcStr,
  521. cchSrc,
  522. lpDestStr,
  523. cchDest,
  524. pTblPtrs->pTraditional );
  525. break;
  526. }
  527. case ( LCMAP_SIMPLIFIED_CHINESE ) :
  528. case ( LCMAP_SIMPLIFIED_CHINESE | LCMAP_LOWERCASE ) :
  529. case ( LCMAP_SIMPLIFIED_CHINESE | LCMAP_UPPERCASE ) :
  530. {
  531. //
  532. // Map the string to Simplified Chinese.
  533. //
  534. Count = MapTraditionalSimplified( pHashN,
  535. dwMapFlags & ~LCMAP_BYTEREV,
  536. lpSrcStr,
  537. cchSrc,
  538. lpDestStr,
  539. cchDest,
  540. pTblPtrs->pSimplified );
  541. break;
  542. }
  543. default :
  544. {
  545. //
  546. // Make sure the Chinese flags are not used with the
  547. // Japanese flags.
  548. //
  549. if (dwMapFlags &
  550. (LCMAP_TRADITIONAL_CHINESE | LCMAP_SIMPLIFIED_CHINESE))
  551. {
  552. SetLastError(ERROR_INVALID_FLAGS);
  553. return (0);
  554. }
  555. //
  556. // The only flags not yet handled are the variations
  557. // containing the Kana and/or Width flags.
  558. // This handles all variations for:
  559. // LCMAP_HIRAGANA
  560. // LCMAP_KATAKANA
  561. // LCMAP_HALFWIDTH
  562. // LCMAP_FULLWIDTH
  563. //
  564. // Allow LCMAP_LOWERCASE and LCMAP_UPPERCASE
  565. // in combination with the kana and width flags.
  566. //
  567. Count = MapKanaWidth( pHashN,
  568. dwMapFlags & ~LCMAP_BYTEREV,
  569. lpSrcStr,
  570. cchSrc,
  571. lpDestStr,
  572. cchDest );
  573. break;
  574. }
  575. }
  576. }
  577. //
  578. // Always check LCMAP_BYTEREV last and do it in place.
  579. // LCMAP_BYTEREV may be used in combination with any other flag
  580. // (except ignore case without sortkey) or by itself.
  581. //
  582. if (dwMapFlags & LCMAP_BYTEREV)
  583. {
  584. //
  585. // Reverse the bytes of each word in the string.
  586. //
  587. if (dwMapFlags == LCMAP_BYTEREV)
  588. {
  589. //
  590. // Byte Reversal flag is used by itself.
  591. //
  592. // Make sure that the size of the destination buffer is
  593. // larger than zero. If it is zero, return the size of
  594. // the source string only. Do NOT touch lpDestStr.
  595. //
  596. if (cchDest != 0)
  597. {
  598. //
  599. // Flag is used by itself. Reverse the bytes from
  600. // the source string and store them in the destination
  601. // string.
  602. //
  603. if (cchSrc > cchDest)
  604. {
  605. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  606. return (0);
  607. }
  608. for (ctr = 0; ctr < cchSrc; ctr++)
  609. {
  610. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpSrcStr[ctr]),
  611. LOBYTE(lpSrcStr[ctr]) );
  612. }
  613. }
  614. //
  615. // Return the size of the source string.
  616. //
  617. Count = cchSrc;
  618. }
  619. else
  620. {
  621. //
  622. // Make sure that the size of the destination buffer is
  623. // larger than zero. If it is zero, return the count and
  624. // do NOT touch lpDestStr.
  625. //
  626. if (cchDest != 0)
  627. {
  628. //
  629. // Check for sortkey flag.
  630. //
  631. if (dwMapFlags & LCMAP_SORTKEY)
  632. {
  633. //
  634. // Sortkey flag is also set, so 'Count' contains the
  635. // number of BYTES instead of the number of words.
  636. //
  637. // Reverse the bytes in place in the destination string.
  638. // No need to check the size of the destination buffer
  639. // here - it's been done elsewhere.
  640. //
  641. for (ctr = 0; ctr < Count / 2; ctr++)
  642. {
  643. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpDestStr[ctr]),
  644. LOBYTE(lpDestStr[ctr]) );
  645. }
  646. }
  647. else
  648. {
  649. //
  650. // Flag is used in combination with another flag.
  651. // Reverse the bytes in place in the destination string.
  652. // No need to check the size of the destination buffer
  653. // here - it's been done elsewhere.
  654. //
  655. for (ctr = 0; ctr < Count; ctr++)
  656. {
  657. lpDestStr[ctr] = MAKEWORD( HIBYTE(lpDestStr[ctr]),
  658. LOBYTE(lpDestStr[ctr]) );
  659. }
  660. }
  661. }
  662. }
  663. }
  664. //
  665. // Return the number of characters (or number of bytes for sortkey)
  666. // written to the buffer.
  667. //
  668. return (Count);
  669. }
  670. //-------------------------------------------------------------------------//
  671. // INTERNAL ROUTINES //
  672. //-------------------------------------------------------------------------//
  673. ////////////////////////////////////////////////////////////////////////////
  674. //
  675. // FoldCZone
  676. //
  677. // Stores the compatibility zone values for the given string in the
  678. // destination buffer, and returns the number of wide characters
  679. // written to the buffer.
  680. //
  681. // 02-01-93 JulieB Created.
  682. ////////////////////////////////////////////////////////////////////////////
  683. int FoldCZone(
  684. LPCWSTR pSrc,
  685. int cchSrc,
  686. LPWSTR pDest,
  687. int cchDest)
  688. {
  689. int ctr; // loop counter
  690. //
  691. // If the destination value is zero, then just return the
  692. // length of the source string. Do NOT touch pDest.
  693. //
  694. if (cchDest == 0)
  695. {
  696. return (cchSrc);
  697. }
  698. //
  699. // If cchSrc is greater than cchDest, then the destination buffer
  700. // is too small to hold the new string. Return an error.
  701. //
  702. if (cchSrc > cchDest)
  703. {
  704. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  705. return (0);
  706. }
  707. //
  708. // Fold the Compatibility Zone and store it in the destination string.
  709. //
  710. for (ctr = 0; ctr < cchSrc; ctr++)
  711. {
  712. pDest[ctr] = GET_FOLD_CZONE(pTblPtrs->pCZone, pSrc[ctr]);
  713. }
  714. //
  715. // Return the number of wide characters written.
  716. //
  717. return (ctr);
  718. }
  719. ////////////////////////////////////////////////////////////////////////////
  720. //
  721. // FoldDigits
  722. //
  723. // Stores the ascii digits values for the given string in the
  724. // destination buffer, and returns the number of wide characters
  725. // written to the buffer.
  726. //
  727. // 02-01-93 JulieB Created.
  728. ////////////////////////////////////////////////////////////////////////////
  729. int FoldDigits(
  730. LPCWSTR pSrc,
  731. int cchSrc,
  732. LPWSTR pDest,
  733. int cchDest)
  734. {
  735. int ctr; // loop counter
  736. //
  737. // If the destination value is zero, then just return the
  738. // length of the source string. Do NOT touch pDest.
  739. //
  740. if (cchDest == 0)
  741. {
  742. return (cchSrc);
  743. }
  744. //
  745. // If cchSrc is greater than cchDest, then the destination buffer
  746. // is too small to hold the new string. Return an error.
  747. //
  748. if (cchSrc > cchDest)
  749. {
  750. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  751. return (0);
  752. }
  753. //
  754. // Fold the Ascii Digits and store it in the destination string.
  755. //
  756. for (ctr = 0; ctr < cchSrc; ctr++)
  757. {
  758. pDest[ctr] = GET_ASCII_DIGITS(pTblPtrs->pADigit, pSrc[ctr]);
  759. }
  760. //
  761. // Return the number of wide characters written.
  762. //
  763. return (ctr);
  764. }
  765. ////////////////////////////////////////////////////////////////////////////
  766. //
  767. // FoldPreComposed
  768. //
  769. // Stores the precomposed values for the given string in the
  770. // destination buffer, and returns the number of wide characters
  771. // written to the buffer.
  772. //
  773. // 02-01-93 JulieB Created.
  774. ////////////////////////////////////////////////////////////////////////////
  775. int FoldPreComposed(
  776. LPCWSTR pSrc,
  777. int cchSrc,
  778. LPWSTR pDest,
  779. int cchDest)
  780. {
  781. int ctr = 0; // source char counter
  782. int ctr2 = 0; // destination char counter
  783. WCHAR wch = 0; // wchar holder
  784. //
  785. // If the destination value is zero, then just return the
  786. // length of the string that would be returned. Do NOT touch pDest.
  787. //
  788. if (cchDest == 0)
  789. {
  790. //
  791. // Convert the source string to precomposed and calculate the
  792. // number of characters that would have been written to a
  793. // destination buffer.
  794. //
  795. while (ctr < cchSrc)
  796. {
  797. if ((ctr2 != 0) &&
  798. (IS_NONSPACE_ONLY(pTblPtrs->pDefaultSortkey, pSrc[ctr])))
  799. {
  800. //
  801. // Composite form. Write the precomposed form.
  802. //
  803. // If the precomposed character is written to the buffer,
  804. // do NOT increment the destination pointer or the
  805. // character count (the precomposed character was
  806. // written over the previous character).
  807. //
  808. if (wch)
  809. {
  810. if ((wch = GetPreComposedChar(pSrc[ctr], wch)) == 0)
  811. {
  812. //
  813. // No translation for composite form, so just
  814. // increment the destination counter.
  815. //
  816. ctr2++;
  817. }
  818. }
  819. else
  820. {
  821. if ((wch = GetPreComposedChar( pSrc[ctr],
  822. pSrc[ctr - 1] )) == 0)
  823. {
  824. //
  825. // No translation for composite form, so just
  826. // increment the destination counter.
  827. //
  828. ctr2++;
  829. }
  830. }
  831. }
  832. else
  833. {
  834. //
  835. // Not part of a composite character, so just
  836. // increment the destination counter.
  837. //
  838. wch = 0;
  839. ctr2++;
  840. }
  841. ctr++;
  842. }
  843. }
  844. else
  845. {
  846. //
  847. // Convert the source string to precomposed and store it in the
  848. // destination string.
  849. //
  850. while ((ctr < cchSrc) && (ctr2 < cchDest))
  851. {
  852. if ((ctr2 != 0) &&
  853. (IS_NONSPACE_ONLY(pTblPtrs->pDefaultSortkey, pSrc[ctr])))
  854. {
  855. //
  856. // Composite form. Write the precomposed form.
  857. //
  858. // If the precomposed character is written to the buffer,
  859. // do NOT increment the destination pointer or the
  860. // character count (the precomposed character was
  861. // written over the previous character).
  862. //
  863. wch = pDest[ctr2 - 1];
  864. if ((pDest[ctr2 - 1] =
  865. GetPreComposedChar( pSrc[ctr],
  866. pDest[ctr2 - 1] )) == 0)
  867. {
  868. //
  869. // No translation for composite form, so must
  870. // rewrite the base character and write the
  871. // composite character.
  872. //
  873. pDest[ctr2 - 1] = wch;
  874. pDest[ctr2] = pSrc[ctr];
  875. ctr2++;
  876. }
  877. }
  878. else
  879. {
  880. //
  881. // Not part of a composite character, so just write
  882. // the character to the destination string.
  883. //
  884. pDest[ctr2] = pSrc[ctr];
  885. ctr2++;
  886. }
  887. ctr++;
  888. }
  889. }
  890. //
  891. // Make sure destination buffer was large enough.
  892. //
  893. if (ctr < cchSrc)
  894. {
  895. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  896. return (0);
  897. }
  898. //
  899. // Return the number of wide characters written.
  900. //
  901. return (ctr2);
  902. }
  903. ////////////////////////////////////////////////////////////////////////////
  904. //
  905. // FoldComposite
  906. //
  907. // Stores the composite values for the given string in the
  908. // destination buffer, and returns the number of wide characters
  909. // written to the buffer.
  910. //
  911. // 02-01-93 JulieB Created.
  912. ////////////////////////////////////////////////////////////////////////////
  913. int FoldComposite(
  914. LPCWSTR pSrc,
  915. int cchSrc,
  916. LPWSTR pDest,
  917. int cchDest)
  918. {
  919. int ctr = 0; // source char counter
  920. int ctr2 = 0; // destination char counter
  921. LPWSTR pEndDest; // ptr to end of destination string
  922. WCHAR pTmp[MAX_COMPOSITE]; // tmp buffer for composite chars
  923. //
  924. // If the destination value is zero, then just return the
  925. // length of the string that would be returned. Do NOT touch pDest.
  926. //
  927. if (cchDest == 0)
  928. {
  929. //
  930. // Get the end of the tmp buffer.
  931. //
  932. pEndDest = (LPWSTR)pTmp + MAX_COMPOSITE;
  933. //
  934. // Convert the source string to precomposed and calculate the
  935. // number of characters that would have been written to a
  936. // destination buffer.
  937. //
  938. while (ctr < cchSrc)
  939. {
  940. //
  941. // Write the character to the destination string.
  942. //
  943. *pTmp = pSrc[ctr];
  944. //
  945. // See if it needs to be expanded to its composite form.
  946. //
  947. // If no composite form is found, the routine returns 1 for
  948. // the base character. Simply increment by the return value.
  949. //
  950. ctr2 += InsertCompositeForm(pTmp, pEndDest);
  951. //
  952. // Increment the source string counter.
  953. //
  954. ctr++;
  955. }
  956. }
  957. else
  958. {
  959. //
  960. // Get the end of the destination string.
  961. //
  962. pEndDest = (LPWSTR)pDest + cchDest;
  963. //
  964. // Convert the source string to precomposed and store it in the
  965. // destination string.
  966. //
  967. while ((ctr < cchSrc) && (ctr2 < cchDest))
  968. {
  969. //
  970. // Write the character to the destination string.
  971. //
  972. pDest[ctr2] = pSrc[ctr];
  973. //
  974. // See if it needs to be expanded to its composite form.
  975. //
  976. // If no composite form is found, the routine returns 1 for
  977. // the base character. Simply increment by the return value.
  978. //
  979. ctr2 += InsertCompositeForm(&(pDest[ctr2]), pEndDest);
  980. //
  981. // Increment the source string counter.
  982. //
  983. ctr++;
  984. }
  985. }
  986. //
  987. // Make sure destination buffer was large enough.
  988. //
  989. if (ctr < cchSrc)
  990. {
  991. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  992. return (0);
  993. }
  994. //
  995. // Return the number of wide characters written.
  996. //
  997. return (ctr2);
  998. }
  999. ////////////////////////////////////////////////////////////////////////////
  1000. //
  1001. // FoldCZone_Digits
  1002. //
  1003. // Stores the compatibility zone and ascii digits values for the given
  1004. // string in the destination buffer, and returns the number of wide
  1005. // characters written to the buffer.
  1006. //
  1007. // 02-01-93 JulieB Created.
  1008. ////////////////////////////////////////////////////////////////////////////
  1009. int FoldCZone_Digits(
  1010. LPCWSTR pSrc,
  1011. int cchSrc,
  1012. LPWSTR pDest,
  1013. int cchDest)
  1014. {
  1015. int ctr; // loop counter
  1016. //
  1017. // If the destination value is zero, then just return the
  1018. // length of the source string. Do NOT touch pDest.
  1019. //
  1020. if (cchDest == 0)
  1021. {
  1022. return (cchSrc);
  1023. }
  1024. //
  1025. // If cchSrc is greater than cchDest, then the destination buffer
  1026. // is too small to hold the new string. Return an error.
  1027. //
  1028. if (cchSrc > cchDest)
  1029. {
  1030. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1031. return (0);
  1032. }
  1033. //
  1034. // Fold the compatibility zone and the ascii digits values and store
  1035. // it in the destination string.
  1036. //
  1037. for (ctr = 0; ctr < cchSrc; ctr++)
  1038. {
  1039. pDest[ctr] = GET_FOLD_CZONE(pTblPtrs->pCZone, pSrc[ctr]);
  1040. pDest[ctr] = GET_ASCII_DIGITS(pTblPtrs->pADigit, pDest[ctr]);
  1041. }
  1042. //
  1043. // Return the number of wide characters written.
  1044. //
  1045. return (ctr);
  1046. }
  1047. ////////////////////////////////////////////////////////////////////////////
  1048. //
  1049. // MapCase
  1050. //
  1051. // Stores the lower or upper case values for the given string in the
  1052. // destination buffer, and returns the number of wide characters written to
  1053. // the buffer.
  1054. //
  1055. // 05-31-91 JulieB Created.
  1056. ////////////////////////////////////////////////////////////////////////////
  1057. int MapCase(
  1058. PLOC_HASH pHashN,
  1059. LPCWSTR pSrc,
  1060. int cchSrc,
  1061. LPWSTR pDest,
  1062. int cchDest,
  1063. PCASE pCaseTbl)
  1064. {
  1065. int ctr; // loop counter
  1066. //
  1067. // If the destination value is zero, then just return the
  1068. // length of the source string. Do NOT touch pDest.
  1069. //
  1070. if (cchDest == 0)
  1071. {
  1072. return (cchSrc);
  1073. }
  1074. //
  1075. // If cchSrc is greater than cchDest, then the destination buffer
  1076. // is too small to hold the lower or upper case string. Return an
  1077. // error.
  1078. //
  1079. if (cchSrc > cchDest)
  1080. {
  1081. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  1082. return (0);
  1083. }
  1084. //
  1085. // Lower or Upper case the source string and store it in the
  1086. // destination string.
  1087. //
  1088. for (ctr = 0; ctr < cchSrc; ctr++)
  1089. {
  1090. pDest[ctr] = GET_LOWER_UPPER_CASE(pCaseTbl, pSrc[ctr]);
  1091. }
  1092. //
  1093. // Return the number of wide characters written.
  1094. //
  1095. return (ctr);
  1096. }
  1097. ////////////////////////////////////////////////////////////////////////////
  1098. //
  1099. // SPECIAL_CASE_HANDLER
  1100. //
  1101. // Handles all of the special cases for each character. This includes only
  1102. // the valid values less than or equal to MAX_SPECIAL_CASE.
  1103. //
  1104. // DEFINED AS A MACRO.
  1105. //
  1106. // 11-04-92 JulieB Created.
  1107. ////////////////////////////////////////////////////////////////////////////
  //
  //  Value at position (WtNum * WeightLen) from pPosXW.  Relies on pPosXW
  //  and WeightLen being in scope where the macro is expanded.  WtNum is
  //  parenthesized so that an expression argument (e.g. a + b) is
  //  multiplied as a whole.
  //
  #define EXTRA_WEIGHT_POS(WtNum) (*(pPosXW + ((WtNum) * WeightLen)))
  1109. #define SPECIAL_CASE_HANDLER( SM, \
  1110. pWeight, \
  1111. pSortkey, \
  1112. pExpand, \
  1113. Position, \
  1114. fStringSort, \
  1115. fIgnoreSymbols, \
  1116. pCur, \
  1117. pBegin ) \
  1118. { \
  1119. PSORTKEY pExpWt; /* weight of 1 expansion char */ \
  1120. BYTE AW; /* alphanumeric weight */ \
  1121. BYTE XW; /* case weight value with extra bits */ \
  1122. DWORD PrevWt; /* previous weight */ \
  1123. BYTE PrevSM; /* previous script member */ \
  1124. BYTE PrevAW; /* previuos alphanumeric weight */ \
  1125. BYTE PrevCW; /* previuos case weight */ \
  1126. LPWSTR pPrev; /* ptr to previous char */ \
  1127. \
  1128. \
  1129. switch (SM) \
  1130. { \
  1131. case ( UNSORTABLE ) : \
  1132. { \
  1133. /* \
  1134. * Character is unsortable, so skip it. \
  1135. */ \
  1136. break; \
  1137. } \
  1138. \
  1139. case ( NONSPACE_MARK ) : \
  1140. { \
  1141. /* \
  1142. * Character is a nonspace mark, so only store \
  1143. * the diacritic weight. \
  1144. */ \
  1145. if (pPosDW > pDW) \
  1146. { \
  1147. (*(pPosDW - 1)) += GET_DIACRITIC(pWeight); \
  1148. } \
  1149. else \
  1150. { \
  1151. *pPosDW = GET_DIACRITIC(pWeight); \
  1152. pPosDW++; \
  1153. } \
  1154. \
  1155. break; \
  1156. } \
  1157. \
  1158. case ( EXPANSION ) : \
  1159. { \
  1160. /* \
  1161. * Expansion character - one character has 2 \
  1162. * different weights. Store each weight separately. \
  1163. */ \
  1164. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP1]); \
  1165. *pPosUW = GET_UNICODE(pExpWt); \
  1166. *pPosDW = GET_DIACRITIC(pExpWt); \
  1167. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1168. pPosUW++; \
  1169. pPosDW++; \
  1170. pPosCW++; \
  1171. \
  1172. pExpWt = &(pSortkey[(pExpand[GET_EXPAND_INDEX(pWeight)]).UCP2]); \
  1173. *pPosUW = GET_UNICODE(pExpWt); \
  1174. *pPosDW = GET_DIACRITIC(pExpWt); \
  1175. *pPosCW = GET_CASE(pExpWt) & CaseMask; \
  1176. pPosUW++; \
  1177. pPosDW++; \
  1178. pPosCW++; \
  1179. \
  1180. break; \
  1181. } \
  1182. \
  1183. case ( PUNCTUATION ) : \
  1184. { \
  1185. if (!fStringSort) \
  1186. { \
  1187. /* \
  1188. * Word Sort Method. \
  1189. * \
  1190. * Character is punctuation, so only store the special \
  1191. * weight. \
  1192. */ \
  1193. *((LPBYTE)pPosSW) = HIBYTE(GET_POSITION_SW(Position)); \
  1194. *(((LPBYTE)pPosSW) + 1) = LOBYTE(GET_POSITION_SW(Position)); \
  1195. pPosSW++; \
  1196. *pPosSW = GET_SPECIAL_WEIGHT(pWeight); \
  1197. pPosSW++; \
  1198. \
  1199. break; \
  1200. } \
  1201. \
  1202. /* \
  1203. * If using STRING sort method, treat punctuation the same \
  1204. * as symbol. So, FALL THROUGH to the symbol cases. \
  1205. */ \
  1206. } \
  1207. \
  1208. case ( SYMBOL_1 ) : \
  1209. case ( SYMBOL_2 ) : \
  1210. case ( SYMBOL_3 ) : \
  1211. case ( SYMBOL_4 ) : \
  1212. case ( SYMBOL_5 ) : \
  1213. { \
  1214. /* \
  1215. * Character is a symbol. \
  1216. * Store the Unicode weights ONLY if the NORM_IGNORESYMBOLS \
  1217. * flag is NOT set. \
  1218. */ \
  1219. if (!fIgnoreSymbols) \
  1220. { \
  1221. *pPosUW = GET_UNICODE(pWeight); \
  1222. *pPosDW = GET_DIACRITIC(pWeight); \
  1223. *pPosCW = GET_CASE(pWeight) & CaseMask; \
  1224. pPosUW++; \
  1225. pPosDW++; \
  1226. pPosCW++; \
  1227. } \
  1228. \
  1229. break; \
  1230. } \
  1231. \
  1232. case ( FAREAST_SPECIAL ) : \
  1233. { \
  1234. /* \
  1235. * Get the alphanumeric weight and the case weight of the \
  1236. * current code point. \
  1237. */ \
  1238. AW = GET_ALPHA_NUMERIC(pWeight); \
  1239. XW = (GET_CASE(pWeight) & CaseMask) | CASE_XW_MASK; \
  1240. \
  1241. /* \
  1242. * Special case Repeat and Cho-On. \
  1243. * AW = 0 => Repeat \
  1244. * AW = 1 => Cho-On \
  1245. * AW = 2+ => Kana \
  1246. */ \
  1247. if (AW <= MAX_SPECIAL_AW) \
  1248. { \
  1249. /* \
  1250. * If the script member of the previous character is \
  1251. * invalid, then give the special character an \
  1252. * invalid weight (highest possible weight) so that it \
  1253. * will sort AFTER everything else. \
  1254. */ \
  1255. pPrev = pCur - 1; \
  1256. *pPosUW = MAP_INVALID_UW; \
  1257. while (pPrev >= pBegin) \
  1258. { \
  1259. PrevWt = GET_DWORD_WEIGHT(pHashN, *pPrev); \
  1260. PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \
  1261. if (PrevSM < FAREAST_SPECIAL) \
  1262. { \
  1263. if (PrevSM != EXPANSION) \
  1264. { \
  1265. /* \
  1266. * UNSORTABLE or NONSPACE_MARK. \
  1267. * \
  1268. * Just ignore these, since we only care \
  1269. * about the previous UW value. \
  1270. */ \
  1271. pPrev--; \
  1272. continue; \
  1273. } \
  1274. } \
  1275. else if (PrevSM == FAREAST_SPECIAL) \
  1276. { \
  1277. PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \
  1278. if (PrevAW <= MAX_SPECIAL_AW) \
  1279. { \
  1280. /* \
  1281. * Handle case where two special chars follow \
  1282. * each other. Keep going back in the string. \
  1283. */ \
  1284. pPrev--; \
  1285. continue; \
  1286. } \
  1287. \
  1288. *pPosUW = MAKE_UNICODE_WT(KANA, PrevAW); \
  1289. \
  1290. /* \
  1291. * Only build weights 4, 5, 6, and 7 if the \
  1292. * previous character is KANA. \
  1293. * \
  1294. * Always: \
  1295. * 4W = previous CW & ISOLATE_SMALL \
  1296. * 6W = previous CW & ISOLATE_KANA \
  1297. * \
  1298. */ \
  1299. PrevCW = (GET_CASE(&PrevWt) & CaseMask) | \
  1300. CASE_XW_MASK; \
  1301. EXTRA_WEIGHT_POS(0) = PrevCW & ISOLATE_SMALL; \
  1302. EXTRA_WEIGHT_POS(2) = PrevCW & ISOLATE_KANA; \
  1303. \
  1304. if (AW == AW_REPEAT) \
  1305. { \
  1306. /* \
  1307. * Repeat: \
  1308. * UW = previous UW (set above) \
  1309. * 5W = WT_FIVE_REPEAT \
  1310. * 7W = previous CW & ISOLATE_WIDTH \
  1311. */ \
  1312. EXTRA_WEIGHT_POS(1) = WT_FIVE_REPEAT; \
  1313. EXTRA_WEIGHT_POS(3) = PrevCW & ISOLATE_WIDTH; \
  1314. } \
  1315. else \
  1316. { \
  1317. /* \
  1318. * Cho-On: \
  1319. * UW = previous UW & CHO_ON_UW_MASK \
  1320. * 5W = WT_FIVE_CHO_ON \
  1321. * 7W = current CW & ISOLATE_WIDTH \
  1322. */ \
  1323. *pPosUW &= CHO_ON_UW_MASK; \
  1324. EXTRA_WEIGHT_POS(1) = WT_FIVE_CHO_ON; \
  1325. EXTRA_WEIGHT_POS(3) = XW & ISOLATE_WIDTH; \
  1326. } \
  1327. \
  1328. pPosXW++; \
  1329. } \
  1330. else \
  1331. { \
  1332. *pPosUW = GET_UNICODE(&PrevWt); \
  1333. } \
  1334. \
  1335. break; \
  1336. } \
  1337. \
  1338. /* \
  1339. * Make sure there is a valid UW. If not, quit out \
  1340. * of switch case. \
  1341. */ \
  1342. if (*pPosUW == MAP_INVALID_UW) \
  1343. { \
  1344. pPosUW++; \
  1345. break; \
  1346. } \
  1347. } \
  1348. else \
  1349. { \
  1350. /* \
  1351. * Kana: \
  1352. * SM = KANA \
  1353. * AW = current AW \
  1354. * 4W = current CW & ISOLATE_SMALL \
  1355. * 5W = WT_FIVE_KANA \
  1356. * 6W = current CW & ISOLATE_KANA \
  1357. * 7W = current CW & ISOLATE_WIDTH \
  1358. */ \
  1359. *pPosUW = MAKE_UNICODE_WT(KANA, AW); \
  1360. EXTRA_WEIGHT_POS(0) = XW & ISOLATE_SMALL; \
  1361. EXTRA_WEIGHT_POS(1) = WT_FIVE_KANA; \
  1362. EXTRA_WEIGHT_POS(2) = XW & ISOLATE_KANA; \
  1363. EXTRA_WEIGHT_POS(3) = XW & ISOLATE_WIDTH; \
  1364. \
  1365. pPosXW++; \
  1366. } \
  1367. \
  1368. /* \
  1369. * Always: \
  1370. * DW = current DW \
  1371. * CW = minimum CW \
  1372. */ \
  1373. *pPosDW = GET_DIACRITIC(pWeight); \
  1374. *pPosCW = MIN_CW; \
  1375. \
  1376. pPosUW++; \
  1377. pPosDW++; \
  1378. pPosCW++; \
  1379. \
  1380. break; \
  1381. } \
  1382. \
  1383. case ( RESERVED_2 ) : \
  1384. case ( RESERVED_3 ) : \
  1385. { \
  1386. /* \
  1387. * Fill out the case statement so the compiler \
  1388. * will use a jump table. \
  1389. */ \
  1390. ; \
  1391. } \
  1392. } \
  1393. }
  1394. ////////////////////////////////////////////////////////////////////////////
  1395. //
  1396. // MapSortKey
  1397. //
  1398. // Stores the sortkey weights for the given string in the destination buffer,
  1399. // and returns the number of BYTES written to the buffer.
  1400. //
  1401. // 11-04-92 JulieB Created.
  1402. ////////////////////////////////////////////////////////////////////////////
  1403. int MapSortKey(
  1404. PLOC_HASH pHashN,
  1405. DWORD dwFlags,
  1406. LPCWSTR pSrc,
  1407. int cchSrc,
  1408. LPBYTE pDest,
  1409. int cbDest)
  1410. {
  1411. register int WeightLen; // length of one set of weights
  1412. LPWSTR pUW; // ptr to Unicode Weights
  1413. LPBYTE pDW; // ptr to Diacritic Weights
  1414. LPBYTE pCW; // ptr to Case Weights
  1415. LPBYTE pXW; // ptr to Extra Weights
  1416. LPWSTR pSW; // ptr to Special Weights
  1417. LPWSTR pPosUW; // ptr to position in pUW buffer
  1418. LPBYTE pPosDW; // ptr to position in pDW buffer
  1419. LPBYTE pPosCW; // ptr to position in pCW buffer
  1420. LPBYTE pPosXW; // ptr to position in pXW buffer
  1421. LPWSTR pPosSW; // ptr to position in pSW buffer
  1422. PSORTKEY pWeight; // ptr to weight of character
  1423. BYTE SM; // script member value
  1424. BYTE CaseMask; // mask for case weight
  1425. int PosCtr; // position counter in string
  1426. LPWSTR pPos; // ptr to position in string
  1427. LPBYTE pTmp; // ptr to go through UW, XW, and SW
  1428. LPBYTE pPosTmp; // ptr to tmp position in XW
  1429. PCOMPRESS_2 pComp2; // ptr to compression 2 list
  1430. PCOMPRESS_3 pComp3; // ptr to compression 3 list
  1431. WORD pBuffer[MAX_SKEYBUFLEN]; // buffer to hold weights
  1432. int ctr; // loop counter
  1433. BOOL IfDblCompress; // if double compress possibility
  1434. BOOL fStringSort; // if using string sort method
  1435. BOOL fIgnoreSymbols; // if ignore symbols flag is set
  1436. //
  1437. // Make sure the SMWeight structure has been initialized.
  1438. // No need to check for an error here. If an error occurs,
  1439. // the default order will be used.
  1440. //
  1441. if ((pTblPtrs->SMWeight)[0] == INVALID_SM_VALUE)
  1442. {
  1443. GetScriptMemberWeights();
  1444. }
  1445. //
  1446. // See if the length of the string is too large for the static
  1447. // buffer. If so, allocate a buffer that is large enough.
  1448. //
  1449. if (cchSrc > MAX_STRING_LEN)
  1450. {
  1451. //
  1452. // Allocate buffer to hold all of the weights.
  1453. // (cchSrc) * (max # of expansions) * (# of weights)
  1454. //
  1455. WeightLen = cchSrc * MAX_EXPANSION;
  1456. if ((pUW = (LPWSTR)NLS_ALLOC_MEM( WeightLen * MAX_WEIGHTS *
  1457. sizeof(WCHAR) )) == NULL)
  1458. {
  1459. SetLastError(ERROR_OUTOFMEMORY);
  1460. return (0);
  1461. }
  1462. }
  1463. else
  1464. {
  1465. WeightLen = MAX_STRING_LEN * MAX_EXPANSION;
  1466. pUW = (LPWSTR)pBuffer;
  1467. }
  1468. //
  1469. // Set the case weight mask based on the given flags.
  1470. // If none or all of the ignore case flags are set, then
  1471. // just leave the mask as 0xff.
  1472. //
  1473. CaseMask = 0xff;
  1474. switch (dwFlags & NORM_ALL_CASE)
  1475. {
  1476. case ( NORM_IGNORECASE ) :
  1477. {
  1478. CaseMask &= CASE_UPPER_MASK;
  1479. break;
  1480. }
  1481. case ( NORM_IGNOREKANATYPE ) :
  1482. {
  1483. CaseMask &= CASE_KANA_MASK;
  1484. break;
  1485. }
  1486. case ( NORM_IGNOREWIDTH ) :
  1487. {
  1488. CaseMask &= CASE_WIDTH_MASK;
  1489. break;
  1490. }
  1491. case ( NORM_IGNORECASE | NORM_IGNOREKANATYPE ) :
  1492. {
  1493. CaseMask &= (CASE_UPPER_MASK & CASE_KANA_MASK);
  1494. break;
  1495. }
  1496. case ( NORM_IGNORECASE | NORM_IGNOREWIDTH ) :
  1497. {
  1498. CaseMask &= (CASE_UPPER_MASK & CASE_WIDTH_MASK);
  1499. break;
  1500. }
  1501. case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1502. {
  1503. CaseMask &= (CASE_KANA_MASK & CASE_WIDTH_MASK);
  1504. break;
  1505. }
  1506. case ( NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
  1507. {
  1508. CaseMask &= (CASE_UPPER_MASK & CASE_KANA_MASK & CASE_WIDTH_MASK);
  1509. break;
  1510. }
  1511. }
  1512. //
  1513. // Set pointers to positions of weights in buffer.
  1514. //
  1515. // UW => word length
  1516. // DW => byte length
  1517. // CW => byte length
  1518. // XW => 4 byte length (4 weights, 1 byte each)
  1519. // SW => dword length (2 words each)
  1520. //
  1521. pDW = (LPBYTE)(pUW + (WeightLen * (NUM_BYTES_UW / sizeof(WCHAR))));
  1522. pCW = (LPBYTE)(pDW + (WeightLen * NUM_BYTES_DW));
  1523. pXW = (LPBYTE)(pCW + (WeightLen * NUM_BYTES_CW));
  1524. pSW = (LPWSTR)(pXW + (WeightLen * NUM_BYTES_XW));
  1525. pPosUW = pUW;
  1526. pPosDW = pDW;
  1527. pPosCW = pCW;
  1528. pPosXW = pXW;
  1529. pPosSW = pSW;
  1530. //
  1531. // Initialize flags and loop values.
  1532. //
  1533. fStringSort = dwFlags & SORT_STRINGSORT;
  1534. fIgnoreSymbols = dwFlags & NORM_IGNORESYMBOLS;
  1535. pPos = (LPWSTR)pSrc;
  1536. PosCtr = 1;
  1537. //
  1538. // Check if given locale has compressions.
  1539. //
  1540. if (pHashN->IfCompression == FALSE)
  1541. {
  1542. //
  1543. // Go through string, code point by code point.
  1544. //
  1545. // No compressions exist in the given locale, so
  1546. // DO NOT check for them.
  1547. //
  1548. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1549. {
  1550. //
  1551. // Get weights.
  1552. //
  1553. pWeight = &((pHashN->pSortkey)[*pPos]);
  1554. SM = GET_SCRIPT_MEMBER(pWeight);
  1555. if (SM > MAX_SPECIAL_CASE)
  1556. {
  1557. //
  1558. // No special case on character, so store the
  1559. // various weights for the character.
  1560. //
  1561. *pPosUW = GET_UNICODE(pWeight);
  1562. *pPosDW = GET_DIACRITIC(pWeight);
  1563. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1564. pPosUW++;
  1565. pPosDW++;
  1566. pPosCW++;
  1567. }
  1568. else
  1569. {
  1570. SPECIAL_CASE_HANDLER( SM,
  1571. pWeight,
  1572. pHashN->pSortkey,
  1573. pTblPtrs->pExpansion,
  1574. pPosUW - pUW + 1,
  1575. fStringSort,
  1576. fIgnoreSymbols,
  1577. pPos,
  1578. (LPWSTR)pSrc );
  1579. }
  1580. }
  1581. }
  1582. else if (pHashN->IfDblCompression == FALSE)
  1583. {
  1584. //
  1585. // Go through string, code point by code point.
  1586. //
  1587. // Compressions DO exist in the given locale, so
  1588. // check for them.
  1589. //
  1590. // No double compressions exist in the given locale,
  1591. // so DO NOT check for them.
  1592. //
  1593. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1594. {
  1595. //
  1596. // Get weights.
  1597. //
  1598. pWeight = &((pHashN->pSortkey)[*pPos]);
  1599. SM = GET_SCRIPT_MEMBER(pWeight);
  1600. if (SM > MAX_SPECIAL_CASE)
  1601. {
  1602. //
  1603. // No special case on character, but must check for
  1604. // compression characters.
  1605. //
  1606. switch (GET_COMPRESSION(pWeight))
  1607. {
  1608. case ( COMPRESS_3_MASK ) :
  1609. {
  1610. if ((PosCtr + 2) <= cchSrc)
  1611. {
  1612. ctr = pHashN->pCompHdr->Num3;
  1613. pComp3 = pHashN->pCompress3;
  1614. for (; ctr > 0; ctr--, pComp3++)
  1615. {
  1616. if ((pComp3->UCP1 == *pPos) &&
  1617. (pComp3->UCP2 == *(pPos + 1)) &&
  1618. (pComp3->UCP3 == *(pPos + 2)))
  1619. {
  1620. pWeight = &(pComp3->Weights);
  1621. *pPosUW = GET_UNICODE(pWeight);
  1622. *pPosDW = GET_DIACRITIC(pWeight);
  1623. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1624. pPosUW++;
  1625. pPosDW++;
  1626. pPosCW++;
  1627. //
  1628. // Add only two to source, since one
  1629. // will be added by "for" structure.
  1630. //
  1631. pPos += 2;
  1632. PosCtr += 2;
  1633. break;
  1634. }
  1635. }
  1636. if (ctr > 0)
  1637. {
  1638. break;
  1639. }
  1640. }
  1641. //
  1642. // Fall through if not found.
  1643. //
  1644. }
  1645. case ( COMPRESS_2_MASK ) :
  1646. {
  1647. if ((PosCtr + 1) <= cchSrc)
  1648. {
  1649. ctr = pHashN->pCompHdr->Num2;
  1650. pComp2 = pHashN->pCompress2;
  1651. for (; ctr > 0; ctr--, pComp2++)
  1652. {
  1653. if ((pComp2->UCP1 == *pPos) &&
  1654. (pComp2->UCP2 == *(pPos + 1)))
  1655. {
  1656. pWeight = &(pComp2->Weights);
  1657. *pPosUW = GET_UNICODE(pWeight);
  1658. *pPosDW = GET_DIACRITIC(pWeight);
  1659. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1660. pPosUW++;
  1661. pPosDW++;
  1662. pPosCW++;
  1663. //
  1664. // Add only one to source, since one
  1665. // will be added by "for" structure.
  1666. //
  1667. pPos++;
  1668. PosCtr++;
  1669. break;
  1670. }
  1671. }
  1672. if (ctr > 0)
  1673. {
  1674. break;
  1675. }
  1676. }
  1677. //
  1678. // Fall through if not found.
  1679. //
  1680. }
  1681. default :
  1682. {
  1683. //
  1684. // No possible compression for character, so store
  1685. // the various weights for the character.
  1686. //
  1687. *pPosUW = GET_UNICODE_SM(pWeight, SM);
  1688. *pPosDW = GET_DIACRITIC(pWeight);
  1689. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1690. pPosUW++;
  1691. pPosDW++;
  1692. pPosCW++;
  1693. }
  1694. }
  1695. }
  1696. else
  1697. {
  1698. SPECIAL_CASE_HANDLER( SM,
  1699. pWeight,
  1700. pHashN->pSortkey,
  1701. pTblPtrs->pExpansion,
  1702. pPosUW - pUW + 1,
  1703. fStringSort,
  1704. fIgnoreSymbols,
  1705. pPos,
  1706. (LPWSTR)pSrc );
  1707. }
  1708. }
  1709. }
  1710. else
  1711. {
  1712. //
  1713. // Go through string, code point by code point.
  1714. //
  1715. // Compressions DO exist in the given locale, so
  1716. // check for them.
  1717. //
  1718. // Double Compressions also exist in the given locale,
  1719. // so check for them.
  1720. //
  1721. for (; PosCtr <= cchSrc; PosCtr++, pPos++)
  1722. {
  1723. //
  1724. // Get weights.
  1725. //
  1726. pWeight = &((pHashN->pSortkey)[*pPos]);
  1727. SM = GET_SCRIPT_MEMBER(pWeight);
  1728. if (SM > MAX_SPECIAL_CASE)
  1729. {
  1730. //
  1731. // No special case on character, but must check for
  1732. // compression characters and double compression
  1733. // characters.
  1734. //
  1735. IfDblCompress =
  1736. (((PosCtr + 1) <= cchSrc) &&
  1737. ((GET_DWORD_WEIGHT(pHashN, *pPos) & CMP_MASKOFF_CW) ==
  1738. (GET_DWORD_WEIGHT(pHashN, *(pPos + 1)) & CMP_MASKOFF_CW)))
  1739. ? 1
  1740. : 0;
  1741. switch (GET_COMPRESSION(pWeight))
  1742. {
  1743. case ( COMPRESS_3_MASK ) :
  1744. {
  1745. if (IfDblCompress)
  1746. {
  1747. if ((PosCtr + 3) <= cchSrc)
  1748. {
  1749. ctr = pHashN->pCompHdr->Num3;
  1750. pComp3 = pHashN->pCompress3;
  1751. for (; ctr > 0; ctr--, pComp3++)
  1752. {
  1753. if ((pComp3->UCP1 == *(pPos + 1)) &&
  1754. (pComp3->UCP2 == *(pPos + 2)) &&
  1755. (pComp3->UCP3 == *(pPos + 3)))
  1756. {
  1757. pWeight = &(pComp3->Weights);
  1758. *pPosUW = GET_UNICODE(pWeight);
  1759. *pPosDW = GET_DIACRITIC(pWeight);
  1760. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1761. *(pPosUW + 1) = *pPosUW;
  1762. *(pPosDW + 1) = *pPosDW;
  1763. *(pPosCW + 1) = *pPosCW;
  1764. pPosUW += 2;
  1765. pPosDW += 2;
  1766. pPosCW += 2;
  1767. //
  1768. // Add only three to source, since one
  1769. // will be added by "for" structure.
  1770. //
  1771. pPos += 3;
  1772. PosCtr += 3;
  1773. break;
  1774. }
  1775. }
  1776. if (ctr > 0)
  1777. {
  1778. break;
  1779. }
  1780. }
  1781. }
  1782. //
  1783. // Fall through if not found.
  1784. //
  1785. if ((PosCtr + 2) <= cchSrc)
  1786. {
  1787. ctr = pHashN->pCompHdr->Num3;
  1788. pComp3 = pHashN->pCompress3;
  1789. for (; ctr > 0; ctr--, pComp3++)
  1790. {
  1791. if ((pComp3->UCP1 == *pPos) &&
  1792. (pComp3->UCP2 == *(pPos + 1)) &&
  1793. (pComp3->UCP3 == *(pPos + 2)))
  1794. {
  1795. pWeight = &(pComp3->Weights);
  1796. *pPosUW = GET_UNICODE(pWeight);
  1797. *pPosDW = GET_DIACRITIC(pWeight);
  1798. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1799. pPosUW++;
  1800. pPosDW++;
  1801. pPosCW++;
  1802. //
  1803. // Add only two to source, since one
  1804. // will be added by "for" structure.
  1805. //
  1806. pPos += 2;
  1807. PosCtr += 2;
  1808. break;
  1809. }
  1810. }
  1811. if (ctr > 0)
  1812. {
  1813. break;
  1814. }
  1815. }
  1816. //
  1817. // Fall through if not found.
  1818. //
  1819. }
  1820. case ( COMPRESS_2_MASK ) :
  1821. {
  1822. if (IfDblCompress)
  1823. {
  1824. if ((PosCtr + 2) <= cchSrc)
  1825. {
  1826. ctr = pHashN->pCompHdr->Num2;
  1827. pComp2 = pHashN->pCompress2;
  1828. for (; ctr > 0; ctr--, pComp2++)
  1829. {
  1830. if ((pComp2->UCP1 == *(pPos + 1)) &&
  1831. (pComp2->UCP2 == *(pPos + 2)))
  1832. {
  1833. pWeight = &(pComp2->Weights);
  1834. *pPosUW = GET_UNICODE(pWeight);
  1835. *pPosDW = GET_DIACRITIC(pWeight);
  1836. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1837. *(pPosUW + 1) = *pPosUW;
  1838. *(pPosDW + 1) = *pPosDW;
  1839. *(pPosCW + 1) = *pPosCW;
  1840. pPosUW += 2;
  1841. pPosDW += 2;
  1842. pPosCW += 2;
  1843. //
  1844. // Add only two to source, since one
  1845. // will be added by "for" structure.
  1846. //
  1847. pPos += 2;
  1848. PosCtr += 2;
  1849. break;
  1850. }
  1851. }
  1852. if (ctr > 0)
  1853. {
  1854. break;
  1855. }
  1856. }
  1857. }
  1858. //
  1859. // Fall through if not found.
  1860. //
  1861. if ((PosCtr + 1) <= cchSrc)
  1862. {
  1863. ctr = pHashN->pCompHdr->Num2;
  1864. pComp2 = pHashN->pCompress2;
  1865. for (; ctr > 0; ctr--, pComp2++)
  1866. {
  1867. if ((pComp2->UCP1 == *pPos) &&
  1868. (pComp2->UCP2 == *(pPos + 1)))
  1869. {
  1870. pWeight = &(pComp2->Weights);
  1871. *pPosUW = GET_UNICODE(pWeight);
  1872. *pPosDW = GET_DIACRITIC(pWeight);
  1873. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1874. pPosUW++;
  1875. pPosDW++;
  1876. pPosCW++;
  1877. //
  1878. // Add only one to source, since one
  1879. // will be added by "for" structure.
  1880. //
  1881. pPos++;
  1882. PosCtr++;
  1883. break;
  1884. }
  1885. }
  1886. if (ctr > 0)
  1887. {
  1888. break;
  1889. }
  1890. }
  1891. //
  1892. // Fall through if not found.
  1893. //
  1894. }
  1895. default :
  1896. {
  1897. //
  1898. // No possible compression for character, so store
  1899. // the various weights for the character.
  1900. //
  1901. *pPosUW = GET_UNICODE_SM(pWeight, SM);
  1902. *pPosDW = GET_DIACRITIC(pWeight);
  1903. *pPosCW = GET_CASE(pWeight) & CaseMask;
  1904. pPosUW++;
  1905. pPosDW++;
  1906. pPosCW++;
  1907. }
  1908. }
  1909. }
  1910. else
  1911. {
  1912. SPECIAL_CASE_HANDLER( SM,
  1913. pWeight,
  1914. pHashN->pSortkey,
  1915. pTblPtrs->pExpansion,
  1916. pPosUW - pUW + 1,
  1917. fStringSort,
  1918. fIgnoreSymbols,
  1919. pPos,
  1920. (LPWSTR)pSrc );
  1921. }
  1922. }
  1923. }
  1924. //
  1925. // Store the final sortkey weights in the destination buffer.
  1926. //
  1927. // PosCtr will be a BYTE count.
  1928. //
  1929. PosCtr = 0;
  1930. //
  1931. // If the destination value is zero, then just return the
  1932. // length of the string that would be returned. Do NOT touch pDest.
  1933. //
  1934. if (cbDest == 0)
  1935. {
  1936. //
  1937. // Count the Unicode Weights.
  1938. //
  1939. PosCtr += ((LPBYTE)pPosUW - (LPBYTE)pUW);
  1940. //
  1941. // Count the Separator.
  1942. //
  1943. PosCtr++;
  1944. //
  1945. // Count the Diacritic Weights.
  1946. //
  1947. // - Eliminate minimum DW.
  1948. // - Count the number of diacritic weights.
  1949. //
  1950. if (!(dwFlags & NORM_IGNORENONSPACE))
  1951. {
  1952. pPosDW--;
  1953. if (pHashN->IfReverseDW == TRUE)
  1954. {
  1955. //
  1956. // Reverse diacritics:
  1957. // - remove diacritics from left to right.
  1958. // - count diacritics from right to left.
  1959. //
  1960. while ((pDW <= pPosDW) && (*pDW <= MIN_DW))
  1961. {
  1962. pDW++;
  1963. }
  1964. PosCtr += (pPosDW - pDW + 1);
  1965. }
  1966. else
  1967. {
  1968. //
  1969. // Regular diacritics:
  1970. // - remove diacritics from right to left.
  1971. // - count diacritics from left to right.
  1972. //
  1973. while ((pPosDW >= pDW) && (*pPosDW <= MIN_DW))
  1974. {
  1975. pPosDW--;
  1976. }
  1977. PosCtr += (pPosDW - pDW + 1);
  1978. }
  1979. }
  1980. //
  1981. // Count the Separator.
  1982. //
  1983. PosCtr++;
  1984. //
  1985. // Count the Case Weights.
  1986. //
  1987. // - Eliminate minimum CW.
  1988. // - Count the number of case weights.
  1989. //
  1990. if ((dwFlags & NORM_DROP_CW) != NORM_DROP_CW)
  1991. {
  1992. pPosCW--;
  1993. while ((pPosCW >= pCW) && (*pPosCW <= MIN_CW))
  1994. {
  1995. pPosCW--;
  1996. }
  1997. PosCtr += (pPosCW - pCW + 1);
  1998. }
  1999. //
  2000. // Count the Separator.
  2001. //
  2002. PosCtr++;
  2003. //
  2004. // Count the Extra Weights.
  2005. //
  2006. // - Eliminate EW.
  2007. // - Count the number of extra weights and separators.
  2008. //
  2009. if (pXW < pPosXW)
  2010. {
  2011. if (dwFlags & NORM_IGNORENONSPACE)
  2012. {
  2013. //
  2014. // Ignore 4W and 5W. Must count separators for
  2015. // 4W and 5W, though.
  2016. //
  2017. PosCtr += 2;
  2018. ctr = 2;
  2019. }
  2020. else
  2021. {
  2022. ctr = 0;
  2023. }
  2024. pPosXW--;
  2025. for (; ctr < NUM_BYTES_XW; ctr++)
  2026. {
  2027. pTmp = pXW + (WeightLen * ctr);
  2028. pPosTmp = pPosXW + (WeightLen * ctr);
  2029. while ((pPosTmp >= pTmp) && (*pPosTmp == pXWDrop[ctr]))
  2030. {
  2031. pPosTmp--;
  2032. }
  2033. PosCtr += (pPosTmp - pTmp + 1);
  2034. //
  2035. // Count the Separator.
  2036. //
  2037. PosCtr++;
  2038. }
  2039. }
  2040. //
  2041. // Count the Separator.
  2042. //
  2043. PosCtr++;
  2044. //
  2045. // Count the Special Weights.
  2046. //
  2047. if (!fIgnoreSymbols)
  2048. {
  2049. PosCtr += ((LPBYTE)pPosSW - (LPBYTE)pSW);
  2050. }
  2051. //
  2052. // Count the Terminator.
  2053. //
  2054. PosCtr++;
  2055. }
  2056. else
  2057. {
  2058. //
  2059. // Store the Unicode Weights in the destination buffer.
  2060. //
  2061. // NOTE: cbDest is the number of BYTES.
  2062. // Also, must add one to length for separator.
  2063. //
  2064. if (cbDest < (pPosUW - pUW + 1))
  2065. {
  2066. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2067. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2068. return (0);
  2069. }
  2070. pTmp = (LPBYTE)pUW;
  2071. while (pTmp < (LPBYTE)pPosUW)
  2072. {
  2073. //
  2074. // Copy Unicode weight to destination buffer.
  2075. //
  2076. // NOTE: Unicode Weight is stored in the data file as
  2077. // Alphanumeric Weight, Script Member
  2078. // so that the WORD value will be read correctly.
  2079. //
  2080. pDest[PosCtr] = *(pTmp + 1);
  2081. pDest[PosCtr + 1] = *pTmp;
  2082. PosCtr += 2;
  2083. pTmp += 2;
  2084. }
  2085. //
  2086. // Copy Separator to destination buffer.
  2087. //
  2088. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2089. PosCtr++;
  2090. //
  2091. // Store the Diacritic Weights in the destination buffer.
  2092. //
  2093. // - Eliminate minimum DW.
  2094. // - Make sure destination buffer is large enough.
  2095. // - Copy diacritic weights to destination buffer.
  2096. //
  2097. if (!(dwFlags & NORM_IGNORENONSPACE))
  2098. {
  2099. pPosDW--;
  2100. if (pHashN->IfReverseDW == TRUE)
  2101. {
  2102. //
  2103. // Reverse diacritics:
  2104. // - remove diacritics from left to right.
  2105. // - store diacritics from right to left.
  2106. //
  2107. while ((pDW <= pPosDW) && (*pDW <= MIN_DW))
  2108. {
  2109. pDW++;
  2110. }
  2111. if ((cbDest - PosCtr) <= (pPosDW - pDW + 1))
  2112. {
  2113. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2114. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2115. return (0);
  2116. }
  2117. while (pPosDW >= pDW)
  2118. {
  2119. pDest[PosCtr] = *pPosDW;
  2120. PosCtr++;
  2121. pPosDW--;
  2122. }
  2123. }
  2124. else
  2125. {
  2126. //
  2127. // Regular diacritics:
  2128. // - remove diacritics from right to left.
  2129. // - store diacritics from left to right.
  2130. //
  2131. while ((pPosDW >= pDW) && (*pPosDW <= MIN_DW))
  2132. {
  2133. pPosDW--;
  2134. }
  2135. if ((cbDest - PosCtr) <= (pPosDW - pDW + 1))
  2136. {
  2137. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2138. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2139. return (0);
  2140. }
  2141. while (pDW <= pPosDW)
  2142. {
  2143. pDest[PosCtr] = *pDW;
  2144. PosCtr++;
  2145. pDW++;
  2146. }
  2147. }
  2148. }
  2149. //
  2150. // Copy Separator to destination buffer.
  2151. //
  2152. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2153. PosCtr++;
  2154. //
  2155. // Store the Case Weights in the destination buffer.
  2156. //
  2157. // - Eliminate minimum CW.
  2158. // - Make sure destination buffer is large enough.
  2159. // - Copy case weights to destination buffer.
  2160. //
  2161. if ((dwFlags & NORM_DROP_CW) != NORM_DROP_CW)
  2162. {
  2163. pPosCW--;
  2164. while ((pPosCW >= pCW) && (*pPosCW <= MIN_CW))
  2165. {
  2166. pPosCW--;
  2167. }
  2168. if ((cbDest - PosCtr) <= (pPosCW - pCW + 1))
  2169. {
  2170. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2171. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2172. return (0);
  2173. }
  2174. while (pCW <= pPosCW)
  2175. {
  2176. pDest[PosCtr] = *pCW;
  2177. PosCtr++;
  2178. pCW++;
  2179. }
  2180. }
  2181. //
  2182. // Copy Separator to destination buffer.
  2183. //
  2184. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2185. PosCtr++;
  2186. //
  2187. // Store the Extra Weights in the destination buffer.
  2188. //
  2189. // - Eliminate unnecessary XW.
  2190. // - Make sure destination buffer is large enough.
  2191. // - Copy extra weights to destination buffer.
  2192. //
  2193. if (pXW < pPosXW)
  2194. {
  2195. if (dwFlags & NORM_IGNORENONSPACE)
  2196. {
  2197. //
  2198. // Ignore 4W and 5W. Must count separators for
  2199. // 4W and 5W, though.
  2200. //
  2201. if ((cbDest - PosCtr) <= 2)
  2202. {
  2203. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2204. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2205. return (0);
  2206. }
  2207. pDest[PosCtr] = pXWSeparator[0];
  2208. pDest[PosCtr + 1] = pXWSeparator[1];
  2209. PosCtr += 2;
  2210. ctr = 2;
  2211. }
  2212. else
  2213. {
  2214. ctr = 0;
  2215. }
  2216. pPosXW--;
  2217. for (; ctr < NUM_BYTES_XW; ctr++)
  2218. {
  2219. pTmp = pXW + (WeightLen * ctr);
  2220. pPosTmp = pPosXW + (WeightLen * ctr);
  2221. while ((pPosTmp >= pTmp) && (*pPosTmp == pXWDrop[ctr]))
  2222. {
  2223. pPosTmp--;
  2224. }
  2225. if ((cbDest - PosCtr) <= (pPosTmp - pTmp + 1))
  2226. {
  2227. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2228. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2229. return (0);
  2230. }
  2231. while (pTmp <= pPosTmp)
  2232. {
  2233. pDest[PosCtr] = *pTmp;
  2234. PosCtr++;
  2235. pTmp++;
  2236. }
  2237. //
  2238. // Copy Separator to destination buffer.
  2239. //
  2240. pDest[PosCtr] = pXWSeparator[ctr];
  2241. PosCtr++;
  2242. }
  2243. }
  2244. //
  2245. // Copy Separator to destination buffer.
  2246. //
  2247. pDest[PosCtr] = SORTKEY_SEPARATOR;
  2248. PosCtr++;
  2249. //
  2250. // Store the Special Weights in the destination buffer.
  2251. //
  2252. // - Make sure destination buffer is large enough.
  2253. // - Copy special weights to destination buffer.
  2254. //
  2255. if (!fIgnoreSymbols)
  2256. {
  2257. if ((cbDest - PosCtr) <= (pPosSW - pSW))
  2258. {
  2259. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2260. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2261. return (0);
  2262. }
  2263. pTmp = (LPBYTE)pSW;
  2264. while (pTmp < (LPBYTE)pPosSW)
  2265. {
  2266. pDest[PosCtr] = *pTmp;
  2267. pDest[PosCtr + 1] = *(pTmp + 1);
  2268. //
  2269. // NOTE: Special Weight is stored in the data file as
  2270. // Weight, Script
  2271. // so that the WORD value will be read correctly.
  2272. //
  2273. pDest[PosCtr + 2] = *(pTmp + 3);
  2274. pDest[PosCtr + 3] = *(pTmp + 2);
  2275. PosCtr += 4;
  2276. pTmp += 4;
  2277. }
  2278. }
  2279. //
  2280. // Copy Terminator to destination buffer.
  2281. //
  2282. pDest[PosCtr] = SORTKEY_TERMINATOR;
  2283. PosCtr++;
  2284. }
  2285. //
  2286. // Free the buffer used for the weights, if one was allocated.
  2287. //
  2288. NLS_FREE_TMP_BUFFER(pUW, pBuffer);
  2289. //
  2290. // Return number of BYTES written to destination buffer.
  2291. //
  2292. return (PosCtr);
  2293. }
  2294. ////////////////////////////////////////////////////////////////////////////
  2295. //
  2296. // MapNormalization
  2297. //
  2298. // Stores the result of the normalization for the given string in the
  2299. // destination buffer, and returns the number of wide characters written
  2300. // to the buffer.
  2301. //
  2302. // 11-04-92 JulieB Created.
  2303. ////////////////////////////////////////////////////////////////////////////
  2304. int MapNormalization(
  2305. PLOC_HASH pHashN,
  2306. DWORD dwFlags,
  2307. LPCWSTR pSrc,
  2308. int cchSrc,
  2309. LPWSTR pDest,
  2310. int cchDest)
  2311. {
  2312. int ctr; // source char counter
  2313. int ctr2 = 0; // destination char counter
  2314. //
  2315. // Make sure the ctype table is available in the system.
  2316. //
  2317. if (GetCTypeFileInfo())
  2318. {
  2319. SetLastError(ERROR_FILE_NOT_FOUND);
  2320. return (0);
  2321. }
  2322. //
  2323. // Normalize based on the flags.
  2324. //
  2325. switch (dwFlags)
  2326. {
  2327. case ( NORM_IGNORENONSPACE ) :
  2328. {
  2329. //
  2330. // If the destination value is zero, then only return
  2331. // the count of characters. Do NOT touch pDest.
  2332. //
  2333. if (cchDest == 0)
  2334. {
  2335. //
  2336. // Count the number of characters that would be written
  2337. // to the destination buffer.
  2338. //
  2339. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2340. {
  2341. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2342. {
  2343. //
  2344. // Not a nonspacing character, so just write the
  2345. // character to the destination string.
  2346. //
  2347. ctr2++;
  2348. }
  2349. else if (!(IS_NONSPACE_ONLY(pHashN->pSortkey, pSrc[ctr])))
  2350. {
  2351. //
  2352. // PreComposed Form. Write the base character only.
  2353. //
  2354. ctr2++;
  2355. }
  2356. //
  2357. // Else - nonspace character only, so don't write
  2358. // anything.
  2359. //
  2360. }
  2361. }
  2362. else
  2363. {
  2364. //
  2365. // Store the normalized string in the destination string.
  2366. //
  2367. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2368. ctr++)
  2369. {
  2370. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2371. {
  2372. //
  2373. // Not a nonspacing character, so just write the
  2374. // character to the destination string.
  2375. //
  2376. pDest[ctr2] = pSrc[ctr];
  2377. ctr2++;
  2378. }
  2379. else if (!(IS_NONSPACE_ONLY(pHashN->pSortkey, pSrc[ctr])))
  2380. {
  2381. //
  2382. // PreComposed Form. Write the base character only.
  2383. //
  2384. GET_BASE_CHAR(pSrc[ctr], pDest[ctr2]);
  2385. if (pDest[ctr2] == 0)
  2386. {
  2387. //
  2388. // No translation for precomposed character,
  2389. // so must write the precomposed character.
  2390. //
  2391. pDest[ctr2] = pSrc[ctr];
  2392. }
  2393. ctr2++;
  2394. }
  2395. //
  2396. // Else - nonspace character only, so don't write
  2397. // anything.
  2398. //
  2399. }
  2400. }
  2401. break;
  2402. }
  2403. case ( NORM_IGNORESYMBOLS ) :
  2404. {
  2405. //
  2406. // If the destination value is zero, then only return
  2407. // the count of characters. Do NOT touch pDest.
  2408. //
  2409. if (cchDest == 0)
  2410. {
  2411. //
  2412. // Count the number of characters that would be written
  2413. // to the destination buffer.
  2414. //
  2415. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2416. {
  2417. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2418. {
  2419. //
  2420. // Not a symbol, so write the character.
  2421. //
  2422. ctr2++;
  2423. }
  2424. }
  2425. }
  2426. else
  2427. {
  2428. //
  2429. // Store the normalized string in the destination string.
  2430. //
  2431. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2432. ctr++)
  2433. {
  2434. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2435. {
  2436. //
  2437. // Not a symbol, so write the character.
  2438. //
  2439. pDest[ctr2] = pSrc[ctr];
  2440. ctr2++;
  2441. }
  2442. }
  2443. }
  2444. break;
  2445. }
  2446. case ( NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS ) :
  2447. {
  2448. //
  2449. // If the destination value is zero, then only return
  2450. // the count of characters. Do NOT touch pDest.
  2451. //
  2452. if (cchDest == 0)
  2453. {
  2454. //
  2455. // Count the number of characters that would be written
  2456. // to the destination buffer.
  2457. //
  2458. for (ctr = 0, ctr2 = 0; ctr < cchSrc; ctr++)
  2459. {
  2460. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2461. {
  2462. //
  2463. // Not a symbol, so check for nonspace.
  2464. //
  2465. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2466. {
  2467. //
  2468. // Not a nonspacing character, so just write the
  2469. // character to the destination string.
  2470. //
  2471. ctr2++;
  2472. }
  2473. else if (!(IS_NONSPACE_ONLY( pHashN->pSortkey,
  2474. pSrc[ctr] )))
  2475. {
  2476. //
  2477. // PreComposed Form. Write the base character
  2478. // only.
  2479. //
  2480. ctr2++;
  2481. }
  2482. //
  2483. // Else - nonspace character only, so don't write
  2484. // anything.
  2485. //
  2486. }
  2487. }
  2488. }
  2489. else
  2490. {
  2491. //
  2492. // Store the normalized string in the destination string.
  2493. //
  2494. for (ctr = 0, ctr2 = 0; (ctr < cchSrc) && (ctr2 < cchDest);
  2495. ctr++)
  2496. {
  2497. //
  2498. // Check for symbol and nonspace.
  2499. //
  2500. if (!IS_SYMBOL(pHashN->pSortkey, pSrc[ctr]))
  2501. {
  2502. //
  2503. // Not a symbol, so check for nonspace.
  2504. //
  2505. if (!IS_NONSPACE(pHashN->pSortkey, pSrc[ctr]))
  2506. {
  2507. //
  2508. // Not a nonspacing character, so just write the
  2509. // character to the destination string.
  2510. //
  2511. pDest[ctr2] = pSrc[ctr];
  2512. ctr2++;
  2513. }
  2514. else if (!(IS_NONSPACE_ONLY( pHashN->pSortkey,
  2515. pSrc[ctr] )))
  2516. {
  2517. //
  2518. // PreComposed Form. Write the base character
  2519. // only.
  2520. //
  2521. GET_BASE_CHAR(pSrc[ctr], pDest[ctr2]);
  2522. if (pDest[ctr2] == 0)
  2523. {
  2524. //
  2525. // No translation for precomposed character,
  2526. // so must write the precomposed character.
  2527. //
  2528. pDest[ctr2] = pSrc[ctr];
  2529. }
  2530. ctr2++;
  2531. }
  2532. //
  2533. // Else - nonspace character only, so don't write
  2534. // anything.
  2535. //
  2536. }
  2537. }
  2538. }
  2539. break;
  2540. }
  2541. }
  2542. //
  2543. // Return the number of wide characters written.
  2544. //
  2545. return (ctr2);
  2546. }
  2547. ////////////////////////////////////////////////////////////////////////////
  2548. //
  2549. // MapKanaWidth
  2550. //
  2551. // Stores the result of the Kana, Width, and/or Casing mappings for the
  2552. // given string in the destination buffer, and returns the number of wide
  2553. // characters written to the buffer.
  2554. //
  2555. // 07-26-93 JulieB Created.
  2556. ////////////////////////////////////////////////////////////////////////////
  2557. int MapKanaWidth(
  2558. PLOC_HASH pHashN,
  2559. DWORD dwFlags,
  2560. LPCWSTR pSrc,
  2561. int cchSrc,
  2562. LPWSTR pDest,
  2563. int cchDest)
  2564. {
  2565. int ctr; // loop counter
  2566. PCASE pCase; // ptr to case table (if case flag is set)
  2567. //
  2568. // See if lower or upper case flags are present.
  2569. //
  2570. if (dwFlags & LCMAP_LOWERCASE)
  2571. {
  2572. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  2573. ? pHashN->pLowerLinguist
  2574. : pHashN->pLowerCase;
  2575. }
  2576. else if (dwFlags & LCMAP_UPPERCASE)
  2577. {
  2578. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  2579. ? pHashN->pUpperLinguist
  2580. : pHashN->pUpperCase;
  2581. }
  2582. else
  2583. {
  2584. pCase = NULL;
  2585. }
  2586. //
  2587. // Remove lower, upper, and linguistic casing flags.
  2588. //
  2589. dwFlags &= ~(LCMAP_LOWERCASE | LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING);
  2590. //
  2591. // Map the string based on the given flags.
  2592. //
  2593. switch (dwFlags)
  2594. {
  2595. case ( LCMAP_HIRAGANA ) :
  2596. case ( LCMAP_KATAKANA ) :
  2597. {
  2598. //
  2599. // If the destination value is zero, then just return the
  2600. // length of the source string. Do NOT touch pDest.
  2601. //
  2602. if (cchDest == 0)
  2603. {
  2604. return (cchSrc);
  2605. }
  2606. //
  2607. // If cchSrc is greater than cchDest, then the destination
  2608. // buffer is too small to hold the string. Return an error.
  2609. //
  2610. if (cchSrc > cchDest)
  2611. {
  2612. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2613. return (0);
  2614. }
  2615. if (dwFlags == LCMAP_HIRAGANA)
  2616. {
  2617. //
  2618. // Map all Katakana full width to Hiragana full width.
  2619. // Katakana half width will remain Katakana half width.
  2620. //
  2621. if (pCase)
  2622. {
  2623. for (ctr = 0; ctr < cchSrc; ctr++)
  2624. {
  2625. pDest[ctr] = GET_KANA(pTblPtrs->pHiragana, pSrc[ctr]);
  2626. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  2627. }
  2628. }
  2629. else
  2630. {
  2631. for (ctr = 0; ctr < cchSrc; ctr++)
  2632. {
  2633. pDest[ctr] = GET_KANA(pTblPtrs->pHiragana, pSrc[ctr]);
  2634. }
  2635. }
  2636. }
  2637. else
  2638. {
  2639. //
  2640. // Map all Hiragana full width to Katakana full width.
  2641. // Hiragana half width does not exist.
  2642. //
  2643. if (pCase)
  2644. {
  2645. for (ctr = 0; ctr < cchSrc; ctr++)
  2646. {
  2647. pDest[ctr] = GET_KANA(pTblPtrs->pKatakana, pSrc[ctr]);
  2648. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  2649. }
  2650. }
  2651. else
  2652. {
  2653. for (ctr = 0; ctr < cchSrc; ctr++)
  2654. {
  2655. pDest[ctr] = GET_KANA(pTblPtrs->pKatakana, pSrc[ctr]);
  2656. }
  2657. }
  2658. }
  2659. //
  2660. // Return the number of characters mapped.
  2661. //
  2662. return (cchSrc);
  2663. break;
  2664. }
  2665. case ( LCMAP_HALFWIDTH ) :
  2666. {
  2667. //
  2668. // Map all chars to half width.
  2669. //
  2670. return (MapHalfKana( pSrc,
  2671. cchSrc,
  2672. pDest,
  2673. cchDest,
  2674. NULL,
  2675. pCase ));
  2676. break;
  2677. }
  2678. case ( LCMAP_FULLWIDTH ) :
  2679. {
  2680. //
  2681. // Map all chars to full width.
  2682. //
  2683. return (MapFullKana( pSrc,
  2684. cchSrc,
  2685. pDest,
  2686. cchDest,
  2687. NULL,
  2688. pCase ));
  2689. break;
  2690. }
  2691. case ( LCMAP_HIRAGANA | LCMAP_HALFWIDTH ) :
  2692. {
  2693. //
  2694. // This combination of flags is strange, because
  2695. // Hiragana is only full width. So, the Hiragana flag
  2696. // is the most important. Full width Katakana will be
  2697. // mapped to full width Hiragana, not half width
  2698. // Katakana.
  2699. //
  2700. // Map to Hiragana, then Half Width.
  2701. //
  2702. return (MapHalfKana( pSrc,
  2703. cchSrc,
  2704. pDest,
  2705. cchDest,
  2706. pTblPtrs->pHiragana,
  2707. pCase ));
  2708. break;
  2709. }
  2710. case ( LCMAP_HIRAGANA | LCMAP_FULLWIDTH ) :
  2711. {
  2712. //
  2713. // Since Hiragana is only FULL width, the mapping to
  2714. // width must be done first to convert all half width
  2715. // Katakana to full width Katakana before trying to
  2716. // map to Hiragana.
  2717. //
  2718. // Map to Full Width, then Hiragana.
  2719. //
  2720. return (MapFullKana( pSrc,
  2721. cchSrc,
  2722. pDest,
  2723. cchDest,
  2724. pTblPtrs->pHiragana,
  2725. pCase ));
  2726. break;
  2727. }
  2728. case ( LCMAP_KATAKANA | LCMAP_HALFWIDTH ) :
  2729. {
  2730. //
  2731. // Since Hiragana is only FULL width, the mapping to
  2732. // Katakana must be done first to convert all Hiragana
  2733. // to Katakana before trying to map to half width.
  2734. //
  2735. // Map to Katakana, then Half Width.
  2736. //
  2737. return (MapHalfKana( pSrc,
  2738. cchSrc,
  2739. pDest,
  2740. cchDest,
  2741. pTblPtrs->pKatakana,
  2742. pCase ));
  2743. break;
  2744. }
  2745. case ( LCMAP_KATAKANA | LCMAP_FULLWIDTH ) :
  2746. {
  2747. //
  2748. // Since Hiragana is only FULL width, it doesn't matter
  2749. // which way the mapping is done for this combination.
  2750. //
  2751. // Map to Full Width, then Katakana.
  2752. //
  2753. return (MapFullKana( pSrc,
  2754. cchSrc,
  2755. pDest,
  2756. cchDest,
  2757. pTblPtrs->pKatakana,
  2758. pCase ));
  2759. break;
  2760. }
  2761. default :
  2762. {
  2763. //
  2764. // Return error.
  2765. //
  2766. return (0);
  2767. }
  2768. }
  2769. }
  2770. ////////////////////////////////////////////////////////////////////////////
  2771. //
  2772. // MapHalfKana
  2773. //
  2774. // Stores the result of the half width and Kana mapping for the given string
  2775. // in the destination buffer, and returns the number of wide characters
  2776. // written to the buffer.
  2777. //
  2778. // This first converts the precomposed characters to their composite forms,
  2779. // and then maps all characters to their half width forms. This handles the
  2780. // case where the full width precomposed form should map to TWO half width
  2781. // code points (composite form). The half width precomposed forms do not
  2782. // exist in Unicode.
  2783. //
  2784. // 11-04-93 JulieB Created.
  2785. ////////////////////////////////////////////////////////////////////////////
  2786. int MapHalfKana(
  2787. LPCWSTR pSrc,
  2788. int cchSrc,
  2789. LPWSTR pDest,
  2790. int cchDest,
  2791. PKANA pKana,
  2792. PCASE pCase)
  2793. {
  2794. int Count; // count of characters written
  2795. int ctr = 0; // loop counter
  2796. int ct; // loop counter
  2797. LPWSTR pBuf; // ptr to destination buffer
  2798. LPWSTR pEndBuf; // ptr to end of destination buffer
  2799. LPWSTR pPosDest; // ptr to position in destination buffer
  2800. LPWSTR *ppIncr; // points to ptr to increment
  2801. WCHAR pTmp[MAX_COMPOSITE]; // ptr to temporary buffer
  2802. LPWSTR pEndTmp; // ptr to end of temporary buffer
  2803. //
  2804. // Initialize the destination pointers.
  2805. //
  2806. pEndTmp = pTmp + MAX_COMPOSITE;
  2807. if (cchDest == 0)
  2808. {
  2809. //
  2810. // Do not touch the pDest pointer. Use the pTmp buffer and
  2811. // initialize the end pointer.
  2812. //
  2813. pBuf = pTmp;
  2814. pEndBuf = pEndTmp;
  2815. //
  2816. // This is a bogus pointer and will never be touched. It just
  2817. // increments this pointer into oblivion.
  2818. //
  2819. pDest = pBuf;
  2820. ppIncr = &pDest;
  2821. }
  2822. else
  2823. {
  2824. //
  2825. // Initialize the pointers. Use the pDest buffer.
  2826. //
  2827. pBuf = pDest;
  2828. pEndBuf = pBuf + cchDest;
  2829. ppIncr = &pBuf;
  2830. }
  2831. //
  2832. // Search through the source string. Convert all precomposed
  2833. // forms to their composite form before converting to half width.
  2834. //
  2835. while ((ctr < cchSrc) && (pBuf < pEndBuf))
  2836. {
  2837. //
  2838. // Get the character to convert. If we need to convert to
  2839. // kana, do it.
  2840. //
  2841. if (pKana)
  2842. {
  2843. *pTmp = GET_KANA(pKana, pSrc[ctr]);
  2844. }
  2845. else
  2846. {
  2847. *pTmp = pSrc[ctr];
  2848. }
  2849. //
  2850. // Convert to its composite form (if exists).
  2851. //
  2852. // NOTE: Must use the tmp buffer in case the destination buffer
  2853. // isn't large enough to hold the composite form.
  2854. //
  2855. Count = InsertCompositeForm(pTmp, pEndTmp);
  2856. //
  2857. // Convert to half width (if exists) and case (if appropriate).
  2858. //
  2859. pPosDest = pTmp;
  2860. if (pCase)
  2861. {
  2862. for (ct = Count; ct > 0; ct--)
  2863. {
  2864. *pPosDest = GET_HALF_WIDTH(pTblPtrs->pHalfWidth, *pPosDest);
  2865. *pPosDest = GET_LOWER_UPPER_CASE(pCase, *pPosDest);
  2866. pPosDest++;
  2867. }
  2868. }
  2869. else
  2870. {
  2871. for (ct = Count; ct > 0; ct--)
  2872. {
  2873. *pPosDest = GET_HALF_WIDTH(pTblPtrs->pHalfWidth, *pPosDest);
  2874. pPosDest++;
  2875. }
  2876. }
  2877. //
  2878. // Convert back to its precomposed form (if exists).
  2879. //
  2880. if (Count > 1)
  2881. {
  2882. //
  2883. // Get the precomposed form.
  2884. //
  2885. // ct is the number of code points used from the
  2886. // composite form.
  2887. //
  2888. ct = InsertPreComposedForm(pTmp, pPosDest, pBuf);
  2889. if (ct > 1)
  2890. {
  2891. //
  2892. // Precomposed form was found. Need to make sure all
  2893. // of the composite chars were used.
  2894. //
  2895. if (ct == Count)
  2896. {
  2897. //
  2898. // All composite chars were used. Increment by 1.
  2899. //
  2900. (*ppIncr)++;
  2901. }
  2902. else
  2903. {
  2904. //
  2905. // Not all composite chars were used. Need to copy
  2906. // the rest of the composite chars from the tmp buffer
  2907. // to the destination buffer.
  2908. //
  2909. (*ppIncr)++;
  2910. Count -= ct;
  2911. if (pBuf + Count > pEndBuf)
  2912. {
  2913. break;
  2914. }
  2915. RtlMoveMemory(pBuf, pTmp + ct, Count * sizeof(WCHAR));
  2916. (*ppIncr) += Count;
  2917. }
  2918. }
  2919. else
  2920. {
  2921. //
  2922. // Precomposed form was NOT found. Need to copy the
  2923. // composite form from the tmp buffer to the destination
  2924. // buffer.
  2925. //
  2926. if (pBuf + Count > pEndBuf)
  2927. {
  2928. break;
  2929. }
  2930. RtlMoveMemory(pBuf, pTmp, Count * sizeof(WCHAR));
  2931. (*ppIncr) += Count;
  2932. }
  2933. }
  2934. else
  2935. {
  2936. //
  2937. // Only one character (no composite form), so just copy it
  2938. // from the tmp buffer to the destination buffer.
  2939. //
  2940. *pBuf = *pTmp;
  2941. (*ppIncr)++;
  2942. }
  2943. ctr++;
  2944. }
  2945. //
  2946. // Return the appropriate number of characters.
  2947. //
  2948. if (cchDest == 0)
  2949. {
  2950. //
  2951. // Return the number of characters written to the buffer.
  2952. //
  2953. return ((*ppIncr) - pTmp);
  2954. }
  2955. else
  2956. {
  2957. //
  2958. // Make sure the given buffer was large enough to hold the
  2959. // mapping.
  2960. //
  2961. if (ctr < cchSrc)
  2962. {
  2963. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  2964. return (0);
  2965. }
  2966. //
  2967. // Return the number of characters written to the buffer.
  2968. //
  2969. return ((*ppIncr) - pDest);
  2970. }
  2971. }
  2972. ////////////////////////////////////////////////////////////////////////////
  2973. //
  2974. // MapFullKana
  2975. //
  2976. // Stores the result of the full width and Kana mapping for the given string
  2977. // in the destination buffer, and returns the number of wide characters
  2978. // written to the buffer.
  2979. //
  2980. // This first converts the characters to full width, and then maps all
  2981. // composite characters to their precomposed forms. This handles the case
  2982. // where the half width composite form (TWO code points) should map to a
  2983. // full width precomposed form (ONE full width code point). The half
  2984. // width precomposed forms do not exist in Unicode and we need the full
  2985. // width precomposed forms to round trip with the TWO half width code
  2986. // points.
  2987. //
  2988. // 11-04-93 JulieB Created.
  2989. ////////////////////////////////////////////////////////////////////////////
  2990. int MapFullKana(
  2991. LPCWSTR pSrc,
  2992. int cchSrc,
  2993. LPWSTR pDest,
  2994. int cchDest,
  2995. PKANA pKana,
  2996. PCASE pCase)
  2997. {
  2998. int Count; // count of characters
  2999. LPWSTR pPosSrc; // ptr to position in source buffer
  3000. LPWSTR pEndSrc; // ptr to end of source buffer
  3001. LPWSTR pBuf; // ptr to destination buffer
  3002. LPWSTR pEndBuf; // ptr to end of destination buffer
  3003. LPWSTR *ppIncr; // points to ptr to increment
  3004. WCHAR pTmp[MAX_COMPOSITE]; // ptr to temporary buffer
  3005. //
  3006. // Initialize source string pointers.
  3007. //
  3008. pPosSrc = (LPWSTR)pSrc;
  3009. pEndSrc = pPosSrc + cchSrc;
  3010. //
  3011. // Initialize the destination pointers.
  3012. //
  3013. if (cchDest == 0)
  3014. {
  3015. //
  3016. // Do not touch the pDest pointer. Use the pTmp buffer and
  3017. // initialize the end pointer.
  3018. //
  3019. pBuf = pTmp;
  3020. pEndBuf = pTmp + MAX_COMPOSITE;
  3021. //
  3022. // This is a bogus pointer and will never be touched. It just
  3023. // increments this pointer into oblivion.
  3024. //
  3025. pDest = pBuf;
  3026. ppIncr = &pDest;
  3027. }
  3028. else
  3029. {
  3030. //
  3031. // Initialize the pointers. Use the pDest buffer.
  3032. //
  3033. pBuf = pDest;
  3034. pEndBuf = pBuf + cchDest;
  3035. ppIncr = &pBuf;
  3036. }
  3037. //
  3038. // Search through the source string. Convert all composite
  3039. // forms to their precomposed form before converting to full width.
  3040. //
  3041. while ((pPosSrc < pEndSrc) && (pBuf < pEndBuf))
  3042. {
  3043. //
  3044. // Convert a composite form to its full width precomposed
  3045. // form (if exists). Also, convert to case if necessary.
  3046. //
  3047. Count = InsertFullWidthPreComposedForm( pPosSrc,
  3048. pEndSrc,
  3049. pBuf,
  3050. pCase );
  3051. pPosSrc += Count;
  3052. //
  3053. // Convert to kana if necessary.
  3054. //
  3055. if (pKana)
  3056. {
  3057. *pBuf = GET_KANA(pKana, *pBuf);
  3058. }
  3059. //
  3060. // Increment the destination pointer.
  3061. //
  3062. (*ppIncr)++;
  3063. }
  3064. //
  3065. // Return the appropriate number of characters.
  3066. //
  3067. if (cchDest == 0)
  3068. {
  3069. //
  3070. // Return the number of characters written to the buffer.
  3071. //
  3072. return ((*ppIncr) - pTmp);
  3073. }
  3074. else
  3075. {
  3076. //
  3077. // Make sure the given buffer was large enough to hold the
  3078. // mapping.
  3079. //
  3080. if (pPosSrc < pEndSrc)
  3081. {
  3082. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3083. return (0);
  3084. }
  3085. //
  3086. // Return the number of characters written to the buffer.
  3087. //
  3088. return ((*ppIncr) - pDest);
  3089. }
  3090. }
  3091. ////////////////////////////////////////////////////////////////////////////
  3092. //
  3093. // MapTraditionalSimplified
  3094. //
  3095. // Stores the appropriate Traditional or Simplified Chinese values in the
  3096. // destination buffer, and returns the number of wide characters
  3097. // written to the buffer.
  3098. //
  3099. // 05-07-96 JulieB Created.
  3100. ////////////////////////////////////////////////////////////////////////////
  3101. int MapTraditionalSimplified(
  3102. PLOC_HASH pHashN,
  3103. DWORD dwFlags,
  3104. LPCWSTR pSrc,
  3105. int cchSrc,
  3106. LPWSTR pDest,
  3107. int cchDest,
  3108. PCHINESE pChinese)
  3109. {
  3110. int ctr; // loop counter
  3111. PCASE pCase; // ptr to case table (if case flag is set)
  3112. //
  3113. // If the destination value is zero, then just return the
  3114. // length of the source string. Do NOT touch pDest.
  3115. //
  3116. if (cchDest == 0)
  3117. {
  3118. return (cchSrc);
  3119. }
  3120. //
  3121. // If cchSrc is greater than cchDest, then the destination buffer
  3122. // is too small to hold the new string. Return an error.
  3123. //
  3124. if (cchSrc > cchDest)
  3125. {
  3126. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  3127. return (0);
  3128. }
  3129. //
  3130. // See if lower or upper case flags are present.
  3131. //
  3132. if (dwFlags & LCMAP_LOWERCASE)
  3133. {
  3134. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  3135. ? pHashN->pLowerLinguist
  3136. : pHashN->pLowerCase;
  3137. }
  3138. else if (dwFlags & LCMAP_UPPERCASE)
  3139. {
  3140. pCase = (dwFlags & LCMAP_LINGUISTIC_CASING)
  3141. ? pHashN->pUpperLinguist
  3142. : pHashN->pUpperCase;
  3143. }
  3144. else
  3145. {
  3146. pCase = NULL;
  3147. }
  3148. //
  3149. // Map to Traditional/Simplified and store it in the destination string.
  3150. // Also map the case, if appropriate.
  3151. //
  3152. if (pCase)
  3153. {
  3154. for (ctr = 0; ctr < cchSrc; ctr++)
  3155. {
  3156. pDest[ctr] = GET_CHINESE(pChinese, pSrc[ctr]);
  3157. pDest[ctr] = GET_LOWER_UPPER_CASE(pCase, pDest[ctr]);
  3158. }
  3159. }
  3160. else
  3161. {
  3162. for (ctr = 0; ctr < cchSrc; ctr++)
  3163. {
  3164. pDest[ctr] = GET_CHINESE(pChinese, pSrc[ctr]);
  3165. }
  3166. }
  3167. //
  3168. // Return the number of wide characters written.
  3169. //
  3170. return (ctr);
  3171. }