Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

401 lines
13 KiB

  1. /*++
  2. Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. jamo.c
  5. Abstract:
  6. This file contains functions that deal with the sorting of old Hangul.
  7. Korean characters (Hangul) can be composed by Jamos (U+1100 - U+11ff).
  8. However, some valid compositions of Jamo are not found in modern
  9. Hangul (U+AC00 - U+D7AF).
  10. These valid compositions are called old Hangul.
  11. MapOldHangulSortKey() is called by CompareString() and MapSortKey() to
  12. handle the sorting of old Hangul.
  13. Note:
  14. Jamo composition means that several Jamos (Korean alphabetic letters) compose
  15. a valid Hangul character or old Hangul character.
  16. Eg. U+1100 U+1103 U+1161 U+11a8 composes a valid old Hangul character.
  17. The following are data members of the global structure pTblPtrs used by
  18. old Hangul sorting:
  19. * pTblPtrs->pJamoIndex
  20. Given a Jamo, this is the index into the pJamoComposition state
  21. machine for this Jamo.
  22. The value for U+1100 is stored in pJamoIndex[0], U+1101 is in
  23. pJamoIndex[1], etc.
  24. The value for U+1100 is 1. This means the state machine for
  25. U+1100 is stored in pJamoComposition[1].
  26. Note that not every Jamo can start a valid composition. For
  27. those Jamos that can not start a valid composition, the table
  28. entry for that Jamo is 0. E.g. the index for U+1101 is 0.
  29. * pTblPtrs->NumJamoIndex
  30. The number of entries in pJamoIndex. Every index is a WORD.
  31. * pTblPtrs->pJamoComposition
  32. This is the Jamo composition state machine. It is used for two
  33. purposes:
  34. 1. Used to verify a valid Jamo combination that composes an
  35. old Hangul character.
  36. 2. If a valid old Hangul composition is found, get the
  37. SortInfo for the current combination.
  38. * pTblPtrs->NumJamoComposition
  39. The number of entries in pJamoComposition.
  40. Revision History:
  41. 05-30-2000 JohnMcCo Create old Hangul sorting algorithm and sample.
  42. 06-23-2000 YSLin Created.
  43. --*/
  44. //
  45. // Include Files.
  46. //
  47. #include "nls.h"
  48. #include "nlssafe.h"
  49. #include "jamo.h"
  50. //-------------------------------------------------------------------------//
  51. // INTERNAL MACROS //
  52. //-------------------------------------------------------------------------//
  ////////////////////////////////////////////////////////////////////////////
  //
  //  NOT_END_STRING
  //
  //  Checks to see if the search has reached the end of the string.
  //  It evaluates to TRUE if the counter is not at zero (counting backwards)
  //  and the null termination has not been reached (the terminator is only
  //  honored when -2 was passed in the count parameter).
  //
  //    ct      remaining character count (counting backwards)
  //    ptr     pointer to the current character
  //    cchIn   the original count parameter; -2 means null-terminated
  //
  //  Every argument is parenthesized in the expansion so that callers may
  //  pass arbitrary expressions (e.g. "flags & 2") without running into
  //  operator-precedence surprises.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////
  #define NOT_END_STRING(ct, ptr, cchIn)                                     \
      (((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
  ////////////////////////////////////////////////////////////////////////////
  //
  //  GET_JAMO_INDEX
  //
  //  Converts a Jamo code point into its offset from NLS_CHAR_FIRST_JAMO.
  //  GetJamoComposition uses the result as the index into
  //  pTblPtrs->pJamoIndex.
  //
  ////////////////////////////////////////////////////////////////////////////
  #define GET_JAMO_INDEX(wch) ((wch) - NLS_CHAR_FIRST_JAMO)
  74. //-------------------------------------------------------------------------//
  75. // INTERNAL ROUTINES //
  76. //-------------------------------------------------------------------------//
  77. ////////////////////////////////////////////////////////////////////////////
  78. //
  79. // UpdateJamoState
  80. //
  81. // Update the sort result info based on the new state.
  82. //
  83. // JamoClass The current Jamo class (LeadingJamo/VowelJamo/TrailingJamo)
  84. // pSort The sort information derived from the current state.
  85. // pSortResult The sort information for the final result. Used to
  86. // collect info from pSort.
  87. //
  88. // 06-22-2000 YSLin Created.
  89. ////////////////////////////////////////////////////////////////////////////
  90. void UpdateJamoState(
  91. int JamoClass,
  92. PJAMO_SORT_INFO pSort,
  93. PJAMO_SORT_INFOEX pSortResult) // new sort sequence information
  94. {
  95. //
  96. // Record if this is a jamo unique to old Hangul.
  97. //
  98. pSortResult->m_bOld |= pSort->m_bOld;
  99. //
  100. // Update the indices iff the new ones are higher than the current ones.
  101. //
  102. if (pSort->m_chLeadingIndex > pSortResult->m_chLeadingIndex)
  103. {
  104. pSortResult->m_chLeadingIndex = pSort->m_chLeadingIndex;
  105. }
  106. if (pSort->m_chVowelIndex > pSortResult->m_chVowelIndex)
  107. {
  108. pSortResult->m_chVowelIndex = pSort->m_chVowelIndex;
  109. }
  110. if (pSort->m_chTrailingIndex > pSortResult->m_chTrailingIndex)
  111. {
  112. pSortResult->m_chTrailingIndex = pSort->m_chTrailingIndex;
  113. }
  114. //
  115. // Update the extra weights according to the current Jamo class.
  116. //
  117. switch (JamoClass)
  118. {
  119. case ( NLS_CLASS_LEADING_JAMO ) :
  120. {
  121. if (pSort->m_ExtraWeight > pSortResult->m_LeadingWeight)
  122. {
  123. pSortResult->m_LeadingWeight = pSort->m_ExtraWeight;
  124. }
  125. break;
  126. }
  127. case ( NLS_CLASS_VOWEL_JAMO ) :
  128. {
  129. if (pSort->m_ExtraWeight > pSortResult->m_VowelWeight)
  130. {
  131. pSortResult->m_VowelWeight = pSort->m_ExtraWeight;
  132. }
  133. break;
  134. }
  135. case ( NLS_CLASS_TRAILING_JAMO ) :
  136. {
  137. if (pSort->m_ExtraWeight > pSortResult->m_TrailingWeight)
  138. {
  139. pSortResult->m_TrailingWeight = pSort->m_ExtraWeight;
  140. }
  141. break;
  142. }
  143. }
  144. }
  145. ////////////////////////////////////////////////////////////////////////////
  146. //
  147. // GetJamoComposition
  148. //
  149. // ppString pointer to the current Jamo character
  150. // pCount pointer to the current character count (couting backwards)
  151. // cchSrc The total character count (if the value is -2, then the string is null-terminated)
  152. // currentJamoClass the current Jamo class.
  153. // lpJamoTable The entry in jamo table.
  154. // JamoSortInfo the sort information for the final result.
  155. //
  156. // NOTENOTE This function assumes that the character at *ppString is a leading Jamo.
  157. //
  158. // 06-12-2000 YSLin Created.
  159. ////////////////////////////////////////////////////////////////////////////
  160. int GetJamoComposition(
  161. LPCWSTR* ppString, // The pointer to the current character
  162. int* pCount, // The current character count
  163. int cchSrc, // The total character length
  164. int currentJamoClass, // The current Jamo class.
  165. JAMO_SORT_INFOEX* JamoSortInfo // The result Jamo sorting information.
  166. )
  167. {
  168. WCHAR wch;
  169. int JamoClass;
  170. int Index;
  171. PJAMO_TABLE pJamo;
  172. PJAMO_COMPOSE_STATE lpNext = NULL;
  173. PJAMO_COMPOSE_STATE pSearchEnd;
  174. wch = **ppString;
  175. //
  176. // Get the Jamo information for the current character.
  177. //
  178. pJamo = pTblPtrs->pJamoIndex + GET_JAMO_INDEX(wch);
  179. UpdateJamoState(currentJamoClass, &(pJamo->SortInfo), JamoSortInfo);
  180. //
  181. // Move on to next character.
  182. //
  183. (*ppString)++;
  184. while (NOT_END_STRING(*pCount, *ppString, cchSrc))
  185. {
  186. wch = **ppString;
  187. if (!IsJamo(wch))
  188. {
  189. // The current character is not a Jamo. We are done with checking the Jamo composition.
  190. return (-1);
  191. }
  192. if (wch == 0x1160) {
  193. JamoSortInfo->m_bFiller = TRUE;
  194. }
  195. // Get the Jamo class of it.
  196. if (IsLeadingJamo(wch))
  197. {
  198. JamoClass = NLS_CLASS_LEADING_JAMO;
  199. }
  200. else if (IsTrailingJamo(wch))
  201. {
  202. JamoClass = NLS_CLASS_TRAILING_JAMO;
  203. }
  204. else
  205. {
  206. JamoClass = NLS_CLASS_VOWEL_JAMO;
  207. }
  208. if (JamoClass != currentJamoClass)
  209. {
  210. return (JamoClass);
  211. }
  212. if (lpNext == NULL)
  213. {
  214. //
  215. // Get the index into the Jamo composition information.
  216. //
  217. Index = pJamo->Index;
  218. if (Index == 0)
  219. {
  220. return (JamoClass);
  221. }
  222. lpNext = pTblPtrs->pJamoComposition + Index;
  223. pSearchEnd = lpNext + pJamo->TransitionCount;
  224. }
  225. //
  226. // Push the current Jamo (pointed by pString) into a state machine,
  227. // to check if we have a valid old Hangul composition.
  228. // During the check, we will also update the sortkey result in JamoSortInfo.
  229. //
  230. while (lpNext < pSearchEnd)
  231. {
  232. // Found a match--update the combination pointer and sort info.
  233. if (lpNext->m_wcCodePoint == wch)
  234. {
  235. UpdateJamoState(currentJamoClass, &(lpNext->m_SortInfo), JamoSortInfo);
  236. lpNext++;
  237. goto NextChar;
  238. }
  239. // No match -- skip all transitions beginning with this code point
  240. lpNext += lpNext->m_bTransitionCount + 1;
  241. }
  242. //
  243. // We didn't find a valid old Hangul composition for the current character.
  244. // So return the current Jamo class.
  245. //
  246. return (JamoClass);
  247. NextChar:
  248. // We are still in a valid old Hangul composition. Go check the next character.
  249. (*ppString)++; (*pCount)--;
  250. }
  251. return (-1);
  252. }
  253. //-------------------------------------------------------------------------//
  254. // EXTERNAL ROUTINES //
  255. //-------------------------------------------------------------------------//
  256. ////////////////////////////////////////////////////////////////////////////
  257. //
  258. // MapOldHangulSortKey
  259. //
  260. // Check if the given string has a valid old Hangul composition,
  261. // If yes, store the sortkey weights for the given string in the destination
  262. // buffer and return the number of CHARs consumed by the composition.
  263. // If not, return zero.
  264. //
  265. // NOTENOTE: This function assumes that string starting from pSrc is a
  266. // leading Jamo.
  267. //
  268. // 06-12-2000 YSLin Created.
  269. ////////////////////////////////////////////////////////////////////////////
  270. int MapOldHangulSortKey(
  271. PLOC_HASH pHashN,
  272. LPCWSTR pSrc, // source string
  273. int cchSrc, // the length of the string
  274. WORD* pUW, // generated Unicode weight
  275. LPBYTE pXW, // generated extra weight (3 bytes)
  276. BOOL fModify)
  277. {
  278. LPCWSTR pString = pSrc;
  279. LPCWSTR pScan;
  280. JAMO_SORT_INFOEX JamoSortInfo; // The result Jamo infomation.
  281. int Count = cchSrc;
  282. PSORTKEY pWeight;
  283. int JamoClass; // The current Jamo class.
  284. RtlZeroMemory(&JamoSortInfo, sizeof(JamoSortInfo));
  285. JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_LEADING_JAMO, &JamoSortInfo);
  286. if (JamoClass == NLS_CLASS_VOWEL_JAMO)
  287. {
  288. JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_VOWEL_JAMO, &JamoSortInfo);
  289. }
  290. if (JamoClass == NLS_CLASS_TRAILING_JAMO)
  291. {
  292. GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_TRAILING_JAMO, &JamoSortInfo);
  293. }
  294. //
  295. // If we have a valid leading and vowel sequences and this is an old
  296. // Hangul,...
  297. //
  298. if (JamoSortInfo.m_bOld)
  299. {
  300. //
  301. // Compute the modern Hangul syllable prior to this composition.
  302. // Uses formula from Unicode 3.0 Section 3.11 p54
  303. // "Hangul Syllable Composition".
  304. //
  305. WCHAR wchModernHangul =
  306. (JamoSortInfo.m_chLeadingIndex * NLS_JAMO_VOWEL_COUNT + JamoSortInfo.m_chVowelIndex) * NLS_JAMO_TRAILING_COUNT
  307. + JamoSortInfo.m_chTrailingIndex
  308. + NLS_HANGUL_FIRST_SYLLABLE;
  309. if (JamoSortInfo.m_bFiller)
  310. {
  311. // Sort before the modern Hangul, instead of after.
  312. wchModernHangul--;
  313. // If we fall off the modern Hangul syllable block,...
  314. if (wchModernHangul < NLS_HANGUL_FIRST_SYLLABLE)
  315. {
  316. // Sort after the previous character (Circled Hangul Kiyeok A)
  317. wchModernHangul = 0x326e;
  318. }
  319. // Shift the leading weight past any old Hangul that sorts after this modern Hangul
  320. JamoSortInfo.m_LeadingWeight += 0x80;
  321. }
  322. pWeight = &((pHashN->pSortkey)[wchModernHangul]);
  323. *pUW = GET_UNICODE_MOD(pWeight, fModify);
  324. pXW[0] = JamoSortInfo.m_LeadingWeight;
  325. pXW[1] = JamoSortInfo.m_VowelWeight;
  326. pXW[2] = JamoSortInfo.m_TrailingWeight;
  327. return (int)(pString - pSrc);
  328. }
  329. //
  330. // Otherwise it isn't a valid old Hangul composition and we don't do
  331. // anything with it.
  332. //
  333. return (0);
  334. }