Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

400 lines
13 KiB

  1. /*++
  2. Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
  3. Module Name:
  4. jamo.c
  5. Abstract:
  6. This file contains functions that deal with the sorting of old Hangul.
  7. Korean characters (Hangul) can be composed by Jamos (U+1100 - U+11ff).
  8. However, some valid compositions of Jamo are not found in modern
  9. Hangul (U+AC00 - U+D7AF).
  10. These valid compositions are called old Hangul.
  11. MapOldHangulSortKey() is called by CompareString() and MapSortKey() to
  12. handle the sorting of old Hangul.
  13. Note:
  14. A Jamo composition means that several Jamo (Korean alphabetic characters)
  15. together compose a valid Hangul character or old Hangul character.
  16. Eg. U+1100 U+1103 U+1161 U+11a8 composes a valid old Hangul character.
  17. The following are data members of the global structure pTblPtrs used by
  18. old Hangul sorting:
  19. * pTblPtrs->pJamoIndex
  20. Given a Jamo, this is the index into the pJamoComposition state
  21. machine for this Jamo.
  22. The value for U+1100 is stored in pJamoIndex[0], U+1101 is in
  23. pJamoIndex[1], etc.
  24. The value for U+1100 is 1. This means the state machine for
  25. U+1100 is stored in pJamoComposition[1].
  26. Note that not every Jamo can start a valid composition. For
  27. those Jamos that can not start a valid composition, the table
  28. entry for that Jamo is 0. E.g. the index for U+1101 is 0.
  29. * pTblPtrs->NumJamoIndex
  30. The number of entries in pJamoIndex. Every index is a WORD.
  31. * pTblPtrs->pJamoComposition
  32. This is the Jamo composition state machine. It is used for two
  33. purposes:
  34. 1. Used to verify a valid Jamo combination that composes an
  35. old Hangul character.
  36. 2. If a valid old Hangul composition is found, get the
  37. SortInfo for the current combination.
  38. * pTblPtrs->NumJamoComposition
  39. The number of entries in pJamoComposition
  40. Revision History:
  41. 05-30-2000 JohnMcCo Create old Hangul sorting algorithm and sample.
  42. 06-23-2000 YSLin Created.
  43. --*/
  44. //
  45. // Include Files.
  46. //
  47. #include "nls.h"
  48. #include "jamo.h"
  49. //-------------------------------------------------------------------------//
  50. // INTERNAL MACROS //
  51. //-------------------------------------------------------------------------//
  ////////////////////////////////////////////////////////////////////////////
  //
  //  NOT_END_STRING
  //
  //  Evaluates to non-zero while the scan has NOT reached the end of the
  //  string.
  //
  //  ct      - remaining character count (counts down toward zero)
  //  ptr     - pointer to the current character
  //  cchIn   - the original length argument; -2 means the string is
  //            null-terminated
  //
  //  The end of the string is reached when ct is zero, or when cchIn is -2
  //  and *ptr is the terminating null.
  //
  //  Every argument is parenthesized in the expansion so that arbitrary
  //  expressions (e.g. "flags & MASK") can be passed safely.  Because of
  //  short-circuit evaluation each argument is evaluated at most once.
  //
  //  11-04-92    JulieB    Created.
  ////////////////////////////////////////////////////////////////////////////
  #define NOT_END_STRING(ct, ptr, cchIn)                                     \
      (((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
  65. ////////////////////////////////////////////////////////////////////////////
  66. //
  67. // GET_JAMO_INDEX
  68. //
  69. // Computes the zero-based offset of wch from NLS_CHAR_FIRST_JAMO (used to index pTblPtrs->pJamoIndex).
  70. //
  71. ////////////////////////////////////////////////////////////////////////////
  72. #define GET_JAMO_INDEX(wch) ((wch) - NLS_CHAR_FIRST_JAMO)
  73. //-------------------------------------------------------------------------//
  74. // INTERNAL ROUTINES //
  75. //-------------------------------------------------------------------------//
  76. ////////////////////////////////////////////////////////////////////////////
  77. //
  78. // UpdateJamoState
  79. //
  80. // Update the sort result info based on the new state.
  81. //
  82. // JamoClass The current Jamo class (LeadingJamo/VowelJamo/TrailingJamo)
  83. // pSort The sort information derived from the current state.
  84. // pSortResult The sort information for the final result. Used to
  85. // collect info from pSort.
  86. //
  87. // 06-22-2000 YSLin Created.
  88. ////////////////////////////////////////////////////////////////////////////
  89. void UpdateJamoState(
  90. int JamoClass,
  91. PJAMO_SORT_INFO pSort,
  92. PJAMO_SORT_INFOEX pSortResult) // new sort sequence information
  93. {
  94. //
  95. // Record if this is a jamo unique to old Hangul.
  96. //
  97. pSortResult->m_bOld |= pSort->m_bOld;
  98. //
  99. // Update the indices iff the new ones are higher than the current ones.
  100. //
  101. if (pSort->m_chLeadingIndex > pSortResult->m_chLeadingIndex)
  102. {
  103. pSortResult->m_chLeadingIndex = pSort->m_chLeadingIndex;
  104. }
  105. if (pSort->m_chVowelIndex > pSortResult->m_chVowelIndex)
  106. {
  107. pSortResult->m_chVowelIndex = pSort->m_chVowelIndex;
  108. }
  109. if (pSort->m_chTrailingIndex > pSortResult->m_chTrailingIndex)
  110. {
  111. pSortResult->m_chTrailingIndex = pSort->m_chTrailingIndex;
  112. }
  113. //
  114. // Update the extra weights according to the current Jamo class.
  115. //
  116. switch (JamoClass)
  117. {
  118. case ( NLS_CLASS_LEADING_JAMO ) :
  119. {
  120. if (pSort->m_ExtraWeight > pSortResult->m_LeadingWeight)
  121. {
  122. pSortResult->m_LeadingWeight = pSort->m_ExtraWeight;
  123. }
  124. break;
  125. }
  126. case ( NLS_CLASS_VOWEL_JAMO ) :
  127. {
  128. if (pSort->m_ExtraWeight > pSortResult->m_VowelWeight)
  129. {
  130. pSortResult->m_VowelWeight = pSort->m_ExtraWeight;
  131. }
  132. break;
  133. }
  134. case ( NLS_CLASS_TRAILING_JAMO ) :
  135. {
  136. if (pSort->m_ExtraWeight > pSortResult->m_TrailingWeight)
  137. {
  138. pSortResult->m_TrailingWeight = pSort->m_ExtraWeight;
  139. }
  140. break;
  141. }
  142. }
  143. }
  144. ////////////////////////////////////////////////////////////////////////////
  145. //
  146. // GetJamoComposition
  147. //
  148. // ppString pointer to the current Jamo character
  149. // pCount pointer to the current character count (couting backwards)
  150. // cchSrc The total character count (if the value is -2, then the string is null-terminated)
  151. // currentJamoClass the current Jamo class.
  152. // lpJamoTable The entry in jamo table.
  153. // JamoSortInfo the sort information for the final result.
  154. //
  155. // NOTENOTE This function assumes that the character at *ppString is a leading Jamo.
  156. //
  157. // 06-12-2000 YSLin Created.
  158. ////////////////////////////////////////////////////////////////////////////
  159. int GetJamoComposition(
  160. LPCWSTR* ppString, // The pointer to the current character
  161. int* pCount, // The current character count
  162. int cchSrc, // The total character length
  163. int currentJamoClass, // The current Jamo class.
  164. JAMO_SORT_INFOEX* JamoSortInfo // The result Jamo sorting information.
  165. )
  166. {
  167. WCHAR wch;
  168. int JamoClass;
  169. int Index;
  170. PJAMO_TABLE pJamo;
  171. PJAMO_COMPOSE_STATE lpNext = NULL;
  172. PJAMO_COMPOSE_STATE pSearchEnd;
  173. wch = **ppString;
  174. //
  175. // Get the Jamo information for the current character.
  176. //
  177. pJamo = pTblPtrs->pJamoIndex + GET_JAMO_INDEX(wch);
  178. UpdateJamoState(currentJamoClass, &(pJamo->SortInfo), JamoSortInfo);
  179. //
  180. // Move on to next character.
  181. //
  182. (*ppString)++;
  183. while (NOT_END_STRING(*pCount, *ppString, cchSrc))
  184. {
  185. wch = **ppString;
  186. if (!IsJamo(wch))
  187. {
  188. // The current character is not a Jamo. We are done with checking the Jamo composition.
  189. return (-1);
  190. }
  191. if (wch == 0x1160) {
  192. JamoSortInfo->m_bFiller = TRUE;
  193. }
  194. // Get the Jamo class of it.
  195. if (IsLeadingJamo(wch))
  196. {
  197. JamoClass = NLS_CLASS_LEADING_JAMO;
  198. }
  199. else if (IsTrailingJamo(wch))
  200. {
  201. JamoClass = NLS_CLASS_TRAILING_JAMO;
  202. }
  203. else
  204. {
  205. JamoClass = NLS_CLASS_VOWEL_JAMO;
  206. }
  207. if (JamoClass != currentJamoClass)
  208. {
  209. return (JamoClass);
  210. }
  211. if (lpNext == NULL)
  212. {
  213. //
  214. // Get the index into the Jamo composition information.
  215. //
  216. Index = pJamo->Index;
  217. if (Index == 0)
  218. {
  219. return (JamoClass);
  220. }
  221. lpNext = pTblPtrs->pJamoComposition + Index;
  222. pSearchEnd = lpNext + pJamo->TransitionCount;
  223. }
  224. //
  225. // Push the current Jamo (pointed by pString) into a state machine,
  226. // to check if we have a valid old Hangul composition.
  227. // During the check, we will also update the sortkey result in JamoSortInfo.
  228. //
  229. while (lpNext < pSearchEnd)
  230. {
  231. // Found a match--update the combination pointer and sort info.
  232. if (lpNext->m_wcCodePoint == wch)
  233. {
  234. UpdateJamoState(currentJamoClass, &(lpNext->m_SortInfo), JamoSortInfo);
  235. lpNext++;
  236. goto NextChar;
  237. }
  238. // No match -- skip all transitions beginning with this code point
  239. lpNext += lpNext->m_bTransitionCount + 1;
  240. }
  241. //
  242. // We didn't find a valid old Hangul composition for the current character.
  243. // So return the current Jamo class.
  244. //
  245. return (JamoClass);
  246. NextChar:
  247. // We are still in a valid old Hangul composition. Go check the next character.
  248. (*ppString)++; (*pCount)--;
  249. }
  250. return (-1);
  251. }
  252. //-------------------------------------------------------------------------//
  253. // EXTERNAL ROUTINES //
  254. //-------------------------------------------------------------------------//
  255. ////////////////////////////////////////////////////////////////////////////
  256. //
  257. // MapOldHangulSortKey
  258. //
  259. // Check if the given string has a valid old Hangul composition,
  260. // If yes, store the sortkey weights for the given string in the destination
  261. // buffer and return the number of CHARs consumed by the composition.
  262. // If not, return zero.
  263. //
  264. // NOTENOTE: This function assumes that string starting from pSrc is a
  265. // leading Jamo.
  266. //
  267. // 06-12-2000 YSLin Created.
  268. ////////////////////////////////////////////////////////////////////////////
  269. int MapOldHangulSortKey(
  270. PLOC_HASH pHashN,
  271. LPCWSTR pSrc, // source string
  272. int cchSrc, // the length of the string
  273. WORD* pUW, // generated Unicode weight
  274. LPBYTE pXW, // generated extra weight (3 bytes)
  275. BOOL fModify)
  276. {
  277. LPCWSTR pString = pSrc;
  278. LPCWSTR pScan;
  279. JAMO_SORT_INFOEX JamoSortInfo; // The result Jamo infomation.
  280. int Count = cchSrc;
  281. PSORTKEY pWeight;
  282. int JamoClass; // The current Jamo class.
  283. RtlZeroMemory(&JamoSortInfo, sizeof(JamoSortInfo));
  284. JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_LEADING_JAMO, &JamoSortInfo);
  285. if (JamoClass == NLS_CLASS_VOWEL_JAMO)
  286. {
  287. JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_VOWEL_JAMO, &JamoSortInfo);
  288. }
  289. if (JamoClass == NLS_CLASS_TRAILING_JAMO)
  290. {
  291. GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_TRAILING_JAMO, &JamoSortInfo);
  292. }
  293. //
  294. // If we have a valid leading and vowel sequences and this is an old
  295. // Hangul,...
  296. //
  297. if (JamoSortInfo.m_bOld)
  298. {
  299. //
  300. // Compute the modern Hangul syllable prior to this composition.
  301. // Uses formula from Unicode 3.0 Section 3.11 p54
  302. // "Hangul Syllable Composition".
  303. //
  304. WCHAR wchModernHangul =
  305. (JamoSortInfo.m_chLeadingIndex * NLS_JAMO_VOWEL_COUNT + JamoSortInfo.m_chVowelIndex) * NLS_JAMO_TRAILING_COUNT
  306. + JamoSortInfo.m_chTrailingIndex
  307. + NLS_HANGUL_FIRST_SYLLABLE;
  308. if (JamoSortInfo.m_bFiller)
  309. {
  310. // Sort before the modern Hangul, instead of after.
  311. wchModernHangul--;
  312. // If we fall off the modern Hangul syllable block,...
  313. if (wchModernHangul < NLS_HANGUL_FIRST_SYLLABLE)
  314. {
  315. // Sort after the previous character (Circled Hangul Kiyeok A)
  316. wchModernHangul = 0x326e;
  317. }
  318. // Shift the leading weight past any old Hangul that sorts after this modern Hangul
  319. JamoSortInfo.m_LeadingWeight += 0x80;
  320. }
  321. pWeight = &((pHashN->pSortkey)[wchModernHangul]);
  322. *pUW = GET_UNICODE_MOD(pWeight, fModify);
  323. pXW[0] = JamoSortInfo.m_LeadingWeight;
  324. pXW[1] = JamoSortInfo.m_VowelWeight;
  325. pXW[2] = JamoSortInfo.m_TrailingWeight;
  326. return (int)(pString - pSrc);
  327. }
  328. //
  329. // Otherwise it isn't a valid old Hangul composition and we don't do
  330. // anything with it.
  331. //
  332. return (0);
  333. }