|
|
/*++
Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
Module Name:
jamo.c
Abstract:
This file contains functions that deal with the sorting of old Hangul. Korean characters (Hangul) can be composed from Jamos (U+1100 - U+11ff). However, some valid compositions of Jamo are not found in modern Hangul (U+AC00 - U+D7AF). These valid compositions are called old Hangul.
MapOldHangulSortKey() is called by CompareString() and MapSortKey() to handle the sorting of old Hangul.
Note:
Jamo composition means that several Jamos (Korean alphabetic characters) together compose a valid Hangul character or old Hangul character. E.g. U+1100 U+1103 U+1161 U+11a8 composes a valid old Hangul character.
The following are data members of the global structure pTblPtrs used by old Hangul sorting: * pTblPtrs->pJamoIndex Given a Jamo, this is the index into the pJamoComposition state machine for this Jamo. The value for U+1100 is stored in pJamoIndex[0], U+1101 is in pJamoIndex[1], etc. The value for U+1100 is 1. This means the state machine for U+1100 is stored in pJamoComposition[1]. Note that not every Jamo can start a valid composition. For those Jamos that can not start a valid composition, the table entry for that Jamo is 0. E.g. the index for U+1101 is 0.
* pTblPtrs->NumJamoIndex The number of entries in pJamoIndex. Every index is a WORD.
* pTblPtrs->pJamoComposition This is the Jamo composition state machine. It is used for two purposes: 1. Used to verify a valid Jamo combination that composes an old Hangul character. 2. If a valid old Hangul composition is found, get the SortInfo for the current combination.
* pTblPtrs->NumJamoComposition The number of entries in pJamoComposition.
Revision History:
05-30-2000 JohnMcCo Create old Hangul sorting algorithm and sample. 06-23-2000 YSLin Created.
--*/
//
// Include Files.
//
#include "nls.h"
#include "nlssafe.h"
#include "jamo.h"
//-------------------------------------------------------------------------//
// INTERNAL MACROS //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
//  NOT_END_STRING
//
//  Checks to see if the search has reached the end of the string.
//  It evaluates to non-zero if the counter is not at zero (counting
//  backwards) and the null termination has not been reached (the null
//  check only applies if -2 was passed in the count parameter).
//
//  ct      The remaining character count.
//  ptr     Pointer to the current character.
//  cchIn   The count originally supplied for the string; -2 means the
//          string is null-terminated.
//
//  Every argument is parenthesized in the expansion so that compound
//  expressions (e.g. "flags & mask" or "a ? b : c") can be passed without
//  being re-associated with the comparison operators.
//
//  11-04-92    JulieB    Created.
////////////////////////////////////////////////////////////////////////////
#define NOT_END_STRING(ct, ptr, cchIn)                                     \
    (((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
////////////////////////////////////////////////////////////////////////////
//
//  GET_JAMO_INDEX
//
//  Converts a Jamo code point into its zero-based offset from the first
//  Jamo (NLS_CHAR_FIRST_JAMO).  GetJamoComposition uses this offset to
//  index pTblPtrs->pJamoIndex.
//
////////////////////////////////////////////////////////////////////////////
#define GET_JAMO_INDEX(chJamo)    ((chJamo) - NLS_CHAR_FIRST_JAMO)
//-------------------------------------------------------------------------//
// INTERNAL ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
//  UpdateJamoState
//
//  Folds the sort information of one state (pSort) into the running
//  result for the whole composition (pSortResult).
//
//  JamoClass     Class of the Jamo being processed (NLS_CLASS_LEADING_JAMO,
//                NLS_CLASS_VOWEL_JAMO or NLS_CLASS_TRAILING_JAMO).  It
//                selects which extra-weight slot of the result may be
//                raised; any other value leaves all three weights alone.
//  pSort         Sort information derived from the current state (read).
//  pSortResult   Accumulated sort information (updated in place).
//
//  06-22-2000    YSLin    Created.
////////////////////////////////////////////////////////////////////////////
void UpdateJamoState(
    int JamoClass,
    PJAMO_SORT_INFO pSort,
    PJAMO_SORT_INFOEX pSortResult)
{
    //
    //  The "old Hangul" flag accumulates: it is OR-ed in, so it is never
    //  cleared by a later state.
    //
    pSortResult->m_bOld |= pSort->m_bOld;

    //
    //  Each index keeps the maximum value seen so far.
    //
    if (pSortResult->m_chLeadingIndex < pSort->m_chLeadingIndex)
    {
        pSortResult->m_chLeadingIndex = pSort->m_chLeadingIndex;
    }
    if (pSortResult->m_chVowelIndex < pSort->m_chVowelIndex)
    {
        pSortResult->m_chVowelIndex = pSort->m_chVowelIndex;
    }
    if (pSortResult->m_chTrailingIndex < pSort->m_chTrailingIndex)
    {
        pSortResult->m_chTrailingIndex = pSort->m_chTrailingIndex;
    }

    //
    //  The extra weight of the state is applied to the slot matching the
    //  current Jamo class, again keeping the maximum.
    //
    if (JamoClass == NLS_CLASS_LEADING_JAMO)
    {
        if (pSortResult->m_LeadingWeight < pSort->m_ExtraWeight)
        {
            pSortResult->m_LeadingWeight = pSort->m_ExtraWeight;
        }
    }
    else if (JamoClass == NLS_CLASS_VOWEL_JAMO)
    {
        if (pSortResult->m_VowelWeight < pSort->m_ExtraWeight)
        {
            pSortResult->m_VowelWeight = pSort->m_ExtraWeight;
        }
    }
    else if (JamoClass == NLS_CLASS_TRAILING_JAMO)
    {
        if (pSortResult->m_TrailingWeight < pSort->m_ExtraWeight)
        {
            pSortResult->m_TrailingWeight = pSort->m_ExtraWeight;
        }
    }
}
////////////////////////////////////////////////////////////////////////////
//
//  GetJamoComposition
//
//  Scans a run of Jamos of a single class (currentJamoClass) beginning at
//  *ppString.  The first Jamo is looked up in pTblPtrs->pJamoIndex; every
//  following Jamo of the same class is pushed through the composition
//  state machine in pTblPtrs->pJamoComposition.  The sort information of
//  each accepted Jamo is folded into JamoSortInfo via UpdateJamoState.
//
//  ppString          in/out: pointer to the current Jamo character.  On
//                    return it points at the character that stopped the
//                    scan.
//  pCount            in/out: the current character count (counting
//                    backwards).
//  cchSrc            The total character count (if the value is -2, then
//                    the string is null-terminated).
//  currentJamoClass  The class of the Jamo run being scanned.
//  JamoSortInfo      in/out: the sort information for the final result.
//
//  Returns -1 if the scan stopped at the end of the string or at a
//  character that is not a Jamo.  Otherwise returns the class of the Jamo
//  at *ppString that stopped the scan: either a Jamo of a different class,
//  or a Jamo of the same class that does not continue a known composition.
//
//  NOTENOTE  The character at *ppString on entry is used to index
//            pJamoIndex without a range check, so it must be a Jamo.  The
//            original note says "leading Jamo", but MapOldHangulSortKey
//            also calls this routine for the vowel and trailing classes.
//
//  NOTE(review)  The initial advance past the first Jamo does not decrement
//                *pCount, while every later advance does.  Whether that is
//                correct depends on what the callers put in the count --
//                confirm against the call sites.
//
//  06-12-2000    YSLin    Created.
////////////////////////////////////////////////////////////////////////////
int GetJamoComposition(
    LPCWSTR* ppString,              // The pointer to the current character
    int* pCount,                    // The current character count
    int cchSrc,                     // The total character length
    int currentJamoClass,           // The current Jamo class.
    JAMO_SORT_INFOEX* JamoSortInfo  // The result Jamo sorting information.
    )
{
    PJAMO_TABLE pEntry;                     // table entry of the first Jamo
    PJAMO_COMPOSE_STATE pState = NULL;      // current position in the state machine
    PJAMO_COMPOSE_STATE pStateEnd = NULL;   // end of this Jamo's states
    WCHAR ch;
    int chClass;
    int iFirstState;
    int fMatched;

    //
    //  Look up the first Jamo and seed the result with its sort info.
    //
    ch = **ppString;
    pEntry = pTblPtrs->pJamoIndex + GET_JAMO_INDEX(ch);
    UpdateJamoState(currentJamoClass, &(pEntry->SortInfo), JamoSortInfo);

    //
    //  Step past the first Jamo and examine the characters that follow.
    //
    (*ppString)++;

    while (NOT_END_STRING(*pCount, *ppString, cchSrc))
    {
        ch = **ppString;

        //
        //  A non-Jamo character ends the composition check altogether.
        //
        if (!IsJamo(ch))
        {
            return (-1);
        }

        if (ch == 0x1160)
        {
            JamoSortInfo->m_bFiller = TRUE;
        }

        //
        //  Classify the character; anything that is neither leading nor
        //  trailing is treated as a vowel.
        //
        chClass = IsLeadingJamo(ch)
                      ? NLS_CLASS_LEADING_JAMO
                      : (IsTrailingJamo(ch) ? NLS_CLASS_TRAILING_JAMO
                                            : NLS_CLASS_VOWEL_JAMO);

        //
        //  A class change ends this run; tell the caller which class is next.
        //
        if (chClass != currentJamoClass)
        {
            return (chClass);
        }

        //
        //  First same-class follower: locate the states for the first Jamo.
        //  An index of zero means the first Jamo cannot start a composition.
        //
        if (pState == NULL)
        {
            iFirstState = pEntry->Index;
            if (iFirstState == 0)
            {
                return (chClass);
            }
            pState = pTblPtrs->pJamoComposition + iFirstState;
            pStateEnd = pState + pEntry->TransitionCount;
        }

        //
        //  Search the remaining states for a transition on this character.
        //  A non-matching state is skipped together with all of the
        //  transitions that begin with its code point.
        //
        fMatched = 0;
        while (pState < pStateEnd)
        {
            if (pState->m_wcCodePoint == ch)
            {
                fMatched = 1;
                break;
            }
            pState += pState->m_bTransitionCount + 1;
        }

        //
        //  No transition: this is not a valid old Hangul composition for
        //  the current character, so report its class.
        //
        if (!fMatched)
        {
            return (chClass);
        }

        //
        //  Transition found: merge its sort info, step into the state
        //  that follows it, and consume the character.
        //
        UpdateJamoState(currentJamoClass, &(pState->m_SortInfo), JamoSortInfo);
        pState++;

        (*ppString)++;
        (*pCount)--;
    }

    return (-1);
}
//-------------------------------------------------------------------------//
// EXTERNAL ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
//  MapOldHangulSortKey
//
//  Check if the given string starts with an old Hangul composition.
//  If yes, store the sortkey weights for the composition in the
//  destination buffers and return the number of characters consumed by the
//  composition.  If not, return zero and leave pUW / pXW untouched.
//
//  pHashN     Locale hash node; its pSortkey table supplies the Unicode
//             weight of the reference code point.
//  pSrc       Source string.
//  cchSrc     The length of the string (passed through to
//             GetJamoComposition, where -2 means null-terminated).
//  pUW        Receives the generated Unicode weight.
//  pXW        Receives the generated extra weights (3 bytes: leading,
//             vowel, trailing).
//  fModify    Passed to GET_UNICODE_MOD when reading the Unicode weight.
//
//  NOTENOTE: This function assumes that string starting from pSrc is a
//            leading Jamo.
//
//  NOTE(review): Count is handed unchanged from one GetJamoComposition
//                call to the next, although each call steps past one
//                character without decrementing it.  Confirm against the
//                callers' definition of cchSrc that the scan cannot read
//                past the end of a counted string.
//
//  06-12-2000    YSLin    Created.
////////////////////////////////////////////////////////////////////////////
int MapOldHangulSortKey(
    PLOC_HASH pHashN,
    LPCWSTR pSrc,             // source string
    int cchSrc,               // the length of the string
    WORD* pUW,                // generated Unicode weight
    LPBYTE pXW,               // generated extra weight (3 bytes)
    BOOL fModify)
{
    LPCWSTR pString = pSrc;
    JAMO_SORT_INFOEX JamoSortInfo;      // The result Jamo information.
    int Count = cchSrc;
    PSORTKEY pWeight;
    int JamoClass;                      // The current Jamo class.

    RtlZeroMemory(&JamoSortInfo, sizeof(JamoSortInfo));

    //
    //  Scan the leading run, then the vowel run and the trailing run, each
    //  only if the previous scan stopped on a Jamo of that class.
    //
    JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_LEADING_JAMO, &JamoSortInfo);
    if (JamoClass == NLS_CLASS_VOWEL_JAMO)
    {
        JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_VOWEL_JAMO, &JamoSortInfo);
    }
    if (JamoClass == NLS_CLASS_TRAILING_JAMO)
    {
        GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_TRAILING_JAMO, &JamoSortInfo);
    }

    //
    //  The only condition tested here is the old Hangul flag collected
    //  during the scans.
    //
    if (JamoSortInfo.m_bOld)
    {
        //
        //  Compute the modern Hangul syllable prior to this composition.
        //  Uses formula from Unicode 3.0 Section 3.11 p54
        //  "Hangul Syllable Composition".
        //
        WCHAR wchModernHangul =
            (JamoSortInfo.m_chLeadingIndex * NLS_JAMO_VOWEL_COUNT + JamoSortInfo.m_chVowelIndex) * NLS_JAMO_TRAILING_COUNT
                + JamoSortInfo.m_chTrailingIndex
                + NLS_HANGUL_FIRST_SYLLABLE;

        if (JamoSortInfo.m_bFiller)
        {
            // Sort before the modern Hangul, instead of after.
            wchModernHangul--;

            // If we fall off the modern Hangul syllable block,...
            if (wchModernHangul < NLS_HANGUL_FIRST_SYLLABLE)
            {
                // Sort after the previous character (Circled Hangul Kiyeok A)
                wchModernHangul = 0x326e;
            }

            // Shift the leading weight past any old Hangul that sorts after this modern Hangul
            JamoSortInfo.m_LeadingWeight += 0x80;
        }

        pWeight = &((pHashN->pSortkey)[wchModernHangul]);
        *pUW = GET_UNICODE_MOD(pWeight, fModify);
        pXW[0] = JamoSortInfo.m_LeadingWeight;
        pXW[1] = JamoSortInfo.m_VowelWeight;
        pXW[2] = JamoSortInfo.m_TrailingWeight;

        // Number of characters consumed by the composition.
        return (int)(pString - pSrc);
    }

    //
    //  Otherwise it isn't a valid old Hangul composition and we don't do
    //  anything with it.
    //
    return (0);
}
|