|
|
/*++
Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
Module Name:
string.c
Abstract:
This file contains functions that deal with characters and strings.
APIs found in this file: CompareStringW GetStringTypeExW GetStringTypeW
Revision History:
05-31-91 JulieB Created.
--*/
//
// Include Files.
//
#include "nls.h"
#include "jamo.h"
//
// Constant Declarations.
//
//
// State Table.
//
#define STATE_DW 1 // normal diacritic weight state
#define STATE_REVERSE_DW 2 // reverse diacritic weight state
#define STATE_CW 4 // case weight state
#define STATE_JAMO_WEIGHT 8 // jamo weight state
//
// Invalid weight value.
//
#define CMP_INVALID_WEIGHT 0xffffffff
#define CMP_INVALID_FAREAST 0xffff0000
#define CMP_INVALID_UW 0xffff
//
// Forward Declarations.
//
int LongCompareStringW( PLOC_HASH pHashN, DWORD dwCmpFlags, LPCWSTR lpString1, int cchCount1, LPCWSTR lpString2, int cchCount2, BOOL fModify);
int FindJamoDifference( PLOC_HASH pHashN, LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1, LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2, LPCWSTR* pLastJamo, WORD* uw1, WORD* uw2, int* pState, int* WhichJamo, BOOL fModify);
//-------------------------------------------------------------------------//
// INTERNAL MACROS //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// NOT_END_STRING
//
// Checks to see if the search has reached the end of the string.
// It returns TRUE if the counter is not at zero (counting backwards) and
// the null termination has not been reached (if -1 was passed in the count
// parameter.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define NOT_END_STRING(ct, ptr, cchIn) \
((ct != 0) && (!((*(ptr) == 0) && (cchIn == -2))))
////////////////////////////////////////////////////////////////////////////
//
// AT_STRING_END
//
// Checks to see if the pointer is at the end of the string.
// It returns TRUE if the counter is zero or if the null termination
// has been reached (if -2 was passed in the count parameter).
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define AT_STRING_END(ct, ptr, cchIn) \
((ct == 0) || ((*(ptr) == 0) && (cchIn == -2)))
////////////////////////////////////////////////////////////////////////////
//
// REMOVE_STATE
//
// Removes the current state from the state table. This should only be
// called when the current state should not be entered for the remainder
// of the comparison. It decrements the counter going through the state
// table and decrements the number of states in the table.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define REMOVE_STATE(value) (State &= ~value)
////////////////////////////////////////////////////////////////////////////
//
// POINTER_FIXUP
//
// Fixup the string pointers if expansion characters were found.
// Then, advance the string pointers and decrement the string counters.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define POINTER_FIXUP() \
{ \ /* \
* Fixup the pointers (if necessary). \ */ \ if (pSave1 && (--cExpChar1 == 0)) \ { \ /* \
* Done using expansion temporary buffer. \ */ \ pString1 = pSave1; \ pSave1 = NULL; \ } \ \ if (pSave2 && (--cExpChar2 == 0)) \ { \ /* \
* Done using expansion temporary buffer. \ */ \ pString2 = pSave2; \ pSave2 = NULL; \ } \ \ /* \
* Advance the string pointers. \ */ \ pString1++; \ pString2++; \ }
////////////////////////////////////////////////////////////////////////////
//
// SCAN_LONGER_STRING
//
// Scans the longer string for diacritic, case, and special weights.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define SCAN_LONGER_STRING( ct, \
ptr, \ cchIn, \ ret ) \ { \ /* \
* Search through the rest of the longer string to make sure \ * all characters are not to be ignored. If find a character that \ * should not be ignored, return the given return value immediately. \ * \ * The only exception to this is when a nonspace mark is found. If \ * another DW difference has been found earlier, then use that. \ */ \ while (NOT_END_STRING(ct, ptr, cchIn)) \ { \ Weight1 = GET_DWORD_WEIGHT(pHashN, *ptr); \ switch (GET_SCRIPT_MEMBER(&Weight1)) \ { \ case ( UNSORTABLE ): \ { \ break; \ } \ case ( NONSPACE_MARK ): \ { \ if ((!fIgnoreDiacritic) && (!WhichDiacritic)) \ { \ return (ret); \ } \ break; \ } \ case ( PUNCTUATION ) : \ case ( SYMBOL_1 ) : \ case ( SYMBOL_2 ) : \ case ( SYMBOL_3 ) : \ case ( SYMBOL_4 ) : \ case ( SYMBOL_5 ) : \ { \ if (!fIgnoreSymbol) \ { \ return (ret); \ } \ break; \ } \ case ( EXPANSION ) : \ case ( FAREAST_SPECIAL ) : \ case ( JAMO_SPECIAL ) : \ case ( EXTENSION_A ) : \ default : \ { \ return (ret); \ } \ } \ \ /* \
* Advance pointer and decrement counter. \ */ \ ptr++; \ ct--; \ } \ \ /* \
* Need to check diacritic, case, extra, and special weights for \ * final return value. Still could be equal if the longer part of \ * the string contained only characters to be ignored. \ * \ * NOTE: The following checks MUST REMAIN IN THIS ORDER: \ * Diacritic, Case, Extra, Punctuation. \ */ \ if (WhichDiacritic) \ { \ return (WhichDiacritic); \ } \ if (WhichCase) \ { \ return (WhichCase); \ } \ if (WhichExtra) \ { \ if (!fIgnoreDiacritic) \ { \ if (GET_WT_FOUR(&WhichExtra)) \ { \ return (GET_WT_FOUR(&WhichExtra)); \ } \ if (GET_WT_FIVE(&WhichExtra)) \ { \ return (GET_WT_FIVE(&WhichExtra)); \ } \ } \ if (GET_WT_SIX(&WhichExtra)) \ { \ return (GET_WT_SIX(&WhichExtra)); \ } \ if (GET_WT_SEVEN(&WhichExtra)) \ { \ return (GET_WT_SEVEN(&WhichExtra)); \ } \ } \ if (WhichJamo) \ { \ return (WhichJamo); \ } \ if (WhichPunct1) \ { \ return (WhichPunct1); \ } \ if (WhichPunct2) \ { \ return (WhichPunct2); \ } \ \ return (CSTR_EQUAL); \ }
////////////////////////////////////////////////////////////////////////////
//
// QUICK_SCAN_LONGER_STRING
//
// Scans the longer string for diacritic, case, and special weights.
// Assumes that both strings are null-terminated.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define QUICK_SCAN_LONGER_STRING( ptr, \
ret ) \ { \ /* \
* Search through the rest of the longer string to make sure \ * all characters are not to be ignored. If find a character that \ * should not be ignored, return the given return value immediately. \ * \ * The only exception to this is when a nonspace mark is found. If \ * another DW difference has been found earlier, then use that. \ */ \ while (*ptr != 0) \ { \ switch (GET_SCRIPT_MEMBER(&(pHashN->pSortkey[*ptr]))) \ { \ case ( UNSORTABLE ): \ { \ break; \ } \ case ( NONSPACE_MARK ): \ { \ if (!WhichDiacritic) \ { \ return (ret); \ } \ break; \ } \ default : \ { \ return (ret); \ } \ } \ \ /* \
* Advance pointer. \ */ \ ptr++; \ } \ \ /* \
* Need to check diacritic, case, extra, and special weights for \ * final return value. Still could be equal if the longer part of \ * the string contained only unsortable characters. \ * \ * NOTE: The following checks MUST REMAIN IN THIS ORDER: \ * Diacritic, Case, Extra, Punctuation. \ */ \ if (WhichDiacritic) \ { \ return (WhichDiacritic); \ } \ if (WhichCase) \ { \ return (WhichCase); \ } \ if (WhichExtra) \ { \ if (GET_WT_FOUR(&WhichExtra)) \ { \ return (GET_WT_FOUR(&WhichExtra)); \ } \ if (GET_WT_FIVE(&WhichExtra)) \ { \ return (GET_WT_FIVE(&WhichExtra)); \ } \ if (GET_WT_SIX(&WhichExtra)) \ { \ return (GET_WT_SIX(&WhichExtra)); \ } \ if (GET_WT_SEVEN(&WhichExtra)) \ { \ return (GET_WT_SEVEN(&WhichExtra)); \ } \ } \ if (WhichJamo) \ { \ return (WhichJamo); \ } \ if (WhichPunct1) \ { \ return (WhichPunct1); \ } \ if (WhichPunct2) \ { \ return (WhichPunct2); \ } \ \ return (CSTR_EQUAL); \ }
////////////////////////////////////////////////////////////////////////////
//
// GET_FAREAST_WEIGHT
//
// Returns the weight for the far east special case in "wt". This currently
// includes the Cho-on, the Repeat, and the Kana characters.
//
// 08-19-93 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define GET_FAREAST_WEIGHT( wt, \
uw, \ mask, \ pBegin, \ pCur, \ ExtraWt, \ fModify ) \ { \ int ct; /* loop counter */ \ BYTE PrevSM; /* previous script member value */ \ BYTE PrevAW; /* previous alphanumeric value */ \ BYTE PrevCW; /* previous case value */ \ BYTE AW; /* alphanumeric value */ \ BYTE CW; /* case value */ \ DWORD PrevWt; /* previous weight */ \ \ \ /* \
* Get the alphanumeric weight and the case weight of the \ * current code point. \ */ \ AW = GET_ALPHA_NUMERIC(&wt); \ CW = GET_CASE(&wt); \ ExtraWt = (DWORD)0; \ \ /* \
* Special case Repeat and Cho-On. \ * AW = 0 => Repeat \ * AW = 1 => Cho-On \ * AW = 2+ => Kana \ */ \ if (AW <= MAX_SPECIAL_AW) \ { \ /* \
* If the script member of the previous character is \ * invalid, then give the special character an \ * invalid weight (highest possible weight) so that it \ * will sort AFTER everything else. \ */ \ ct = 1; \ PrevWt = CMP_INVALID_FAREAST; \ while ((pCur - ct) >= pBegin) \ { \ PrevWt = GET_DWORD_WEIGHT(pHashN, *(pCur - ct)); \ PrevWt &= mask; \ PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \ if (PrevSM < FAREAST_SPECIAL) \ { \ if (PrevSM == EXPANSION) \ { \ PrevWt = CMP_INVALID_FAREAST; \ } \ else \ { \ /* \
* UNSORTABLE or NONSPACE_MARK. \ * \ * Just ignore these, since we only care about the \ * previous UW value. \ */ \ PrevWt = CMP_INVALID_FAREAST; \ ct++; \ continue; \ } \ } \ else if (PrevSM == FAREAST_SPECIAL) \ { \ PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \ if (PrevAW <= MAX_SPECIAL_AW) \ { \ /* \
* Handle case where two special chars follow \ * each other. Keep going back in the string. \ */ \ PrevWt = CMP_INVALID_FAREAST; \ ct++; \ continue; \ } \ \ UNICODE_WT(&PrevWt) = \ MAKE_UNICODE_WT(KANA, PrevAW, fModify); \ \ /* \
* Only build weights 4, 5, 6, and 7 if the \ * previous character is KANA. \ * \ * Always: \ * 4W = previous CW & ISOLATE_SMALL \ * 6W = previous CW & ISOLATE_KANA \ * \ */ \ PrevCW = GET_CASE(&PrevWt); \ GET_WT_FOUR(&ExtraWt) = PrevCW & ISOLATE_SMALL; \ GET_WT_SIX(&ExtraWt) = PrevCW & ISOLATE_KANA; \ \ if (AW == AW_REPEAT) \ { \ /* \
* Repeat: \ * UW = previous UW \ * 5W = WT_FIVE_REPEAT \ * 7W = previous CW & ISOLATE_WIDTH \ */ \ uw = UNICODE_WT(&PrevWt); \ GET_WT_FIVE(&ExtraWt) = WT_FIVE_REPEAT; \ GET_WT_SEVEN(&ExtraWt) = PrevCW & ISOLATE_WIDTH; \ } \ else \ { \ /* \
* Cho-On: \ * UW = previous UW & CHO_ON_UW_MASK \ * 5W = WT_FIVE_CHO_ON \ * 7W = current CW & ISOLATE_WIDTH \ */ \ uw = UNICODE_WT(&PrevWt) & CHO_ON_UW_MASK; \ GET_WT_FIVE(&ExtraWt) = WT_FIVE_CHO_ON; \ GET_WT_SEVEN(&ExtraWt) = CW & ISOLATE_WIDTH; \ } \ } \ else \ { \ uw = GET_UNICODE_MOD(&PrevWt, fModify); \ } \ \ break; \ } \ } \ else \ { \ /* \
* Kana: \ * SM = KANA \ * AW = current AW \ * 4W = current CW & ISOLATE_SMALL \ * 5W = WT_FIVE_KANA \ * 6W = current CW & ISOLATE_KANA \ * 7W = current CW & ISOLATE_WIDTH \ */ \ uw = MAKE_UNICODE_WT(KANA, AW, fModify); \ GET_WT_FOUR(&ExtraWt) = CW & ISOLATE_SMALL; \ GET_WT_FIVE(&ExtraWt) = WT_FIVE_KANA; \ GET_WT_SIX(&ExtraWt) = CW & ISOLATE_KANA; \ GET_WT_SEVEN(&ExtraWt) = CW & ISOLATE_WIDTH; \ } \ \ /* \
* Get the weight for the far east special case and store it in wt. \ */ \ if ((AW > MAX_SPECIAL_AW) || (PrevWt != CMP_INVALID_FAREAST)) \ { \ /* \
* Always: \ * DW = current DW \ * CW = minimum CW \ */ \ UNICODE_WT(&wt) = uw; \ CASE_WT(&wt) = MIN_CW; \ } \ else \ { \ uw = CMP_INVALID_UW; \ wt = CMP_INVALID_FAREAST; \ ExtraWt = 0; \ } \ }
//-------------------------------------------------------------------------//
// API ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// CompareStringW
//
// Compares two wide character strings of the same locale according to the
// supplied locale handle.
//
// 05-31-91 JulieB Created.
////////////////////////////////////////////////////////////////////////////
int WINAPI CompareStringW( LCID Locale, DWORD dwCmpFlags, LPCWSTR lpString1, int cchCount1, LPCWSTR lpString2, int cchCount2) { register LPWSTR pString1; // ptr to go thru string 1
register LPWSTR pString2; // ptr to go thru string 2
PLOC_HASH pHashN; // ptr to LOC hash node
BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
BOOL fModify; // flag to use modified script member weights
DWORD State; // state table
DWORD Mask; // mask for weights
DWORD Weight1; // full weight of char - string 1
DWORD Weight2; // full weight of char - string 2
int JamoFlag = FALSE; LPCWSTR pLastJamo = lpString1;
int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
int WhichJamo; // XW for Jamo
int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
LPWSTR pSave1; // ptr to saved pString1
LPWSTR pSave2; // ptr to saved pString2
int cExpChar1, cExpChar2; // ct of expansions in tmp
DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
//
// Invalid Parameter Check:
// - validate LCID
// - either string is null
//
VALIDATE_LANGUAGE(Locale, pHashN, 0, TRUE); if ((pHashN == NULL) || (lpString1 == NULL) || (lpString2 == NULL)) { SetLastError(ERROR_INVALID_PARAMETER); return (0); }
//
// Make sure the appropriate sorting tables are available. If not,
// return an error.
//
if ((pHashN->pSortkey == NULL) || (pHashN->IfIdeographFailure == TRUE)) { KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n")); SetLastError(ERROR_FILE_NOT_FOUND); return (0); }
//
// Call longer compare string if any of the following is true:
// - compression locale
// - either count is not -1
// - dwCmpFlags is not 0 or ignore case (see NOTE below)
// - locale is Korean - script member weight adjustment needed
//
// NOTE: If the value of NORM_IGNORECASE ever changes, this
// code should check for:
// ( (dwCmpFlags != 0) && (dwCmpFlags != NORM_IGNORECASE) )
// Since NORM_IGNORECASE is equal to 1, we can optimize this
// by checking for > 1.
//
dwCmpFlags &= (~LOCALE_USE_CP_ACP); fModify = IS_KOREAN(Locale); if ( (pHashN->IfCompression) || (cchCount1 > -1) || (cchCount2 > -1) || (dwCmpFlags > NORM_IGNORECASE) || (fModify == TRUE) ) { return (LongCompareStringW( pHashN, dwCmpFlags, lpString1, ((cchCount1 <= -1) ? -2 : cchCount1), lpString2, ((cchCount2 <= -1) ? -2 : cchCount2), fModify )); }
//
// Initialize string pointers.
//
pString1 = (LPWSTR)lpString1; pString2 = (LPWSTR)lpString2;
//
// Do a wchar by wchar compare.
//
while (TRUE) { //
// See if characters are equal.
// If characters are equal, increment pointers and continue
// string compare.
//
// NOTE: Loop is unrolled 8 times for performance.
//
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++;
if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; }
//
// If strings are both at null terminators, return equal.
//
if (*pString1 == *pString2) { return (CSTR_EQUAL); }
//
// Initialize flags, pointers, and counters.
//
fIgnorePunct = FALSE; WhichDiacritic = 0; WhichCase = 0; WhichJamo = 0; WhichPunct1 = 0; WhichPunct2 = 0; pSave1 = NULL; pSave2 = NULL; ExtraWt1 = (DWORD)0; WhichExtra = (DWORD)0;
//
// Switch on the different flag options. This will speed up
// the comparisons of two strings that are different.
//
// The only two possibilities in this optimized section are
// no flags and the ignore case flag.
//
if (dwCmpFlags == 0) { Mask = CMP_MASKOFF_NONE; } else { Mask = CMP_MASKOFF_CW; } State = (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW; State |= (STATE_CW | STATE_JAMO_WEIGHT);
//
// Compare each character's sortkey weight in the two strings.
//
while ((*pString1 != 0) && (*pString2 != 0)) { Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1); Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2); Weight1 &= Mask; Weight2 &= Mask;
if (Weight1 != Weight2) { BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
WORD uw1 = GET_UNICODE_SM(&Weight1, sm1); // unicode weight 1
WORD uw2 = GET_UNICODE_SM(&Weight2, sm2); // unicode weight 2
BYTE dw1; // diacritic weight 1
BYTE dw2; // diacritic weight 2
BOOL fContinue; // flag to continue loop
DWORD Wt; // temp weight holder
WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
//
// If Unicode Weights are different and no special cases,
// then we're done. Otherwise, we need to do extra checking.
//
// Must check ENTIRE string for any possibility of Unicode Weight
// differences. As soon as a Unicode Weight difference is found,
// then we're done. If no UW difference is found, then the
// first Diacritic Weight difference is used. If no DW difference
// is found, then use the first Case Difference. If no CW
// difference is found, then use the first Extra Weight
// difference. If no XW difference is found, then use the first
// Special Weight difference.
//
if ((uw1 != uw2) || (sm1 == FAREAST_SPECIAL) || (sm1 == EXTENSION_A)) { //
// Initialize the continue flag.
//
fContinue = FALSE;
//
// Check for Unsortable characters and skip them.
// This needs to be outside the switch statement. If EITHER
// character is unsortable, must skip it and start over.
//
if (sm1 == UNSORTABLE) { pString1++; fContinue = TRUE; } if (sm2 == UNSORTABLE) { pString2++; fContinue = TRUE; } if (fContinue) { continue; }
//
// Switch on the script member of string 1 and take care
// of any special cases.
//
switch (sm1) { case ( NONSPACE_MARK ) : { //
// Nonspace only - look at diacritic weight only.
//
if ((WhichDiacritic == 0) || (State & STATE_REVERSE_DW)) { WhichDiacritic = CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW); }
//
// Adjust pointer and set flags.
//
pString1++; fContinue = TRUE;
break; } case ( PUNCTUATION ) : { //
// If the ignore punctuation flag is set, then skip
// over the punctuation.
//
if (fIgnorePunct) { pString1++; fContinue = TRUE; } else if (sm2 != PUNCTUATION) { //
// The character in the second string is
// NOT punctuation.
//
if (WhichPunct2) { //
// Set WP 2 to show that string 2 is smaller,
// since a punctuation char had already been
// found at an earlier position in string 2.
//
// Set the Ignore Punctuation flag so we just
// skip over any other punctuation chars in
// the string.
//
WhichPunct2 = CSTR_GREATER_THAN; fIgnorePunct = TRUE; } else { //
// Set WP 1 to show that string 2 is smaller,
// and that string 1 has had a punctuation
// char - since no punctuation chars have
// been found in string 2.
//
WhichPunct1 = CSTR_GREATER_THAN; }
//
// Advance pointer 1, and set flag to true.
//
pString1++; fContinue = TRUE; }
//
// Do NOT want to advance the pointer in string 1 if
// string 2 is also a punctuation char. This will
// be done later.
//
break; } case ( EXPANSION ) : { //
// Save pointer in pString1 so that it can be
// restored.
//
if (pSave1 == NULL) { pSave1 = pString1; } pString1 = pTmpBuf1;
//
// Expand character into temporary buffer.
//
pTmpBuf1[0] = GET_EXPANSION_1(&Weight1); pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
//
// Set cExpChar1 to the number of expansion characters
// stored.
//
cExpChar1 = MAX_TBL_EXPANSION;
fContinue = TRUE;
break; } case ( FAREAST_SPECIAL ) : { if (sm2 != EXPANSION) { //
// Get the weight for the far east special case
// and store it in Weight1.
//
GET_FAREAST_WEIGHT( Weight1, uw1, Mask, lpString1, pString1, ExtraWt1, FALSE );
if (sm2 != FAREAST_SPECIAL) { //
// The character in the second string is
// NOT a fareast special char.
//
// Set each of weights 4, 5, 6, and 7 to show
// that string 2 is smaller (if not already set).
//
if ((GET_WT_FOUR(&WhichExtra) == 0) && (GET_WT_FOUR(&ExtraWt1) != 0)) { GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_FIVE(&WhichExtra) == 0) && (GET_WT_FIVE(&ExtraWt1) != 0)) { GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_SIX(&WhichExtra) == 0) && (GET_WT_SIX(&ExtraWt1) != 0)) { GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_SEVEN(&WhichExtra) == 0) && (GET_WT_SEVEN(&ExtraWt1) != 0)) { GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN; } } } break; } case ( JAMO_SPECIAL ) : { int ctr1; // dummy variables for FindJamoDifference
LPWSTR pStr1 = pString1; LPWSTR pStr2 = pString2;
//
// Set the JamoFlag so we don't handle it again.
//
JamoFlag = TRUE; fContinue = FindJamoDifference( pHashN, &pStr1, &ctr1, -2, &Weight1, &pStr2, &ctr1, -2, &Weight2, &pLastJamo, &uw1, &uw2, &State, &WhichJamo, fModify ); if (WhichJamo) { return (WhichJamo); }
pString1 = pStr1; pString2 = pStr2;
break; } case ( EXTENSION_A ) : { //
// Compare the weights.
//
if (Weight1 == Weight2) { //
// Adjust pointers and set flag.
//
pString1++; pString2++; fContinue = TRUE; } else { //
// Get the actual UW to compare.
//
if (sm2 == EXTENSION_A) { //
// Set the UW values to be the AW and DW since
// both strings contain an extension A char.
//
uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1), GET_DIACRITIC(&Weight1), FALSE ); uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2), GET_DIACRITIC(&Weight2), FALSE ); } else { //
// Only string1 contains an extension A char,
// so set the UW value to be the first UW
// value for extension A (default values):
// SM_EXT_A, AW_EXT_A
//
uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); } }
break; } case ( UNSORTABLE ) : { //
// Fill out the case statement so the compiler
// will use a jump table.
//
break; } }
//
// Switch on the script member of string 2 and take care
// of any special cases.
//
switch (sm2) { case ( NONSPACE_MARK ) : { //
// Nonspace only - look at diacritic weight only.
//
if ((WhichDiacritic == 0) || (State & STATE_REVERSE_DW)) { WhichDiacritic = CSTR_LESS_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW); }
//
// Adjust pointer and set flags.
//
pString2++; fContinue = TRUE;
break; } case ( PUNCTUATION ) : { //
// If the ignore punctuation flag is set, then skip
// over the punctuation.
//
if (fIgnorePunct) { //
// Pointer 2 will be advanced after if-else
// statement.
//
; } else if (sm1 != PUNCTUATION) { //
// The character in the first string is
// NOT punctuation.
//
if (WhichPunct1) { //
// Set WP 1 to show that string 1 is smaller,
// since a punctuation char had already
// been found at an earlier position in
// string 1.
//
// Set the Ignore Punctuation flag so we just
// skip over any other punctuation in the
// string.
//
WhichPunct1 = CSTR_LESS_THAN; fIgnorePunct = TRUE; } else { //
// Set WP 2 to show that string 1 is smaller,
// and that string 2 has had a punctuation
// char - since no punctuation chars have
// been found in string 1.
//
WhichPunct2 = CSTR_LESS_THAN; }
//
// Pointer 2 will be advanced after if-else
// statement.
//
} else { //
// Both code points are punctuation.
//
// See if either of the strings has encountered
// punctuation chars previous to this.
//
if (WhichPunct1) { //
// String 1 has had a punctuation char, so
// it should be the smaller string (since
// both have punctuation chars).
//
WhichPunct1 = CSTR_LESS_THAN; } else if (WhichPunct2) { //
// String 2 has had a punctuation char, so
// it should be the smaller string (since
// both have punctuation chars).
//
WhichPunct2 = CSTR_GREATER_THAN; } else { //
// Position is the same, so compare the
// special weights. Set WhichPunct1 to
// the smaller special weight.
//
WhichPunct1 = (((GET_ALPHA_NUMERIC(&Weight1) < GET_ALPHA_NUMERIC(&Weight2))) ? CSTR_LESS_THAN : CSTR_GREATER_THAN); }
//
// Set the Ignore Punctuation flag so we just
// skip over any other punctuation in the string.
//
fIgnorePunct = TRUE;
//
// Advance pointer 1. Pointer 2 will be
// advanced after if-else statement.
//
pString1++; }
//
// Advance pointer 2 and set flag to true.
//
pString2++; fContinue = TRUE;
break; } case ( EXPANSION ) : { //
// Save pointer in pString1 so that it can be
// restored.
//
if (pSave2 == NULL) { pSave2 = pString2; } pString2 = pTmpBuf2;
//
// Expand character into temporary buffer.
//
pTmpBuf2[0] = GET_EXPANSION_1(&Weight2); pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
//
// Set cExpChar2 to the number of expansion characters
// stored.
//
cExpChar2 = MAX_TBL_EXPANSION;
fContinue = TRUE;
break; } case ( FAREAST_SPECIAL ) : { if (sm1 != EXPANSION) { //
// Get the weight for the far east special case
// and store it in Weight2.
//
GET_FAREAST_WEIGHT( Weight2, uw2, Mask, lpString2, pString2, ExtraWt2, FALSE );
if (sm1 != FAREAST_SPECIAL) { //
// The character in the first string is
// NOT a fareast special char.
//
// Set each of weights 4, 5, 6, and 7 to show
// that string 1 is smaller (if not already set).
//
if ((GET_WT_FOUR(&WhichExtra) == 0) && (GET_WT_FOUR(&ExtraWt2) != 0)) { GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_FIVE(&WhichExtra) == 0) && (GET_WT_FIVE(&ExtraWt2) != 0)) { GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_SIX(&WhichExtra) == 0) && (GET_WT_SIX(&ExtraWt2) != 0)) { GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_SEVEN(&WhichExtra) == 0) && (GET_WT_SEVEN(&ExtraWt2) != 0)) { GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN; } } else { //
// Characters in both strings are fareast
// special chars.
//
// Set each of weights 4, 5, 6, and 7
// appropriately (if not already set).
//
if ( (GET_WT_FOUR(&WhichExtra) == 0) && ( GET_WT_FOUR(&ExtraWt1) != GET_WT_FOUR(&ExtraWt2) ) ) { GET_WT_FOUR(&WhichExtra) = ( GET_WT_FOUR(&ExtraWt1) < GET_WT_FOUR(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_FIVE(&WhichExtra) == 0) && ( GET_WT_FIVE(&ExtraWt1) != GET_WT_FIVE(&ExtraWt2) ) ) { GET_WT_FIVE(&WhichExtra) = ( GET_WT_FIVE(&ExtraWt1) < GET_WT_FIVE(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_SIX(&WhichExtra) == 0) && ( GET_WT_SIX(&ExtraWt1) != GET_WT_SIX(&ExtraWt2) ) ) { GET_WT_SIX(&WhichExtra) = ( GET_WT_SIX(&ExtraWt1) < GET_WT_SIX(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_SEVEN(&WhichExtra) == 0) && ( GET_WT_SEVEN(&ExtraWt1) != GET_WT_SEVEN(&ExtraWt2) ) ) { GET_WT_SEVEN(&WhichExtra) = ( GET_WT_SEVEN(&ExtraWt1) < GET_WT_SEVEN(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } } } break; } case ( JAMO_SPECIAL ) : { if (!JamoFlag) { int ctr1, ctr2; // dummy variables for FindJamoDifference
LPWSTR pStr1 = pString1; LPWSTR pStr2 = pString2;
//
// Set the JamoFlag so we don't handle it again.
//
JamoFlag = TRUE; fContinue = FindJamoDifference( pHashN, &pStr1, &ctr1, -2, &Weight1, &pStr2, &ctr2, -2, &Weight2, &pLastJamo, &uw1, &uw2, &State, &WhichJamo, fModify ); if (WhichJamo) { return (WhichJamo); } pString1 = pStr1; pString2 = pStr2; } else { JamoFlag = FALSE; }
break; } case ( EXTENSION_A ) : { //
// If sm1 is an extension A character, then
// both sm1 and sm2 have been handled. We should
// only get here when either sm1 is not an
// extension A character or the two extension A
// characters are different.
//
if (sm1 != EXTENSION_A) { //
// Get the actual UW to compare.
//
// Only string2 contains an extension A char,
// so set the UW value to be the first UW
// value for extension A (default values):
// SM_EXT_A, AW_EXT_A
//
uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); }
//
// We should then fall through to the comparison
// of the Unicode weights.
//
break; } case ( UNSORTABLE ) : { //
// Fill out the case statement so the compiler
// will use a jump table.
//
break; } }
//
// See if the comparison should start again.
//
if (fContinue) { continue; }
//
// We're not supposed to drop down into the state table if
// unicode weights are different, so stop comparison and
// return result of unicode weight comparison.
//
if (uw1 != uw2) { return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN); } }
//
// For each state in the state table, do the appropriate
// comparisons. (UW1 == UW2)
//
if (State & (STATE_DW | STATE_REVERSE_DW)) { //
// Get the diacritic weights.
//
dw1 = GET_DIACRITIC(&Weight1); dw2 = GET_DIACRITIC(&Weight2);
if (dw1 != dw2) { //
// Look ahead to see if diacritic follows a
// minimum diacritic weight. If so, get the
// diacritic weight of the nonspace mark.
//
while (*(pString1 + 1) != 0) { Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)); if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK) { dw1 += GET_DIACRITIC(&Wt); pString1++; } else { break; } }
while (*(pString2 + 1) != 0) { Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)); if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK) { dw2 += GET_DIACRITIC(&Wt); pString2++; } else { break; } }
//
// Save which string has the smaller diacritic
// weight if the diacritic weights are still
// different.
//
if (dw1 != dw2) { WhichDiacritic = (dw1 < dw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW); } } } if (State & STATE_CW) { //
// Get the case weights.
//
if (GET_CASE(&Weight1) != GET_CASE(&Weight2)) { //
// Save which string has the smaller case weight.
//
WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2)) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_CW); } } }
//
// Fixup the pointers.
//
POINTER_FIXUP(); }
//
// If the end of BOTH strings has been reached, then the unicode
// weights match exactly. Check the diacritic, case and special
// weights. If all are zero, then return success. Otherwise,
// return the result of the weight difference.
//
// NOTE: The following checks MUST REMAIN IN THIS ORDER:
// Diacritic, Case, Punctuation.
//
if (*pString1 == 0) { if (*pString2 == 0) { if (WhichDiacritic) { return (WhichDiacritic); } if (WhichCase) { return (WhichCase); } if (WhichExtra) { if (GET_WT_FOUR(&WhichExtra)) { return (GET_WT_FOUR(&WhichExtra)); } if (GET_WT_FIVE(&WhichExtra)) { return (GET_WT_FIVE(&WhichExtra)); } if (GET_WT_SIX(&WhichExtra)) { return (GET_WT_SIX(&WhichExtra)); } if (GET_WT_SEVEN(&WhichExtra)) { return (GET_WT_SEVEN(&WhichExtra)); } } if (WhichPunct1) { return (WhichPunct1); } if (WhichPunct2) { return (WhichPunct2); }
return (CSTR_EQUAL); } else { //
// String 2 is longer.
//
pString1 = pString2; } }
//
// Scan to the end of the longer string.
//
QUICK_SCAN_LONGER_STRING( pString1, ((*pString2 == 0) ? CSTR_GREATER_THAN : CSTR_LESS_THAN) ); }
////////////////////////////////////////////////////////////////////////////
//
// GetStringTypeExW
//
// Returns character type information about a particular Unicode string.
//
// 01-18-94 JulieB Created.
////////////////////////////////////////////////////////////////////////////
BOOL WINAPI GetStringTypeExW( LCID Locale, DWORD dwInfoType, LPCWSTR lpSrcStr, int cchSrc, LPWORD lpCharType) { PLOC_HASH pHashN; // ptr to LOC hash node
//
// Invalid Parameter Check:
// - Validate LCID
//
VALIDATE_LOCALE(Locale, pHashN, FALSE); if (pHashN == NULL) { SetLastError(ERROR_INVALID_PARAMETER); return (0); }
//
// Return the result of GetStringTypeW.
//
return (GetStringTypeW( dwInfoType, lpSrcStr, cchSrc, lpCharType )); }
////////////////////////////////////////////////////////////////////////////
//
// GetStringTypeW
//
// Returns character type information about a particular Unicode string.
//
// NOTE: The number of parameters is different from GetStringTypeA.
// The 16-bit OLE product shipped GetStringTypeA with the wrong
// parameters (ported from Chicago) and now we must support it.
//
// Use GetStringTypeEx to get the same set of parameters between
// the A and W version.
//
// 05-31-91 JulieB Created.
////////////////////////////////////////////////////////////////////////////
BOOL WINAPI GetStringTypeW( DWORD dwInfoType, LPCWSTR lpSrcStr, int cchSrc, LPWORD lpCharType) { int Ctr; // loop counter
//
// Invalid Parameter Check:
// - lpSrcStr NULL
// - cchSrc is 0
// - lpCharType NULL
// - same buffer - src and destination
// - (flags will be checked in switch statement below)
//
if ( (lpSrcStr == NULL) || (cchSrc == 0) || (lpCharType == NULL) || (lpSrcStr == lpCharType) ) { SetLastError(ERROR_INVALID_PARAMETER); return (FALSE); }
//
// If cchSrc is -1, then the source string is null terminated and we
// need to get the length of the source string. Add one to the
// length to include the null termination.
// (This will always be at least 1.)
//
if (cchSrc <= -1) { cchSrc = NlsStrLenW(lpSrcStr) + 1; }
//
// Make sure the ctype table is mapped in.
//
if (GetCTypeFileInfo()) { SetLastError(ERROR_FILE_NOT_FOUND); return (FALSE); }
//
// Return the appropriate information in the lpCharType parameter
// based on the dwInfoType parameter.
//
switch (dwInfoType) { case ( CT_CTYPE1 ) : { //
// Return the ctype 1 information for the string.
//
for (Ctr = 0; Ctr < cchSrc; Ctr++) { lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType1); } break; } case ( CT_CTYPE2 ) : { //
// Return the ctype 2 information.
//
for (Ctr = 0; Ctr < cchSrc; Ctr++) { lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType2); } break; } case ( CT_CTYPE3 ) : { //
// Return the ctype 3 information.
//
for (Ctr = 0; Ctr < cchSrc; Ctr++) { lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType3); } break; } default : { //
// Invalid flag parameter, so return failure.
//
SetLastError(ERROR_INVALID_FLAGS); return (FALSE); } }
//
// Return success.
//
return (TRUE); }
//-------------------------------------------------------------------------//
// INTERNAL ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// LongCompareStringW
//
// Compares two wide character strings of the same locale according to the
// supplied locale handle.
//
// 05-31-91 JulieB Created.
////////////////////////////////////////////////////////////////////////////
int LongCompareStringW( PLOC_HASH pHashN, DWORD dwCmpFlags, LPCWSTR lpString1, int cchCount1, LPCWSTR lpString2, int cchCount2, BOOL fModify) { int ctr1 = cchCount1; // loop counter for string 1
int ctr2 = cchCount2; // loop counter for string 2
register LPWSTR pString1; // ptr to go thru string 1
register LPWSTR pString2; // ptr to go thru string 2
BOOL IfCompress; // if compression in locale
BOOL IfDblCompress1; // if double compression in string 1
BOOL IfDblCompress2; // if double compression in string 2
BOOL fEnd1; // if at end of string 1
BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
BOOL fIgnoreDiacritic; // flag to ignore diacritics
BOOL fIgnoreSymbol; // flag to ignore symbols
BOOL fStringSort; // flag to use string sort
DWORD State; // state table
DWORD Mask; // mask for weights
DWORD Weight1; // full weight of char - string 1
DWORD Weight2; // full weight of char - string 2
int JamoFlag = FALSE; LPCWSTR pLastJamo = lpString1;
int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
int WhichJamo; // XW for Jamo
int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
LPWSTR pSave1; // ptr to saved pString1
LPWSTR pSave2; // ptr to saved pString2
int cExpChar1, cExpChar2; // ct of expansions in tmp
DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
//
// Initialize string pointers.
//
pString1 = (LPWSTR)lpString1; pString2 = (LPWSTR)lpString2;
//
// Invalid Flags Check:
// - invalid flags
//
if (dwCmpFlags & CS_INVALID_FLAG) { SetLastError(ERROR_INVALID_FLAGS); return (0); }
//
// See if we should stop on the null terminator regardless of the
// count values. The original count values are stored in ctr1 and ctr2
// above, so it's ok to set these here.
//
if (dwCmpFlags & NORM_STOP_ON_NULL) { cchCount1 = cchCount2 = -2; }
//
// Check if compression in the given locale. If not, then
// try a wchar by wchar compare. If strings are equal, this
// will be quick.
//
if ((IfCompress = pHashN->IfCompression) == FALSE) { //
// Compare each wide character in the two strings.
//
while ( NOT_END_STRING(ctr1, pString1, cchCount1) && NOT_END_STRING(ctr2, pString2, cchCount2) ) { //
// See if characters are equal.
//
if (*pString1 == *pString2) { //
// Characters are equal, so increment pointers,
// decrement counters, and continue string compare.
//
pString1++; pString2++; ctr1--; ctr2--; } else { //
// Difference was found. Fall into the sortkey
// check below.
//
break; } }
//
// If the end of BOTH strings has been reached, then the strings
// match exactly. Return success.
//
if ( AT_STRING_END(ctr1, pString1, cchCount1) && AT_STRING_END(ctr2, pString2, cchCount2) ) { return (CSTR_EQUAL); } }
//
// Initialize flags, pointers, and counters.
//
fIgnorePunct = dwCmpFlags & NORM_IGNORESYMBOLS; fIgnoreDiacritic = dwCmpFlags & NORM_IGNORENONSPACE; fIgnoreSymbol = fIgnorePunct; fStringSort = dwCmpFlags & SORT_STRINGSORT; WhichDiacritic = 0; WhichCase = 0; WhichJamo = 0; WhichPunct1 = 0; WhichPunct2 = 0; pSave1 = NULL; pSave2 = NULL; ExtraWt1 = (DWORD)0; WhichExtra = (DWORD)0;
//
// Set the weights to be invalid. This flags whether or not to
// recompute the weights next time through the loop. It also flags
// whether or not to start over (continue) in the loop.
//
Weight1 = CMP_INVALID_WEIGHT; Weight2 = CMP_INVALID_WEIGHT;
//
// Switch on the different flag options. This will speed up
// the comparisons of two strings that are different.
//
State = STATE_CW | STATE_JAMO_WEIGHT; switch (dwCmpFlags & (NORM_IGNORECASE | NORM_IGNORENONSPACE)) { case ( 0 ) : { Mask = CMP_MASKOFF_NONE; State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
break; }
case ( NORM_IGNORECASE ) : { Mask = CMP_MASKOFF_CW; State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
break; }
case ( NORM_IGNORENONSPACE ) : { Mask = CMP_MASKOFF_DW;
break; }
case ( NORM_IGNORECASE | NORM_IGNORENONSPACE ) : { Mask = CMP_MASKOFF_DW_CW;
break; } }
switch (dwCmpFlags & (NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH)) { case ( 0 ) : { break; }
case ( NORM_IGNOREKANATYPE ) : { Mask &= CMP_MASKOFF_KANA;
break; }
case ( NORM_IGNOREWIDTH ) : { Mask &= CMP_MASKOFF_WIDTH;
if (dwCmpFlags & NORM_IGNORECASE) { REMOVE_STATE(STATE_CW); }
break; }
case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) : { Mask &= CMP_MASKOFF_KANA_WIDTH;
if (dwCmpFlags & NORM_IGNORECASE) { REMOVE_STATE(STATE_CW); }
break; } }
//
// Compare each character's sortkey weight in the two strings.
//
while ( NOT_END_STRING(ctr1, pString1, cchCount1) && NOT_END_STRING(ctr2, pString2, cchCount2) ) { if (Weight1 == CMP_INVALID_WEIGHT) { Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1); Weight1 &= Mask; } if (Weight2 == CMP_INVALID_WEIGHT) { Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2); Weight2 &= Mask; }
//
// If compression locale, then need to check for compression
// characters even if the weights are equal. If it's not a
// compression locale, then we don't need to check anything
// if the weights are equal.
//
if ( (IfCompress) && (GET_COMPRESSION(&Weight1) || GET_COMPRESSION(&Weight2)) ) { int ctr; // loop counter
PCOMPRESS_3 pComp3; // ptr to compress 3 table
PCOMPRESS_2 pComp2; // ptr to compress 2 table
int If1; // if compression found in string 1
int If2; // if compression found in string 2
int CompVal; // compression value
int IfEnd1; // if exists 1 more char in string 1
int IfEnd2; // if exists 1 more char in string 2
//
// Check for compression in the weights.
//
If1 = GET_COMPRESSION(&Weight1); If2 = GET_COMPRESSION(&Weight2); CompVal = ((If1 > If2) ? If1 : If2);
IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1); IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
if (pHashN->IfDblCompression == FALSE) { //
// NO double compression, so don't check for it.
//
switch (CompVal) { //
// Check for 3 characters compressing to 1.
//
case ( COMPRESS_3_MASK ) : { //
// Check character in string 1 and string 2.
//
if ( ((If1) && (!IfEnd1) && !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1)) || ((If2) && (!IfEnd2) && !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2)) ) { ctr = pHashN->pCompHdr->Num3; pComp3 = pHashN->pCompress3; for (; ctr > 0; ctr--, pComp3++) { //
// Check character in string 1.
//
if ( (If1) && (!IfEnd1) && !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) && (pComp3->UCP1 == *pString1) && (pComp3->UCP2 == *(pString1 + 1)) && (pComp3->UCP3 == *(pString1 + 2)) ) { //
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights); Weight1 &= Mask; pString1 += 2; ctr1 -= 2;
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If2 == 0) { break; } }
//
// Check character in string 2.
//
if ( (If2) && (!IfEnd2) && !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) && (pComp3->UCP1 == *pString2) && (pComp3->UCP2 == *(pString2 + 1)) && (pComp3->UCP3 == *(pString2 + 2)) ) { //
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights); Weight2 &= Mask; pString2 += 2; ctr2 -= 2;
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If1 == 0) { break; } } } if (ctr > 0) { break; } } //
// Fall through if not found.
//
}
//
// Check for 2 characters compressing to 1.
//
case ( COMPRESS_2_MASK ) : { //
// Check character in string 1 and string 2.
//
if ( ((If1) && (!IfEnd1)) || ((If2) && (!IfEnd2)) ) { ctr = pHashN->pCompHdr->Num2; pComp2 = pHashN->pCompress2; for (; ((ctr > 0) && (If1 || If2)); ctr--, pComp2++) { //
// Check character in string 1.
//
if ( (If1) && (!IfEnd1) && (pComp2->UCP1 == *pString1) && (pComp2->UCP2 == *(pString1 + 1)) ) { //
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights); Weight1 &= Mask; pString1++; ctr1--;
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If2 == 0) { break; } }
//
// Check character in string 2.
//
if ( (If2) && (!IfEnd2) && (pComp2->UCP1 == *pString2) && (pComp2->UCP2 == *(pString2 + 1)) ) { //
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights); Weight2 &= Mask; pString2++; ctr2--;
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If1 == 0) { break; } } } if (ctr > 0) { break; } } } } } else if (!IfEnd1 && !IfEnd2) { //
// Double Compression exists, so must check for it.
//
if (IfDblCompress1 = ((GET_DWORD_WEIGHT(pHashN, *pString1) & CMP_MASKOFF_CW) == (GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)) & CMP_MASKOFF_CW))) { //
// Advance past the first code point to get to the
// compression character.
//
pString1++; ctr1--; IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1); }
if (IfDblCompress2 = ((GET_DWORD_WEIGHT(pHashN, *pString2) & CMP_MASKOFF_CW) == (GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)) & CMP_MASKOFF_CW))) { //
// Advance past the first code point to get to the
// compression character.
//
pString2++; ctr2--; IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2); }
switch (CompVal) { //
// Check for 3 characters compressing to 1.
//
case ( COMPRESS_3_MASK ) : { //
// Check character in string 1.
//
if ( (If1) && (!IfEnd1) && !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) ) { ctr = pHashN->pCompHdr->Num3; pComp3 = pHashN->pCompress3; for (; ctr > 0; ctr--, pComp3++) { //
// Check character in string 1.
//
if ( (pComp3->UCP1 == *pString1) && (pComp3->UCP2 == *(pString1 + 1)) && (pComp3->UCP3 == *(pString1 + 2)) ) { //
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights); Weight1 &= Mask; if (!IfDblCompress1) { pString1 += 2; ctr1 -= 2; }
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
break; } } }
//
// Check character in string 2.
//
if ( (If2) && (!IfEnd2) && !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) ) { ctr = pHashN->pCompHdr->Num3; pComp3 = pHashN->pCompress3; for (; ctr > 0; ctr--, pComp3++) { //
// Check character in string 2.
//
if ( (pComp3->UCP1 == *pString2) && (pComp3->UCP2 == *(pString2 + 1)) && (pComp3->UCP3 == *(pString2 + 2)) ) { //
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights); Weight2 &= Mask; if (!IfDblCompress2) { pString2 += 2; ctr2 -= 2; }
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
break; } } }
//
// Fall through if not found.
//
if ((If1 == 0) && (If2 == 0)) { break; } }
//
// Check for 2 characters compressing to 1.
//
case ( COMPRESS_2_MASK ) : { //
// Check character in string 1.
//
if ((If1) && (!IfEnd1)) { ctr = pHashN->pCompHdr->Num2; pComp2 = pHashN->pCompress2; for (; ctr > 0; ctr--, pComp2++) { //
// Check character in string 1.
//
if ((pComp2->UCP1 == *pString1) && (pComp2->UCP2 == *(pString1 + 1))) { //
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights); Weight1 &= Mask; if (!IfDblCompress1) { pString1++; ctr1--; }
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
break; } } }
//
// Check character in string 2.
//
if ((If2) && (!IfEnd2)) { ctr = pHashN->pCompHdr->Num2; pComp2 = pHashN->pCompress2; for (; ctr > 0; ctr--, pComp2++) { //
// Check character in string 2.
//
if ((pComp2->UCP1 == *pString2) && (pComp2->UCP2 == *(pString2 + 1))) { //
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights); Weight2 &= Mask; if (!IfDblCompress2) { pString2++; ctr2--; }
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
break; } } } } }
//
// Reset the pointer back to the beginning of the double
// compression. Pointer fixup at the end will advance
// them correctly.
//
// If double compression, we advanced the pointer at
// the beginning of the switch statement. If double
// compression character was actually found, the pointer
// was NOT advanced. We now want to decrement the pointer
// to put it back to where it was.
//
// The next time through, the pointer will be pointing to
// the regular compression part of the string.
//
if (IfDblCompress1) { pString1--; ctr1++; } if (IfDblCompress2) { pString2--; ctr2++; } } }
//
// Check the weights again.
//
if ((Weight1 != Weight2) || (GET_SCRIPT_MEMBER(&Weight1) == EXTENSION_A)) { //
// Weights are still not equal, even after compression
// check, so compare the different weights.
//
BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
WORD uw1 = GET_UNICODE_SM_MOD(&Weight1, sm1, fModify); // unicode weight 1
WORD uw2 = GET_UNICODE_SM_MOD(&Weight2, sm2, fModify); // unicode weight 2
BYTE dw1; // diacritic weight 1
BYTE dw2; // diacritic weight 2
DWORD Wt; // temp weight holder
WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
//
// If Unicode Weights are different and no special cases,
// then we're done. Otherwise, we need to do extra checking.
//
// Must check ENTIRE string for any possibility of Unicode Weight
// differences. As soon as a Unicode Weight difference is found,
// then we're done. If no UW difference is found, then the
// first Diacritic Weight difference is used. If no DW difference
// is found, then use the first Case Difference. If no CW
// difference is found, then use the first Extra Weight
// difference. If no XW difference is found, then use the first
// Special Weight difference.
//
if ((uw1 != uw2) || ((sm1 <= SYMBOL_5) && (sm1 >= FAREAST_SPECIAL))) { //
// Check for Unsortable characters and skip them.
// This needs to be outside the switch statement. If EITHER
// character is unsortable, must skip it and start over.
//
if (sm1 == UNSORTABLE) { pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; } if (sm2 == UNSORTABLE) { pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT; }
//
// Check for Ignore Nonspace and Ignore Symbol. If
// Ignore Nonspace is set and either character is a
// nonspace mark only, then we need to advance the
// pointer to skip over the character and continue.
// If Ignore Symbol is set and either character is a
// punctuation char, then we need to advance the
// pointer to skip over the character and continue.
//
// This step is necessary so that a string with a
// nonspace mark and a punctuation char following one
// another are properly ignored when one or both of
// the ignore flags is set.
//
if (fIgnoreDiacritic) { if (sm1 == NONSPACE_MARK) { pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; } if (sm2 == NONSPACE_MARK) { pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT; } } if (fIgnoreSymbol) { if (sm1 == PUNCTUATION) { pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; } if (sm2 == PUNCTUATION) { pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT; } } if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT)) { continue; }
//
// Switch on the script member of string 1 and take care
// of any special cases.
//
switch (sm1) { case ( NONSPACE_MARK ) : { //
// Nonspace only - look at diacritic weight only.
//
if (!fIgnoreDiacritic) { if ((WhichDiacritic == 0) || (State & STATE_REVERSE_DW)) { WhichDiacritic = CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW); } }
//
// Adjust pointer and counter and set flags.
//
pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT;
break; } case ( SYMBOL_1 ) : case ( SYMBOL_2 ) : case ( SYMBOL_3 ) : case ( SYMBOL_4 ) : case ( SYMBOL_5 ) : { //
// If the ignore symbol flag is set, then skip over
// the symbol.
//
if (fIgnoreSymbol) { pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; }
break; } case ( PUNCTUATION ) : { //
// If the ignore punctuation flag is set, then skip
// over the punctuation char.
//
if (fIgnorePunct) { pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; } else if (!fStringSort) { //
// Use WORD sort method.
//
if (sm2 != PUNCTUATION) { //
// The character in the second string is
// NOT punctuation.
//
if (WhichPunct2) { //
// Set WP 2 to show that string 2 is
// smaller, since a punctuation char had
// already been found at an earlier
// position in string 2.
//
// Set the Ignore Punctuation flag so we
// just skip over any other punctuation
// chars in the string.
//
WhichPunct2 = CSTR_GREATER_THAN; fIgnorePunct = TRUE; } else { //
// Set WP 1 to show that string 2 is
// smaller, and that string 1 has had
// a punctuation char - since no
// punctuation chars have been found
// in string 2.
//
WhichPunct1 = CSTR_GREATER_THAN; }
//
// Advance pointer 1 and decrement counter 1.
//
pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; }
//
// Do NOT want to advance the pointer in string 1
// if string 2 is also a punctuation char. This
// will be done later.
//
}
break; } case ( EXPANSION ) : { //
// Save pointer in pString1 so that it can be
// restored.
//
if (pSave1 == NULL) { pSave1 = pString1; } pString1 = pTmpBuf1;
//
// Add one to counter so that subtraction doesn't end
// comparison prematurely.
//
ctr1++;
//
// Expand character into temporary buffer.
//
pTmpBuf1[0] = GET_EXPANSION_1(&Weight1); pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
//
// Set cExpChar1 to the number of expansion characters
// stored.
//
cExpChar1 = MAX_TBL_EXPANSION;
Weight1 = CMP_INVALID_WEIGHT;
break; } case ( FAREAST_SPECIAL ) : { if (sm2 != EXPANSION) { //
// Get the weight for the far east special case
// and store it in Weight1.
//
GET_FAREAST_WEIGHT( Weight1, uw1, Mask, lpString1, pString1, ExtraWt1, fModify );
if (sm2 != FAREAST_SPECIAL) { //
// The character in the second string is
// NOT a fareast special char.
//
// Set each of weights 4, 5, 6, and 7 to show
// that string 2 is smaller (if not already set).
//
if ((GET_WT_FOUR(&WhichExtra) == 0) && (GET_WT_FOUR(&ExtraWt1) != 0)) { GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_FIVE(&WhichExtra) == 0) && (GET_WT_FIVE(&ExtraWt1) != 0)) { GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_SIX(&WhichExtra) == 0) && (GET_WT_SIX(&ExtraWt1) != 0)) { GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_SEVEN(&WhichExtra) == 0) && (GET_WT_SEVEN(&ExtraWt1) != 0)) { GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN; } } } break; } case ( JAMO_SPECIAL ) : { LPWSTR pStr1 = pString1; LPWSTR pStr2 = pString2;
//
// Set the JamoFlag so we don't handle it again.
//
JamoFlag = TRUE; FindJamoDifference( pHashN, &pStr1, &ctr1, cchCount1, &Weight1, &pStr2, &ctr2, cchCount2, &Weight2, &pLastJamo, &uw1, &uw2, &State, &WhichJamo, fModify );
if (WhichJamo) { return (WhichJamo); } pString1 = pStr1; pString2 = pStr2;
break; } case ( EXTENSION_A ) : { //
// Get the full weight in case DW got masked.
//
Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1); if (sm2 == EXTENSION_A) { Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2); }
//
// Compare the weights.
//
if (Weight1 == Weight2) { //
// Adjust pointers and counters and set flags.
//
pString1++; pString2++; ctr1--; ctr2--; Weight1 = CMP_INVALID_WEIGHT; Weight2 = CMP_INVALID_WEIGHT; } else { //
// Get the actual UW to compare.
//
if (sm2 == EXTENSION_A) { //
// Set the UW values to be the AW and DW since
// both strings contain an extension A char.
//
uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1), GET_DIACRITIC(&Weight1), FALSE ); uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2), GET_DIACRITIC(&Weight2), FALSE ); } else { //
// Only string1 contains an extension A char,
// so set the UW value to be the first UW
// value for extension A (default values):
// SM_EXT_A, AW_EXT_A
//
uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); } }
break; } case ( UNSORTABLE ) : { //
// Fill out the case statement so the compiler
// will use a jump table.
//
break; } }
//
// Switch on the script member of string 2 and take care
// of any special cases.
//
switch (sm2) { case ( NONSPACE_MARK ) : { //
// Nonspace only - look at diacritic weight only.
//
if (!fIgnoreDiacritic) { if ((WhichDiacritic == 0) || (State & STATE_REVERSE_DW))
{ WhichDiacritic = CSTR_LESS_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW); } }
//
// Adjust pointer and counter and set flags.
//
pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT;
break; } case ( SYMBOL_1 ) : case ( SYMBOL_2 ) : case ( SYMBOL_3 ) : case ( SYMBOL_4 ) : case ( SYMBOL_5 ) : { //
// If the ignore symbol flag is set, then skip over
// the symbol.
//
if (fIgnoreSymbol) { pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT; }
break; } case ( PUNCTUATION ) : { //
// If the ignore punctuation flag is set, then
// skip over the punctuation char.
//
if (fIgnorePunct) { //
// Advance pointer 2 and decrement counter 2.
//
pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT; } else if (!fStringSort) { //
// Use WORD sort method.
//
if (sm1 != PUNCTUATION) { //
// The character in the first string is
// NOT punctuation.
//
if (WhichPunct1) { //
// Set WP 1 to show that string 1 is
// smaller, since a punctuation char had
// already been found at an earlier
// position in string 1.
//
// Set the Ignore Punctuation flag so we
// just skip over any other punctuation
// chars in the string.
//
WhichPunct1 = CSTR_LESS_THAN; fIgnorePunct = TRUE; } else { //
// Set WP 2 to show that string 1 is
// smaller, and that string 2 has had
// a punctuation char - since no
// punctuation chars have been found
// in string 1.
//
WhichPunct2 = CSTR_LESS_THAN; }
//
// Pointer 2 and counter 2 will be updated
// after if-else statement.
//
} else { //
// Both code points are punctuation chars.
//
// See if either of the strings has encountered
// punctuation chars previous to this.
//
if (WhichPunct1) { //
// String 1 has had a punctuation char, so
// it should be the smaller string (since
// both have punctuation chars).
//
WhichPunct1 = CSTR_LESS_THAN; } else if (WhichPunct2) { //
// String 2 has had a punctuation char, so
// it should be the smaller string (since
// both have punctuation chars).
//
WhichPunct2 = CSTR_GREATER_THAN; } else { BYTE aw1 = GET_ALPHA_NUMERIC(&Weight1); BYTE aw2 = GET_ALPHA_NUMERIC(&Weight2);
if (aw1 == aw2) { BYTE cw1 = GET_CASE(&Weight1); BYTE cw2 = GET_CASE(&Weight2); if (cw1 < cw2) { WhichPunct1 = CSTR_LESS_THAN; } else if (cw1 > cw2) { WhichPunct1 = CSTR_GREATER_THAN; } } else { //
// Position is the same, so compare the
// special weights. Set WhichPunct1 to
// the smaller special weight.
//
WhichPunct1 = (aw1 < aw2 ? CSTR_LESS_THAN : CSTR_GREATER_THAN); } }
//
// Set the Ignore Punctuation flag.
//
fIgnorePunct = TRUE;
//
// Advance pointer 1 and decrement counter 1.
// Pointer 2 and counter 2 will be updated
// after if-else statement.
//
pString1++; ctr1--; Weight1 = CMP_INVALID_WEIGHT; }
//
// Advance pointer 2 and decrement counter 2.
//
pString2++; ctr2--; Weight2 = CMP_INVALID_WEIGHT; }
break; } case ( EXPANSION ) : { //
// Save pointer in pString1 so that it can be restored.
//
if (pSave2 == NULL) { pSave2 = pString2; } pString2 = pTmpBuf2;
//
// Add one to counter so that subtraction doesn't end
// comparison prematurely.
//
ctr2++;
//
// Expand character into temporary buffer.
//
pTmpBuf2[0] = GET_EXPANSION_1(&Weight2); pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
//
// Set cExpChar2 to the number of expansion characters
// stored.
//
cExpChar2 = MAX_TBL_EXPANSION;
Weight2 = CMP_INVALID_WEIGHT;
break; } case ( FAREAST_SPECIAL ) : { if (sm1 != EXPANSION) { //
// Get the weight for the far east special case
// and store it in Weight2.
//
GET_FAREAST_WEIGHT( Weight2, uw2, Mask, lpString2, pString2, ExtraWt2, fModify );
if (sm1 != FAREAST_SPECIAL) { //
// The character in the first string is
// NOT a fareast special char.
//
// Set each of weights 4, 5, 6, and 7 to show
// that string 1 is smaller (if not already set).
//
if ((GET_WT_FOUR(&WhichExtra) == 0) && (GET_WT_FOUR(&ExtraWt2) != 0)) { GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_FIVE(&WhichExtra) == 0) && (GET_WT_FIVE(&ExtraWt2) != 0)) { GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_SIX(&WhichExtra) == 0) && (GET_WT_SIX(&ExtraWt2) != 0)) { GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_SEVEN(&WhichExtra) == 0) && (GET_WT_SEVEN(&ExtraWt2) != 0)) { GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN; } } else { //
// Characters in both strings are fareast
// special chars.
//
// Set each of weights 4, 5, 6, and 7
// appropriately (if not already set).
//
if ( (GET_WT_FOUR(&WhichExtra) == 0) && ( GET_WT_FOUR(&ExtraWt1) != GET_WT_FOUR(&ExtraWt2) ) ) { GET_WT_FOUR(&WhichExtra) = ( GET_WT_FOUR(&ExtraWt1) < GET_WT_FOUR(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_FIVE(&WhichExtra) == 0) && ( GET_WT_FIVE(&ExtraWt1) != GET_WT_FIVE(&ExtraWt2) ) ) { GET_WT_FIVE(&WhichExtra) = ( GET_WT_FIVE(&ExtraWt1) < GET_WT_FIVE(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_SIX(&WhichExtra) == 0) && ( GET_WT_SIX(&ExtraWt1) != GET_WT_SIX(&ExtraWt2) ) ) { GET_WT_SIX(&WhichExtra) = ( GET_WT_SIX(&ExtraWt1) < GET_WT_SIX(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_SEVEN(&WhichExtra) == 0) && ( GET_WT_SEVEN(&ExtraWt1) != GET_WT_SEVEN(&ExtraWt2) ) ) { GET_WT_SEVEN(&WhichExtra) = ( GET_WT_SEVEN(&ExtraWt1) < GET_WT_SEVEN(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } } } break; } case ( JAMO_SPECIAL ) : { if (!JamoFlag) { LPWSTR pStr1 = pString1; LPWSTR pStr2 = pString2;
FindJamoDifference( pHashN, &pStr1, &ctr1, cchCount1, &Weight1, &pStr2, &ctr2, cchCount2, &Weight2, &pLastJamo, &uw1, &uw2, &State, &WhichJamo, fModify ); if (WhichJamo) { return (WhichJamo); } pString1 = pStr1; pString2 = pStr2; } else { //
// Reset the Jamo flag.
//
JamoFlag = FALSE; }
break; } case ( EXTENSION_A ) : { //
// If sm1 is an extension A character, then
// both sm1 and sm2 have been handled. We should
// only get here when either sm1 is not an
// extension A character or the two extension A
// characters are different.
//
if (sm1 != EXTENSION_A) { //
// Get the full weight in case DW got masked.
// Also, get the actual UW to compare.
//
// Only string2 contains an extension A char,
// so set the UW value to be the first UW
// value for extension A (default values):
// SM_EXT_A, AW_EXT_A
//
Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2); uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); }
//
// We should then fall through to the comparison
// of the Unicode weights.
//
break; } case ( UNSORTABLE ) : { //
// Fill out the case statement so the compiler
// will use a jump table.
//
break; } }
//
// See if the comparison should start again.
//
if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT)) { //
// Check to see if we're modifying the script value.
// If so, then we need to reset the fareast weight
// (if applicable) so that it doesn't get modified
// again.
//
if (fModify == TRUE) { if (sm1 == FAREAST_SPECIAL) { Weight1 = CMP_INVALID_WEIGHT; } else if (sm2 == FAREAST_SPECIAL) { Weight2 = CMP_INVALID_WEIGHT; } } continue; }
//
// We're not supposed to drop down into the state table if
// the unicode weights are different, so stop comparison
// and return result of unicode weight comparison.
//
if (uw1 != uw2) { return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN); } }
//
// For each state in the state table, do the appropriate
// comparisons.
//
if (State & (STATE_DW | STATE_REVERSE_DW)) { //
// Get the diacritic weights.
//
dw1 = GET_DIACRITIC(&Weight1); dw2 = GET_DIACRITIC(&Weight2);
if (dw1 != dw2) { //
// Look ahead to see if diacritic follows a
// minimum diacritic weight. If so, get the
// diacritic weight of the nonspace mark.
//
while (!AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1)) { Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)); if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK) { dw1 += GET_DIACRITIC(&Wt); pString1++; ctr1--; } else { break; } }
while (!AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2)) { Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)); if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK) { dw2 += GET_DIACRITIC(&Wt); pString2++; ctr2--; } else { break; } }
//
// Save which string has the smaller diacritic
// weight if the diacritic weights are still
// different.
//
if (dw1 != dw2) { WhichDiacritic = (dw1 < dw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW); } } } if (State & STATE_CW) { //
// Get the case weights.
//
if (GET_CASE(&Weight1) != GET_CASE(&Weight2)) { //
// Save which string has the smaller case weight.
//
WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2)) ? CSTR_LESS_THAN : CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_CW); } } }
//
// Fixup the pointers and counters.
//
POINTER_FIXUP(); ctr1--; ctr2--;
//
// Reset the weights to be invalid.
//
Weight1 = CMP_INVALID_WEIGHT; Weight2 = CMP_INVALID_WEIGHT; }
//
// If the end of BOTH strings has been reached, then the unicode
// weights match exactly. Check the diacritic, case and special
// weights. If all are zero, then return success. Otherwise,
// return the result of the weight difference.
//
// NOTE: The following checks MUST REMAIN IN THIS ORDER:
// Diacritic, Case, Punctuation.
//
if (AT_STRING_END(ctr1, pString1, cchCount1)) { if (AT_STRING_END(ctr2, pString2, cchCount2)) { if (WhichDiacritic) { return (WhichDiacritic); } if (WhichCase) { return (WhichCase); } if (WhichExtra) { if (!fIgnoreDiacritic) { if (GET_WT_FOUR(&WhichExtra)) { return (GET_WT_FOUR(&WhichExtra)); } if (GET_WT_FIVE(&WhichExtra)) { return (GET_WT_FIVE(&WhichExtra)); } } if (GET_WT_SIX(&WhichExtra)) { return (GET_WT_SIX(&WhichExtra)); } if (GET_WT_SEVEN(&WhichExtra)) { return (GET_WT_SEVEN(&WhichExtra)); } } if (WhichPunct1) { return (WhichPunct1); } if (WhichPunct2) { return (WhichPunct2); }
return (CSTR_EQUAL); } else { //
// String 2 is longer.
//
pString1 = pString2; ctr1 = ctr2; cchCount1 = cchCount2; fEnd1 = CSTR_LESS_THAN; } } else { fEnd1 = CSTR_GREATER_THAN; }
//
// Scan to the end of the longer string.
//
SCAN_LONGER_STRING( ctr1, pString1, cchCount1, fEnd1 ); }
////////////////////////////////////////////////////////////////////////////
//
// FindJamoDifference
//
////////////////////////////////////////////////////////////////////////////
int FindJamoDifference( PLOC_HASH pHashN, LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1, LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2, LPCWSTR* pLastJamo, WORD* uw1, WORD* uw2, int* pState, int* WhichJamo, BOOL fModify) { int bRestart = 0; // if string compare should restart again
int oldHangulsFound1 = 0; // # of valid old Hangul Jamo compositions found
int oldHangulsFound2 = 0; // # of valid old Hangul Jamo compositions found
WORD UW; BYTE JamoWeight1[3]; // extra weight for first old Hangul composition
BYTE JamoWeight2[3]; // extra weight for second old Hangul composition
//
// Roll back to the first Jamo. We know that these Jamos in both strings
// should be equal, so we can decrement both strings at once.
//
while ((*ppString1 > *pLastJamo) && IsJamo(*(*ppString1 - 1))) { (*ppString1)--; (*ppString2)--; (*ctr1)++; (*ctr2)++; }
//
// Now we are at the beginning of two groups of Jamo characters.
// Compare Jamo unit (either a single Jamo or a valid old Hangul Jamo
// composition) until we run out Jamo units in either strings.
// We also exit when we reach the ends of either string.
//
// while (NOT_END_STRING(*ctr1, *ppString1, cchCount1) &&
// NOT_END_STRING(*ctr2, *ppString2, cchCount2))
//
for (;;) { if (IsJamo(**ppString1)) { if (IsLeadingJamo(**ppString1)) { if ((oldHangulsFound1 = MapOldHangulSortKey( pHashN, *ppString1, *ctr1, &UW, JamoWeight1, fModify )) > 0) { *uw1 = UW;
//
// Mark *pWeight1 so that it is not CMP_INVALID_WEIGHT.
// 0202 is the DW/CW.
//
*pWeight1 = ((DWORD)UW | 0x02020000);
//
// We always increment ppString1/ctr1 at the end of the
// loop, so we need to subtract 1 here.
//
*ppString1 += (oldHangulsFound1 - 1); *ctr1 -= (oldHangulsFound1 - 1); } } if (oldHangulsFound1 == 0) { //
// No valid old Hangul compositions are found. Get the UW
// for the Jamo instead.
//
*pWeight1 = GET_DWORD_WEIGHT(pHashN, **ppString1);
//
// The SMs in PSORTKEY for Jamos are not really SMs. They
// are all 4 (for JAMO_SPECIAL).
// Here we get the real Jamo Unicode weight. The actual SM
// is stored in DW.
//
*uw1 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight1), GET_ALPHA_NUMERIC(pWeight1), fModify ); ((PSORTKEY)pWeight1)->Diacritic = MIN_DW; } }
if (IsJamo(**ppString2)) { if (IsLeadingJamo(**ppString2)) { if ((oldHangulsFound2 = MapOldHangulSortKey( pHashN, *ppString2, *ctr2, &UW, JamoWeight2, fModify )) > 0) { *uw2 = UW; *pWeight2 = ((DWORD)UW | 0x02020000); *ppString2 += (oldHangulsFound2 - 1); *ctr2 -= (oldHangulsFound2 - 1); } } if (oldHangulsFound2 == 0) { *pWeight2 = GET_DWORD_WEIGHT(pHashN, **ppString2); *uw2 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight2), GET_ALPHA_NUMERIC(pWeight2), fModify ); ((PSORTKEY)pWeight2)->Diacritic = MIN_DW; } }
//
// See if either weight is invalid.
// A weight can be invalid when the character is not a Jamo.
//
if (*pWeight1 == CMP_INVALID_WEIGHT) { //
// The current character is not a Jamo. Set the Weight to
// be CMP_INVALID_WEIGHT, so that the string comparision can
// restart within the loop of CompareString().
//
*pWeight1 = CMP_INVALID_WEIGHT; bRestart = 1; goto FindJamoDifferenceExit; } if (*pWeight2 == CMP_INVALID_WEIGHT) { //
// The current character is not a Jamo. Set the Weight to
// be CMP_INVALID_WEIGHT, so that the string comparision can
// restart within the loop of CompareString().
//
*pWeight2 = CMP_INVALID_WEIGHT; bRestart = 1; goto FindJamoDifferenceExit; } if (*uw1 != *uw2) { //
// Found differences in Unicode weight. We can stop the
// processing now.
//
goto FindJamoDifferenceExit; }
//
// When we get here, we know that we have the same Unicode Weight.
// Check if we need to record the WhichJamo.
//
if ((*pState & STATE_JAMO_WEIGHT) && ((oldHangulsFound1 > 0) || (oldHangulsFound2 > 0))) { if ((oldHangulsFound1 > 0) && (oldHangulsFound2 > 0)) { *WhichJamo = (int)memcmp( JamoWeight1, JamoWeight2, sizeof(JamoWeight1) ) + 2; } else if (oldHangulsFound1 > 0) { *WhichJamo = CSTR_GREATER_THAN; } else { *WhichJamo = CSTR_LESS_THAN; } *pState &= ~STATE_JAMO_WEIGHT; oldHangulsFound1 = oldHangulsFound2 = 0; } (*ppString1)++; (*ctr1)--; (*ppString2)++; (*ctr2)--;
if (AT_STRING_END(*ctr1, *ppString1, cchCount1) || AT_STRING_END(*ctr2, *ppString2, cchCount2)) { break; } *pWeight1 = *pWeight2 = CMP_INVALID_WEIGHT; }
//
// If we drop out of the while loop because we reach the end of strings,
// decrement the pointers by one because loops in CompareString() will
// increase the pointers at the end of the loop.
//
// If we drop out of the while loop because the goto's in it, we are
// already off by one.
//
if (AT_STRING_END(*ctr1, *ppString1, cchCount1)) { (*ppString1)--; (*ctr1)++; } if (AT_STRING_END(*ctr2, *ppString2, cchCount2)) { (*ppString2)--; (*ctr2)++; }
FindJamoDifferenceExit: *pLastJamo = *ppString1; return (bRestart); }
|