/*++ Copyright (c) 1991-2000, Microsoft Corporation All rights reserved. Module Name: string.c Abstract: This file contains functions that deal with characters and strings. APIs found in this file: CompareStringW GetStringTypeExW GetStringTypeW Revision History: 05-31-91 JulieB Created. --*/ // // Include Files. // #include "nls.h" #include "nlssafe.h" #include "jamo.h" // // Constant Declarations. // // // State Table. // #define STATE_DW 1 // normal diacritic weight state #define STATE_REVERSE_DW 2 // reverse diacritic weight state #define STATE_CW 4 // case weight state #define STATE_JAMO_WEIGHT 8 // jamo weight state // // Invalid weight value. // #define CMP_INVALID_WEIGHT 0xffffffff #define CMP_INVALID_FAREAST 0xffff0000 #define CMP_INVALID_UW 0xffff // // Forward Declarations. // int LongCompareStringW( PLOC_HASH pHashN, DWORD dwCmpFlags, LPCWSTR lpString1, int cchCount1, LPCWSTR lpString2, int cchCount2, BOOL fModify); int FindJamoDifference( PLOC_HASH pHashN, LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1, LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2, LPCWSTR* pLastJamo, WORD* uw1, WORD* uw2, int* pState, int* WhichJamo, BOOL fModify); //-------------------------------------------------------------------------// // INTERNAL MACROS // //-------------------------------------------------------------------------// //////////////////////////////////////////////////////////////////////////// // // NOT_END_STRING // // Checks to see if the search has reached the end of the string. // It returns TRUE if the counter is not at zero (counting backwards) and // the null termination has not been reached (if -1 was passed in the count // parameter. // // 11-04-92 JulieB Created. 
////////////////////////////////////////////////////////////////////////////

#define NOT_END_STRING(ct, ptr, cchIn) \
    (((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))


////////////////////////////////////////////////////////////////////////////
//
//  AT_STRING_END
//
//  Checks to see if the pointer is at the end of the string.
//  It returns TRUE if the counter is zero or if the null termination
//  has been reached (if -2 was passed in the count parameter).
//
//  This is the exact logical negation of NOT_END_STRING.
//
//  11-04-92    JulieB    Created.
////////////////////////////////////////////////////////////////////////////

#define AT_STRING_END(ct, ptr, cchIn) \
    (((ct) == 0) || ((*(ptr) == 0) && ((cchIn) == -2)))


////////////////////////////////////////////////////////////////////////////
//
//  REMOVE_STATE
//
//  Removes the given state bit(s) from the state table.  This should only
//  be called when the state should not be entered for the remainder of
//  the comparison.  It clears the bits of "value" in the caller's local
//  variable "State".
//
//  The argument is parenthesized so that a compound argument such as
//  (STATE_DW | STATE_CW) clears every listed bit.
//
//  11-04-92    JulieB    Created.
////////////////////////////////////////////////////////////////////////////

#define REMOVE_STATE(value)            (State &= ~(value))


////////////////////////////////////////////////////////////////////////////
//
//  POINTER_FIXUP
//
//  Fixup the string pointers if expansion characters were found.
//  Then, advance the string pointers.
//
//  Uses the caller's locals pString1/pString2, pSave1/pSave2 and
//  cExpChar1/cExpChar2.  A non-NULL pSaveN means pStringN currently points
//  into an expansion temporary buffer; once its counter reaches zero the
//  saved pointer is restored before the pointer is advanced.
//
//  11-04-92    JulieB    Created.
////////////////////////////////////////////////////////////////////////////

#define POINTER_FIXUP() \
{ \
    /* \
     *  Fixup the pointers (if necessary). \
     */ \
    if (pSave1 && (--cExpChar1 == 0)) \
    { \
        /* \
         *  Done using expansion temporary buffer. \
         */ \
        pString1 = pSave1; \
        pSave1 = NULL; \
    } \
\
    if (pSave2 && (--cExpChar2 == 0)) \
    { \
        /* \
         *  Done using expansion temporary buffer. \
         */ \
        pString2 = pSave2; \
        pSave2 = NULL; \
    } \
\
    /* \
     *  Advance the string pointers. \
     */ \
    pString1++; \
    pString2++; \
}


////////////////////////////////////////////////////////////////////////////
//
//  SCAN_LONGER_STRING
//
//  Scans the longer string for diacritic, case, and special weights.
//
//  This macro RETURNS from the enclosing function on every path.  It
//  uses the caller's locals Weight1, pHashN, fIgnoreDiacritic,
//  fIgnoreSymbol, WhichDiacritic, WhichCase, WhichExtra, WhichJamo,
//  WhichPunct1 and WhichPunct2.  "ptr" and "ct" are modified.
//
//  11-04-92    JulieB    Created.
////////////////////////////////////////////////////////////////////////////

#define SCAN_LONGER_STRING( ct, \
                            ptr, \
                            cchIn, \
                            ret ) \
{ \
    /* \
     *  Search through the rest of the longer string to make sure \
     *  all characters are not to be ignored.  If find a character that \
     *  should not be ignored, return the given return value immediately. \
     * \
     *  The only exception to this is when a nonspace mark is found.  If \
     *  another DW difference has been found earlier, then use that. \
     */ \
    while (NOT_END_STRING(ct, ptr, cchIn)) \
    { \
        Weight1 = GET_DWORD_WEIGHT(pHashN, *ptr); \
        switch (GET_SCRIPT_MEMBER(&Weight1)) \
        { \
            case ( UNSORTABLE ): \
            { \
                break; \
            } \
            case ( NONSPACE_MARK ): \
            { \
                if ((!fIgnoreDiacritic) && (!WhichDiacritic)) \
                { \
                    return (ret); \
                } \
                break; \
            } \
            case ( PUNCTUATION ) : \
            case ( SYMBOL_1 ) : \
            case ( SYMBOL_2 ) : \
            case ( SYMBOL_3 ) : \
            case ( SYMBOL_4 ) : \
            case ( SYMBOL_5 ) : \
            { \
                if (!fIgnoreSymbol) \
                { \
                    return (ret); \
                } \
                break; \
            } \
            case ( EXPANSION ) : \
            case ( FAREAST_SPECIAL ) : \
            case ( JAMO_SPECIAL ) : \
            case ( EXTENSION_A ) : \
            default : \
            { \
                return (ret); \
            } \
        } \
\
        /* \
         *  Advance pointer and decrement counter. \
         */ \
        ptr++; \
        ct--; \
    } \
\
    /* \
     *  Need to check diacritic, case, extra, and special weights for \
     *  final return value.  Still could be equal if the longer part of \
     *  the string contained only characters to be ignored. \
     * \
     *  NOTE:  The following checks MUST REMAIN IN THIS ORDER: \
     *            Diacritic, Case, Extra, Jamo, Punctuation. \
     */ \
    if (WhichDiacritic) \
    { \
        return (WhichDiacritic); \
    } \
    if (WhichCase) \
    { \
        return (WhichCase); \
    } \
    if (WhichExtra) \
    { \
        if (!fIgnoreDiacritic) \
        { \
            if (GET_WT_FOUR(&WhichExtra)) \
            { \
                return (GET_WT_FOUR(&WhichExtra)); \
            } \
            if (GET_WT_FIVE(&WhichExtra)) \
            { \
                return (GET_WT_FIVE(&WhichExtra)); \
            } \
        } \
        if (GET_WT_SIX(&WhichExtra)) \
        { \
            return (GET_WT_SIX(&WhichExtra)); \
        } \
        if (GET_WT_SEVEN(&WhichExtra)) \
        { \
            return (GET_WT_SEVEN(&WhichExtra)); \
        } \
    } \
    if (WhichJamo) \
    { \
        return (WhichJamo); \
    } \
    if (WhichPunct1) \
    { \
        return (WhichPunct1); \
    } \
    if (WhichPunct2) \
    { \
        return (WhichPunct2); \
    } \
\
    return (CSTR_EQUAL); \
}


////////////////////////////////////////////////////////////////////////////
//
//  QUICK_SCAN_LONGER_STRING
//
//  Scans the longer string for diacritic, case, and special weights.
//  Assumes that both strings are null-terminated.
//
//  This macro RETURNS from the enclosing function on every path.  Unlike
//  SCAN_LONGER_STRING it has no ignore flags: any character that is not
//  UNSORTABLE (or a NONSPACE_MARK after an earlier diacritic difference)
//  yields "ret".  It uses the caller's locals pHashN, WhichDiacritic,
//  WhichCase, WhichExtra, WhichJamo, WhichPunct1 and WhichPunct2.
//
//  11-04-92    JulieB    Created.
////////////////////////////////////////////////////////////////////////////

#define QUICK_SCAN_LONGER_STRING( ptr, \
                                  ret ) \
{ \
    /* \
     *  Search through the rest of the longer string to make sure \
     *  all characters are not to be ignored.  If find a character that \
     *  should not be ignored, return the given return value immediately. \
     * \
     *  The only exception to this is when a nonspace mark is found.  If \
     *  another DW difference has been found earlier, then use that. \
     */ \
    while (*ptr != 0) \
    { \
        switch (GET_SCRIPT_MEMBER(&(pHashN->pSortkey[*ptr]))) \
        { \
            case ( UNSORTABLE ): \
            { \
                break; \
            } \
            case ( NONSPACE_MARK ): \
            { \
                if (!WhichDiacritic) \
                { \
                    return (ret); \
                } \
                break; \
            } \
            default : \
            { \
                return (ret); \
            } \
        } \
\
        /* \
         *  Advance pointer. \
         */ \
        ptr++; \
    } \
\
    /* \
     *  Need to check diacritic, case, extra, and special weights for \
     *  final return value.  Still could be equal if the longer part of \
     *  the string contained only unsortable characters. \
     * \
     *  NOTE:  The following checks MUST REMAIN IN THIS ORDER: \
     *            Diacritic, Case, Extra, Jamo, Punctuation. \
     */ \
    if (WhichDiacritic) \
    { \
        return (WhichDiacritic); \
    } \
    if (WhichCase) \
    { \
        return (WhichCase); \
    } \
    if (WhichExtra) \
    { \
        if (GET_WT_FOUR(&WhichExtra)) \
        { \
            return (GET_WT_FOUR(&WhichExtra)); \
        } \
        if (GET_WT_FIVE(&WhichExtra)) \
        { \
            return (GET_WT_FIVE(&WhichExtra)); \
        } \
        if (GET_WT_SIX(&WhichExtra)) \
        { \
            return (GET_WT_SIX(&WhichExtra)); \
        } \
        if (GET_WT_SEVEN(&WhichExtra)) \
        { \
            return (GET_WT_SEVEN(&WhichExtra)); \
        } \
    } \
    if (WhichJamo) \
    { \
        return (WhichJamo); \
    } \
    if (WhichPunct1) \
    { \
        return (WhichPunct1); \
    } \
    if (WhichPunct2) \
    { \
        return (WhichPunct2); \
    } \
\
    return (CSTR_EQUAL); \
}


////////////////////////////////////////////////////////////////////////////
//
//  GET_FAREAST_WEIGHT
//
//  Returns the weight for the far east special case in "wt".  This currently
//  includes the Cho-on, the Repeat, and the Kana characters.
//
//  On exit "wt" and "uw" hold the adjusted weight and unicode weight of the
//  current character and "ExtraWt" holds weights 4 through 7.  For Repeat
//  and Cho-On the string is walked backwards from pCur (never before
//  pBegin) to find the previous character that supplies the weight.
//  Uses the caller's local pHashN.
//
//  08-19-93    JulieB    Created.
////////////////////////////////////////////////////////////////////////////

#define GET_FAREAST_WEIGHT( wt, \
                            uw, \
                            mask, \
                            pBegin, \
                            pCur, \
                            ExtraWt, \
                            fModify ) \
{ \
    int ct;                       /* loop counter */ \
    BYTE PrevSM;                  /* previous script member value */ \
    BYTE PrevAW;                  /* previous alphanumeric value */ \
    BYTE PrevCW;                  /* previous case value */ \
    BYTE AW;                      /* alphanumeric value */ \
    BYTE CW;                      /* case value */ \
    DWORD PrevWt;                 /* previous weight */ \
\
\
    /* \
     *  Get the alphanumeric weight and the case weight of the \
     *  current code point. \
     */ \
    AW = GET_ALPHA_NUMERIC(&wt); \
    CW = GET_CASE(&wt); \
    ExtraWt = (DWORD)0; \
\
    /* \
     *  Special case Repeat and Cho-On. \
     *    AW = 0  =>  Repeat \
     *    AW = 1  =>  Cho-On \
     *    AW = 2+ =>  Kana \
     */ \
    if (AW <= MAX_SPECIAL_AW) \
    { \
        /* \
         *  If the script member of the previous character is \
         *  invalid, then give the special character an \
         *  invalid weight (highest possible weight) so that it \
         *  will sort AFTER everything else. \
         */ \
        ct = 1; \
        PrevWt = CMP_INVALID_FAREAST; \
        while ((pCur - ct) >= pBegin) \
        { \
            PrevWt = GET_DWORD_WEIGHT(pHashN, *(pCur - ct)); \
            PrevWt &= mask; \
            PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \
            if (PrevSM < FAREAST_SPECIAL) \
            { \
                if (PrevSM == EXPANSION) \
                { \
                    PrevWt = CMP_INVALID_FAREAST; \
                } \
                else \
                { \
                    /* \
                     *  UNSORTABLE or NONSPACE_MARK. \
                     * \
                     *  Just ignore these, since we only care about the \
                     *  previous UW value. \
                     */ \
                    PrevWt = CMP_INVALID_FAREAST; \
                    ct++; \
                    continue; \
                } \
            } \
            else if (PrevSM == FAREAST_SPECIAL) \
            { \
                PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \
                if (PrevAW <= MAX_SPECIAL_AW) \
                { \
                    /* \
                     *  Handle case where two special chars follow \
                     *  each other.  Keep going back in the string. \
                     */ \
                    PrevWt = CMP_INVALID_FAREAST; \
                    ct++; \
                    continue; \
                } \
\
                UNICODE_WT(&PrevWt) = \
                    MAKE_UNICODE_WT(KANA, PrevAW, fModify); \
\
                /* \
                 *  Only build weights 4, 5, 6, and 7 if the \
                 *  previous character is KANA. \
                 * \
                 *  Always: \
                 *    4W = previous CW  &  ISOLATE_SMALL \
                 *    6W = previous CW  &  ISOLATE_KANA \
                 * \
                 */ \
                PrevCW = GET_CASE(&PrevWt); \
                GET_WT_FOUR(&ExtraWt) = PrevCW & ISOLATE_SMALL; \
                GET_WT_SIX(&ExtraWt) = PrevCW & ISOLATE_KANA; \
\
                if (AW == AW_REPEAT) \
                { \
                    /* \
                     *  Repeat: \
                     *    UW = previous UW \
                     *    5W = WT_FIVE_REPEAT \
                     *    7W = previous CW  &  ISOLATE_WIDTH \
                     */ \
                    uw = UNICODE_WT(&PrevWt); \
                    GET_WT_FIVE(&ExtraWt) = WT_FIVE_REPEAT; \
                    GET_WT_SEVEN(&ExtraWt) = PrevCW & ISOLATE_WIDTH; \
                } \
                else \
                { \
                    /* \
                     *  Cho-On: \
                     *    UW = previous UW  &  CHO_ON_UW_MASK \
                     *    5W = WT_FIVE_CHO_ON \
                     *    7W = current  CW  &  ISOLATE_WIDTH \
                     */ \
                    uw = UNICODE_WT(&PrevWt) & CHO_ON_UW_MASK; \
                    GET_WT_FIVE(&ExtraWt) = WT_FIVE_CHO_ON; \
                    GET_WT_SEVEN(&ExtraWt) = CW & ISOLATE_WIDTH; \
                } \
            } \
            else \
            { \
                uw = GET_UNICODE_MOD(&PrevWt, fModify); \
            } \
\
            break; \
        } \
    } \
    else \
    { \
        /* \
         *  Kana: \
         *    SM = KANA \
         *    AW = current AW \
         *    4W = current CW  &  ISOLATE_SMALL \
         *    5W = WT_FIVE_KANA \
         *    6W = current CW  &  ISOLATE_KANA \
         *    7W = current CW  &  ISOLATE_WIDTH \
         */ \
        uw = MAKE_UNICODE_WT(KANA, AW, fModify); \
GET_WT_FOUR(&ExtraWt) = CW & ISOLATE_SMALL; \ GET_WT_FIVE(&ExtraWt) = WT_FIVE_KANA; \ GET_WT_SIX(&ExtraWt) = CW & ISOLATE_KANA; \ GET_WT_SEVEN(&ExtraWt) = CW & ISOLATE_WIDTH; \ } \ \ /* \ * Get the weight for the far east special case and store it in wt. \ */ \ if ((AW > MAX_SPECIAL_AW) || (PrevWt != CMP_INVALID_FAREAST)) \ { \ /* \ * Always: \ * DW = current DW \ * CW = minimum CW \ */ \ UNICODE_WT(&wt) = uw; \ CASE_WT(&wt) = MIN_CW; \ } \ else \ { \ uw = CMP_INVALID_UW; \ wt = CMP_INVALID_FAREAST; \ ExtraWt = 0; \ } \ } //-------------------------------------------------------------------------// // API ROUTINES // //-------------------------------------------------------------------------// //////////////////////////////////////////////////////////////////////////// // // CompareStringW // // Compares two wide character strings of the same locale according to the // supplied locale handle. // // 05-31-91 JulieB Created. //////////////////////////////////////////////////////////////////////////// int WINAPI CompareStringW( LCID Locale, DWORD dwCmpFlags, LPCWSTR lpString1, int cchCount1, LPCWSTR lpString2, int cchCount2) { register LPWSTR pString1; // ptr to go thru string 1 register LPWSTR pString2; // ptr to go thru string 2 PLOC_HASH pHashN; // ptr to LOC hash node BOOL fIgnorePunct; // flag to ignore punctuation (not symbol) BOOL fModify; // flag to use modified script member weights DWORD State; // state table DWORD Mask; // mask for weights DWORD Weight1; // full weight of char - string 1 DWORD Weight2; // full weight of char - string 2 int JamoFlag = FALSE; LPCWSTR pLastJamo = lpString1; int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller int WhichJamo; // XW for Jamo int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller LPWSTR pSave1; // ptr to saved pString1 LPWSTR pSave2; // ptr to saved pString2 int cExpChar1, 
cExpChar2; // ct of expansions in tmp DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east) DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east) // // Invalid Parameter Check: // - validate LCID // - either string is null // VALIDATE_LANGUAGE(Locale, pHashN, 0, TRUE); if ((pHashN == NULL) || (lpString1 == NULL) || (lpString2 == NULL)) { SetLastError(ERROR_INVALID_PARAMETER); return (0); } // // Make sure the appropriate sorting tables are available. If not, // return an error. // if ((pHashN->pSortkey == NULL) || (pHashN->IfIdeographFailure == TRUE)) { KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n")); SetLastError(ERROR_FILE_NOT_FOUND); return (0); } // // Call longer compare string if any of the following is true: // - compression locale // - either count is not -1 // - dwCmpFlags is not 0 or ignore case (see NOTE below) // - locale is Korean - script member weight adjustment needed // // NOTE: If the value of NORM_IGNORECASE ever changes, this // code should check for: // ( (dwCmpFlags != 0) && (dwCmpFlags != NORM_IGNORECASE) ) // Since NORM_IGNORECASE is equal to 1, we can optimize this // by checking for > 1. // dwCmpFlags &= (~LOCALE_USE_CP_ACP); fModify = IS_KOREAN(Locale); if ( (pHashN->IfCompression) || (cchCount1 > -1) || (cchCount2 > -1) || (dwCmpFlags > NORM_IGNORECASE) || (fModify == TRUE) ) { return (LongCompareStringW( pHashN, dwCmpFlags, lpString1, ((cchCount1 <= -1) ? -2 : cchCount1), lpString2, ((cchCount2 <= -1) ? -2 : cchCount2), fModify )); } // // Initialize string pointers. // pString1 = (LPWSTR)lpString1; pString2 = (LPWSTR)lpString2; // // Do a wchar by wchar compare. // while (TRUE) { // // See if characters are equal. // If characters are equal, increment pointers and continue // string compare. // // NOTE: Loop is unrolled 8 times for performance. 
// if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; if ((*pString1 != *pString2) || (*pString1 == 0)) { break; } pString1++; pString2++; } // // If strings are both at null terminators, return equal. // if (*pString1 == *pString2) { return (CSTR_EQUAL); } // // Initialize flags, pointers, and counters. // fIgnorePunct = FALSE; WhichDiacritic = 0; WhichCase = 0; WhichJamo = 0; WhichPunct1 = 0; WhichPunct2 = 0; pSave1 = NULL; pSave2 = NULL; ExtraWt1 = (DWORD)0; WhichExtra = (DWORD)0; // // Switch on the different flag options. This will speed up // the comparisons of two strings that are different. // // The only two possibilities in this optimized section are // no flags and the ignore case flag. // if (dwCmpFlags == 0) { Mask = CMP_MASKOFF_NONE; } else { Mask = CMP_MASKOFF_CW; } State = (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW; State |= (STATE_CW | STATE_JAMO_WEIGHT); // // Compare each character's sortkey weight in the two strings. 
// while ((*pString1 != 0) && (*pString2 != 0)) { Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1); Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2); Weight1 &= Mask; Weight2 &= Mask; if (Weight1 != Weight2) { BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1 BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2 WORD uw1 = GET_UNICODE_SM(&Weight1, sm1); // unicode weight 1 WORD uw2 = GET_UNICODE_SM(&Weight2, sm2); // unicode weight 2 BYTE dw1; // diacritic weight 1 BYTE dw2; // diacritic weight 2 BOOL fContinue; // flag to continue loop DWORD Wt; // temp weight holder WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1 WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2 // // If Unicode Weights are different and no special cases, // then we're done. Otherwise, we need to do extra checking. // // Must check ENTIRE string for any possibility of Unicode Weight // differences. As soon as a Unicode Weight difference is found, // then we're done. If no UW difference is found, then the // first Diacritic Weight difference is used. If no DW difference // is found, then use the first Case Difference. If no CW // difference is found, then use the first Extra Weight // difference. If no XW difference is found, then use the first // Special Weight difference. // if ((uw1 != uw2) || (sm1 == FAREAST_SPECIAL) || (sm1 == EXTENSION_A)) { // // Initialize the continue flag. // fContinue = FALSE; // // Check for Unsortable characters and skip them. // This needs to be outside the switch statement. If EITHER // character is unsortable, must skip it and start over. // if (sm1 == UNSORTABLE) { pString1++; fContinue = TRUE; } if (sm2 == UNSORTABLE) { pString2++; fContinue = TRUE; } if (fContinue) { continue; } // // Switch on the script member of string 1 and take care // of any special cases. // switch (sm1) { case ( NONSPACE_MARK ) : { // // Nonspace only - look at diacritic weight only. 
// if ((WhichDiacritic == 0) || (State & STATE_REVERSE_DW)) { WhichDiacritic = CSTR_GREATER_THAN; // // Remove state from state machine. // REMOVE_STATE(STATE_DW); } // // Adjust pointer and set flags. // pString1++; fContinue = TRUE; break; } case ( PUNCTUATION ) : { // // If the ignore punctuation flag is set, then skip // over the punctuation. // if (fIgnorePunct) { pString1++; fContinue = TRUE; } else if (sm2 != PUNCTUATION) { // // The character in the second string is // NOT punctuation. // if (WhichPunct2) { // // Set WP 2 to show that string 2 is smaller, // since a punctuation char had already been // found at an earlier position in string 2. // // Set the Ignore Punctuation flag so we just // skip over any other punctuation chars in // the string. // WhichPunct2 = CSTR_GREATER_THAN; fIgnorePunct = TRUE; } else { // // Set WP 1 to show that string 2 is smaller, // and that string 1 has had a punctuation // char - since no punctuation chars have // been found in string 2. // WhichPunct1 = CSTR_GREATER_THAN; } // // Advance pointer 1, and set flag to true. // pString1++; fContinue = TRUE; } // // Do NOT want to advance the pointer in string 1 if // string 2 is also a punctuation char. This will // be done later. // break; } case ( EXPANSION ) : { // // Save pointer in pString1 so that it can be // restored. // if (pSave1 == NULL) { pSave1 = pString1; } pString1 = pTmpBuf1; // // Expand character into temporary buffer. // pTmpBuf1[0] = GET_EXPANSION_1(&Weight1); pTmpBuf1[1] = GET_EXPANSION_2(&Weight1); // // Set cExpChar1 to the number of expansion characters // stored. // cExpChar1 = MAX_TBL_EXPANSION; fContinue = TRUE; break; } case ( FAREAST_SPECIAL ) : { if (sm2 != EXPANSION) { // // Get the weight for the far east special case // and store it in Weight1. // GET_FAREAST_WEIGHT( Weight1, uw1, Mask, lpString1, pString1, ExtraWt1, FALSE ); if (sm2 != FAREAST_SPECIAL) { // // The character in the second string is // NOT a fareast special char. 
// // Set each of weights 4, 5, 6, and 7 to show // that string 2 is smaller (if not already set). // if ((GET_WT_FOUR(&WhichExtra) == 0) && (GET_WT_FOUR(&ExtraWt1) != 0)) { GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_FIVE(&WhichExtra) == 0) && (GET_WT_FIVE(&ExtraWt1) != 0)) { GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_SIX(&WhichExtra) == 0) && (GET_WT_SIX(&ExtraWt1) != 0)) { GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN; } if ((GET_WT_SEVEN(&WhichExtra) == 0) && (GET_WT_SEVEN(&ExtraWt1) != 0)) { GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN; } } } break; } case ( JAMO_SPECIAL ) : { int ctr1; // dummy variables for FindJamoDifference LPWSTR pStr1 = pString1; LPWSTR pStr2 = pString2; // // Set the JamoFlag so we don't handle it again. // JamoFlag = TRUE; fContinue = FindJamoDifference( pHashN, &pStr1, &ctr1, -2, &Weight1, &pStr2, &ctr1, -2, &Weight2, &pLastJamo, &uw1, &uw2, &State, &WhichJamo, fModify ); if (WhichJamo) { return (WhichJamo); } pString1 = pStr1; pString2 = pStr2; break; } case ( EXTENSION_A ) : { // // Compare the weights. // if (Weight1 == Weight2) { // // Adjust pointers and set flag. // pString1++; pString2++; fContinue = TRUE; } else { // // Get the actual UW to compare. // if (sm2 == EXTENSION_A) { // // Set the UW values to be the AW and DW since // both strings contain an extension A char. // uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1), GET_DIACRITIC(&Weight1), FALSE ); uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2), GET_DIACRITIC(&Weight2), FALSE ); } else { // // Only string1 contains an extension A char, // so set the UW value to be the first UW // value for extension A (default values): // SM_EXT_A, AW_EXT_A // uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); } } break; } case ( UNSORTABLE ) : { // // Fill out the case statement so the compiler // will use a jump table. // break; } } // // Switch on the script member of string 2 and take care // of any special cases. 
// switch (sm2) { case ( NONSPACE_MARK ) : { // // Nonspace only - look at diacritic weight only. // if ((WhichDiacritic == 0) || (State & STATE_REVERSE_DW)) { WhichDiacritic = CSTR_LESS_THAN; // // Remove state from state machine. // REMOVE_STATE(STATE_DW); } // // Adjust pointer and set flags. // pString2++; fContinue = TRUE; break; } case ( PUNCTUATION ) : { // // If the ignore punctuation flag is set, then skip // over the punctuation. // if (fIgnorePunct) { // // Pointer 2 will be advanced after if-else // statement. // ; } else if (sm1 != PUNCTUATION) { // // The character in the first string is // NOT punctuation. // if (WhichPunct1) { // // Set WP 1 to show that string 1 is smaller, // since a punctuation char had already // been found at an earlier position in // string 1. // // Set the Ignore Punctuation flag so we just // skip over any other punctuation in the // string. // WhichPunct1 = CSTR_LESS_THAN; fIgnorePunct = TRUE; } else { // // Set WP 2 to show that string 1 is smaller, // and that string 2 has had a punctuation // char - since no punctuation chars have // been found in string 1. // WhichPunct2 = CSTR_LESS_THAN; } // // Pointer 2 will be advanced after if-else // statement. // } else { // // Both code points are punctuation. // // See if either of the strings has encountered // punctuation chars previous to this. // if (WhichPunct1) { // // String 1 has had a punctuation char, so // it should be the smaller string (since // both have punctuation chars). // WhichPunct1 = CSTR_LESS_THAN; } else if (WhichPunct2) { // // String 2 has had a punctuation char, so // it should be the smaller string (since // both have punctuation chars). // WhichPunct2 = CSTR_GREATER_THAN; } else { // // Position is the same, so compare the // special weights. Set WhichPunct1 to // the smaller special weight. // WhichPunct1 = (((GET_ALPHA_NUMERIC(&Weight1) < GET_ALPHA_NUMERIC(&Weight2))) ? 
CSTR_LESS_THAN : CSTR_GREATER_THAN); } // // Set the Ignore Punctuation flag so we just // skip over any other punctuation in the string. // fIgnorePunct = TRUE; // // Advance pointer 1. Pointer 2 will be // advanced after if-else statement. // pString1++; } // // Advance pointer 2 and set flag to true. // pString2++; fContinue = TRUE; break; } case ( EXPANSION ) : { // // Save pointer in pString1 so that it can be // restored. // if (pSave2 == NULL) { pSave2 = pString2; } pString2 = pTmpBuf2; // // Expand character into temporary buffer. // pTmpBuf2[0] = GET_EXPANSION_1(&Weight2); pTmpBuf2[1] = GET_EXPANSION_2(&Weight2); // // Set cExpChar2 to the number of expansion characters // stored. // cExpChar2 = MAX_TBL_EXPANSION; fContinue = TRUE; break; } case ( FAREAST_SPECIAL ) : { if (sm1 != EXPANSION) { // // Get the weight for the far east special case // and store it in Weight2. // GET_FAREAST_WEIGHT( Weight2, uw2, Mask, lpString2, pString2, ExtraWt2, FALSE ); if (sm1 != FAREAST_SPECIAL) { // // The character in the first string is // NOT a fareast special char. // // Set each of weights 4, 5, 6, and 7 to show // that string 1 is smaller (if not already set). // if ((GET_WT_FOUR(&WhichExtra) == 0) && (GET_WT_FOUR(&ExtraWt2) != 0)) { GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_FIVE(&WhichExtra) == 0) && (GET_WT_FIVE(&ExtraWt2) != 0)) { GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_SIX(&WhichExtra) == 0) && (GET_WT_SIX(&ExtraWt2) != 0)) { GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN; } if ((GET_WT_SEVEN(&WhichExtra) == 0) && (GET_WT_SEVEN(&ExtraWt2) != 0)) { GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN; } } else { // // Characters in both strings are fareast // special chars. // // Set each of weights 4, 5, 6, and 7 // appropriately (if not already set). // if ( (GET_WT_FOUR(&WhichExtra) == 0) && ( GET_WT_FOUR(&ExtraWt1) != GET_WT_FOUR(&ExtraWt2) ) ) { GET_WT_FOUR(&WhichExtra) = ( GET_WT_FOUR(&ExtraWt1) < GET_WT_FOUR(&ExtraWt2) ) ? 
CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_FIVE(&WhichExtra) == 0) && ( GET_WT_FIVE(&ExtraWt1) != GET_WT_FIVE(&ExtraWt2) ) ) { GET_WT_FIVE(&WhichExtra) = ( GET_WT_FIVE(&ExtraWt1) < GET_WT_FIVE(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_SIX(&WhichExtra) == 0) && ( GET_WT_SIX(&ExtraWt1) != GET_WT_SIX(&ExtraWt2) ) ) { GET_WT_SIX(&WhichExtra) = ( GET_WT_SIX(&ExtraWt1) < GET_WT_SIX(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } if ( (GET_WT_SEVEN(&WhichExtra) == 0) && ( GET_WT_SEVEN(&ExtraWt1) != GET_WT_SEVEN(&ExtraWt2) ) ) { GET_WT_SEVEN(&WhichExtra) = ( GET_WT_SEVEN(&ExtraWt1) < GET_WT_SEVEN(&ExtraWt2) ) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; } } } break; } case ( JAMO_SPECIAL ) : { if (!JamoFlag) { int ctr1, ctr2; // dummy variables for FindJamoDifference LPWSTR pStr1 = pString1; LPWSTR pStr2 = pString2; // // Set the JamoFlag so we don't handle it again. // JamoFlag = TRUE; fContinue = FindJamoDifference( pHashN, &pStr1, &ctr1, -2, &Weight1, &pStr2, &ctr2, -2, &Weight2, &pLastJamo, &uw1, &uw2, &State, &WhichJamo, fModify ); if (WhichJamo) { return (WhichJamo); } pString1 = pStr1; pString2 = pStr2; } else { JamoFlag = FALSE; } break; } case ( EXTENSION_A ) : { // // If sm1 is an extension A character, then // both sm1 and sm2 have been handled. We should // only get here when either sm1 is not an // extension A character or the two extension A // characters are different. // if (sm1 != EXTENSION_A) { // // Get the actual UW to compare. // // Only string2 contains an extension A char, // so set the UW value to be the first UW // value for extension A (default values): // SM_EXT_A, AW_EXT_A // uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify); } // // We should then fall through to the comparison // of the Unicode weights. // break; } case ( UNSORTABLE ) : { // // Fill out the case statement so the compiler // will use a jump table. // break; } } // // See if the comparison should start again. 
// if (fContinue) { continue; } // // We're not supposed to drop down into the state table if // unicode weights are different, so stop comparison and // return result of unicode weight comparison. // if (uw1 != uw2) { return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN); } } // // For each state in the state table, do the appropriate // comparisons. (UW1 == UW2) // if (State & (STATE_DW | STATE_REVERSE_DW)) { // // Get the diacritic weights. // dw1 = GET_DIACRITIC(&Weight1); dw2 = GET_DIACRITIC(&Weight2); if (dw1 != dw2) { // // Look ahead to see if diacritic follows a // minimum diacritic weight. If so, get the // diacritic weight of the nonspace mark. // while (*(pString1 + 1) != 0) { Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)); if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK) { dw1 += GET_DIACRITIC(&Wt); pString1++; } else { break; } } while (*(pString2 + 1) != 0) { Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)); if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK) { dw2 += GET_DIACRITIC(&Wt); pString2++; } else { break; } } // // Save which string has the smaller diacritic // weight if the diacritic weights are still // different. // if (dw1 != dw2) { WhichDiacritic = (dw1 < dw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; // // Remove state from state machine. // REMOVE_STATE(STATE_DW); } } } if (State & STATE_CW) { // // Get the case weights. // if (GET_CASE(&Weight1) != GET_CASE(&Weight2)) { // // Save which string has the smaller case weight. // WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2)) ? CSTR_LESS_THAN : CSTR_GREATER_THAN; // // Remove state from state machine. // REMOVE_STATE(STATE_CW); } } } // // Fixup the pointers. // POINTER_FIXUP(); } // // If the end of BOTH strings has been reached, then the unicode // weights match exactly. Check the diacritic, case and special // weights. If all are zero, then return success. Otherwise, // return the result of the weight difference. 
// // NOTE: The following checks MUST REMAIN IN THIS ORDER: // Diacritic, Case, Punctuation. // if (*pString1 == 0) { if (*pString2 == 0) { if (WhichDiacritic) { return (WhichDiacritic); } if (WhichCase) { return (WhichCase); } if (WhichExtra) { if (GET_WT_FOUR(&WhichExtra)) { return (GET_WT_FOUR(&WhichExtra)); } if (GET_WT_FIVE(&WhichExtra)) { return (GET_WT_FIVE(&WhichExtra)); } if (GET_WT_SIX(&WhichExtra)) { return (GET_WT_SIX(&WhichExtra)); } if (GET_WT_SEVEN(&WhichExtra)) { return (GET_WT_SEVEN(&WhichExtra)); } } if (WhichPunct1) { return (WhichPunct1); } if (WhichPunct2) { return (WhichPunct2); } return (CSTR_EQUAL); } else { // // String 2 is longer. // pString1 = pString2; } } // // Scan to the end of the longer string. // QUICK_SCAN_LONGER_STRING( pString1, ((*pString2 == 0) ? CSTR_GREATER_THAN : CSTR_LESS_THAN) ); } //////////////////////////////////////////////////////////////////////////// // // GetStringTypeExW // // Returns character type information about a particular Unicode string. // // 01-18-94 JulieB Created. //////////////////////////////////////////////////////////////////////////// BOOL WINAPI GetStringTypeExW( LCID Locale, DWORD dwInfoType, LPCWSTR lpSrcStr, int cchSrc, LPWORD lpCharType) { PLOC_HASH pHashN; // ptr to LOC hash node // // Invalid Parameter Check: // - Validate LCID // VALIDATE_LOCALE(Locale, pHashN, FALSE); if (pHashN == NULL) { SetLastError(ERROR_INVALID_PARAMETER); return (0); } // // Return the result of GetStringTypeW. // return (GetStringTypeW( dwInfoType, lpSrcStr, cchSrc, lpCharType )); } //////////////////////////////////////////////////////////////////////////// // // GetStringTypeW // // Returns character type information about a particular Unicode string. // // NOTE: The number of parameters is different from GetStringTypeA. // The 16-bit OLE product shipped GetStringTypeA with the wrong // parameters (ported from Chicago) and now we must support it. 
// // Use GetStringTypeEx to get the same set of parameters between // the A and W version. // // 05-31-91 JulieB Created. //////////////////////////////////////////////////////////////////////////// BOOL WINAPI GetStringTypeW( DWORD dwInfoType, LPCWSTR lpSrcStr, int cchSrc, LPWORD lpCharType) { int Ctr; // loop counter // // Invalid Parameter Check: // - lpSrcStr NULL // - cchSrc is 0 // - lpCharType NULL // - same buffer - src and destination // - (flags will be checked in switch statement below) // if ( (lpSrcStr == NULL) || (cchSrc == 0) || (lpCharType == NULL) || (lpSrcStr == lpCharType) ) { SetLastError(ERROR_INVALID_PARAMETER); return (FALSE); } // // If cchSrc is -1, then the source string is null terminated and we // need to get the length of the source string. Add one to the // length to include the null termination. // (This will always be at least 1.) // if (cchSrc <= -1) { cchSrc = NlsStrLenW(lpSrcStr) + 1; } // // Make sure the ctype table is mapped in. // if (GetCTypeFileInfo()) { SetLastError(ERROR_FILE_NOT_FOUND); return (FALSE); } // // Return the appropriate information in the lpCharType parameter // based on the dwInfoType parameter. // switch (dwInfoType) { case ( CT_CTYPE1 ) : { // // Return the ctype 1 information for the string. // for (Ctr = 0; Ctr < cchSrc; Ctr++) { lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType1); } break; } case ( CT_CTYPE2 ) : { // // Return the ctype 2 information. // for (Ctr = 0; Ctr < cchSrc; Ctr++) { lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType2); } break; } case ( CT_CTYPE3 ) : { // // Return the ctype 3 information. // for (Ctr = 0; Ctr < cchSrc; Ctr++) { lpCharType[Ctr] = GET_CTYPE(lpSrcStr[Ctr], CType3); } break; } default : { // // Invalid flag parameter, so return failure. // SetLastError(ERROR_INVALID_FLAGS); return (FALSE); } } // // Return success. 
// return (TRUE); } //-------------------------------------------------------------------------// // INTERNAL ROUTINES // //-------------------------------------------------------------------------// //////////////////////////////////////////////////////////////////////////// // // LongCompareStringW // // Compares two wide character strings of the same locale according to the // supplied locale handle. // // 05-31-91 JulieB Created. //////////////////////////////////////////////////////////////////////////// int LongCompareStringW( PLOC_HASH pHashN, DWORD dwCmpFlags, LPCWSTR lpString1, int cchCount1, LPCWSTR lpString2, int cchCount2, BOOL fModify) { int ctr1 = cchCount1; // loop counter for string 1 int ctr2 = cchCount2; // loop counter for string 2 register LPWSTR pString1; // ptr to go thru string 1 register LPWSTR pString2; // ptr to go thru string 2 BOOL IfCompress; // if compression in locale BOOL IfDblCompress1; // if double compression in string 1 BOOL IfDblCompress2; // if double compression in string 2 BOOL fEnd1; // if at end of string 1 BOOL fIgnorePunct; // flag to ignore punctuation (not symbol) BOOL fIgnoreDiacritic; // flag to ignore diacritics BOOL fIgnoreSymbol; // flag to ignore symbols BOOL fStringSort; // flag to use string sort DWORD State; // state table DWORD Mask; // mask for weights DWORD Weight1; // full weight of char - string 1 DWORD Weight2; // full weight of char - string 2 int JamoFlag = FALSE; LPCWSTR pLastJamo = lpString1; int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller int WhichJamo; // XW for Jamo int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller LPWSTR pSave1; // ptr to saved pString1 LPWSTR pSave2; // ptr to saved pString2 int cExpChar1, cExpChar2; // ct of expansions in tmp DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east) DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for 
// far east)


    //
    //  Initialize string pointers.  The const is cast away because the
    //  walking pointers are later redirected at the (writable) expansion
    //  buffers.
    //
    pString1 = (LPWSTR)lpString1;
    pString2 = (LPWSTR)lpString2;

    //
    //  Invalid Flags Check:
    //    - invalid flags
    //
    if (dwCmpFlags & CS_INVALID_FLAG)
    {
        SetLastError(ERROR_INVALID_FLAGS);
        return (0);
    }

    //
    //  See if we should stop on the null terminator regardless of the
    //  count values.  The original count values are stored in ctr1 and ctr2
    //  above, so it's ok to set these here.  (-2 is the value that
    //  NOT_END_STRING / AT_STRING_END test for.)
    //
    if (dwCmpFlags & NORM_STOP_ON_NULL)
    {
        cchCount1 = cchCount2 = -2;
    }

    //
    //  Check if compression in the given locale.  If not, then
    //  try a wchar by wchar compare.  If strings are equal, this
    //  will be quick.
    //
    if ((IfCompress = pHashN->IfCompression) == FALSE)
    {
        //
        //  Compare each wide character in the two strings.
        //
        while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
                NOT_END_STRING(ctr2, pString2, cchCount2) )
        {
            //
            //  See if characters are equal.
            //
            if (*pString1 == *pString2)
            {
                //
                //  Characters are equal, so increment pointers,
                //  decrement counters, and continue string compare.
                //
                pString1++;
                pString2++;
                ctr1--;
                ctr2--;
            }
            else
            {
                //
                //  Difference was found.  Fall into the sortkey
                //  check below.
                //
                break;
            }
        }

        //
        //  If the end of BOTH strings has been reached, then the strings
        //  match exactly.  Return success.
        //
        if ( AT_STRING_END(ctr1, pString1, cchCount1) &&
             AT_STRING_END(ctr2, pString2, cchCount2) )
        {
            return (CSTR_EQUAL);
        }
    }

    //
    //  Initialize flags, pointers, and counters.
    //  Both fIgnorePunct and fIgnoreSymbol start out as the
    //  NORM_IGNORESYMBOLS setting; fIgnorePunct may be turned on later
    //  by the word-sort punctuation handling.
    //
    fIgnorePunct = dwCmpFlags & NORM_IGNORESYMBOLS;
    fIgnoreDiacritic = dwCmpFlags & NORM_IGNORENONSPACE;
    fIgnoreSymbol = fIgnorePunct;
    fStringSort = dwCmpFlags & SORT_STRINGSORT;
    WhichDiacritic = 0;
    WhichCase = 0;
    WhichJamo = 0;
    WhichPunct1 = 0;
    WhichPunct2 = 0;
    pSave1 = NULL;
    pSave2 = NULL;
    ExtraWt1 = (DWORD)0;
    WhichExtra = (DWORD)0;

    //
    //  Set the weights to be invalid.  This flags whether or not to
    //  recompute the weights next time through the loop.  It also flags
    //  whether or not to start over (continue) in the loop.
    //
    Weight1 = CMP_INVALID_WEIGHT;
    Weight2 = CMP_INVALID_WEIGHT;

    //
    //  Switch on the different flag options.  This will speed up
    //  the comparisons of two strings that are different.
    //
    //  The four cases cover every combination of the two masked bits,
    //  so Mask is always assigned.  A diacritic state is only entered
    //  when NORM_IGNORENONSPACE is not set.
    //
    State = STATE_CW | STATE_JAMO_WEIGHT;
    switch (dwCmpFlags & (NORM_IGNORECASE | NORM_IGNORENONSPACE))
    {
        case ( 0 ) :
        {
            Mask = CMP_MASKOFF_NONE;
            State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;

            break;
        }

        case ( NORM_IGNORECASE ) :
        {
            Mask = CMP_MASKOFF_CW;
            State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;

            break;
        }

        case ( NORM_IGNORENONSPACE ) :
        {
            Mask = CMP_MASKOFF_DW;

            break;
        }

        case ( NORM_IGNORECASE | NORM_IGNORENONSPACE ) :
        {
            Mask = CMP_MASKOFF_DW_CW;

            break;
        }
    }

    //
    //  Narrow the mask further for the kana type / width flags.  The case
    //  weight state is dropped only when width is ignored together with
    //  NORM_IGNORECASE.
    //
    switch (dwCmpFlags & (NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH))
    {
        case ( 0 ) :
        {
            break;
        }

        case ( NORM_IGNOREKANATYPE ) :
        {
            Mask &= CMP_MASKOFF_KANA;

            break;
        }

        case ( NORM_IGNOREWIDTH ) :
        {
            Mask &= CMP_MASKOFF_WIDTH;

            if (dwCmpFlags & NORM_IGNORECASE)
            {
                REMOVE_STATE(STATE_CW);
            }

            break;
        }

        case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
        {
            Mask &= CMP_MASKOFF_KANA_WIDTH;

            if (dwCmpFlags & NORM_IGNORECASE)
            {
                REMOVE_STATE(STATE_CW);
            }

            break;
        }
    }

    //
    //  Compare each character's sortkey weight in the two strings.
    //
    while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
            NOT_END_STRING(ctr2, pString2, cchCount2) )
    {
        //
        //  Only (re)load a weight that was invalidated; a weight that is
        //  still valid was deliberately carried over from the previous
        //  pass.
        //
        if (Weight1 == CMP_INVALID_WEIGHT)
        {
            Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
            Weight1 &= Mask;
        }
        if (Weight2 == CMP_INVALID_WEIGHT)
        {
            Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
            Weight2 &= Mask;
        }

        //
        //  If compression locale, then need to check for compression
        //  characters even if the weights are equal.  If it's not a
        //  compression locale, then we don't need to check anything
        //  if the weights are equal.
        //
        if ( (IfCompress) &&
             (GET_COMPRESSION(&Weight1) || GET_COMPRESSION(&Weight2)) )
        {
            int ctr;                   // loop counter
            PCOMPRESS_3 pComp3;        // ptr to compress 3 table
            PCOMPRESS_2 pComp2;        // ptr to compress 2 table
            int If1;                   // if compression found in string 1
            int If2;                   // if compression found in string 2
            int CompVal;               // compression value
            int IfEnd1;                // if exists 1 more char in string 1
            int IfEnd2;                // if exists 1 more char in string 2


            //
            //  Check for compression in the weights.
            //  CompVal is the larger of the two compression values and
            //  selects where the table search starts.
            //
            If1 = GET_COMPRESSION(&Weight1);
            If2 = GET_COMPRESSION(&Weight2);
            CompVal = ((If1 > If2) ? If1 : If2);

            //
            //  IfEndN is nonzero when string N has no character after the
            //  current one.
            //
            IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
            IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);

            if (pHashN->IfDblCompression == FALSE)
            {
                //
                //  NO double compression, so don't check for it.
                //
                switch (CompVal)
                {
                    //
                    //  Check for 3 characters compressing to 1.
                    //
                    case ( COMPRESS_3_MASK ) :
                    {
                        //
                        //  Check character in string 1 and string 2.
                        //  The table is only walked when at least one
                        //  string still has two more characters.
                        //
                        if ( ((If1) && (!IfEnd1) &&
                              !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1)) ||
                             ((If2) && (!IfEnd2) &&
                              !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2)) )
                        {
                            ctr = pHashN->pCompHdr->Num3;
                            pComp3 = pHashN->pCompress3;
                            for (; ctr > 0; ctr--, pComp3++)
                            {
                                //
                                //  Check character in string 1.
                                //
                                if ( (If1) && (!IfEnd1) &&
                                     !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) &&
                                     (pComp3->UCP1 == *pString1) &&
                                     (pComp3->UCP2 == *(pString1 + 1)) &&
                                     (pComp3->UCP3 == *(pString1 + 2)) )
                                {
                                    //
                                    //  Found compression for string 1.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter.
                                    //
                                    Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
                                    Weight1 &= Mask;
                                    pString1 += 2;
                                    ctr1 -= 2;

                                    //
                                    //  Set boolean for string 1 - search is
                                    //  complete.
                                    //
                                    If1 = 0;

                                    //
                                    //  Break out of loop if both searches are
                                    //  done.
                                    //
                                    if (If2 == 0)
                                    {
                                        break;
                                    }
                                }

                                //
                                //  Check character in string 2.
                                //
                                if ( (If2) && (!IfEnd2) &&
                                     !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) &&
                                     (pComp3->UCP1 == *pString2) &&
                                     (pComp3->UCP2 == *(pString2 + 1)) &&
                                     (pComp3->UCP3 == *(pString2 + 2)) )
                                {
                                    //
                                    //  Found compression for string 2.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter.
                                    //
                                    Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
                                    Weight2 &= Mask;
                                    pString2 += 2;
                                    ctr2 -= 2;

                                    //
                                    //  Set boolean for string 2 - search is
                                    //  complete.
                                    //
                                    If2 = 0;

                                    //
                                    //  Break out of loop if both searches are
                                    //  done.
                                    //
                                    if (If1 == 0)
                                    {
                                        break;
                                    }
                                }
                            }

                            //
                            //  ctr is still positive only when the loop
                            //  was left early, i.e. both searches are
                            //  done - skip the 2-character check.
                            //
                            if (ctr > 0)
                            {
                                break;
                            }
                        }

                        //
                        //  Fall through if not found.
                        //
                    }

                    //
                    //  Check for 2 characters compressing to 1.
                    //
                    case ( COMPRESS_2_MASK ) :
                    {
                        //
                        //  Check character in string 1 and string 2.
                        //
                        if ( ((If1) && (!IfEnd1)) ||
                             ((If2) && (!IfEnd2)) )
                        {
                            ctr = pHashN->pCompHdr->Num2;
                            pComp2 = pHashN->pCompress2;
                            for (; ((ctr > 0) && (If1 || If2)); ctr--, pComp2++)
                            {
                                //
                                //  Check character in string 1.
                                //
                                if ( (If1) &&
                                     (!IfEnd1) &&
                                     (pComp2->UCP1 == *pString1) &&
                                     (pComp2->UCP2 == *(pString1 + 1)) )
                                {
                                    //
                                    //  Found compression for string 1.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter.
                                    //
                                    Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
                                    Weight1 &= Mask;
                                    pString1++;
                                    ctr1--;

                                    //
                                    //  Set boolean for string 1 - search is
                                    //  complete.
                                    //
                                    If1 = 0;

                                    //
                                    //  Break out of loop if both searches are
                                    //  done.
                                    //
                                    if (If2 == 0)
                                    {
                                        break;
                                    }
                                }

                                //
                                //  Check character in string 2.
                                //
                                if ( (If2) &&
                                     (!IfEnd2) &&
                                     (pComp2->UCP1 == *pString2) &&
                                     (pComp2->UCP2 == *(pString2 + 1)) )
                                {
                                    //
                                    //  Found compression for string 2.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter.
                                    //
                                    Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
                                    Weight2 &= Mask;
                                    pString2++;
                                    ctr2--;

                                    //
                                    //  Set boolean for string 2 - search is
                                    //  complete.
                                    //
                                    If2 = 0;

                                    //
                                    //  Break out of loop if both searches are
                                    //  done.
                                    //
                                    if (If1 == 0)
                                    {
                                        break;
                                    }
                                }
                            }
                            if (ctr > 0)
                            {
                                break;
                            }
                        }
                    }
                }
            }
            else if (!IfEnd1 && !IfEnd2)
            {
                //
                //  Double Compression exists, so must check for it.
                //
                //  The assignment inside each condition is intentional:
                //  the flag is recorded and tested in one step.  A string
                //  is treated as double compressed when its current and
                //  next code points have equal weights once the case
                //  weight is masked off.
                //
                if (IfDblCompress1 =
                       ((GET_DWORD_WEIGHT(pHashN, *pString1) & CMP_MASKOFF_CW) ==
                        (GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)) & CMP_MASKOFF_CW)))
                {
                    //
                    //  Advance past the first code point to get to the
                    //  compression character.
                    //
                    pString1++;
                    ctr1--;
                    IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
                }

                if (IfDblCompress2 =
                       ((GET_DWORD_WEIGHT(pHashN, *pString2) & CMP_MASKOFF_CW) ==
                        (GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)) & CMP_MASKOFF_CW)))
                {
                    //
                    //  Advance past the first code point to get to the
                    //  compression character.
                    //
                    pString2++;
                    ctr2--;
                    IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
                }

                switch (CompVal)
                {
                    //
                    //  Check for 3 characters compressing to 1.
                    //
                    case ( COMPRESS_3_MASK ) :
                    {
                        //
                        //  Check character in string 1.
                        //
                        if ( (If1) && (!IfEnd1) &&
                             !AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) )
                        {
                            ctr = pHashN->pCompHdr->Num3;
                            pComp3 = pHashN->pCompress3;
                            for (; ctr > 0; ctr--, pComp3++)
                            {
                                //
                                //  Check character in string 1.
                                //
                                if ( (pComp3->UCP1 == *pString1) &&
                                     (pComp3->UCP2 == *(pString1 + 1)) &&
                                     (pComp3->UCP3 == *(pString1 + 2)) )
                                {
                                    //
                                    //  Found compression for string 1.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter
                                    //  (only when not double compressed).
                                    //
                                    Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
                                    Weight1 &= Mask;
                                    if (!IfDblCompress1)
                                    {
                                        pString1 += 2;
                                        ctr1 -= 2;
                                    }

                                    //
                                    //  Set boolean for string 1 - search is
                                    //  complete.
                                    //
                                    If1 = 0;
                                    break;
                                }
                            }
                        }

                        //
                        //  Check character in string 2.
                        //
                        if ( (If2) && (!IfEnd2) &&
                             !AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) )
                        {
                            ctr = pHashN->pCompHdr->Num3;
                            pComp3 = pHashN->pCompress3;
                            for (; ctr > 0; ctr--, pComp3++)
                            {
                                //
                                //  Check character in string 2.
                                //
                                if ( (pComp3->UCP1 == *pString2) &&
                                     (pComp3->UCP2 == *(pString2 + 1)) &&
                                     (pComp3->UCP3 == *(pString2 + 2)) )
                                {
                                    //
                                    //  Found compression for string 2.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter
                                    //  (only when not double compressed).
                                    //
                                    Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
                                    Weight2 &= Mask;
                                    if (!IfDblCompress2)
                                    {
                                        pString2 += 2;
                                        ctr2 -= 2;
                                    }

                                    //
                                    //  Set boolean for string 2 - search is
                                    //  complete.
                                    //
                                    If2 = 0;
                                    break;
                                }
                            }
                        }

                        //
                        //  Fall through if not found.
                        //
                        if ((If1 == 0) && (If2 == 0))
                        {
                            break;
                        }
                    }

                    //
                    //  Check for 2 characters compressing to 1.
                    //
                    case ( COMPRESS_2_MASK ) :
                    {
                        //
                        //  Check character in string 1.
                        //
                        if ((If1) && (!IfEnd1))
                        {
                            ctr = pHashN->pCompHdr->Num2;
                            pComp2 = pHashN->pCompress2;
                            for (; ctr > 0; ctr--, pComp2++)
                            {
                                //
                                //  Check character in string 1.
                                //
                                if ((pComp2->UCP1 == *pString1) &&
                                    (pComp2->UCP2 == *(pString1 + 1)))
                                {
                                    //
                                    //  Found compression for string 1.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter
                                    //  (only when not double compressed).
                                    //
                                    Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
                                    Weight1 &= Mask;
                                    if (!IfDblCompress1)
                                    {
                                        pString1++;
                                        ctr1--;
                                    }

                                    //
                                    //  Set boolean for string 1 - search is
                                    //  complete.
                                    //
                                    If1 = 0;
                                    break;
                                }
                            }
                        }

                        //
                        //  Check character in string 2.
                        //
                        if ((If2) && (!IfEnd2))
                        {
                            ctr = pHashN->pCompHdr->Num2;
                            pComp2 = pHashN->pCompress2;
                            for (; ctr > 0; ctr--, pComp2++)
                            {
                                //
                                //  Check character in string 2.
                                //
                                if ((pComp2->UCP1 == *pString2) &&
                                    (pComp2->UCP2 == *(pString2 + 1)))
                                {
                                    //
                                    //  Found compression for string 2.
                                    //  Get new weight and mask it.
                                    //  Increment pointer and decrement counter
                                    //  (only when not double compressed).
                                    //
                                    Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
                                    Weight2 &= Mask;
                                    if (!IfDblCompress2)
                                    {
                                        pString2++;
                                        ctr2--;
                                    }

                                    //
                                    //  Set boolean for string 2 - search is
                                    //  complete.
                                    //
                                    If2 = 0;
                                    break;
                                }
                            }
                        }
                    }
                }

                //
                //  Reset the pointer back to the beginning of the double
                //  compression.  Pointer fixup at the end will advance
                //  them correctly.
                //
                //  If double compression, we advanced the pointer at
                //  the beginning of the switch statement.  If double
                //  compression character was actually found, the pointer
                //  was NOT advanced.  We now want to decrement the pointer
                //  to put it back to where it was.
                //
                //  The next time through, the pointer will be pointing to
                //  the regular compression part of the string.
                //
                if (IfDblCompress1)
                {
                    pString1--;
                    ctr1++;
                }
                if (IfDblCompress2)
                {
                    pString2--;
                    ctr2++;
                }
            }
        }

        //
        //  Check the weights again.
        //  Extension A characters always take this path, even when the
        //  masked weights are equal.
        //
        if ((Weight1 != Weight2) ||
            (GET_SCRIPT_MEMBER(&Weight1) == EXTENSION_A))
        {
            //
            //  Weights are still not equal, even after compression
            //  check, so compare the different weights.
            //
            BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1);                // script member 1
            BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2);                // script member 2
            WORD uw1 = GET_UNICODE_SM_MOD(&Weight1, sm1, fModify); // unicode weight 1
            WORD uw2 = GET_UNICODE_SM_MOD(&Weight2, sm2, fModify); // unicode weight 2
            BYTE dw1;                                              // diacritic weight 1
            BYTE dw2;                                              // diacritic weight 2
            DWORD Wt;                                              // temp weight holder
            WCHAR pTmpBuf1[MAX_TBL_EXPANSION];                     // temp buffer for exp 1
            WCHAR pTmpBuf2[MAX_TBL_EXPANSION];                     // temp buffer for exp 2


            //
            //  If Unicode Weights are different and no special cases,
            //  then we're done.  Otherwise, we need to do extra checking.
            //
            //  Must check ENTIRE string for any possibility of Unicode Weight
            //  differences.  As soon as a Unicode Weight difference is found,
            //  then we're done.  If no UW difference is found, then the
            //  first Diacritic Weight difference is used.  If no DW difference
            //  is found, then use the first Case Difference.  If no CW
            //  difference is found, then use the first Extra Weight
            //  difference.  If no XW difference is found, then use the first
            //  Special Weight difference.
            //
            if ((uw1 != uw2) ||
                ((sm1 <= SYMBOL_5) && (sm1 >= FAREAST_SPECIAL)))
            {
                //
                //  Check for Unsortable characters and skip them.
                //  This needs to be outside the switch statement.  If EITHER
                //  character is unsortable, must skip it and start over.
                //
                if (sm1 == UNSORTABLE)
                {
                    pString1++;
                    ctr1--;
                    Weight1 = CMP_INVALID_WEIGHT;
                }
                if (sm2 == UNSORTABLE)
                {
                    pString2++;
                    ctr2--;
                    Weight2 = CMP_INVALID_WEIGHT;
                }

                //
                //  Check for Ignore Nonspace and Ignore Symbol.  If
                //  Ignore Nonspace is set and either character is a
                //  nonspace mark only, then we need to advance the
                //  pointer to skip over the character and continue.
                //  If Ignore Symbol is set and either character is a
                //  punctuation char, then we need to advance the
                //  pointer to skip over the character and continue.
                //
                //  This step is necessary so that a string with a
                //  nonspace mark and a punctuation char following one
                //  another are properly ignored when one or both of
                //  the ignore flags is set.
                //
                if (fIgnoreDiacritic)
                {
                    if (sm1 == NONSPACE_MARK)
                    {
                        pString1++;
                        ctr1--;
                        Weight1 = CMP_INVALID_WEIGHT;
                    }
                    if (sm2 == NONSPACE_MARK)
                    {
                        pString2++;
                        ctr2--;
                        Weight2 = CMP_INVALID_WEIGHT;
                    }
                }
                if (fIgnoreSymbol)
                {
                    if (sm1 == PUNCTUATION)
                    {
                        pString1++;
                        ctr1--;
                        Weight1 = CMP_INVALID_WEIGHT;
                    }
                    if (sm2 == PUNCTUATION)
                    {
                        pString2++;
                        ctr2--;
                        Weight2 = CMP_INVALID_WEIGHT;
                    }
                }

                //
                //  Something was skipped above - restart the loop so the
                //  end-of-string test and the weight fetch run again.
                //
                if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
                {
                    continue;
                }

                //
                //  Switch on the script member of string 1 and take care
                //  of any special cases.
                //
                switch (sm1)
                {
                    case ( NONSPACE_MARK ) :
                    {
                        //
                        //  Nonspace only - look at diacritic weight only.
                        //
                        if (!fIgnoreDiacritic)
                        {
                            if ((WhichDiacritic == 0) ||
                                (State & STATE_REVERSE_DW))
                            {
                                WhichDiacritic = CSTR_GREATER_THAN;

                                //
                                //  Remove state from state machine.
                                //
                                REMOVE_STATE(STATE_DW);
                            }
                        }

                        //
                        //  Adjust pointer and counter and set flags.
                        //
                        pString1++;
                        ctr1--;
                        Weight1 = CMP_INVALID_WEIGHT;

                        break;
                    }
                    case ( SYMBOL_1 ) :
                    case ( SYMBOL_2 ) :
                    case ( SYMBOL_3 ) :
                    case ( SYMBOL_4 ) :
                    case ( SYMBOL_5 ) :
                    {
                        //
                        //  If the ignore symbol flag is set, then skip over
                        //  the symbol.
                        //
                        if (fIgnoreSymbol)
                        {
                            pString1++;
                            ctr1--;
                            Weight1 = CMP_INVALID_WEIGHT;
                        }

                        break;
                    }
                    case ( PUNCTUATION ) :
                    {
                        //
                        //  If the ignore punctuation flag is set, then skip
                        //  over the punctuation char.
                        //
                        if (fIgnorePunct)
                        {
                            pString1++;
                            ctr1--;
                            Weight1 = CMP_INVALID_WEIGHT;
                        }
                        else if (!fStringSort)
                        {
                            //
                            //  Use WORD sort method.
                            //
                            if (sm2 != PUNCTUATION)
                            {
                                //
                                //  The character in the second string is
                                //  NOT punctuation.
                                //
                                if (WhichPunct2)
                                {
                                    //
                                    //  Set WP 2 to show that string 2 is
                                    //  smaller, since a punctuation char had
                                    //  already been found at an earlier
                                    //  position in string 2.
                                    //
                                    //  Set the Ignore Punctuation flag so we
                                    //  just skip over any other punctuation
                                    //  chars in the string.
                                    //
                                    WhichPunct2 = CSTR_GREATER_THAN;
                                    fIgnorePunct = TRUE;
                                }
                                else
                                {
                                    //
                                    //  Set WP 1 to show that string 2 is
                                    //  smaller, and that string 1 has had
                                    //  a punctuation char - since no
                                    //  punctuation chars have been found
                                    //  in string 2.
                                    //
                                    WhichPunct1 = CSTR_GREATER_THAN;
                                }

                                //
                                //  Advance pointer 1 and decrement counter 1.
                                //
                                pString1++;
                                ctr1--;
                                Weight1 = CMP_INVALID_WEIGHT;
                            }

                            //
                            //  Do NOT want to advance the pointer in string 1
                            //  if string 2 is also a punctuation char.  This
                            //  will be done later.
                            //
                        }

                        break;
                    }
                    case ( EXPANSION ) :
                    {
                        //
                        //  Save pointer in pString1 so that it can be
                        //  restored.
                        //
                        if (pSave1 == NULL)
                        {
                            pSave1 = pString1;
                        }
                        pString1 = pTmpBuf1;

                        //
                        //  Add one to counter so that subtraction doesn't end
                        //  comparison prematurely.
                        //
                        ctr1++;

                        //
                        //  Expand character into temporary buffer.
                        //
                        pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
                        pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);

                        //
                        //  Set cExpChar1 to the number of expansion characters
                        //  stored.
                        //
                        cExpChar1 = MAX_TBL_EXPANSION;

                        Weight1 = CMP_INVALID_WEIGHT;
                        break;
                    }
                    case ( FAREAST_SPECIAL ) :
                    {
                        if (sm2 != EXPANSION)
                        {
                            //
                            //  Get the weight for the far east special case
                            //  and store it in Weight1.
                            //
                            GET_FAREAST_WEIGHT( Weight1,
                                                uw1,
                                                Mask,
                                                lpString1,
                                                pString1,
                                                ExtraWt1,
                                                fModify );

                            if (sm2 != FAREAST_SPECIAL)
                            {
                                //
                                //  The character in the second string is
                                //  NOT a fareast special char.
                                //
                                //  Set each of weights 4, 5, 6, and 7 to show
                                //  that string 2 is smaller (if not already set).
                                //
                                if ((GET_WT_FOUR(&WhichExtra) == 0) &&
                                    (GET_WT_FOUR(&ExtraWt1) != 0))
                                {
                                    GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                                if ((GET_WT_FIVE(&WhichExtra) == 0) &&
                                    (GET_WT_FIVE(&ExtraWt1) != 0))
                                {
                                    GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                                if ((GET_WT_SIX(&WhichExtra) == 0) &&
                                    (GET_WT_SIX(&ExtraWt1) != 0))
                                {
                                    GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                                if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
                                    (GET_WT_SEVEN(&ExtraWt1) != 0))
                                {
                                    GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                            }
                        }

                        break;
                    }
                    case ( JAMO_SPECIAL ) :
                    {
                        LPWSTR pStr1 = pString1;
                        LPWSTR pStr2 = pString2;

                        //
                        //  Set the JamoFlag so we don't handle it again.
                        //
                        JamoFlag = TRUE;

                        //
                        //  NOTE(review): the forward declaration of
                        //  FindJamoDifference takes LPCWSTR* for the string
                        //  pointers and int* for the state, while LPWSTR*
                        //  (&pStr1, &pStr2) and DWORD* (&State) are passed
                        //  here.  The function's return value is not used;
                        //  WhichJamo is tested instead.
                        //
                        FindJamoDifference(
                            pHashN,
                            &pStr1, &ctr1, cchCount1, &Weight1,
                            &pStr2, &ctr2, cchCount2, &Weight2,
                            &pLastJamo,
                            &uw1, &uw2,
                            &State,
                            &WhichJamo,
                            fModify );

                        if (WhichJamo)
                        {
                            return (WhichJamo);
                        }
                        pString1 = pStr1;
                        pString2 = pStr2;

                        break;
                    }
                    case ( EXTENSION_A ) :
                    {
                        //
                        //  Get the full weight in case DW got masked.
                        //
                        Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
                        if (sm2 == EXTENSION_A)
                        {
                            Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
                        }

                        //
                        //  Compare the weights.
                        //
                        if (Weight1 == Weight2)
                        {
                            //
                            //  Adjust pointers and counters and set flags.
                            //
                            pString1++;  pString2++;
                            ctr1--;  ctr2--;
                            Weight1 = CMP_INVALID_WEIGHT;
                            Weight2 = CMP_INVALID_WEIGHT;
                        }
                        else
                        {
                            //
                            //  Get the actual UW to compare.
                            //
                            if (sm2 == EXTENSION_A)
                            {
                                //
                                //  Set the UW values to be the AW and DW since
                                //  both strings contain an extension A char.
                                //
                                uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
                                                       GET_DIACRITIC(&Weight1),
                                                       FALSE );
                                uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
                                                       GET_DIACRITIC(&Weight2),
                                                       FALSE );
                            }
                            else
                            {
                                //
                                //  Only string1 contains an extension A char,
                                //  so set the UW value to be the first UW
                                //  value for extension A (default values):
                                //    SM_EXT_A, AW_EXT_A
                                //
                                uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
                            }
                        }

                        break;
                    }
                    case ( UNSORTABLE ) :
                    {
                        //
                        //  Fill out the case statement so the compiler
                        //  will use a jump table.
                        //
                        break;
                    }
                }

                //
                //  Switch on the script member of string 2 and take care
                //  of any special cases.
                //
                switch (sm2)
                {
                    case ( NONSPACE_MARK ) :
                    {
                        //
                        //  Nonspace only - look at diacritic weight only.
                        //
                        if (!fIgnoreDiacritic)
                        {
                            if ((WhichDiacritic == 0) ||
                                (State & STATE_REVERSE_DW))
                            {
                                WhichDiacritic = CSTR_LESS_THAN;

                                //
                                //  Remove state from state machine.
                                //
                                REMOVE_STATE(STATE_DW);
                            }
                        }

                        //
                        //  Adjust pointer and counter and set flags.
                        //
                        pString2++;
                        ctr2--;
                        Weight2 = CMP_INVALID_WEIGHT;

                        break;
                    }
                    case ( SYMBOL_1 ) :
                    case ( SYMBOL_2 ) :
                    case ( SYMBOL_3 ) :
                    case ( SYMBOL_4 ) :
                    case ( SYMBOL_5 ) :
                    {
                        //
                        //  If the ignore symbol flag is set, then skip over
                        //  the symbol.
                        //
                        if (fIgnoreSymbol)
                        {
                            pString2++;
                            ctr2--;
                            Weight2 = CMP_INVALID_WEIGHT;
                        }

                        break;
                    }
                    case ( PUNCTUATION ) :
                    {
                        //
                        //  If the ignore punctuation flag is set, then
                        //  skip over the punctuation char.
                        //
                        if (fIgnorePunct)
                        {
                            //
                            //  Advance pointer 2 and decrement counter 2.
                            //
                            pString2++;
                            ctr2--;
                            Weight2 = CMP_INVALID_WEIGHT;
                        }
                        else if (!fStringSort)
                        {
                            //
                            //  Use WORD sort method.
                            //
                            if (sm1 != PUNCTUATION)
                            {
                                //
                                //  The character in the first string is
                                //  NOT punctuation.
                                //
                                if (WhichPunct1)
                                {
                                    //
                                    //  Set WP 1 to show that string 1 is
                                    //  smaller, since a punctuation char had
                                    //  already been found at an earlier
                                    //  position in string 1.
                                    //
                                    //  Set the Ignore Punctuation flag so we
                                    //  just skip over any other punctuation
                                    //  chars in the string.
                                    //
                                    WhichPunct1 = CSTR_LESS_THAN;
                                    fIgnorePunct = TRUE;
                                }
                                else
                                {
                                    //
                                    //  Set WP 2 to show that string 1 is
                                    //  smaller, and that string 2 has had
                                    //  a punctuation char - since no
                                    //  punctuation chars have been found
                                    //  in string 1.
                                    //
                                    WhichPunct2 = CSTR_LESS_THAN;
                                }

                                //
                                //  Pointer 2 and counter 2 will be updated
                                //  after if-else statement.
                                //
                            }
                            else
                            {
                                //
                                //  Both code points are punctuation chars.
                                //
                                //  See if either of the strings has encountered
                                //  punctuation chars previous to this.
                                //
                                if (WhichPunct1)
                                {
                                    //
                                    //  String 1 has had a punctuation char, so
                                    //  it should be the smaller string (since
                                    //  both have punctuation chars).
                                    //
                                    WhichPunct1 = CSTR_LESS_THAN;
                                }
                                else if (WhichPunct2)
                                {
                                    //
                                    //  String 2 has had a punctuation char, so
                                    //  it should be the smaller string (since
                                    //  both have punctuation chars).
                                    //
                                    WhichPunct2 = CSTR_GREATER_THAN;
                                }
                                else
                                {
                                    BYTE aw1 = GET_ALPHA_NUMERIC(&Weight1);
                                    BYTE aw2 = GET_ALPHA_NUMERIC(&Weight2);

                                    if (aw1 == aw2)
                                    {
                                        //
                                        //  Same alphanumeric weight, so the
                                        //  case weights decide.  Equal case
                                        //  weights leave WhichPunct1 at 0.
                                        //
                                        BYTE cw1 = GET_CASE(&Weight1);
                                        BYTE cw2 = GET_CASE(&Weight2);

                                        if (cw1 < cw2)
                                        {
                                            WhichPunct1 = CSTR_LESS_THAN;
                                        }
                                        else if (cw1 > cw2)
                                        {
                                            WhichPunct1 = CSTR_GREATER_THAN;
                                        }
                                    }
                                    else
                                    {
                                        //
                                        //  Position is the same, so compare the
                                        //  special weights.   Set WhichPunct1 to
                                        //  the smaller special weight.
                                        //
                                        WhichPunct1 = (aw1 < aw2
                                                       ? CSTR_LESS_THAN
                                                       : CSTR_GREATER_THAN);
                                    }
                                }

                                //
                                //  Set the Ignore Punctuation flag.
                                //
                                fIgnorePunct = TRUE;

                                //
                                //  Advance pointer 1 and decrement counter 1.
                                //  Pointer 2 and counter 2 will be updated
                                //  after if-else statement.
                                //
                                pString1++;
                                ctr1--;
                                Weight1 = CMP_INVALID_WEIGHT;
                            }

                            //
                            //  Advance pointer 2 and decrement counter 2.
                            //
                            pString2++;
                            ctr2--;
                            Weight2 = CMP_INVALID_WEIGHT;
                        }

                        break;
                    }
                    case ( EXPANSION ) :
                    {
                        //
                        //  Save pointer in pString2 so that it can be restored.
                        //
                        if (pSave2 == NULL)
                        {
                            pSave2 = pString2;
                        }
                        pString2 = pTmpBuf2;

                        //
                        //  Add one to counter so that subtraction doesn't end
                        //  comparison prematurely.
                        //
                        ctr2++;

                        //
                        //  Expand character into temporary buffer.
                        //
                        pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
                        pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);

                        //
                        //  Set cExpChar2 to the number of expansion characters
                        //  stored.
                        //
                        cExpChar2 = MAX_TBL_EXPANSION;

                        Weight2 = CMP_INVALID_WEIGHT;
                        break;
                    }
                    case ( FAREAST_SPECIAL ) :
                    {
                        if (sm1 != EXPANSION)
                        {
                            //
                            //  Get the weight for the far east special case
                            //  and store it in Weight2.
                            //
                            GET_FAREAST_WEIGHT( Weight2,
                                                uw2,
                                                Mask,
                                                lpString2,
                                                pString2,
                                                ExtraWt2,
                                                fModify );

                            if (sm1 != FAREAST_SPECIAL)
                            {
                                //
                                //  The character in the first string is
                                //  NOT a fareast special char.
                                //
                                //  Set each of weights 4, 5, 6, and 7 to show
                                //  that string 1 is smaller (if not already set).
                                //
                                if ((GET_WT_FOUR(&WhichExtra) == 0) &&
                                    (GET_WT_FOUR(&ExtraWt2) != 0))
                                {
                                    GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
                                }
                                if ((GET_WT_FIVE(&WhichExtra) == 0) &&
                                    (GET_WT_FIVE(&ExtraWt2) != 0))
                                {
                                    GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
                                }
                                if ((GET_WT_SIX(&WhichExtra) == 0) &&
                                    (GET_WT_SIX(&ExtraWt2) != 0))
                                {
                                    GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
                                }
                                if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
                                    (GET_WT_SEVEN(&ExtraWt2) != 0))
                                {
                                    GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
                                }
                            }
                            else
                            {
                                //
                                //  Characters in both strings are fareast
                                //  special chars.
                                //
                                //  Set each of weights 4, 5, 6, and 7
                                //  appropriately (if not already set).
                                //
                                if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
                                     ( GET_WT_FOUR(&ExtraWt1) !=
                                       GET_WT_FOUR(&ExtraWt2) ) )
                                {
                                    GET_WT_FOUR(&WhichExtra) =
                                      ( GET_WT_FOUR(&ExtraWt1) <
                                        GET_WT_FOUR(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                                if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
                                     ( GET_WT_FIVE(&ExtraWt1) !=
                                       GET_WT_FIVE(&ExtraWt2) ) )
                                {
                                    GET_WT_FIVE(&WhichExtra) =
                                      ( GET_WT_FIVE(&ExtraWt1) <
                                        GET_WT_FIVE(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                                if ( (GET_WT_SIX(&WhichExtra) == 0) &&
                                     ( GET_WT_SIX(&ExtraWt1) !=
                                       GET_WT_SIX(&ExtraWt2) ) )
                                {
                                    GET_WT_SIX(&WhichExtra) =
                                      ( GET_WT_SIX(&ExtraWt1) <
                                        GET_WT_SIX(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                                if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
                                     ( GET_WT_SEVEN(&ExtraWt1) !=
                                       GET_WT_SEVEN(&ExtraWt2) ) )
                                {
                                    GET_WT_SEVEN(&WhichExtra) =
                                      ( GET_WT_SEVEN(&ExtraWt1) <
                                        GET_WT_SEVEN(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                            }
                        }

                        break;
                    }
                    case ( JAMO_SPECIAL ) :
                    {
                        //
                        //  JamoFlag is set when the string 1 switch above
                        //  already ran the Jamo comparison in this pass;
                        //  in that case only the flag is cleared.
                        //
                        if (!JamoFlag)
                        {
                            LPWSTR pStr1 = pString1;
                            LPWSTR pStr2 = pString2;

                            //
                            //  NOTE(review): same argument type mismatch as
                            //  at the call in the string 1 switch (LPWSTR*
                            //  vs LPCWSTR*, DWORD* vs int*).
                            //
                            FindJamoDifference(
                                pHashN,
                                &pStr1, &ctr1, cchCount1, &Weight1,
                                &pStr2, &ctr2, cchCount2, &Weight2,
                                &pLastJamo,
                                &uw1, &uw2,
                                &State,
                                &WhichJamo,
                                fModify );
                            if (WhichJamo)
                            {
                                return (WhichJamo);
                            }
                            pString1 = pStr1;
                            pString2 = pStr2;
                        }
                        else
                        {
                            //
                            //  Reset the Jamo flag.
                            //
                            JamoFlag = FALSE;
                        }

                        break;
                    }
                    case ( EXTENSION_A ) :
                    {
                        //
                        //  If sm1 is an extension A character, then
                        //  both sm1 and sm2 have been handled.  We should
                        //  only get here when either sm1 is not an
                        //  extension A character or the two extension A
                        //  characters are different.
                        //
                        if (sm1 != EXTENSION_A)
                        {
                            //
                            //  Get the full weight in case DW got masked.
                            //  Also, get the actual UW to compare.
                            //
                            //  Only string2 contains an extension A char,
                            //  so set the UW value to be the first UW
                            //  value for extension A (default values):
                            //    SM_EXT_A, AW_EXT_A
                            //
                            Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
                            uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
                        }

                        //
                        //  We should then fall through to the comparison
                        //  of the Unicode weights.
                        //

                        break;
                    }
                    case ( UNSORTABLE ) :
                    {
                        //
                        //  Fill out the case statement so the compiler
                        //  will use a jump table.
                        //
                        break;
                    }
                }

                //
                //  See if the comparison should start again.
                //
                if ((Weight1 == CMP_INVALID_WEIGHT) ||
                    (Weight2 == CMP_INVALID_WEIGHT))
                {
                    //
                    //  Check to see if we're modifying the script value.
                    //  If so, then we need to reset the fareast weight
                    //  (if applicable) so that it doesn't get modified
                    //  again.
                    //
                    if (fModify == TRUE)
                    {
                        if (sm1 == FAREAST_SPECIAL)
                        {
                            Weight1 = CMP_INVALID_WEIGHT;
                        }
                        else if (sm2 == FAREAST_SPECIAL)
                        {
                            Weight2 = CMP_INVALID_WEIGHT;
                        }
                    }
                    continue;
                }

                //
                //  We're not supposed to drop down into the state table if
                //  the unicode weights are different, so stop comparison
                //  and return result of unicode weight comparison.
                //
                if (uw1 != uw2)
                {
                    return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
                }
            }

            //
            //  For each state in the state table, do the appropriate
            //  comparisons.
            //
            if (State & (STATE_DW | STATE_REVERSE_DW))
            {
                //
                //  Get the diacritic weights.
                //
                dw1 = GET_DIACRITIC(&Weight1);
                dw2 = GET_DIACRITIC(&Weight2);

                if (dw1 != dw2)
                {
                    //
                    //  Look ahead to see if diacritic follows a
                    //  minimum diacritic weight.  If so, get the
                    //  diacritic weight of the nonspace mark.
                    //  Each following nonspace mark is consumed and its
                    //  diacritic weight is added to the running total.
                    //
                    while (!AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1))
                    {
                        Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
                        if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
                        {
                            dw1 += GET_DIACRITIC(&Wt);
                            pString1++;
                            ctr1--;
                        }
                        else
                        {
                            break;
                        }
                    }

                    while (!AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2))
                    {
                        Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
                        if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
                        {
                            dw2 += GET_DIACRITIC(&Wt);
                            pString2++;
                            ctr2--;
                        }
                        else
                        {
                            break;
                        }
                    }

                    //
                    //  Save which string has the smaller diacritic
                    //  weight if the diacritic weights are still
                    //  different.
                    //
                    if (dw1 != dw2)
                    {
                        WhichDiacritic = (dw1 < dw2)
                                           ? CSTR_LESS_THAN
                                           : CSTR_GREATER_THAN;

                        //
                        //  Remove state from state machine.
                        //  (Only STATE_DW is cleared, so with
                        //  STATE_REVERSE_DW a later difference overwrites
                        //  this result.)
                        //
                        REMOVE_STATE(STATE_DW);
                    }
                }
            }
            if (State & STATE_CW)
            {
                //
                //  Get the case weights.
                //
                if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
                {
                    //
                    //  Save which string has the smaller case weight.
                    //
                    WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
                                  ? CSTR_LESS_THAN
                                  : CSTR_GREATER_THAN;

                    //
                    //  Remove state from state machine.
                    //
                    REMOVE_STATE(STATE_CW);
                }
            }
        }

        //
        //  Fixup the pointers and counters.
        //
        POINTER_FIXUP();
        ctr1--;
        ctr2--;

        //
        //  Reset the weights to be invalid.
        //
        Weight1 = CMP_INVALID_WEIGHT;
        Weight2 = CMP_INVALID_WEIGHT;
    }

    //
    //  If the end of BOTH strings has been reached, then the unicode
    //  weights match exactly.  Check the diacritic, case and special
    //  weights.  If all are zero, then return success.  Otherwise,
    //  return the result of the weight difference.
    //
    //  NOTE:  The following checks MUST REMAIN IN THIS ORDER:
    //            Diacritic, Case, Punctuation.
    //
    if (AT_STRING_END(ctr1, pString1, cchCount1))
    {
        if (AT_STRING_END(ctr2, pString2, cchCount2))
        {
            if (WhichDiacritic)
            {
                return (WhichDiacritic);
            }
            if (WhichCase)
            {
                return (WhichCase);
            }
            if (WhichExtra)
            {
                //
                //  Weights 4 and 5 are only reported when diacritics
                //  are not being ignored; weights 6 and 7 always are.
                //
                if (!fIgnoreDiacritic)
                {
                    if (GET_WT_FOUR(&WhichExtra))
                    {
                        return (GET_WT_FOUR(&WhichExtra));
                    }
                    if (GET_WT_FIVE(&WhichExtra))
                    {
                        return (GET_WT_FIVE(&WhichExtra));
                    }
                }
                if (GET_WT_SIX(&WhichExtra))
                {
                    return (GET_WT_SIX(&WhichExtra));
                }
                if (GET_WT_SEVEN(&WhichExtra))
                {
                    return (GET_WT_SEVEN(&WhichExtra));
                }
            }
            if (WhichPunct1)
            {
                return (WhichPunct1);
            }
            if (WhichPunct2)
            {
                return (WhichPunct2);
            }

            return (CSTR_EQUAL);
        }
        else
        {
            //
            //  String 2 is longer.
            //
            //  NOTE(review): fEnd1 is declared BOOL but carries a CSTR_*
            //  result code from here on.
            //
            pString1 = pString2;
            ctr1 = ctr2;
            cchCount1 = cchCount2;
            fEnd1 = CSTR_LESS_THAN;
        }
    }
    else
    {
        fEnd1 = CSTR_GREATER_THAN;
    }

    //
    //  Scan to the end of the longer string.
    //
    SCAN_LONGER_STRING( ctr1,
                        pString1,
                        cchCount1,
                        fEnd1 );
}


////////////////////////////////////////////////////////////////////////////
//
//  FindJamoDifference
//
////////////////////////////////////////////////////////////////////////////

int FindJamoDifference(
    PLOC_HASH pHashN,
    LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1,
    LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2,
    LPCWSTR* pLastJamo,
    WORD* uw1, WORD* uw2,
    int* pState,
    int* WhichJamo,
    BOOL fModify)
{
    int bRestart = 0;                 // if string compare should restart again
    int oldHangulsFound1 = 0;         // # of valid old Hangul Jamo compositions found
    int oldHangulsFound2 = 0;         // # of valid old Hangul Jamo compositions found
    WORD UW;
    BYTE JamoWeight1[3];              // extra weight for first old Hangul composition
    BYTE JamoWeight2[3];              // extra weight for second old Hangul composition

    //
    //  Roll back to the first Jamo.  We know that these Jamos in both strings
    //  should be equal, so we can decrement both strings at once.
// while ((*ppString1 > *pLastJamo) && IsJamo(*(*ppString1 - 1))) { (*ppString1)--; (*ppString2)--; (*ctr1)++; (*ctr2)++; } // // Now we are at the beginning of two groups of Jamo characters. // Compare Jamo unit (either a single Jamo or a valid old Hangul Jamo // composition) until we run out Jamo units in either strings. // We also exit when we reach the ends of either string. // // while (NOT_END_STRING(*ctr1, *ppString1, cchCount1) && // NOT_END_STRING(*ctr2, *ppString2, cchCount2)) // for (;;) { if (IsJamo(**ppString1)) { if (IsLeadingJamo(**ppString1)) { if ((oldHangulsFound1 = MapOldHangulSortKey( pHashN, *ppString1, *ctr1, &UW, JamoWeight1, fModify )) > 0) { *uw1 = UW; // // Mark *pWeight1 so that it is not CMP_INVALID_WEIGHT. // 0202 is the DW/CW. // *pWeight1 = ((DWORD)UW | 0x02020000); // // We always increment ppString1/ctr1 at the end of the // loop, so we need to subtract 1 here. // *ppString1 += (oldHangulsFound1 - 1); *ctr1 -= (oldHangulsFound1 - 1); } } if (oldHangulsFound1 == 0) { // // No valid old Hangul compositions are found. Get the UW // for the Jamo instead. // *pWeight1 = GET_DWORD_WEIGHT(pHashN, **ppString1); // // The SMs in PSORTKEY for Jamos are not really SMs. They // are all 4 (for JAMO_SPECIAL). // Here we get the real Jamo Unicode weight. The actual SM // is stored in DW. 
// *uw1 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight1), GET_ALPHA_NUMERIC(pWeight1), fModify ); ((PSORTKEY)pWeight1)->Diacritic = MIN_DW; } } if (IsJamo(**ppString2)) { if (IsLeadingJamo(**ppString2)) { if ((oldHangulsFound2 = MapOldHangulSortKey( pHashN, *ppString2, *ctr2, &UW, JamoWeight2, fModify )) > 0) { *uw2 = UW; *pWeight2 = ((DWORD)UW | 0x02020000); *ppString2 += (oldHangulsFound2 - 1); *ctr2 -= (oldHangulsFound2 - 1); } } if (oldHangulsFound2 == 0) { *pWeight2 = GET_DWORD_WEIGHT(pHashN, **ppString2); *uw2 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight2), GET_ALPHA_NUMERIC(pWeight2), fModify ); ((PSORTKEY)pWeight2)->Diacritic = MIN_DW; } } // // See if either weight is invalid. // A weight can be invalid when the character is not a Jamo. // if (*pWeight1 == CMP_INVALID_WEIGHT) { // // The current character is not a Jamo. Set the Weight to // be CMP_INVALID_WEIGHT, so that the string comparision can // restart within the loop of CompareString(). // *pWeight1 = CMP_INVALID_WEIGHT; bRestart = 1; goto FindJamoDifferenceExit; } if (*pWeight2 == CMP_INVALID_WEIGHT) { // // The current character is not a Jamo. Set the Weight to // be CMP_INVALID_WEIGHT, so that the string comparision can // restart within the loop of CompareString(). // *pWeight2 = CMP_INVALID_WEIGHT; bRestart = 1; goto FindJamoDifferenceExit; } if (*uw1 != *uw2) { // // Found differences in Unicode weight. We can stop the // processing now. // goto FindJamoDifferenceExit; } // // When we get here, we know that we have the same Unicode Weight. // Check if we need to record the WhichJamo. 
// if ((*pState & STATE_JAMO_WEIGHT) && ((oldHangulsFound1 > 0) || (oldHangulsFound2 > 0))) { if ((oldHangulsFound1 > 0) && (oldHangulsFound2 > 0)) { *WhichJamo = (int)memcmp( JamoWeight1, JamoWeight2, sizeof(JamoWeight1) ) + 2; } else if (oldHangulsFound1 > 0) { *WhichJamo = CSTR_GREATER_THAN; } else { *WhichJamo = CSTR_LESS_THAN; } *pState &= ~STATE_JAMO_WEIGHT; oldHangulsFound1 = oldHangulsFound2 = 0; } (*ppString1)++; (*ctr1)--; (*ppString2)++; (*ctr2)--; if (AT_STRING_END(*ctr1, *ppString1, cchCount1) || AT_STRING_END(*ctr2, *ppString2, cchCount2)) { break; } *pWeight1 = *pWeight2 = CMP_INVALID_WEIGHT; } // // If we drop out of the while loop because we reach the end of strings, // decrement the pointers by one because loops in CompareString() will // increase the pointers at the end of the loop. // // If we drop out of the while loop because the goto's in it, we are // already off by one. // if (AT_STRING_END(*ctr1, *ppString1, cchCount1)) { (*ppString1)--; (*ctr1)++; } if (AT_STRING_END(*ctr2, *ppString2, cchCount2)) { (*ppString2)--; (*ctr2)++; } FindJamoDifferenceExit: *pLastJamo = *ppString1; return (bRestart); }