windows-xp/Source/XPSP1/NT/base/ntos/rtl/gen8dot3.c


								/*++


								Copyright (c) 1990  Microsoft Corporation


								Module Name:


								    Gen8dot3.c


								Abstract:


								    This module implements a routine to generate 8.3 names from long names.


								Author:


								    Gary Kimura     [GaryKi]    26-Mar-1992


								Environment:


								    Pure Utility Routines


								Revision History:


								--*/


								#include "ntrtlp.h"

								#include <stdio.h>


								extern PUSHORT  NlsUnicodeToMbOemData;

								extern PUSHORT  NlsOemToUnicodeData;

								extern PCH      NlsUnicodeToOemData;

								extern PUSHORT  NlsMbOemCodePageTables;

								extern BOOLEAN  NlsMbOemCodePageTag;

								extern const PUSHORT  NlsOemLeadByteInfo;

								extern USHORT   OemDefaultChar;


								//
								//  A condensed bitmap of the 7-bit characters that are ILLEGAL in a FAT
								//  8.3 name.
								//
								//  Bit (c % 32) of element (c / 32) is set when character c is illegal.
								//  The table has four 32-bit elements, so it only covers characters below
								//  0x80; the users in this file range check the character before indexing.
								//
								//  Decoded, the set bits are:
								//
								//      [0] 0xffffffff - every character 0x00 through 0x1f
								//      [1] 0xfc009c04 - "  *  +  ,  /  :  ;  <  =  >  ?
								//      [2] 0x38000000 - [  \  ]
								//      [3] 0x10000000 - |
								//

								#if defined(ALLOC_DATA_PRAGMA) && defined(NTOS_KERNEL_RUNTIME)
								#pragma const_seg("PAGECONST")
								#endif
								const
								ULONG RtlFatIllegalTable[] = { 0xffffffff,
								                               0xfc009c04,
								                               0x38000000,
								                               0x10000000 };


								WCHAR

								GetNextWchar (

								    IN PUNICODE_STRING Name,

								    IN PULONG CurrentIndex,

								    IN BOOLEAN SkipDots,

								    IN BOOLEAN AllowExtendedCharacters

								    );


								USHORT

								RtlComputeLfnChecksum (

								    PUNICODE_STRING Name

								    );


								//
								//  BOOLEAN
								//  IsDbcsCharacter (
								//      IN WCHAR Wc
								//  );
								//
								//  Evaluates to non-zero when Wc is above the 7-bit range and its entry
								//  in NlsUnicodeToMbOemData has a non-zero high byte, i.e. the character
								//  occupies two bytes once converted to Oem.
								//
								//  N.B. The argument is evaluated more than once, so it must not have
								//       side effects.
								//

								#define IsDbcsCharacter(WC) \
								    (((WC) > 127) && (HIBYTE(NlsUnicodeToMbOemData[(WC)]) != 0))


								#if defined(ALLOC_PRAGMA) && defined(NTOS_KERNEL_RUNTIME)

								#pragma alloc_text(PAGE,RtlGenerate8dot3Name)

								#pragma alloc_text(PAGE,GetNextWchar)

								#pragma alloc_text(PAGE,RtlComputeLfnChecksum)

								#pragma alloc_text(PAGE,RtlIsNameLegalDOS8Dot3)

								#pragma alloc_text(PAGE,RtlIsValidOemCharacter)

								#endif


								VOID
								RtlGenerate8dot3Name (
								    IN PUNICODE_STRING Name,
								    IN BOOLEAN AllowExtendedCharacters,
								    IN OUT PGENERATE_NAME_CONTEXT Context,
								    OUT PUNICODE_STRING Name8dot3
								    )

								/*++

								Routine Description:

								    This routine is used to generate an 8.3 name from a long name.  It can
								    be called repeatedly to generate different 8.3 name variations for the
								    same long name.  This is necessary if the generated 8.3 name conflicts
								    with an existing 8.3 name.

								    The generated name has the form <basis>~<index>[.<ext>].  The basis and
								    extension are computed on the first call and cached in the context;
								    every call then increments the index and assembles the output.

								Arguments:

								    Name - Supplies the original long name that is being translated from.

								    AllowExtendedCharacters - If TRUE, then extended characters, including
								        DBCS characters, are allowed in the basis of the short name if they
								        map to an upcased Oem character.

								    Context - Supplies a context for the translation.  This is a private structure
								        needed by this routine to help enumerate the different long name
								        possibilities.  The caller is responsible for providing a "zeroed out"
								        context structure on the first call for each given input name.

								        The fields used here are:

								            NameBuffer / NameLength - the basis characters and their count.
								                A NameLength of zero is what identifies the first call.
								            ExtensionBuffer / ExtensionLength - the extension including its
								                leading dot, or a length of zero if there is none.
								            Checksum / ChecksumInserted - the long name checksum and
								                whether its hex digits are already part of the basis.
								            LastIndexValue - the index used by the previous call.

								    Name8dot3 - Receives the new 8.3 name.  Pool for the buffer must be allocated
								        by the caller and should be 12 characters wide (i.e., 24 bytes).
								        Only Buffer and Length are touched; MaximumLength is not checked.

								Return Value:

								    None.

								--*/

								{
								    BOOLEAN DbcsAware;
								    BOOLEAN IndexAll9s = TRUE;
								    ULONG OemLength;
								    ULONG IndexLength;
								    WCHAR IndexBuffer[8];
								    ULONG i;

								    //
								    //  In the kernel runtime the safe extension policy is an external
								    //  variable; in every other build it is a local that is always TRUE.
								    //

								#ifdef NTOS_KERNEL_RUNTIME
								    extern BOOLEAN FsRtlSafeExtensions;
								#else
								    BOOLEAN FsRtlSafeExtensions = TRUE;
								#endif

								    //
								    //  Byte widths only need to be tracked when extended characters can
								    //  be returned and the Oem code page is multi-byte.
								    //

								    DbcsAware = AllowExtendedCharacters && NlsMbOemCodePageTag;

								    //
								    //  Check if this is the first time we are being called, and if so then
								    //  initialize the context fields.
								    //

								    if (Context->NameLength == 0) {

								        ULONG LastDotIndex;

								        ULONG CurrentIndex;
								        BOOLEAN SkipDots;
								        WCHAR wc;

								        //
								        //  Walk the whole name remembering the index just past the last
								        //  dot that is returned to us.  If the name starts with a dot,
								        //  dots are skipped until the first character has been returned,
								        //  so a leading dot is never taken as the extension separator.
								        //

								        LastDotIndex = MAXULONG;

								        CurrentIndex = 0;
								        SkipDots = ((Name->Length > 0) && (Name->Buffer[0] == L'.'));

								        while ((wc = GetNextWchar( Name,
								                                   &CurrentIndex,
								                                   SkipDots,
								                                   AllowExtendedCharacters )) != 0) {

								            SkipDots = FALSE;
								            if (wc == L'.') { LastDotIndex = CurrentIndex; }
								        }

								        //
								        //  If the LastDotIndex is the last character in the name,
								        //  then there really isn't an extension, so reset LastDotIndex.
								        //

								        if (LastDotIndex == Name->Length/sizeof(WCHAR)) {

								            LastDotIndex = MAXULONG;
								        }

								        //
								        //  Build up the name part. This can be at most 6 characters
								        //  (because of the ~# appended on the end) and we skip over
								        //  dots, except the last dot, which terminates the loop.
								        //
								        //  We exit the loop if:
								        //
								        //  - The input Name has been exhausted
								        //  - We have consumed the input name up to the last dot
								        //  - We have filled 6 characters of short name basis
								        //

								        CurrentIndex = 0;
								        OemLength = 0;
								        Context->NameLength = 0;

								        while ((wc = GetNextWchar( Name, &CurrentIndex, TRUE, AllowExtendedCharacters)) &&
								               (CurrentIndex < LastDotIndex) &&
								               (Context->NameLength < 6)) {

								            //
								            //  If we are on a multi-byte code page we have to be careful
								            //  here because the short name (when converted to Oem) must
								            //  be 8.3 compliant.  Note that if AllowExtendedCharacters
								            //  is FALSE, then GetNextWchar will never return a DBCS
								            //  character, so we don't care what kind of code page we
								            //  are on.
								            //

								            if (DbcsAware) {

								                OemLength += IsDbcsCharacter(wc) ? 2 : 1;

								                //
								                //  Stop before storing a character that would push the
								                //  basis past 6 Oem bytes.
								                //

								                if (OemLength > 6) { break; }
								            }

								            //
								            //  Copy the UNICODE character into the name buffer
								            //

								            Context->NameBuffer[Context->NameLength++] = wc;
								        }

								        //
								        //  Now if the name part of the basis is 2 or less bytes (when
								        //  represented in Oem) then append a four character checksum
								        //  to make the short name space less sparse.  The checksum is
								        //  written as four hex digits, least significant nibble first.
								        //

								        if ((DbcsAware ? OemLength : Context->NameLength) <= 2) {

								            USHORT Checksum;
								            WCHAR Nibble;

								            Checksum =
								            Context->Checksum = RtlComputeLfnChecksum( Name );

								            for (i = 0; i < 4; i++, Checksum >>= 4) {

								                Nibble = Checksum & 0xf;
								                Nibble += Nibble <= 9 ? '0' : 'A' - 10;

								                Context->NameBuffer[ Context->NameLength + i ] = Nibble;
								            }

								            Context->NameLength += 4;
								            Context->ChecksumInserted = TRUE;
								        }

								        //
								        //  Now process the last extension (if there is one).
								        //  If the last dot index is not MAXULONG then we
								        //  have located the last dot in the name
								        //

								        if (LastDotIndex != MAXULONG) {

								            //
								            //  Put in the "."
								            //

								            Context->ExtensionBuffer[0] = L'.';

								            //
								            //  Process the extension similar to how we processed the name.
								            //  LastDotIndex is reused as the scan position; it already
								            //  points just past the last dot.
								            //
								            //  We exit the loop if:
								            //
								            //  - The input Name has been exhausted
								            //  - We have filled . + 3 characters of extension
								            //

								            OemLength = 1;
								            Context->ExtensionLength = 1;

								            while ((wc = GetNextWchar( Name, &LastDotIndex, TRUE, AllowExtendedCharacters)) &&
								                   (Context->ExtensionLength < 4)) {

								                if (DbcsAware) {

								                    OemLength += IsDbcsCharacter(wc) ? 2 : 1;

								                    if (OemLength > 4) { break; }
								                }

								                Context->ExtensionBuffer[Context->ExtensionLength++] = wc;
								            }

								            //
								            //  If we had to truncate the extension (i.e. input name was not
								            //  exhausted, so wc is still non-zero), change the last char of
								            //  the truncated extension to a ~ if the user has selected safe
								            //  extensions.
								            //

								            if (wc && FsRtlSafeExtensions) {

								                Context->ExtensionBuffer[Context->ExtensionLength - 1] = L'~';
								            }

								        } else {

								            Context->ExtensionLength = 0;
								        }
								    }

								    //
								    //  In all cases we add one to the index value and this is the value
								    //  of the index we are going to generate this time around
								    //

								    Context->LastIndexValue += 1;

								    //
								    //  Now if the new index value is greater than 4 then we've had too
								    //  many collisions and we should alter our basis if possible.  This
								    //  happens at most once per context because ChecksumInserted is set.
								    //

								    if ((Context->LastIndexValue > 4) && !Context->ChecksumInserted) {

								        USHORT Checksum;
								        WCHAR Nibble;

								        //
								        // 'XX' represents a DBCS character.
								        //
								        // LongName       -> ShortName  | DbcsBias  Oem  Unicode
								        // -----------------------------+------------------------
								        // XXXXThisisapen -> XX1234     |    1       6      5
								        // XXThisisapen   -> XX1234     |    1       6      5
								        // aXXThisisapen  -> a1234      |    1       5      5
								        // aaThisisapen   -> aa1234     |    0       6      6
								        //
								        //  DbcsBias is 1 when either of the first two basis characters
								        //  is DBCS; the checksum digits then start one position earlier.
								        //

								        ULONG DbcsBias;

								        if (DbcsAware) {

								              DbcsBias = ((IsDbcsCharacter(Context->NameBuffer[0]) ? 1 : 0) |
								                          (IsDbcsCharacter(Context->NameBuffer[1]) ? 1 : 0));

								        } else {

								              DbcsBias = 0;
								        }

								        Checksum =
								        Context->Checksum = RtlComputeLfnChecksum( Name );

								        //
								        //  Overwrite four basis positions with the hex digits of the
								        //  checksum, least significant nibble first.
								        //

								        for (i = (2-DbcsBias); i < (6-DbcsBias); i++, Checksum >>= 4) {

								            Nibble = Checksum & 0xf;
								            Nibble += Nibble <= 9 ? '0' : 'A' - 10;

								            Context->NameBuffer[ i ] = Nibble;
								        }

								        //
								        //  The basis changed, so restart the index at 1.
								        //

								        Context->NameLength = (UCHAR)(6-DbcsBias);
								        Context->LastIndexValue = 1;
								        Context->ChecksumInserted = TRUE;
								    }

								    //
								    //  Now build the index buffer from high index to low index because we
								    //  use a mod & div operation to build the string from the index value.
								    //  The digits are written right-justified in IndexBuffer, at most 7
								    //  of them, and on exit IndexLength is the digit count plus one.
								    //
								    //  We also want to remember if we are about to rollover in base 10.
								    //

								    for (IndexLength = 1, i = Context->LastIndexValue;
								         (IndexLength <= 7) && (i > 0);
								         IndexLength += 1, i /= 10) {

								        if ((IndexBuffer[ 8 - IndexLength] = (WCHAR)(L'0' + (i % 10))) != L'9') {

								            IndexAll9s = FALSE;
								        }
								    }

								    //
								    //  And tack on the preceding tilde
								    //

								    IndexBuffer[ 8 - IndexLength ] = L'~';

								    //
								    //  At this point everything is set up to copy to the output buffer.  First
								    //  copy over the name, then the index, and then the extension if one exists.
								    //

								    if (Context->NameLength != 0) {

								        RtlCopyMemory( &Name8dot3->Buffer[0],
								                       &Context->NameBuffer[0],
								                       Context->NameLength * 2 );

								        Name8dot3->Length = (USHORT)(Context->NameLength * 2);

								    } else {

								        Name8dot3->Length = 0;
								    }

								    //
								    //  Now do the index.
								    //

								    RtlCopyMemory( &Name8dot3->Buffer[ Name8dot3->Length/2 ],
								                   &IndexBuffer[ 8 - IndexLength ],
								                   IndexLength * 2 );

								    Name8dot3->Length += (USHORT) (IndexLength * 2);

								    //
								    //  Now conditionally do the extension
								    //

								    if (Context->ExtensionLength != 0) {

								        RtlCopyMemory( &Name8dot3->Buffer[ Name8dot3->Length/2 ],
								                       &Context->ExtensionBuffer[0],
								                       Context->ExtensionLength * 2 );

								        Name8dot3->Length += (USHORT) (Context->ExtensionLength * 2);
								    }

								    //
								    //  If current index value is all 9s, then the next value will cause the
								    //  index string to grow from its current size.  In this case recompute
								    //  Context->NameLength so that it will be correct for next time.
								    //
								    //  NOTE(review): NameLength shrinks by one at every rollover, and a
								    //  NameLength of zero is also what triggers the first-call
								    //  initialization at the top of this routine.  Presumably callers
								    //  give up long before enough collisions occur for that to matter -
								    //  confirm against the callers.
								    //

								    if (IndexAll9s) {

								        if (DbcsAware) {

								            //
								            //  Keep only as many basis characters as still fit in 8 Oem
								            //  bytes next to an index string that is one byte longer.
								            //

								            for (i = 0, OemLength = 0; i < Context->NameLength; i++) {

								                OemLength += IsDbcsCharacter(Context->NameBuffer[i]) ? 2 : 1;

								                if (OemLength > 8 - (IndexLength + 1)) {
								                    break;
								                }
								            }

								            Context->NameLength = (UCHAR)i;

								        } else {

								            Context->NameLength -= 1;
								        }
								    }

								    //
								    //  And return to our caller
								    //

								    return;
								}


								BOOLEAN
								RtlIsValidOemCharacter (
								    IN PWCHAR Char
								)

								/*++

								Routine Description:

								    This routine determines if the best-fitted and upcased version of the
								    input unicode char is a valid Oem character.

								    The character is converted Unicode -> Oem -> Unicode, upcased, and
								    converted to Oem once more.  If that final Oem value is the Oem default
								    character the input is considered to have no mapping.

								Arguments:

								    Char - Supplies the Unicode char and receives the best-fitted and
								        upcased version if it was indeed valid.  It is left untouched
								        when the routine returns FALSE.

								Return Value:

								    TRUE if the character was valid.

								--*/

								{
								    WCHAR UniTmp;
								    WCHAR OemChar;
								    USHORT Entry;

								    //
								    //  First try to make a round trip from Unicode->Oem->Unicode.
								    //

								    if (!NlsMbOemCodePageTag) {

								        //
								        //  Single byte code page.  NlsUnicodeToOemData is a table of plain
								        //  chars, so every value read from it is forced to UCHAR before it
								        //  is widened.  Without the cast an Oem byte >= 0x80 would be sign
								        //  extended on builds where char is signed, and the comparison
								        //  against OemDefaultChar below would be made on 0xFFxx.
								        //

								        UniTmp = (WCHAR)NLS_UPCASE(NlsOemToUnicodeData[(UCHAR)NlsUnicodeToOemData[*Char]]);
								        OemChar = (UCHAR)NlsUnicodeToOemData[UniTmp];

								    } else {

								        //
								        // Convert to OEM and back to Unicode before upper casing
								        // to ensure the visual best fits are converted and
								        // upper cased properly.
								        //

								        OemChar = NlsUnicodeToMbOemData[ *Char ];

								        //
								        //  A non-zero lead byte entry is the offset of the translation
								        //  table that belongs to that lead byte.
								        //

								        Entry = NlsOemLeadByteInfo[HIBYTE(OemChar)];

								        if (Entry) {

								            //
								            // Lead byte - translate the trail byte using the table
								            // that corresponds to this lead byte.
								            //

								            UniTmp = (WCHAR)NlsMbOemCodePageTables[ Entry + LOBYTE(OemChar) ];

								        } else {

								            //
								            // Single byte character.
								            //

								            UniTmp = NlsOemToUnicodeData[LOBYTE(OemChar)];
								        }

								        //
								        //  Now upcase this UNICODE character, and convert it to Oem.
								        //

								        UniTmp = (WCHAR)NLS_UPCASE(UniTmp);
								        OemChar = NlsUnicodeToMbOemData[UniTmp];
								    }

								    //
								    //  Now if the final OemChar is the default one, then there was no
								    //  mapping for this UNICODE character.
								    //

								    if (OemChar == OemDefaultChar) {

								        return FALSE;
								    }

								    *Char = UniTmp;
								    return TRUE;
								}


								//
								//  Local support routine
								//

								WCHAR
								GetNextWchar (
								    IN PUNICODE_STRING Name,
								    IN PULONG CurrentIndex,
								    IN BOOLEAN SkipDots,
								    IN BOOLEAN AllowExtendedCharacters
								    )

								/*++

								Routine Description:

								    This routine scans the input name starting at the current index and
								    returns the next valid character for the long name to 8.3 generation
								    algorithm.  It also updates the current index to point to the
								    next character to examine.

								    The user can specify if dots are skipped over or passed back.  The
								    filtering done by the procedure is:

								    1. Skip blanks and every character below a blank.
								    2. Skip characters >= 127 unless AllowExtendedCharacters is TRUE and
								       RtlIsValidOemCharacter accepts them, in which case the best-fitted
								       and upcased character it produces is used instead.
								    3. Optionally skip over dots.
								    4. Translate every character flagged in RtlFatIllegalTable (for
								       example : + , ; = [ ]) into an underscore.
								    5. Upcase a through z.

								Arguments:

								    Name - Supplies the name being examined

								    CurrentIndex - Supplies the index to start our examination and also
								        receives the index of one beyond the character we return.  If the
								        name is exhausted it receives the number of characters in the name.

								    SkipDots - Indicates whether this routine will also skip over periods

								    AllowExtendedCharacters - Tells whether characters >= 127 are valid.

								Return Value:

								    WCHAR - returns the next wchar in the name string, or 0 if the rest
								        of the name contains no character that can be returned.

								--*/

								{
								    WCHAR wc;

								    //
								    //  Until we find out otherwise the character we are going to return
								    //  is 0
								    //

								    wc = 0;

								    //
								    //  Now loop through updating the current index until we either have a character to
								    //  return or until we exhaust the name buffer
								    //

								    while (*CurrentIndex < (ULONG)(Name->Length/2)) {

								        //
								        //  Get the next character in the buffer
								        //

								        wc = Name->Buffer[*CurrentIndex];
								        *CurrentIndex += 1;

								        //
								        //  If the character is to be skipped over then reset wc to 0
								        //

								        if ((wc <= L' ') ||
								            ((wc >= 127) && (!AllowExtendedCharacters || !RtlIsValidOemCharacter(&wc))) ||
								            ((wc == L'.') && SkipDots)) {

								            wc = 0;

								        } else {

								            //
								            //  We have a character to return, but first translate the
								            //  character if necessary.  The mask is built from an unsigned
								            //  one because bit 31 is a valid position (it is the bit used
								            //  for '?') and shifting a signed 1 that far is undefined.
								            //

								            if ((wc < 0x80) && (RtlFatIllegalTable[wc/32] & ((ULONG)1 << (wc%32)))) {

								                wc = L'_';
								            }

								            //
								            //  Do an a-z upcase.
								            //

								            if ((wc >= L'a') && (wc <= L'z')) {

								                wc -= L'a' - L'A';
								            }

								            //
								            //  And break out of the loop to return to our caller
								            //

								            break;
								        }
								    }

								    return wc;
								}


								//
								//  Internal support routine
								//

								USHORT
								RtlComputeLfnChecksum (
								    PUNICODE_STRING Name
								    )

								/*++

								Routine Description:

								    This routine computes the Chicago long file name checksum.

								    The first two characters seed the checksum.  The remaining characters
								    are consumed in pairs: the running value is rotated right by one bit,
								    the first character of the pair is added shifted left by 8, and the
								    second character (if there is one) is added as is.  All arithmetic is
								    truncated to 16 bits.

								Arguments:

								    Name - Supplies the name to compute the checksum on.  Note that one
								        character names don't have interesting checksums.

								Return Value:

								    The checksum.  A one character name yields that character, and an
								    empty name yields 0.

								--*/

								{
								    ULONG i;
								    ULONG CharCount;
								    USHORT Checksum;

								    RTL_PAGED_CODE();

								    CharCount = Name->Length / sizeof(WCHAR);

								    //
								    //  An empty name has no characters to read at all.  Return a fixed
								    //  value rather than touching Buffer[0] and Buffer[1], which may not
								    //  exist.
								    //

								    if (CharCount == 0) {

								        return 0;
								    }

								    //
								    //  With a single character there is no second character to combine.
								    //

								    if (CharCount == 1) {

								        return Name->Buffer[0];
								    }

								    Checksum = (USHORT)(((Name->Buffer[0] << 8) + Name->Buffer[1]) & 0xffff);

								    //
								    //  This checksum is kinda strange because we want to still have
								    //  a good range even if all the characters are < 0x00ff.
								    //

								    for (i = 2; i < CharCount; i += 2) {

								        Checksum = (USHORT)((Checksum & 1 ? 0x8000 : 0) +
								                            (Checksum >> 1) +
								                            (Name->Buffer[i] << 8));

								        //
								        //  Be careful to not walk off the end of the string.
								        //

								        if (i + 1 < CharCount) {

								            Checksum = (USHORT)(Checksum + Name->Buffer[i + 1]);
								        }
								    }

								    return Checksum;
								}


								BOOLEAN
								RtlIsNameLegalDOS8Dot3 (
								    IN PUNICODE_STRING Name,
								    IN OUT POEM_STRING OemName OPTIONAL,
								    OUT PBOOLEAN NameContainsSpaces OPTIONAL
								    )
								/*++

								Routine Description:

								    This routine takes an input string and gives a definitive answer
								    on whether this name can successfully be used to create a file
								    on the FAT file system.

								    This routine can therefore also be used to determine if a name is
								    appropriate to be passed back to a Win31 or DOS app, i.e. whether
								    the downlevel APP will understand the name.

								    Note: an important part of this test is the mapping from UNICODE
								    to Oem, which is why it is important that the input parameter be
								    received in UNICODE.

								Arguments:

								    Name - The UNICODE name to test for conformance to 8.3 semantics.

								    OemName - If specified, will receive the Oem name corresponding
								        to the passed in Name.  Storage must be provided by the caller.
								        The name is undefined if the routine returns FALSE.  If not
								        specified, a 12 byte local buffer is used instead.

								    NameContainsSpaces - If the function returns TRUE, then this
								        parameter will indicate if the name contains spaces.  If
								        the function returns FALSE, this parameter is undefined. In
								        many instances, the alternate name is more appropriate to
								        use if spaces are present in the principal name, even if
								        it is 8.3 compliant.

								Return Value:

								    BOOLEAN - TRUE if the passed in UNICODE name forms a valid 8.3
								        FAT name when upcased to the current Oem code page.

								--*/

								{
								    ULONG Index;
								    BOOLEAN ExtensionPresent = FALSE;
								    BOOLEAN SpacesPresent = FALSE;
								    OEM_STRING LocalOemName;
								    UCHAR OemBuffer[12];

								    //
								    //  Char is examined after the scan loop below.  If the Oem string is
								    //  empty the loop never runs, so give Char a defined value that is
								    //  neither a space nor a period instead of reading an uninitialized
								    //  local.
								    //
								    //  NOTE(review): an empty name therefore falls through to TRUE.  That
								    //  keeps the result this routine would typically have produced;
								    //  confirm whether callers would rather see FALSE for an empty name.
								    //

								    UCHAR Char = 0;

								    //
								    //  If the name is more than 12 chars, bail.
								    //

								    if (Name->Length > 12*sizeof(WCHAR)) {
								        return FALSE;
								    }

								    //
								    //  Now upcase this name to Oem.  If anything goes wrong,
								    //  return FALSE.
								    //

								    if (!ARGUMENT_PRESENT(OemName)) {

								        OemName = &LocalOemName;

								        OemName->Buffer = &OemBuffer[0];
								        OemName->Length = 0;
								        OemName->MaximumLength = 12;
								    }

								    if (!NT_SUCCESS(RtlUpcaseUnicodeStringToCountedOemString(OemName, Name, FALSE))) {
								        return FALSE;
								    }

								    //
								    //  Special case . and ..
								    //

								    if (((OemName->Length == 1) && (OemName->Buffer[0] == '.')) ||
								        ((OemName->Length == 2) && (OemName->Buffer[0] == '.') && (OemName->Buffer[1] == '.'))) {

								        if (ARGUMENT_PRESENT(NameContainsSpaces)) {
								            *NameContainsSpaces = FALSE;
								        }
								        return TRUE;
								    }

								    //
								    //  Now we are going to walk through the string looking for
								    //  illegal characters and/or incorrect syntax.
								    //

								    for ( Index = 0; Index < OemName->Length; Index += 1 ) {

								        Char = OemName->Buffer[ Index ];

								        //
								        //  Skip over any Dbcs characters
								        //

								        if (NlsMbOemCodePageTag && NlsOemLeadByteInfo[Char]) {

								            //
								            //  1) if we're looking at base part ( !ExtensionPresent ) and the 8th byte
								            //     is in the dbcs leading byte range, it's error ( Index == 7 ). If the
								            //     length of base part is more than 8 ( Index > 7 ), it's definitely error.
								            //
								            //  2) if the last byte ( Index == OemName->Length - 1 ) is in the dbcs leading
								            //     byte range, it's error
								            //

								            if ((!ExtensionPresent && (Index >= 7)) ||
								                (Index == (ULONG)(OemName->Length - 1))) {
								                return FALSE;
								            }

								            //
								            //  Step over the trail byte as well.
								            //

								            Index += 1;

								            continue;
								        }

								        //
								        //  Make sure this character is legal.  The mask is built from an
								        //  unsigned one because bit 31 is a valid position (it is the bit
								        //  used for '?') and shifting a signed 1 that far is undefined.
								        //

								        if ((Char < 0x80) &&
								            (RtlFatIllegalTable[Char/32] & ((ULONG)1 << (Char%32)))) {
								            return FALSE;
								        }

								        //
								        //  Remember if there was a space.
								        //

								        if (Char == ' ') {
								            SpacesPresent = TRUE;
								        }

								        if (Char == '.') {

								            //
								            //  We stepped onto a period.  We require the following things:
								            //
								            //      - There can only be one
								            //      - It can't be the first character
								            //      - The previous character can't be a space.
								            //      - There can't be more than 3 bytes following
								            //

								            if (ExtensionPresent ||
								                (Index == 0) ||
								                (OemName->Buffer[Index - 1] == ' ') ||
								                (OemName->Length - (Index + 1) > 3)) {

								                return FALSE;
								            }

								            ExtensionPresent = TRUE;
								        }

								        //
								        //  The base part of the name can't be more than 8 characters long.
								        //

								        if ((Index >= 8) && !ExtensionPresent) { return FALSE; }
								    }

								    //
								    //  The name cannot end in a space or a period.
								    //

								    if ((Char == ' ') || (Char == '.')) { return FALSE; }

								    if (ARGUMENT_PRESENT(NameContainsSpaces)) {
								        *NameContainsSpaces = SpacesPresent;
								    }
								    return TRUE;
								}