/*++

Copyright (c) 1991  Microsoft Corporation

Module Name:

    rcunicod.c

Abstract:

    Routines added to rcpp to support 16-bit unicode file parsing.
    Note that as of Aug 91, rcpp does not fully pass unicode characters
    through; only string constants are guaranteed to be passed cleanly.

Author:

    David J. Marsyla (t-davema) 25-Aug-1991

Revision History:

--*/
|
|
|
|
|
|
#include <nt.h>
|
|
#include <ntrtl.h>
|
|
#include <nturtl.h>
|
|
#include <windows.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <process.h>
|
|
#include "rcunicod.h"
|
|
|
|
#ifdef DBCS
|
|
|
|
//
|
|
// Prototypes for conversion routines between Unicode and 932.
|
|
//
|
|
|
|
NTSTATUS
|
|
xxxRtlMultiByteToUnicodeN(
|
|
PWSTR UnicodeString,
|
|
PULONG BytesInUnicodeString,
|
|
PCHAR MultiByteString,
|
|
ULONG BytesInMultiByteString
|
|
);
|
|
|
|
NTSTATUS
|
|
xxxRtlUnicodeToMultiByteN(
|
|
PCHAR MultiByteString,
|
|
PULONG BytesInMultiByteString,
|
|
PWSTR UnicodeString,
|
|
ULONG BytesInUnicodeString
|
|
);
|
|
|
|
#endif // DBCS
|
|
|
|
#ifndef DBCS
|
|
// SHUNK: A_fwrite is not called from RC. Remove this for now.
|
|
|
|
INT
|
|
A_fwrite (
|
|
IN CHAR *pchMBString,
|
|
IN INT nSizeOfItem,
|
|
IN INT nCountToWrite,
|
|
IN FILE *fpOutputFile
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function will write out an 8-bit string as a unicode string.
|
|
Note, this function is very slow, but hey, I don't have time to optimize
|
|
it now.
|
|
As of Aug 91, only codepage 1252 is being supported.
|
|
|
|
Arguments:
|
|
|
|
pchMBString - This is a 8-bit multi byte string to write to the file
|
|
as a unicode string.
|
|
|
|
nSizeOfItem - Ignored, we always use sizeof (CHAR).
|
|
|
|
nCountToWrite - How long is this string.
|
|
|
|
fpOutputFile - File pointer to send the character.
|
|
|
|
Return Value:
|
|
|
|
The number of bytes written.
|
|
If the return does not equal nCountToWrite than an error has occured at
|
|
some point in the write.
|
|
|
|
--*/
|
|
|
|
{
|
|
WCHAR wchUniCharToWrite;
|
|
INT cCountWritten = 0;
|
|
|
|
UNREFERENCED_PARAMETER(nSizeOfItem);
|
|
|
|
//
|
|
// Write the string out as a two byte unicode string.
|
|
// For now do this with multiple calls to U_fputc.
|
|
//
|
|
|
|
while (nCountToWrite--) {
|
|
|
|
wchUniCharToWrite = RtlAnsiCharToUnicodeChar(&pchMBString);
|
|
|
|
//
|
|
// Write the current unicode char, break if an error occured.
|
|
//
|
|
|
|
if (U_fputc (wchUniCharToWrite, fpOutputFile) ==
|
|
(INT)wchUniCharToWrite) {
|
|
|
|
break;
|
|
}
|
|
|
|
cCountWritten++;
|
|
}
|
|
|
|
return (cCountWritten);
|
|
}
|
|
|
|
#endif // DBCS
|
|
|
|
|
|
#ifndef DBCS
|
|
// SHUNK: U_fwrite is not called from RC. Remove this for now.
|
|
|
|
INT
|
|
U_fwrite (
|
|
IN WCHAR *pwchUnicodeString,
|
|
IN INT nSizeOfItem,
|
|
IN INT nCountToWrite,
|
|
IN FILE *fpOutputFile
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function will write out a 16-bit string directly. It does no
|
|
translation on the string as it is written.
|
|
|
|
Arguments:
|
|
|
|
pchUnicodeString - This is a 16-bit unicode string to write to the file.
|
|
|
|
nSizeOfItem - Ignored. We always use sizeof (WCHAR).
|
|
|
|
nCountToWrite - How long is this string.
|
|
|
|
fpOutputFile - File pointer to send the character.
|
|
|
|
Return Value:
|
|
|
|
The number of bytes written.
|
|
If the return does not equal nCountToWrite than an error has occured at
|
|
some point in the write.
|
|
|
|
--*/
|
|
|
|
{
|
|
UNREFERENCED_PARAMETER(nSizeOfItem);
|
|
//
|
|
// Write the string out as a two byte unicode string.
|
|
//
|
|
|
|
return (fwrite (pwchUnicodeString, sizeof (WCHAR), nCountToWrite,
|
|
fpOutputFile));
|
|
}
|
|
|
|
#endif // DBCS
|
|
|
|
|
|
#ifndef DBCS
|
|
// SHUNK: A_fputc is not called from RC. Remove this for now.
|
|
|
|
INT
|
|
A_fputc (
|
|
IN CHAR chCharToWrite,
|
|
IN FILE *fpOutputFile
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is translates the character passed to it using the 1252
|
|
codepage and then sends it to U_fputc.
|
|
As of Aug 91, only codepage 1252 is being supported.
|
|
|
|
Arguments:
|
|
|
|
chCharToWrite - This is a 8-bit character to be output.
|
|
|
|
fpOutputFile - File pointer to send the character.
|
|
|
|
Return Value:
|
|
|
|
The character written.
|
|
EOF = There was some sort of error writing the data out.
|
|
|
|
--*/
|
|
|
|
{
|
|
WCHAR wchUniCharToWrite;
|
|
PUCHAR puch;
|
|
|
|
//
|
|
// Translate the char and write it as it's unicode equivalent.
|
|
//
|
|
|
|
puch = &chCharToWrite;
|
|
wchUniCharToWrite = RtlAnsiCharToUnicodeChar(&puch);
|
|
|
|
if (U_fputc (wchUniCharToWrite, fpOutputFile) == (INT)wchUniCharToWrite) {
|
|
|
|
return ((INT)chCharToWrite);
|
|
}
|
|
else {
|
|
|
|
return (EOF);
|
|
}
|
|
}
|
|
|
|
#endif // DBCS
|
|
|
|
|
|
#ifndef DBCS
|
|
// SHUNK: U_fputc is not called from RC. Remove this for now.
|
|
|
|
INT
|
|
U_fputc (
|
|
IN WCHAR wcCharToWrite,
|
|
IN FILE *fpOutputFile
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is simply the unicode version of fputc. It will output
|
|
a two byte character instead of the standard byte.
|
|
|
|
Arguments:
|
|
|
|
wcCharToWrite - This is a 16-bit unicode character to be output.
|
|
It is assumed that any codepage translation has
|
|
already been done to the character.
|
|
|
|
fpOutputFile - File pointer to send the character.
|
|
|
|
Return Value:
|
|
|
|
The character written.
|
|
EOF = There was some sort of error writing the data out.
|
|
|
|
--*/
|
|
|
|
{
|
|
INT cCountWritten;
|
|
|
|
//
|
|
// Write the char out as a two byte unicode character.
|
|
//
|
|
|
|
cCountWritten = fwrite (&wcCharToWrite, sizeof (WCHAR), 1, fpOutputFile);
|
|
|
|
if (cCountWritten == sizeof (WCHAR)) {
|
|
|
|
return (wcCharToWrite); // Successful write.
|
|
|
|
}
|
|
else {
|
|
|
|
#ifdef ASSERT_ERRORS
|
|
printf ("Error writing character in U_fputc\n");
|
|
exit (1);
|
|
#endif
|
|
return (EOF); // Some sort of error occured.
|
|
|
|
}
|
|
}
|
|
|
|
#endif // DBCS
|
|
|
|
|
|
BOOL
|
|
UnicodeFromMBString (
|
|
OUT WCHAR *pwchUnicodeString,
|
|
IN CHAR *pchMBString,
|
|
IN INT nCountStrLength
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function will translate a multi-byte string into it's unicode
|
|
equivalent. Note that the destination unicode string must be large
|
|
enough to hold the translated bytes.
|
|
As of Aug 91, only codepage 1252 is being supported.
|
|
|
|
Arguments:
|
|
|
|
pwchUnicodeString - This is a pointer to storage for the destination
|
|
unicode string. Note it must be nCountStrLength
|
|
large.
|
|
|
|
pchMBString - Pointer to the input multi-byte string to convert.
|
|
|
|
nCountStrLength - Count of bytes to translate.
|
|
|
|
Return Value:
|
|
|
|
TRUE - All of the characters mapped correctly into Unicode.
|
|
FALSE - One or more characters did not map. These characters have
|
|
been translated to 0xFFFF. The rest of the string has been
|
|
converted correctly.
|
|
|
|
--*/
|
|
|
|
{
|
|
#ifdef DBCS
|
|
NTSTATUS Status;
|
|
|
|
//
|
|
// Convert ANSI string to Unicode string based on ACP.
|
|
//
|
|
Status = xxxRtlMultiByteToUnicodeN(pwchUnicodeString,
|
|
NULL,
|
|
pchMBString,
|
|
nCountStrLength);
|
|
|
|
return(NT_SUCCESS(Status)? TRUE : FALSE);
|
|
#else // !DBCS
|
|
UNICODE_STRING Unicode;
|
|
ANSI_STRING Ansi;
|
|
|
|
Ansi.MaximumLength = Ansi.Length = nCountStrLength;
|
|
Unicode.MaximumLength = nCountStrLength*sizeof(WCHAR) + sizeof(WCHAR);
|
|
Ansi.Buffer = pchMBString;
|
|
Unicode.Buffer = pwchUnicodeString;
|
|
return RtlAnsiStringToUnicodeString(&Unicode,&Ansi,FALSE)==STATUS_SUCCESS;
|
|
#endif // !DBCS
|
|
|
|
}
|
|
|
|
|
|
|
|
BOOL
|
|
MBStringFromUnicode (
|
|
OUT CHAR *pchMBString,
|
|
IN WCHAR *pwchUnicodeString,
|
|
IN INT nCountStrLength
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function will translate a unicode string into a multi-byte string.
|
|
Note that the destination string must be large enough to hold the
|
|
translated bytes.
|
|
As of Aug 91, only the translation is simply done by truncating the
|
|
unicode character. We do this because we are not expecting anything
|
|
strange.
|
|
|
|
Arguments:
|
|
|
|
pwchUnicodeString - This is a pointer to storage for the destination
|
|
unicode string. Note it must be nCountStrLength
|
|
large.
|
|
|
|
pchMBString - Pointer to the input multi-byte string to convert.
|
|
|
|
nCountStrLength - Count of bytes to translate.
|
|
|
|
Return Value:
|
|
|
|
TRUE - All of the characters mapped correctly into the MB string.
|
|
FALSE - One or more characters did not map. As of Aug 91, this will
|
|
never happen.
|
|
|
|
--*/
|
|
|
|
{
|
|
#ifdef DBCS
|
|
NTSTATUS Status;
|
|
|
|
//
|
|
// Convert Unicode string to ANSI string based on ACP.
|
|
//
|
|
Status = xxxRtlUnicodeToMultiByteN(pchMBString,
|
|
NULL,
|
|
pwchUnicodeString,
|
|
nCountStrLength);
|
|
|
|
return(NT_SUCCESS(Status)? TRUE : FALSE);
|
|
#else // !DBCS
|
|
UNICODE_STRING Unicode;
|
|
ANSI_STRING Ansi;
|
|
|
|
Unicode.Length = nCountStrLength*sizeof(WCHAR);
|
|
Unicode.MaximumLength = nCountStrLength*sizeof(WCHAR)+sizeof(WCHAR);
|
|
Ansi.MaximumLength = Unicode.MaximumLength / sizeof(WCHAR);
|
|
Ansi.Buffer = pchMBString;
|
|
Unicode.Buffer = pwchUnicodeString;
|
|
return RtlUnicodeStringToAnsiString(&Ansi,&Unicode,FALSE)==STATUS_SUCCESS;
|
|
#endif // !DBCS
|
|
}
|
|
|
|
|
|
|
|
#ifndef DBCS
|
|
// SHUNK: Char1252FromUnicode() is not called any more.
|
|
|
|
INT
|
|
Char1252FromUnicode (
|
|
IN WCHAR wchUnicodeChar
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function will translate a unicode character into it's equivalent
|
|
codepage 1252 character. If the character does not map correctly,
|
|
then 0xFFFF is returned.
|
|
|
|
Arguments:
|
|
|
|
wchUnicodeChar - This is a 16-bit unicode character.
|
|
|
|
Return Value:
|
|
|
|
Value <= 0xFF - Codepage 1252 equivalent for this string.
|
|
0xFFFF - The character did not translate properly.
|
|
|
|
--*/
|
|
|
|
{
|
|
UNICODE_STRING Unicode;
|
|
ANSI_STRING Ansi;
|
|
UCHAR c;
|
|
INT s;
|
|
|
|
Ansi.Length = Unicode.Length = 1;
|
|
Ansi.MaximumLength = Unicode.MaximumLength = 1;
|
|
Ansi.Buffer = &c;
|
|
Unicode.Buffer = &wchUnicodeChar;
|
|
s = RtlUnicodeStringToAnsiString(&Ansi,&Unicode,FALSE);
|
|
if (s != STATUS_SUCCESS)
|
|
return 0xffff;
|
|
return (INT)c;
|
|
|
|
}
|
|
|
|
#endif // DBCS
|
|
|
|
|
|
INT
|
|
DetermineFileType (
|
|
IN FILE *fpInputFile
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is used to determine what type of file is being read.
|
|
Note that it assumes that the first few bytes of the given file contain
|
|
mostly ascii characters. This routine was originally intended for use
|
|
on .rc files and include files.
|
|
Note, the file is returned to it's proper position after function.
|
|
|
|
Arguments:
|
|
|
|
fpInputFile - File pointer to file we are checking, must be
|
|
open with read permissions.
|
|
|
|
Return Value:
|
|
|
|
DFT_FILE_IS_UNKNOWN - It was impossible to determine what type of file
|
|
we were checking. This usually happens when EOF
|
|
is unexpectedly reached.
|
|
DFT_FILE_IS_8_BIT - File was determined to be in standard 8-bit
|
|
format.
|
|
DFT_FILE_IS_16_BIT - File was determined to be a 16 bit unicode file
|
|
which can be directly read into a WCHAR array.
|
|
DFT_FILE_IS_16_BIT_REV - File was*/
|
|
|
|
{
|
|
CHAR rgchTestBytes [DFT_TEST_SIZE << 2]; // Storage for test data.
|
|
|
|
INT cNumberBytesTested = 0; // Test information.
|
|
|
|
INT cNumberOddZerosFound = 0;
|
|
INT cNumberEvenZerosFound = 0;
|
|
INT cNumberAsciiFound = 0;
|
|
INT cCountRead; // Temp storage for count read.
|
|
|
|
LONG lStartFilePos; // Storage for file position.
|
|
|
|
INT fSysEndianType; // System endian type.
|
|
|
|
INT fFileType = DFT_FILE_IS_UNKNOWN;// File type, when found.
|
|
|
|
fSysEndianType = DetermineSysEndianType ();
|
|
|
|
//
|
|
// Store position so we can get back to it.
|
|
//
|
|
lStartFilePos = ftell (fpInputFile);
|
|
|
|
//
|
|
// Make sure we start on an even byte to simplify routines.
|
|
//
|
|
if (lStartFilePos % 2) {
|
|
|
|
fgetc (fpInputFile);
|
|
}
|
|
|
|
do {
|
|
INT wT;
|
|
|
|
//
|
|
// Read in the first test segment.
|
|
//
|
|
|
|
cCountRead = fread (rgchTestBytes, sizeof (CHAR), DFT_TEST_SIZE << 2,
|
|
fpInputFile);
|
|
|
|
//
|
|
// Determine results and add to totals.
|
|
//
|
|
|
|
for (wT = 0; wT < cCountRead; wT++) {
|
|
|
|
if (rgchTestBytes [wT] == 0) {
|
|
|
|
if (wT % 2) {
|
|
|
|
cNumberOddZerosFound++;
|
|
|
|
}
|
|
else {
|
|
|
|
cNumberEvenZerosFound++;
|
|
}
|
|
}
|
|
|
|
if (isprint (rgchTestBytes [wT]) ||
|
|
rgchTestBytes[wT] == '\t' ||
|
|
rgchTestBytes[wT] == '\n' ||
|
|
rgchTestBytes[wT] == '\r') {
|
|
|
|
cNumberAsciiFound++;
|
|
}
|
|
}
|
|
|
|
cNumberBytesTested += cCountRead;
|
|
|
|
//
|
|
// Check if we have a definite pattern.
|
|
//
|
|
|
|
{
|
|
INT cMajorityTested; // 80% of the bytes tested.
|
|
|
|
cMajorityTested = cNumberBytesTested << 2;
|
|
cMajorityTested /= 5;
|
|
|
|
if (cNumberAsciiFound > cMajorityTested) {
|
|
|
|
fFileType = DFT_FILE_IS_8_BIT;
|
|
|
|
}
|
|
else if (cNumberOddZerosFound > (cMajorityTested >> 1)) {
|
|
|
|
//
|
|
// File type was determined to be little endian.
|
|
// If system is also little endian, byte order is correct.
|
|
//
|
|
fFileType = (fSysEndianType == DSE_SYS_LITTLE_ENDIAN) ?
|
|
DFT_FILE_IS_16_BIT : DFT_FILE_IS_16_BIT_REV;
|
|
|
|
}
|
|
else if (cNumberEvenZerosFound > (cMajorityTested >> 1)) {
|
|
|
|
//
|
|
// File type was determined to be big endian.
|
|
// If system is also big endian, byte order is correct.
|
|
//
|
|
fFileType = (fSysEndianType == DSE_SYS_LITTLE_ENDIAN) ?
|
|
DFT_FILE_IS_16_BIT_REV : DFT_FILE_IS_16_BIT;
|
|
|
|
}
|
|
}
|
|
|
|
} while (cCountRead == (DFT_TEST_SIZE << 2) &&
|
|
fFileType == DFT_FILE_IS_UNKNOWN);
|
|
|
|
//
|
|
// Return to starting file position. (usually beginning)
|
|
//
|
|
|
|
fseek (fpInputFile, lStartFilePos, SEEK_SET);
|
|
|
|
return (fFileType);
|
|
}
|
|
|
|
|
|
|
|
INT
|
|
DetermineSysEndianType (
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is used to determine how the current system stores its
|
|
integers in memory.
|
|
|
|
For those of us who are confused by little endian and big endian formats,
|
|
here is a breif recap.
|
|
|
|
Little Endian: (This is used on Intel 80x86 chips. The MIPS RS4000 chip
|
|
is switchable, but will run in little endian format for NT.)
|
|
This is where the high order bytes of a short or long are stored higher
|
|
in memory. For example the number 0x80402010 is stored as follows.
|
|
Address: Value:
|
|
00 10
|
|
01 20
|
|
02 40
|
|
03 80
|
|
This looks backwards when memory is dumped in order: 10 20 40 80
|
|
|
|
Big Endian: (This is not currently used on any NT systems but hey, this
|
|
is supposed to be portable!!)
|
|
This is where the high*/
|
|
|
|
{
|
|
INT nCheckInteger;
|
|
CHAR rgchTestBytes [sizeof (INT)];
|
|
|
|
//
|
|
// Clear the test bytes to zero.
|
|
//
|
|
|
|
*((INT * )rgchTestBytes) = 0;
|
|
|
|
//
|
|
// Set first to some value.
|
|
//
|
|
|
|
rgchTestBytes [0] = (CHAR)0xFF;
|
|
|
|
//
|
|
// Map it to an integer.
|
|
//
|
|
|
|
nCheckInteger = *((INT * )rgchTestBytes);
|
|
|
|
//
|
|
// See if value was stored in low order of integer.
|
|
// If so then system is little endian.
|
|
//
|
|
|
|
if (nCheckInteger == 0xFF) {
|
|
|
|
return (DSE_SYS_LITTLE_ENDIAN);
|
|
}
|
|
else {
|
|
|
|
return (DSE_SYS_LITTLE_ENDIAN);
|
|
}
|
|
|
|
}
|
|
|
|
|