You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
485 lines
9.9 KiB
485 lines
9.9 KiB
/*++
|
|
|
|
Copyright (c) 1993 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
Unitext.c
|
|
|
|
Abstract:
|
|
|
|
Main module for unicode <--> ansi/oem text file translator.
|
|
|
|
This program converts files between unicode and multibyte
|
|
character sets (ansi or oem). Usage is as follows:
|
|
|
|
unitext [-m|-u] [-o|-a|-<nnn>] [-z] <src_file> <dst_file>
|
|
|
|
Author:
|
|
|
|
Ted Miller (tedm) 16-June-1993
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#include "unitext.h"
|
|
#include <wchar.h>
|
|
|
|
|
|
//
|
|
// Globals and prototypes for use within this module.
|
|
//
|
|
|
|
//
|
|
// Unicode argc/argv.
|
|
//
|
|
int _argcW;
|
|
PWCHAR *_argvW;
|
|
|
|
//
|
|
// Codepage for multibyte file.
|
|
//
|
|
DWORD CodePage = (DWORD)(-1);
|
|
|
|
//
|
|
// File handles.
|
|
//
|
|
HANDLE SourceFileHandle,TargetFileHandle;
|
|
|
|
//
|
|
// Size of source file.
|
|
//
|
|
DWORD SourceFileSize;
|
|
|
|
//
|
|
// Type of the multibyte file (source or destination).
|
|
//
|
|
DWORD MultibyteType = TFILE_NONE;
|
|
|
|
//
|
|
// Conversion type.
|
|
//
|
|
DWORD ConversionType = CONVERT_NONE;
|
|
DWORD ConversionOption = CHECK_NONE;
|
|
DWORD ConversionCheck = CHECK_NONE;
|
|
|
|
//
|
|
// Filenames.
|
|
//
|
|
LPWSTR SourceFilename = NULL,
|
|
TargetFilename = NULL;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BOOL
|
|
_ParseCommandLineArgs(
|
|
VOID
|
|
);
|
|
|
|
VOID
|
|
_CheckFilesAndOpen(
|
|
VOID
|
|
);
|
|
|
|
|
|
VOID
|
|
__cdecl
|
|
main(
|
|
VOID
|
|
)
|
|
{
|
|
//
|
|
// Get command line arguments.
|
|
//
|
|
if(!InitializeUnicodeArguments(&_argcW,&_argvW)) {
|
|
ErrorAbort(MSG_INSUFFICIENT_MEMORY);
|
|
}
|
|
|
|
//
|
|
// Parse command line arguments.
|
|
//
|
|
if(!_ParseCommandLineArgs()) {
|
|
ErrorAbort(MSG_USAGE);
|
|
}
|
|
|
|
//
|
|
// Check source and destination files.
|
|
//
|
|
_CheckFilesAndOpen();
|
|
|
|
|
|
//
|
|
// Perform conversion.
|
|
//
|
|
switch(ConversionType) {
|
|
|
|
case MB_TO_UNICODE:
|
|
|
|
MultibyteTextFileToUnicode(
|
|
SourceFilename,
|
|
TargetFilename,
|
|
SourceFileHandle,
|
|
TargetFileHandle,
|
|
SourceFileSize,
|
|
CodePage
|
|
);
|
|
|
|
break;
|
|
|
|
case UNICODE_TO_MB:
|
|
|
|
UnicodeTextFileToMultibyte(
|
|
SourceFilename,
|
|
TargetFilename,
|
|
SourceFileHandle,
|
|
TargetFileHandle,
|
|
SourceFileSize,
|
|
CodePage
|
|
);
|
|
|
|
break;
|
|
}
|
|
|
|
CloseHandle(SourceFileHandle);
|
|
CloseHandle(TargetFileHandle);
|
|
|
|
//
|
|
// Clean up and exit.
|
|
//
|
|
FreeUnicodeArguments(_argcW,_argvW);
|
|
}
|
|
|
|
|
|
|
|
|
|
BOOL
|
|
_ParseCommandLineArgs(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Parse command line arguments.
|
|
|
|
Arguments:
|
|
|
|
None. Uses globals _argcW and _argvW.
|
|
|
|
Return Value:
|
|
|
|
FALSE if invalid arguments specified.
|
|
|
|
--*/
|
|
|
|
{
|
|
int argc;
|
|
PWCHAR *argv;
|
|
PWCHAR arg;
|
|
|
|
|
|
//
|
|
// Initialize local variables.
|
|
//
|
|
argc = _argcW;
|
|
argv = _argvW;
|
|
|
|
//
|
|
// Skip argv[0] (the program name).
|
|
//
|
|
if(argc) {
|
|
argc--;
|
|
argv++;
|
|
}
|
|
|
|
while(argc) {
|
|
|
|
arg = *argv;
|
|
|
|
if((*arg == L'-') || (*arg == L'/')) {
|
|
|
|
switch(*(++arg)) {
|
|
|
|
case L'a':
|
|
case L'A':
|
|
|
|
// if already specifed, error
|
|
if(MultibyteType != TFILE_NONE) {
|
|
return(FALSE);
|
|
}
|
|
MultibyteType = TFILE_ANSI;
|
|
break;
|
|
|
|
case L'o':
|
|
case L'O':
|
|
|
|
// if already specifed, error
|
|
if(MultibyteType != TFILE_NONE) {
|
|
return(FALSE);
|
|
}
|
|
MultibyteType = TFILE_OEM;
|
|
break;
|
|
|
|
case L'm':
|
|
case L'M':
|
|
|
|
if(ConversionType != CONVERT_NONE) {
|
|
return(FALSE);
|
|
}
|
|
|
|
ConversionType = MB_TO_UNICODE;
|
|
break;
|
|
|
|
case L'u':
|
|
case L'U':
|
|
|
|
if(ConversionType != CONVERT_NONE) {
|
|
return(FALSE);
|
|
}
|
|
|
|
ConversionType = UNICODE_TO_MB;
|
|
break;
|
|
|
|
case L'z':
|
|
case L'Z':
|
|
|
|
if(ConversionCheck != CHECK_NONE) {
|
|
return(FALSE);
|
|
}
|
|
|
|
ConversionCheck = CHECK_CONVERSION;
|
|
break;
|
|
|
|
default:
|
|
|
|
if(iswdigit(*arg)) {
|
|
|
|
if((CodePage != (DWORD)(-1)) || (MultibyteType != TFILE_NONE)) {
|
|
return(FALSE);
|
|
}
|
|
|
|
swscanf(arg,L"%u",&CodePage);
|
|
|
|
MultibyteType = TFILE_USERCP;
|
|
|
|
} else {
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
} else {
|
|
|
|
if(SourceFilename == NULL) {
|
|
|
|
SourceFilename = arg;
|
|
|
|
} else if(TargetFilename == NULL) {
|
|
|
|
TargetFilename = arg;
|
|
|
|
} else {
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
}
|
|
|
|
argv++;
|
|
argc--;
|
|
}
|
|
|
|
//
|
|
// Must have source, destination filenames.
|
|
//
|
|
if(!SourceFilename || !TargetFilename) {
|
|
return(FALSE);
|
|
}
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
|
|
|
|
|
|
VOID
|
|
_CheckFilesAndOpen(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Open the source and destination files, and try to make a guess
|
|
about the type of the source file. If we think the source file is
|
|
a different type than the user specified, print a warning.
|
|
|
|
Also check the codepage given by the user.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None. Does not return if a serious error occurs.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD SourceFileType;
|
|
UCHAR FirstPartOfSource[256];
|
|
DWORD ReadSize;
|
|
|
|
//
|
|
// Determine and check codepage. Default to oem.
|
|
//
|
|
switch(MultibyteType) {
|
|
case TFILE_ANSI:
|
|
CodePage = GetACP();
|
|
case TFILE_USERCP:
|
|
break;
|
|
default: // oem or none.
|
|
CodePage = GetOEMCP();
|
|
break;
|
|
}
|
|
|
|
if(!IsValidCodePage(CodePage)) {
|
|
ErrorAbort(MSG_BAD_CODEPAGE,CodePage);
|
|
}
|
|
|
|
//
|
|
// Try to open the source file.
|
|
//
|
|
SourceFileHandle = CreateFileW(
|
|
SourceFilename,
|
|
GENERIC_READ,
|
|
FILE_SHARE_READ,
|
|
NULL,
|
|
OPEN_EXISTING,
|
|
0,
|
|
NULL
|
|
);
|
|
|
|
|
|
if(SourceFileHandle == INVALID_HANDLE_VALUE) {
|
|
ErrorAbort(MSG_CANT_OPEN_SOURCE,SourceFilename,GetLastError());
|
|
}
|
|
|
|
//
|
|
// Attempt to determine to determine the size of the source file.
|
|
//
|
|
SourceFileSize = GetFileSize(SourceFileHandle,NULL);
|
|
if(SourceFileSize == -1) {
|
|
ErrorAbort(MSG_CANT_GET_SIZE,SourceFilename,GetLastError());
|
|
}
|
|
|
|
//
|
|
// Filter out 0-length files here.
|
|
//
|
|
if(!SourceFileSize) {
|
|
ErrorAbort(MSG_ZERO_LENGTH,SourceFilename);
|
|
}
|
|
|
|
//
|
|
// Assume multibyte.
|
|
//
|
|
SourceFileType = TFILE_MULTIBYTE;
|
|
|
|
//
|
|
// Read first 256 bytes of file and call win32 api
|
|
// to determine if the text is probably unicode.
|
|
//
|
|
ReadSize = min(SourceFileSize,256);
|
|
MyReadFile(SourceFileHandle,FirstPartOfSource,ReadSize,SourceFilename);
|
|
if(IsTextUnicode(FirstPartOfSource,ReadSize,NULL)) {
|
|
SourceFileType = TFILE_UNICODE;
|
|
}
|
|
|
|
//
|
|
// If the user did not specify a conversion type, set it here
|
|
// based on the above test.
|
|
//
|
|
if(ConversionType == CONVERT_NONE) {
|
|
|
|
ConversionType = (SourceFileType == TFILE_UNICODE)
|
|
? UNICODE_TO_MB
|
|
: MB_TO_UNICODE;
|
|
} else {
|
|
|
|
if(ConversionCheck == CHECK_CONVERSION) {
|
|
if(ConversionType == UNICODE_TO_MB) {
|
|
ConversionOption = CHECK_IF_NOT_UNICODE;
|
|
}
|
|
else if(ConversionType == MB_TO_UNICODE) {
|
|
ConversionOption = CHECK_ALREADY_UNICODE;
|
|
}
|
|
else {
|
|
ConversionOption = CHECK_NONE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// check if the file is UNICODE and we are trying to convert from MB_TO_UNICODE
|
|
// then issue an warning and exit
|
|
|
|
if((ConversionType == MB_TO_UNICODE) &&
|
|
(SourceFileType == TFILE_UNICODE) &&
|
|
(ConversionOption == CHECK_ALREADY_UNICODE)) {
|
|
CloseHandle(SourceFileHandle);
|
|
MsgPrintfW(MSG_ERR_SRC_IS_UNICODE,SourceFilename);
|
|
FreeUnicodeArguments(_argcW,_argvW);
|
|
exit(0);
|
|
}
|
|
|
|
//
|
|
// check if the file is not unicode and if we are trying to convert from
|
|
// unicode to MB, then issue an warning and exit
|
|
|
|
if((ConversionType == UNICODE_TO_MB) &&
|
|
(SourceFileType != TFILE_UNICODE) &&
|
|
(ConversionOption == CHECK_IF_NOT_UNICODE)) {
|
|
CloseHandle(SourceFileHandle);
|
|
MsgPrintfW(MSG_ERR_SRC_IS_MB,SourceFilename);
|
|
FreeUnicodeArguments(_argcW,_argvW);
|
|
exit(0);
|
|
}
|
|
//
|
|
// Check to see if what we guessed is what the user asked for.
|
|
// If not, issue a warning.
|
|
//
|
|
|
|
if((ConversionType == UNICODE_TO_MB) && (SourceFileType != TFILE_UNICODE)) {
|
|
MsgPrintfW(MSG_WARN_SRC_IS_MB,SourceFilename);
|
|
} else {
|
|
if((ConversionType == MB_TO_UNICODE) && (SourceFileType == TFILE_UNICODE)) {
|
|
MsgPrintfW(MSG_WARN_SRC_IS_UNICODE,SourceFilename);
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Try to create target file.
|
|
//
|
|
TargetFileHandle = CreateFileW(
|
|
TargetFilename,
|
|
GENERIC_READ | GENERIC_WRITE,
|
|
0,
|
|
NULL,
|
|
CREATE_ALWAYS,
|
|
FILE_ATTRIBUTE_NORMAL,
|
|
NULL
|
|
);
|
|
|
|
if(TargetFileHandle == INVALID_HANDLE_VALUE) {
|
|
ErrorAbort(MSG_CANT_OPEN_TARGET,TargetFilename,GetLastError());
|
|
}
|
|
}
|