/*++

Copyright (c) 1993 Microsoft Corporation

Module Name:

    Unitext.c

Abstract:

    Main module for unicode <--> ansi/oem text file translator.

    This program converts files between unicode and multibyte
    character sets (ansi or oem).  Usage is as follows:

        unitext [-m|-u] [-o|-a|-<nnn>] [-z] <src_file> <dst_file>

        -m      source is multibyte; convert it to unicode.
        -u      source is unicode; convert it to multibyte.
                If neither is given, the direction is guessed from
                the first bytes of the source file.
        -o      multibyte file uses the oem codepage (the default).
        -a      multibyte file uses the ansi codepage.
        -<nnn>  multibyte file uses codepage number nnn.
        -z      together with -m or -u: if the source file does not
                look like what was asked for, print a message and
                exit instead of only warning.

    Options may be introduced with either '-' or '/'.

Author:

    Ted Miller (tedm) 16-June-1993

Revision History:

--*/

#include "unitext.h"
#include <wchar.h>


//
// Globals and prototypes for use within this module.
//

//
// Unicode argc/argv.
//
int     _argcW;
PWCHAR *_argvW;

//
// Codepage for multibyte file.
// (DWORD)(-1) means "not specified on the command line".
//
DWORD CodePage = (DWORD)(-1);

//
// File handles.
//
HANDLE SourceFileHandle,TargetFileHandle;

//
// Size of source file.
//
DWORD SourceFileSize;

//
// Type of the multibyte file (source or destination).
//
DWORD MultibyteType = TFILE_NONE;

//
// Conversion type.
//
DWORD ConversionType   = CONVERT_NONE;
DWORD ConversionOption = CHECK_NONE;
DWORD ConversionCheck  = CHECK_NONE;

//
// Filenames.
//
LPWSTR SourceFilename = NULL,
       TargetFilename = NULL;


BOOL
_ParseCommandLineArgs(
    VOID
    );

VOID
_CheckFilesAndOpen(
    VOID
    );


VOID
__cdecl
main(
    VOID
    )

/*++

Routine Description:

    Program entry point.  Fetches the unicode command line, parses it,
    opens the source and target files and dispatches to the conversion
    routine selected by ConversionType.

Arguments:

    None.

Return Value:

    None.

--*/

{
    //
    // Get command line arguments.
    //
    if(!InitializeUnicodeArguments(&_argcW,&_argvW)) {
        ErrorAbort(MSG_INSUFFICIENT_MEMORY);
    }

    //
    // Parse command line arguments.
    //
    if(!_ParseCommandLineArgs()) {
        ErrorAbort(MSG_USAGE);
    }

    //
    // Check source and destination files.
    //
    _CheckFilesAndOpen();

    //
    // Perform conversion.
    //
    switch(ConversionType) {

    case MB_TO_UNICODE:

        MultibyteTextFileToUnicode(
            SourceFilename,
            TargetFilename,
            SourceFileHandle,
            TargetFileHandle,
            SourceFileSize,
            CodePage
            );

        break;

    case UNICODE_TO_MB:

        UnicodeTextFileToMultibyte(
            SourceFilename,
            TargetFilename,
            SourceFileHandle,
            TargetFileHandle,
            SourceFileSize,
            CodePage
            );

        break;

    default:

        //
        // _CheckFilesAndOpen always selects one of the two
        // conversions above, so there is nothing to do here.
        //
        break;
    }

    CloseHandle(SourceFileHandle);
    CloseHandle(TargetFileHandle);

    //
    // Clean up and exit.
    //
    FreeUnicodeArguments(_argcW,_argvW);
}


BOOL
_ParseCommandLineArgs(
    VOID
    )

/*++

Routine Description:

    Parse command line arguments.

    Sets the globals MultibyteType, ConversionType, ConversionCheck,
    CodePage, SourceFilename and TargetFilename.  Each kind of option
    may be given at most once, and exactly two filenames are required.

    Only the first character after the switch character selects the
    letter options.  A codepage option (-<nnn>) must consist entirely
    of decimal digits.

Arguments:

    None.  Uses globals _argcW and _argvW.

Return Value:

    FALSE if invalid arguments specified.

--*/

{
    int     argc;
    PWCHAR *argv;
    PWCHAR  arg;
    PWCHAR  NumberEnd;
    unsigned long CodePageValue;

    //
    // Initialize local variables.
    //
    argc = _argcW;
    argv = _argvW;

    //
    // Skip argv[0] (the program name).
    //
    if(argc) {
        argc--;
        argv++;
    }

    while(argc) {

        arg = *argv;

        if((*arg == L'-') || (*arg == L'/')) {

            switch(*(++arg)) {

            case L'a':
            case L'A':

                // if already specified, error
                if(MultibyteType != TFILE_NONE) {
                    return(FALSE);
                }
                MultibyteType = TFILE_ANSI;
                break;

            case L'o':
            case L'O':

                // if already specified, error
                if(MultibyteType != TFILE_NONE) {
                    return(FALSE);
                }
                MultibyteType = TFILE_OEM;
                break;

            case L'm':
            case L'M':

                if(ConversionType != CONVERT_NONE) {
                    return(FALSE);
                }
                ConversionType = MB_TO_UNICODE;
                break;

            case L'u':
            case L'U':

                if(ConversionType != CONVERT_NONE) {
                    return(FALSE);
                }
                ConversionType = UNICODE_TO_MB;
                break;

            case L'z':
            case L'Z':

                if(ConversionCheck != CHECK_NONE) {
                    return(FALSE);
                }
                ConversionCheck = CHECK_CONVERSION;
                break;

            default:

                //
                // Anything else must be an explicit codepage number.
                // This also rejects a bare switch character.
                //
                if(!iswdigit(*arg)) {
                    return(FALSE);
                }

                // if a codepage or file type was already specified, error
                if((CodePage != (DWORD)(-1)) || (MultibyteType != TFILE_NONE)) {
                    return(FALSE);
                }

                //
                // Convert the number and insist that the whole argument
                // was consumed, so that something like -437abc is a usage
                // error instead of being silently read as 437.  An
                // out-of-range result is left for the codepage validity
                // check in _CheckFilesAndOpen.
                //
                CodePageValue = wcstoul(arg,&NumberEnd,10);
                if(*NumberEnd != L'\0') {
                    return(FALSE);
                }

                CodePage = (DWORD)CodePageValue;
                MultibyteType = TFILE_USERCP;
                break;
            }

        } else {

            //
            // Not an option: first the source, then the target filename.
            //
            if(SourceFilename == NULL) {

                SourceFilename = arg;

            } else if(TargetFilename == NULL) {

                TargetFilename = arg;

            } else {

                return(FALSE);
            }
        }

        argv++;
        argc--;
    }

    //
    // Must have source, destination filenames.
    //
    if(!SourceFilename || !TargetFilename) {
        return(FALSE);
    }

    return(TRUE);
}


VOID
_CheckFilesAndOpen(
    VOID
    )

/*++

Routine Description:

    Open the source and destination files, and try to make a guess
    about the type of the source file.

    If we think the source file is a different type than the user
    specified, print a warning.  If the user also asked for a
    conversion check (ConversionCheck == CHECK_CONVERSION), print a
    message and exit the process instead.

    Also check the codepage given by the user.

    On return SourceFileHandle, TargetFileHandle, SourceFileSize,
    CodePage and ConversionType are set.

Arguments:

    None.

Return Value:

    None.  Does not return if a serious error occurs.

--*/

{
    DWORD SourceFileType;
    UCHAR FirstPartOfSource[256];
    DWORD ReadSize;

    //
    // Determine and check codepage.  Default to oem.
    //
    switch(MultibyteType) {

    case TFILE_ANSI:

        CodePage = GetACP();
        break;

    case TFILE_USERCP:

        // CodePage was already set by the command line parser.
        break;

    default:                    // oem or none.

        CodePage = GetOEMCP();
        break;
    }

    if(!IsValidCodePage(CodePage)) {
        ErrorAbort(MSG_BAD_CODEPAGE,CodePage);
    }

    //
    // Try to open the source file.
    //
    SourceFileHandle = CreateFileW(
                            SourceFilename,
                            GENERIC_READ,
                            FILE_SHARE_READ,
                            NULL,
                            OPEN_EXISTING,
                            0,
                            NULL
                            );

    if(SourceFileHandle == INVALID_HANDLE_VALUE) {
        ErrorAbort(MSG_CANT_OPEN_SOURCE,SourceFilename,GetLastError());
    }

    //
    // Attempt to determine the size of the source file.
    // GetFileSize reports failure as 0xFFFFFFFF when no high-order
    // size pointer is supplied.
    //
    SourceFileSize = GetFileSize(SourceFileHandle,NULL);
    if(SourceFileSize == (DWORD)(-1)) {
        ErrorAbort(MSG_CANT_GET_SIZE,SourceFilename,GetLastError());
    }

    //
    // Filter out 0-length files here.
    //
    if(!SourceFileSize) {
        ErrorAbort(MSG_ZERO_LENGTH,SourceFilename);
    }

    //
    // Assume multibyte.
    //
    SourceFileType = TFILE_MULTIBYTE;

    //
    // Read first 256 bytes of file and call win32 api
    // to determine if the text is probably unicode.
    //
    // NOTE(review): this read goes through SourceFileHandle, the same
    // handle later passed to the conversion routines -- confirm they do
    // not depend on the file pointer being at the start of the file.
    //
    ReadSize = min(SourceFileSize,256);
    MyReadFile(SourceFileHandle,FirstPartOfSource,ReadSize,SourceFilename);
    if(IsTextUnicode(FirstPartOfSource,ReadSize,NULL)) {
        SourceFileType = TFILE_UNICODE;
    }

    //
    // If the user did not specify a conversion type, set it here
    // based on the above test.
    //
    if(ConversionType == CONVERT_NONE) {

        ConversionType = (SourceFileType == TFILE_UNICODE)
                       ? UNICODE_TO_MB
                       : MB_TO_UNICODE;

    } else {

        //
        // The user asked for a specific conversion.  Work out which
        // strict check (if any) applies to it.
        //
        if(ConversionCheck == CHECK_CONVERSION) {

            if(ConversionType == UNICODE_TO_MB) {
                ConversionOption = CHECK_IF_NOT_UNICODE;
            } else if(ConversionType == MB_TO_UNICODE) {
                ConversionOption = CHECK_ALREADY_UNICODE;
            } else {
                ConversionOption = CHECK_NONE;
            }
        }

        //
        // Check if the file is unicode and we are trying to convert
        // from multibyte to unicode; if so issue a message and exit.
        //
        if((ConversionType == MB_TO_UNICODE) &&
           (SourceFileType == TFILE_UNICODE) &&
           (ConversionOption == CHECK_ALREADY_UNICODE)) {

            CloseHandle(SourceFileHandle);
            MsgPrintfW(MSG_ERR_SRC_IS_UNICODE,SourceFilename);
            FreeUnicodeArguments(_argcW,_argvW);
            exit(0);
        }

        //
        // Check if the file is not unicode and we are trying to convert
        // from unicode to multibyte; if so issue a message and exit.
        //
        if((ConversionType == UNICODE_TO_MB) &&
           (SourceFileType != TFILE_UNICODE) &&
           (ConversionOption == CHECK_IF_NOT_UNICODE)) {

            CloseHandle(SourceFileHandle);
            MsgPrintfW(MSG_ERR_SRC_IS_MB,SourceFilename);
            FreeUnicodeArguments(_argcW,_argvW);
            exit(0);
        }

        //
        // Check to see if what we guessed is what the user asked for.
        // If not, issue a warning.
        //
        if((ConversionType == UNICODE_TO_MB) && (SourceFileType != TFILE_UNICODE)) {

            MsgPrintfW(MSG_WARN_SRC_IS_MB,SourceFilename);

        } else {

            if((ConversionType == MB_TO_UNICODE) && (SourceFileType == TFILE_UNICODE)) {
                MsgPrintfW(MSG_WARN_SRC_IS_UNICODE,SourceFilename);
            }
        }
    }

    //
    // Try to create target file.
    //
    TargetFileHandle = CreateFileW(
                            TargetFilename,
                            GENERIC_READ | GENERIC_WRITE,
                            0,
                            NULL,
                            CREATE_ALWAYS,
                            FILE_ATTRIBUTE_NORMAL,
                            NULL
                            );

    if(TargetFileHandle == INVALID_HANDLE_VALUE) {
        ErrorAbort(MSG_CANT_OPEN_TARGET,TargetFilename,GetLastError());
    }
}