Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

485 lines
9.9 KiB

  1. /*++
  2. Copyright (c) 1993 Microsoft Corporation
  3. Module Name:
  4. Unitext.c
  5. Abstract:
  6. Main module for unicode <--> ansi/oem text file translator.
  7. This program converts files between unicode and multibyte
  8. character sets (ansi or oem). Usage is as follows:
  9. unitext [-m|-u] [-o|-a|-<nnn>] [-z] <src_file> <dst_file>
  10. Author:
  11. Ted Miller (tedm) 16-June-1993
  12. Revision History:
  13. --*/
  14. #include "unitext.h"
  15. #include <wchar.h>
  16. //
  17. // Globals and prototypes for use within this module.
  18. //
  19. //
  20. // Unicode argc/argv.
  21. //
  22. int _argcW;
  23. PWCHAR *_argvW;
  24. //
  25. // Codepage for multibyte file.
  26. //
  27. DWORD CodePage = (DWORD)(-1);
  28. //
  29. // File handles.
  30. //
  31. HANDLE SourceFileHandle,TargetFileHandle;
  32. //
  33. // Size of source file.
  34. //
  35. DWORD SourceFileSize;
  36. //
  37. // Type of the multibyte file (source or destination).
  38. //
  39. DWORD MultibyteType = TFILE_NONE;
  40. //
  41. // Conversion type.
  42. //
  43. DWORD ConversionType = CONVERT_NONE;
  44. DWORD ConversionOption = CHECK_NONE;
  45. DWORD ConversionCheck = CHECK_NONE;
  46. //
  47. // Filenames.
  48. //
  49. LPWSTR SourceFilename = NULL,
  50. TargetFilename = NULL;
  51. BOOL
  52. _ParseCommandLineArgs(
  53. VOID
  54. );
  55. VOID
  56. _CheckFilesAndOpen(
  57. VOID
  58. );
  59. VOID
  60. __cdecl
  61. main(
  62. VOID
  63. )
  64. {
  65. //
  66. // Get command line arguments.
  67. //
  68. if(!InitializeUnicodeArguments(&_argcW,&_argvW)) {
  69. ErrorAbort(MSG_INSUFFICIENT_MEMORY);
  70. }
  71. //
  72. // Parse command line arguments.
  73. //
  74. if(!_ParseCommandLineArgs()) {
  75. ErrorAbort(MSG_USAGE);
  76. }
  77. //
  78. // Check source and destination files.
  79. //
  80. _CheckFilesAndOpen();
  81. //
  82. // Perform conversion.
  83. //
  84. switch(ConversionType) {
  85. case MB_TO_UNICODE:
  86. MultibyteTextFileToUnicode(
  87. SourceFilename,
  88. TargetFilename,
  89. SourceFileHandle,
  90. TargetFileHandle,
  91. SourceFileSize,
  92. CodePage
  93. );
  94. break;
  95. case UNICODE_TO_MB:
  96. UnicodeTextFileToMultibyte(
  97. SourceFilename,
  98. TargetFilename,
  99. SourceFileHandle,
  100. TargetFileHandle,
  101. SourceFileSize,
  102. CodePage
  103. );
  104. break;
  105. }
  106. CloseHandle(SourceFileHandle);
  107. CloseHandle(TargetFileHandle);
  108. //
  109. // Clean up and exit.
  110. //
  111. FreeUnicodeArguments(_argcW,_argvW);
  112. }
  113. BOOL
  114. _ParseCommandLineArgs(
  115. VOID
  116. )
  117. /*++
  118. Routine Description:
  119. Parse command line arguments.
  120. Arguments:
  121. None. Uses globals _argcW and _argvW.
  122. Return Value:
  123. FALSE if invalid arguments specified.
  124. --*/
  125. {
  126. int argc;
  127. PWCHAR *argv;
  128. PWCHAR arg;
  129. //
  130. // Initialize local variables.
  131. //
  132. argc = _argcW;
  133. argv = _argvW;
  134. //
  135. // Skip argv[0] (the program name).
  136. //
  137. if(argc) {
  138. argc--;
  139. argv++;
  140. }
  141. while(argc) {
  142. arg = *argv;
  143. if((*arg == L'-') || (*arg == L'/')) {
  144. switch(*(++arg)) {
  145. case L'a':
  146. case L'A':
  147. // if already specifed, error
  148. if(MultibyteType != TFILE_NONE) {
  149. return(FALSE);
  150. }
  151. MultibyteType = TFILE_ANSI;
  152. break;
  153. case L'o':
  154. case L'O':
  155. // if already specifed, error
  156. if(MultibyteType != TFILE_NONE) {
  157. return(FALSE);
  158. }
  159. MultibyteType = TFILE_OEM;
  160. break;
  161. case L'm':
  162. case L'M':
  163. if(ConversionType != CONVERT_NONE) {
  164. return(FALSE);
  165. }
  166. ConversionType = MB_TO_UNICODE;
  167. break;
  168. case L'u':
  169. case L'U':
  170. if(ConversionType != CONVERT_NONE) {
  171. return(FALSE);
  172. }
  173. ConversionType = UNICODE_TO_MB;
  174. break;
  175. case L'z':
  176. case L'Z':
  177. if(ConversionCheck != CHECK_NONE) {
  178. return(FALSE);
  179. }
  180. ConversionCheck = CHECK_CONVERSION;
  181. break;
  182. default:
  183. if(iswdigit(*arg)) {
  184. if((CodePage != (DWORD)(-1)) || (MultibyteType != TFILE_NONE)) {
  185. return(FALSE);
  186. }
  187. swscanf(arg,L"%u",&CodePage);
  188. MultibyteType = TFILE_USERCP;
  189. } else {
  190. return(FALSE);
  191. }
  192. break;
  193. }
  194. } else {
  195. if(SourceFilename == NULL) {
  196. SourceFilename = arg;
  197. } else if(TargetFilename == NULL) {
  198. TargetFilename = arg;
  199. } else {
  200. return(FALSE);
  201. }
  202. }
  203. argv++;
  204. argc--;
  205. }
  206. //
  207. // Must have source, destination filenames.
  208. //
  209. if(!SourceFilename || !TargetFilename) {
  210. return(FALSE);
  211. }
  212. return(TRUE);
  213. }
  214. VOID
  215. _CheckFilesAndOpen(
  216. VOID
  217. )
  218. /*++
  219. Routine Description:
  220. Open the source and destination files, and try to make a guess
  221. about the type of the source file. If we think the source file is
  222. a different type than the user specified, print a warning.
  223. Also check the codepage given by the user.
  224. Arguments:
  225. None.
  226. Return Value:
  227. None. Does not return if a serious error occurs.
  228. --*/
  229. {
  230. DWORD SourceFileType;
  231. UCHAR FirstPartOfSource[256];
  232. DWORD ReadSize;
  233. //
  234. // Determine and check codepage. Default to oem.
  235. //
  236. switch(MultibyteType) {
  237. case TFILE_ANSI:
  238. CodePage = GetACP();
  239. case TFILE_USERCP:
  240. break;
  241. default: // oem or none.
  242. CodePage = GetOEMCP();
  243. break;
  244. }
  245. if(!IsValidCodePage(CodePage)) {
  246. ErrorAbort(MSG_BAD_CODEPAGE,CodePage);
  247. }
  248. //
  249. // Try to open the source file.
  250. //
  251. SourceFileHandle = CreateFileW(
  252. SourceFilename,
  253. GENERIC_READ,
  254. FILE_SHARE_READ,
  255. NULL,
  256. OPEN_EXISTING,
  257. 0,
  258. NULL
  259. );
  260. if(SourceFileHandle == INVALID_HANDLE_VALUE) {
  261. ErrorAbort(MSG_CANT_OPEN_SOURCE,SourceFilename,GetLastError());
  262. }
  263. //
  264. // Attempt to determine to determine the size of the source file.
  265. //
  266. SourceFileSize = GetFileSize(SourceFileHandle,NULL);
  267. if(SourceFileSize == -1) {
  268. ErrorAbort(MSG_CANT_GET_SIZE,SourceFilename,GetLastError());
  269. }
  270. //
  271. // Filter out 0-length files here.
  272. //
  273. if(!SourceFileSize) {
  274. ErrorAbort(MSG_ZERO_LENGTH,SourceFilename);
  275. }
  276. //
  277. // Assume multibyte.
  278. //
  279. SourceFileType = TFILE_MULTIBYTE;
  280. //
  281. // Read first 256 bytes of file and call win32 api
  282. // to determine if the text is probably unicode.
  283. //
  284. ReadSize = min(SourceFileSize,256);
  285. MyReadFile(SourceFileHandle,FirstPartOfSource,ReadSize,SourceFilename);
  286. if(IsTextUnicode(FirstPartOfSource,ReadSize,NULL)) {
  287. SourceFileType = TFILE_UNICODE;
  288. }
  289. //
  290. // If the user did not specify a conversion type, set it here
  291. // based on the above test.
  292. //
  293. if(ConversionType == CONVERT_NONE) {
  294. ConversionType = (SourceFileType == TFILE_UNICODE)
  295. ? UNICODE_TO_MB
  296. : MB_TO_UNICODE;
  297. } else {
  298. if(ConversionCheck == CHECK_CONVERSION) {
  299. if(ConversionType == UNICODE_TO_MB) {
  300. ConversionOption = CHECK_IF_NOT_UNICODE;
  301. }
  302. else if(ConversionType == MB_TO_UNICODE) {
  303. ConversionOption = CHECK_ALREADY_UNICODE;
  304. }
  305. else {
  306. ConversionOption = CHECK_NONE;
  307. }
  308. }
  309. //
  310. // check if the file is UNICODE and we are trying to convert from MB_TO_UNICODE
  311. // then issue an warning and exit
  312. if((ConversionType == MB_TO_UNICODE) &&
  313. (SourceFileType == TFILE_UNICODE) &&
  314. (ConversionOption == CHECK_ALREADY_UNICODE)) {
  315. CloseHandle(SourceFileHandle);
  316. MsgPrintfW(MSG_ERR_SRC_IS_UNICODE,SourceFilename);
  317. FreeUnicodeArguments(_argcW,_argvW);
  318. exit(0);
  319. }
  320. //
  321. // check if the file is not unicode and if we are trying to convert from
  322. // unicode to MB, then issue an warning and exit
  323. if((ConversionType == UNICODE_TO_MB) &&
  324. (SourceFileType != TFILE_UNICODE) &&
  325. (ConversionOption == CHECK_IF_NOT_UNICODE)) {
  326. CloseHandle(SourceFileHandle);
  327. MsgPrintfW(MSG_ERR_SRC_IS_MB,SourceFilename);
  328. FreeUnicodeArguments(_argcW,_argvW);
  329. exit(0);
  330. }
  331. //
  332. // Check to see if what we guessed is what the user asked for.
  333. // If not, issue a warning.
  334. //
  335. if((ConversionType == UNICODE_TO_MB) && (SourceFileType != TFILE_UNICODE)) {
  336. MsgPrintfW(MSG_WARN_SRC_IS_MB,SourceFilename);
  337. } else {
  338. if((ConversionType == MB_TO_UNICODE) && (SourceFileType == TFILE_UNICODE)) {
  339. MsgPrintfW(MSG_WARN_SRC_IS_UNICODE,SourceFilename);
  340. }
  341. }
  342. }
  343. //
  344. // Try to create target file.
  345. //
  346. TargetFileHandle = CreateFileW(
  347. TargetFilename,
  348. GENERIC_READ | GENERIC_WRITE,
  349. 0,
  350. NULL,
  351. CREATE_ALWAYS,
  352. FILE_ATTRIBUTE_NORMAL,
  353. NULL
  354. );
  355. if(TargetFileHandle == INVALID_HANDLE_VALUE) {
  356. ErrorAbort(MSG_CANT_OPEN_TARGET,TargetFilename,GetLastError());
  357. }
  358. }