Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

689 lines
14 KiB

  1. /*++
  2. Copyright (c) 1991 Microsoft Corporation
  3. Module Name:
  4. rcunicod.c
  5. Abstract:
  6. Routines added to rcpp to support 16-bit unicode file parsing.
  7. Note that as of Aug 91, rcpp will not fully transfer the unicode
  8. characters but only the string constants are guaranteed to be passed
  9. cleanly.
  10. Author:
  11. David J. Marsyla (t-davema) 25-Aug-1991
  12. Revision History:
  13. --*/
  14. #include <nt.h>
  15. #include <ntrtl.h>
  16. #include <nturtl.h>
  17. #include <windows.h>
  18. #include <stdio.h>
  19. #include <ctype.h>
  20. #include <process.h>
  21. #include "rcunicod.h"
  22. #ifdef DBCS
  23. //
  24. // Prototypes for conversion routines between Unicode and 932.
  25. //
  26. NTSTATUS
  27. xxxRtlMultiByteToUnicodeN(
  28. PWSTR UnicodeString,
  29. PULONG BytesInUnicodeString,
  30. PCHAR MultiByteString,
  31. ULONG BytesInMultiByteString
  32. );
  33. NTSTATUS
  34. xxxRtlUnicodeToMultiByteN(
  35. PCHAR MultiByteString,
  36. PULONG BytesInMultiByteString,
  37. PWSTR UnicodeString,
  38. ULONG BytesInUnicodeString
  39. );
  40. #endif // DBCS
  41. #ifndef DBCS
  42. // SHUNK: A_fwrite is not called from RC. Remove this for now.
  43. INT
  44. A_fwrite (
  45. IN CHAR *pchMBString,
  46. IN INT nSizeOfItem,
  47. IN INT nCountToWrite,
  48. IN FILE *fpOutputFile
  49. )
  50. /*++
  51. Routine Description:
  52. This function will write out an 8-bit string as a unicode string.
  53. Note, this function is very slow, but hey, I don't have time to optimize
  54. it now.
  55. As of Aug 91, only codepage 1252 is being supported.
  56. Arguments:
  57. pchMBString - This is a 8-bit multi byte string to write to the file
  58. as a unicode string.
  59. nSizeOfItem - Ignored, we always use sizeof (CHAR).
  60. nCountToWrite - How long is this string.
  61. fpOutputFile - File pointer to send the character.
  62. Return Value:
  63. The number of bytes written.
  64. If the return does not equal nCountToWrite than an error has occured at
  65. some point in the write.
  66. --*/
  67. {
  68. WCHAR wchUniCharToWrite;
  69. INT cCountWritten = 0;
  70. UNREFERENCED_PARAMETER(nSizeOfItem);
  71. //
  72. // Write the string out as a two byte unicode string.
  73. // For now do this with multiple calls to U_fputc.
  74. //
  75. while (nCountToWrite--) {
  76. wchUniCharToWrite = RtlAnsiCharToUnicodeChar(&pchMBString);
  77. //
  78. // Write the current unicode char, break if an error occured.
  79. //
  80. if (U_fputc (wchUniCharToWrite, fpOutputFile) ==
  81. (INT)wchUniCharToWrite) {
  82. break;
  83. }
  84. cCountWritten++;
  85. }
  86. return (cCountWritten);
  87. }
  88. #endif // DBCS
  89. #ifndef DBCS
  90. // SHUNK: U_fwrite is not called from RC. Remove this for now.
  91. INT
  92. U_fwrite (
  93. IN WCHAR *pwchUnicodeString,
  94. IN INT nSizeOfItem,
  95. IN INT nCountToWrite,
  96. IN FILE *fpOutputFile
  97. )
  98. /*++
  99. Routine Description:
  100. This function will write out a 16-bit string directly. It does no
  101. translation on the string as it is written.
  102. Arguments:
  103. pchUnicodeString - This is a 16-bit unicode string to write to the file.
  104. nSizeOfItem - Ignored. We always use sizeof (WCHAR).
  105. nCountToWrite - How long is this string.
  106. fpOutputFile - File pointer to send the character.
  107. Return Value:
  108. The number of bytes written.
  109. If the return does not equal nCountToWrite than an error has occured at
  110. some point in the write.
  111. --*/
  112. {
  113. UNREFERENCED_PARAMETER(nSizeOfItem);
  114. //
  115. // Write the string out as a two byte unicode string.
  116. //
  117. return (fwrite (pwchUnicodeString, sizeof (WCHAR), nCountToWrite,
  118. fpOutputFile));
  119. }
  120. #endif // DBCS
  121. #ifndef DBCS
  122. // SHUNK: A_fputc is not called from RC. Remove this for now.
  123. INT
  124. A_fputc (
  125. IN CHAR chCharToWrite,
  126. IN FILE *fpOutputFile
  127. )
  128. /*++
  129. Routine Description:
  130. This function is translates the character passed to it using the 1252
  131. codepage and then sends it to U_fputc.
  132. As of Aug 91, only codepage 1252 is being supported.
  133. Arguments:
  134. chCharToWrite - This is a 8-bit character to be output.
  135. fpOutputFile - File pointer to send the character.
  136. Return Value:
  137. The character written.
  138. EOF = There was some sort of error writing the data out.
  139. --*/
  140. {
  141. WCHAR wchUniCharToWrite;
  142. PUCHAR puch;
  143. //
  144. // Translate the char and write it as it's unicode equivalent.
  145. //
  146. puch = &chCharToWrite;
  147. wchUniCharToWrite = RtlAnsiCharToUnicodeChar(&puch);
  148. if (U_fputc (wchUniCharToWrite, fpOutputFile) == (INT)wchUniCharToWrite) {
  149. return ((INT)chCharToWrite);
  150. }
  151. else {
  152. return (EOF);
  153. }
  154. }
  155. #endif // DBCS
  156. #ifndef DBCS
  157. // SHUNK: U_fputc is not called from RC. Remove this for now.
  158. INT
  159. U_fputc (
  160. IN WCHAR wcCharToWrite,
  161. IN FILE *fpOutputFile
  162. )
  163. /*++
  164. Routine Description:
  165. This function is simply the unicode version of fputc. It will output
  166. a two byte character instead of the standard byte.
  167. Arguments:
  168. wcCharToWrite - This is a 16-bit unicode character to be output.
  169. It is assumed that any codepage translation has
  170. already been done to the character.
  171. fpOutputFile - File pointer to send the character.
  172. Return Value:
  173. The character written.
  174. EOF = There was some sort of error writing the data out.
  175. --*/
  176. {
  177. INT cCountWritten;
  178. //
  179. // Write the char out as a two byte unicode character.
  180. //
  181. cCountWritten = fwrite (&wcCharToWrite, sizeof (WCHAR), 1, fpOutputFile);
  182. if (cCountWritten == sizeof (WCHAR)) {
  183. return (wcCharToWrite); // Successful write.
  184. }
  185. else {
  186. #ifdef ASSERT_ERRORS
  187. printf ("Error writing character in U_fputc\n");
  188. exit (1);
  189. #endif
  190. return (EOF); // Some sort of error occured.
  191. }
  192. }
  193. #endif // DBCS
  194. BOOL
  195. UnicodeFromMBString (
  196. OUT WCHAR *pwchUnicodeString,
  197. IN CHAR *pchMBString,
  198. IN INT nCountStrLength
  199. )
  200. /*++
  201. Routine Description:
  202. This function will translate a multi-byte string into it's unicode
  203. equivalent. Note that the destination unicode string must be large
  204. enough to hold the translated bytes.
  205. As of Aug 91, only codepage 1252 is being supported.
  206. Arguments:
  207. pwchUnicodeString - This is a pointer to storage for the destination
  208. unicode string. Note it must be nCountStrLength
  209. large.
  210. pchMBString - Pointer to the input multi-byte string to convert.
  211. nCountStrLength - Count of bytes to translate.
  212. Return Value:
  213. TRUE - All of the characters mapped correctly into Unicode.
  214. FALSE - One or more characters did not map. These characters have
  215. been translated to 0xFFFF. The rest of the string has been
  216. converted correctly.
  217. --*/
  218. {
  219. #ifdef DBCS
  220. NTSTATUS Status;
  221. //
  222. // Convert ANSI string to Unicode string based on ACP.
  223. //
  224. Status = xxxRtlMultiByteToUnicodeN(pwchUnicodeString,
  225. NULL,
  226. pchMBString,
  227. nCountStrLength);
  228. return(NT_SUCCESS(Status)? TRUE : FALSE);
  229. #else // !DBCS
  230. UNICODE_STRING Unicode;
  231. ANSI_STRING Ansi;
  232. Ansi.MaximumLength = Ansi.Length = nCountStrLength;
  233. Unicode.MaximumLength = nCountStrLength*sizeof(WCHAR) + sizeof(WCHAR);
  234. Ansi.Buffer = pchMBString;
  235. Unicode.Buffer = pwchUnicodeString;
  236. return RtlAnsiStringToUnicodeString(&Unicode,&Ansi,FALSE)==STATUS_SUCCESS;
  237. #endif // !DBCS
  238. }
  239. BOOL
  240. MBStringFromUnicode (
  241. OUT CHAR *pchMBString,
  242. IN WCHAR *pwchUnicodeString,
  243. IN INT nCountStrLength
  244. )
  245. /*++
  246. Routine Description:
  247. This function will translate a unicode string into a multi-byte string.
  248. Note that the destination string must be large enough to hold the
  249. translated bytes.
  250. As of Aug 91, only the translation is simply done by truncating the
  251. unicode character. We do this because we are not expecting anything
  252. strange.
  253. Arguments:
  254. pwchUnicodeString - This is a pointer to storage for the destination
  255. unicode string. Note it must be nCountStrLength
  256. large.
  257. pchMBString - Pointer to the input multi-byte string to convert.
  258. nCountStrLength - Count of bytes to translate.
  259. Return Value:
  260. TRUE - All of the characters mapped correctly into the MB string.
  261. FALSE - One or more characters did not map. As of Aug 91, this will
  262. never happen.
  263. --*/
  264. {
  265. #ifdef DBCS
  266. NTSTATUS Status;
  267. //
  268. // Convert Unicode string to ANSI string based on ACP.
  269. //
  270. Status = xxxRtlUnicodeToMultiByteN(pchMBString,
  271. NULL,
  272. pwchUnicodeString,
  273. nCountStrLength);
  274. return(NT_SUCCESS(Status)? TRUE : FALSE);
  275. #else // !DBCS
  276. UNICODE_STRING Unicode;
  277. ANSI_STRING Ansi;
  278. Unicode.Length = nCountStrLength*sizeof(WCHAR);
  279. Unicode.MaximumLength = nCountStrLength*sizeof(WCHAR)+sizeof(WCHAR);
  280. Ansi.MaximumLength = Unicode.MaximumLength / sizeof(WCHAR);
  281. Ansi.Buffer = pchMBString;
  282. Unicode.Buffer = pwchUnicodeString;
  283. return RtlUnicodeStringToAnsiString(&Ansi,&Unicode,FALSE)==STATUS_SUCCESS;
  284. #endif // !DBCS
  285. }
  286. #ifndef DBCS
  287. // SHUNK: Char1252FromUnicode() is not called any more.
  288. INT
  289. Char1252FromUnicode (
  290. IN WCHAR wchUnicodeChar
  291. )
  292. /*++
  293. Routine Description:
  294. This function will translate a unicode character into it's equivalent
  295. codepage 1252 character. If the character does not map correctly,
  296. then 0xFFFF is returned.
  297. Arguments:
  298. wchUnicodeChar - This is a 16-bit unicode character.
  299. Return Value:
  300. Value <= 0xFF - Codepage 1252 equivalent for this string.
  301. 0xFFFF - The character did not translate properly.
  302. --*/
  303. {
  304. UNICODE_STRING Unicode;
  305. ANSI_STRING Ansi;
  306. UCHAR c;
  307. INT s;
  308. Ansi.Length = Unicode.Length = 1;
  309. Ansi.MaximumLength = Unicode.MaximumLength = 1;
  310. Ansi.Buffer = &c;
  311. Unicode.Buffer = &wchUnicodeChar;
  312. s = RtlUnicodeStringToAnsiString(&Ansi,&Unicode,FALSE);
  313. if (s != STATUS_SUCCESS)
  314. return 0xffff;
  315. return (INT)c;
  316. }
  317. #endif // DBCS
  318. INT
  319. DetermineFileType (
  320. IN FILE *fpInputFile
  321. )
  322. /*++
  323. Routine Description:
  324. This function is used to determine what type of file is being read.
  325. Note that it assumes that the first few bytes of the given file contain
  326. mostly ascii characters. This routine was originally intended for use
  327. on .rc files and include files.
  328. Note, the file is returned to it's proper position after function.
  329. Arguments:
  330. fpInputFile - File pointer to file we are checking, must be
  331. open with read permissions.
  332. Return Value:
  333. DFT_FILE_IS_UNKNOWN - It was impossible to determine what type of file
  334. we were checking. This usually happens when EOF
  335. is unexpectedly reached.
  336. DFT_FILE_IS_8_BIT - File was determined to be in standard 8-bit
  337. format.
  338. DFT_FILE_IS_16_BIT - File was determined to be a 16 bit unicode file
  339. which can be directly read into a WCHAR array.
  340. DFT_FILE_IS_16_BIT_REV - File was*/
  341. {
  342. CHAR rgchTestBytes [DFT_TEST_SIZE << 2]; // Storage for test data.
  343. INT cNumberBytesTested = 0; // Test information.
  344. INT cNumberOddZerosFound = 0;
  345. INT cNumberEvenZerosFound = 0;
  346. INT cNumberAsciiFound = 0;
  347. INT cCountRead; // Temp storage for count read.
  348. LONG lStartFilePos; // Storage for file position.
  349. INT fSysEndianType; // System endian type.
  350. INT fFileType = DFT_FILE_IS_UNKNOWN;// File type, when found.
  351. fSysEndianType = DetermineSysEndianType ();
  352. //
  353. // Store position so we can get back to it.
  354. //
  355. lStartFilePos = ftell (fpInputFile);
  356. //
  357. // Make sure we start on an even byte to simplify routines.
  358. //
  359. if (lStartFilePos % 2) {
  360. fgetc (fpInputFile);
  361. }
  362. do {
  363. INT wT;
  364. //
  365. // Read in the first test segment.
  366. //
  367. cCountRead = fread (rgchTestBytes, sizeof (CHAR), DFT_TEST_SIZE << 2,
  368. fpInputFile);
  369. //
  370. // Determine results and add to totals.
  371. //
  372. for (wT = 0; wT < cCountRead; wT++) {
  373. if (rgchTestBytes [wT] == 0) {
  374. if (wT % 2) {
  375. cNumberOddZerosFound++;
  376. }
  377. else {
  378. cNumberEvenZerosFound++;
  379. }
  380. }
  381. if (isprint (rgchTestBytes [wT]) ||
  382. rgchTestBytes[wT] == '\t' ||
  383. rgchTestBytes[wT] == '\n' ||
  384. rgchTestBytes[wT] == '\r') {
  385. cNumberAsciiFound++;
  386. }
  387. }
  388. cNumberBytesTested += cCountRead;
  389. //
  390. // Check if we have a definite pattern.
  391. //
  392. {
  393. INT cMajorityTested; // 80% of the bytes tested.
  394. cMajorityTested = cNumberBytesTested << 2;
  395. cMajorityTested /= 5;
  396. if (cNumberAsciiFound > cMajorityTested) {
  397. fFileType = DFT_FILE_IS_8_BIT;
  398. }
  399. else if (cNumberOddZerosFound > (cMajorityTested >> 1)) {
  400. //
  401. // File type was determined to be little endian.
  402. // If system is also little endian, byte order is correct.
  403. //
  404. fFileType = (fSysEndianType == DSE_SYS_LITTLE_ENDIAN) ?
  405. DFT_FILE_IS_16_BIT : DFT_FILE_IS_16_BIT_REV;
  406. }
  407. else if (cNumberEvenZerosFound > (cMajorityTested >> 1)) {
  408. //
  409. // File type was determined to be big endian.
  410. // If system is also big endian, byte order is correct.
  411. //
  412. fFileType = (fSysEndianType == DSE_SYS_LITTLE_ENDIAN) ?
  413. DFT_FILE_IS_16_BIT_REV : DFT_FILE_IS_16_BIT;
  414. }
  415. }
  416. } while (cCountRead == (DFT_TEST_SIZE << 2) &&
  417. fFileType == DFT_FILE_IS_UNKNOWN);
  418. //
  419. // Return to starting file position. (usually beginning)
  420. //
  421. fseek (fpInputFile, lStartFilePos, SEEK_SET);
  422. return (fFileType);
  423. }
  424. INT
  425. DetermineSysEndianType (
  426. VOID
  427. )
  428. /*++
  429. Routine Description:
  430. This function is used to determine how the current system stores its
  431. integers in memory.
  432. For those of us who are confused by little endian and big endian formats,
  433. here is a breif recap.
  434. Little Endian: (This is used on Intel 80x86 chips. The MIPS RS4000 chip
  435. is switchable, but will run in little endian format for NT.)
  436. This is where the high order bytes of a short or long are stored higher
  437. in memory. For example the number 0x80402010 is stored as follows.
  438. Address: Value:
  439. 00 10
  440. 01 20
  441. 02 40
  442. 03 80
  443. This looks backwards when memory is dumped in order: 10 20 40 80
  444. Big Endian: (This is not currently used on any NT systems but hey, this
  445. is supposed to be portable!!)
  446. This is where the high*/
  447. {
  448. INT nCheckInteger;
  449. CHAR rgchTestBytes [sizeof (INT)];
  450. //
  451. // Clear the test bytes to zero.
  452. //
  453. *((INT * )rgchTestBytes) = 0;
  454. //
  455. // Set first to some value.
  456. //
  457. rgchTestBytes [0] = (CHAR)0xFF;
  458. //
  459. // Map it to an integer.
  460. //
  461. nCheckInteger = *((INT * )rgchTestBytes);
  462. //
  463. // See if value was stored in low order of integer.
  464. // If so then system is little endian.
  465. //
  466. if (nCheckInteger == 0xFF) {
  467. return (DSE_SYS_LITTLE_ENDIAN);
  468. }
  469. else {
  470. return (DSE_SYS_LITTLE_ENDIAN);
  471. }
  472. }