Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

261 lines
8.2 KiB

  1. /****************************** Module Header ******************************\
  2. * Module Name: nlsxlat.c
  3. *
  4. * Copyright (c) 1985-91, Microsoft Corporation
  5. *
  6. * This module contains the private routines for character translation:
  7. * 8-bit <=> Unicode.
  8. *
  9. * History:
  10. * 03-Jan-1992 gregoryw
  11. \***************************************************************************/
  12. #include <nt.h>
  13. #include <ntrtl.h>
  14. /*
  15. * External declarations - these are temporary tables
  16. */
  17. extern USHORT TmpUnicodeToAnsiTable[];
  18. extern WCHAR TmpAnsiToUnicodeTable[];
  19. #ifdef DBCS
  20. extern WCHAR sjtouni( USHORT );
  21. #define IsDBCSFirst(w) (((unsigned char)w >= 0x81 && (unsigned char)w <= 0x9f) || (((unsigned char)w >= 0xe0 && (unsigned char)w <= 0xfc)))
  22. #endif // DBCS
  23. /*
  24. * Various defines for data access
  25. */
  26. #define DBCS_TABLE_SIZE 256
  27. #define LONIBBLE(b) ((UCHAR)((UCHAR)(b) & 0xF))
  28. #define HINIBBLE(b) ((UCHAR)(((UCHAR)(b) >> 4) & 0xF))
  29. #define LOBYTE(w) ((UCHAR)(w))
  30. #define HIBYTE(w) ((UCHAR)(((USHORT)(w) >> 8) & 0xFF))
  31. /*
  32. * Global data used by the translation routines.
  33. *
  34. */
  35. UCHAR NlsLeadByteInfo[DBCS_TABLE_SIZE]; // Lead byte info. for ACP
  36. PUSHORT *NlsMbCodePageTables; // Multibyte to Unicode translation tables
  37. PUSHORT NlsAnsiToUnicodeData = TmpAnsiToUnicodeTable; // Ansi CP to Unicode translation table
  38. PUSHORT NlsUnicodeToAnsiData = TmpUnicodeToAnsiTable; // Unicode to Ansi CP translation table
  39. NTSTATUS
  40. xxxRtlMultiByteToUnicodeN(
  41. OUT PWCH UnicodeString,
  42. OUT PULONG BytesInUnicodeString OPTIONAL,
  43. IN PCH MultiByteString,
  44. IN ULONG BytesInMultiByteString)
  45. /*++
  46. Routine Description:
  47. This functions converts the specified ansi source string into a
  48. Unicode string. The translation is done with respect to the
  49. ANSI Code Page (ACP) installed at boot time. Single byte characters
  50. in the range 0x00 - 0x7f are simply zero extended as a performance
  51. enhancement. In some far eastern code pages 0x5c is defined as the
  52. Yen sign. For system translation we always want to consider 0x5c
  53. to be the backslash character. We get this for free by zero extending.
  54. NOTE: This routine only supports precomposed Unicode characters.
  55. Arguments:
  56. UnicodeString - Returns a unicode string that is equivalent to
  57. the ansi source string.
  58. BytesInUnicodeString - Returns the number of bytes in the returned
  59. unicode string pointed to by UnicodeString.
  60. MultiByteString - Supplies the ansi source string that is to be
  61. converted to unicode.
  62. BytesInMultiByteString - The number of bytes in the string pointed to
  63. by MultiByteString.
  64. Return Value:
  65. SUCCESS - The conversion was successful
  66. --*/
  67. {
  68. UCHAR Entry;
  69. PWCH UnicodeStringAnchor;
  70. PUSHORT DBCSTable;
  71. UnicodeStringAnchor = UnicodeString;
  72. #ifdef DBCS
  73. while (BytesInMultiByteString--) {
  74. if ( IsDBCSFirst( *MultiByteString ) ) {
  75. if (!BytesInMultiByteString) {
  76. return STATUS_UNSUCCESSFUL;
  77. }
  78. *UnicodeString++ = sjtouni( (((USHORT)(*(PUCHAR)MultiByteString++)) << 8) +
  79. (USHORT)(*(PUCHAR)MultiByteString++)
  80. );
  81. BytesInMultiByteString--;
  82. } else {
  83. *UnicodeString++ = sjtouni( *(PUCHAR)MultiByteString++ );
  84. }
  85. }
  86. #else
  87. if (NlsMbCodePageTag) {
  88. //
  89. // The ACP is a multibyte code page. Check each character
  90. // to see if it is a lead byte before doing the translation.
  91. //
  92. while (BytesInMultiByteString--) {
  93. if ( NlsLeadByteInfo[*MultiByteString]) {
  94. //
  95. // Lead byte - translate the trail byte using the table
  96. // that corresponds to this lead byte. NOTE: make sure
  97. // we have a trail byte to convert.
  98. //
  99. if (!BytesInMultiByteString) {
  100. return STATUS_UNSUCCESSFUL;
  101. }
  102. Entry = NlsLeadByteInfo[*MultiByteString++];
  103. DBCSTable = NlsMbCodePageTables[HINIBBLE(Entry)] + (LONIBBLE(Entry) * DBCS_TABLE_SIZE);
  104. *UnicodeString++ = DBCSTable[*MultiByteString++];
  105. BytesInMultiByteString--;
  106. } else {
  107. //
  108. // Single byte character.
  109. //
  110. if (*MultiByteString & 0x80) {
  111. *UnicodeString++ = NlsAnsiToUnicodeData[*MultiByteString++];
  112. } else {
  113. *UnicodeString++ = (WCHAR)*MultiByteString++;
  114. }
  115. }
  116. }
  117. } else {
  118. //
  119. // The ACP is a single byte code page.
  120. //
  121. while (BytesInMultiByteString--) {
  122. if (*MultiByteString & 0x80) {
  123. *UnicodeString++ = NlsAnsiToUnicodeData[*MultiByteString++];
  124. } else {
  125. *UnicodeString++ = (WCHAR)*MultiByteString++;
  126. }
  127. }
  128. }
  129. #endif
  130. if (ARGUMENT_PRESENT(BytesInUnicodeString)) {
  131. *BytesInUnicodeString = (ULONG)((PCH)UnicodeString - (PCH)UnicodeStringAnchor);
  132. }
  133. return STATUS_SUCCESS;
  134. }
  135. NTSTATUS
  136. xxxRtlUnicodeToMultiByteN(
  137. OUT PCH MultiByteString,
  138. OUT PULONG BytesInMultiByteString OPTIONAL,
  139. IN PWCH UnicodeString,
  140. IN ULONG BytesInUnicodeString)
  141. /*++
  142. Routine Description:
  143. This functions converts the specified unicode source string into an
  144. ansi string. The translation is done with respect to the
  145. ANSI Code Page (ACP) loaded at boot time.
  146. Arguments:
  147. MultiByteString - Returns an ansi string that is equivalent to the
  148. unicode source string. If the translation can not be done
  149. because a character in the unicode string does not map to an
  150. ansi character in the ACP, an error is returned.
  151. BytesInMultiByteString - Returns the number of bytes in the returned
  152. ansi string pointed to by MultiByteString.
  153. UnicodeString - Supplies the unicode source string that is to be
  154. converted to ansi.
  155. BytesInUnicodeString - The number of bytes in the the string pointed to by
  156. UnicodeString.
  157. Return Value:
  158. SUCCESS - The conversion was successful
  159. !SUCCESS - The conversion failed. A unicode character was encountered
  160. that has no translation for the current ANSI Code Page (ACP).
  161. --*/
  162. {
  163. USHORT Offset;
  164. USHORT Entry;
  165. ULONG CharsInUnicodeString;
  166. PCH MultiByteStringAnchor;
  167. MultiByteStringAnchor = MultiByteString;
  168. /*
  169. * convert from bytes to chars for easier loop handling.
  170. */
  171. CharsInUnicodeString = BytesInUnicodeString / sizeof(WCHAR);
  172. while (CharsInUnicodeString--) {
  173. Offset = NlsUnicodeToAnsiData[HIBYTE(*UnicodeString)];
  174. if (Offset != 0) {
  175. Offset = NlsUnicodeToAnsiData[Offset + HINIBBLE(*UnicodeString)];
  176. if (Offset != 0) {
  177. Entry = NlsUnicodeToAnsiData[Offset + LONIBBLE(*UnicodeString)];
  178. if (HIBYTE(Entry) != 0) {
  179. *MultiByteString++ = HIBYTE(Entry); // lead byte
  180. }
  181. *MultiByteString++ = LOBYTE(Entry);
  182. } else {
  183. //
  184. // no translation for this Unicode character. Return
  185. // an error.
  186. //
  187. #ifdef DBCS // RtlUnicodeToMultiByteN : temporary hack to avoid error return
  188. if ( *UnicodeString <= (WCHAR)0xff )
  189. *MultiByteString++ = (UCHAR)*UnicodeString;
  190. else
  191. *MultiByteString++ = '\x20';
  192. #else
  193. return STATUS_UNSUCCESSFUL;
  194. #endif
  195. }
  196. } else {
  197. //
  198. // no translation for this Unicode character. Return an error.
  199. //
  200. #ifdef DBCS // RtlUnicodeToMultiByteN : temporary hack to avoid error return
  201. if ( *UnicodeString <= (WCHAR)0xff )
  202. *MultiByteString++ = (UCHAR)*UnicodeString;
  203. else
  204. *MultiByteString++ = '\x20';
  205. #else
  206. return STATUS_UNSUCCESSFUL;
  207. #endif
  208. }
  209. UnicodeString++;
  210. }
  211. if (ARGUMENT_PRESENT(BytesInMultiByteString)) {
  212. *BytesInMultiByteString = (ULONG)(MultiByteString - MultiByteStringAnchor);
  213. }
  214. return STATUS_SUCCESS;
  215. }