Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

128 lines
3.2 KiB

  1. /*++
  2. Copyright (c) 2002-2002 Microsoft Corporation
  3. Module Name:
  4. Utf8.h
  5. Abstract:
  6. UTF-8 manipulation routines
  7. Author:
  8. George V. Reilly (GeorgeRe) 01-Apr-2002
  9. Revision History:
  10. --*/
  11. #ifndef __UTF_8_H__
  12. #define __UTF_8_H__
  13. //
  14. // Declarations for UTF-8 Encoding
  15. //
  16. extern const UCHAR Utf8OctetCount[256];
  17. #define UTF8_OCTET_COUNT(c) (Utf8OctetCount[(UCHAR)(c)])
  //
  // Largest value that fits in a UTF-8 sequence of the given length.
  //
  #define UTF8_1_MAX          0x00007F    // max UTF-8 1-byte sequence
  #define UTF8_2_MAX          0x0007FF    // max UTF-8 2-byte sequence
  #define UTF8_3_MAX          0x00FFFF    // max UTF-8 3-byte sequence
  #define UTF8_4_MAX          0x10FFFF    // max UTF-8 4-byte sequence

  //
  // Fixed high-order bit patterns of lead and trail octets.
  //
  #define UTF8_1ST_OF_2       0xC0        // 110x xxxx - 0xCn or 0xDn
  #define UTF8_1ST_OF_3       0xE0        // 1110 xxxx - 0xEn
  #define UTF8_1ST_OF_4       0xF0        // 1111 0xxx - 0xFn, 0 <= n <= 7
  #define UTF8_TRAIL          0x80        // 10xx xxxx - 0x8n, 0x9n, 0xAn, or 0xBn

  //
  // Octet classifiers. Each one casts its argument to UCHAR, masks off the
  // payload bits, and compares what is left against the fixed pattern above.
  // They test the bit pattern only; no other validation is performed here.
  //
  #define IS_UTF8_SINGLETON(octet) \
      (((UCHAR) (octet)) <= UTF8_1_MAX)

  #define IS_UTF8_1ST_BYTE_OF_2(octet) \
      ((((UCHAR) (octet)) & 0xE0) == UTF8_1ST_OF_2)

  #define IS_UTF8_1ST_BYTE_OF_3(octet) \
      ((((UCHAR) (octet)) & 0xF0) == UTF8_1ST_OF_3)

  #define IS_UTF8_1ST_BYTE_OF_4(octet) \
      ((((UCHAR) (octet)) & 0xF8) == UTF8_1ST_OF_4)

  #define IS_UTF8_TRAILBYTE(octet) \
      ((((UCHAR) (octet)) & 0xC0) == UTF8_TRAIL)
  //
  // 6-bit field extraction: mask the field in place, then shift it down so
  // the result lies in 0..0x3F.
  //
  #define HIGHER_6_BIT(value)     (((value) & 0x3F000) >> 12)     // bits 12..17
  #define MIDDLE_6_BIT(value)     (((value) & 0x00FC0) >> 6)      // bits  6..11
  #define LOWER_6_BIT(value)      ((value) & 0x0003F)             // bits  0..5

  //
  // Single-bit tests. The result is the masked bit itself (0x80 / 0x40 or
  // zero), not a normalized 0/1.
  //
  #define BIT7(value)             ((value) & 0x80)
  #define BIT6(value)             ((value) & 0x40)
  //
  // Inclusive bounds of the high and low surrogate ranges.
  //
  #define HIGH_SURROGATE_START    0xD800
  #define HIGH_SURROGATE_END      0xDBFF
  #define LOW_SURROGATE_START     0xDC00
  #define LOW_SURROGATE_END       0xDFFF
  //
  // Constants for the non-character test below. HIGH_NONCHAR_START and
  // LOW_NONCHAR_BITS are not referenced by any macro visible in this header.
  //
  #define HIGH_NONCHAR_START      0x00
  #define HIGH_NONCHAR_END        0x10
  #define LOW_NONCHAR_BOM         0xFFFE
  #define LOW_NONCHAR_BITS        0xFFFF
  #define LOW_NONCHAR_START       0xFDD0
  #define LOW_NONCHAR_END         0xFDEF

  //
  // Nonzero when cp is one of:
  //   - a value whose low 16 bits are 0xFFFE or 0xFFFF (every bit of
  //     LOW_NONCHAR_BOM is set) and whose remaining high bits (cp >> 16)
  //     do not exceed HIGH_NONCHAR_END, or
  //   - a value in the inclusive range LOW_NONCHAR_START..LOW_NONCHAR_END.
  //
  // The argument is expanded several times, so it must be free of side
  // effects.
  //
  #define IS_UNICODE_NONCHAR(cp)                                          \
      (   (   ((LOW_NONCHAR_BOM & (cp)) == LOW_NONCHAR_BOM)               \
           && (((cp) >> 16) <= HIGH_NONCHAR_END) )                        \
       || (   (LOW_NONCHAR_START <= (cp))                                 \
           && ((cp) <= LOW_NONCHAR_END) ) )
  50. VOID
  51. HttpInitializeUtf8(
  52. VOID
  53. );
  54. ULONG
  55. HttpUnicodeToUTF8(
  56. IN PCWSTR lpSrcStr,
  57. IN LONG cchSrc,
  58. OUT LPSTR lpDestStr,
  59. IN LONG cchDest
  60. );
  61. NTSTATUS
  62. HttpUTF8ToUnicode(
  63. IN LPCSTR lpSrcStr,
  64. IN LONG cchSrc,
  65. OUT LPWSTR lpDestStr,
  66. IN OUT PLONG pcchDest,
  67. IN ULONG dwFlags
  68. );
  69. NTSTATUS
  70. HttpUcs4toUtf16(
  71. IN ULONG UnicodeChar,
  72. OUT PWCHAR pHighSurrogate,
  73. OUT PWCHAR pLowSurrogate
  74. );
  75. ULONG
  76. HttpUnicodeToUTF8Count(
  77. IN LPCWSTR pwszIn,
  78. IN ULONG dwInLen,
  79. IN BOOLEAN bEncode
  80. );
  81. NTSTATUS
  82. HttpUnicodeToUTF8Encode(
  83. IN LPCWSTR pwszIn,
  84. IN ULONG dwInLen,
  85. OUT PUCHAR pszOut,
  86. IN ULONG dwOutLen,
  87. OUT PULONG pdwOutLen,
  88. IN BOOLEAN bEncode
  89. );
  90. NTSTATUS
  91. HttpUtf8RawBytesToUnicode(
  92. IN PCUCHAR pOctetArray,
  93. IN ULONG SourceLength,
  94. OUT PULONG pUnicodeChar,
  95. OUT PULONG pOctetsToSkip
  96. );
  97. #endif // __UTF_8_H__