Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

320 lines
9.5 KiB

  /*----------------------------------------------------------------------------
      %%File:     msencode.h
      %%Unit:     fechmap
      %%Contact:  jpick

      External header file for MsEncode character conversion module.
  ----------------------------------------------------------------------------*/
  7. #ifndef MSENCODE_H
  8. #define MSENCODE_H
  // ----------------------------------------------------------------------------
  //
  // Error Returns
  //
  // ----------------------------------------------------------------------------

  //
  // Return Type for API Functions
  //
  // Zero means success; every other code defined below is negative.
  //
  typedef int CCE;

  //
  // Error:       cceSuccess
  // Explanation: Function succeeded (no error).
  //
  #define cceSuccess                  0

  //
  // Error:       cceRequestedStop
  // Explanation: Function succeeded (no error).  Caller
  //              requested function to be run in iterator mode
  //              (stop on each character or stop on ASCII) and
  //              function is making requested stop.  (Stream
  //              conversion functions only).
  //
  #define cceRequestedStop            (-1)

  //
  // Error:       cceInsufficientBuffer
  // Explanation: Buffer provided to function is too small.
  //
  #define cceInsufficientBuffer       (-2)

  //
  // Error:       cceInvalidFlags
  // Explanation: An invalid flag or combination of flags was
  //              given to function.
  //
  #define cceInvalidFlags             (-3)

  //
  // Error:       cceInvalidParameter
  // Explanation: Invalid parameter passed to function (null
  //              pointer, invalid encoding specified, etc.).
  //
  #define cceInvalidParameter         (-4)

  //
  // Error:       cceRead
  // Explanation: User read-callback function failed.
  //
  #define cceRead                     (-5)

  //
  // Error:       cceWrite
  // Explanation: User write-callback function failed.
  //
  #define cceWrite                    (-6)

  //
  // Error:       cceUnget
  // Explanation: User unget-callback function failed.
  //
  #define cceUnget                    (-7)

  //
  // Error:       cceNoCodePage
  // Explanation: Requested encoding requires an installed
  //              code page (NLS file) for conversion.  That
  //              file is not installed.
  //
  #define cceNoCodePage               (-8)

  //
  // Error:       cceEndOfInput
  // Explanation: Unexpected end-of-input occurred within a
  //              multi-byte character in conversion function.
  //              (Returned only if user requested errors for
  //              invalid characters).
  //
  #define cceEndOfInput               (-9)

  //
  // Error:       cceNoTranslation
  // Explanation: Character in input stream or string has no
  //              equivalent Unicode (multi-byte to Unicode) or
  //              multi-byte (Unicode to multi-byte) character.
  //              (Returned only if user requested errors for
  //              invalid characters).
  //
  #define cceNoTranslation            (-10)

  //
  // Error:       cceInvalidChar
  // Explanation: Converter found a single or multi-byte character
  //              that is outside the legal range for the given
  //              encoding.  (Returned only if user requested
  //              errors for invalid characters).
  //
  #define cceInvalidChar              (-11)

  //
  // Error:       cceAmbiguousInput
  // Explanation: CceDetectInputCode(), only.  Data matches more
  //              than one of the supported encoding types.
  //              (Returned only if function told to not resolve
  //              ambiguity).
  //
  #define cceAmbiguousInput           (-12)

  //
  // Error:       cceUnknownInput
  // Explanation: CceDetectInputCode(), only.  Data matches none
  //              of the supported encoding types.
  //
  #define cceUnknownInput             (-13)

  //
  // Error:       cceMayBeAscii
  // Explanation: CceDetectInputCode(), only.  Technically, data
  //              matches at least one of the supported encoding
  //              types, but may not be a true match.  (For example,
  //              an ASCII file with only a few scattered extended
  //              characters).  (Returned only if function told to
  //              resolve ambiguity).
  //
  //              This is not an error, only a flag to the calling
  //              application.  CceDetectInputCode() will still set
  //              the encoding type if it returns this value.
  //
  #define cceMayBeAscii               (-14)

  //
  // Error:       cceInternal
  // Explanation: Unrecoverable internal error.
  //
  #define cceInternal                 (-15)

  //
  // Error:       cceConvert
  // Explanation: Unexpected DBCS function conversion error.
  //
  #define cceConvert                  (-16)

  //
  // Error:       cceEncodingNotImplemented
  // Explanation: Temporary integration error.  Requested encoding
  //              is not implemented.
  //
  #define cceEncodingNotImplemented   (-100)

  //
  // Error:       cceFunctionNotImplemented
  // Explanation: Temporary integration error.  Function
  //              is not implemented.
  //
  #define cceFunctionNotImplemented   (-101)
  // ----------------------------------------------------------------------------
  //
  // General Definitions for Modules Using these Routines
  //
  // ----------------------------------------------------------------------------

  // Calling-convention decoration for API entry points (MSENAPI) and for
  // user-supplied callbacks (MSENCBACK).  Both expand to PASCAL, which is
  // not defined in this header and must come from the including code.
  #define MSENAPI     PASCAL
  #define MSENCBACK   PASCAL

  // EXPIMPL(type) prefixes an API function definition; EXPDECL(type)
  // prefixes the matching extern declaration.
  #define EXPIMPL(type)   type MSENAPI
  #define EXPDECL(type)   extern type MSENAPI

  // In case these are not already defined
  //
  // NOTE(review): FAR becomes __far when _WIN32 is defined and empty
  // otherwise.  __far is a 16-bit segmented-memory keyword, so this
  // condition looks inverted -- confirm before relying on this fallback
  // (it is only reached when FAR was not defined earlier).
  //
  #ifndef FAR
  #ifdef _WIN32
  #define FAR     __far
  #else
  #define FAR
  #endif
  #endif

  // Unsigned byte character and pointer-to-byte-string types.
  typedef unsigned char   UCHAR;
  typedef UCHAR           *PUCHAR;
  typedef UCHAR FAR       *LPUSTR;
  typedef const UCHAR FAR *LPCUSTR;

  #ifndef UNIX // IEUNIX uses 4 bytes WCHAR, these are already defined in winnt.h
  // Wide character (unsigned short) and pointer-to-wide-string types.
  typedef unsigned short  WCHAR;
  typedef WCHAR           *PWCHAR;
  typedef WCHAR FAR       *LPWSTR;
  typedef const WCHAR FAR *LPCWSTR;
  #endif
  //
  // Character encoding types supported by this module.
  //
  // ceNil (-1) marks "no encoding"; the real encodings are numbered from 0
  // and ceCount is the number of them.  The enumeration is declared by tag
  // only (no typedef name); API code stores encoding values in the short
  // integer type CEnc.
  //
  enum _cenc
  {
      ceNil = -1,
      ceEucCn = 0,
      ceEucJp,
      ceEucKr,
      ceIso2022Jp,
      ceIso2022Kr,
      ceBig5,
      ceGbk,
      ceHz,
      ceShiftJis,
      ceWansung,
      ceUtf7,
      ceUtf8,
      ceCount
  };

  typedef short CEnc;
  //
  // Encoding "families" (for CceDetectInputCode() preferences).
  //
  // efNone is 0, the value CceDetectInputCode() treats as "no preference".
  //
  typedef enum _efam
  {
      efNone = 0,
      efDbcs,
      efEuc,
      efIso2022,
      efUtf8
  } EFam;
  206. //
  207. // API private/reserved structure. For most API functions,
  208. // this structure must be zero-filled by calling application.
  209. // See converter function documentation, below, for more
  210. // information.
  211. //
  212. #define cdwReserved 4
  213. typedef struct _ars
  214. {
  215. DWORD rgdw[cdwReserved];
  216. } ARS;
  217. // For GetProcAddress()
  218. typedef void (MSENAPI *PFNMSENCODEVER)(WORD FAR *, WORD FAR *);
  // ----------------------------------------------------------------------------
  //
  // Input Code Auto-Detection Routine
  //
  // ----------------------------------------------------------------------------

  //
  // Configuration Flags for Auto Detection Routine
  //
  //  grfDetectResolveAmbiguity
  //      The default is to return cceAmbiguousInput if the auto
  //      detection code cannot definitely determine the encoding
  //      of the input stream.  If this flag is set, the function
  //      will use optional user preferences and the system code
  //      page to pick an encoding (note that in this case, the
  //      "lpfGuess" flag will be set to fTrue upon return).
  //
  //  grfDetectUseCharMapping
  //      The default action of the auto-detection code is to
  //      parse the input against the known encoding types.  Legal
  //      character sequences are not analyzed for anything
  //      beyond syntactic correctness.  If this flag is set,
  //      auto-detect will map recognized sequences to flush out
  //      invalid characters.
  //
  //      This option will cause auto-detection to run more
  //      slowly, but also yield more accurate results.
  //
  //  grfDetectIgnoreEof
  //      Because auto-detect parses byte sequences against the
  //      known encoding types, end-of-input in the middle of a
  //      sequence is obviously an error.  If the calling application
  //      will artificially limit the sample size, set this flag
  //      to ignore such end-of-input errors.
  //
  // The flags are distinct bits and may be OR-ed together.
  //
  #define grfDetectResolveAmbiguity   0x1
  #define grfDetectUseCharMapping     0x2
  #define grfDetectIgnoreEof          0x4
  256. //
  257. // Entry Point -- Attempt to Detect the Encoding
  258. //
  259. // Return cceAmbiguousInput if input is ambiguous or cceUnknownInput
  260. // if encoding type matches none of the known types.
  261. //
  262. // Detected encoding is returned in lpCe. lpfGuess used to return
  263. // a flag indicating whether or not the function "guessed" at an
  264. // encoding (chose default from ambiguous state).
  265. //
  266. // User preferences for encoding family (efPref) and code page
  267. // (nPrefCp) are optional, even if caller chooses to have
  268. // this function attempt to resolve ambiguity. If either has
  269. // the value 0, they will be ignored.
  270. //
  271. EXPDECL(CCE)
  272. CceDetectInputCode(
  273. IStream *pstmIn, // input stream
  274. DWORD dwFlags, // configuration flags
  275. EFam efPref, // optional: preferred encoding family
  276. int nPrefCp, // optional: preferred code page
  277. UINT *lpCe, // set to detected encoding
  278. BOOL *lpfGuess // set to fTrue if function "guessed"
  279. );
  280. #endif // #ifndef MSENCODE_H