|
|
/*----------------------------------------------------------------------------
%%File: msencode.h %%Unit: fechmap %%Contact: jpick
External header file for MsEncode character conversion module. ----------------------------------------------------------------------------*/
#ifndef MSENCODE_H
#define MSENCODE_H
// ----------------------------------------------------------------------------
//
// Error Returns
//
// ----------------------------------------------------------------------------
//
// CCE -- status code type returned by the API functions.
//
// cceSuccess is zero; every other code defined in this header is negative.
//
typedef int CCE;

//
// Code:    cceSuccess
// Meaning: The function completed without error.
//
#define cceSuccess                  0

//
// Code:    cceRequestedStop
// Meaning: Not a failure. The caller asked for iterator mode (stop on
//          each character, or stop on ASCII) and the function is making
//          the stop that was requested. Stream conversion functions only.
//
#define cceRequestedStop            (-1)

//
// Code:    cceInsufficientBuffer
// Meaning: The buffer handed to the function is too small.
//
#define cceInsufficientBuffer       (-2)

//
// Code:    cceInvalidFlags
// Meaning: The function was given an invalid flag, or an invalid
//          combination of flags.
//
#define cceInvalidFlags             (-3)

//
// Code:    cceInvalidParameter
// Meaning: A parameter passed to the function is invalid (null pointer,
//          invalid encoding specified, etc.).
//
#define cceInvalidParameter         (-4)

//
// Code:    cceRead
// Meaning: The user's read-callback function failed.
//
#define cceRead                     (-5)

//
// Code:    cceWrite
// Meaning: The user's write-callback function failed.
//
#define cceWrite                    (-6)

//
// Code:    cceUnget
// Meaning: The user's unget-callback function failed.
//
#define cceUnget                    (-7)

//
// Code:    cceNoCodePage
// Meaning: The requested encoding needs an installed code page (NLS
//          file) for conversion, and that file is not installed.
//
#define cceNoCodePage               (-8)

//
// Code:    cceEndOfInput
// Meaning: A conversion function hit an unexpected end-of-input inside
//          a multi-byte character. Returned only if the user requested
//          errors for invalid characters.
//
#define cceEndOfInput               (-9)

//
// Code:    cceNoTranslation
// Meaning: A character in the input stream or string has no equivalent
//          Unicode character (multi-byte to Unicode) or no equivalent
//          multi-byte character (Unicode to multi-byte). Returned only
//          if the user requested errors for invalid characters.
//
#define cceNoTranslation            (-10)

//
// Code:    cceInvalidChar
// Meaning: The converter found a single or multi-byte character that is
//          outside the legal range for the given encoding. Returned
//          only if the user requested errors for invalid characters.
//
#define cceInvalidChar              (-11)
//
// Code:    cceAmbiguousInput
// Meaning: CceDetectInputCode() only. The data matches more than one of
//          the supported encoding types. Returned only if the function
//          was told not to resolve ambiguity.
//
#define cceAmbiguousInput           (-12)

//
// Code:    cceUnknownInput
// Meaning: CceDetectInputCode() only. The data matches none of the
//          supported encoding types.
//
#define cceUnknownInput             (-13)

//
// Code:    cceMayBeAscii
// Meaning: CceDetectInputCode() only. Technically the data matches at
//          least one of the supported encoding types, but it may not be
//          a true match (for example, an ASCII file with only a few
//          scattered extended characters). Returned only if the
//          function was told to resolve ambiguity.
//
//          This is a flag to the calling application rather than an
//          error: CceDetectInputCode() still sets the encoding type
//          when it returns this value.
//
#define cceMayBeAscii               (-14)
//
// Code:    cceInternal
// Meaning: Unrecoverable internal error.
//
#define cceInternal                 (-15)

//
// Code:    cceConvert
// Meaning: Unexpected DBCS function conversion error.
//
#define cceConvert                  (-16)

//
// Code:    cceEncodingNotImplemented
// Meaning: Temporary integration error: the requested encoding is not
//          implemented.
//
#define cceEncodingNotImplemented   (-100)

//
// Code:    cceFunctionNotImplemented
// Meaning: Temporary integration error: the function is not
//          implemented.
//
#define cceFunctionNotImplemented   (-101)
// ----------------------------------------------------------------------------
//
// General Definitions for Modules Using these Routines
//
// ----------------------------------------------------------------------------
// Calling-convention macros. Both expand to PASCAL, which this header does
// not define itself.
// NOTE(review): PASCAL is presumably supplied by the Windows headers --
// confirm they are always included before this file.
//
// MSENAPI   - convention used for the API entry points declared below.
// MSENCBACK - by its name, the convention for user callbacks; no use of it
//             is visible in this part of the header.
#define MSENAPI PASCAL
#define MSENCBACK PASCAL
// EXPIMPL(type): return type followed by the API calling convention.
// NOTE(review): presumably meant for the function definitions that match
// the EXPDECL declarations -- confirm against the implementation files.
#define EXPIMPL(type) type MSENAPI
// EXPDECL(type): same as EXPIMPL with a leading 'extern'; used for the API
// prototypes in this header.
#define EXPDECL(type) extern type MSENAPI
//
// Fallback definitions, in case these are not already defined.
//
// FAR is only defined here when no earlier header has provided it.
// NOTE(review): __far is a 16-bit-era keyword; confirm the _WIN32 branch
// is still what is wanted for the compilers in use.
//
#if !defined(FAR)
#  if defined(_WIN32)
#    define FAR __far
#  else
#    define FAR
#  endif
#endif

// Unsigned character type and the pointer types built on it.
typedef unsigned char UCHAR;
typedef UCHAR *PUCHAR;
typedef UCHAR FAR *LPUSTR;
typedef const UCHAR FAR *LPCUSTR;

// IEUNIX uses a 4-byte WCHAR, and these types are already defined in
// winnt.h there, so they are declared here only when UNIX is not defined.
//
// The closing #endif must sit on its own line: a directive that follows
// other tokens on a line is not recognized by the preprocessor, which
// would leave this conditional open and break the include guard.
#ifndef UNIX
typedef unsigned short WCHAR;
typedef WCHAR *PWCHAR;
typedef WCHAR FAR *LPWSTR;
typedef const WCHAR FAR *LPCWSTR;
#endif // #ifndef UNIX
//
// Character encoding types supported by this module.
//
// ceNil is the only negative value. The encodings are numbered from 0
// (ceEucCn) upward, and ceCount follows the last one, so it equals the
// number of encodings listed.
//
// This is a plain enum declaration: the original 'typedef' had no name to
// declare, which compilers diagnose as a useless storage-class specifier.
// The tag and every enumerator are unchanged.
//
enum _cenc
{
    ceNil = -1,
    ceEucCn = 0,
    ceEucJp,
    ceEucKr,
    ceIso2022Jp,
    ceIso2022Kr,
    ceBig5,
    ceGbk,
    ceHz,
    ceShiftJis,
    ceWansung,
    ceUtf7,
    ceUtf8,
    ceCount
};

// Short-sized encoding type.
// NOTE(review): by its name, presumably used to hold enum _cenc values --
// confirm against callers.
typedef short CEnc;
//
// Encoding "families", used to state a preference to CceDetectInputCode().
//
typedef enum _efam
{
    efNone    = 0,
    efDbcs    = 1,
    efEuc     = 2,
    efIso2022 = 3,
    efUtf8    = 4
} EFam;
//
// Private/reserved structure for the API.
//
// For most API functions the calling application must zero-fill this
// structure. See the converter function documentation for more
// information.
//
#define cdwReserved 4

typedef struct _ars
{
    DWORD rgdw[cdwReserved];    // cdwReserved reserved DWORDs
} ARS;
// For GetProcAddress(): pointer to an MSENAPI function that returns void
// and takes two WORD FAR * arguments.
// NOTE(review): by its name this points at the module's version query,
// with the two WORDs presumably receiving the version parts -- confirm
// against the exporting module.
typedef void (MSENAPI *PFNMSENCODEVER)(WORD FAR *, WORD FAR *);
// ----------------------------------------------------------------------------
//
// Input Code Auto-Detection Routine
//
// ----------------------------------------------------------------------------
//
// Configuration flags for the auto-detection routine.
//
// grfDetectResolveAmbiguity
//     By default the routine returns cceAmbiguousInput when it cannot
//     definitely determine the encoding of the input stream. With this
//     flag set, it instead uses the optional user preferences and the
//     system code page to pick an encoding (in which case the
//     "lpfGuess" flag is set to fTrue upon return).
//
// grfDetectUseCharMapping
//     By default the routine parses the input against the known
//     encoding types, and legal character sequences are not analyzed
//     for anything beyond syntactic correctness. With this flag set,
//     recognized sequences are also mapped, to flush out invalid
//     characters.
//
//     This makes auto-detection run more slowly, but yields more
//     accurate results.
//
// grfDetectIgnoreEof
//     Because the routine parses byte sequences against the known
//     encoding types, end-of-input in the middle of a sequence is an
//     error. If the calling application will artificially limit the
//     sample size, set this flag to ignore such end-of-input errors.
//
#define grfDetectResolveAmbiguity   0x1
#define grfDetectUseCharMapping     0x2
#define grfDetectIgnoreEof          0x4
//
// CceDetectInputCode -- entry point: attempt to detect the encoding.
//
// Parameters:
//   pstmIn    input stream
//   dwFlags   configuration flags
//   efPref    optional: preferred encoding family
//   nPrefCp   optional: preferred code page
//   lpCe      set to the detected encoding
//   lpfGuess  set to fTrue if the function "guessed" at an encoding
//             (chose a default from an ambiguous state)
//
// Returns cceAmbiguousInput if the input is ambiguous, or
// cceUnknownInput if the encoding type matches none of the known types.
//
// The user preferences for encoding family (efPref) and code page
// (nPrefCp) are optional, even when the caller chooses to have this
// function attempt to resolve ambiguity. If either has the value 0, they
// will be ignored.
//
EXPDECL(CCE)
CceDetectInputCode(
    IStream *pstmIn,
    DWORD    dwFlags,
    EFam     efPref,
    int      nPrefCp,
    UINT    *lpCe,
    BOOL    *lpfGuess
);
#endif // #ifndef MSENCODE_H
|