Leaked source code of windows server 2003
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
//------------------------------------------------------------------------
//
// Tabular Data Control Parse Module
// Copyright (C) Microsoft Corporation, 1996, 1997
//
// File: TDCParse.h
//
// Contents: Declaration of the TDC parser classes.
//
// The intent of these classes once was to create a pipeline.
//
//
// |
// | Wide-character stream
// | ~~~~~~~~~~~~~~~~~~~~~
// \|/
// ------------------------
// | CTDCTokenise object | Created with field & row delimiters, quote &
// | AddWcharBuffer() | escape characters
// ------------------------
// |
// | Stream of <field>, <eoln> and <eof> tokens
// | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// \|/
// ------------------------
// | CTDCFieldSink object | Abstract class, e.g. STD object created with
// | AddField() | sort/filter criteria & fUseHeader flag
// | EOLN() | to interpret the sequence of fields.
// | EOF() |
// ------------------------
//
//------------------------------------------------------------------------
#define DEFAULT_FIELD_DELIM L","
#define DEFAULT_ROW_DELIM L"\n"
#define DEFAULT_QUOTE_CHAR L"\""
#define UNICODE_CP 1200 // Win32's Unicode codepage
#define UNICODE_REVERSE_CP 1201 // Byte-swapped Unicode codepage
#define CP_1252 1252 // Ansi, Western Europe
#define CP_AUTO 50001 // cross language detection
// number of bytes for MLang to make a good guess for the codepage
// (this is a somewhat arbitrary number)
#define CODEPAGE_BYTE_THRESHOLD (4096)
#define N_DETECTENCODINGINFO (5)
#define ALLOW_DOMAIN_STRING L"@!allow_domains"
//------------------------------------------------------------------------
//
// Class: CTDCFieldSink
//
// This class accumulates a sequence of <fields> and <eoln> tokens
// into a 2-D array.
//
// An admissible calling sequence on this object is:
// * 0 or more calls to AddField() or EOLN()
// * 1 call to EOF()
//
//------------------------------------------------------------------------
class CTDCFieldSink { public: STDMETHOD(AddField)(LPWCH pwch, DWORD dwSize) PURE; STDMETHOD(EOLN)() PURE; STDMETHOD(EOF)() PURE; };
//------------------------------------------------------------------------
//
// Class: CTDCUnify
//
// This class takes a series of byte buffers and breaks them up into
// UNICODE buffers.
// The resulting buffers are passed to a CTDCTokenise object.
//
// An admissible calling sequence on this object is:
// * Exactly 1 call to Create()
// * 0 or more calls to AddByteBuffer() with a non-zero-sized buffer
// * Exactly 1 call to AddByteBuffer() with a zero-sized buffer
//
// Calls to query the characteristics of the parsed data are allowed
// after the call to Create(), but are only meaningful after a
// reasonable amount of data has been collected.
//
//
// Caveats:
// ~~~~~~~
// The class characterises the input stream as ASCII/UNICODE/COMPOSITE
// based on the buffer passed in the initial call to AddByteBuffer().
// If this buffer is too small, the class may make an incorrect
// characterisation.
//
//------------------------------------------------------------------------
class CTDCUnify { public: CTDCUnify(); ~CTDCUnify(); HRESULT Create(UINT nCodePage, UINT nAmbientCodePage, IMultiLanguage *pML); HRESULT ConvertByteBuffer(BYTE *pBytes, DWORD dwSize); HRESULT InitTokenizer(CTDCFieldSink *pFieldSink, WCHAR wchDelimField, WCHAR wchDelimRow, WCHAR wchQuote, WCHAR wchEscape); HRESULT AddWcharBuffer(BOOL fAtEnd); int IsUnicode(BYTE * pBytes, DWORD dwSize); BOOL DetermineCodePage(BOOL fForce); enum ALLOWDOMAINLIST { ALLOW_DOMAINLIST_YES, ALLOW_DOMAINLIST_NO, ALLOW_DOMAINLIST_DONTKNOW };
ALLOWDOMAINLIST CheckForAllowDomainList(); HRESULT MatchAllowDomainList(LPCWSTR pwzURL); boolean ProcessedAllowDomainList() {return m_fProcessedAllowDomainList;}
private: CTDCFieldSink *m_pFieldSink; WCHAR m_wchDelimField; WCHAR m_wchDelimRow; WCHAR m_wchQuote; WCHAR m_wchEscape; WCHAR m_ucParsed;
boolean m_fEscapeActive; boolean m_fQuoteActive; boolean m_fIgnoreNextLF; boolean m_fIgnoreNextCR; boolean m_fIgnoreNextWhiteSpace; boolean m_fFoldCRLF; boolean m_fFoldWhiteSpace;
UINT m_nUnicode; boolean m_fDataMarkedUnicode; boolean m_fDataIsUnicode; boolean m_fCanConvertToUnicode; boolean m_fProcessedAllowDomainList; DWORD m_dwBytesProcessed; DWORD m_dwConvertMode; UINT m_nCodePage; UINT m_nAmbientCodePage;
BYTE *m_psByteBuf; ULONG m_ucByteBufSize; ULONG m_ucByteBufCount;
WCHAR *m_psWcharBuf; ULONG m_ucWcharBufSize; ULONG m_ucWcharBufCount;
IMultiLanguage *m_pML; };
|