// ITWBRK.H:	(from Tripoli) IWordBreaker, IWordSink, IPhraseSink, IStem
//				(from InfoTech) IWordBreakerConfig
//				(from Tripoli and InfoTech) Supporting definitions.

#ifndef __ITWBRK_H__
#define __ITWBRK_H__

#include <comdef.h>
#include <itstem.h>


#ifndef __IPhraseSink_FWD_DEFINED__
#define __IPhraseSink_FWD_DEFINED__
typedef interface IPhraseSink IPhraseSink;
#endif 	/* __IPhraseSink_FWD_DEFINED__ */


#ifndef __IWordSink_FWD_DEFINED__
#define __IWordSink_FWD_DEFINED__
typedef interface IWordSink IWordSink;
#endif 	/* __IWordSink_FWD_DEFINED__ */


#ifndef __IWordBreaker_FWD_DEFINED__
#define __IWordBreaker_FWD_DEFINED__
typedef interface IWordBreaker IWordBreaker;
#endif 	/* __IWordBreaker_FWD_DEFINED__ */


#ifndef __IWordBreakerConfig_FWD_DEFINED__
#define __IWordBreakerConfig_FWD_DEFINED__
typedef interface IWordBreakerConfig IWordBreakerConfig;
#endif 	/* __IWordBreakerConfig_FWD_DEFINED__ */


#ifndef __IITStopWordList_FWD_DEFINED__
#define __IITStopWordList_FWD_DEFINED__
typedef interface IITStopWordList IITStopWordList;
#endif 	/* __IITStopWordList_FWD_DEFINED__ */


// Supporting definitions for IWordBreaker.
typedef struct tagTEXT_SOURCE TEXT_SOURCE;
typedef SCODE (__stdcall *PFNFILLTEXTBUFFER)(TEXT_SOURCE *pTextSource);

typedef struct tagTEXT_SOURCE
{
    PFNFILLTEXTBUFFER pfnFillTextBuffer;
    WCHAR *awcBuffer;
    ULONG iEnd;
    ULONG iCur;
} TEXT_SOURCE;


DECLARE_INTERFACE_(IWordBreaker, IUnknown)
{
	STDMETHOD(Init)(BOOL fQuery, ULONG ulMaxTokenSize, BOOL *pfLicense) PURE;
	STDMETHOD(BreakText)(TEXT_SOURCE *pTextSource, IWordSink *pWordSink,
											IPhraseSink *pPhraseSink) PURE;
	STDMETHOD(ComposePhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
						WCHAR const *pwcModifier, ULONG cwcModifier,
						ULONG ulAttachmentType, WCHAR *pwcPhrase,
												ULONG *pcwcPhrase) PURE;
    STDMETHOD(GetLicenseToUse)(WCHAR const **ppwcsLicense) PURE;
};

typedef IWordBreaker *PIWBRK;


// Break word types that can be passed to
// IWordBreakerConfig::SetBreakWordType.
#define IITWBC_BREAKTYPE_TEXT		((DWORD) 0)
#define IITWBC_BREAKTYPE_NUMBER		((DWORD) 1)
#define IITWBC_BREAKTYPE_DATE		((DWORD) 2)
#define IITWBC_BREAKTYPE_TIME		((DWORD) 3)
#define IITWBC_BREAKTYPE_EPOCH		((DWORD) 4)


// Breaker control flags that can be passed to
// IWordBreakerConfig::SetControlInfo.
#define IITWBC_BREAK_ACCEPT_WILDCARDS	0x00000001  // Interpret wildcard chars
													// as such.
#define IITWBC_BREAK_AND_STEM           0x00000002  // Stem words after breaking
													// them.

// External data types that can be passed to
// IWordBreakerConfig::LoadExternalBreakerData.
#define IITWBC_EXTDATA_CHARTABLE		((DWORD) 0)		
#define IITWBC_EXTDATA_STOPWORDLIST		((DWORD) 1)


DECLARE_INTERFACE_(IWordBreakerConfig, IUnknown)
{
	// Sets/gets locale info that will affect the word breaking
	// behavior of IWordBreaker::BreakText.
	// Returns S_OK if locale described by params is supported
	// by the breaker object; E_INVALIDARG otherwise.
	STDMETHOD(SetLocaleInfo)(DWORD dwCodePageID, LCID lcid) PURE;
	STDMETHOD(GetLocaleInfo)(DWORD *pdwCodePageID, LCID *plcid) PURE;

	// Sets/gets the type of words the breaker should expect
	// to see in all subsequent calls to IWordBreaker::BreakText.
	// Returns S_OK if the type is understood by the breaker
	//  object; E_INVALIDARG otherwise.
	STDMETHOD(SetBreakWordType)(DWORD dwBreakWordType) PURE;
	STDMETHOD(GetBreakWordType)(DWORD *pdwBreakWordType) PURE;

	// Sets/gets info that controls certain aspects of word breaking.
	// This method currently accepts only the following set of flags
	// in grfBreakFlags:
	//		IITWBC_BREAK_ACCEPT_WILDCARDS
	//		IITWBC_BREAK_AND_STEM
	// In the future, additional information may be passed in through
	// dwReserved.
	STDMETHOD(SetControlInfo)(DWORD grfBreakFlags, DWORD dwReserved) PURE;
	STDMETHOD(GetControlInfo)(DWORD *pgrfBreakFlags, DWORD *pdwReserved) PURE;

	// Will load external breaker data, such as a table containing
	// char-by-char break information or a list of stop words.
	// Although the format of the data in the stream is entirely
	// implementation-specific, this interface does define a couple
	// of general types for that data which can be passed in
	// dwStreamDataType:
	//		IITWBC_EXTDATA_CHARTABLE
	//		IITWBC_EXTDATA_STOPWORDLIST
	STDMETHOD(LoadExternalBreakerData)(IStream *pStream,
										DWORD dwExtDataType) PURE;

	// These methods allow a stemmer to be associated with the breaker.  The
	// breaker will take responsibility for calling
	// IPersistStreamInit::Load/Save when it is loaded/saved if the stemmer
	// supports that interface.
	STDMETHOD(SetWordStemmer)(REFCLSID rclsid, IStemmer *pStemmer) PURE;
	STDMETHOD(GetWordStemmer)(IStemmer **ppStemmer) PURE;
};

typedef IWordBreakerConfig *PIWBRKC;


// Supporting definitions for IWordSink.
typedef enum tagWORDREP_BREAK_TYPE
{
    WORDREP_BREAK_EOW = 0,
    WORDREP_BREAK_EOS = 1,
    WORDREP_BREAK_EOP = 2,
    WORDREP_BREAK_EOC = 3
} WORDREP_BREAK_TYPE;


DECLARE_INTERFACE_(IWordSink, IUnknown)
{
	STDMETHOD(PutWord)(WCHAR const *pwcInBuf, ULONG cwc,
						ULONG cwcSrcLen, ULONG cwcSrcPos) PURE;
	STDMETHOD(PutAltWord)(WCHAR const *pwcInBuf, ULONG cwc, 
						ULONG cwcSrcLen, ULONG cwcSrcPos) PURE;
	STDMETHOD(StartAltPhrase)(void) PURE;
	STDMETHOD(EndAltPhrase)(void) PURE;
	STDMETHOD(PutBreak)(WORDREP_BREAK_TYPE breakType) PURE;
};

typedef IWordSink *PIWRDSNK;


DECLARE_INTERFACE_(IPhraseSink, IUnknown)
{
	STDMETHOD(PutSmallPhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
								WCHAR const *pwcModifier, 
								ULONG cwcModifier,
								ULONG ulAttachmentType) PURE;
	STDMETHOD(PutPhrase)(WCHAR const *pwcPhrase, ULONG cwcPhrase) PURE;
};

typedef IPhraseSink *PIPHRSNK;


// Function or macro that can be used by a breaker implementation
// to pull characters from the caller's text source.
#ifdef __cplusplus

inline WCHAR WBreakGetWChar(TEXT_SOURCE *pTextSource )
{
    if ( pTextSource->iCur == pTextSource->iEnd )
    {
        if ( FAILED(pTextSource->pfnFillTextBuffer( pTextSource ) ) )
            return 0xFFFF;  // UniCode EOF
    }

    return pTextSource->awcBuffer[pTextSource->iCur++];
};

#else

#define WBreakGetWChar( pTextSource )\
    (pTextSource->iCur==pTextSource->iEnd)\
    ? (FAILED(pTextSource->pfnFillTextBuffer( pTextSource )) \
       ? 0xFFFF\
       : pTextSource->awcBuffer[pTextSource->iCur++])\
    : pTextSource->awcBuffer[pTextSource->iCur++]

#endif


DECLARE_INTERFACE_(IITStopWordList, IUnknown)
{
	STDMETHOD(AddWord)(WCHAR const *pwcInBuf, ULONG cwc) PURE;
	STDMETHOD(LookupWord)(WCHAR const *pwcInBuf, ULONG cwc) PURE;
};

typedef	IITStopWordList	*PIITSTWDL;


#endif		// __ITWBRK_H__