Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

212 lines
6.5 KiB

  // ITWBRK.H: (from Tripoli) IWordBreaker, IWordSink, IPhraseSink, IStem
  //           (from InfoTech) IWordBreakerConfig
  //           (from Tripoli and InfoTech) Supporting definitions.
  4. #ifndef __ITWBRK_H__
  5. #define __ITWBRK_H__
  6. #include <comdef.h>
  7. #include <itstem.h>
  8. #ifndef __IPhraseSink_FWD_DEFINED__
  9. #define __IPhraseSink_FWD_DEFINED__
  10. typedef interface IPhraseSink IPhraseSink;
  11. #endif /* __IPhraseSink_FWD_DEFINED__ */
  12. #ifndef __IWordSink_FWD_DEFINED__
  13. #define __IWordSink_FWD_DEFINED__
  14. typedef interface IWordSink IWordSink;
  15. #endif /* __IWordSink_FWD_DEFINED__ */
  16. #ifndef __IWordBreaker_FWD_DEFINED__
  17. #define __IWordBreaker_FWD_DEFINED__
  18. typedef interface IWordBreaker IWordBreaker;
  19. #endif /* __IWordBreaker_FWD_DEFINED__ */
  20. #ifndef __IWordBreakerConfig_FWD_DEFINED__
  21. #define __IWordBreakerConfig_FWD_DEFINED__
  22. typedef interface IWordBreakerConfig IWordBreakerConfig;
  23. #endif /* __IWordBreakerConfig_FWD_DEFINED__ */
  24. #ifndef __IITStopWordList_FWD_DEFINED__
  25. #define __IITStopWordList_FWD_DEFINED__
  26. typedef interface IITStopWordList IITStopWordList;
  27. #endif /* __IITStopWordList_FWD_DEFINED__ */
  28. // Supporting definitions for IWordBreaker.
  29. typedef struct tagTEXT_SOURCE TEXT_SOURCE;
  30. typedef SCODE (__stdcall *PFNFILLTEXTBUFFER)(TEXT_SOURCE *pTextSource);
  31. typedef struct tagTEXT_SOURCE
  32. {
  33. PFNFILLTEXTBUFFER pfnFillTextBuffer;
  34. WCHAR *awcBuffer;
  35. ULONG iEnd;
  36. ULONG iCur;
  37. } TEXT_SOURCE;
  38. DECLARE_INTERFACE_(IWordBreaker, IUnknown)
  39. {
  40. STDMETHOD(Init)(BOOL fQuery, ULONG ulMaxTokenSize, BOOL *pfLicense) PURE;
  41. STDMETHOD(BreakText)(TEXT_SOURCE *pTextSource, IWordSink *pWordSink,
  42. IPhraseSink *pPhraseSink) PURE;
  43. STDMETHOD(ComposePhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
  44. WCHAR const *pwcModifier, ULONG cwcModifier,
  45. ULONG ulAttachmentType, WCHAR *pwcPhrase,
  46. ULONG *pcwcPhrase) PURE;
  47. STDMETHOD(GetLicenseToUse)(WCHAR const **ppwcsLicense) PURE;
  48. };
  49. typedef IWordBreaker *PIWBRK;
  // Break word types that can be passed to
  // IWordBreakerConfig::SetBreakWordType.
  #define IITWBC_BREAKTYPE_TEXT   ((DWORD) 0)
  #define IITWBC_BREAKTYPE_NUMBER ((DWORD) 1)
  #define IITWBC_BREAKTYPE_DATE   ((DWORD) 2)
  #define IITWBC_BREAKTYPE_TIME   ((DWORD) 3)
  #define IITWBC_BREAKTYPE_EPOCH  ((DWORD) 4)

  // Breaker control flags that can be passed to
  // IWordBreakerConfig::SetControlInfo. These are bit flags and may be
  // combined in grfBreakFlags.
  #define IITWBC_BREAK_ACCEPT_WILDCARDS 0x00000001 // Interpret wildcard chars
                                                   // as such.
  #define IITWBC_BREAK_AND_STEM         0x00000002 // Stem words after breaking
                                                   // them.

  // External data types that can be passed to
  // IWordBreakerConfig::LoadExternalBreakerData.
  #define IITWBC_EXTDATA_CHARTABLE    ((DWORD) 0)
  #define IITWBC_EXTDATA_STOPWORDLIST ((DWORD) 1)
  67. DECLARE_INTERFACE_(IWordBreakerConfig, IUnknown)
  68. {
  69. // Sets/gets locale info that will affect the word breaking
  70. // behavior of IWordBreaker::BreakText.
  71. // Returns S_OK if locale described by params is supported
  72. // by the breaker object; E_INVALIDARG otherwise.
  73. STDMETHOD(SetLocaleInfo)(DWORD dwCodePageID, LCID lcid) PURE;
  74. STDMETHOD(GetLocaleInfo)(DWORD *pdwCodePageID, LCID *plcid) PURE;
  75. // Sets/gets the type of words the breaker should expect
  76. // to see in all subsequent calls to IWordBreaker::BreakText.
  77. // Returns S_OK if the type is understood by the breaker
  78. // object; E_INVALIDARG otherwise.
  79. STDMETHOD(SetBreakWordType)(DWORD dwBreakWordType) PURE;
  80. STDMETHOD(GetBreakWordType)(DWORD *pdwBreakWordType) PURE;
  81. // Sets/gets info that controls certain aspects of word breaking.
  82. // This method currently accepts only the following set of flags
  83. // in grfBreakFlags:
  84. // IITWBC_BREAK_ACCEPT_WILDCARDS
  85. // IITWBC_BREAK_AND_STEM
  86. // In the future, additional information may be passed in through
  87. // dwReserved.
  88. STDMETHOD(SetControlInfo)(DWORD grfBreakFlags, DWORD dwReserved) PURE;
  89. STDMETHOD(GetControlInfo)(DWORD *pgrfBreakFlags, DWORD *pdwReserved) PURE;
  90. // Will load external breaker data, such as a table containing
  91. // char-by-char break information or a list of stop words.
  92. // Although the format of the data in the stream is entirely
  93. // implementation-specific, this interface does define a couple
  94. // of general types for that data which can be passed in
  95. // dwStreamDataType:
  96. // IITWBC_EXTDATA_CHARTABLE
  97. // IITWBC_EXTDATA_STOPWORDLIST
  98. STDMETHOD(LoadExternalBreakerData)(IStream *pStream,
  99. DWORD dwExtDataType) PURE;
  100. // These methods allow a stemmer to be associated with the breaker. The
  101. // breaker will take responsibility for calling
  102. // IPersistStreamInit::Load/Save when it is loaded/saved if the stemmer
  103. // supports that interface.
  104. STDMETHOD(SetWordStemmer)(REFCLSID rclsid, IStemmer *pStemmer) PURE;
  105. STDMETHOD(GetWordStemmer)(IStemmer **ppStemmer) PURE;
  106. };
  107. typedef IWordBreakerConfig *PIWBRKC;
  // Supporting definitions for IWordSink.

  // Kind of break reported through IWordSink::PutBreak.
  // NOTE(review): the suffixes presumably stand for end of word / sentence /
  // paragraph / chapter; this header does not spell them out, so confirm.
  typedef enum tagWORDREP_BREAK_TYPE
  {
      WORDREP_BREAK_EOW = 0,
      WORDREP_BREAK_EOS = 1,
      WORDREP_BREAK_EOP = 2,
      WORDREP_BREAK_EOC = 3
  } WORDREP_BREAK_TYPE;
  116. DECLARE_INTERFACE_(IWordSink, IUnknown)
  117. {
  118. STDMETHOD(PutWord)(WCHAR const *pwcInBuf, ULONG cwc,
  119. ULONG cwcSrcLen, ULONG cwcSrcPos) PURE;
  120. STDMETHOD(PutAltWord)(WCHAR const *pwcInBuf, ULONG cwc,
  121. ULONG cwcSrcLen, ULONG cwcSrcPos) PURE;
  122. STDMETHOD(StartAltPhrase)(void) PURE;
  123. STDMETHOD(EndAltPhrase)(void) PURE;
  124. STDMETHOD(PutBreak)(WORDREP_BREAK_TYPE breakType) PURE;
  125. };
  126. typedef IWordSink *PIWRDSNK;
  127. DECLARE_INTERFACE_(IPhraseSink, IUnknown)
  128. {
  129. STDMETHOD(PutSmallPhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
  130. WCHAR const *pwcModifier,
  131. ULONG cwcModifier,
  132. ULONG ulAttachmentType) PURE;
  133. STDMETHOD(PutPhrase)(WCHAR const *pwcPhrase, ULONG cwcPhrase) PURE;
  134. };
  135. typedef IPhraseSink *PIPHRSNK;
  // Function or macro that can be used by a breaker implementation
  // to pull characters from the caller's text source.
  //
  // If the buffer is exhausted (iCur == iEnd), the source's
  // pfnFillTextBuffer callback is invoked first. When that call fails the
  // result is 0xFFFF (Unicode EOF) and iCur is left untouched; otherwise the
  // result is awcBuffer[iCur] and iCur is advanced by one.
  //
  // C++ gets an inline function. C gets a macro of the same name, which
  // evaluates its argument more than once: do not pass an expression with
  // side effects.
  #ifdef __cplusplus
  inline WCHAR WBreakGetWChar(TEXT_SOURCE *pTextSource)
  {
      if (pTextSource->iCur == pTextSource->iEnd)
      {
          if (FAILED(pTextSource->pfnFillTextBuffer(pTextSource)))
              return 0xFFFF; // UniCode EOF
      }
      return pTextSource->awcBuffer[pTextSource->iCur++];
  }
  #else
  // The argument and the whole expansion are parenthesized so the macro can
  // be embedded in a larger expression (e.g. compared or added to) and can be
  // given arguments such as &source without changing how it parses.
  #define WBreakGetWChar(pTextSource) \
      (((pTextSource)->iCur == (pTextSource)->iEnd) \
          ? (FAILED((pTextSource)->pfnFillTextBuffer(pTextSource)) \
              ? 0xFFFF \
              : (pTextSource)->awcBuffer[(pTextSource)->iCur++]) \
          : (pTextSource)->awcBuffer[(pTextSource)->iCur++])
  #endif
  156. DECLARE_INTERFACE_(IITStopWordList, IUnknown)
  157. {
  158. STDMETHOD(AddWord)(WCHAR const *pwcInBuf, ULONG cwc) PURE;
  159. STDMETHOD(LookupWord)(WCHAR const *pwcInBuf, ULONG cwc) PURE;
  160. };
  161. typedef IITStopWordList *PIITSTWDL;
  162. #endif // __ITWBRK_H__