Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

131 lines
3.9 KiB

  1. // STDBRKR.H: Definition of CITStdBreaker breaker object implementation.
  2. #ifndef __STDBRKR_H__
  3. #define __STDBRKR_H__
  4. #include <itwbrk.h>
  5. #include <itwbrkid.h>
  6. #include "verinfo.h"
  7. #define VERSION_STDBRKR (MAKELONG(MAKEWORD(0, rapFile), MAKEWORD(rmmFile, rmjFile))) // Packed 32-bit version; bytes from most to least significant are rmjFile, rmmFile, rapFile, 0. The three *File names are not defined in this header (presumably supplied by verinfo.h -- confirm).
  8. // Group of flags that indicate what data has been persisted to the
  9. // breaker's stream. Each flag is a distinct bit, so several can be OR'ed into one DWORD.
  10. #define ITSTDBRK_PERSISTED_BRKCTL 0x00000001 // Bit 0 (presumably: BRKCTL settings were persisted -- confirm in Save/Load).
  11. #define ITSTDBRK_PERSISTED_CHARTABLE 0x00000002 // Bit 1 (presumably: character table was persisted -- confirm).
  12. #define ITSTDBRK_PERSISTED_STOPWORDLIST 0x00000004 // Bit 2 (presumably: stop word list was persisted -- confirm).
  13. #define ITSTDBRK_PERSISTED_STEMMER 0x00000008 // Bit 3 (presumably: stemmer information was persisted -- confirm).
  14. // Size used for the stop word hash. NOTE(review): this comment used to read "Max number of stop words allowed", but the name and the remark below suggest 211 is a hash table size, not a word-count limit -- confirm.
  15. #define ITSTDBRK_STOPHASH_SIZE 211 // A good prime number for supporting
  16. // up to about 2000 stop words.
  17. // Breaker control structure that contains information that can
  18. // vary how text words are interpreted and broken. The field names match the parameters of the IWordBreakerConfig setters declared on CITStdBreaker in this header (presumably the values are stored here by those setters -- confirm in the implementation).
  19. typedef struct _brkctl
  20. {
  21. DWORD dwCodePageID; // Code page identifier (same name/type as the SetLocaleInfo parameter).
  22. LCID lcid; // Locale identifier (same name/type as the SetLocaleInfo parameter).
  23. DWORD dwBreakWordType; // Break word type (same name/type as the SetBreakWordType parameter).
  24. DWORD grfBreakFlags; // Group of break flags (same name/type as the SetControlInfo parameter).
  25. } BRKCTL;
  26. // Word callback function param struct that is passed to StdBreakerWordFunc,
  27. // which wraps the IWordSink implementation as far as the internal
  28. // word breaking functions are concerned. StdBreakerWordFunc is not declared in this header.
  29. typedef struct _wrdfnpm
  30. {
  31. PIWRDSNK piwrdsnk; // Word sink pointer (PIWRDSNK is declared elsewhere; presumably IWordSink * -- confirm).
  32. DWORD dwCodePageID; // Code page identifier (presumably that of the MBCS text in lpbBuf -- confirm).
  33. HGLOBAL hmemUnicode; // Global memory handle (presumably a scratch buffer for Unicode text -- confirm).
  34. DWORD cbBufUnicodeCur; // Presumably the current size, in bytes, of the hmemUnicode buffer -- confirm.
  35. LPBYTE lpbBuf; // MBCS text buffer.
  36. } WRDFNPM;
  37. class CITStdBreaker : // ATL-based COM object for the standard word breaker; implements the four interfaces listed below.
  38. public IWordBreaker,
  39. public IWordBreakerConfig,
  40. public IPersistStreamInit,
  41. public IITStopWordList,
  42. public CComObjectRootEx<CComMultiThreadModel>, // ATL root base, instantiated with the multi-threaded model.
  43. public CComCoClass<CITStdBreaker,&CLSID_ITStdBreaker> // Ties this class to CLSID_ITStdBreaker.
  44. {
  45. public:
  46. CITStdBreaker();
  47. virtual ~CITStdBreaker();
  48. BEGIN_COM_MAP(CITStdBreaker) // Interfaces this object exposes through QueryInterface.
  49. COM_INTERFACE_ENTRY(IWordBreaker)
  50. COM_INTERFACE_ENTRY(IWordBreakerConfig)
  51. COM_INTERFACE_ENTRY(IPersistStreamInit)
  52. COM_INTERFACE_ENTRY(IITStopWordList)
  53. END_COM_MAP()
  54. DECLARE_REGISTRY(CLSID_ITStdBreaker, "ITIR.StdWordBreaker.4", "ITIR.StdWordBreaker", 0, THREADFLAGS_BOTH ) // Arguments: CLSID, versioned ProgID, version-independent ProgID, description resource ID (0), threading-model flags.
  55. // IWordBreaker methods (every STDMETHOD below declares a virtual method returning HRESULT).
  56. STDMETHOD(Init)(BOOL fQuery, ULONG ulMaxTokenSize, BOOL *pfLicense);
  57. STDMETHOD(BreakText)(TEXT_SOURCE *pTextSource, IWordSink *pWordSink,
  58. IPhraseSink *pPhraseSink);
  59. STDMETHOD(ComposePhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
  60. WCHAR const *pwcModifier, ULONG cwcModifier,
  61. ULONG ulAttachmentType, WCHAR *pwcPhrase,
  62. ULONG *pcwcPhrase);
  63. STDMETHOD(GetLicenseToUse)(WCHAR const **ppwcsLicense);
  64. // IWordBreakerConfig methods: paired setters/getters whose parameters mirror the BRKCTL fields, plus external data loading and stemmer selection.
  65. STDMETHOD(SetLocaleInfo)(DWORD dwCodePageID, LCID lcid);
  66. STDMETHOD(GetLocaleInfo)(DWORD *pdwCodePageID, LCID *plcid);
  67. STDMETHOD(SetBreakWordType)(DWORD dwBreakWordType);
  68. STDMETHOD(GetBreakWordType)(DWORD *pdwBreakWordType);
  69. STDMETHOD(SetControlInfo)(DWORD grfBreakFlags, DWORD dwReserved);
  70. STDMETHOD(GetControlInfo)(DWORD *pgrfBreakFlags, DWORD *pdwReserved);
  71. STDMETHOD(LoadExternalBreakerData)(IStream *pStream, DWORD dwExtDataType);
  72. STDMETHOD(SetWordStemmer)(REFCLSID rclsid, IStemmer *pStemmer);
  73. STDMETHOD(GetWordStemmer)(IStemmer **ppStemmer);
  74. // IITStopWordList methods. Both take a wide-character buffer and a count (cwc, presumably in characters -- confirm).
  75. STDMETHOD(AddWord)(WCHAR const *pwcInBuf, ULONG cwc);
  76. STDMETHOD(LookupWord)(WCHAR const *pwcInBuf, ULONG cwc);
  77. // IPersistStreamInit methods
  78. STDMETHOD(GetClassID)(CLSID *pclsid);
  79. STDMETHOD(IsDirty)(void);
  80. STDMETHOD(Load)(IStream *pStream);
  81. STDMETHOD(Save)(IStream *pStream, BOOL fClearDirty);
  82. STDMETHOD(GetSizeMax)(ULARGE_INTEGER *pcbSizeMax);
  83. STDMETHOD(InitNew)(void);
  84. private:
  85. // Private methods (bodies are not in this header; the notes below are inferred from the signatures only).
  86. HRESULT StopListOp(WCHAR const *pwcInBuf, ULONG cwc, BOOL fAddWord); // Same buffer/count parameters as AddWord and LookupWord; presumably their shared worker, with fAddWord selecting add vs. lookup -- confirm.
  87. HRESULT ReallocBuffer(HGLOBAL *phmemBuf, DWORD *cbBufCur, DWORD cbBufNew); // Presumably resizes *phmemBuf to cbBufNew bytes and updates *cbBufCur -- confirm.
  88. void ClearMembers(void);
  89. void InitBrkCtl(void); // Presumably resets m_brkctl to default values -- confirm.
  90. void Close(void);
  91. // Private data members (declaration order is also the construction order; do not reorder casually).
  92. BOOL m_fInitialized;
  93. BOOL m_fDirty; // Presumably the state reported by IsDirty and cleared by Save when fClearDirty is set -- confirm.
  94. BOOL m_fQueryContext; // Presumably records the fQuery argument passed to Init -- confirm.
  95. DWORD m_grfPersistedItems; // Presumably a combination of the ITSTDBRK_PERSISTED_* flags -- confirm.
  96. BRKCTL m_brkctl; // Breaker control settings (see the BRKCTL struct in this header).
  97. HGLOBAL m_hmemAnsi; // Global memory handle (presumably an Ansi text scratch buffer -- confirm).
  98. DWORD m_cbBufAnsiCur; // Presumably the current size, in bytes, of the m_hmemAnsi buffer -- confirm.
  99. LPCTAB m_lpctab; // LPCTAB is declared elsewhere (presumably the character table -- confirm).
  100. LPSIPB m_lpsipb; // LPSIPB is declared elsewhere (presumably stop word list data -- confirm).
  101. PISTEM m_pistem; // PISTEM is declared elsewhere (presumably the IStemmer given to SetWordStemmer -- confirm).
  102. CLSID m_clsidStemmer; // Presumably the rclsid given to SetWordStemmer -- confirm.
  103. _ThreadModel::AutoCriticalSection m_cs; // Critical section obj.
  104. };
  105. // Initial size of Ansi string buffers (presumably in bytes, going by the cb prefix -- confirm at the allocation site).
  106. #define cbAnsiBufInit 256
  107. #endif // __STDBRKR_H__