mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
1.8 KiB
60 lines
1.8 KiB
//+---------------------------------------------------------------------------
//
//
// CThaiWordBreak
//
// History:
// created 7/99 aarayas
//
// ©1999 Microsoft Corporation
//----------------------------------------------------------------------------
#ifndef _CTHAIWORDBREAK_H_
|
|
#define _CTHAIWORDBREAK_H_
|
|
#include "thwbdef.hpp"
|
|
#include "ctrie.hpp"
|
|
#include "CThaiTrieIter.hpp"
|
|
#include "lextable.hpp"
|
|
#include "CThaiBreakTree.hpp"
|
|
#include "CThaiTrigramTrieIter.hpp"
|
|
|
|
class CThaiBreakTree;
|
|
|
|
class CThaiWordBreak {
|
|
public:
|
|
#if defined (NGRAM_ENABLE)
|
|
PTEC Init(WCHAR* wzFileName, WCHAR* wzFileNameSentStruct, WCHAR* wzFileNameTrigram);
|
|
#else
|
|
PTEC Init(WCHAR* wzFileName, WCHAR* wzFileNameTrigram);
|
|
#endif
|
|
PTEC InitRc(LPBYTE , LPBYTE);
|
|
void UnInit();
|
|
|
|
int IndexWordBreak(WCHAR* wzString,unsigned int iStringLen, BYTE* pBreakPos,THWB_STRUCT* pThwb_Struct,unsigned int iBreakMax);
|
|
int FindAltWord(WCHAR* wzWord,unsigned int iWordLen, BYTE Alt, BYTE* pBreakPos);
|
|
|
|
int FindWordBreak(WCHAR* wzString,unsigned int iStringLen, BYTE* pBreakPos,unsigned int iBreakMax, BYTE mode, bool fFastWordBreak = true);
|
|
|
|
DWORD_PTR CreateWordBreaker();
|
|
bool DeleteWordBreaker(DWORD_PTR dwBreaker);
|
|
int FindWordBreak(DWORD_PTR dwBreaker, WCHAR* wzString,unsigned int iStringLen, BYTE* pBreakPos,unsigned int iBreakMax, BYTE mode, bool fFastWordBreak = true, THWB_STRUCT* pThwb_Struct = NULL);
|
|
|
|
BOOL Find(WCHAR* wzString, DWORD* pdwPOS);
|
|
int Soundex(WCHAR* word) {return 0;} //breakTree.Soundex(word);} -- re-entrant bug fix
|
|
protected:
|
|
#if defined (NGRAM_ENABLE)
|
|
BOOL WordBreak(WCHAR* pszBegin, WCHAR* pszEnd);
|
|
#endif
|
|
|
|
CTrie m_trie;
|
|
#if defined (NGRAM_ENABLE)
|
|
CTrie trie_sentence_struct;
|
|
#endif
|
|
CTrie m_trie_trigram;
|
|
CThaiTrieIter m_thaiTrieIter;
|
|
// CThaiBreakTree breakTree; fix re-entrant bug.
|
|
|
|
int wordCount[MAXBREAK];
|
|
|
|
};
|
|
|
|
#endif
|