Source code of Windows XP (NT5)
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
//+---------------------------------------------------------------------------
//
//
// CTrie - class CTrie encapsulation for Trie data structure.
//
// History:
// created 6/99 aarayas
//
// �1999 Microsoft Corporation
//----------------------------------------------------------------------------
#include "ctrie.hpp"
#define VERSIONMAJOR 1
#define VERSIONMINOR 0
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: constructor
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
CTrieIter::CTrieIter() { // Initialize local variables.
Reset(); wc = 0; fWordEnd = FALSE; fRestricted = FALSE; frq = 0; dwTag = 0; }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: copy constructor
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
CTrieIter::CTrieIter(const CTrieIter& trieIter) { // Copy all variables from Initial trie.
memcpy(&trieScan, &trieIter.trieScan, sizeof(TRIESCAN)); pTrieCtrl = trieIter.pTrieCtrl; wc = trieIter.wc; fWordEnd = trieIter.fWordEnd; fRestricted = trieIter.fRestricted; frq = trieIter.frq; dwTag = trieIter.dwTag; }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: Initialize variables.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
void CTrieIter::Init(CTrie* ctrie) { // Initialize TrieCtrl
pTrieCtrl = ctrie->pTrieCtrl; }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: Initialize variables.
//
// Arguments:
//
// Modifies:
//
// History: created 3/00 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
void CTrieIter::Init(TRIECTRL* pTrieCtrl1) { // Initialize TrieCtrl
pTrieCtrl = pTrieCtrl1; }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: Bring interation index to the first node.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
void CTrieIter::Reset() { // Reset Trie.
memset(&trieScan, 0, sizeof(TRIESCAN)); }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: Move Iteration index down one node.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
BOOL CTrieIter::Down() { // Move the Trie down one node.
return TrieGetNextState(pTrieCtrl, &trieScan); }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: Move Iteration index right one node.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
BOOL CTrieIter::Right() { // Move the Trie right one node.
return TrieGetNextNode(pTrieCtrl, &trieScan); }
//+---------------------------------------------------------------------------
//
// Class: CTrieIter
//
// Synopsis: Bring interation index to the first node.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
void CTrieIter::GetNode() { wc = trieScan.wch; fWordEnd = (trieScan.wFlags & TRIE_NODE_VALID) && (!(trieScan.wFlags & TRIE_NODE_TAGGED) || (trieScan.aTags[0].dwData & iDialectMask));
if (fWordEnd) { fRestricted = (trieScan.wFlags & TRIE_NODE_TAGGED) && (trieScan.aTags[0].dwData & iRestrictedMask); frq = (BYTE) (trieScan.wFlags & TRIE_NODE_TAGGED ? (trieScan.aTags[0].dwData & 0x300) >> iFrqShift : frqpenNormal);
posTag = (DWORD) (trieScan.wFlags & TRIE_NODE_TAGGED ? (trieScan.aTags[0].dwData & iPosMask) >> iPosShift : 0);
dwTag = (DWORD) (trieScan.wFlags & TRIE_NODE_TAGGED ? trieScan.aTags[0].dwData : 0); } }
//+---------------------------------------------------------------------------
//
// Class: CTrie
//
// Synopsis: Constructor
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
CTrie::CTrie() { pMapFile = NULL; pTrieCtrl = NULL; pTrieScan = NULL; }
//+---------------------------------------------------------------------------
//
// Class: CTrie
//
// Synopsis: Destructor
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
CTrie::~CTrie() { UnInit(); }
//+---------------------------------------------------------------------------
//
// Class: CTrie
//
// Synopsis: Initialize Trie.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
PTEC CTrie::Init(WCHAR* szFileName) { // Declare and Initialize all local variables.
PTEC ptec = ptecModuleError;
// The function assume that pMapFile and pTrieCtrl is NULL, else there are possible memory leak.
// possibility of this could be call Initilization without Terminating.
assert(pMapFile == NULL); assert(pTrieCtrl == NULL);
// Initialize pMapFile and pTrieCtrl to NULL.
pMapFile = NULL; pTrieCtrl = NULL;
pMapFile = OpenMapFileW(szFileName);
if (pMapFile == NULL) { // Unable to load map files, return invalid read error.
ptec = retcode(ptecIOErrorMainLex, ptecFileRead); } else if (pMapFile->pvMap == NULL) { // Return Invalid format and close the files.
ptec = retcode(ptecIOErrorMainLex, ptecInvalidFormat); CloseMapFile(pMapFile); } else { BYTE *pmap = (BYTE *) pMapFile->pvMap;
// find the header
LEXHEADER *plxhead = (LEXHEADER *) pmap; pmap += sizeof(LEXHEADER);
// verify that it's a valid lex file
if (!(plxhead->lxid == lxidSpeller && plxhead->vendorid == vendoridMicrosoft && PROOFMAJORVERSION(plxhead->version) == VERSIONMAJOR )) { // If we reached here than the lexicon is no in a valid Thai wordbreak format.
ptec = retcode(ptecIOErrorMainLex, ptecInvalidFormat); } else { // Make sure the language matches check the first dialect of the lexicon.
// CTrie also support both Thai and Vietnamese language.
if ( (plxhead->lidArray[0] != lidThai) && (plxhead->lidArray[0] != lidViet) ) { // If we reached here than we are not using Thai lexicon.
ptec = retcode(ptecIOErrorMainLex, ptecInvalidLanguage); } else { // The size of the copyright notice
int cCopyright = 0; WCHAR* pwzCopyright = NULL; int cLexSup = 0;
cCopyright = * (int *) pmap; pmap += sizeof(int);
// The copyright notice itself
pwzCopyright = (WCHAR *) pmap; pmap += cCopyright * sizeof(WCHAR);
// Skip Supplemental data for Thai word break.
cLexSup = * (int *) pmap; pmap += sizeof(int); pmap += cLexSup;
pTrieCtrl = TrieInit(pmap); if (pTrieCtrl) { // We were able to load and point to the Trie okay.
//MessageBoxW(0,L"Was able to initialize Trie",pwsz,MB_OK);
pTrieScan = new CTrieIter(); pTrieScan->Init(this); ptec = ptecNoErrors; } else { // We were not able to initailize main lexicon.
ptec = retcode(ptecIOErrorMainLex, ptecInvalidMainLex); } } } }
return ptec; }
//+---------------------------------------------------------------------------
//
// Class: CTrie
//
// Synopsis: Initialize Trie.
//
// Arguments:
//
// Modifies:
//
// History: created 2/2000 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
PTEC CTrie::InitRc(LPBYTE pmap) { // Declare and Initialize all local variables.
PTEC ptec = ptecModuleError;
// The function assume that pMapFile and pTrieCtrl is NULL, else there are possible memory leak.
// possibility of this could be call Initilization without Terminating.
assert(pMapFile == NULL); assert(pTrieCtrl == NULL); assert(pTrieScan == NULL);
// Initialize pMapFile and pTrieCtrl to NULL.
pMapFile = NULL; pTrieCtrl = NULL; pTrieScan = NULL;
LEXHEADER *plxhead = (LEXHEADER *) pmap; pmap += sizeof(LEXHEADER);
// The size of the copyright notice
int cCopyright = 0; WCHAR* pwzCopyright = NULL; int cLexSup = 0;
cCopyright = * (int *) pmap; pmap += sizeof(int);
// The copyright notice itself
pwzCopyright = (WCHAR *) pmap; pmap += cCopyright * sizeof(WCHAR);
// Skip Supplemental data for Thai word break.
cLexSup = * (int *) pmap; pmap += sizeof(int); pmap += cLexSup;
pTrieCtrl = TrieInit(pmap); if (pTrieCtrl) { // We were able to load and point to the Trie okay.
//MessageBoxW(0,L"Was able to initialize Trie",L"ThWB",MB_OK);
pTrieScan = new CTrieIter(); pTrieScan->Init(this); ptec = ptecNoErrors; } else { // We were not able to initailize main lexicon.
ptec = retcode(ptecIOErrorMainLex, ptecInvalidMainLex); }
return ptec; }
//+---------------------------------------------------------------------------
//
// Class: CTrie
//
// Synopsis: UnInitialize Trie.
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
void CTrie::UnInit() { // Free up memory allocated by Trie.
if (pTrieCtrl) { TrieFree(pTrieCtrl); pTrieCtrl = NULL; }
// Close the map files.
if (pMapFile) { CloseMapFile(pMapFile); pMapFile = NULL; }
if (pTrieScan) { delete pTrieScan; pTrieScan = NULL; }
}
//+---------------------------------------------------------------------------
//
// Class: CTrie
//
// Synopsis: searches for the given string in the trie
//
// Arguments:
//
// Modifies:
//
// History: created 6/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
BOOL CTrie::Find(WCHAR* szWord, DWORD* pdwPOS) { // Declare and initialize all local variables.
int i = 0;
if (pTrieScan == NULL) return FALSE;
pTrieScan->Reset();
if (!pTrieScan->Down()) return FALSE;
while (TRUE) { pTrieScan->GetNode(); if (pTrieScan->wc == szWord[i]) { i++; if (pTrieScan->fWordEnd && szWord[i] == '\0') { *pdwPOS = pTrieScan->posTag; return TRUE; } else if (szWord[i] == '\0') break; // Move down the Trie Branch.
else if (!pTrieScan->Down()) break; } // Move right of the Trie Branch
else if (!pTrieScan->Right()) break; } *pdwPOS = POS_UNKNOWN; return FALSE; }
|