Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

86 lines
2.6 KiB

  1. #ifndef _CHT_WORD_BREAKER_H__
  2. #define _CHT_WORD_BREAKER_H__
  3. #define BUFFER_GROW_UINT 30
  4. #define LATTICE_LENGHT 50
  5. #define LOCAL_LENGTH 3
  6. class CBaseLex;
  7. typedef struct tagSLatticeNode {
  8. WORD wCount;
  9. WORD wAttr;
  10. BYTE bTerminalCode;
  11. UINT uLen;
  12. double fVariance;
  13. DWORD dwUniCount;
  14. } SLatticeNode, *PSLatticeNode, **PPSLatticeNode;
  15. typedef struct tagSLocalPath {
  16. DWORD dwLength[LOCAL_LENGTH];
  17. WORD wUnicount[LOCAL_LENGTH];
  18. WORD wAttribute[LOCAL_LENGTH];
  19. BYTE bTerminalCode[LOCAL_LENGTH];
  20. // for rule 1 - 5
  21. UINT uPathLength;
  22. double fVariance;
  23. UINT uCompoundNum;
  24. UINT uDMNum;
  25. WORD wUniCountSum;
  26. UINT uStep;
  27. }SLocalPath, *PSLocalPath;
  28. typedef struct tagBreakResult{
  29. DWORD dwWordNumber;
  30. PUINT puWordLen;
  31. PUINT puWordAttrib;
  32. PBYTE pbTerminalCode;
  33. } SBreakResult, *PSBreakResult;
  34. class CCHTWordBreaker {
  35. public:
  36. CCHTWordBreaker();
  37. ~CCHTWordBreaker();
  38. public:
  39. BOOL InitData(HINSTANCE hInstance);
  40. DWORD BreakText(LPCWSTR lpcwszText, INT nTextLen, CBaseLex* pcBaseLex = NULL, DWORD dwMaxWordLen = MAX_CHAR_PER_WORD,
  41. BOOL fBreakWithParser = TRUE);
  42. DWORD GetBreakResult(PUINT* ppuResult) {
  43. *ppuResult = m_psBreakResult->puWordLen;
  44. return m_psBreakResult->dwWordNumber;
  45. }
  46. DWORD GetBreakResultWithAttribute(PUINT* ppuResult, PUINT* ppuAttrib) {
  47. *ppuAttrib = m_psBreakResult->puWordAttrib;
  48. return GetBreakResult(ppuResult);
  49. }
  50. BOOL AddSpecialWord(LPCWSTR lpcwEUDPStr, WORD wAttrib) {
  51. return m_pcLexicon->AddInLexiconInsert(lpcwEUDPStr, wAttrib);
  52. }
  53. DWORD GetAltWord(LPCWSTR lpcwString, DWORD dwLength, LPWSTR* lppwAltWordBuf) {
  54. return m_pcLexicon->GetAltWord(lpcwString, dwLength, lppwAltWordBuf);
  55. }
  56. private:
  57. BOOL AllocLattice(DWORD dwLength);
  58. void DestroyLattice(void);
  59. BOOL LatticeGrow(DWORD dwNewLength);
  60. private:
  61. BOOL BuildLattice(LPCWSTR lpcwszText, DWORD dwTextLen, CBaseLex *pcBaseLex, DWORD dwWordLen);
  62. DWORD GetResult();
  63. void GetScore(PSLocalPath psLocalPath);
  64. INT CompareScore(PSLocalPath psLocalPath1, PSLocalPath psLocalPath);
  65. // DWORD LongestRuleWord(DWORD dwIndex);
  66. private:
  67. PCCHTLexicon m_pcLexicon;
  68. PPSLatticeNode m_ppWordLattice;
  69. PDWORD m_pdwCandidateNumber;
  70. DWORD m_dwSentenceLength;
  71. DWORD m_dwLatticeLength;
  72. PDWORD m_pdwMaxWordLength;
  73. PSBreakResult m_psBreakResult;
  74. PCRuleLexicon m_pcRuleLex;
  75. };
  76. typedef CCHTWordBreaker *PCCHTWordBreaker;
  77. #endif //_CHT_WORD_BREAKER_H__