Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

139 lines
3.2 KiB

  1. ////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Filename : CustomBreaking.h
  4. // Purpose : enable the user to specify in a file a list of tokens that
  5. // should not be broken.
  6. //
  7. // Project : WordBreakers
  8. // Component: word breaker
  9. //
  10. // Author : yairh
  11. //
  12. // Log:
  13. //
  14. // Jul 20 2000 yairh creation
  15. //
  16. ////////////////////////////////////////////////////////////////////////////////
  17. #ifndef _CUSTOM_BREAKING_H_
  18. #define _CUSTOM_BREAKING_H_
  19. #include "trie.h"
  20. #include "vararray.h"
  21. #include "AutoPtr.h"
  22. #include "wbutils.h"
  23. ///////////////////////////////////////////////////////////////////////////////
  24. // Class CCustomWordTerm
  25. ///////////////////////////////////////////////////////////////////////////////
  26. class CCustomWordTerm
  27. {
  28. public:
  29. CCustomWordTerm(const WCHAR* pwcs);
  30. ~CCustomWordTerm()
  31. {
  32. delete m_pwcs;
  33. }
  34. bool CheckWord(
  35. const ULONG ulLen,
  36. ULONG ulOffsetToBaseWord,
  37. ULONG ulBaseWordLen,
  38. const WCHAR* pwcsBuf,
  39. ULONG* pMatchOffset,
  40. ULONG* pulMatchLen);
  41. ULONG GetTxtStart()
  42. {
  43. return m_ulStartTxt;
  44. }
  45. ULONG GetTxtEnd()
  46. {
  47. return m_ulEndTxt;
  48. }
  49. WCHAR* GetTxt()
  50. {
  51. return m_pwcs;
  52. }
  53. private:
  54. ULONG m_ulStartTxt;
  55. ULONG m_ulEndTxt;
  56. ULONG m_ulLen;
  57. WCHAR* m_pwcs;
  58. };
  59. ///////////////////////////////////////////////////////////////////////////////
  60. // Class CCustomWordCollection
  61. ///////////////////////////////////////////////////////////////////////////////
  62. class CCustomWordCollection
  63. {
  64. public:
  65. CCustomWordCollection() :
  66. m_vaWordCollection(1),
  67. m_ulCount(0)
  68. {
  69. }
  70. void AddWord(const WCHAR* pwcs);
  71. CCustomWordTerm* GetFirstWord()
  72. {
  73. if (m_ulCount)
  74. {
  75. return m_vaWordCollection[(ULONG)0].Get();
  76. }
  77. return NULL;
  78. }
  79. bool CheckWord(
  80. const ULONG ulLen,
  81. ULONG ulOffsetToBaseWord,
  82. ULONG ulBaseWordLen,
  83. const WCHAR* pwcsBuf,
  84. ULONG* pulMatchOffset,
  85. ULONG* pulMatchLen);
  86. private:
  87. ULONG m_ulCount;
  88. CVarArray< CAutoClassPointer<CCustomWordTerm> > m_vaWordCollection;
  89. };
  90. ///////////////////////////////////////////////////////////////////////////////
  91. // Class CCustomBreaker
  92. ///////////////////////////////////////////////////////////////////////////////
  93. class CCustomBreaker
  94. {
  95. public:
  96. CCustomBreaker(LCID lcid);
  97. bool IsNotEmpty()
  98. {
  99. return (m_ulWordCount > 0);
  100. }
  101. bool BreakText(
  102. ULONG ulLen,
  103. WCHAR* pwcsBuf,
  104. ULONG* pulOutLen,
  105. ULONG* pulOffset);
  106. private:
  107. CTrie<CCustomWordCollection, CWbToUpper> m_Trie;
  108. ULONG m_ulWordCount;
  109. };
  // Global CCustomBreaker auto-pointers, declared here and defined elsewhere.
  // NOTE(review): the names suggest one breaker per language (English,
  // UK English, French, Spanish, Italian) -- confirm against the definitions.
  110. extern CAutoClassPointer<CCustomBreaker> g_apEngCustomBreaker;
  111. extern CAutoClassPointer<CCustomBreaker> g_apEngUKCustomBreaker;
  112. extern CAutoClassPointer<CCustomBreaker> g_apFrnCustomBreaker;
  113. extern CAutoClassPointer<CCustomBreaker> g_apSpnCustomBreaker;
  114. extern CAutoClassPointer<CCustomBreaker> g_apItlCustomBreaker;
  115. #endif // _CUSTOM_BREAKING_H_