Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

138 lines
3.8 KiB

  1. #include "base.h"
  2. #include "SpanishTokenizer.h"
  3. #include "WbUtils.h"
  // File-scope holder for the CSpanishDict instance used by this tokenizer.
  // It is filled in by the CSpanishTokenizer constructor the first time its
  // Get() returns NULL (built from the path returned by
  // CreateFilePath(L"SpanishDict.txt")), and it is dereferenced by
  // OutputSimpleToken to call BreakWord.
  // NOTE(review): CAutoClassPointer presumably owns and deletes the object -
  // confirm against its declaration.
  4. CAutoClassPointer<CSpanishDict> g_apSpanishDict;
  5. CSpanishTokenizer::CSpanishTokenizer(
  6. TEXT_SOURCE* pTxtSource,
  7. IWordSink * pWordSink,
  8. IPhraseSink * pPhraseSink,
  9. LCID lcid,
  10. BOOL bQueryTime,
  11. ULONG ulMaxTokenSize) :
  12. CTokenizer(pTxtSource, pWordSink, pPhraseSink, lcid, bQueryTime, ulMaxTokenSize)
  13. {
  14. if (NULL == g_apSpanishDict.Get())
  15. {
  16. CSyncMutexCatcher cs(m_csSpanishDictInit);
  17. if (NULL == g_apSpanishDict.Get())
  18. {
  19. CAutoArrayPointer<WCHAR> apwcsPath;
  20. apwcsPath = CreateFilePath(L"SpanishDict.txt");
  21. if (NULL == g_apSpanishUtil.Get())
  22. {
  23. g_apSpanishUtil = new CSpanishUtil;
  24. }
  25. if (NULL == g_apSpanishDict.Get())
  26. {
  27. g_apSpanishDict = new CSpanishDict(apwcsPath.Get());
  28. }
  29. }
  30. }
  31. }
  32. void CSpanishTokenizer::OutputSimpleToken(
  33. CTokenState& State,
  34. const CCliticsTerm* pTerm)
  35. {
  36. HRESULT hr;
  37. ULONG ulOffsetInTxtSourceBuffer =
  38. m_pCurToken->CalculateStateOffsetInTxtSourceBuffer(State);
  39. if ((TAIL_MATCH_TRUNCATE == pTerm->ulOp) ||
  40. (HEAD_MATCH_TRUNCATE == pTerm->ulOp))
  41. {
  42. if (0 == ( State.m_ulEnd - State.m_ulStart - pTerm->ulLen ))
  43. {
  44. return;
  45. }
  46. hr = m_apWordSink->PutAltWord(
  47. State.m_ulEnd - State.m_ulStart,
  48. &State.m_pwcsToken[State.m_ulStart],
  49. State.m_ulEnd - State.m_ulStart,
  50. ulOffsetInTxtSourceBuffer);
  51. if (FAILED(hr))
  52. {
  53. THROW_HRESULT_EXCEPTION(hr);
  54. }
  55. if (pTerm->ulOp == TAIL_MATCH_TRUNCATE)
  56. {
  57. hr = m_apWordSink->PutWord(
  58. State.m_ulEnd - State.m_ulStart - pTerm->ulLen,
  59. &State.m_pwcsToken[State.m_ulStart],
  60. State.m_ulEnd - State.m_ulStart,
  61. ulOffsetInTxtSourceBuffer);
  62. if (FAILED(hr))
  63. {
  64. THROW_HRESULT_EXCEPTION(hr);
  65. }
  66. }
  67. else
  68. {
  69. Assert(pTerm->ulOp == HEAD_MATCH_TRUNCATE);
  70. hr = m_apWordSink->PutWord(
  71. State.m_ulEnd - State.m_ulStart - pTerm->ulLen,
  72. &State.m_pwcsToken[State.m_ulStart + pTerm->ulLen],
  73. State.m_ulEnd - State.m_ulStart,
  74. ulOffsetInTxtSourceBuffer);
  75. if (FAILED(hr))
  76. {
  77. THROW_HRESULT_EXCEPTION(hr);
  78. }
  79. }
  80. return;
  81. }
  82. WCHAR pwcsAlt[32];
  83. ULONG ulAltLen;
  84. bool bAlt = false;
  85. ULONG ulWordLen = State.m_ulEnd - State.m_ulStart;
  86. if (ulWordLen < 32)
  87. {
  88. g_apSpanishDict->BreakWord(
  89. ulWordLen,
  90. State.m_pwcsToken + State.m_ulStart,
  91. &bAlt,
  92. &ulAltLen,
  93. pwcsAlt);
  94. }
  95. if (bAlt)
  96. {
  97. hr = m_apWordSink->PutAltWord(
  98. ulAltLen,
  99. pwcsAlt,
  100. State.m_ulEnd - State.m_ulStart,
  101. ulOffsetInTxtSourceBuffer
  102. );
  103. if (FAILED(hr))
  104. {
  105. THROW_HRESULT_EXCEPTION(hr);
  106. }
  107. }
  108. hr = m_apWordSink->PutWord(
  109. State.m_ulEnd - State.m_ulStart,
  110. &State.m_pwcsToken[State.m_ulStart],
  111. State.m_ulEnd - State.m_ulStart,
  112. ulOffsetInTxtSourceBuffer
  113. );
  114. if (FAILED(hr))
  115. {
  116. THROW_HRESULT_EXCEPTION(hr);
  117. }
  118. }