Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

139 lines
4.0 KiB

  1. #include "base.h"
  2. #include "SpanishTokenizer.h"
  3. #include "WbUtils.h"
  // Dictionary object shared by all CSpanishTokenizer instances. It starts out
  // empty and is created by the CSpanishTokenizer constructor the first time
  // Get() returns NULL; OutputSimpleToken calls BreakWord() on it.
  4. CAutoClassPointer<CSpanishDict> g_apSpanishDict;
  // Synchronization object handed to CSyncMutexCatcher in the CSpanishTokenizer
  // constructor while the shared dictionary is being created.
  // NOTE(review): this file never assigns it -- it must be created elsewhere
  // before the first tokenizer is constructed; confirm against the module's
  // initialization code.
  5. CAutoClassPointer<CSyncCriticalSection> g_apcsSpanishDictInit;
  6. CSpanishTokenizer::CSpanishTokenizer(
  7. TEXT_SOURCE* pTxtSource,
  8. IWordSink * pWordSink,
  9. IPhraseSink * pPhraseSink,
  10. LCID lcid,
  11. BOOL bQueryTime,
  12. ULONG ulMaxTokenSize) :
  13. CTokenizer(pTxtSource, pWordSink, pPhraseSink, lcid, bQueryTime, ulMaxTokenSize)
  14. {
  15. if (NULL == g_apSpanishDict.Get())
  16. {
  17. CSyncMutexCatcher cs(*(g_apcsSpanishDictInit.Get()));
  18. if (NULL == g_apSpanishDict.Get())
  19. {
  20. CAutoArrayPointer<WCHAR> apwcsPath;
  21. apwcsPath = CreateFilePath(L"SpanishDict.txt");
  22. if (NULL == g_apSpanishUtil.Get())
  23. {
  24. g_apSpanishUtil = new CSpanishUtil;
  25. }
  26. if (NULL == g_apSpanishDict.Get())
  27. {
  28. g_apSpanishDict = new CSpanishDict(apwcsPath.Get());
  29. }
  30. }
  31. }
  32. }
  33. void CSpanishTokenizer::OutputSimpleToken(
  34. CTokenState& State,
  35. const CCliticsTerm* pTerm)
  36. {
  37. HRESULT hr;
  38. ULONG ulOffsetInTxtSourceBuffer =
  39. m_pCurToken->CalculateStateOffsetInTxtSourceBuffer(State);
  40. if ((TAIL_MATCH_TRUNCATE == pTerm->ulOp) ||
  41. (HEAD_MATCH_TRUNCATE == pTerm->ulOp))
  42. {
  43. if (0 == ( State.m_ulEnd - State.m_ulStart - pTerm->ulLen ))
  44. {
  45. return;
  46. }
  47. hr = m_apWordSink->PutAltWord(
  48. State.m_ulEnd - State.m_ulStart,
  49. &State.m_pwcsToken[State.m_ulStart],
  50. State.m_ulEnd - State.m_ulStart,
  51. ulOffsetInTxtSourceBuffer);
  52. if (FAILED(hr))
  53. {
  54. THROW_HRESULT_EXCEPTION(hr);
  55. }
  56. if (pTerm->ulOp == TAIL_MATCH_TRUNCATE)
  57. {
  58. hr = m_apWordSink->PutWord(
  59. State.m_ulEnd - State.m_ulStart - pTerm->ulLen,
  60. &State.m_pwcsToken[State.m_ulStart],
  61. State.m_ulEnd - State.m_ulStart,
  62. ulOffsetInTxtSourceBuffer);
  63. if (FAILED(hr))
  64. {
  65. THROW_HRESULT_EXCEPTION(hr);
  66. }
  67. }
  68. else
  69. {
  70. Assert(pTerm->ulOp == HEAD_MATCH_TRUNCATE);
  71. hr = m_apWordSink->PutWord(
  72. State.m_ulEnd - State.m_ulStart - pTerm->ulLen,
  73. &State.m_pwcsToken[State.m_ulStart + pTerm->ulLen],
  74. State.m_ulEnd - State.m_ulStart,
  75. ulOffsetInTxtSourceBuffer);
  76. if (FAILED(hr))
  77. {
  78. THROW_HRESULT_EXCEPTION(hr);
  79. }
  80. }
  81. return;
  82. }
  83. WCHAR pwcsAlt[MAX_WORD_LEN];
  84. ULONG ulAltLen = MAX_WORD_LEN;
  85. bool bAlt = false;
  86. ULONG ulWordLen = State.m_ulEnd - State.m_ulStart;
  87. if (ulWordLen < 32)
  88. {
  89. g_apSpanishDict->BreakWord(
  90. ulWordLen,
  91. State.m_pwcsToken + State.m_ulStart,
  92. &bAlt,
  93. &ulAltLen,
  94. pwcsAlt);
  95. }
  96. if (bAlt)
  97. {
  98. hr = m_apWordSink->PutAltWord(
  99. ulAltLen,
  100. pwcsAlt,
  101. State.m_ulEnd - State.m_ulStart,
  102. ulOffsetInTxtSourceBuffer
  103. );
  104. if (FAILED(hr))
  105. {
  106. THROW_HRESULT_EXCEPTION(hr);
  107. }
  108. }
  109. hr = m_apWordSink->PutWord(
  110. State.m_ulEnd - State.m_ulStart,
  111. &State.m_pwcsToken[State.m_ulStart],
  112. State.m_ulEnd - State.m_ulStart,
  113. ulOffsetInTxtSourceBuffer
  114. );
  115. if (FAILED(hr))
  116. {
  117. THROW_HRESULT_EXCEPTION(hr);
  118. }
  119. }