Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
3.3 KiB

  1. #include "base.h"
  2. #include "FrenchTokenizer.h"
  3. void CFrenchTokenizer::OutputHyphenation(
  4. CTokenState& State,
  5. const CCliticsTerm* pCliticsTerm)
  6. {
  7. HRESULT hr;
  8. ULONG ulOffsetInTxtSourceBuffer = m_pCurToken->CalculateStateOffsetInTxtSourceBuffer(State);
  9. ULONG ulAddToStart = 0;
  10. ULONG ulDecFromEnd = 0;
  11. if (pCliticsTerm->ulOp == HEAD_MATCH_TRUNCATE)
  12. {
  13. ulAddToStart = pCliticsTerm->ulLen;
  14. }
  15. else if (pCliticsTerm->ulOp == TAIL_MATCH_TRUNCATE)
  16. {
  17. ulDecFromEnd = pCliticsTerm->ulLen;
  18. }
  19. ULONG ulCur = State.m_ulStart + ulAddToStart;
  20. ULONG ulEnd = State.m_ulEnd - ulDecFromEnd;
  21. while (ulCur < ulEnd)
  22. {
  23. if ( HAS_PROP_DASH(GET_PROP(m_pCurToken->m_State.m_pwcsToken[ulCur])))
  24. {
  25. m_pCurToken->m_State.m_pwcsToken[ulCur] = L'-';
  26. }
  27. ulCur++;
  28. }
  29. ulCur = State.m_ulStart + ulAddToStart;
  30. CCliticsTerm* pDashTerm = NULL;
  31. bool fFoundDashClitics = false;
  32. while (ulCur < ulEnd)
  33. {
  34. if (m_pCurToken->m_State.m_pwcsToken[ulCur] == L'-')
  35. {
  36. DictStatus status;
  37. short sResCount = 0;
  38. if (ulCur > State.m_ulStart)
  39. {
  40. status = g_pClitics->m_trieClitics.trie_Find(
  41. State.m_pwcsToken + ulCur,
  42. TRIE_LONGEST_MATCH | TRIE_IGNORECASE,
  43. 1,
  44. &pDashTerm,
  45. &sResCount);
  46. if (sResCount && (pDashTerm->ulLen == (ulEnd - ulCur)))
  47. {
  48. Trace(
  49. elVerbose,
  50. s_tagTokenizerDecision,
  51. ("%*.*S has a %S clitcs",
  52. State.m_ulEnd - State.m_ulStart,
  53. State.m_ulEnd - State.m_ulStart,
  54. State.m_pwcsToken + State.m_ulStart,
  55. pDashTerm->pwcs
  56. ));
  57. fFoundDashClitics = true;
  58. break;
  59. }
  60. }
  61. }
  62. ulCur++;
  63. }
  64. if (fFoundDashClitics)
  65. {
  66. Assert(pDashTerm);
  67. if (pDashTerm->ulOp == HEAD_MATCH_TRUNCATE)
  68. {
  69. ulAddToStart += pDashTerm->ulLen;
  70. }
  71. else if (pDashTerm->ulOp == TAIL_MATCH_TRUNCATE)
  72. {
  73. ulDecFromEnd += pDashTerm->ulLen;
  74. }
  75. }
  76. if (ulDecFromEnd || ulAddToStart)
  77. {
  78. hr = m_apWordSink->PutAltWord(
  79. State.m_ulEnd - State.m_ulStart,
  80. State.m_pwcsToken + State.m_ulStart,
  81. State.m_ulEnd - State.m_ulStart,
  82. ulOffsetInTxtSourceBuffer);
  83. if (FAILED(hr))
  84. {
  85. THROW_HRESULT_EXCEPTION(hr);
  86. }
  87. }
  88. hr = m_apWordSink->PutWord(
  89. State.m_ulEnd - State.m_ulStart - ulDecFromEnd - ulAddToStart,
  90. State.m_pwcsToken + State.m_ulStart + ulAddToStart,
  91. State.m_ulEnd - State.m_ulStart,
  92. ulOffsetInTxtSourceBuffer);
  93. if (FAILED(hr))
  94. {
  95. THROW_HRESULT_EXCEPTION(hr);
  96. }
  97. }