Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

139 lines
4.0 KiB

#include "base.h"
#include "SpanishTokenizer.h"
#include "WbUtils.h"
// Global Spanish dictionary. It is created on demand by the CSpanishTokenizer
// constructor (from "SpanishDict.txt") and consulted by OutputSimpleToken via
// BreakWord.
CAutoClassPointer<CSpanishDict> g_apSpanishDict;
// Lock object the CSpanishTokenizer constructor acquires while it creates
// g_apSpanishDict.
// NOTE(review): nothing in this file allocates this object - presumably it is
// set up by module initialization code elsewhere; confirm.
CAutoClassPointer<CSyncCriticalSection> g_apcsSpanishDictInit;
// Builds a Spanish tokenizer on top of CTokenizer, passing every argument
// through to the base class unchanged, and makes sure the shared helper
// objects g_apSpanishUtil and g_apSpanishDict exist before returning.
//
// The dictionary pointer is tested once without the lock and again after
// g_apcsSpanishDictInit has been acquired, so the lock is only taken while
// the dictionary has not been created yet.
CSpanishTokenizer::CSpanishTokenizer(
    TEXT_SOURCE* pTxtSource,
    IWordSink * pWordSink,
    IPhraseSink * pPhraseSink,
    LCID lcid,
    BOOL bQueryTime,
    ULONG ulMaxTokenSize) :
    CTokenizer(pTxtSource, pWordSink, pPhraseSink, lcid, bQueryTime, ulMaxTokenSize)
{
    // Fast path: the dictionary is already available, nothing to set up.
    if (NULL != g_apSpanishDict.Get())
    {
        return;
    }

    // Held until the constructor body exits (presumably a scoped lock).
    CSyncMutexCatcher initGuard(*(g_apcsSpanishDictInit.Get()));

    // Another caller may have finished the setup before we got the lock.
    if (NULL != g_apSpanishDict.Get())
    {
        return;
    }

    CAutoArrayPointer<WCHAR> apwcsDictPath;
    apwcsDictPath = CreateFilePath(L"SpanishDict.txt");

    // The utility object is checked separately from the dictionary, so an
    // already existing instance is reused rather than replaced.
    if (NULL == g_apSpanishUtil.Get())
    {
        g_apSpanishUtil = new CSpanishUtil;
    }

    // Create the dictionary only if it is still absent at this point.
    if (NULL == g_apSpanishDict.Get())
    {
        g_apSpanishDict = new CSpanishDict(apwcsDictPath.Get());
    }
}
// Sends one simple token - the characters of State.m_pwcsToken in the range
// [State.m_ulStart, State.m_ulEnd) - to the word sink.
//
// pTerm->ulOp selects the behaviour:
//  - TAIL_MATCH_TRUNCATE / HEAD_MATCH_TRUNCATE: the whole token is emitted
//    with PutAltWord, then the token with pTerm->ulLen characters removed
//    from its tail / head is emitted with PutWord. When removing them would
//    leave nothing, the function returns without emitting anything.
//  - any other value: the token is emitted with PutWord. For tokens shorter
//    than 32 characters g_apSpanishDict->BreakWord is called first; if it
//    reports an alternative form, that form is emitted with PutAltWord ahead
//    of the PutWord call.
//
// Every sink call reports the full token length and the token's offset in
// the text source buffer as its source span. A failing HRESULT from the sink
// is raised through THROW_HRESULT_EXCEPTION.
void CSpanishTokenizer::OutputSimpleToken(
    CTokenState& State,
    const CCliticsTerm* pTerm)
{
    HRESULT hr;
    ULONG ulOffsetInTxtSourceBuffer =
        m_pCurToken->CalculateStateOffsetInTxtSourceBuffer(State);

    // Length of the token as it appears in the source text.
    const ULONG ulTokenLen = State.m_ulEnd - State.m_ulStart;

    const bool bStripTail = (TAIL_MATCH_TRUNCATE == pTerm->ulOp);
    const bool bStripHead = (HEAD_MATCH_TRUNCATE == pTerm->ulOp);

    if (bStripTail || bStripHead)
    {
        // What is left of the token once the matched part is removed.
        const ULONG ulStemLen = ulTokenLen - pTerm->ulLen;
        if (0 == ulStemLen)
        {
            return;
        }

        // The untouched token goes out as the alternative form.
        hr = m_apWordSink->PutAltWord(
                ulTokenLen,
                &State.m_pwcsToken[State.m_ulStart],
                ulTokenLen,
                ulOffsetInTxtSourceBuffer);
        if (FAILED(hr))
        {
            THROW_HRESULT_EXCEPTION(hr);
        }

        // Tail truncation keeps the leading ulStemLen characters; head
        // truncation skips the first pTerm->ulLen characters instead.
        ULONG ulStemStart = State.m_ulStart;
        if (!bStripTail)
        {
            ulStemStart += pTerm->ulLen;
        }

        hr = m_apWordSink->PutWord(
                ulStemLen,
                &State.m_pwcsToken[ulStemStart],
                ulTokenLen,
                ulOffsetInTxtSourceBuffer);
        if (FAILED(hr))
        {
            THROW_HRESULT_EXCEPTION(hr);
        }
        return;
    }

    // No truncation requested: optionally ask the dictionary for an
    // alternative form of the token.
    WCHAR pwcsAltForm[MAX_WORD_LEN];
    ULONG ulAltFormLen = MAX_WORD_LEN;
    bool bHasAltForm = false;

    // Tokens of 32 characters or more are never passed to BreakWord.
    if (ulTokenLen < 32)
    {
        g_apSpanishDict->BreakWord(
            ulTokenLen,
            State.m_pwcsToken + State.m_ulStart,
            &bHasAltForm,
            &ulAltFormLen,
            pwcsAltForm);
    }

    if (bHasAltForm)
    {
        hr = m_apWordSink->PutAltWord(
                ulAltFormLen,
                pwcsAltForm,
                ulTokenLen,
                ulOffsetInTxtSourceBuffer);
        if (FAILED(hr))
        {
            THROW_HRESULT_EXCEPTION(hr);
        }
    }

    // The token itself is always emitted last.
    hr = m_apWordSink->PutWord(
            ulTokenLen,
            &State.m_pwcsToken[State.m_ulStart],
            ulTokenLen,
            ulOffsetInTxtSourceBuffer);
    if (FAILED(hr))
    {
        THROW_HRESULT_EXCEPTION(hr);
    }
}