Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

979 lines
29 KiB

//+-------------------------------------------------------------------------
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright 1998 - 2001 Microsoft Corporation. All Rights Reserved.
//
// PROGRAM: lrsample.cxx
//
// PURPOSE: Sample wordbreaker and stemmer.
//
// PLATFORM: Windows 2000 and later
//
//--------------------------------------------------------------------------
#include <stdio.h>
#include <wchar.h>
#include <windows.h>
#include <objidl.h>
#include <indexsrv.h>
#include <cierror.h>
#include <filterr.h>
#include "lrsample.hxx"
#include "filtreg.hxx"
#include "langreg.hxx"
// Stemmer implementation selection. The #ifdef sections in this file key off
// these names; only SIMPLE_LIST_STEMMER is enabled here, the other two are
// left commented out.
//#define LEXICON_STEMMER
//#define PORTER_STEMMER
#define SIMPLE_LIST_STEMMER
// The CLSID for the wordbreaker.
// DllGetClassObject accepts this class ID, and the same GUID appears in
// string form in English_Sample_LangRes.
CLSID CLSID_SampleWordBreaker = /* d225281a-7ca9-4a46-ae7d-c63a9d4815d4 */
{
0xd225281a, 0x7ca9, 0x4a46,
{0xae, 0x7d, 0xc6, 0x3a, 0x9d, 0x48, 0x15, 0xd4}
};
// The CLSID of the stemmer.
// DllGetClassObject accepts this class ID, and the same GUID appears in
// string form in English_Sample_LangRes.
CLSID CLSID_SampleStemmer = /* 0a275611-aa4d-4b39-8290-4baf77703f55 */
{
0x0a275611, 0xaa4d, 0x4b39,
{0x82, 0x90, 0x4b, 0xaf, 0x77, 0x70, 0x3f, 0x55}
};
// Global module refcount: incremented by the class factory constructor and by
// LockServer( TRUE ), decremented by the class factory destructor and by
// LockServer( FALSE ). DllCanUnloadNow returns S_OK only while it is zero.
long g_cInstances = 0;
// Module handle of this DLL; stored by DllMain on DLL_PROCESS_ATTACH.
HMODULE g_hModule = 0;
// Optional stemmer back ends: each header is only included when the matching
// selection macro is defined.
#ifdef PORTER_STEMMER
#include "porter.hxx"
#endif //PORTER_STEMMER
#ifdef LEXICON_STEMMER
#include "stem.hxx"
// Shared lexicon stemmer object. DllGetClassObject creates it on first use
// (when it is still 0) and CSampleStemmer::GenerateWordForms queries it.
CStem * g_pStem = 0;
#endif //LEXICON_STEMMER
#ifdef SIMPLE_LIST_STEMMER
// This is just a simple hard-coded list of words and stem forms.
// An SStemForm names one word by its position in the two-dimensional aStems
// table: the word is aStems[ iList ][ iForm ].
struct SStemForm
{
USHORT iList; // first index into aStems
USHORT iForm; // second index into aStems
};
//
// Search index over aStems. Each entry refers to the word
// aStems[ iList ][ iForm ] (shown in the trailing comment).
//
// Invariants relied upon by bsearch() / StemCompare():
//   * entries are sorted by wcscmp() order of the word they refer to;
//   * every ( iList, iForm ) pair addresses a populated (non-null) slot of
//     aStems, because StemCompare() passes that slot straight to wcscmp().
//
const SStemForm aStemForms[] =
{
    {  0, 0 }, // abide
    {  0, 2 }, // abided
    {  0, 4 }, // abides
    {  0, 3 }, // abiding
    {  0, 1 }, // abode
    {  1, 0 }, // bat
    {  2, 0 }, // batch
    {  2, 2 }, // batched
    {  2, 1 }, // batches
    {  2, 3 }, // batching
    {  1, 1 }, // bats
    {  1, 2 }, // batted
    {  1, 3 }, // batting
    {  3, 0 }, // bear
    {  3, 1 }, // bears
    {  4, 1 }, // began
    {  4, 0 }, // begin
    {  4, 3 }, // beginning
    {  4, 4 }, // begins
    {  4, 2 }, // begun
    {  3, 2 }, // bore
    {  3, 4 }, // born
    {  3, 3 }, // borne
    {  5, 0 }, // dance
    {  5, 1 }, // danced
    {  5, 2 }, // dances
    {  5, 3 }, // dancing
    {  6, 0 }, // heave
    {  6, 1 }, // heaved
    {  6, 3 }, // heaves
    {  6, 4 }, // heaving
    {  7, 0 }, // hero
    {  7, 1 }, // heroes
    {  6, 2 }, // hove
    {  8, 0 }, // keep
    {  8, 3 }, // keeping  (row 8 has only four forms; index 4 is a null slot)
    {  8, 1 }, // keeps
    {  8, 2 }, // kept
    {  9, 0 }, // misspell
    {  9, 1 }, // misspelled
    {  9, 3 }, // misspelling
    {  9, 4 }, // misspells
    {  9, 2 }, // misspelt
    { 10, 0 }, // plead
    { 10, 1 }, // pleaded
    { 10, 3 }, // pleading
    { 10, 4 }, // pleads
    { 10, 2 }, // pled     (index 0 is "plead", which would break the sort order)
    { 11, 2 }, // ran
    { 11, 0 }, // run
    { 11, 3 }, // running
    { 11, 1 }, // runs
    { 12, 1 }, // swam
    { 12, 0 }, // swim
    { 12, 3 }, // swimming
    { 12, 4 }, // swims
    { 12, 2 }, // swum
    { 13, 2 }, // underlain
    { 13, 1 }, // underlay
    { 13, 0 }, // underlie
    { 13, 4 }, // underlies
    { 13, 3 }, // underlying
};
// Number of entries in aStemForms; passed to bsearch() as the element count.
const ULONG cStemForms = ArraySize( aStemForms );
// Capacity of one row of aStems.
const ULONG cMaxStemForms = 8;
// aStems[ iList ][ iForm ]: each row holds all known forms of one word, with
// the base form in column 0. Every row lists fewer than cMaxStemForms forms;
// the unlisted trailing slots are zero-initialized (null pointers), and
// GenerateWordForms stops iterating a row at the first null slot.
const WCHAR * aStems[][ cMaxStemForms ] =
{
{ L"abide", L"abode", L"abided", L"abiding", L"abides" }, // 0
{ L"bat", L"bats", L"batted", L"batting" }, // 1
{ L"batch", L"batches", L"batched", L"batching" }, // 2
{ L"bear", L"bears", L"bore", L"borne", L"born" }, // 3
{ L"begin", L"began", L"begun", L"beginning", L"begins" }, // 4
{ L"dance", L"danced", L"dances", L"dancing" }, // 5
{ L"heave", L"heaved", L"hove", L"heaves", L"heaving" }, // 6
{ L"hero", L"heroes" }, // 7
{ L"keep", L"keeps", L"kept", L"keeping" }, // 8
{ L"misspell", L"misspelled", L"misspelt", L"misspelling",
L"misspells" }, // 9
{ L"plead", L"pleaded", L"pled", L"pleading", L"pleads" }, // 10
{ L"run", L"runs", L"ran", L"running" }, // 11
{ L"swim", L"swam", L"swum", L"swimming", L"swims" }, // 12
{ L"underlie", L"underlay", L"underlain", L"underlying",
L"underlies" }, // 13
};
//
// bsearch() comparison callback for the aStemForms table.
//
//   p1 -- the search key, read as a 0-terminated wide string
//   p2 -- the SStemForm table entry currently being probed
//
// Returns the wcscmp() ordering of the key relative to the word the entry
// refers to in aStems.
//
int __cdecl StemCompare( const void *p1, const void *p2 )
{
    WCHAR const * const pwcKey = (WCHAR const *) p1;
    SStemForm const * const pEntry = (SStemForm const *) p2;

    // Resolve the table entry to the word it stands for
    WCHAR const * const pwcEntryWord = aStems[ pEntry->iList ][ pEntry->iForm ];

    return wcscmp( pwcKey, pwcEntryWord );
}
#endif // SIMPLE_LIST_STEMMER
//+-------------------------------------------------------------------------
//
//  Function:   IsWordChar
//
//  Synopsis:   Classify character [i] of [pwcChunk] as either part of a word
//              or a word separator.
//
//  Arguments:  [pwcChunk] -- Text being examined
//              [i]        -- Index of the character to classify
//              [pInfo1]   -- Type 1 information, one entry per character
//                            (tested for C1_ALPHA and C1_DIGIT)
//              [pInfo3]   -- Type 3 information, one entry per character
//                            (tested for C3_NONSPACING). Entry [i+1] is also
//                            read when character [i] is 0xa0, so the array
//                            must have a valid entry one past [i].
//
//  Returns:    TRUE if the character is a word character,
//              FALSE if it is a word breaking character
//
//--------------------------------------------------------------------------
__forceinline BOOL IsWordChar(
    WCHAR const * pwcChunk,
    int           i,
    WORD const *  pInfo1,
    WORD const *  pInfo3 )
{
    // Letters and digits always belong to a word ...
    if ( pInfo1[i] & ( C1_ALPHA | C1_DIGIT ) )
        return TRUE;

    // ... and so do non-spacing characters
    if ( pInfo3[i] & C3_NONSPACING )
        return TRUE;

    switch ( pwcChunk[i] )
    {
    case L'_':
        // Underscore counts as a word character
        return TRUE;

    case 0xa0:
        // A non-breaking space only stays inside the word when the
        // character after it is non-spacing (one character of lookahead)
        return ( pInfo3[i+1] & C3_NONSPACING ) ? TRUE : FALSE;

    default:
        return FALSE;
    }
} //IsWordChar
//+---------------------------------------------------------------------------
//
//  Function:   ScanChunk
//
//  Synopsis:   Retrieve the CT_CTYPE1 and CT_CTYPE3 character type flags for
//              every character of a chunk via GetStringTypeW.
//
//  Arguments:  [pwcChunk] -- Characters to classify
//              [cwc]      -- Number of characters to classify
//              [pInfo1]   -- Receives cwc entries of type 1 information
//              [pInfo3]   -- Receives cwc entries of type 3 information
//
//  Returns:    S_OK if both queries succeed; otherwise the last Win32 error
//              of the failing query converted to an HRESULT. The type 3
//              query is not attempted if the type 1 query fails.
//
//----------------------------------------------------------------------------
HRESULT ScanChunk(
    WCHAR const * pwcChunk,
    ULONG         cwc,
    WORD *        pInfo1,
    WORD *        pInfo3 )
{
    HRESULT hrScan = S_OK;

    if ( !GetStringTypeW( CT_CTYPE1, pwcChunk, cwc, pInfo1 ) )
        hrScan = HRESULT_FROM_WIN32( GetLastError() );
    else if ( !GetStringTypeW( CT_CTYPE3, pwcChunk, cwc, pInfo3 ) )
        hrScan = HRESULT_FROM_WIN32( GetLastError() );

    return hrScan;
} //ScanChunk
//+---------------------------------------------------------------------------
//
//  Member:     CSampleWordBreaker::Tokenize
//
//  Synopsis:   Break a block of text into individual words and send each
//              word to the word sink. Zero-width spaces embedded in a word
//              are stripped from the text handed to the sink, but still
//              count towards the source length reported for the word.
//
//  Arguments:  [pTextSource]  -- Source of characters to work on; the block
//                                starts at awcBuffer[ iCur ]
//              [cwc]          -- Number of characters to process; must not
//                                exceed cwcAtATime
//              [pWordSink]    -- Where to send the words found
//              [cwcProcessed] -- On success, returns the # of characters
//                                tokenized. For a full block (cwc ==
//                                cwcAtATime) a word that starts after offset
//                                0 and runs to the end of the block is NOT
//                                emitted and is not counted, so the caller
//                                can present it again with more text.
//
//  Returns:    S_OK if successful, E_INVALIDARG if cwc is larger than
//              cwcAtATime, or the error returned by ScanChunk / PutWord.
//              [cwcProcessed] is not written on failure.
//
//----------------------------------------------------------------------------
HRESULT CSampleWordBreaker::Tokenize(
    TEXT_SOURCE * pTextSource,
    ULONG         cwc,
    IWordSink *   pWordSink,
    ULONG &       cwcProcessed )
{
    // The arrays below hold cwcAtATime entries plus one lookahead slot;
    // refuse anything larger rather than overrun the stack.
    if ( cwc > CSampleWordBreaker::cwcAtATime )
        return E_INVALIDARG;

    // Leave space for one lookahead entry
    WORD aInfo1[ CSampleWordBreaker::cwcAtATime + 1 ];
    WORD aInfo3[ CSampleWordBreaker::cwcAtATime + 1 ];

    // Get a pointer to the text we'll be working on
    const WCHAR * pwcChunk = &pTextSource->awcBuffer[ pTextSource->iCur ];

    HRESULT hr = ScanChunk( pwcChunk, cwc, aInfo1, aInfo3 );
    if ( FAILED( hr ) )
        return hr;

    //
    // IsWordChar() reads aInfo3[ i + 1 ] when character i is a non-breaking
    // space, so the entry just past the scanned characters must be defined.
    // ScanChunk only fills entries [0, cwc), hence set entry [cwc] here:
    //   - full block:  keep C3_NONSPACING, so a trailing non-breaking space
    //                  is treated as part of the word;
    //   - short block: everything up to the end is emitted and no character
    //                  follows, so a trailing non-breaking space is a break.
    //
    if ( cwc < CSampleWordBreaker::cwcAtATime )
        aInfo3[ cwc ] = 0;
    else
        aInfo3[ cwc ] = C3_NONSPACING;

    BOOL  fWordHasZWS = FALSE;  // Does the current word have a 0-width-space?
    ULONG cwcZWS = 0;           // Length of word minus embedded 0-width-spaces

    //
    // iBeginWord is the offset into aInfoX of the beginning character of
    // a word.  iCur is the first unprocessed character.
    // They are indexes into the current block (pwcChunk).
    //
    ULONG iBeginWord = 0;
    ULONG iCur = 0;

    // Temp buffer for a word having zero-width space
    WCHAR awcBufZWS[ CSampleWordBreaker::cwcAtATime ];

    // Send words from the current block to word sink
    while ( iCur < cwc )
    {
        // Skip whitespace, punctuation, etc.
        for ( ; iCur < cwc; iCur++ )
            if ( IsWordChar( pwcChunk, iCur, aInfo1, aInfo3 ) )
                break;

        // iCur points to a word char or is equal to cwc
        iBeginWord = iCur;
        if ( iCur < cwc )
            iCur++; // we knew it pointed at word character

        //
        // Find word break. Filter may output Unicode zero-width-space, which
        // should be ignored by the wordbreaker.
        //
        fWordHasZWS = FALSE;
        for ( ; iCur < cwc; iCur++ )
        {
            if ( !IsWordChar( pwcChunk, iCur, aInfo1, aInfo3 ) )
            {
                if ( ZERO_WIDTH_SPACE == pwcChunk[iCur] )
                    fWordHasZWS = TRUE;
                else
                    break;
            }
        }

        if ( fWordHasZWS )
        {
            // Copy word into awcBufZWS after stripping zero-width-spaces
            cwcZWS = 0;
            for ( ULONG i = iBeginWord; i < iCur; i++ )
            {
                if ( ZERO_WIDTH_SPACE != pwcChunk[i] )
                    awcBufZWS[cwcZWS++] = pwcChunk[i];
            }
        }

        // iCur points to a non-word char or is equal to cwc
        if ( iCur < cwc )
        {
            // store the word and its source position
            if ( fWordHasZWS )
                hr = pWordSink->PutWord( cwcZWS,
                                         awcBufZWS, // stripped word
                                         iCur - iBeginWord,
                                         pTextSource->iCur + iBeginWord );
            else
                hr = pWordSink->PutWord( iCur - iBeginWord,
                                         pwcChunk + iBeginWord, // the word
                                         iCur - iBeginWord,
                                         pTextSource->iCur + iBeginWord );
            if ( FAILED( hr ) )
                return hr;

            iCur++;            // we knew it pointed at non-word char
            iBeginWord = iCur; // in case we exit the loop now
        }
    } // next word

    // End of words in chunk.
    // iCur == cwc
    // iBeginWord points at beginning of word or == cwc
    if ( 0 == iBeginWord )
    {
        // A single word fills from beginning of this chunk
        // to the end. This is either a very long word or
        // a short word in a leftover buffer.
        // store the word and its source position
        if ( fWordHasZWS )
            hr = pWordSink->PutWord( cwcZWS,
                                     awcBufZWS, // stripped word
                                     iCur,
                                     pTextSource->iCur ); // its source pos.
        else
            hr = pWordSink->PutWord( iCur,
                                     pwcChunk, // the word
                                     iCur,
                                     pTextSource->iCur ); // its source pos.
        if ( FAILED( hr ) )
            return hr;

        // Position it to not add the word twice.
        iBeginWord = iCur;
    }

    //
    // If this is the last chunk from text source, then process the
    // last fragment.
    //
    if ( ( cwc < CSampleWordBreaker::cwcAtATime ) && ( iBeginWord != iCur ) )
    {
        // store the word and its source position
        if ( fWordHasZWS )
            hr = pWordSink->PutWord( cwcZWS,
                                     awcBufZWS, // stripped word
                                     iCur - iBeginWord,
                                     pTextSource->iCur + iBeginWord );
        else
            hr = pWordSink->PutWord( iCur - iBeginWord,
                                     pwcChunk + iBeginWord, // the word
                                     iCur - iBeginWord,
                                     pTextSource->iCur + iBeginWord );
        if ( FAILED( hr ) )
            return hr;

        iBeginWord = iCur;
    }

    cwcProcessed = iBeginWord;
    return S_OK;
} //Tokenize
//+---------------------------------------------------------------------------
//
//  Member:     CSampleWordBreaker::BreakText
//
//  Synopsis:   Tokenize everything the text source can supply, in blocks of
//              at most cwcAtATime characters, refilling the source buffer
//              through pfnFillTextBuffer until it reports failure.
//
//  Arguments:  [pTextSource] -- Source of characters to work on
//              [pWordSink]   -- Where to send the words found
//              [pPhraseSink] -- Where to send the phrases found (not used)
//
//  Returns:    E_INVALIDARG if pTextSource is null or iCur is past iEnd;
//              S_OK if there is no word sink or no text; the failing
//              HRESULT of Tokenize; the refill HRESULT if it is anything
//              other than one of the "out of text" codes; otherwise the
//              result of tokenizing whatever text is left.
//
//----------------------------------------------------------------------------
HRESULT STDMETHODCALLTYPE CSampleWordBreaker::BreakText(
    TEXT_SOURCE * pTextSource,
    IWordSink *   pWordSink,
    IPhraseSink * pPhraseSink )
{
    // Validate arguments
    if ( 0 == pTextSource )
        return E_INVALIDARG;

    if ( 0 == pWordSink )
        return S_OK;

    if ( pTextSource->iCur == pTextSource->iEnd )
        return S_OK;

    if ( pTextSource->iCur > pTextSource->iEnd )
        return E_INVALIDARG;

    ULONG   cwcDone;            // # chars actually processed by Tokenize()
    HRESULT hrFill = S_OK;

    // Pull text from the text source and tokenize it
    for ( ;; )
    {
        // Set once a block from the current buffer contents was tokenized
        BOOL fTokenizedSome = FALSE;

        while ( pTextSource->iCur < pTextSource->iEnd )
        {
            ULONG cwcBlock = pTextSource->iEnd - pTextSource->iCur;

            if ( cwcBlock >= CSampleWordBreaker::cwcAtATime )
            {
                // Process in buckets of cwcAtATime only
                cwcBlock = CSampleWordBreaker::cwcAtATime;
            }
            else if ( fTokenizedSome )
            {
                // Short leftover after a full block: go get more text first
                break;
            }

            HRESULT hrTok = Tokenize( pTextSource, cwcBlock, pWordSink, cwcDone );
            if ( FAILED( hrTok ) )
                return hrTok;

            pTextSource->iCur += cwcDone;
            fTokenizedSome = TRUE;
        }

        hrFill = pTextSource->pfnFillTextBuffer( pTextSource );
        if ( !SUCCEEDED( hrFill ) )
            break;
    }

    //
    // If anything failed except for running out of text, report the error.
    // Otherwise, for cases like out of memory, files will not get retried or
    // reported as failures properly.
    //
    const BOOL fRanOutOfText = ( FILTER_E_NO_MORE_VALUES == hrFill ) ||
                               ( FILTER_E_NO_TEXT == hrFill ) ||
                               ( FILTER_E_NO_VALUES == hrFill ) ||
                               ( FILTER_E_NO_MORE_TEXT == hrFill ) ||
                               ( FILTER_E_END_OF_CHUNKS == hrFill ) ||
                               ( WBREAK_E_END_OF_TEXT == hrFill );

    if ( FAILED( hrFill ) && !fRanOutOfText )
        return hrFill;

    // Tokenize whatever is still left in the buffer
    const ULONG cwcLeft = pTextSource->iEnd - pTextSource->iCur;
    if ( 0 != cwcLeft )
        return Tokenize( pTextSource, cwcLeft, pWordSink, cwcDone );

    return S_OK;
} //BreakText
//+---------------------------------------------------------------------------
//
//  Member:     CSampleStemmer::GenerateWordForms
//
//  Synopsis:   From the input word, emit the alternate forms of the word
//              (PutAltWord) followed by the original word (PutWord).
//
//  Arguments:  [pwcInBuf]      -- The original word to stem (not 0-terminated)
//              [cwc]           -- Length in characters of the word
//              [pWordFormSink] -- Where to emit the word forms
//
//  Returns:    E_INVALIDARG if either pointer is null, the failing HRESULT
//              of PutAltWord, or otherwise the result of the final PutWord.
//
//  Notes:      Because pwcInBuf is not 0-terminated, every code path that
//              needs a C string first copies the word into a local,
//              terminated buffer. Words too long for that buffer are emitted
//              unchanged, without alternate forms.
//
//----------------------------------------------------------------------------
HRESULT STDMETHODCALLTYPE CSampleStemmer::GenerateWordForms(
    WCHAR const *   pwcInBuf,
    ULONG           cwc,
    IWordFormSink * pWordFormSink )
{
    // Validate the arguments
    if ( ( 0 == pwcInBuf ) || ( 0 == pWordFormSink ) )
        return E_INVALIDARG;

    HRESULT hr = S_OK;

#ifdef PORTER_STEMMER
    //
    // If the word is small enough, attempt to get the stemmed form of the
    // word. Emit both forms if they are different. The Porter algorithm
    // does the opposite of what's required here, but doing the right thing
    // requires a lexicon.
    //
    if ( cwc < cwcMaxPorterWord )
    {
        // Make a temporary buffer for the word
        WCHAR awcPorter[ cwcMaxPorterWord ];
        CopyMemory( awcPorter, pwcInBuf, sizeof(WCHAR) * cwc );
        awcPorter[cwc] = 0;

        // Convert it to lowercase and save the original in lowercase
        CharLower( awcPorter );
        WCHAR awcOriginal[ cwcMaxPorterWord ];
        wcscpy( awcOriginal, awcPorter );

        // Get the stemmed form of the word
        GetPorterStemForm( awcPorter );

        // If it's different from the original, emit it.
        if ( wcscmp( awcOriginal, awcPorter ) )
        {
            hr = pWordFormSink->PutAltWord( awcPorter,
                                            wcslen( awcPorter ) );
            if ( FAILED( hr ) )
                return hr;
        }
    }
#endif //PORTER_STEMMER

#ifdef LEXICON_STEMMER
    //
    // If the word is small enough to work with the stemmer, attempt to get
    // various forms of the word.
    //
    if ( cwc < cbMaxStem )
    {
        //
        // Convert the original string to 8-bit characters. This is OK since
        // it is an English stemmer that can safely assume such characters.
        //
        char acOriginal[ cbMaxStem ];

        // Declared outside the loop: the index is needed afterwards to
        // place the terminator.
        unsigned i;
        for ( i = 0; i < cwc; i++ )
            acOriginal[ i ] = (char) pwcInBuf[ i ];
        acOriginal[ i ] = 0;

        // Enumerate all stem-sets that contain the word.
        unsigned iBmk = stemInvalid;
        unsigned iStemSet = stemInvalid;
        char ac[ cbMaxStem ];
        while ( g_pStem->FindStemSet( acOriginal, iBmk, iStemSet ) )
        {
            // Enumerate all forms of the stem-set, root first.
            CStemSet set( g_pStem->GetStemSetRoot(), iStemSet );
            unsigned iStemBmk = stemInvalid;
            while ( set.GetForm( ac, iStemBmk ) )
            {
                if ( strcmp( ac, acOriginal ) )
                {
                    WCHAR awcForm[ cbMaxStem ];
                    mbstowcs( awcForm, ac, -1 );
                    hr = pWordFormSink->PutAltWord( awcForm,
                                                    wcslen( awcForm ) );
                    if ( FAILED( hr ) )
                        return hr;
                }
            }
        }
    }
#endif //LEXICON_STEMMER

#ifdef SIMPLE_LIST_STEMMER
    //
    // The lookup (bsearch + StemCompare) and the comparison below both use
    // wcscmp(), which needs a 0-terminated string. Copy the word into a
    // terminated local buffer first. The buffer is longer than every word in
    // aStems, so a word that does not fit cannot be in the list anyway.
    //
    const ULONG cwcMaxListWord = 32;

    if ( cwc < cwcMaxListWord )
    {
        WCHAR awcWord[ cwcMaxListWord ];
        CopyMemory( awcWord, pwcInBuf, sizeof(WCHAR) * cwc );
        awcWord[ cwc ] = 0;

        // Look up the word in the simple list of stem forms
        SStemForm const * pStemForm =
            (SStemForm const *) bsearch( awcWord,
                                         aStemForms,
                                         cStemForms,
                                         sizeof( SStemForm ),
                                         StemCompare );

        if ( 0 != pStemForm )
        {
            // Found it, now iterate all the forms. A row ends at the first
            // null slot or, for a completely full row, at cMaxStemForms.
            ULONG iList = pStemForm->iList;

            for ( ULONG iForm = 0;
                  ( iForm < cMaxStemForms ) && ( 0 != aStems[ iList ][ iForm ] );
                  iForm++ )
            {
                WCHAR const * pwc = aStems[ iList ][ iForm ];

                // Don't emit the original word yet
                if ( 0 != wcscmp( pwc, awcWord ) )
                {
                    hr = pWordFormSink->PutAltWord( pwc,
                                                    (ULONG) wcslen( pwc ) );
                    if ( FAILED( hr ) )
                        return hr;
                }
            }
        }
    }
#endif //SIMPLE_LIST_STEMMER

    // Emit the original word
    return pWordFormSink->PutWord( pwcInBuf, cwc );
} //GenerateWordForms
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::CLanguageResourceSampleCF
//
//  Synopsis:   Language resource class factory constructor. The new object
//              starts with a reference count of 1 and is counted in the
//              global module refcount.
//
//--------------------------------------------------------------------------
CLanguageResourceSampleCF::CLanguageResourceSampleCF()
    : _lRefs( 1 )
{
    // One more live object keeping the module loaded
    InterlockedIncrement( &g_cInstances );
} //CLanguageResourceSampleCF
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::~CLanguageResourceSampleCF
//
//  Synopsis:   Language resource class factory destructor. Removes this
//              object from the global module refcount.
//
//--------------------------------------------------------------------------
CLanguageResourceSampleCF::~CLanguageResourceSampleCF()
{
    // One less live object keeping the module loaded
    InterlockedDecrement( &g_cInstances );
} //~CLanguageResourceSampleCF
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::QueryInterface
//
//  Synopsis:   Rebind to the requested interface
//
//  Arguments:  [riid]      -- IID of new interface
//              [ppvObject] -- New interface * returned here; set to 0 when
//                             the interface is not supported
//
//  Returns:    S_OK if bind succeeded (the object is AddRef'ed),
//              E_NOINTERFACE if bind failed,
//              E_POINTER if ppvObject is null
//
//--------------------------------------------------------------------------
HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::QueryInterface(
    REFIID  riid,
    void ** ppvObject )
{
    if ( 0 == ppvObject )
        return E_POINTER;

    //
    // Both supported interfaces are reached through IClassFactory, so the
    // IUnknown identity is consistent no matter which IID was asked for.
    //
    if ( ( IID_IClassFactory == riid ) || ( IID_IUnknown == riid ) )
    {
        *ppvObject = (IUnknown *) (IClassFactory *) this;
    }
    else
    {
        *ppvObject = 0;
        return E_NOINTERFACE;
    }

    AddRef();
    return S_OK;
} //QueryInterface
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::AddRef
//
//  Synopsis:   Atomically adds one reference to the class factory
//
//  Returns:    The reference count after the increment
//
//--------------------------------------------------------------------------
ULONG STDMETHODCALLTYPE CLanguageResourceSampleCF::AddRef()
{
    const long cRefsNow = InterlockedIncrement( &_lRefs );

    return cRefsNow;
} //AddRef
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::Release
//
//  Synopsis:   Atomically drops one reference; the object deletes itself
//              when the last reference goes away.
//
//  Returns:    The reference count after the decrement
//
//--------------------------------------------------------------------------
ULONG STDMETHODCALLTYPE CLanguageResourceSampleCF::Release()
{
    const long cRefsLeft = InterlockedDecrement( &_lRefs );

    if ( 0 != cRefsLeft )
        return cRefsLeft;

    // That was the last reference; no member may be touched after this
    delete this;
    return 0;
} //Release
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::CreateInstance
//
//  Synopsis:   Creates a new sample stemmer or sample word breaker object,
//              selected by the interface requested.
//
//  Arguments:  [pUnkOuter] -- 'Outer' IUnknown; must be null because the
//                             sample objects are not written for aggregation
//              [riid]      -- Interface to bind (IID_IStemmer or
//                             IID_IWordBreaker)
//              [ppvObject] -- Interface returned here; set to 0 on failure
//
//  Returns:    S_OK if successful,
//              E_POINTER if ppvObject is null,
//              CLASS_E_NOAGGREGATION if pUnkOuter is not null,
//              E_NOINTERFACE for any other interface,
//              E_OUTOFMEMORY if the object could not be allocated
//
//--------------------------------------------------------------------------
HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::CreateInstance(
    IUnknown * pUnkOuter,
    REFIID     riid,
    void * *   ppvObject )
{
    if ( 0 == ppvObject )
        return E_POINTER;

    *ppvObject = 0;

    // Aggregation is not supported; say so instead of silently handing
    // back a stand-alone object.
    if ( 0 != pUnkOuter )
        return CLASS_E_NOAGGREGATION;

    if ( IID_IStemmer == riid )
        *ppvObject = new CSampleStemmer();
    else if ( IID_IWordBreaker == riid )
        *ppvObject = new CSampleWordBreaker();
    else
        return E_NOINTERFACE;

    if ( 0 == *ppvObject )
        return E_OUTOFMEMORY;

    return S_OK;
} //CreateInstance
//+-------------------------------------------------------------------------
//
//  Method:     CLanguageResourceSampleCF::LockServer
//
//  Synopsis:   Adds or removes a lock on the module by adjusting the global
//              module refcount that DllCanUnloadNow consults.
//
//  Arguments:  [fLock] -- TRUE if locking, FALSE if unlocking
//
//  Returns:    S_OK
//
//--------------------------------------------------------------------------
HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::LockServer( BOOL fLock )
{
    if ( !fLock )
    {
        InterlockedDecrement( &g_cInstances );
        return S_OK;
    }

    InterlockedIncrement( &g_cInstances );
    return S_OK;
} //LockServer
//+-------------------------------------------------------------------------
//
//  Function:   DllGetClassObject
//
//  Synopsis:   Ole DLL load class routine. Hands out the sample language
//              resource class factory for the word breaker and stemmer
//              class IDs.
//
//  Arguments:  [cid]    -- Class to load
//              [iid]    -- Interface to bind to on class object
//              [ppvObj] -- Interface pointer returned here; set to 0 on
//                          failure
//
//  Returns:    S_OK if successful,
//              E_POINTER if ppvObj is null,
//              CLASS_E_CLASSNOTAVAILABLE if cid is not one of this DLL's
//              classes,
//              E_OUTOFMEMORY if an allocation failed,
//              or the failure from the class factory's QueryInterface
//
//--------------------------------------------------------------------------
extern "C" HRESULT STDMETHODCALLTYPE DllGetClassObject(
    REFCLSID cid,
    REFIID   iid,
    void **  ppvObj )
{
    if ( 0 == ppvObj )
        return E_POINTER;

    *ppvObj = 0;

    // An unknown class is a different failure from an unknown interface
    if ( !( ( CLSID_SampleWordBreaker == cid ) ||
            ( CLSID_SampleStemmer == cid ) ) )
        return CLASS_E_CLASSNOTAVAILABLE;

    // The factory is created holding one reference, released below
    IUnknown * pUnk = new CLanguageResourceSampleCF();
    if ( 0 == pUnk )
        return E_OUTOFMEMORY;

#ifdef LEXICON_STEMMER
    // Create the shared stemmer object the first time a class is requested
    if ( 0 == g_pStem )
        g_pStem = MakeStemObject( g_hModule );

    if ( 0 == g_pStem )
    {
        pUnk->Release();
        return E_OUTOFMEMORY;
    }
#endif //LEXICON_STEMMER

    // On success the caller's pointer holds its own reference; on failure
    // the Release below destroys the factory.
    HRESULT hr = pUnk->QueryInterface( iid, ppvObj );
    pUnk->Release();

    return hr;
} //DllGetClassObject
//+-------------------------------------------------------------------------
//
//  Function:   DllCanUnloadNow
//
//  Synopsis:   Reports whether the DLL may be unloaded, based on the global
//              module refcount.
//
//  Returns:    S_OK if the module refcount is zero,
//              S_FALSE otherwise.
//
//--------------------------------------------------------------------------
extern "C" HRESULT STDMETHODCALLTYPE DllCanUnloadNow( void )
{
    return ( 0 == g_cInstances ) ? S_OK : S_FALSE;
} //DllCanUnloadNow
//+-------------------------------------------------------------------------
//
//  Function:   DllMain
//
//  Synopsis:   Standard main entry point for the module. On process attach
//              it records the module handle and turns off per-thread
//              attach/detach notifications; other reasons are ignored.
//
//  Returns:    TRUE always
//
//--------------------------------------------------------------------------
BOOL WINAPI DllMain(
    HANDLE hInstance,
    DWORD  dwReason,
    void * lpReserved )
{
    switch ( dwReason )
    {
    case DLL_PROCESS_ATTACH:
        g_hModule = (HMODULE) hInstance;
        DisableThreadLibraryCalls( (HINSTANCE) hInstance );
        break;

    default:
        // Nothing to do for the other notifications
        break;
    }

    return TRUE;
} //DllMain
// Registration data handed to RegisterALanguageResource and
// UnRegisterALanguageResource by DllRegisterServer / DllUnregisterServer.
// The two GUID strings are the textual forms of CLSID_SampleWordBreaker and
// CLSID_SampleStemmer defined at the top of this file.
// NOTE(review): SLangRegistry is declared in langreg.hxx, which is not visible
// here. The fields look like: language name, LANGID, then for the word breaker
// and for the stemmer { CLSID string, description, DLL name, threading model }
// -- confirm against the struct declaration.
SLangRegistry const English_Sample_LangRes =
{
L"English_Sample", MAKELANGID( LANG_ENGLISH, SUBLANG_ENGLISH_SAMPLE ),
{ L"{d225281a-7ca9-4a46-ae7d-c63a9d4815d4}",
L"English_Sample Word Breaker",
L"lrsample.dll",
L"both" },
{ L"{0a275611-aa4d-4b39-8290-4baf77703f55}",
L"English_Sample Stemmer",
L"lrsample.dll",
L"both" }
};
//+-------------------------------------------------------------------------
//
//  Method:     DllRegisterServer
//
//  Synopsis:   Registers the language resources in the registry by passing
//              English_Sample_LangRes to RegisterALanguageResource.
//
//  Returns:    Whatever RegisterALanguageResource returns
//
//--------------------------------------------------------------------------
STDAPI DllRegisterServer()
{
    HRESULT hrRegister = RegisterALanguageResource( English_Sample_LangRes );

    return hrRegister;
} //DllRegisterServer
//+-------------------------------------------------------------------------
//
//  Method:     DllUnregisterServer
//
//  Synopsis:   Removes the language resources from the registry by passing
//              English_Sample_LangRes to UnRegisterALanguageResource.
//
//  Returns:    Whatever UnRegisterALanguageResource returns
//
//--------------------------------------------------------------------------
STDAPI DllUnregisterServer()
{
    HRESULT hrUnregister = UnRegisterALanguageResource( English_Sample_LangRes );

    return hrUnregister;
} //DllUnregisterServer