windows-server-2003/inetsrv/query/apps/lrsample/lrsample.cxx


								//+-------------------------------------------------------------------------

								//

								// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF

								// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO

								// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A

								// PARTICULAR PURPOSE.

								//

								// Copyright 1998 - 2001 Microsoft Corporation.  All Rights Reserved.

								//

								// PROGRAM:  lrsample.cxx

								//

								// PURPOSE:  Sample wordbreaker and stemmer.

								//

								// PLATFORM: Windows 2000 and later

								//

								//--------------------------------------------------------------------------


								#include <stdio.h>

								#include <wchar.h>


								#include <windows.h>

								#include <objidl.h>

								#include <indexsrv.h>

								#include <cierror.h>

								#include <filterr.h>


								#include "lrsample.hxx"

								#include "filtreg.hxx"

								#include "langreg.hxx"


								//#define LEXICON_STEMMER

								//#define PORTER_STEMMER

								#define SIMPLE_LIST_STEMMER


								// The CLSID for the wordbreaker


								CLSID CLSID_SampleWordBreaker = /* d225281a-7ca9-4a46-ae7d-c63a9d4815d4 */

								{

								    0xd225281a,  0x7ca9, 0x4a46,

								    {0xae, 0x7d, 0xc6, 0x3a, 0x9d, 0x48, 0x15, 0xd4}

								};


								// The CLSID of the stemmer


								CLSID CLSID_SampleStemmer = /* 0a275611-aa4d-4b39-8290-4baf77703f55 */

								{

								    0x0a275611, 0xaa4d, 0x4b39,

								    {0x82, 0x90, 0x4b, 0xaf, 0x77, 0x70, 0x3f, 0x55}

								};


								// Global module refcount


								long g_cInstances = 0;

								HMODULE g_hModule = 0;


								#ifdef PORTER_STEMMER


								    #include "porter.hxx"


								#endif //PORTER_STEMMER


								#ifdef LEXICON_STEMMER


								    #include "stem.hxx"


								    CStem * g_pStem = 0;


								#endif //LEXICON_STEMMER


								#ifdef SIMPLE_LIST_STEMMER


								    // This is just a simple hard-coded list of words and stem forms.


								    struct SStemForm

								    {

								        USHORT iList; // first index into aStems

								        USHORT iForm; // second index into aStems

								    };


								    const SStemForm aStemForms[] =

								    {

								        {  0, 0 },     // abide

								        {  0, 2 },     // abided

								        {  0, 4 },     // abides

								        {  0, 3 },     // abiding

								        {  0, 1 },     // abode

								        {  1, 0 },     // bat

								        {  2, 0 },     // batch

								        {  2, 2 },     // batched

								        {  2, 1 },     // batches

								        {  2, 3 },     // batching

								        {  1, 1 },     // bats

								        {  1, 2 },     // batted

								        {  1, 3 },     // batting

								        {  3, 0 },     // bear

								        {  3, 1 },     // bears

								        {  4, 1 },     // began

								        {  4, 0 },     // begin

								        {  4, 3 },     // beginning

								        {  4, 4 },     // begins

								        {  4, 2 },     // begun

								        {  3, 2 },     // bore

								        {  3, 4 },     // born

								        {  3, 3 },     // borne

								        {  5, 0 },     // dance

								        {  5, 1 },     // danced

								        {  5, 2 },     // dances

								        {  5, 3 },     // dancing

								        {  6, 0 },     // heave

								        {  6, 1 },     // heaved

								        {  6, 3 },     // heaves

								        {  6, 4 },     // heaving

								        {  7, 0 },     // hero

								        {  7, 1 },     // heroes

								        {  6, 2 },     // hove

								        {  8, 0 },     // keep

								        {  8, 4 },     // keeping

								        {  8, 1 },     // keeps

								        {  8, 2 },     // kept

								        {  9, 0 },     // misspell

								        {  9, 1 },     // misspelled

								        {  9, 3 },     // misspelling

								        {  9, 4 },     // misspells

								        {  9, 2 },     // misspelt

								        { 10, 0 },     // plead

								        { 10, 1 },     // pleaded

								        { 10, 3 },     // pleading

								        { 10, 4 },     // pleads

								        { 10, 0 },     // pled

								        { 11, 2 },     // ran

								        { 11, 0 },     // run

								        { 11, 3 },     // running

								        { 11, 1 },     // runs

								        { 12, 1 },     // swam

								        { 12, 0 },     // swim

								        { 12, 3 },     // swimming

								        { 12, 4 },     // swims

								        { 12, 2 },     // swum

								        { 13, 2 },     // underlain

								        { 13, 1 },     // underlay

								        { 13, 0 },     // underlie

								        { 13, 4 },     // underlies

								        { 13, 3 },     // underlying

								    };


								    const ULONG cStemForms = ArraySize( aStemForms );

								    const ULONG cMaxStemForms = 8;


								    const WCHAR * aStems[][ cMaxStemForms ] =

								    {

								        { L"abide", L"abode", L"abided", L"abiding", L"abides" },     // 0

								        { L"bat", L"bats", L"batted", L"batting" },                   // 1

								        { L"batch", L"batches", L"batched", L"batching" },            // 2

								        { L"bear", L"bears", L"bore", L"borne", L"born" },            // 3

								        { L"begin", L"began", L"begun", L"beginning", L"begins" },    // 4

								        { L"dance", L"danced", L"dances", L"dancing" },               // 5

								        { L"heave", L"heaved", L"hove", L"heaves", L"heaving" },      // 6

								        { L"hero", L"heroes" },                                       // 7

								        { L"keep", L"keeps", L"kept", L"keeping" },                   // 8

								        { L"misspell", L"misspelled", L"misspelt", L"misspelling",

								          L"misspells" },                                             // 9

								        { L"plead", L"pleaded", L"pled", L"pleading", L"pleads" },    // 10

								        { L"run", L"runs", L"ran", L"running" },                      // 11

								        { L"swim", L"swam", L"swum", L"swimming", L"swims" },         // 12

								        { L"underlie", L"underlay", L"underlain", L"underlying",

								          L"underlies" },                                             // 13

								    };


								    int __cdecl StemCompare( const void *p1, const void *p2 )

								    {

								        SStemForm const * pForm = (SStemForm const *) p2;

								        WCHAR const * pwcWord = (WCHAR const *) p1;

								        return wcscmp( pwcWord, aStems[ pForm->iList ][ pForm->iForm ] );

								    }


								#endif // SIMPLE_LIST_STEMMER


								//+-------------------------------------------------------------------------

								//

								//  Function:   IsWordChar

								//

								//  Synopsis:   Find whether the i'th character in the buffer _pwcChunk

								//              is a word character (rather than word break)

								//

								//  Arguments:  [pwcChunk] -- Characters whose type information is checked

								//              [i]        -- Index of character to check

								//              [pInfo1]   -- Type 1 information

								//              [pInfo3]   -- Type 3 information

								//

								//  Returns:    TRUE if the character is a word character

								//              FALSE if it's a word breaking character

								//

								//--------------------------------------------------------------------------


								__forceinline BOOL IsWordChar(

								    WCHAR const * pwcChunk,

								    int           i,

								    WORD const *  pInfo1,

								    WORD const *  pInfo3 )

								{

								    // Any alphabetic, digit, or non-spacing character is part of a word


								    if ( ( 0 != ( pInfo1[i] & ( C1_ALPHA | C1_DIGIT ) ) ) ||

								         ( 0 != ( pInfo3[i] & C3_NONSPACING ) ) )

								        return TRUE;


								    WCHAR c = pwcChunk[i];


								    // Underscore is part of a word


								    if ( L'_' == c )

								        return TRUE;


								    //

								    // A non-breaking space followed by a non-spacing character should not

								    // be a word breaker.

								    //


								    if ( 0xa0 == c ) // non breaking space

								    {

								        // followed by a non-spacing character (looking ahead is okay)


								        if ( 0 != ( pInfo3[i+1] & C3_NONSPACING ) )

								            return TRUE;

								    }


								    return FALSE;

								} //IsWordChar


								//+---------------------------------------------------------------------------

								//

								//  Function:   ScanChunk

								//

								//  Synopsis:   For each character find its type information flags

								//

								//  Arguments:  [pwcChunk] -- Characters whose type information is retrieved

								//              [cwc]      -- Number of characters to scan

								//              [pInfo1]   -- Type 1 information is written here

								//              [pInfo3]   -- Type 3 information is written here

								//

								//  Returns:    S_OK if successful or an error code

								//

								//----------------------------------------------------------------------------


								HRESULT ScanChunk(

								    WCHAR const * pwcChunk,

								    ULONG         cwc,

								    WORD *        pInfo1,

								    WORD *        pInfo3 )

								{

								    if ( !GetStringTypeW( CT_CTYPE1,         // POSIX character typing

								                          pwcChunk,          // Source

								                          cwc,               // Size of source

								                          pInfo1 ) )         // Character info 1

								        return HRESULT_FROM_WIN32( GetLastError() );


								    if ( !GetStringTypeW( CT_CTYPE3,         // Additional POSIX

								                          pwcChunk,          // Source

								                          cwc,               // Size of source

								                          pInfo3 ) )         // Character info 3

								        return HRESULT_FROM_WIN32( GetLastError() );


								    return S_OK;

								} //ScanChunk


								//+---------------------------------------------------------------------------

								//

								//  Member:     CSampleWordBreaker::Tokenize

								//

								//  Synopsis:   Break a block of text into individual words

								//

								//  Arguments:  [pTextSource]  -- Source of characters to work on

								//              [cwc]          -- Number of characters to process

								//              [pWordSink]    -- Where to send the words found

								//              [cwcProcessed] -- Returns the # of characters tokenized

								//

								//  Returns:    S_OK if successful or an error code

								//

								//----------------------------------------------------------------------------


								HRESULT CSampleWordBreaker::Tokenize(

								    TEXT_SOURCE * pTextSource,

								    ULONG         cwc,

								    IWordSink *   pWordSink,

								    ULONG &       cwcProcessed )

								{

								    // Leave space for one (unused) lookahead


								    WORD aInfo1[ CSampleWordBreaker::cwcAtATime + 1 ];

								    WORD aInfo3[ CSampleWordBreaker::cwcAtATime + 1 ];


								    // Initialize this so we can go 1 beyond in IsWordChar()


								    aInfo3 [ CSampleWordBreaker::cwcAtATime ] = C3_NONSPACING;


								    // Get a pointer to the text we'll be working on


								    const WCHAR * pwcChunk = &pTextSource->awcBuffer[ pTextSource->iCur ];


								    HRESULT hr = ScanChunk( pwcChunk, cwc, aInfo1, aInfo3 );

								    if ( FAILED( hr ) )

								        return hr;


								    BOOL fWordHasZWS = FALSE; // Does the current word have a 0-width-space?

								    ULONG cwcZWS;             // Length of word minus embedded 0-width-spaces


								    //

								    // iBeginWord is the offset into aInfoX of the beginning character of

								    // a word.  iCur is the first unprocessed character.

								    // They are indexes into the current block (_pwcChunk).

								    //


								    ULONG iBeginWord = 0;

								    ULONG iCur = 0;


								    // Temp buffer for a word having zero-width space


								    WCHAR awcBufZWS[ CSampleWordBreaker::cwcAtATime ];


								    // Send words from the current block to word sink


								    while ( iCur < cwc )

								    {

								        // Skip whitespace, punctuation, etc.


								        for (; iCur < cwc; iCur++)

								            if ( IsWordChar( pwcChunk, iCur, aInfo1, aInfo3 ) )

								                break;


								        // iCur points to a word char or is equal to cwc


								        iBeginWord = iCur;

								        if ( iCur < cwc )

								            iCur++; // we knew it pointed at word character


								        //

								        // Find word break. Filter may output Unicode zero-width-space, which

								        // should be ignored by the wordbreaker.

								        //


								        fWordHasZWS = FALSE;

								        for ( ; iCur < cwc; iCur++ )

								        {

								            if ( !IsWordChar( pwcChunk, iCur, aInfo1, aInfo3 ) )

								            {

								                if ( ZERO_WIDTH_SPACE == pwcChunk[iCur] )

								                    fWordHasZWS = TRUE;

								                else

								                    break;

								            }

								        }


								        if ( fWordHasZWS )

								        {

								            // Copy word into awcBufZWS after stripping zero-width-spaces


								            cwcZWS = 0;

								            for ( ULONG i = iBeginWord; i < iCur; i++ )

								            {

								                if ( ZERO_WIDTH_SPACE != pwcChunk[i] )

								                    awcBufZWS[cwcZWS++] = pwcChunk[i];

								            }

								        }


								        // iCur points to a non-word char or is equal to cwc


								        if ( iCur < cwc )

								        {

								            // store the word and its source position


								            if ( fWordHasZWS )

								                hr = pWordSink->PutWord( cwcZWS,

								                                         awcBufZWS,    // stripped word

								                                         iCur - iBeginWord,

								                                         pTextSource->iCur + iBeginWord );

								            else

								                hr = pWordSink->PutWord( iCur - iBeginWord,

								                                         pwcChunk + iBeginWord, // the word

								                                         iCur - iBeginWord,

								                                         pTextSource->iCur + iBeginWord );


								            if ( FAILED( hr ) )

								                return hr;


								            iCur++; // we knew it pointed at non-word char

								            iBeginWord = iCur; // in case we exit the loop now

								        }

								    } // next word


								    // End of words in chunk.

								    // iCur == cwc

								    // iBeginWord points at beginning of word or == cwc


								    if ( 0 == iBeginWord )

								    {

								        // A single word fills from beginning of this chunk

								        // to the end. This is either a very long word or

								        // a short word in a leftover buffer.


								        // store the word and its source position


								        if ( fWordHasZWS )

								            hr = pWordSink->PutWord( cwcZWS,

								                                     awcBufZWS,          // stripped word

								                                     iCur,

								                                     pTextSource->iCur ); // its source pos.

								        else

								            hr = pWordSink->PutWord( iCur,

								                                     pwcChunk,           // the word

								                                     iCur,

								                                     pTextSource->iCur ); // its source pos.


								        if ( FAILED( hr ) )

								            return hr;


								        // Position it to not add the word twice.


								        iBeginWord = iCur;

								    }


								    //

								    // If this is the last chunk from text source, then process the

								    // last fragment.

								    //


								    if ( ( cwc < CSampleWordBreaker::cwcAtATime ) && ( iBeginWord != iCur ) )

								    {

								        // store the word and its source position


								        if ( fWordHasZWS )

								            hr = pWordSink->PutWord( cwcZWS,

								                                     awcBufZWS,    // stripped word

								                                     iCur - iBeginWord,

								                                     pTextSource->iCur + iBeginWord );

								        else

								            hr = pWordSink->PutWord( iCur - iBeginWord,

								                                     pwcChunk + iBeginWord,  // the word

								                                     iCur - iBeginWord,

								                                     pTextSource->iCur + iBeginWord );


								        if ( FAILED( hr ) )

								            return hr;


								        iBeginWord = iCur;

								    }


								    cwcProcessed = iBeginWord;

								    return S_OK;

								} //Tokenize


								//+---------------------------------------------------------------------------

								//

								//  Member:     CSampleWordBreaker::BreakText

								//

								//  Synopsis:   Break a block of text into individual words

								//

								//  Arguments:  [pTextSource]  -- Source of characters to work on

								//              [pWordSink]    -- Where to send the words found

								//              [pPhraseSink]  -- Where to send the phrases found (not used)

								//

								//  Returns:    S_OK if successful or an error code

								//

								//----------------------------------------------------------------------------


								HRESULT STDMETHODCALLTYPE CSampleWordBreaker::BreakText(

								    TEXT_SOURCE * pTextSource,

								    IWordSink *   pWordSink,

								    IPhraseSink * pPhraseSink )

								{

								    // Validate arguments


								    if ( 0 == pTextSource )

								        return E_INVALIDARG;


								    if ( ( 0 == pWordSink ) || ( pTextSource->iCur == pTextSource->iEnd ) )

								        return S_OK;


								    if ( pTextSource->iCur > pTextSource->iEnd )

								        return E_INVALIDARG;


								    ULONG cwcProcessed;   // # chars actually processed by Tokenize()

								    HRESULT hr = S_OK;


								    // Pull text from the text source and tokenize it


								    do

								    {

								        BOOL fFirstTime = TRUE;


								        while ( pTextSource->iCur < pTextSource->iEnd )

								        {

								            ULONG cwc = pTextSource->iEnd - pTextSource->iCur;


								            // Process in buckets of cwcAtATime only


								            if ( cwc >= CSampleWordBreaker::cwcAtATime )

								                cwc = CSampleWordBreaker::cwcAtATime;

								            else if ( !fFirstTime )

								                break;


								            hr = Tokenize( pTextSource, cwc, pWordSink, cwcProcessed );

								            if ( FAILED( hr ) )

								                return hr;


								            pTextSource->iCur += cwcProcessed;

								            fFirstTime = FALSE;

								        }


								        hr = pTextSource->pfnFillTextBuffer( pTextSource );

								    } while ( SUCCEEDED( hr ) );


								    //

								    // If anything failed except for running out of text, report the error.

								    // Otherwise, for cases like out of memory, files will not get retried or

								    // reported as failures properly.

								    //


								    if ( ( FAILED( hr ) ) &&

								         ( FILTER_E_NO_MORE_VALUES != hr ) &&

								         ( FILTER_E_NO_TEXT != hr ) &&

								         ( FILTER_E_NO_VALUES != hr ) &&

								         ( FILTER_E_NO_MORE_TEXT != hr ) &&

								         ( FILTER_E_END_OF_CHUNKS != hr ) &&

								         ( WBREAK_E_END_OF_TEXT != hr ) )

								        return hr;


								    ULONG cwc = pTextSource->iEnd - pTextSource->iCur;


								    if ( 0 == cwc )

								        return S_OK;


								    return Tokenize( pTextSource, cwc, pWordSink, cwcProcessed );

								} //BreakText


								//+---------------------------------------------------------------------------

								//

								//  Member:     CSampleStemmer::GenerateWordForms

								//

								//  Synopsis:   From the input word, emit the original and alternate forms

								//              of the word.

								//

								//  Arguments:  [pwcInBuf]   -- The original word to stem (not 0-terminated)

								//              [cwc]        -- Length in characters of the word

								//              [pStemSink]  -- Where to emit the stems

								//

								//  Returns:    S_OK if successful or an error code

								//

								//----------------------------------------------------------------------------


								HRESULT STDMETHODCALLTYPE CSampleStemmer::GenerateWordForms(

								    WCHAR const *   pwcInBuf,

								    ULONG           cwc,

								    IWordFormSink * pWordFormSink )

								{

								    // Validate the arguments


								    if ( ( 0 == pwcInBuf ) || ( 0 == pWordFormSink ) )

								        return E_INVALIDARG;


								    HRESULT hr = S_OK;


								#ifdef PORTER_STEMMER


								    //

								    // If the word is small enough, attempt to get the stemmed form of the

								    // word.  Emit both forms if they are different.  The Porter algorithm

								    // does the opposite of what's required here, but doing the right thing

								    // requires a lexicon.

								    //


								    if ( cwc < cwcMaxPorterWord )

								    {

								        // Make a temporary buffer for the word


								        WCHAR awcPorter[ cwcMaxPorterWord ];

								        CopyMemory( awcPorter, pwcInBuf, sizeof(WCHAR) * cwc );

								        awcPorter[cwc] = 0;


								        // Convert it to lowercase and save the original in lowercase


								        CharLower( awcPorter );

								        WCHAR awcOriginal[ cwcMaxPorterWord ];

								        wcscpy( awcOriginal, awcPorter );


								        // Get the stemmed form of the word


								        GetPorterStemForm( awcPorter );


								        // If it's different from the original, emit it.


								        if ( wcscmp( awcOriginal, awcPorter ) )

								        {

								            hr = pWordFormSink->PutAltWord( awcPorter,

								                                        wcslen( awcPorter ) );

								            if ( FAILED( hr ) )

								                return hr;

								        }

								    }


								#endif //PORTER_STEMMER


								#ifdef LEXICON_STEMMER


								    //

								    // If the word is small enough to work with the stemmer, attempt to get

								    // various forms of the word.

								    //


								    if ( cwc < cbMaxStem )

								    {

								        //

								        // Convert the original string to 8-bit characters.  This is OK since

								        // it's is an English stemmer that can safely assume such characters.

								        //


								        char acOriginal[ cbMaxStem ];

								        for ( unsigned i = 0; i < cwc; i++ )

								            acOriginal[ i ] = (char) pwcInBuf[ i ];

								        acOriginal[ i ] = 0;


								        // Enumerate all stem-sets that contain the word.


								        unsigned iBmk = stemInvalid;

								        unsigned iStemSet = stemInvalid;

								        char ac[ cbMaxStem ];


								        while ( g_pStem->FindStemSet( acOriginal, iBmk, iStemSet ) )

								        {

								            // Enumerate all forms of the stem-set, root first.


								            CStemSet set( g_pStem->GetStemSetRoot(), iStemSet );

								            unsigned iStemBmk = stemInvalid;


								            while ( set.GetForm( ac, iStemBmk ) )

								            {

								                if ( strcmp( ac, acOriginal ) )

								                {

								                    WCHAR awcForm[ cbMaxStem ];

								                    mbstowcs( awcForm, ac, -1 );


								                    hr = pWordFormSink->PutAltWord( awcForm,

								                                                wcslen( awcForm ) );

								                    if ( FAILED( hr ) )

								                        return hr;

								                }

								            }

								        }

								    }


								#endif //LEXICON_STEMMER


								#ifdef SIMPLE_LIST_STEMMER


								    // Look up the word in the simple list of stem forms


								    SStemForm const * pStemForm = (SStemForm *) bsearch( pwcInBuf,

								                                                         aStemForms,

								                                                         cStemForms,

								                                                         sizeof SStemForm,

								                                                         StemCompare );


								    if ( 0 != pStemForm )

								    {

								        // Found it, now iterate all the forms


								        ULONG iList = pStemForm->iList;

								        ULONG iForm = 0;


								        while ( 0 != aStems[ iList ][ iForm ] )

								        {

								            WCHAR const * pwc = aStems[ iList ][ iForm ];


								            // Don't emit the original word yet


								            if ( 0 != wcscmp( pwc, pwcInBuf ) )

								            {

								                hr = pWordFormSink->PutAltWord( pwc,

								                                                wcslen( pwc ) );

								                if ( FAILED( hr ) )

								                    return hr;

								            }


								            iForm++;

								        }

								    }


								#endif //SIMPLE_LIST_STEMMER


								    // Emit the original word


								    return pWordFormSink->PutWord( pwcInBuf, cwc );

								} //StemWord


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::CLanguageResourceSampleCF

								//

								//  Synopsis:   Language resource class factory constructor

								//

								//--------------------------------------------------------------------------


								CLanguageResourceSampleCF::CLanguageResourceSampleCF() :

								    _lRefs( 1 )

								{

								    InterlockedIncrement( &g_cInstances );

								} //CLanguageResourceSampleCF


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::~CLanguageResourceSampleCF

								//

								//  Synopsis:   Language resource class factory destructor

								//

								//--------------------------------------------------------------------------


								CLanguageResourceSampleCF::~CLanguageResourceSampleCF()

								{

								    InterlockedDecrement( &g_cInstances );

								} //~LanguageResourceSampleCF


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::QueryInterface

								//

								//  Synopsis:   Rebind to the requested interface

								//

								//  Arguments:  [riid]      -- IID of new interface

								//              [ppvObject] -- New interface * returned here

								//

								//  Returns:    S_OK if bind succeeded, E_NOINTERFACE if bind failed

								//

								//--------------------------------------------------------------------------


								HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::QueryInterface(

								    REFIID   riid,

								    void  ** ppvObject )

								{

								    if ( IID_IClassFactory == riid )

								        *ppvObject = (IUnknown *) (IClassFactory *) this;

								    else if ( IID_IUnknown == riid )

								        *ppvObject = (IUnknown *) (IPersist *) this;

								    else

								    {

								        *ppvObject = 0;

								        return E_NOINTERFACE;

								    }


								    AddRef();

								    return S_OK;

								} //QueryInterface


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::AddRef

								//

								//  Synopsis:   Increments the refcount

								//

								//  Returns:    The new refcount

								//

								//--------------------------------------------------------------------------


								ULONG STDMETHODCALLTYPE CLanguageResourceSampleCF::AddRef()

								{

								    return InterlockedIncrement( &_lRefs );

								} //AddRef


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::Release

								//

								//  Synopsis:   Decrement refcount.  Delete self if necessary.

								//

								//  Returns:    The new refcount

								//

								//--------------------------------------------------------------------------


								ULONG STDMETHODCALLTYPE CLanguageResourceSampleCF::Release()

								{

								    long lTmp = InterlockedDecrement( &_lRefs );


								    if ( 0 == lTmp )

								        delete this;


								    return lTmp;

								} //Release


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::CreateInstance

								//

								//  Synopsis:   Creates new Language Resource sample object

								//

								//  Arguments:  [pUnkOuter] -- 'Outer' IUnknown

								//              [riid]      -- Interface to bind

								//              [ppvObject] -- Interface returned here

								//

								//  Returns:    S_OK if successful or an appropriate error code

								//

								//--------------------------------------------------------------------------


								HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::CreateInstance(

								    IUnknown * pUnkOuter,

								    REFIID     riid,

								    void * *   ppvObject )

								{

								    *ppvObject = 0;


								    if ( IID_IStemmer == riid )

								        *ppvObject = new CSampleStemmer();

								    else if ( IID_IWordBreaker == riid )

								        *ppvObject = new CSampleWordBreaker();

								    else

								        return E_NOINTERFACE;


								    if ( 0 == *ppvObject )

								        return E_OUTOFMEMORY;


								    return S_OK;

								} //CreateInstance


								//+-------------------------------------------------------------------------

								//

								//  Method:     CLanguageResourceSampleCF::LockServer

								//

								//  Synopsis:   Force class factory to remain loaded

								//

								//  Arguments:  [fLock] -- TRUE if locking, FALSE if unlocking

								//

								//  Returns:    S_OK

								//

								//--------------------------------------------------------------------------


								HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::LockServer( BOOL fLock )

								{

								    if ( fLock )

								        InterlockedIncrement( &g_cInstances );

								    else

								        InterlockedDecrement( &g_cInstances );


								    return S_OK;

								} //LockServer


								//+-------------------------------------------------------------------------

								//

								//  Function:   DllGetClassObject

								//

								//  Synopsis:   Ole DLL load class routine

								//

								//  Arguments:  [cid]    -- Class to load

								//              [iid]    -- Interface to bind to on class object

								//              [ppvObj] -- Interface pointer returned here

								//

								//  Returns:    Sample language resource class factory

								//

								//--------------------------------------------------------------------------


								extern "C" HRESULT STDMETHODCALLTYPE DllGetClassObject(

								    REFCLSID cid,

								    REFIID   iid,

								    void **  ppvObj )

								{

								    IUnknown * pUnk = 0;

								    *ppvObj = 0;


								    if ( CLSID_SampleWordBreaker == cid ||

								         CLSID_SampleStemmer == cid )

								    {

								        pUnk = new CLanguageResourceSampleCF();


								        if ( 0 == pUnk )

								            return E_OUTOFMEMORY;


								        #ifdef LEXICON_STEMMER


								            if ( 0 == g_pStem )

								                g_pStem = MakeStemObject( g_hModule );


								            if ( 0 == g_pStem )

								            {

								                pUnk->Release();

								                return E_OUTOFMEMORY;

								            }


								        #endif //LEXICON_STEMMER


								    }

								    else

								    {

								        *ppvObj = 0;

								        return E_NOINTERFACE;

								    }


								    HRESULT hr = pUnk->QueryInterface( iid, ppvObj );


								    pUnk->Release();


								    return hr;

								} //DllGetClassObject


								//+-------------------------------------------------------------------------

								//

								//  Function:   DllCanUnloadNow

								//

								//  Synopsis:   Notifies DLL to unload (cleanup global resources)

								//

								//  Returns:    S_OK if it is acceptable for caller to unload DLL.

								//              S_FALSE otherwise.

								//

								//--------------------------------------------------------------------------


								extern "C" HRESULT STDMETHODCALLTYPE DllCanUnloadNow( void )

								{

								    if ( 0 == g_cInstances )

								        return S_OK;


								    return S_FALSE;

								} //DllCanUnloadNow


								//+-------------------------------------------------------------------------

								//

								//  Function:   DllMain

								//

								//  Synopsis:   Standard main entry point for the module.

								//

								//--------------------------------------------------------------------------


								BOOL WINAPI DllMain(

								    HANDLE hInstance,

								    DWORD  dwReason,

								    void * lpReserved )

								{

								    if ( DLL_PROCESS_ATTACH == dwReason )

								    {

								        g_hModule = (HMODULE) hInstance;

								        DisableThreadLibraryCalls( (HINSTANCE) hInstance );

								    }


								    return TRUE;

								} //DllMain


								SLangRegistry const English_Sample_LangRes =

								{

								    L"English_Sample", MAKELANGID( LANG_ENGLISH, SUBLANG_ENGLISH_SAMPLE ),

								    { L"{d225281a-7ca9-4a46-ae7d-c63a9d4815d4}",

								      L"English_Sample Word Breaker",

								      L"lrsample.dll",

								      L"both" },

								    { L"{0a275611-aa4d-4b39-8290-4baf77703f55}",

								      L"English_Sample Stemmer",

								      L"lrsample.dll",

								      L"both" }

								};


								//+-------------------------------------------------------------------------

								//

								//  Method:     DllRegisterServer

								//

								//  Synopsis:   Registers the language resources in the registry

								//

								//--------------------------------------------------------------------------


								STDAPI DllRegisterServer()

								{

								    return RegisterALanguageResource( English_Sample_LangRes );

								} //DllRegisterServer


								//+-------------------------------------------------------------------------

								//

								//  Method:     DllUnregisterServer

								//

								//  Synopsis:   Removes the language resources from the registry

								//

								//--------------------------------------------------------------------------


								STDAPI DllUnregisterServer()

								{

								    return UnRegisterALanguageResource( English_Sample_LangRes );

								} //DllUnregisterServer