Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

979 lines
29 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  4. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  5. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  6. // PARTICULAR PURPOSE.
  7. //
  8. // Copyright 1998 - 2001 Microsoft Corporation. All Rights Reserved.
  9. //
  10. // PROGRAM: lrsample.cxx
  11. //
  12. // PURPOSE: Sample wordbreaker and stemmer.
  13. //
  14. // PLATFORM: Windows 2000 and later
  15. //
  16. //--------------------------------------------------------------------------
  17. #include <stdio.h>
  18. #include <wchar.h>
  19. #include <windows.h>
  20. #include <objidl.h>
  21. #include <indexsrv.h>
  22. #include <cierror.h>
  23. #include <filterr.h>
  24. #include "lrsample.hxx"
  25. #include "filtreg.hxx"
  26. #include "langreg.hxx"
  27. //#define LEXICON_STEMMER
  28. //#define PORTER_STEMMER
  29. #define SIMPLE_LIST_STEMMER
  30. // The CLSID for the wordbreaker
  31. CLSID CLSID_SampleWordBreaker = /* d225281a-7ca9-4a46-ae7d-c63a9d4815d4 */
  32. {
  33. 0xd225281a, 0x7ca9, 0x4a46,
  34. {0xae, 0x7d, 0xc6, 0x3a, 0x9d, 0x48, 0x15, 0xd4}
  35. };
  36. // The CLSID of the stemmer
  37. CLSID CLSID_SampleStemmer = /* 0a275611-aa4d-4b39-8290-4baf77703f55 */
  38. {
  39. 0x0a275611, 0xaa4d, 0x4b39,
  40. {0x82, 0x90, 0x4b, 0xaf, 0x77, 0x70, 0x3f, 0x55}
  41. };
  42. // Global module refcount
  43. long g_cInstances = 0;
  44. HMODULE g_hModule = 0;
  45. #ifdef PORTER_STEMMER
  46. #include "porter.hxx"
  47. #endif //PORTER_STEMMER
  48. #ifdef LEXICON_STEMMER
  49. #include "stem.hxx"
  50. CStem * g_pStem = 0;
  51. #endif //LEXICON_STEMMER
  52. #ifdef SIMPLE_LIST_STEMMER
  53. // This is just a simple hard-coded list of words and stem forms.
  54. struct SStemForm
  55. {
  56. USHORT iList; // first index into aStems
  57. USHORT iForm; // second index into aStems
  58. };
  59. const SStemForm aStemForms[] =
  60. {
  61. { 0, 0 }, // abide
  62. { 0, 2 }, // abided
  63. { 0, 4 }, // abides
  64. { 0, 3 }, // abiding
  65. { 0, 1 }, // abode
  66. { 1, 0 }, // bat
  67. { 2, 0 }, // batch
  68. { 2, 2 }, // batched
  69. { 2, 1 }, // batches
  70. { 2, 3 }, // batching
  71. { 1, 1 }, // bats
  72. { 1, 2 }, // batted
  73. { 1, 3 }, // batting
  74. { 3, 0 }, // bear
  75. { 3, 1 }, // bears
  76. { 4, 1 }, // began
  77. { 4, 0 }, // begin
  78. { 4, 3 }, // beginning
  79. { 4, 4 }, // begins
  80. { 4, 2 }, // begun
  81. { 3, 2 }, // bore
  82. { 3, 4 }, // born
  83. { 3, 3 }, // borne
  84. { 5, 0 }, // dance
  85. { 5, 1 }, // danced
  86. { 5, 2 }, // dances
  87. { 5, 3 }, // dancing
  88. { 6, 0 }, // heave
  89. { 6, 1 }, // heaved
  90. { 6, 3 }, // heaves
  91. { 6, 4 }, // heaving
  92. { 7, 0 }, // hero
  93. { 7, 1 }, // heroes
  94. { 6, 2 }, // hove
  95. { 8, 0 }, // keep
  96. { 8, 4 }, // keeping
  97. { 8, 1 }, // keeps
  98. { 8, 2 }, // kept
  99. { 9, 0 }, // misspell
  100. { 9, 1 }, // misspelled
  101. { 9, 3 }, // misspelling
  102. { 9, 4 }, // misspells
  103. { 9, 2 }, // misspelt
  104. { 10, 0 }, // plead
  105. { 10, 1 }, // pleaded
  106. { 10, 3 }, // pleading
  107. { 10, 4 }, // pleads
  108. { 10, 0 }, // pled
  109. { 11, 2 }, // ran
  110. { 11, 0 }, // run
  111. { 11, 3 }, // running
  112. { 11, 1 }, // runs
  113. { 12, 1 }, // swam
  114. { 12, 0 }, // swim
  115. { 12, 3 }, // swimming
  116. { 12, 4 }, // swims
  117. { 12, 2 }, // swum
  118. { 13, 2 }, // underlain
  119. { 13, 1 }, // underlay
  120. { 13, 0 }, // underlie
  121. { 13, 4 }, // underlies
  122. { 13, 3 }, // underlying
  123. };
  124. const ULONG cStemForms = ArraySize( aStemForms );
  125. const ULONG cMaxStemForms = 8;
  126. const WCHAR * aStems[][ cMaxStemForms ] =
  127. {
  128. { L"abide", L"abode", L"abided", L"abiding", L"abides" }, // 0
  129. { L"bat", L"bats", L"batted", L"batting" }, // 1
  130. { L"batch", L"batches", L"batched", L"batching" }, // 2
  131. { L"bear", L"bears", L"bore", L"borne", L"born" }, // 3
  132. { L"begin", L"began", L"begun", L"beginning", L"begins" }, // 4
  133. { L"dance", L"danced", L"dances", L"dancing" }, // 5
  134. { L"heave", L"heaved", L"hove", L"heaves", L"heaving" }, // 6
  135. { L"hero", L"heroes" }, // 7
  136. { L"keep", L"keeps", L"kept", L"keeping" }, // 8
  137. { L"misspell", L"misspelled", L"misspelt", L"misspelling",
  138. L"misspells" }, // 9
  139. { L"plead", L"pleaded", L"pled", L"pleading", L"pleads" }, // 10
  140. { L"run", L"runs", L"ran", L"running" }, // 11
  141. { L"swim", L"swam", L"swum", L"swimming", L"swims" }, // 12
  142. { L"underlie", L"underlay", L"underlain", L"underlying",
  143. L"underlies" }, // 13
  144. };
  145. int __cdecl StemCompare( const void *p1, const void *p2 )
  146. {
  147. SStemForm const * pForm = (SStemForm const *) p2;
  148. WCHAR const * pwcWord = (WCHAR const *) p1;
  149. return wcscmp( pwcWord, aStems[ pForm->iList ][ pForm->iForm ] );
  150. }
  151. #endif // SIMPLE_LIST_STEMMER
  152. //+-------------------------------------------------------------------------
  153. //
  154. // Function: IsWordChar
  155. //
  156. // Synopsis: Find whether the i'th character in the buffer _pwcChunk
  157. // is a word character (rather than word break)
  158. //
  159. // Arguments: [pwcChunk] -- Characters whose type information is checked
  160. // [i] -- Index of character to check
  161. // [pInfo1] -- Type 1 information
  162. // [pInfo3] -- Type 3 information
  163. //
  164. // Returns: TRUE if the character is a word character
  165. // FALSE if it's a word breaking character
  166. //
  167. //--------------------------------------------------------------------------
  168. __forceinline BOOL IsWordChar(
  169. WCHAR const * pwcChunk,
  170. int i,
  171. WORD const * pInfo1,
  172. WORD const * pInfo3 )
  173. {
  174. // Any alphabetic, digit, or non-spacing character is part of a word
  175. if ( ( 0 != ( pInfo1[i] & ( C1_ALPHA | C1_DIGIT ) ) ) ||
  176. ( 0 != ( pInfo3[i] & C3_NONSPACING ) ) )
  177. return TRUE;
  178. WCHAR c = pwcChunk[i];
  179. // Underscore is part of a word
  180. if ( L'_' == c )
  181. return TRUE;
  182. //
  183. // A non-breaking space followed by a non-spacing character should not
  184. // be a word breaker.
  185. //
  186. if ( 0xa0 == c ) // non breaking space
  187. {
  188. // followed by a non-spacing character (looking ahead is okay)
  189. if ( 0 != ( pInfo3[i+1] & C3_NONSPACING ) )
  190. return TRUE;
  191. }
  192. return FALSE;
  193. } //IsWordChar
  194. //+---------------------------------------------------------------------------
  195. //
  196. // Function: ScanChunk
  197. //
  198. // Synopsis: For each character find its type information flags
  199. //
  200. // Arguments: [pwcChunk] -- Characters whose type information is retrieved
  201. // [cwc] -- Number of characters to scan
  202. // [pInfo1] -- Type 1 information is written here
  203. // [pInfo3] -- Type 3 information is written here
  204. //
  205. // Returns: S_OK if successful or an error code
  206. //
  207. //----------------------------------------------------------------------------
  208. HRESULT ScanChunk(
  209. WCHAR const * pwcChunk,
  210. ULONG cwc,
  211. WORD * pInfo1,
  212. WORD * pInfo3 )
  213. {
  214. if ( !GetStringTypeW( CT_CTYPE1, // POSIX character typing
  215. pwcChunk, // Source
  216. cwc, // Size of source
  217. pInfo1 ) ) // Character info 1
  218. return HRESULT_FROM_WIN32( GetLastError() );
  219. if ( !GetStringTypeW( CT_CTYPE3, // Additional POSIX
  220. pwcChunk, // Source
  221. cwc, // Size of source
  222. pInfo3 ) ) // Character info 3
  223. return HRESULT_FROM_WIN32( GetLastError() );
  224. return S_OK;
  225. } //ScanChunk
  226. //+---------------------------------------------------------------------------
  227. //
  228. // Member: CSampleWordBreaker::Tokenize
  229. //
  230. // Synopsis: Break a block of text into individual words
  231. //
  232. // Arguments: [pTextSource] -- Source of characters to work on
  233. // [cwc] -- Number of characters to process
  234. // [pWordSink] -- Where to send the words found
  235. // [cwcProcessed] -- Returns the # of characters tokenized
  236. //
  237. // Returns: S_OK if successful or an error code
  238. //
  239. //----------------------------------------------------------------------------
  240. HRESULT CSampleWordBreaker::Tokenize(
  241. TEXT_SOURCE * pTextSource,
  242. ULONG cwc,
  243. IWordSink * pWordSink,
  244. ULONG & cwcProcessed )
  245. {
  246. // Leave space for one (unused) lookahead
  247. WORD aInfo1[ CSampleWordBreaker::cwcAtATime + 1 ];
  248. WORD aInfo3[ CSampleWordBreaker::cwcAtATime + 1 ];
  249. // Initialize this so we can go 1 beyond in IsWordChar()
  250. aInfo3 [ CSampleWordBreaker::cwcAtATime ] = C3_NONSPACING;
  251. // Get a pointer to the text we'll be working on
  252. const WCHAR * pwcChunk = &pTextSource->awcBuffer[ pTextSource->iCur ];
  253. HRESULT hr = ScanChunk( pwcChunk, cwc, aInfo1, aInfo3 );
  254. if ( FAILED( hr ) )
  255. return hr;
  256. BOOL fWordHasZWS = FALSE; // Does the current word have a 0-width-space?
  257. ULONG cwcZWS; // Length of word minus embedded 0-width-spaces
  258. //
  259. // iBeginWord is the offset into aInfoX of the beginning character of
  260. // a word. iCur is the first unprocessed character.
  261. // They are indexes into the current block (_pwcChunk).
  262. //
  263. ULONG iBeginWord = 0;
  264. ULONG iCur = 0;
  265. // Temp buffer for a word having zero-width space
  266. WCHAR awcBufZWS[ CSampleWordBreaker::cwcAtATime ];
  267. // Send words from the current block to word sink
  268. while ( iCur < cwc )
  269. {
  270. // Skip whitespace, punctuation, etc.
  271. for (; iCur < cwc; iCur++)
  272. if ( IsWordChar( pwcChunk, iCur, aInfo1, aInfo3 ) )
  273. break;
  274. // iCur points to a word char or is equal to cwc
  275. iBeginWord = iCur;
  276. if ( iCur < cwc )
  277. iCur++; // we knew it pointed at word character
  278. //
  279. // Find word break. Filter may output Unicode zero-width-space, which
  280. // should be ignored by the wordbreaker.
  281. //
  282. fWordHasZWS = FALSE;
  283. for ( ; iCur < cwc; iCur++ )
  284. {
  285. if ( !IsWordChar( pwcChunk, iCur, aInfo1, aInfo3 ) )
  286. {
  287. if ( ZERO_WIDTH_SPACE == pwcChunk[iCur] )
  288. fWordHasZWS = TRUE;
  289. else
  290. break;
  291. }
  292. }
  293. if ( fWordHasZWS )
  294. {
  295. // Copy word into awcBufZWS after stripping zero-width-spaces
  296. cwcZWS = 0;
  297. for ( ULONG i = iBeginWord; i < iCur; i++ )
  298. {
  299. if ( ZERO_WIDTH_SPACE != pwcChunk[i] )
  300. awcBufZWS[cwcZWS++] = pwcChunk[i];
  301. }
  302. }
  303. // iCur points to a non-word char or is equal to cwc
  304. if ( iCur < cwc )
  305. {
  306. // store the word and its source position
  307. if ( fWordHasZWS )
  308. hr = pWordSink->PutWord( cwcZWS,
  309. awcBufZWS, // stripped word
  310. iCur - iBeginWord,
  311. pTextSource->iCur + iBeginWord );
  312. else
  313. hr = pWordSink->PutWord( iCur - iBeginWord,
  314. pwcChunk + iBeginWord, // the word
  315. iCur - iBeginWord,
  316. pTextSource->iCur + iBeginWord );
  317. if ( FAILED( hr ) )
  318. return hr;
  319. iCur++; // we knew it pointed at non-word char
  320. iBeginWord = iCur; // in case we exit the loop now
  321. }
  322. } // next word
  323. // End of words in chunk.
  324. // iCur == cwc
  325. // iBeginWord points at beginning of word or == cwc
  326. if ( 0 == iBeginWord )
  327. {
  328. // A single word fills from beginning of this chunk
  329. // to the end. This is either a very long word or
  330. // a short word in a leftover buffer.
  331. // store the word and its source position
  332. if ( fWordHasZWS )
  333. hr = pWordSink->PutWord( cwcZWS,
  334. awcBufZWS, // stripped word
  335. iCur,
  336. pTextSource->iCur ); // its source pos.
  337. else
  338. hr = pWordSink->PutWord( iCur,
  339. pwcChunk, // the word
  340. iCur,
  341. pTextSource->iCur ); // its source pos.
  342. if ( FAILED( hr ) )
  343. return hr;
  344. // Position it to not add the word twice.
  345. iBeginWord = iCur;
  346. }
  347. //
  348. // If this is the last chunk from text source, then process the
  349. // last fragment.
  350. //
  351. if ( ( cwc < CSampleWordBreaker::cwcAtATime ) && ( iBeginWord != iCur ) )
  352. {
  353. // store the word and its source position
  354. if ( fWordHasZWS )
  355. hr = pWordSink->PutWord( cwcZWS,
  356. awcBufZWS, // stripped word
  357. iCur - iBeginWord,
  358. pTextSource->iCur + iBeginWord );
  359. else
  360. hr = pWordSink->PutWord( iCur - iBeginWord,
  361. pwcChunk + iBeginWord, // the word
  362. iCur - iBeginWord,
  363. pTextSource->iCur + iBeginWord );
  364. if ( FAILED( hr ) )
  365. return hr;
  366. iBeginWord = iCur;
  367. }
  368. cwcProcessed = iBeginWord;
  369. return S_OK;
  370. } //Tokenize
  371. //+---------------------------------------------------------------------------
  372. //
  373. // Member: CSampleWordBreaker::BreakText
  374. //
  375. // Synopsis: Break a block of text into individual words
  376. //
  377. // Arguments: [pTextSource] -- Source of characters to work on
  378. // [pWordSink] -- Where to send the words found
  379. // [pPhraseSink] -- Where to send the phrases found (not used)
  380. //
  381. // Returns: S_OK if successful or an error code
  382. //
  383. //----------------------------------------------------------------------------
  384. HRESULT STDMETHODCALLTYPE CSampleWordBreaker::BreakText(
  385. TEXT_SOURCE * pTextSource,
  386. IWordSink * pWordSink,
  387. IPhraseSink * pPhraseSink )
  388. {
  389. // Validate arguments
  390. if ( 0 == pTextSource )
  391. return E_INVALIDARG;
  392. if ( ( 0 == pWordSink ) || ( pTextSource->iCur == pTextSource->iEnd ) )
  393. return S_OK;
  394. if ( pTextSource->iCur > pTextSource->iEnd )
  395. return E_INVALIDARG;
  396. ULONG cwcProcessed; // # chars actually processed by Tokenize()
  397. HRESULT hr = S_OK;
  398. // Pull text from the text source and tokenize it
  399. do
  400. {
  401. BOOL fFirstTime = TRUE;
  402. while ( pTextSource->iCur < pTextSource->iEnd )
  403. {
  404. ULONG cwc = pTextSource->iEnd - pTextSource->iCur;
  405. // Process in buckets of cwcAtATime only
  406. if ( cwc >= CSampleWordBreaker::cwcAtATime )
  407. cwc = CSampleWordBreaker::cwcAtATime;
  408. else if ( !fFirstTime )
  409. break;
  410. hr = Tokenize( pTextSource, cwc, pWordSink, cwcProcessed );
  411. if ( FAILED( hr ) )
  412. return hr;
  413. pTextSource->iCur += cwcProcessed;
  414. fFirstTime = FALSE;
  415. }
  416. hr = pTextSource->pfnFillTextBuffer( pTextSource );
  417. } while ( SUCCEEDED( hr ) );
  418. //
  419. // If anything failed except for running out of text, report the error.
  420. // Otherwise, for cases like out of memory, files will not get retried or
  421. // reported as failures properly.
  422. //
  423. if ( ( FAILED( hr ) ) &&
  424. ( FILTER_E_NO_MORE_VALUES != hr ) &&
  425. ( FILTER_E_NO_TEXT != hr ) &&
  426. ( FILTER_E_NO_VALUES != hr ) &&
  427. ( FILTER_E_NO_MORE_TEXT != hr ) &&
  428. ( FILTER_E_END_OF_CHUNKS != hr ) &&
  429. ( WBREAK_E_END_OF_TEXT != hr ) )
  430. return hr;
  431. ULONG cwc = pTextSource->iEnd - pTextSource->iCur;
  432. if ( 0 == cwc )
  433. return S_OK;
  434. return Tokenize( pTextSource, cwc, pWordSink, cwcProcessed );
  435. } //BreakText
  436. //+---------------------------------------------------------------------------
  437. //
  438. // Member: CSampleStemmer::GenerateWordForms
  439. //
  440. // Synopsis: From the input word, emit the original and alternate forms
  441. // of the word.
  442. //
  443. // Arguments: [pwcInBuf] -- The original word to stem (not 0-terminated)
  444. // [cwc] -- Length in characters of the word
  445. // [pStemSink] -- Where to emit the stems
  446. //
  447. // Returns: S_OK if successful or an error code
  448. //
  449. //----------------------------------------------------------------------------
  450. HRESULT STDMETHODCALLTYPE CSampleStemmer::GenerateWordForms(
  451. WCHAR const * pwcInBuf,
  452. ULONG cwc,
  453. IWordFormSink * pWordFormSink )
  454. {
  455. // Validate the arguments
  456. if ( ( 0 == pwcInBuf ) || ( 0 == pWordFormSink ) )
  457. return E_INVALIDARG;
  458. HRESULT hr = S_OK;
  459. #ifdef PORTER_STEMMER
  460. //
  461. // If the word is small enough, attempt to get the stemmed form of the
  462. // word. Emit both forms if they are different. The Porter algorithm
  463. // does the opposite of what's required here, but doing the right thing
  464. // requires a lexicon.
  465. //
  466. if ( cwc < cwcMaxPorterWord )
  467. {
  468. // Make a temporary buffer for the word
  469. WCHAR awcPorter[ cwcMaxPorterWord ];
  470. CopyMemory( awcPorter, pwcInBuf, sizeof(WCHAR) * cwc );
  471. awcPorter[cwc] = 0;
  472. // Convert it to lowercase and save the original in lowercase
  473. CharLower( awcPorter );
  474. WCHAR awcOriginal[ cwcMaxPorterWord ];
  475. wcscpy( awcOriginal, awcPorter );
  476. // Get the stemmed form of the word
  477. GetPorterStemForm( awcPorter );
  478. // If it's different from the original, emit it.
  479. if ( wcscmp( awcOriginal, awcPorter ) )
  480. {
  481. hr = pWordFormSink->PutAltWord( awcPorter,
  482. wcslen( awcPorter ) );
  483. if ( FAILED( hr ) )
  484. return hr;
  485. }
  486. }
  487. #endif //PORTER_STEMMER
  488. #ifdef LEXICON_STEMMER
  489. //
  490. // If the word is small enough to work with the stemmer, attempt to get
  491. // various forms of the word.
  492. //
  493. if ( cwc < cbMaxStem )
  494. {
  495. //
  496. // Convert the original string to 8-bit characters. This is OK since
  497. // it's is an English stemmer that can safely assume such characters.
  498. //
  499. char acOriginal[ cbMaxStem ];
  500. for ( unsigned i = 0; i < cwc; i++ )
  501. acOriginal[ i ] = (char) pwcInBuf[ i ];
  502. acOriginal[ i ] = 0;
  503. // Enumerate all stem-sets that contain the word.
  504. unsigned iBmk = stemInvalid;
  505. unsigned iStemSet = stemInvalid;
  506. char ac[ cbMaxStem ];
  507. while ( g_pStem->FindStemSet( acOriginal, iBmk, iStemSet ) )
  508. {
  509. // Enumerate all forms of the stem-set, root first.
  510. CStemSet set( g_pStem->GetStemSetRoot(), iStemSet );
  511. unsigned iStemBmk = stemInvalid;
  512. while ( set.GetForm( ac, iStemBmk ) )
  513. {
  514. if ( strcmp( ac, acOriginal ) )
  515. {
  516. WCHAR awcForm[ cbMaxStem ];
  517. mbstowcs( awcForm, ac, -1 );
  518. hr = pWordFormSink->PutAltWord( awcForm,
  519. wcslen( awcForm ) );
  520. if ( FAILED( hr ) )
  521. return hr;
  522. }
  523. }
  524. }
  525. }
  526. #endif //LEXICON_STEMMER
  527. #ifdef SIMPLE_LIST_STEMMER
  528. // Look up the word in the simple list of stem forms
  529. SStemForm const * pStemForm = (SStemForm *) bsearch( pwcInBuf,
  530. aStemForms,
  531. cStemForms,
  532. sizeof SStemForm,
  533. StemCompare );
  534. if ( 0 != pStemForm )
  535. {
  536. // Found it, now iterate all the forms
  537. ULONG iList = pStemForm->iList;
  538. ULONG iForm = 0;
  539. while ( 0 != aStems[ iList ][ iForm ] )
  540. {
  541. WCHAR const * pwc = aStems[ iList ][ iForm ];
  542. // Don't emit the original word yet
  543. if ( 0 != wcscmp( pwc, pwcInBuf ) )
  544. {
  545. hr = pWordFormSink->PutAltWord( pwc,
  546. wcslen( pwc ) );
  547. if ( FAILED( hr ) )
  548. return hr;
  549. }
  550. iForm++;
  551. }
  552. }
  553. #endif //SIMPLE_LIST_STEMMER
  554. // Emit the original word
  555. return pWordFormSink->PutWord( pwcInBuf, cwc );
  556. } //StemWord
  557. //+-------------------------------------------------------------------------
  558. //
  559. // Method: CLanguageResourceSampleCF::CLanguageResourceSampleCF
  560. //
  561. // Synopsis: Language resource class factory constructor
  562. //
  563. //--------------------------------------------------------------------------
  564. CLanguageResourceSampleCF::CLanguageResourceSampleCF() :
  565. _lRefs( 1 )
  566. {
  567. InterlockedIncrement( &g_cInstances );
  568. } //CLanguageResourceSampleCF
  569. //+-------------------------------------------------------------------------
  570. //
  571. // Method: CLanguageResourceSampleCF::~CLanguageResourceSampleCF
  572. //
  573. // Synopsis: Language resource class factory destructor
  574. //
  575. //--------------------------------------------------------------------------
  576. CLanguageResourceSampleCF::~CLanguageResourceSampleCF()
  577. {
  578. InterlockedDecrement( &g_cInstances );
  579. } //~LanguageResourceSampleCF
  580. //+-------------------------------------------------------------------------
  581. //
  582. // Method: CLanguageResourceSampleCF::QueryInterface
  583. //
  584. // Synopsis: Rebind to the requested interface
  585. //
  586. // Arguments: [riid] -- IID of new interface
  587. // [ppvObject] -- New interface * returned here
  588. //
  589. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  590. //
  591. //--------------------------------------------------------------------------
  592. HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::QueryInterface(
  593. REFIID riid,
  594. void ** ppvObject )
  595. {
  596. if ( IID_IClassFactory == riid )
  597. *ppvObject = (IUnknown *) (IClassFactory *) this;
  598. else if ( IID_IUnknown == riid )
  599. *ppvObject = (IUnknown *) (IPersist *) this;
  600. else
  601. {
  602. *ppvObject = 0;
  603. return E_NOINTERFACE;
  604. }
  605. AddRef();
  606. return S_OK;
  607. } //QueryInterface
  608. //+-------------------------------------------------------------------------
  609. //
  610. // Method: CLanguageResourceSampleCF::AddRef
  611. //
  612. // Synopsis: Increments the refcount
  613. //
  614. // Returns: The new refcount
  615. //
  616. //--------------------------------------------------------------------------
  617. ULONG STDMETHODCALLTYPE CLanguageResourceSampleCF::AddRef()
  618. {
  619. return InterlockedIncrement( &_lRefs );
  620. } //AddRef
  621. //+-------------------------------------------------------------------------
  622. //
  623. // Method: CLanguageResourceSampleCF::Release
  624. //
  625. // Synopsis: Decrement refcount. Delete self if necessary.
  626. //
  627. // Returns: The new refcount
  628. //
  629. //--------------------------------------------------------------------------
  630. ULONG STDMETHODCALLTYPE CLanguageResourceSampleCF::Release()
  631. {
  632. long lTmp = InterlockedDecrement( &_lRefs );
  633. if ( 0 == lTmp )
  634. delete this;
  635. return lTmp;
  636. } //Release
  637. //+-------------------------------------------------------------------------
  638. //
  639. // Method: CLanguageResourceSampleCF::CreateInstance
  640. //
  641. // Synopsis: Creates new Language Resource sample object
  642. //
  643. // Arguments: [pUnkOuter] -- 'Outer' IUnknown
  644. // [riid] -- Interface to bind
  645. // [ppvObject] -- Interface returned here
  646. //
  647. // Returns: S_OK if successful or an appropriate error code
  648. //
  649. //--------------------------------------------------------------------------
  650. HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::CreateInstance(
  651. IUnknown * pUnkOuter,
  652. REFIID riid,
  653. void * * ppvObject )
  654. {
  655. *ppvObject = 0;
  656. if ( IID_IStemmer == riid )
  657. *ppvObject = new CSampleStemmer();
  658. else if ( IID_IWordBreaker == riid )
  659. *ppvObject = new CSampleWordBreaker();
  660. else
  661. return E_NOINTERFACE;
  662. if ( 0 == *ppvObject )
  663. return E_OUTOFMEMORY;
  664. return S_OK;
  665. } //CreateInstance
  666. //+-------------------------------------------------------------------------
  667. //
  668. // Method: CLanguageResourceSampleCF::LockServer
  669. //
  670. // Synopsis: Force class factory to remain loaded
  671. //
  672. // Arguments: [fLock] -- TRUE if locking, FALSE if unlocking
  673. //
  674. // Returns: S_OK
  675. //
  676. //--------------------------------------------------------------------------
  677. HRESULT STDMETHODCALLTYPE CLanguageResourceSampleCF::LockServer( BOOL fLock )
  678. {
  679. if ( fLock )
  680. InterlockedIncrement( &g_cInstances );
  681. else
  682. InterlockedDecrement( &g_cInstances );
  683. return S_OK;
  684. } //LockServer
  685. //+-------------------------------------------------------------------------
  686. //
  687. // Function: DllGetClassObject
  688. //
  689. // Synopsis: Ole DLL load class routine
  690. //
  691. // Arguments: [cid] -- Class to load
  692. // [iid] -- Interface to bind to on class object
  693. // [ppvObj] -- Interface pointer returned here
  694. //
  695. // Returns: Sample language resource class factory
  696. //
  697. //--------------------------------------------------------------------------
  698. extern "C" HRESULT STDMETHODCALLTYPE DllGetClassObject(
  699. REFCLSID cid,
  700. REFIID iid,
  701. void ** ppvObj )
  702. {
  703. IUnknown * pUnk = 0;
  704. *ppvObj = 0;
  705. if ( CLSID_SampleWordBreaker == cid ||
  706. CLSID_SampleStemmer == cid )
  707. {
  708. pUnk = new CLanguageResourceSampleCF();
  709. if ( 0 == pUnk )
  710. return E_OUTOFMEMORY;
  711. #ifdef LEXICON_STEMMER
  712. if ( 0 == g_pStem )
  713. g_pStem = MakeStemObject( g_hModule );
  714. if ( 0 == g_pStem )
  715. {
  716. pUnk->Release();
  717. return E_OUTOFMEMORY;
  718. }
  719. #endif //LEXICON_STEMMER
  720. }
  721. else
  722. {
  723. *ppvObj = 0;
  724. return E_NOINTERFACE;
  725. }
  726. HRESULT hr = pUnk->QueryInterface( iid, ppvObj );
  727. pUnk->Release();
  728. return hr;
  729. } //DllGetClassObject
  730. //+-------------------------------------------------------------------------
  731. //
  732. // Function: DllCanUnloadNow
  733. //
  734. // Synopsis: Notifies DLL to unload (cleanup global resources)
  735. //
  736. // Returns: S_OK if it is acceptable for caller to unload DLL.
  737. // S_FALSE otherwise.
  738. //
  739. //--------------------------------------------------------------------------
  740. extern "C" HRESULT STDMETHODCALLTYPE DllCanUnloadNow( void )
  741. {
  742. if ( 0 == g_cInstances )
  743. return S_OK;
  744. return S_FALSE;
  745. } //DllCanUnloadNow
  746. //+-------------------------------------------------------------------------
  747. //
  748. // Function: DllMain
  749. //
  750. // Synopsis: Standard main entry point for the module.
  751. //
  752. //--------------------------------------------------------------------------
  753. BOOL WINAPI DllMain(
  754. HANDLE hInstance,
  755. DWORD dwReason,
  756. void * lpReserved )
  757. {
  758. if ( DLL_PROCESS_ATTACH == dwReason )
  759. {
  760. g_hModule = (HMODULE) hInstance;
  761. DisableThreadLibraryCalls( (HINSTANCE) hInstance );
  762. }
  763. return TRUE;
  764. } //DllMain
  765. SLangRegistry const English_Sample_LangRes =
  766. {
  767. L"English_Sample", MAKELANGID( LANG_ENGLISH, SUBLANG_ENGLISH_SAMPLE ),
  768. { L"{d225281a-7ca9-4a46-ae7d-c63a9d4815d4}",
  769. L"English_Sample Word Breaker",
  770. L"lrsample.dll",
  771. L"both" },
  772. { L"{0a275611-aa4d-4b39-8290-4baf77703f55}",
  773. L"English_Sample Stemmer",
  774. L"lrsample.dll",
  775. L"both" }
  776. };
  777. //+-------------------------------------------------------------------------
  778. //
  779. // Method: DllRegisterServer
  780. //
  781. // Synopsis: Registers the language resources in the registry
  782. //
  783. //--------------------------------------------------------------------------
  784. STDAPI DllRegisterServer()
  785. {
  786. return RegisterALanguageResource( English_Sample_LangRes );
  787. } //DllRegisterServer
  788. //+-------------------------------------------------------------------------
  789. //
  790. // Method: DllUnregisterServer
  791. //
  792. // Synopsis: Removes the language resources from the registry
  793. //
  794. //--------------------------------------------------------------------------
  795. STDAPI DllUnregisterServer()
  796. {
  797. return UnRegisterALanguageResource( English_Sample_LangRes );
  798. } //DllUnregisterServer