Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

989 lines
30 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: LANG.CXX
  7. //
  8. // Contents: Language Support
  9. //
  10. // Classes: CLanguage
  11. // CLangList
  12. //
  13. // History: 02-May-91 BartoszM Created
  14. //
  15. // Notes: The filtering pipeline is hidden in the Data Repository
  16. // object which serves as a sink for the filter.
  17. // The sink for the Data Repository is the Key Repository.
  18. // The language dependent part of the pipeline
  19. // is obtained from the Language List object and is called
  20. // Key Maker. It consists of:
  21. //
  22. // Word Breaker
  23. // Stemmer (optional)
  24. // Normalizer
  25. // Noise List
  26. //
  27. // Each object serves as a sink for its predecessor,
  28. // Key Repository is the final sink.
  29. //----------------------------------------------------------------------------
  30. #include <pch.cxx>
  31. #pragma hdrstop
  32. #include <tfilt.hxx>
  33. #include <tsource.hxx>
  34. #include <defbreak.hxx>
  35. #include <lang.hxx>
  36. #include <keymak.hxx>
  37. #include <norm.hxx>
  38. #include <noise.hxx>
  39. #include <ciregkey.hxx>
  40. #define DEB_LLIST DEB_USER10
  41. //+-------------------------------------------------------------------------
  42. //
  43. // Method: CLangList::CLangList, public
  44. //
  45. // Synopsis: Create all languages.
  46. //
  47. // Arguments: [pICiCLangRes] -- Client-provided language creator
  48. // [ulMaxIdle] -- Max time (in seconds) before idle language
  49. // object is elegible for deletion.
  50. //
  51. // History: 02-May-91 BartoszM Created
  52. // 14-Jul-94 SitaramR Moved constructor here from lang.hxx
  53. //
  54. //--------------------------------------------------------------------------
  55. CLangList::CLangList( ICiCLangRes * pICiCLangRes,
  56. ULONG ulMaxIdle )
  57. : _xICiCLangRes(pICiCLangRes),
  58. _ulMaxIdle( ulMaxIdle * 1000 )
  59. {
  60. _xICiCLangRes->AddRef();
  61. }
  62. //+-------------------------------------------------------------------------
  63. //
  64. // Method: CLangList::~CLangList, public
  65. //
  66. // Synopsis: Delete all languages.
  67. //
  68. // History: 27-Apr-1994 KyleP Created
  69. //
  70. //--------------------------------------------------------------------------
  71. CLangList::~CLangList()
  72. {
  73. Shutdown();
  74. }
  75. //+-------------------------------------------------------------------------
  76. //
  77. // Method: CLangList::Shutdown, public
  78. //
  79. // Synopsis: Delete all languages.
  80. //
  81. // History: 2-July-1996 dlee Moved from the destructor
  82. //
  83. //--------------------------------------------------------------------------
  84. void CLangList::Shutdown()
  85. {
  86. for ( CLanguage *pLang = _langsAvailable.Pop();
  87. 0 != pLang;
  88. pLang = _langsAvailable.Pop() )
  89. {
  90. delete pLang;
  91. }
  92. } //Shutdown
  93. //+-------------------------------------------------------------------------
  94. //
  95. // Method: CLangList::Supports, public
  96. //
  97. // Synopsis: Determines if language object is suitable for lcid/pid
  98. //
  99. // Arguments: [pLang] -- Language object
  100. // [pid] -- PROPID to-be-used by [pLang]
  101. // [lcid] -- Locale to-be-used by [pLang]
  102. //
  103. // Returns: TRUE is [pLang] can be used to break/stem/etc. the
  104. // locale/property specified by [lcid]/[pid]
  105. //
  106. // History: 05-Jan-1998 KyleP Created
  107. //
  108. //--------------------------------------------------------------------------
  109. BOOL CLangList::Supports( CLanguage const * pLang, PROPID pid, LCID lcid )
  110. {
  111. ciDebugOut(( DEB_LLIST, "Supports, lcid %#x, pid %#x\n", lcid, pid ));
  112. LANGID langId = LANGIDFROMLCID(lcid);
  113. //
  114. // Easy case: Different language.
  115. //
  116. if ( !pLang->IsLocale( langId ) )
  117. {
  118. ciDebugOut(( DEB_LLIST, " supports: lcid doesn't match\n" ));
  119. return FALSE;
  120. }
  121. //
  122. // Easy case: Everything matches.
  123. //
  124. if ( pLang->IsPid( pid ) )
  125. return TRUE;
  126. CLangPidStateInfo stateInfo;
  127. if ( pLang->IsPid( CI_DEFAULT_PID ) )
  128. {
  129. //
  130. // Hard case: Mismatch, but possible default match to previously
  131. // seen pid.
  132. //
  133. if ( _pidHash.LokLookupOrAddLang( langId, stateInfo ) &&
  134. _pidHash.LokIsUseDefaultPid( pid, stateInfo.GetLangIndex() ) )
  135. {
  136. ciDebugOut(( DEB_LLIST, "CLangList::Supports -- Pid 0x%x can use current [default] language object\n", pid ));
  137. return TRUE;
  138. }
  139. //
  140. // Hardest case: Mismatch, but possible default match to brand
  141. // new pid.
  142. //
  143. CLanguage * pNewLang = FindLangAndActivate( langId, pid );
  144. if ( 0 != pNewLang )
  145. {
  146. //
  147. // Obviously not a default match if there is already a specific
  148. // language created. Note that extra work searching the list
  149. // in FindLangAndActivate is not wasted, as the ReturnLang below
  150. // will place the activated language on the top of the list for
  151. // easy access when the call is soon made to fetch the new
  152. // language object supporting this pid/locale.
  153. //
  154. ReturnLang( pNewLang );
  155. ciDebugOut(( DEB_LLIST, " supports found it, but returning FALSE\n" ));
  156. return FALSE;
  157. }
  158. pNewLang = CreateLang( langId, pid, stateInfo, pLang );
  159. if ( 0 == pNewLang )
  160. {
  161. ciDebugOut(( DEB_LLIST, "CLangList::Supports -- New pid 0x%x can use current [default] language object\n", pid ));
  162. Win4Assert( pLang->IsPid( CI_DEFAULT_PID ) ); // May be a bogus assert...
  163. return TRUE;
  164. }
  165. else
  166. {
  167. ciDebugOut(( DEB_LLIST, "CLangList::Supports -- New pid 0x%x cannot use current language object\n", pid ));
  168. ReturnLang( pNewLang ); // This one should get used in just a few calls...
  169. return FALSE;
  170. }
  171. }
  172. return FALSE;
  173. } //Supports
  174. //+---------------------------------------------------------------------------
  175. //
  176. // Member: CLangList::BorrowLang
  177. //
  178. // Synopsis: Borrows a language object
  179. //
  180. // Arguments: [locale] -- Locale
  181. // [pid] -- property id
  182. // [resources] -- Which resources to load.
  183. //
  184. // History: 19-Sep-91 BartoszM Created original GetLang.
  185. // 15-Aug-94 SitaramR Renamed GetLang to BorrowLang and
  186. // added code to look up registry.
  187. // 2-14-97 mohamedn use ICiCLangRes, use lang specific
  188. // default pid cache.
  189. //
  190. //----------------------------------------------------------------------------
  191. CLanguage* CLangList::BorrowLang( LCID locale, PROPID pid, ULONG resources )
  192. {
  193. LANGID langId = LANGIDFROMLCID(locale);
  194. ciDebugOut(( DEB_LLIST, "BorrowLang lang %#x, pid %#x, resources %#x\n",
  195. locale, pid, resources ));
  196. CLanguage * pLang = FindLangAndActivate( langId, pid );
  197. if ( 0 != pLang )
  198. return pLang;
  199. //==========================================================
  200. {
  201. //
  202. // We have to create a new language object. Serialize so that
  203. // multiple threads are not creating simultaneously.
  204. //
  205. CLock lockCreat( _mtxCreate );
  206. //
  207. // lookup the given pid if a default pid hash table exist
  208. // for the given LangID.
  209. // If pid found in default pid cache, use CI_DEFAULT_PID
  210. //
  211. CLangPidStateInfo stateInfo;
  212. if ( _pidHash.LokLookupOrAddLang( langId, stateInfo ) )
  213. {
  214. if ( _pidHash.LokIsUseDefaultPid( pid, stateInfo.GetLangIndex() ) )
  215. pid = CI_DEFAULT_PID;
  216. }
  217. // Check to see if one became available while we were waiting.
  218. pLang = FindLangAndActivate( langId, pid );
  219. if ( 0 != pLang )
  220. return pLang;
  221. // Create a new CLanguage object
  222. pLang = CreateLang( langId, pid, stateInfo, 0, resources );
  223. }
  224. //==========================================================
  225. Win4Assert( pLang );
  226. return pLang;
  227. } //BorrowLang
  228. //+---------------------------------------------------------------------------
  229. //
  230. // Member: CLangList::CreateLang
  231. //
  232. // Synopsis: Creates & Returns a language object
  233. //
  234. // Arguments: [langId] -- Locale
  235. // [pid] -- property id
  236. // [stateInfo] -- stateInfo to set internal state info.
  237. // [pDup] -- Don't create a language just like this.
  238. // Instead, return this one.
  239. // [resources] -- Which to load.
  240. //
  241. // Returns: [pLang] -- a valid pointer to CLanguage object.
  242. //
  243. // History: 19-Sep-91 BartoszM Created original GetLang.
  244. // 15-Aug-94 SitaramR Renamed GetLang to BorrowLang and
  245. // added code to look up registry.
  246. // 2-27-97 mohamedn use ICiCLangRes,
  247. // use lang specific default pid cache.
  248. //
  249. //----------------------------------------------------------------------------
  250. CLanguage * CLangList::CreateLang( LANGID langId,
  251. PROPID pid,
  252. CLangPidStateInfo & stateInfo,
  253. CLanguage const * pDup,
  254. ULONG resources )
  255. {
  256. ciDebugOut(( DEB_LLIST, "CreateLang lang %#x, pid %#x, resources %#x\n",
  257. langId, pid, resources ));
  258. ULONG pidFlags = 0;
  259. if ( LoadWB( resources ) )
  260. pidFlags |= USE_WB_DEFAULT_PID;
  261. if ( LoadST( resources ) )
  262. pidFlags |= USE_STEMMER_DEFAULT_PID;
  263. if ( LoadNO( resources ) )
  264. pidFlags |= USE_NWL_DEFAULT_PID;
  265. //
  266. // Get interfaces to the wordbreaker, stemmer and noise word list
  267. // via ICiCLangRes
  268. //
  269. XInterface<IWordBreaker> xIWBreak;
  270. XInterface<IStemmer> xIStemmer;
  271. XInterface<IStream> xIStrmNoiseFile;
  272. if ( pid == CI_DEFAULT_PID )
  273. {
  274. // create default word breaker, stemmer & nwl
  275. if ( LoadWB( resources ) )
  276. xIWBreak.Set( GetWordBreaker ( langId, pid, stateInfo, TRUE ) );
  277. if ( LoadST( resources ) )
  278. xIStemmer.Set( GetStemmer ( langId, pid, stateInfo ) );
  279. if ( LoadNO( resources ) )
  280. xIStrmNoiseFile.Set( GetNoiseWordList ( langId, pid, stateInfo ) );
  281. }
  282. else
  283. {
  284. // try to create wb, stemmer, and nwl using this pid (non-default)
  285. if ( LoadWB( resources ) )
  286. xIWBreak.Set( GetWordBreaker( langId, pid, stateInfo, FALSE ) );
  287. if ( LoadST( resources ) )
  288. xIStemmer.Set( GetStemmer( langId, pid, stateInfo ) );
  289. if ( LoadNO( resources ) )
  290. xIStrmNoiseFile.Set( GetNoiseWordList( langId, pid, stateInfo ) );
  291. ciDebugOut(( DEB_LLIST, " GetPidFlags: %#x\n", stateInfo.GetPidFlags() ));
  292. if ( stateInfo.GetPidFlags() == pidFlags )
  293. {
  294. // Client requested to use DEFAULT_PID:
  295. // add pid to the default pid cache for this langid,
  296. // scan availble lang objects for a match, and return it if
  297. // found else create default wb, stemmer, and nwl.
  298. Win4Assert ( xIWBreak.IsNull() );
  299. Win4Assert ( xIStemmer.IsNull() );
  300. Win4Assert ( xIStrmNoiseFile.IsNull() );
  301. _pidHash.LokAddDefaultPid( pid, stateInfo.GetLangIndex() );
  302. pid = CI_DEFAULT_PID;
  303. if ( 0 != pDup && pDup->IsLocale( langId ) && pDup->IsPid( pid ) )
  304. return 0;
  305. CLanguage * pLang = FindLangAndActivate( langId, pid );
  306. if ( 0 != pLang )
  307. return pLang;
  308. if ( LoadWB( resources ) )
  309. xIWBreak.Set( GetWordBreaker( langId, pid, stateInfo, TRUE ) );
  310. if ( LoadST( resources ) )
  311. xIStemmer.Set( GetStemmer( langId, pid, stateInfo ) );
  312. if ( LoadNO( resources ) )
  313. xIStrmNoiseFile.Set( GetNoiseWordList( langId, pid, stateInfo ) );
  314. }
  315. else
  316. {
  317. // Client didn't request default pid for all, create default wb, stemmer or nwl
  318. // only if client requested using default pid for it.
  319. if ( stateInfo.IsPidFlagSet( USE_WB_DEFAULT_PID ) )
  320. {
  321. if ( LoadWB( resources ) )
  322. xIWBreak.Set( GetWordBreaker( langId, CI_DEFAULT_PID, stateInfo, TRUE ) );
  323. }
  324. else
  325. Win4Assert ( !xIWBreak.IsNull() );
  326. if ( stateInfo.IsPidFlagSet( USE_STEMMER_DEFAULT_PID ) )
  327. {
  328. if ( LoadST( resources ) )
  329. xIStemmer.Set( GetStemmer( langId, CI_DEFAULT_PID, stateInfo ) );
  330. }
  331. if ( stateInfo.IsPidFlagSet( USE_NWL_DEFAULT_PID ) )
  332. {
  333. if ( LoadNO( resources ) )
  334. xIStrmNoiseFile.Set( GetNoiseWordList ( langId, CI_DEFAULT_PID, stateInfo ) );
  335. }
  336. }
  337. }
  338. // create a language object given the wb, stemmer & nwl.
  339. CLanguage * pLang = new CLanguage( langId,
  340. pid,
  341. xIWBreak,
  342. xIStemmer,
  343. xIStrmNoiseFile );
  344. // Queue can't fail, so no smart pointer for pLang is needed
  345. //------------------------------------------------------
  346. {
  347. CLock lock( _mtxList );
  348. _langsInUse.Queue( pLang );
  349. }
  350. //------------------------------------------------------
  351. return pLang;
  352. } //CreateLang
  353. //+---------------------------------------------------------------------------
  354. //
  355. // Member: CLangList::GetWordBreaker, private
  356. //
  357. // Synopsis: gets a word breaker interface given a langid and a pid
  358. //
  359. // Arguments: [langid] -- langid
  360. // [pid] -- property id
  361. // [stateInfo] -- stateInfo to set internal state info.
  362. // [fCreateDefault]-- flag to create default word breaker if TRUE
  363. //
  364. // Returns: IWordBreaker interface upon success, throws upon failure.
  365. //
  366. // History: 2-27-97 MohamedN Created (from borrowlang)
  367. //
  368. //----------------------------------------------------------------------------
  369. IWordBreaker * CLangList::GetWordBreaker( LANGID langid,
  370. PROPID pid,
  371. CLangPidStateInfo & stateInfo,
  372. BOOL fCreateDefault )
  373. {
  374. IWordBreaker * pIWordBreaker = 0;
  375. ciDebugOut(( DEB_LLIST, "!!! Actually creating a wordbreaker\n" ));
  376. SCODE sc = _xICiCLangRes->GetWordBreaker( langid, pid, &pIWordBreaker );
  377. if ( SUCCEEDED(sc) )
  378. {
  379. Win4Assert( 0 != pIWordBreaker );
  380. }
  381. else
  382. {
  383. switch (sc)
  384. {
  385. case CI_E_NOT_FOUND:
  386. if ( fCreateDefault )
  387. {
  388. ciDebugOut(( DEB_ERROR,"Using default word breaker for locale 0x%x\n",
  389. langid ));
  390. pIWordBreaker = new CDefWordBreaker();
  391. }
  392. // force fall thru
  393. case CI_E_USE_DEFAULT_PID:
  394. stateInfo.SetPidFlags( USE_WB_DEFAULT_PID );
  395. break;
  396. default:
  397. ciDebugOut(( DEB_ERROR, "GetWordBreaker Failed(locale: %x,pid: %x): sc: %x\n",
  398. langid, pid, sc ));
  399. THROW( CException( sc ) );
  400. } // switch
  401. } // else
  402. return pIWordBreaker;
  403. } //GetWordBreaker
  404. //+---------------------------------------------------------------------------
  405. //
  406. // Member: CLangList::GetStemmer, private
  407. //
  408. // Synopsis: gets a stemmer interface given a langid and a pid
  409. //
  410. // Arguments: [langid] -- langid
  411. // [pid] -- property id
  412. // [stateInfo] -- stateInfo to set internal state info.
  413. //
  414. // Returns: IStemmer interface upon success, null or throws upon failure.
  415. //
  416. // History: 2-27-97 MohamedN Created (from borrowlang)
  417. //
  418. //----------------------------------------------------------------------------
  419. IStemmer * CLangList::GetStemmer( LANGID langid,
  420. PROPID pid,
  421. CLangPidStateInfo & stateInfo )
  422. {
  423. SCODE sc = S_OK;
  424. IStemmer * pIStemmer = 0;
  425. sc = _xICiCLangRes->GetStemmer( langid, pid, &pIStemmer );
  426. if ( FAILED(sc) )
  427. {
  428. switch (sc)
  429. {
  430. case CI_E_NOT_FOUND:
  431. ciDebugOut(( DEB_ITRACE,"no stemmer found for locale 0x%x\n",
  432. langid ));
  433. break;
  434. case CI_E_USE_DEFAULT_PID:
  435. stateInfo.SetPidFlags( USE_STEMMER_DEFAULT_PID );
  436. break;
  437. default:
  438. ciDebugOut(( DEB_ERROR, "GetStemmer Failed(locale: %x,pid: %x): sc: %x\n",
  439. langid,pid, sc ));
  440. THROW( CException(sc) );
  441. } // switch
  442. } // else
  443. return pIStemmer;
  444. } // GetStemmer
  445. //+---------------------------------------------------------------------------
  446. //
  447. // Member: CLangList::GetNoiseWordList, private
  448. //
  449. // Synopsis: gets an IStream pointer to the noise word list, given a langid & locale.
  450. //
  451. // Arguments: [langid] -- langid
  452. // [pid] -- property id
  453. // [stateInfo] -- stateInfo to set internal state info.
  454. //
  455. // Returns: IStream interface upon success, null or throws upon failure.
  456. //
  457. // History: 2-27-97 MohamedN Created (from borrowlang)
  458. //
  459. //----------------------------------------------------------------------------
  460. IStream * CLangList::GetNoiseWordList( LANGID langid,
  461. PROPID pid,
  462. CLangPidStateInfo & stateInfo )
  463. {
  464. SCODE sc = S_OK;
  465. IStream * pIStream = 0;
  466. sc = _xICiCLangRes->GetNoiseWordList( langid, pid, &pIStream );
  467. if ( FAILED(sc) )
  468. {
  469. switch (sc)
  470. {
  471. case CI_E_NOT_FOUND:
  472. ciDebugOut(( DEB_ITRACE,"no NoiseWordList found for locale 0x%x\n",
  473. langid ));
  474. break;
  475. case CI_E_USE_DEFAULT_PID:
  476. stateInfo.SetPidFlags( USE_NWL_DEFAULT_PID );
  477. break;
  478. default:
  479. ciDebugOut(( DEB_ERROR, "GetNoiseWordList Failed(locale: %x,pid: %x): sc: %x\n",
  480. langid, pid, sc ));
  481. THROW( CException(sc) );
  482. } // switch
  483. } // else
  484. return pIStream;
  485. } //GetNoiseWordList
  486. //+---------------------------------------------------------------------------
  487. //
  488. // Member: CLangList::FindLangAndActivate, private
  489. //
  490. // Synopsis: If a language with the given locale exits, then return the
  491. // language after making it active
  492. //
  493. // Arguments: [locale] -- Locale
  494. // [pid] -- property id
  495. //
  496. // Notes:
  497. //
  498. // History: 14-Sep-94 SitaramR Created
  499. //
  500. //----------------------------------------------------------------------------
  501. CLanguage *CLangList::FindLangAndActivate( LCID locale, PROPID pid )
  502. {
  503. ciDebugOut(( DEB_LLIST, "FindLangAndActivate lcid %#x, pid %#x\n",
  504. locale, pid ));
  505. ULONG dwTick = GetTickCount();
  506. CLock lock( _mtxList );
  507. CLanguage *pLang = 0;
  508. CLangIter iter( _langsAvailable );
  509. while ( !_langsAvailable.AtEnd(iter) )
  510. {
  511. ciDebugOut(( DEB_LLIST, " looking for match, lcid %#x, iter->IsPid %d\n",
  512. iter->Locale(), iter->IsPid(pid) ));
  513. if ( iter->IsLocale(locale) && iter->IsPid(pid) )
  514. {
  515. pLang = iter.GetLang();
  516. _langsAvailable.Advance(iter);
  517. // move from Available list to InUse list
  518. pLang->Unlink();
  519. _langsInUse.Queue( pLang );
  520. //
  521. // Check one beyond, just to make some progress removing extra copies.
  522. //
  523. if ( !_langsAvailable.AtEnd(iter) &&
  524. (dwTick - iter->LastUsed()) > _ulMaxIdle )
  525. {
  526. CLanguage *pLangTemp = iter.GetLang();
  527. _langsAvailable.Advance(iter);
  528. pLangTemp->Unlink();
  529. delete pLangTemp;
  530. }
  531. break;
  532. }
  533. //
  534. // Is it idle? Ignore overflow. It just means we delete too early
  535. // once every few days.
  536. //
  537. if ( (dwTick - iter->LastUsed()) > _ulMaxIdle )
  538. {
  539. ciDebugOut(( DEB_LLIST, "deleting idle language object\n" ));
  540. pLang = iter.GetLang();
  541. _langsAvailable.Advance(iter);
  542. pLang->Unlink();
  543. delete pLang;
  544. pLang = 0;
  545. }
  546. else
  547. _langsAvailable.Advance(iter);
  548. }
  549. ciDebugOut(( DEB_LLIST, " FindLangAndActivate returning %p\n", pLang ));
  550. return pLang;
  551. } //FindLangAndActivate
  552. //+---------------------------------------------------------------------------
  553. //
  554. // Member: CLangList::ReturnLang, public
  555. //
  556. // Synopsis: Returns a Language
  557. //
  558. // Arguments: [pLang] -- language to be returned
  559. //
  560. // History: 15-Aug-94 SitaramR Created.
  561. //
  562. //----------------------------------------------------------------------------
  563. void CLangList::ReturnLang( CLanguage *pLang )
  564. {
  565. ULONG dwTick = GetTickCount();
  566. CLock lock( _mtxList );
  567. Win4Assert( pLang != 0 );
  568. if ( pLang->IsZombie() )
  569. delete pLang;
  570. else
  571. {
  572. // Move from InUse list to Available list. Put it at the front of
  573. // the list so we don't cycle through the cached languages.
  574. pLang->Unlink();
  575. pLang->SetLastUsed( dwTick );
  576. _langsAvailable.Push( pLang );
  577. }
  578. } //ReturnLang
  579. //+---------------------------------------------------------------------------
  580. //
  581. // Member: CLangList::InvalidateLangResources, public
  582. //
  583. // Synopsis: Delete all language objects so that new language objects
  584. // can be demand loaded from registry.
  585. //
  586. // History: 15-Aug-94 SitaramR Created.
  587. //
  588. //----------------------------------------------------------------------------
  589. void CLangList::InvalidateLangResources()
  590. {
  591. CLock lock( _mtxList );
  592. ciDebugOut(( DEB_LLIST, "InvalidateLangResources\n" ));
  593. for ( CLanguage *pLang = _langsAvailable.Pop();
  594. 0 != pLang;
  595. pLang = _langsAvailable.Pop() )
  596. {
  597. delete pLang;
  598. }
  599. for ( pLang = _langsInUse.Pop();
  600. pLang;
  601. pLang = _langsInUse.Pop() )
  602. {
  603. pLang->Zombify(); // because language is still in use
  604. }
  605. } //InvalidateLangResources
  606. //+---------------------------------------------------------------------------
  607. //
  608. // Member: CDefaultPidHash::LokLookupOrAddLang
  609. //
  610. // Synopsis: Sets internal index to the position of langId
  611. // if langId is found, else creates a langId and
  612. // a corresponding default pid hash table.
  613. //
  614. // Arguments: [langid] -- langid
  615. // [stateInfo] -- to set internal langId specific index value.
  616. //
  617. // Returns: TRUE if lang is found
  618. // FALSE if lang is not found
  619. //
  620. // History: 2-27-97 MohamedN Created
  621. //
  622. //----------------------------------------------------------------------------
  623. BOOL CDefaultPidHash::LokLookupOrAddLang( LANGID langId, CLangPidStateInfo & stateInfo )
  624. {
  625. BOOL fLangIdFound = FALSE;
  626. unsigned i;
  627. //
  628. // find whether langId is in _aLangId Table
  629. //
  630. for ( i = 0; i < _langIdCount ; i++ )
  631. {
  632. if ( _aLangId[i] == langId )
  633. {
  634. fLangIdFound = TRUE;
  635. break;
  636. }
  637. }
  638. //
  639. // if _aLangId is not found in _aLangID table,
  640. // add it, and create a corresponding hash table for it.
  641. //
  642. if ( !fLangIdFound )
  643. {
  644. BOOL fAddedLangId = FALSE;
  645. TRY
  646. {
  647. _aLangId.Add( langId, i );
  648. fAddedLangId = TRUE;
  649. XPtr<CPidHash> xPidHash( new CPidHash( INIT_PID_HASH_TABLE_SIZE ) );
  650. _aHashPidTables.Add( xPidHash.GetPointer(), i );
  651. xPidHash.Acquire();
  652. _langIdCount++;
  653. }
  654. CATCH( CException, e )
  655. {
  656. if ( fAddedLangId )
  657. _aLangId.Remove( i );
  658. RETHROW();
  659. }
  660. END_CATCH
  661. }
  662. //
  663. // store position of langId in our state object (found or created).
  664. //
  665. stateInfo.SetLangIndex(i);
  666. return fLangIdFound;
  667. } //LokLookupOrAddLang
  668. //+---------------------------------------------------------------------------
  669. //
  670. // Member: CDefaultPidHash::LokIsUseDefaultPid
  671. //
  672. // Synopsis: Determines if the passed-in pid belongs to the
  673. // default pid group.
  674. //
  675. // Arguments: [pid] -- property id
  676. // [index] -- position of langid cache table
  677. //
  678. // Returns: TRUE if pid is a member of the default pid group
  679. // FALSE if pid is not a member of the default pid group
  680. //
  681. // History: 2-27-97 MohamedN Created
  682. //
  683. //----------------------------------------------------------------------------
  684. BOOL CDefaultPidHash::LokIsUseDefaultPid( PROPID pid, unsigned index )
  685. {
  686. CWidHashEntry entry(pid);
  687. if ( _aHashPidTables[index] )
  688. return _aHashPidTables[index]->LookUpWorkId( entry );
  689. Win4Assert( !"invalid _aHashPidTables[index]" );
  690. return FALSE;
  691. }
  692. //+---------------------------------------------------------------------------
  693. //
  694. // Member: CDefaultPidHash::LokAddDefaultPid
  695. //
  696. // Synopsis: Inserts pid into the default pid hash table for a given langId
  697. //
  698. // Arguments: [pid] -- property id
  699. // [index] -- position of langid cache table
  700. //
  701. // Returns: none
  702. //
  703. // History: 2-27-97 MohamedN Created
  704. //----------------------------------------------------------------------------
  705. void CDefaultPidHash::LokAddDefaultPid( PROPID pid, unsigned index )
  706. {
  707. CWidHashEntry entry(pid);
  708. Win4Assert( _aHashPidTables[index] );
  709. _aHashPidTables[index]->AddEntry(entry);
  710. }
  711. //+---------------------------------------------------------------------------
  712. //
  713. // Member: CLanguage::CLanguage
  714. //
  715. // Synopsis: Finds language information
  716. //
  717. // History: 16-Jul-91 BartoszM Created.
  718. // 15-Aug-94 SitaramR Changed constructor to take
  719. // wordbreaker and noisefile.
  720. //
  721. //----------------------------------------------------------------------------
  722. #define NOISE_SIZE 257
  723. CLanguage::CLanguage( LCID locale,
  724. PROPID pid,
  725. XInterface<IWordBreaker> & xWBreak,
  726. XInterface<IStemmer> & xStemmer,
  727. XInterface<IStream> & xIStrmNoiseFile )
  728. : _locale( locale ),
  729. _pid( pid ),
  730. _xWBreak( xWBreak.Acquire() ),
  731. _xStemmer( xStemmer.Acquire() ),
  732. _xIStrmNoiseFile( xIStrmNoiseFile.Acquire() ),
  733. _zombie( FALSE )
  734. {
  735. ciDebugOut(( DEB_LLIST, "CLanguage, locale %#x, pid %#x\n", locale, pid ));
  736. //
  737. // Set up for filtering noise word list. This will always use the
  738. // default filter. We don't go through CFilterDriver, because that
  739. // performs too much extra work: Ole binding, property filtering, etc.
  740. //
  741. if ( !_xIStrmNoiseFile.IsNull() )
  742. {
  743. XInterface<CTextIFilter> xTextIFilter( new CTextIFilter );
  744. SCODE sc = xTextIFilter->Load( _xIStrmNoiseFile.GetPointer() );
  745. if ( FAILED(sc) )
  746. {
  747. ciDebugOut(( DEB_ERROR, "Filter of pIStrmNoiseFile(%x) returned 0x%x\n",
  748. _xIStrmNoiseFile.GetPointer(), sc ));
  749. }
  750. else
  751. {
  752. ULONG fBulkyObject;
  753. sc = xTextIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
  754. IFILTER_INIT_CANON_HYPHENS |
  755. IFILTER_INIT_CANON_SPACES |
  756. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES |
  757. IFILTER_INIT_INDEXING_ONLY,
  758. 0,
  759. 0,
  760. &fBulkyObject );
  761. if ( FAILED(sc) )
  762. {
  763. ciDebugOut(( DEB_ERROR,
  764. "IFilter->Init() of pIStrmNoiseFile(%x) returned 0x%x.\n",
  765. _xIStrmNoiseFile.GetPointer(), sc ));
  766. }
  767. else
  768. {
  769. STAT_CHUNK statChunk;
  770. for ( sc = xTextIFilter->GetChunk( &statChunk );
  771. SUCCEEDED(sc) && (statChunk.flags & CHUNK_TEXT) == 0;
  772. sc = xTextIFilter->GetChunk( &statChunk ) );
  773. if ( FAILED(sc) )
  774. {
  775. ciDebugOut(( DEB_ERROR,
  776. "IFilter->GetChunk() of pIStrmNoiseFile(%x) returned 0x%x.\n",
  777. _xIStrmNoiseFile.GetPointer(), sc ));
  778. }
  779. else
  780. {
  781. CNoiseListInit noiseInit( NOISE_SIZE );
  782. //
  783. // If we got this far, try creating the key maker.
  784. //
  785. CKeyMaker keymak( _xWBreak.GetPointer(), noiseInit );
  786. OCCURRENCE occ = 0;
  787. CTextSource tsource( xTextIFilter.GetPointer(), statChunk );
  788. keymak.PutStream( occ, &tsource );
  789. _xNoiseTable.Set( noiseInit.AcqStringTable() );
  790. }
  791. }
  792. }
  793. }
  794. else
  795. {
  796. //
  797. // _xIStrmNoiseFile is null, don't use a noise file in filtering
  798. //
  799. ciDebugOut(( DEB_ITRACE,
  800. "Creating language object 0x%x, noise file = EMPTY\n",
  801. locale ));
  802. }
  803. } //CLanguage
  804. CLanguage::~CLanguage()
  805. {
  806. }