Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

663 lines
18 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: NOISE.CXX
  7. //
  8. // Contents: Noise list
  9. //
  10. // Classes: CNoiseList, CNoiseListInit, CNoiseListEmpty
  11. // CLString, CStringList, CStringTable
  12. //
  13. // History: 11-Jul-91 BartoszM Created
  14. //
  15. //----------------------------------------------------------------------------
  16. #include <pch.cxx>
  17. #pragma hdrstop
  18. #include <noise.hxx>
  19. //+---------------------------------------------------------------------------
  20. //
  21. // Member: CLString::CLString, public
  22. //
  23. // Synopsis: Initializes and links a string list element
  24. //
  25. // Arguments: [cb] -- length
  26. // [buf] -- string
  27. // [next] -- next link in the chain
  28. //
  29. // History: 16-Jul-91 BartoszM Created.
  30. //
  31. //----------------------------------------------------------------------------
  32. CLString::CLString ( UINT cb, const BYTE* buf, CLString* next )
  33. {
  34. _cb = cb;
  35. #if CIDBG == 1
  36. cb++;
  37. #endif
  38. memcpy ( _buf, buf, cb );
  39. _next = next;
  40. }
  41. //+---------------------------------------------------------------------------
  42. //
  43. // Member: CLString::operator new, public
  44. //
  45. // Synopsis: Allocates a string list element
  46. //
  47. // Arguments: [n] -- size of class instance
  48. // [cb] -- length of string buffer needed
  49. //
  50. // History: 10 Apr 96 AlanW Created.
  51. //
  52. //----------------------------------------------------------------------------
  53. void *
  54. CLString::operator new ( size_t n, UINT cb )
  55. {
  56. #if CIDBG == 1
  57. cb++;
  58. #endif
  59. return new BYTE [n+cb];
  60. }
  61. //+---------------------------------------------------------------------------
  62. //
  63. // Member: CStringList::~CStringList, public
  64. //
  65. // Synopsis: Free linked list
  66. //
  67. // History: 16-Jul-91 BartoszM Created.
  68. //
  69. //----------------------------------------------------------------------------
  70. CStringList::~CStringList()
  71. {
  72. while ( _head != 0 )
  73. {
  74. CLString* p = _head;
  75. _head = _head->Next();
  76. delete p;
  77. }
  78. }
  79. //+---------------------------------------------------------------------------
  80. //
  81. // Member: CStringList::Add, public
  82. //
  83. // Synopsis: Adds a string to list
  84. //
  85. // Arguments: [cb] -- length
  86. // [str] -- string
  87. //
  88. // History: 16-Jul-91 BartoszM Created.
  89. //
  90. //----------------------------------------------------------------------------
  91. void CStringList::Add ( UINT cb, const BYTE * str )
  92. {
  93. _head = new (cb) CLString ( cb, str, _head );
  94. }
  95. //+---------------------------------------------------------------------------
  96. //
  97. // Member: CStringList::Find, public
  98. //
  99. // Synopsis: Returns TRUE if string found in the list, FALSE otherwise
  100. //
  101. // Arguments: [cb] -- length
  102. // [str] -- string
  103. //
  104. // History: 16-Jul-91 BartoszM Created.
  105. //
  106. //----------------------------------------------------------------------------
  107. BOOL CStringList::Find ( UINT cb, const BYTE* str ) const
  108. {
  109. CLString* pStr = _head;
  110. while ( pStr != 0 )
  111. {
  112. if ( pStr->Equal ( cb, str ) )
  113. {
  114. return TRUE;
  115. }
  116. pStr = pStr->Next();
  117. }
  118. return FALSE;
  119. }
  120. #if CIDBG == 1
  121. void CStringList::Dump () const
  122. {
  123. CLString * p = _head;
  124. while ( p )
  125. {
  126. p->Dump();
  127. p = p->Next();
  128. }
  129. ciDebugOut (( DEB_ITRACE, "\n" ));
  130. }
  131. #endif // CIDBG == 1
  132. //+---------------------------------------------------------------------------
  133. //
  134. // Member: CStringTable::CStringTable, public
  135. //
  136. // Synopsis: Create hash table of given size
  137. //
  138. // Arguments: [size] -- size
  139. //
  140. // History: 16-Jul-91 BartoszM Created.
  141. //
  142. //----------------------------------------------------------------------------
  143. CStringTable::CStringTable( UINT size )
  144. {
  145. _size = size;
  146. _bucket = new CStringList[size];
  147. }
  148. //+---------------------------------------------------------------------------
  149. //
  150. // Member: CStringTable::~CStringTable, public
  151. //
  152. // Synopsis: Free linked lists
  153. //
  154. // History: 16-Jul-91 BartoszM Created.
  155. //
  156. //----------------------------------------------------------------------------
  157. CStringTable::~CStringTable()
  158. {
  159. delete [] _bucket;
  160. }
  161. //+---------------------------------------------------------------------------
  162. //
  163. // Member: CStringTable::Add, public
  164. //
  165. // Synopsis: Add a string to hash table
  166. //
  167. // Arguments: [cb] -- size
  168. // [str] -- string
  169. // [hash] -- precomputed hash value
  170. //
  171. // History: 16-Jul-91 BartoszM Created.
  172. //
  173. //----------------------------------------------------------------------------
  174. void CStringTable::Add ( UINT cb, const BYTE* str, UINT hash )
  175. {
  176. _bucket[_index(hash)].Add ( cb, str );
  177. }
  178. #if CIDBG == 1
  179. void CStringTable::Dump () const
  180. {
  181. for ( unsigned i = 0; i < _size; i++ )
  182. {
  183. if ( !_bucket[i].IsEmpty() )
  184. {
  185. ciDebugOut (( DEB_ITRACE, "%3d: ", i ));
  186. _bucket[i].Dump();
  187. }
  188. }
  189. }
  190. #endif // CIDBG == 1
  191. //+---------------------------------------------------------------------------
  192. //
  193. // Member: CNoiseList::CNoiseList
  194. //
  195. // Synopsis: constructor for noise list
  196. //
  197. // Effects: gets buffers from key repository
  198. //
  199. // Arguments: [krep] -- key repository to give words to.
  200. //
  201. // History: 05-June-91 t-WadeR Created.
  202. //
  203. //----------------------------------------------------------------------------
  204. CNoiseList::CNoiseList( const CStringTable& table, PKeyRepository& krep )
  205. : _krep(krep),
  206. _table(table),
  207. _cNoiseWordsSkipped(0),
  208. _cNonNoiseAltWords(0),
  209. _fFoundNoise( FALSE )
  210. {
  211. krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );
  212. _cbMaxOutBuf = *_pcbOutBuf;
  213. }
  214. //+---------------------------------------------------------------------------
  215. //
  216. // Member: CNoiseList::GetBuffers
  217. //
  218. // Synopsis: Returns address of normalizer's input buffers
  219. //
  220. // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
  221. // [ppbInBuf] -- pointer to pointer to receive address of buffer
  222. //
  223. // History: 05-June-91 t-WadeR Created.
  224. //
  225. //----------------------------------------------------------------------------
  226. void CNoiseList::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
  227. {
  228. // Don't actually have an in buffer, so pass through the out buffer
  229. *ppbInBuf = _pbOutBuf;
  230. *_pcbOutBuf = _cbMaxOutBuf;
  231. *ppcbInBuf = _pcbOutBuf;
  232. }
  233. //+---------------------------------------------------------------------------
  234. //
  235. // Member: CNoiseList::GetFlags
  236. //
  237. // Synopsis: Returns address of ranking and range flags
  238. //
  239. // Arguments: [ppRange] -- range flag
  240. // [ppRank] -- rank flag
  241. //
  242. // History: 11-Feb-92 BartoszM Created.
  243. //
  244. //----------------------------------------------------------------------------
  245. void CNoiseList::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
  246. {
  247. _krep.GetFlags ( ppRange, ppRank );
  248. }
  249. //+---------------------------------------------------------------------------
  250. //
  251. // Member: CNoiseList::PutWord
  252. //
  253. // Synopsis: If word isn't a noise word, passes it to the key repository
  254. //
  255. // Effects: calls _krep.PutKey
  256. //
  257. // Arguments: [hash] -- precomputed hash value
  258. //
  259. // History: 05-June-91 t-WadeR Created stub.
  260. //
  261. //----------------------------------------------------------------------------
  262. void CNoiseList::PutWord ( UINT hash )
  263. {
  264. // Check the word to see if it should pass through.
  265. if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash ))
  266. {
  267. _fFoundNoise = TRUE;
  268. //
  269. // if all alternate words at current occurrence have been noise words,
  270. // then it is equivalent to one noise word at current occcurrence,
  271. // hence increment count of noise words skipped
  272. //
  273. if ( _cNonNoiseAltWords == 0 )
  274. _cNoiseWordsSkipped++;
  275. }
  276. else
  277. {
  278. //
  279. // output word to key repository. The count of noise words skipped refers to
  280. // noise words at previous occurrences only
  281. //
  282. _krep.PutKey( _cNoiseWordsSkipped );
  283. _cNoiseWordsSkipped = 0;
  284. }
  285. // reset count of non-noise words in preparation for word at next occurrence
  286. _cNonNoiseAltWords = 0;
  287. (*_pocc)++;
  288. }
  289. //+---------------------------------------------------------------------------
  290. //
  291. // Member: CNoiseList::PutAltWord
  292. //
  293. // Synopsis: If word isn't a noise word, passes it to the key repository
  294. //
  295. // Effects: calls _krep.PutKey
  296. //
  297. // Arguments: [hash] -- precomputed hash value
  298. //
  299. // History: 03-May-95 SitaramR Created
  300. //
  301. //----------------------------------------------------------------------------
  302. void CNoiseList::PutAltWord ( UINT hash )
  303. {
  304. // Check the word to see if it should pass through.
  305. if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
  306. {
  307. _fFoundNoise = TRUE;
  308. }
  309. else
  310. {
  311. //
  312. // since this is not the last of a sequence of alternate words we increment
  313. // count of non-noise words at current occurrence
  314. //
  315. _cNonNoiseAltWords++;
  316. //
  317. // output word to key repository. The count of noise words skipped refers to
  318. // noise words at previous occurrences only
  319. //
  320. _krep.PutKey( _cNoiseWordsSkipped );
  321. _cNoiseWordsSkipped = 0;
  322. }
  323. }
  324. //+---------------------------------------------------------------------------
  325. //
  326. // Member: CNoiseList::StartAltPhrase
  327. //
  328. // History: 29-Nov-94 SitaramR Created
  329. //
  330. //----------------------------------------------------------------------------
  331. void CNoiseList::StartAltPhrase()
  332. {
  333. _krep.StartAltPhrase( _cNoiseWordsSkipped );
  334. _cNoiseWordsSkipped = 0;
  335. }
  336. //+---------------------------------------------------------------------------
  337. //
  338. // Member: CNoiseList::EndAltPhrase
  339. //
  340. // History: 29-Nov-94 SitaramR Created
  341. //
  342. //----------------------------------------------------------------------------
  343. void CNoiseList::EndAltPhrase()
  344. {
  345. _krep.EndAltPhrase( _cNoiseWordsSkipped );
  346. _cNoiseWordsSkipped = 0;
  347. }
  348. //+---------------------------------------------------------------------------
  349. //
  350. // Member: CNoiseListInit::CNoiseListInit
  351. //
  352. // Synopsis: Creates a hash table to be filled
  353. //
  354. // Arguments: [size] -- size of the hash table (possibly prime #)
  355. //
  356. // History: 15-Jul-91 BartoszM Created.
  357. //
  358. //----------------------------------------------------------------------------
  359. CNoiseListInit::CNoiseListInit ( UINT size )
  360. {
  361. _table = new CStringTable ( size );
  362. END_CONSTRUCTION( CNoiseListInit );
  363. }
  364. //+---------------------------------------------------------------------------
  365. //
  366. // Member: CNoiseListInit::GetBuffers
  367. //
  368. // Synopsis: Returns address of repository's input buffers
  369. //
  370. // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
  371. // [ppbInBuf] -- pointer to pointer to receive address of buffer
  372. //
  373. // History: 15-Jul-91 BartoszM Created.
  374. //
  375. //----------------------------------------------------------------------------
  376. void CNoiseListInit::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
  377. {
  378. _key.SetCount(MAXKEYSIZE);
  379. *ppcbInBuf = _key.GetCountAddress();
  380. *ppbInBuf = _key.GetWritableBuf();
  381. }
  382. //+---------------------------------------------------------------------------
  383. //
  384. // Member: CNoiseListInit::PutWord
  385. //
  386. // Synopsis: Puts a key into the hash table
  387. //
  388. // Arguments: [hash] -- hash value
  389. //
  390. // History: 15-Jul-91 BartoszM Created
  391. //
  392. //----------------------------------------------------------------------------
  393. void CNoiseListInit::PutWord ( UINT hash )
  394. {
  395. _table->Add ( _key.Count(), _key.GetBuf(), hash );
  396. }
  397. //+---------------------------------------------------------------------------
  398. //
  399. // Member: CNoiseListInit::PutAltWord
  400. //
  401. // Synopsis: Puts a key into the hash table
  402. //
  403. // Arguments: [hash] -- hash value
  404. //
  405. // History: 03-May-95 SitaramR Created
  406. //
  407. //----------------------------------------------------------------------------
  408. void CNoiseListInit::PutAltWord ( unsigned hash )
  409. {
  410. PutWord( hash );
  411. }
  412. //+---------------------------------------------------------------------------
  413. //
  414. // Member: CNoiseListEmpty::CNoiseListEmpty
  415. //
  416. // Synopsis: constructor for a default empty noise list
  417. //
  418. // Effects: gets buffers from key repository
  419. //
  420. // Arguments: [krep] -- key repository to give words to.
  421. // [ulFuzzy] -- Fuzziness of query
  422. //
  423. // History: 16-Jul-91 BartoszM Created.
  424. //
  425. //----------------------------------------------------------------------------
  426. CNoiseListEmpty::CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy )
  427. : _krep(krep),
  428. _ulGenerateMethod(ulFuzzy),
  429. _cNoiseWordsSkipped(0),
  430. _cNonNoiseAltWords(0),
  431. _fFoundNoise( FALSE )
  432. {
  433. krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );
  434. _cbMaxOutBuf = *_pcbOutBuf;
  435. }
  436. //+---------------------------------------------------------------------------
  437. //
  438. // Member: CNoiseListEmpty::GetBuffers
  439. //
  440. // Synopsis: Returns address of normalizer's input buffers
  441. //
  442. // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
  443. // [ppbInBuf] -- pointer to pointer to receive address of buffer
  444. //
  445. // History: 16-Jul-91 BartoszM Created.
  446. //
  447. //----------------------------------------------------------------------------
  448. void CNoiseListEmpty::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
  449. {
  450. // Don't actually have an in buffer, so pass through the out buffer
  451. *ppbInBuf = _pbOutBuf;
  452. *_pcbOutBuf = _cbMaxOutBuf;
  453. *ppcbInBuf = _pcbOutBuf;
  454. }
  455. //+---------------------------------------------------------------------------
  456. //
  457. // Member: CNoiseListEmpty::GetFlags
  458. //
  459. // Synopsis: Returns address of ranking and range flags
  460. //
  461. // Arguments: [ppRange] -- range flag
  462. // [ppRank] -- rank flag
  463. //
  464. // History: 11-Feb-92 BartoszM Created.
  465. //
  466. //----------------------------------------------------------------------------
  467. void CNoiseListEmpty::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
  468. {
  469. _krep.GetFlags ( ppRange, ppRank );
  470. }
  471. //+---------------------------------------------------------------------------
  472. //
  473. // Member: CNoiseListEmpty::PutWord
  474. //
  475. // Synopsis: If word isn't a noise word, passes it to the key repository
  476. //
  477. // Effects: calls _krep.PutKey
  478. //
  479. // Arguments: [hash] -- hash value (ignored)
  480. //
  481. // History: 16-Jul-91 BartoszM Created
  482. //
  483. // Notes: Filters out one letter words, unless it is a prefix (*) query
  484. //
  485. //----------------------------------------------------------------------------
  486. void CNoiseListEmpty::PutWord ( UINT )
  487. {
  488. //
  489. // Even though the noise list is empty, we are modeling PutBreak()
  490. // by a skip of appropriate number of noise words, and we are counting
  491. // 1 letter words as noise words. Note that the length is in bytes and there is
  492. // a 1 byte prefix.
  493. //
  494. if ( _ulGenerateMethod != GENERATE_METHOD_PREFIX && *_pcbOutBuf <= NOISE_WORD_LENGTH )
  495. {
  496. _fFoundNoise = TRUE;
  497. //
  498. // if all alternate words at current occurrence have been noise words,
  499. // then it is equivalent to one noise word at current occcurrence,
  500. // hence increment count of noise words skipped
  501. //
  502. if ( _cNonNoiseAltWords == 0 )
  503. _cNoiseWordsSkipped++;
  504. }
  505. else
  506. {
  507. //
  508. // output word to key repository. The count of noise words skipped refers to
  509. // noise words at previous occurrences only
  510. //
  511. _krep.PutKey( _cNoiseWordsSkipped );
  512. _cNoiseWordsSkipped = 0;
  513. }
  514. // reset count of non-noise words in preparation for word at next occurrence
  515. _cNonNoiseAltWords = 0;
  516. (*_pocc)++;
  517. }
  518. //+---------------------------------------------------------------------------
  519. //
  520. // Member: CNoiseListEmpty::PutAltWord
  521. //
  522. // Synopsis: If word isn't a noise word, passes it to the key repository
  523. //
  524. // Effects: calls _krep.PutKey
  525. //
  526. // Arguments: [hash] -- precomputed hash value
  527. //
  528. // History: 03-May-95 SitaramR Created
  529. //
  530. // Notes: Filters out one letter words, unless it is a prefix (*) query
  531. //
  532. //----------------------------------------------------------------------------
  533. void CNoiseListEmpty::PutAltWord ( UINT hash )
  534. {
  535. //
  536. // Even though the noise list is empty, we are modeling PutBreak()
  537. // by a skip of appropriate number of noise words, and we are counting
  538. // 1 letter words as noise words. Note that the length is in bytes and there is
  539. // a 1 byte prefix.
  540. //
  541. if ( _ulGenerateMethod == GENERATE_METHOD_PREFIX || *_pcbOutBuf > NOISE_WORD_LENGTH )
  542. {
  543. //
  544. // since this is not the last of a sequence of alternate words we increment
  545. // count of non-noise words at current occurrence
  546. //
  547. _cNonNoiseAltWords++;
  548. //
  549. // output word to key repository. The count of noise words skipped refers to
  550. // noise words at previous occurrences only
  551. //
  552. _krep.PutKey( _cNoiseWordsSkipped );
  553. _cNoiseWordsSkipped = 0;
  554. }
  555. else
  556. _fFoundNoise = TRUE;
  557. }
  558. //+---------------------------------------------------------------------------
  559. //
  560. // Member: CNoiseListEmpty::StartAltPhrase
  561. //
  562. // Synopsis: Pass on StartAltPhrase to key repository
  563. //
  564. // History: 20-Feb-95 SitaramR Created
  565. //
  566. //----------------------------------------------------------------------------
  567. void CNoiseListEmpty::StartAltPhrase()
  568. {
  569. _krep.StartAltPhrase( _cNoiseWordsSkipped );
  570. _cNoiseWordsSkipped = 0;
  571. }
  572. //+---------------------------------------------------------------------------
  573. //
  574. // Member: CNoiseListEmpty::EndAltPhrase
  575. //
  576. // Synopsis: Pass on EndAltPhrase to key repository
  577. //
  578. // History: 20-Feb-95 SitaramR Created
  579. //
  580. //----------------------------------------------------------------------------
  581. void CNoiseListEmpty::EndAltPhrase()
  582. {
  583. _krep.EndAltPhrase( _cNoiseWordsSkipped );
  584. _cNoiseWordsSkipped = 0;
  585. }