//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991 - 2000. // // File: NOISE.CXX // // Contents: Noise list // // Classes: CNoiseList, NoiseListInit, NoiseListEmpty // CLString, CStringList, CStringTable // // History: 11-Jul-91 BartoszM Created // //---------------------------------------------------------------------------- #include #pragma hdrstop #include //+--------------------------------------------------------------------------- // // Member: CLString::CLString, public // // Synopsis: Initializes and links a string list element // // Arguments: [cb] -- length // [buf] -- string // [next] -- next link in the chain // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- CLString::CLString ( UINT cb, const BYTE* buf, CLString* next ) { _cb = cb; #if CIDBG == 1 cb++; #endif memcpy ( _buf, buf, cb ); _next = next; } //+--------------------------------------------------------------------------- // // Member: CLString::operator new, public // // Synopsis: Allocates a string list element // // Arguments: [n] -- size of class instance // [cb] -- length of string buffer needed // // History: 10 Apr 96 AlanW Created. // //---------------------------------------------------------------------------- void * CLString::operator new ( size_t n, UINT cb ) { #if CIDBG == 1 cb++; #endif return new BYTE [n+cb]; } //+--------------------------------------------------------------------------- // // Member: CStringList::~CStringList, public // // Synopsis: Free linked list // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- CStringList::~CStringList() { while ( _head != 0 ) { CLString* p = _head; _head = _head->Next(); delete p; } } //+--------------------------------------------------------------------------- // // Member: CStringList::Add, public // // Synopsis: Adds a string to list // // Arguments: [cb] -- length // [str] -- string // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- void CStringList::Add ( UINT cb, const BYTE * str ) { _head = new (cb) CLString ( cb, str, _head ); } //+--------------------------------------------------------------------------- // // Member: CStringList::Find, public // // Synopsis: Returns TRUE if string found in the list, FALSE otherwise // // Arguments: [cb] -- length // [str] -- string // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- BOOL CStringList::Find ( UINT cb, const BYTE* str ) const { CLString* pStr = _head; while ( pStr != 0 ) { if ( pStr->Equal ( cb, str ) ) { return TRUE; } pStr = pStr->Next(); } return FALSE; } #if CIDBG == 1 void CStringList::Dump () const { CLString * p = _head; while ( p ) { p->Dump(); p = p->Next(); } ciDebugOut (( DEB_ITRACE, "\n" )); } #endif // CIDBG == 1 //+--------------------------------------------------------------------------- // // Member: CStringTable::CStringTable, public // // Synopsis: Create hash table of given size // // Arguments: [size] -- size // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- CStringTable::CStringTable( UINT size ) { _size = size; _bucket = new CStringList[size]; } //+--------------------------------------------------------------------------- // // Member: CStringTable::~CStringTable, public // // Synopsis: Free linked lists // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- CStringTable::~CStringTable() { delete [] _bucket; } //+--------------------------------------------------------------------------- // // Member: CStringTable::Add, publid // // Synopsis: Add a string to hash table // // Arguments: [cb] -- size // [str] -- string // [hash] -- precomputed hash value // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- void CStringTable::Add ( UINT cb, const BYTE* str, UINT hash ) { _bucket[_index(hash)].Add ( cb, str ); } #if CIDBG == 1 void CStringTable::Dump () const { for ( unsigned i = 0; i < _size; i++ ) { if ( !_bucket[i].IsEmpty() ) { ciDebugOut (( DEB_ITRACE, "%3d: ", i )); _bucket[i].Dump(); } } } #endif // CIDBG == 1 //+--------------------------------------------------------------------------- // // Member: CNoiseList::CNoiseList // // Synopsis: constructor for noise list // // Effects: gets buffers from key repository // // Arguments: [krep] -- key repository to give words to. // // History: 05-June-91 t-WadeR Created. // //---------------------------------------------------------------------------- CNoiseList::CNoiseList( const CStringTable& table, PKeyRepository& krep ) : _krep(krep), _table(table), _cNoiseWordsSkipped(0), _cNonNoiseAltWords(0), _fFoundNoise( FALSE ) { krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc ); _cbMaxOutBuf = *_pcbOutBuf; } //+--------------------------------------------------------------------------- // // Member: CNoiseList::GetBuffers // // Synopsis: Returns address of normilizer's input buffers // // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer // [ppbInBuf] -- pointer to pointer to recieve address of buffer // // History: 05-June-91 t-WadeR Created. // //---------------------------------------------------------------------------- void CNoiseList::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf ) { // Don't actually have an in buffer, so pass through the out buffer *ppbInBuf = _pbOutBuf; *_pcbOutBuf = _cbMaxOutBuf; *ppcbInBuf = _pcbOutBuf; } //+--------------------------------------------------------------------------- // // Member: CNoiseList::GetFlags // // Synopsis: Returns address of ranking and range flags // // Arguments: [ppRange] -- range flag // [ppRank] -- rank flag // // History: 11-Fab-92 BartoszM Created. // //---------------------------------------------------------------------------- void CNoiseList::GetFlags ( BOOL** ppRange, CI_RANK** ppRank ) { _krep.GetFlags ( ppRange, ppRank ); } //+--------------------------------------------------------------------------- // // Member: CNoiseList::PutWord // // Synopsis: If word isn't a noise word, passes it to the key repository // // Effects: calls _krep.PutKey // // Arguments: [hash] -- precomputed hash value // // History: 05-June-91 t-WadeR Created stub. // //---------------------------------------------------------------------------- void CNoiseList::PutWord ( UINT hash ) { // Check the word to see if it should pass through. if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash )) { _fFoundNoise = TRUE; // // if all alternate words at current occurrence have been noise words, // then it is equivalent to one noise word at current occcurrence, // hence increment count of noise words skipped // if ( _cNonNoiseAltWords == 0 ) _cNoiseWordsSkipped++; } else { // // output word to key repository. The count of noise words skipped refers to // noise words at previous occurrences only // _krep.PutKey( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } // reset count of non-noise words in preparation for word at next occurrence _cNonNoiseAltWords = 0; (*_pocc)++; } //+--------------------------------------------------------------------------- // // Member: CNoiseList::PutAltWord // // Synopsis: If word isn't a noise word, passes it to the key repository // // Effects: calls _krep.PutKey // // Arguments: [hash] -- precomputed hash value // // History: 03-May-95 SitaramR Created // //---------------------------------------------------------------------------- void CNoiseList::PutAltWord ( UINT hash ) { // Check the word to see if it should pass through. if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) ) { _fFoundNoise = TRUE; } else { // // since this is not the last of a sequence of alternate words we increment // count of non-noise words at current occurrence // _cNonNoiseAltWords++; // // output word to key repository. The count of noise words skipped refers to // noise words at previous occurrences only // _krep.PutKey( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } } //+--------------------------------------------------------------------------- // // Member: CNoiseList::StartAltPhrase // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- void CNoiseList::StartAltPhrase() { _krep.StartAltPhrase( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } //+--------------------------------------------------------------------------- // // Member: CNoiseList::EndAltPhrase // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- void CNoiseList::EndAltPhrase() { _krep.EndAltPhrase( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } //+--------------------------------------------------------------------------- // // Member: CNoiseListInit::CNoiseListInit // // Synopsis: Creates a hash table to be filled // // Arguments: [size] -- size of the hash table (possibly prime #) // // History: 15-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- CNoiseListInit::CNoiseListInit ( UINT size ) { _table = new CStringTable ( size ); END_CONSTRUCTION( CNoiseListInit ); } //+--------------------------------------------------------------------------- // // Member: CNoiseListInit::GetBuffers // // Synopsis: Returns address of repository's input buffers // // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer // [ppbInBuf] -- pointer to pointer to recieve address of buffer // // History: 15-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- void CNoiseListInit::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf ) { _key.SetCount(MAXKEYSIZE); *ppcbInBuf = _key.GetCountAddress(); *ppbInBuf = _key.GetWritableBuf(); } //+--------------------------------------------------------------------------- // // Member: CNoiseListInit::PutWord // // Synopsis: Puts a key into the hash table // // Arguments: [hash] -- hash value // // History: 15-Jul-91 BartoszM Created // //---------------------------------------------------------------------------- void CNoiseListInit::PutWord ( UINT hash ) { _table->Add ( _key.Count(), _key.GetBuf(), hash ); } //+--------------------------------------------------------------------------- // // Member: CNoiseListInit::PutAltWord // // Synopsis: Puts a key into the hash table // // Arguments: [hash] -- hash value // // History: 03-May-95 SitaramR Created // //---------------------------------------------------------------------------- void CNoiseListInit::PutAltWord ( unsigned hash ) { PutWord( hash ); } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::CNoiseListEmpty // // Synopsis: constructor for a default empty noise list // // Effects: gets buffers from key repository // // Arguments: [krep] -- key repository to give words to. // [ulFuzzy] -- Fuzziness of query // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- CNoiseListEmpty::CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy ) : _krep(krep), _ulGenerateMethod(ulFuzzy), _cNoiseWordsSkipped(0), _cNonNoiseAltWords(0), _fFoundNoise( FALSE ) { krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc ); _cbMaxOutBuf = *_pcbOutBuf; } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::GetBuffers // // Synopsis: Returns address of normilizer's input buffers // // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer // [ppbInBuf] -- pointer to pointer to recieve address of buffer // // History: 16-Jul-91 BartoszM Created. // //---------------------------------------------------------------------------- void CNoiseListEmpty::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf ) { // Don't actually have an in buffer, so pass through the out buffer *ppbInBuf = _pbOutBuf; *_pcbOutBuf = _cbMaxOutBuf; *ppcbInBuf = _pcbOutBuf; } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::GetFlags // // Synopsis: Returns address of ranking and range flags // // Arguments: [ppRange] -- range flag // [ppRank] -- rank flag // // History: 11-Fab-92 BartoszM Created. // //---------------------------------------------------------------------------- void CNoiseListEmpty::GetFlags ( BOOL** ppRange, CI_RANK** ppRank ) { _krep.GetFlags ( ppRange, ppRank ); } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::PutWord // // Synopsis: If word isn't a noise word, passes it to the key repository // // Effects: calls _krep.PutKey // // Arguments: [hash] -- hash value (ignored) // // History: 16-Jul-91 BartoszM Created // // Notes: Filters out one letter words, unless it is a prefix (*) query // //---------------------------------------------------------------------------- void CNoiseListEmpty::PutWord ( UINT ) { // // Even though the noise list is empty, we are modeling PutBreak() // by a skip of appropriate number of noise words, and we are counting // 1 letter words as noise words. Note that the length is in bytes and there is // a 1 byte prefix. // if ( _ulGenerateMethod != GENERATE_METHOD_PREFIX && *_pcbOutBuf <= NOISE_WORD_LENGTH ) { _fFoundNoise = TRUE; // // if all alternate words at current occurrence have been noise words, // then it is equivalent to one noise word at current occcurrence, // hence increment count of noise words skipped // if ( _cNonNoiseAltWords == 0 ) _cNoiseWordsSkipped++; } else { // // output word to key repository. The count of noise words skipped refers to // noise words at previous occurrences only // _krep.PutKey( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } // reset count of non-noise words in preparation for word at next occurrence _cNonNoiseAltWords = 0; (*_pocc)++; } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::PutAltWord // // Synopsis: If word isn't a noise word, passes it to the key repository // // Effects: calls _krep.PutKey // // Arguments: [hash] -- precomputed hash value // // History: 03-May-95 SitaramR Created // // Notes: Filters out one letter words, unless it is a prefix (*) query // //---------------------------------------------------------------------------- void CNoiseListEmpty::PutAltWord ( UINT hash ) { // // Even though the noise list is empty, we are modeling PutBreak() // by a skip of appropriate number of noise words, and we are counting // 1 letter words as noise words. Note that the length is in bytes and there is // a 1 byte prefix. // if ( _ulGenerateMethod == GENERATE_METHOD_PREFIX || *_pcbOutBuf > NOISE_WORD_LENGTH ) { // // since this is not the last of a sequence of alternate words we increment // count of non-noise words at current occurrence // _cNonNoiseAltWords++; // // output word to key repository. The count of noise words skipped refers to // noise words at previous occurrences only // _krep.PutKey( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } else _fFoundNoise = TRUE; } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::StartAltPhrase // // Synopsis: Pass on StartAltPhrase to key repository // // History: 20-Feb-95 SitaramR Created // //---------------------------------------------------------------------------- void CNoiseListEmpty::StartAltPhrase() { _krep.StartAltPhrase( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; } //+--------------------------------------------------------------------------- // // Member: CNoiseListEmpty::EndAltPhrase // // Synopsis: Pass on EndAltPhrase to key repository // // History: 20-Feb-95 SitaramR Created // //---------------------------------------------------------------------------- void CNoiseListEmpty::EndAltPhrase() { _krep.EndAltPhrase( _cNoiseWordsSkipped ); _cNoiseWordsSkipped = 0; }