|
|
//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1991 - 1992.
//
// File: NOISE.HXX
//
// Contents: Noise word list
//
// Classes: CNoiseList, CNoiseListInit, CNoiseListEmpty
// CLString, CStringList, CStringTable
//
// History: 11-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
#pragma once
#include <plang.hxx>
const NOISE_WORD_LENGTH = cbKeyPrefix + sizeof( WCHAR ); // word length for detecting
// and filtering noise words
//+---------------------------------------------------------------------------
//
// Class: CLString
//
// Purpose: Linkable String
//
// History: 16-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
class CLString { public: CLString ( UINT cb, const BYTE* buf, CLString* next ); void* operator new ( size_t n, UINT cb );
#if _MSC_VER >= 1200
void operator delete( void * p, UINT cb ) { ::delete(p); } void operator delete( void * p ) { ::delete(p); } #endif
inline BOOL Equal ( UINT cb, const BYTE* str ) const; CLString * Next() { return _next; }
#if CIDBG == 1
void Dump() const { ciDebugOut (( DEB_ITRACE, "%s ", _buf )); } #endif
private: CLString * _next; UINT _cb; #pragma warning(disable : 4200) // 0 sized array in struct is non-ansi
BYTE _buf[]; #pragma warning(default : 4200)
};
//+---------------------------------------------------------------------------
//
// Member: CLString::Equal, public
//
// Synopsis: String comparison
//
// Arguments: [cb] -- length
// [str] -- string
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
inline BOOL CLString::Equal ( UINT cb, const BYTE* str ) const
{
    // Lengths have to agree before the bytes are worth comparing.
    BOOL fMatch = ( cb == _cb );

    if ( fMatch )
        fMatch = ( memcmp ( str, _buf, _cb ) == 0 );

    return fMatch;
}
//+---------------------------------------------------------------------------
//
// Class: CStringList
//
// Purpose: List of Linkable Strings
//
// History: 16-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
class CStringList { public: CStringList(): _head(0) {} ~CStringList(); void Add ( UINT cb, const BYTE * str ); BOOL Find ( UINT cb, const BYTE* str ) const; BOOL IsEmpty () const { return _head == 0; }
#if CIDBG == 1
void Dump() const; #endif
private:
CLString * _head; };
//+---------------------------------------------------------------------------
//
// Class: CStringTable
//
// Purpose: Hash Table of strings
//
// History: 16-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
class CStringTable { public: CStringTable( UINT size ); ~CStringTable(); void Add ( UINT cb, const BYTE* str, UINT hash ); inline BOOL Find ( UINT cb, const BYTE* str, UINT hash ) const;
#if CIDBG == 1
void Dump() const; #endif
private:
UINT _index ( UINT hash ) const { return hash % _size; }
UINT _size; CStringList* _bucket; };
//+---------------------------------------------------------------------------
//
// Member: CStringTable::Find, public
//
// Synopsis: Looks up a string in the bucket selected by the hash
//
// Arguments: [cb] -- length
// [str] -- string
// [hash] -- hash value used to pick the bucket
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
inline BOOL CStringTable::Find ( UINT cb, const BYTE* str, UINT hash ) const
{
    // The hash picks the bucket; the bucket's list does the comparison.
    const CStringList& bucket = _bucket [ _index(hash) ];

    return bucket.Find ( cb, str );
}
// Forward declaration; this header only uses PKeyRepository by reference.
class PKeyRepository;
//+---------------------------------------------------------------------------
//
// Class: CNoiseList
//
// Purpose: Discard meaningless words from the input stream
//
// History: 02-May-91 BartoszM Created stub.
// 30-May-91 t-WadeR Created first draft.
//
//----------------------------------------------------------------------------
class CNoiseList: public PNoiseList
{
public:

    CNoiseList( const CStringTable& table, PKeyRepository& krep );

    ~CNoiseList() {}

    // The following are defined out of line.
    void GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf );

    void GetFlags ( BOOL** ppRange, CI_RANK** ppRank );

    void PutAltWord( UINT hash );

    void PutWord( UINT hash );

    void StartAltPhrase();

    void EndAltPhrase();

    // Adds cWords both to the skipped-noise-word count and to the
    // current occurrence.
    void SkipNoiseWords( ULONG cWords )
    {
        _cNoiseWordsSkipped += cWords;
        *_pocc += cWords;
    }

    // Overwrites the current occurrence.
    void SetOccurrence( OCCURRENCE occ ) { *_pocc = occ; }

    // Reports the noise-word trigger flag.
    BOOL FoundNoise() { return _fFoundNoise; }

    // Reads the current occurrence.
    OCCURRENCE GetOccurrence() { return *_pocc; }

private:

    const CStringTable& _table;

    UINT                _cbMaxOutBuf;
    UINT*               _pcbOutBuf;
    BYTE*               _pbOutBuf;
    PKeyRepository&     _krep;
    OCCURRENCE*         _pocc;          // occurrence read and written by
                                        // the accessors above

    BOOL                _fFoundNoise;   // One way trigger to TRUE when
                                        // a noise word is found.

    ULONG               _cNoiseWordsSkipped;
                                        // Count of noise words that have
                                        // not been passed on to the key
                                        // repository. Care must be taken
                                        // that noise words at the same
                                        // occurrence (i.e. alternate
                                        // words) are not counted
                                        // multiple times.

    ULONG               _cNonNoiseAltWords;
                                        // Count of non-noise words at the
                                        // current occurrence.
};
//+---------------------------------------------------------------------------
//
// Class: CNoiseListInit
//
// Purpose: Initializer for the noise list
//
// History: 16-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
class CNoiseListInit: INHERIT_VIRTUAL_UNWIND, public PNoiseList
{
    INLINE_UNWIND( CNoiseListInit )

public:

    CNoiseListInit( UINT size );

    // Deletes the table; after AcqStringTable has been called _table is
    // 0 and the delete has nothing to free.
    ~CNoiseListInit() { delete _table; }

    // The following are defined out of line.
    void GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf );

    void PutAltWord( UINT hash );

    void PutWord( UINT hash );

    // Hands the table pointer to the caller and clears the member, so
    // the destructor no longer deletes that table.
    CStringTable * AcqStringTable()
    {
        CStringTable* table = _table;
        _table = 0;
        return table;
    }

private:

    CKeyBuf         _key;
    CStringTable *  _table;     // deleted by the destructor unless
                                // acquired first
};
//+---------------------------------------------------------------------------
//
// Class: CNoiseListEmpty
//
// Purpose: Empty Noise List (used as default)
//
// History: 16-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
class CNoiseListEmpty: public PNoiseList
{
public:

    CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy );

    // The following are defined out of line.
    void GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf );

    void GetFlags ( BOOL** ppRange, CI_RANK** ppRank );

    void PutAltWord( UINT hash );

    void PutWord( UINT hash );

    void StartAltPhrase();

    void EndAltPhrase();

    // Adds cWords both to the skipped-noise-word count and to the
    // current occurrence.
    void SkipNoiseWords( ULONG cWords )
    {
        _cNoiseWordsSkipped += cWords;
        *_pocc += cWords;
    }

    // Overwrites the current occurrence.
    void SetOccurrence( OCCURRENCE occ ) { *_pocc = occ; }

    // Reports the noise-word trigger flag.
    BOOL FoundNoise() { return _fFoundNoise; }

    // Reads the current occurrence.
    OCCURRENCE GetOccurrence() { return *_pocc; }

private:

    UINT                _cbMaxOutBuf;
    UINT*               _pcbOutBuf;
    BYTE*               _pbOutBuf;
    PKeyRepository&     _krep;
    OCCURRENCE*         _pocc;          // occurrence read and written by
                                        // the accessors above

    ULONG               _ulGenerateMethod;
                                        // Fuzziness of query

    BOOL                _fFoundNoise;   // One way trigger to TRUE when
                                        // a noise word is found.

    ULONG               _cNoiseWordsSkipped;
                                        // Count of noise words that have
                                        // not been passed on to the key
                                        // repository. Care must be taken
                                        // that noise words at the same
                                        // occurrence (i.e. alternate
                                        // words) are not counted
                                        // multiple times.

    ULONG               _cNonNoiseAltWords;
                                        // Count of non-noise words at the
                                        // current occurrence.
};
|