//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991 - 2000. // // File: QKREP.CXX // // Contents: Query Key Repository // // Classes: CQueryKeyRepository // // History: 04-Jun-91 t-WadeR Created. // 23-Sep-91 BartosM Rewrote to use phrase expr. // 31-Jan-93 KyleP Use restrictions, not expressions // //---------------------------------------------------------------------------- #include #pragma hdrstop #include #include #include "qkrep.hxx" #include //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::CQueryKeyRepository // // Synopsis: Creates Key repository // // History: 31-May-91 t-WadeR Created // 23-Sep-91 BartoszM Rewrote to use phrase expr. // //---------------------------------------------------------------------------- CQueryKeyRepository::CQueryKeyRepository ( ULONG fuzzy ) : _occLast(OCC_INVALID), _pOrRst(0), _pCurAltPhrase(0), _cInitialNoiseWords(0), _fNoiseWordsOnly(FALSE), _fHasSynonym( FALSE ) { if ( fuzzy == GENERATE_METHOD_PREFIX ) _isRange = TRUE; else _isRange = FALSE; _pPhrase = new CPhraseRestriction( INIT_PHRASE_WORDS ); Win4Assert( _pPhrase->IsValid() ); } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::~CQueryKeyRepository // // Synopsis: Destroys // // History: 31-May-91 t-WadeR Created // 23-Sep-91 BartoszM Rewrote to use phrase expr. // //---------------------------------------------------------------------------- CQueryKeyRepository::~CQueryKeyRepository() { delete _pPhrase; delete _pOrRst; delete _pCurAltPhrase; } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::AcqXpr // // Synopsis: Acquire Phrase(s) // // History: 07-Feb-92 BartoszM Created // 24-Jan-97 KyleP Handle null phrase (from bad alt words) // //---------------------------------------------------------------------------- CRestriction * CQueryKeyRepository::AcqRst() { CNodeRestriction *pNodeRst; if ( _pOrRst ) { Win4Assert( _pPhrase == 0 ); pNodeRst = _pOrRst; } else pNodeRst = _pPhrase; // // pNodeRst may be null, if alternate phrasing didn't work out. // if ( 0 == pNodeRst ) return 0; switch( pNodeRst->Count() ) { case 0: return( 0 ); break; case 1: return( pNodeRst->RemoveChild(0) ); break; default: { CRestriction * tmp = pNodeRst; _pOrRst = 0; _pPhrase = 0; return( tmp ); break; } } } //AcqRst //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::PutKey // // Synopsis: Puts a key into the key list and occurrence list // // Arguments: [cNoiseWordsSkipped] -- count of noise words that have been skipped // // History: 31-May-91 t-WadeR Created // 23-Sep-91 BartoszM Rewrote to use phrase expr. // 29-Nov-94 SitaramR Rewrote to take Start/EndAltPhrase into account // //---------------------------------------------------------------------------- void CQueryKeyRepository::PutKey ( ULONG cNoiseWordsSkipped ) { // check if there is a set of alt phrases with noise words only if ( _fNoiseWordsOnly ) return; ciDebugOut (( DEB_ITRACE, "QueryKeyRepository::PutKey \"%.*ws\", pid = %d\n", _key.StrLen(), _key.GetStr(), _key.Pid() )); if ( _pCurAltPhrase ) // if, we are processing an alternate phrase AppendKey( _pCurAltPhrase, cNoiseWordsSkipped ); else { if ( _pOrRst ) { Win4Assert( _pOrRst->Count() ); Win4Assert( _pPhrase == 0 ); for ( unsigned i=0; i<_pOrRst->Count(); i++) { CRestriction *pRst = _pOrRst->GetChild(i); Win4Assert( pRst->Type() == RTPhrase ); AppendKey( (CPhraseRestriction *) pRst, cNoiseWordsSkipped ); } } else AppendKey( _pPhrase, cNoiseWordsSkipped ); } _occLast = _occ; } //PutKey //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::AppendKey // // Synopsis: Appends a key to end of phraseRst // // Arguments: [pPhraseRst] -- restriction to append to // [cNoiseWordsSkipped] -- count of noise words that have been skipped // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- void CQueryKeyRepository::AppendKey( CPhraseRestriction *pPhraseRst, ULONG cNoiseWordsSkipped ) { // _occ as generated by CKeyMaker is not accurate because it does not // take StartAltPhrase/EndAltPhrase into account. We use _occ (and _occLast) // solely to test for synonyms. The test is: // // if ( _occ == _occLast ) // then synonym if ( _occ == _occLast ) { ULONG iLast = pPhraseRst->Count()-1; COccRestriction *pLastChild = pPhraseRst->GetChild( iLast ); Win4Assert( pLastChild ); if ( pLastChild->Type() == RTWord ) { ciDebugOut (( DEB_ITRACE, "Create Synonym Expression\n" )); const CKey* pKey = ((CWordRestriction*) pLastChild)->GetKey(); // there can be no noise words between synonyms Win4Assert( cNoiseWordsSkipped == 0 ); _fHasSynonym = TRUE; XPtr xTmp(new CSynRestriction ( *pKey, pLastChild->Occurrence(), 0, 0, _isRange )); Win4Assert( xTmp->IsValid() ); delete pLastChild; pLastChild = xTmp.Acquire(); pPhraseRst->SetChild ( pLastChild, iLast ); } Win4Assert ( pLastChild->Type() == RTSynonym ); ((CSynRestriction*) pLastChild)->AddKey ( _key ); } else { XPtr xChildRst( new CWordRestriction( _key, _occ, cNoiseWordsSkipped, 0, _isRange ) ); Win4Assert( xChildRst->IsValid() ); // calculate correct occurrence taking noise words into account OCCURRENCE occ = _ComputeOccurrence( xChildRst.GetPointer(), pPhraseRst ); xChildRst->SetOccurrence( occ ); pPhraseRst->AddChild ( xChildRst.GetPointer() ); xChildRst.Acquire(); } } //AppendKey //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::StartAltPhrase // // Synopsis: Preparation for start of an alternate phrase // // Arguments: [cNoiseWordsSkipped] -- count of noise words that have been skipped // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- void CQueryKeyRepository::StartAltPhrase( ULONG cNoiseWordsSkipped ) { // check if there is a set of alt phrases with noise words only if ( _fNoiseWordsOnly ) return; if ( _pCurAltPhrase ) { if ( _pCurAltPhrase->Count() == 0 ) delete _pCurAltPhrase; else { // add count of noise words skipped to last child of _pCurAltPhrase COccRestriction *pOccRst = _pCurAltPhrase->GetChild( _pCurAltPhrase->Count()-1 ); Win4Assert( pOccRst ); pOccRst->AddCountPostNoiseWords( cNoiseWordsSkipped ); _stkAltPhrases.Push( _pCurAltPhrase ); } } else { if ( _pOrRst ) { Win4Assert( _pOrRst->Count() ); Win4Assert( _pPhrase == 0 ); // add count of noise words of last child of every phrase in _pOrRst for ( unsigned i=0; i<_pOrRst->Count(); i++) { CRestriction *pRst = _pOrRst->GetChild(i); Win4Assert( pRst->Type() == RTPhrase ); COccRestriction *pOccRst = ((CPhraseRestriction *)pRst)->GetChild( ((CPhraseRestriction *)pRst)->Count()-1 ); Win4Assert( pOccRst ); pOccRst->AddCountPostNoiseWords( cNoiseWordsSkipped ); } } else { if ( _pPhrase->Count() != 0 ) { // add count of noise words skipped to last child of _pPhrase COccRestriction *pOccRst = _pPhrase->GetChild( _pPhrase->Count()-1 ); Win4Assert( pOccRst ); pOccRst->AddCountPostNoiseWords( cNoiseWordsSkipped ); } else // sequence of noise words at the beginning of the phrase _cInitialNoiseWords = cNoiseWordsSkipped; } } _pCurAltPhrase = new CPhraseRestriction( INIT_PHRASE_WORDS ); _occLast = OCC_INVALID; // reset _occLast } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::EndAltPhrase // // Synopsis: Append all alternate phrases to existing phrases // // Arguments: [cNoiseWordsSkipped] -- count of noise words that have been skipped // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- void CQueryKeyRepository::EndAltPhrase( ULONG cNoiseWordsSkipped ) { // check if there is a set of alt phrases with noise words only if ( _fNoiseWordsOnly ) return; // call on StartAltPhrase to stack the current alternate phrase Win4Assert( _pCurAltPhrase ); StartAltPhrase( cNoiseWordsSkipped ); delete _pCurAltPhrase; // allocated in StartAltPhrase, but it is not needed _pCurAltPhrase = 0; // if all alternate phrases are noise, then the entire query is an // uninteresting phrase because we cannot compute the occurrence of the // first key after the set of alternate phrases. So, clean up and return. if ( _stkAltPhrases.Count() == 0 ) { _fNoiseWordsOnly = TRUE; delete _pOrRst; _pOrRst = 0; delete _pPhrase; _pPhrase = 0; return; } XNodeRestriction xNewOrRst( new CNodeRestriction( RTOr )); XPhraseRestriction xTailPhrase; if ( _pOrRst ) { // concatenate each of the stacked alternate phrases to every child phrase // of _pOrRst Win4Assert( _pOrRst->Count() ); Win4Assert( _pPhrase == 0 ); while ( _stkAltPhrases.Count() > 0 ) { xTailPhrase.Set( _stkAltPhrases.Pop() ); for ( unsigned i=0; i< _pOrRst->Count(); i++) { CRestriction *pRst = _pOrRst->GetChild(i); Win4Assert( pRst->Type() == RTPhrase ); CloneAndAdd( xNewOrRst.GetPointer(), (CPhraseRestriction *)pRst, xTailPhrase.GetPointer() ); } CPhraseRestriction *pTailPhrase = xTailPhrase.Acquire(); delete pTailPhrase; } } else // only one phrase so far { while ( _stkAltPhrases.Count() > 0 ) { xTailPhrase.Set( _stkAltPhrases.Pop() ); CloneAndAdd( xNewOrRst.GetPointer(), _pPhrase, xTailPhrase.GetPointer() ); CPhraseRestriction *pTailPhrase = xTailPhrase.Acquire(); delete pTailPhrase; } } delete _pPhrase; _pPhrase = 0; delete _pOrRst; _pOrRst = xNewOrRst.Acquire(); _occLast = OCC_INVALID; // reset _occLast } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::CloneAndAdd // // Synopsis: Clone pHeadPhrase, pTailPhrase, concatenate and add the // resulting phrase to pOrRst // // Arguments: [pOrRst] -- Destination Or node // [pHeadPhrase] -- first part of a phrase // [pTailPhrase] -- remaining part of a phrase // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- void CQueryKeyRepository::CloneAndAdd( CNodeRestriction *pOrRst, CPhraseRestriction *pHeadPhrase, CPhraseRestriction *pTailPhrase ) { XPhraseRestriction xPhraseRst( new CPhraseRestriction( INIT_PHRASE_WORDS ) ); Win4Assert( xPhraseRst->IsValid() ); // clone head XOccRestriction xOccRst; for ( unsigned i=0; iCount(); i++ ) { xOccRst.Set( pHeadPhrase->GetChild(i)->Clone() ); Win4Assert( xOccRst->IsValid() ); xPhraseRst.GetPointer()->AddChild( xOccRst.GetPointer() ); xOccRst.Acquire(); } // clone tail for ( i=0; iCount(); i++) { xOccRst.Set( pTailPhrase->GetChild(i)->Clone() ); Win4Assert( xOccRst->IsValid() ); OCCURRENCE occ = _ComputeOccurrence( xOccRst.GetPointer(), xPhraseRst.GetPointer() ); xOccRst.GetPointer()->SetOccurrence( occ ); xPhraseRst.GetPointer()->AddChild( xOccRst.GetPointer() ); xOccRst.Acquire(); } pOrRst->AddChild( xPhraseRst.GetPointer() ); xPhraseRst.Acquire(); } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::_ComputeOccurrence // // Synopsis: Computes the noise word adjusted occurrence // // Arguments: [pOccRst] -- restriction whose occurrence is to be computed // [pPhrase] -- phrase to which pOccRst is being appended // // History: 29-Nov-94 SitaramR Created // //---------------------------------------------------------------------------- OCCURRENCE CQueryKeyRepository::_ComputeOccurrence( COccRestriction *pOccRst, CPhraseRestriction *pPhraseRst ) { OCCURRENCE occ; if ( pPhraseRst->Count() ) { COccRestriction *pPrevOccRst = pPhraseRst->GetChild( pPhraseRst->Count()-1 ); Win4Assert( pPrevOccRst ); // Occurrence of pOccRst is computed as: // occurrence of previous child (pPrevOccRst) in pPhraseRst // + count of noise words following pPrevOccRst // + count of noise words preceeding pOccRst // + 1 occ = pPrevOccRst->Occurrence() + pPrevOccRst->CountPostNoiseWords() + pOccRst->CountPrevNoiseWords() + 1; } else { // Since there are no preivous restrictions, occurrence of // pOccRst is computed as: // // count of noise words at the beginning of phrase // + count of noise words preceeding pOccRst // + 1 occ = _cInitialNoiseWords + pOccRst->CountPrevNoiseWords() + 1; } return occ; } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::GetBuffers // // Synopsis: Returns address of repository's input buffers // // Effects: // // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer // [ppbInBuf] -- pointer to pointer to recieve address of buffer // [ppocc] -- pointer to pointer to recieve address of occurrences // // History: 05-June-91 t-WadeR Created. // // Notes: // //---------------------------------------------------------------------------- void CQueryKeyRepository::GetBuffers( unsigned** ppcbWordBuf, BYTE** ppbWordBuf, OCCURRENCE** ppocc ) { _key.SetCount(MAXKEYSIZE); *ppcbWordBuf = _key.GetCountAddress(); *ppbWordBuf = _key.GetWritableBuf(); *ppocc = &_occ; } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::GetFlags // // Synopsis: Returns address of rank and range flags // // Arguments: [ppRange] -- range flag // [ppRank] -- rank flag // // History: 11-Feb-92 BartoszM Created. // //---------------------------------------------------------------------------- void CQueryKeyRepository::GetFlags ( BOOL** ppRange, CI_RANK** ppRank ) { *ppRange = &_isRange; *ppRank = &_rank; } //+--------------------------------------------------------------------------- // // Member: CQueryKeyRepository::FixUp // // Synopsis: This funstion creates a word restriction with the cached phrase // in the CDataRepository. Then it connects the new Word Restriction // to the phrase (internal restriction ) with a new Or restriction. // If the internal restriction is an Or restriction, than it simply // do a AddChild to the Or restriction. // // Arguments : [drep] -- CDataRepository containing the cached phrase // // History: 10-Feb-2000 KitmanH Created // // Note: This function is a hack to fix a word breaker issue. The word // breaker does compund word breaking for some languages, such as // German. For example, "tes" is broken into "tes" and "1". // "tes" get a synonyn "t" and "1" gets a synonym "es". This is a // result of a hack in the infosoft word breaker. The "1" is a place // holder and is thrown out in a non prefix match phrase to capture // the case ("tes" | "t") "es". However, this breaks the prefix // matching scenerio. Noise words are not thrown out in a prefix // matching (GENERATE_METHOD_PREFIX) query, "tes*" becomes // (tes*|t*) (1*|es*). In this case, tes*" is not a match unless // it is followed immediately with a 1* or es*, e.g. "test case" // is not a match whereas "tested 1000 times" and "testing especially" // are matches. The hack here is to Or a CWordRestriction of the // originally phrase without word breaking. This hack works fine, // if the original phrase is a single word. It will not work in the // multiple word case. // //---------------------------------------------------------------------------- void CQueryKeyRepository::FixUp( CDataRepository & drep ) { // // If the keyRep has synonym, we assume word breaking has occured. // if ( _isRange && _fHasSynonym ) { XNodeRestriction xOrRst; if ( _pPhrase ) { xOrRst.Set( new CNodeRestriction( RTOr, 2 ) ); xOrRst->AddChild ( _pPhrase ); } else { Win4Assert( 0 != _pOrRst ); xOrRst.Set( _pOrRst ); } CKeyBuf KeyBuf; KeyBuf.SetPid( _key.Pid() ); drep.NormalizeWStr( KeyBuf.GetWritableBuf(), KeyBuf.GetCountAddress() ); // Create a CWordRestriction with the Normalized form of the whole phrase XPtr xWordRst( new CWordRestriction( KeyBuf, 0, // occurence 0, 0, TRUE ) ); xOrRst->AddChild( xWordRst.GetPointer() ); xWordRst.Acquire(); _pOrRst = xOrRst.Acquire(); _pPhrase = 0; } } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::CVectorKeyRepository // // Synopsis: Creates Vector Key repository // // History: 18-Jan-95 SitaramR Created // //---------------------------------------------------------------------------- CVectorKeyRepository::CVectorKeyRepository( const CFullPropSpec & ps, LCID lcid, ULONG ulWeight, CPidMapper & pidMap, CLangList & langList ) : _occLast(OCC_INVALID), _ps(ps), _lcid(lcid), _ulWeight(ulWeight), _pidMap(pidMap), _langList( langList ) { _pVectorRst = new CVectorRestriction( VECTOR_RANK_JACCARD ); } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::~CVectorKeyRepository // // History: 18-Jan-95 SitaramR Created // //---------------------------------------------------------------------------- CVectorKeyRepository::~CVectorKeyRepository() { delete _pVectorRst; } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::AcqRst // // Synopsis: Acquire vector restriction // // History: 18-Jan-95 SitaramR Created // //---------------------------------------------------------------------------- CVectorRestriction* CVectorKeyRepository::AcqRst() { if ( _pVectorRst->Count() == 0 ) return 0; else { CVectorRestriction *pTmp = _pVectorRst; _pVectorRst = 0; return pTmp; } } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::PutKey // // Synopsis: Adds a key to the vector restriction // // Arguments: cNoiseWordsSkipped -- ignored (used by CQueryKeyRepository::PutKey ) // // History: 18-Jan-95 SitaramR Created // //---------------------------------------------------------------------------- void CVectorKeyRepository::PutKey( ULONG cNoiseWordsSkipped ) { ciDebugOut (( DEB_ITRACE, "VectorKeyRepository::PutKey \"%.*ws\", pid=%d\n", _key.StrLen(), _key.GetStr(), _key.Pid() )); // _occ as generated by CKeyMaker is not accurate because it does not // take StartAltPhrase/EndAltPhrase into account. We use _occ (and _occLast) // solely to test for synonyms. The test is: // // if ( _occ == _occLast ) // then synonym if ( _occ == _occLast ) { ULONG iLast = _pVectorRst->Count()-1; COccRestriction *pLastChild = (COccRestriction *)_pVectorRst->GetChild( iLast ); Win4Assert( pLastChild ); if ( pLastChild->Type() == RTWord ) { ciDebugOut (( DEB_ITRACE, "Create Synonym Expression\n" )); const CKey* pKey = ((CWordRestriction*) pLastChild)->GetKey(); // there can be no noise words between synonyms Win4Assert( cNoiseWordsSkipped == 0 ); CSynRestriction* tmp = new CSynRestriction ( *pKey, pLastChild->Occurrence(), 0, 0, FALSE ); Win4Assert( tmp->IsValid() ); delete pLastChild; pLastChild = tmp; _pVectorRst->SetChild ( tmp, iLast ); } Win4Assert ( pLastChild->Type() == RTSynonym ); ((CSynRestriction*) pLastChild)->AddKey ( _key ); } else { XWordRestriction xWordRst( new CWordRestriction( _key, 1, 0, 0, FALSE )); _pVectorRst->AddChild( xWordRst.GetPointer() ); xWordRst.Acquire(); } _occLast = _occ; } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::GetBuffers // // Synopsis: Returns address of repository's input buffers // // Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer // [ppbInBuf] -- pointer to pointer to recieve address of buffer // [ppocc] -- pointer to pointer to recieve address of occurrences // // History: 18-Jan-95 SitaramR Created. // //---------------------------------------------------------------------------- void CVectorKeyRepository::GetBuffers( unsigned** ppcbWordBuf, BYTE** ppbWordBuf, OCCURRENCE** ppocc ) { _key.SetCount(MAXKEYSIZE); *ppcbWordBuf = _key.GetCountAddress(); *ppbWordBuf = _key.GetWritableBuf(); *ppocc = &_occ; } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::GetFlags // // Synopsis: Returns address of rank and range flags // // Arguments: [ppRange] -- range flag // [ppRank] -- rank flag // // History: 18-Jan-95 SitaramR Created. // //---------------------------------------------------------------------------- void CVectorKeyRepository::GetFlags ( BOOL** ppRange, CI_RANK** ppRank ) { *ppRange = 0; *ppRank = 0; } //+--------------------------------------------------------------------------- // // Member: CVectorKeyRepository::PutPhrase // // Synopsis: Stores query time phrases // // Arguments: [pwcPhrase] -- phrase as it exists in the text sources // [cwcPhrase] -- count of characters in pwcPhrase // // History: 14-Feb-95 SitaramR Created. // //---------------------------------------------------------------------------- SCODE CVectorKeyRepository::PutPhrase( WCHAR const *pwcPhrase, ULONG cwcPhrase ) { XPtrST xString( new WCHAR[cwcPhrase+1] ); RtlCopyMemory( xString.GetPointer(), pwcPhrase, cwcPhrase*sizeof(WCHAR) ); xString.GetPointer()[cwcPhrase] = 0; CQueryKeyRepository keyRep( GENERATE_METHOD_EXACT ); BreakPhrase( xString.GetPointer(), _ps, _lcid, GENERATE_METHOD_EXACT, keyRep, 0, _pidMap, _langList ); CRestriction *pPhraseRst = keyRep.AcqRst(); if ( 0 != pPhraseRst ) { XPtr xRst( pPhraseRst ); pPhraseRst->SetWeight( _ulWeight ); _pVectorRst->AddChild( pPhraseRst ); xRst.Acquire(); } _occLast = OCC_INVALID; // reset _occLast return S_OK; } // // The following are needed to make midl happy. There are no other interfaces // to bind to. Inheritance from IUnknown is unnecessary. // SCODE STDMETHODCALLTYPE CVectorKeyRepository::QueryInterface(REFIID riid, void * * ppvObject) { *ppvObject = 0; return( E_NOTIMPL ); } ULONG STDMETHODCALLTYPE CVectorKeyRepository::AddRef() { return( 1 ); } ULONG STDMETHODCALLTYPE CVectorKeyRepository::Release() { return( 1 ); }