//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991 - 1992. // // File: RWEX.CXX // // Contents: Relevant word extraction // // Classes: CRelevantWord, CRWStore, CRWHeap // // History: 25-Apr-94 dlee Created // //---------------------------------------------------------------------------- #include #pragma hdrstop #include //+--------------------------------------------------------------------------- // // Member: CRWStore::CRWStore, public // // Synopsis: Constructor for relevant word store // // Arguments: [pwList] -- array of work ids to operate over, must be in // increasing order // [cWids] -- # of items in pwList // [cIds] -- # of relevant word key ids per wid to store // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- CRWStore::CRWStore(WORKID *pwList,ULONG cWids,ULONG cIds) : _cWids(cWids), _cIds(cIds), _ulSearchLeftOff(0) { _cbRow = _RowSize(cIds); for (ULONG x = 0; x < _cWids; x++) { SRWHeader *p = GetRow(x); p->wid = pwList[x]; p->cItems = 0; } } //CRWStore //+--------------------------------------------------------------------------- // // Member: CRWStore::new, public // // Synopsis: Private new operator // // Arguments: [st] -- predefined param // [cWids] -- # of rows for the array // [cIds] -- # of relevant word key ids per wid to store // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- void * CRWStore::operator new(size_t st,ULONG cWids,ULONG cIds) { return (void *) ::new char[_ObjectSize(cWids,cIds)]; } //new #if _MSC_VER >= 1200 void CRWStore::operator delete(void * p, ULONG cWids, ULONG cIds) { ::delete (p); } #endif //+--------------------------------------------------------------------------- // // Member: CRWStore::Insert, public // // Synopsis: Inserts a keyid in a wid's heap if the rank is sufficient. // // Arguments: [wid] -- wid on whose heap is operated // [keyid] -- keyid to add // [lRank] -- rank of the keyid in the wid // // History: 17-Jun-94 dlee Created. // //---------------------------------------------------------------------------- void CRWStore::Insert(WORKID wid,KEYID keyid, LONG lRank) { SRWHeader *ph = _Find(wid,GetRow(_ulSearchLeftOff), _cWids - _ulSearchLeftOff); Win4Assert(ph != 0); // // This heap object is merely an accessor to a heap whose storage // has already been allocated. The heap object operates on the heap // but does not own the heap's memory. // CRWHeap heap(ph,_cIds); heap.Insert(keyid,lRank); // // Start the next binary search one after where the last search // left off, since we know the wids will come through in sorted order. // _ulSearchLeftOff = _HeaderToRow(ph) + 1; } //Insert //+--------------------------------------------------------------------------- // // Member: CRWStore::_Find, private // // Synopsis: Finds a wid's heap in the array of heaps given a wid // // Arguments: [wid] -- wid of heap to find // [base] -- pointer to first element in heap array // [cRows] -- # of rows in the array // // Returns: pointer to the head of the heap for the wid or 0 if not found. // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- SRWHeader * CRWStore::_Find(WORKID wid,SRWHeader *pBase,ULONG cRows) { Win4Assert(cRows != 0); SRWHeader *lo = pBase; SRWHeader *hi = lo->Forward(cRows - 1,_cbRow); SRWHeader *mid; ULONG cHalf; while (lo <= hi) { if (cHalf = cRows / 2) { mid = lo->Forward((cRows & 1) ? cHalf : (cHalf - 1),_cbRow); if (wid == mid->wid) { return mid; } else if (wid < mid->wid) { hi = mid->Backward(1,_cbRow); cRows = (cRows & 1) ? cHalf : (cHalf - 1); } else { lo = mid->Forward(1,_cbRow); cRows = cHalf; } } else if (cRows != 0) { if (wid == lo->wid) return lo; else return 0; } else { break; } } return 0; } //_Find //+--------------------------------------------------------------------------- // // Member: CRWHeap::DeQueue, public // // Synopsis: Removes the lowest-ranking keyid in the heap for a wid // // Returns: keyid of the lowest-ranking member of the heap // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- KEYID CRWHeap::DeQueue() { ULONG ulL,ulR,ulMax; KEYID kRet = _ph->aItems[0].kKeyId; _ph->cItems--; // // Take out the bottom-right most leaf and bubble it down from // the top of the tree until it is less than its parent. // SRWItem iFix = _ph->aItems[_ph->cItems]; ULONG ulPos = 0; while (!_IsLeaf(ulPos)) { ulL = _Left(ulPos); ulR = _Right(ulPos); if (!_IsValid(ulR)) ulMax = ulL; else { if (_ph->aItems[ulL].lRank < _ph->aItems[ulR].lRank) ulMax = ulL; else ulMax = ulR; } if (_ph->aItems[ulMax].lRank < iFix.lRank) { _ph->aItems[ulPos] = _ph->aItems[ulMax]; ulPos = ulMax; } else { break; } } _ph->aItems[ulPos] = iFix; return kRet; } //DeQueue //+--------------------------------------------------------------------------- // // Member: CRWStore::Insert, public // // Synopsis: Inserts an keyid in the rw heap for a wid if the keyid's rank // is greater than the lowest ranking keyid in the heap or if // the heap is not yet full. // // Arguments: [keyid] -- item to insert // [lRank] -- rank of the keyid // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- void CRWHeap::Insert(KEYID keyid, LONG lRank) { if ((_ph->cItems < _ulMaxIds) || (_ph->aItems[0].lRank < lRank)) { // // Pop off the top element if the list is full // if (_ph->cItems == _ulMaxIds) DeQueue(); // // Insert element as the rightmost bottom level leaf in the heap // ULONG ulPos = _ph->cItems++; _ph->aItems[ulPos].kKeyId = keyid; _ph->aItems[ulPos].lRank = lRank; // // bubble the element up until it fits correctly in the tree // while (ulPos) { ULONG ulParent = _Parent(ulPos); if (_ph->aItems[ulPos].lRank < _ph->aItems[ulParent].lRank) { // // swap the elements // SRWItem t = _ph->aItems[ulPos]; _ph->aItems[ulPos] = _ph->aItems[ulParent]; _ph->aItems[ulParent] = t; ulPos = ulParent; } else { break; } } } } //Insert //+--------------------------------------------------------------------------- // // Member: CRelevantWord::CRelevantWord, public // // Synopsis: Constructor for the relevant word object // // Arguments: [pwid] -- array of wids in sorted order to track // [cWidsUsed] -- # of wids in the array // [cRW] -- # of relevant words per wid to track // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- CRelevantWord::CRelevantWord(WORKID *pwid,ULONG cWidsUsed,ULONG cRW) : _pWidItem(0), _pstore(0), _cWidsAdded(0) { TRY { _pstore = new(cWidsUsed,cRW) CRWStore(pwid,cWidsUsed,cRW); _pWidItem = new SRWWidItem[cWidsUsed]; } CATCH ( CException, e ) { delete _pWidItem; delete _pstore; RETHROW(); } END_CATCH } //CRelevantWord //+--------------------------------------------------------------------------- // // Member: CRelevantWord::~CRelevantWord // // Synopsis: Destructor for the relevant word object // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- CRelevantWord::~CRelevantWord() { delete _pWidItem; delete _pstore; // may be 0 } //~CRelevantWord //+--------------------------------------------------------------------------- // // Member: CRelevantWord::DoneWithKey, public // // Synopsis: Computes rank for each wid occurance of a keyid and adjusts // the heaps appropriately // // Arguments: [keyid] -- keyid on which to calculate // [maxWid] -- # docs on disk // [cWids] -- # docs with key on disk // // History: 25-Apr-94 dlee Created. // //---------------------------------------------------------------------------- void CRelevantWord::DoneWithKey(KEYID keyid,ULONG maxWid,ULONG cWids) { if (0 != cWids) { _SetWidInfo(maxWid,cWids); for (ULONG x = 0; x < _cWidsAdded; x++) { Win4Assert(_pWidItem[x].wid != 0); _pstore->Insert(_pWidItem[x].wid,keyid,_Rank(_pWidItem[x].cOcc)); } _pstore->DoneWithKey(); _cWidsAdded = 0; } else { Win4Assert(0 == _cWidsAdded); } } //DoneWithKey //+------------------------------------------------------------------------- // // Function: _SortULongArray, private // // Synopsis: Sorts an array of unsigned longs from low to high // // Arguments: [pulItems] -- list of IDs to be sorted // [cItems] -- # of root words to be sorted // // Returns: void // // Algorithm: Heapsort, not quick sort. Give up 20% speed to save kernel // stack and prevent against n*n performance on sorted lists. // // History: 14-Mar-94 Dlee Created // //-------------------------------------------------------------------------- #define _CompUL(x,y) ((*(x)) > (*(y)) ? 1 : (*(x)) == (*(y)) ? 0 : -1) #define _SwapUL(x,y) { ULONG _t = *(x); *(x) = *(y); *(y) = _t; } inline static void _AddRootUL(ULONG x,ULONG n,ULONG *p) { ULONG _x = x; ULONG _j = (2 * (_x + 1)) - 1; while (_j < n) { if (((_j + 1) < n) && (_CompUL(p + _j,p + _j + 1) < 0)) _j++; if (_CompUL(p + _x,p + _j) < 0) { _SwapUL(p + _x,p + _j); _x = _j; _j = (2 * (_j + 1)) - 1; } else break; } } //_AddRootUL void _SortULongArray(ULONG *pulItems,ULONG cItems) { if (cItems == 0) return; long z; for (z = (((long) cItems + 1) / 2) - 1; z >= 0; z--) { _AddRootUL(z,cItems,pulItems); } for (z = cItems - 1; z != 0; z--) { _SwapUL(pulItems,pulItems + z); _AddRootUL(0,(ULONG) z,pulItems); } } //_SortULongArray