/* windows-xp/Source/XPSP1/NT/enduser/stuff/itircl/fts/search/delete.c */

#include <mvopsys.h>
#include <mem.h>
#include <orkin.h>
#include <mvsearch.h>
#include "common.h"
#include "index.h"

#ifdef _DEBUG
static BYTE NEAR s_aszModule[] = __FILE__;	/* Used by error return functions.*/
#endif


// Decoder dispatch table, defined elsewhere; indexed by a key's cschScheme
// (see FGetDword below).
extern FDECODE DecodeTable[];
// Bit-mask table, defined elsewhere. The emitters in this file use
// argdwBits[n] as "n one-bits" (presumably the n low-order bits set --
// TODO confirm against its definition).
extern DWORD argdwBits[];

// Signature shared by all encoders: (output node, value, key "center").
typedef VOID (PASCAL NEAR *ENCODEDWORD) (PNODEINFO, DWORD, int);
PRIVATE VOID PASCAL NEAR EmitBitStreamDWord (PNODEINFO, DWORD, int);
PRIVATE VOID PASCAL NEAR EmitFixedDWord (PNODEINFO, DWORD, int);
PRIVATE VOID PASCAL NEAR EmitBellDWord (PNODEINFO, DWORD, int);

// Encoder dispatch table, indexed by a key's cschScheme (see EmitDword).
// NOTE(review): the fourth slot is NULL, so EmitDword would call through a
// null pointer if a key ever carried scheme index 3 -- confirm that such a
// scheme is never stored in an index header.
static ENCODEDWORD EncodeTable[] =
{
	EmitBitStreamDWord,
	EmitFixedDWord,
	EmitBellDWord,
	NULL,
};

// EmitDword: encode "dw" into node "p" using the scheme and center of "key".
#define EmitDword(p,dw,key) EncodeTable[(key).cschScheme]((p), (dw), (key).ucCenter)
// FGetDword: decode one value from node "a" using key "b" into "*c"; yields
// the decoder's status code. Note that the arguments are not parenthesized in
// the expansion, so pass simple expressions only.
#define FGetDword(a,b,c) (*DecodeTable[b.cschScheme])(a, b, c)

/*************************************************************************
 *
 *                    INTERNAL PRIVATE FUNCTIONS
 *
 *  All of them should be declared near
 *
 *************************************************************************/
// Processes every word entry of one leaf node against the topic id list.
PRIVATE int PASCAL NEAR TraverseLeafNode (_LPIPB, PNODEINFO,
    DWORD FAR *, DWORD);
// Removes the listed topic ids from one word's topic data block.
PRIVATE int PASCAL NEAR DeleteTopicFromData (_LPIPB lpipb,
    FILEOFFSET dataOffset, DWORD FAR *, DWORD,
    LPDW pTopicIdArray, DWORD dwArraySize);
// Re-encodes one surviving topic record from the read node to the copy node.
VOID PRIVATE PASCAL NEAR RemapData (_LPIPB, PNODEINFO, PNODEINFO,
    DWORD, DWORD);
// Low-level bit writers used by the encoders.
VOID PRIVATE PASCAL NEAR EmitBits (PNODEINFO pNode, DWORD dwVal, BYTE cBits);
PRIVATE VOID PASCAL NEAR EmitBool (PNODEINFO pNode, BOOL fVal);

// Comparison callback handed to HugeDataSort (public, unlike the above).
PUBLIC LONG PASCAL FAR CompareDWord (DWORD, DWORD, LPV lpParm);

/*************************************************************************
 *	@doc	API
 *  @func   HRESULT FAR PASCAL | MVIndexTopicDelete |
 *      Delete topics from an index
 *  @parm   HFPB | hSysFile |
 *      Handle to an opened system file, maybe NULL
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index info. This structure is obtained through
 *      IndexInitiate()
 *  @parm   SZ | szIndexName |
 *      Name of the index. If hSysFile is NULL, this is a regular DOS file
 *      else it is a subfile of hSysFile
 *  @parm   DWORD FAR * | rgTopicId |
 *      Array of topic ids to be deleted from the index. The array is
 *      sorted in place by this call.
 *  @parm   DWORD | dwCount |
 *      Number of elements in the array
 *  @rdesc  S_OK, E_INVALIDARG for a NULL lpipb / NULL array / zero count,
 *      E_OUTOFMEMORY when a work buffer cannot be allocated, or the error
 *      returned by the open / sort / read / leaf-processing step that failed
 *  @comm   The function walks from the root down the first entry of each
 *      internal node to reach the first leaf, then follows the leaf chain
 *      (next-node offset stored at the start of each leaf) until foNil,
 *      handing every leaf to TraverseLeafNode(). Every resource acquired
 *      here is released on every exit path.
 *************************************************************************/
HRESULT PUBLIC EXPORT_API FAR PASCAL MVIndexTopicDelete (HFPB hSysFile,
    _LPIPB lpipb, SZ szIndexName, DWORD FAR * rgTopicId, DWORD dwCount)
{
    PNODEINFO pNodeInfo = NULL;
    int fRet;
    int cLevel;
    int cMaxLevel;
    WORD wLen;
    LPB pCur;
    
    if (lpipb == NULL || rgTopicId == NULL || dwCount == 0)
        return(E_INVALIDARG);
        
    // Set the bState
    lpipb->bState = DELETING_STATE;
        
    // Open the index file
    if ((fRet = IndexOpenRW(lpipb, hSysFile, szIndexName)) != S_OK)
        goto exit00;
    
    // Allocate the node used to walk the tree
    if ((pNodeInfo = AllocBTreeNode (lpipb)) == NULL)
    {
        fRet = E_OUTOFMEMORY;
        goto exit0;
    }
        
    // Scratch buffer that receives the words extracted from the nodes
    if ((lpipb->hTmpBuf = _GLOBALALLOC (DLLGMEM_ZEROINIT,
        lpipb->BTreeData.Header.dwMaxWLen * 2)) == NULL)
    {
        // fRet still holds S_OK from IndexOpenRW at this point; report
        // the failure instead of silently returning success.
        fRet = E_OUTOFMEMORY;
        goto exit0;
    }
    
    lpipb->pTmpBuf = (LPB)_GLOBALLOCK (lpipb->hTmpBuf);
    
    // Node used by DeleteTopicFromData to hold a word's topic data
    if ((lpipb->pIndexDataNode = AllocBTreeNode (lpipb)) == NULL)
    {
        fRet = E_OUTOFMEMORY;
        goto exit1;
    }
        
    pNodeInfo->nodeOffset = lpipb->BTreeData.Header.foIdxRoot;
    cMaxLevel = lpipb->BTreeData.Header.cIdxLevels - 1;
    
    // Sort the incoming array; DeleteTopicFromData merges it against the
    // ascending topic ids stored in the index.
    if ((fRet = HugeDataSort((LPV HUGE*)rgTopicId, dwCount,
    	(FCOMPARE)CompareDWord, NULL, NULL, NULL)) != S_OK)
        goto exit2;     // pIndexDataNode is already allocated: free it too
        
    // Move down the tree, based on the first offset of the block
    for (cLevel = 0; cLevel < cMaxLevel; cLevel++)
    {
        if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, pNodeInfo,
            FALSE)) != S_OK)
        {
            // NOTE(review): hData is not allocated anywhere in this
            // function and is released on this path only; kept as in the
            // original -- confirm who owns it.
            _GLOBALUNLOCK(lpipb->hData);
            _GLOBALFREE(lpipb->hData);
            lpipb->hData = NULL;
            goto exit2;
        }
        pCur = pNodeInfo->pBuffer + sizeof(WORD); // Skip cbLeft
        pCur = ExtractWord (lpipb->pTmpBuf, pCur, &wLen);
        pCur += ReadFileOffset (&pNodeInfo->nodeOffset, pCur);
    }
    
    // Handle leaf nodes: follow the chain until there is no next leaf
    while (!FoEquals (pNodeInfo->nodeOffset, foNil))
    {
        if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, pNodeInfo,
            TRUE)) != S_OK)
            goto exit2;     // must not return directly: resources are held
        if ((fRet = TraverseLeafNode (lpipb, pNodeInfo, rgTopicId, dwCount)) !=
            S_OK)
        {
            goto exit2;
        }
                
        // The offset of the next leaf is stored at the start of the node
        ReadFileOffset (&pNodeInfo->nodeOffset, pNodeInfo->pBuffer);
    }
    fRet = S_OK;

    // Cleanup, in reverse order of acquisition. Each label releases what
    // was acquired after the previous one.
exit2:
    FreeBTreeNode (lpipb->pIndexDataNode);
    lpipb->pIndexDataNode = NULL;

exit1:
    _GLOBALUNLOCK(lpipb->hTmpBuf);
    _GLOBALFREE(lpipb->hTmpBuf);
    lpipb->hTmpBuf = NULL;
    lpipb->pTmpBuf = NULL;      // do not leave a dangling pointer behind

exit0:
    FileClose(lpipb->hfpbIdxFile);
    FreeBTreeNode (pNodeInfo);

exit00:
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        FreeHandle (lpipb->wi.hSigma);
        FreeHandle (lpipb->wi.hLog);
        lpipb->wi.hSigma = lpipb->wi.hLog = NULL;
    }

    return(fRet);
}

/*************************************************************************
 *  @doc    PRIVATE
 *  @func   int PASCAL NEAR | TraverseLeafNode |
 *      Applies the topic deletions to every word entry of one leaf node.
 *      Each entry is read as: word, optional field id (when OCCF_FIELDID
 *      is set), packed topic count, file offset of the word's topic data
 *      and packed size of that data.
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block (scratch buffer, flags, index file handle)
 *  @parm   PNODEINFO | pNodeInfo |
 *      Leaf node already read into memory; scanning goes from pCurPtr to
 *      pMaxAddress
 *  @parm   DWORD FAR * | rgTopicId |
 *      Sorted array of topic ids to delete
 *  @parm   DWORD | dwCount |
 *      Number of elements in rgTopicId
 *  @rdesc  S_OK, the error from DeleteTopicFromData, or the error block
 *      value filled in by FileSeekWrite when the node cannot be rewritten
 *  @comm   A topic count that shrank is rewritten in place. The entry must
 *      keep its byte length, so a shorter encoding is padded with
 *      continuation bytes. The node is written back to the index file only
 *      if at least one count changed.
 *************************************************************************/
PRIVATE int PASCAL NEAR TraverseLeafNode (_LPIPB lpipb, PNODEINFO pNodeInfo,
    DWORD FAR *rgTopicId, DWORD dwCount)
{
    LPB pCur;
    LPB pMaxAddress;
    OCCF occf = lpipb->occf;
    WORD wLen;
    FILEOFFSET dataOffset;
    DWORD dataSize;
    BYTE  TopicCnt[20];
    BYTE  cbOldCount;
    BYTE  cbNewCount;
    ERRB  errb;
    BYTE  fChange = FALSE;
    HRESULT   fRet;
    
    pCur = pNodeInfo->pCurPtr;
    pMaxAddress = pNodeInfo->pMaxAddress;
    
    while (pCur < pMaxAddress)
    {
        DWORD dwTemp;
        DWORD dwTopicCount;
        DWORD dwOldTopicCount;
        LPB   pSaved;
        LPB   pTemp;
        int   cbPad;
        
        pCur = ExtractWord (lpipb->pTmpBuf, pCur, &wLen);
        
        // Skip field id, topic count. fileoffset, datasize
        if (occf & OCCF_FIELDID)
            pCur += CbByteUnpack (&dwTemp, pCur); // FieldId
            
        pTemp = pSaved = pCur;  // Save the pointer to the topic count offset
        cbOldCount = (BYTE)CbByteUnpack (&dwTopicCount, pCur);
        pCur += cbOldCount;
        pCur += ReadFileOffset (&dataOffset, pCur);
        pCur += CbByteUnpack (&dataSize, pCur);
        
        // A word with no topics left has nothing to delete
        if (dwTopicCount == 0)
            continue;
            
        dwOldTopicCount = dwTopicCount;
        if ((fRet = DeleteTopicFromData (lpipb, dataOffset, &dwTopicCount,
            dataSize, rgTopicId, dwCount)) != S_OK)
            return(fRet);
        
        // Nothing was removed for this word
        if (dwOldTopicCount == dwTopicCount)
            continue;
        
        cbNewCount = (BYTE)CbBytePack (TopicCnt, dwTopicCount);

        // Update the topic count. If the new encoding is shorter, flag its
        // last byte as "more bytes follow" so the padding below is consumed
        // as part of the same packed value.
        if (cbOldCount > cbNewCount)
        {
            TopicCnt[cbNewCount - 1] |= 0x80;   // Set the high bit
        }
        MEMCPY(pSaved, TopicCnt, cbNewCount);
        pSaved += cbNewCount;

        // Pad back to the original length: (difference - 1) continuation
        // bytes followed by one terminating zero byte. A count that was
        // already padded by earlier deletions can be much longer than its
        // value needs, so the difference is handled for any size rather
        // than through a fixed list of cases.
        for (cbPad = (int)cbOldCount - (int)cbNewCount; cbPad > 1; cbPad--)
            *pSaved++ = 0x80;   // Set the high bit
        if (cbPad == 1)
            *pSaved = 0x00;     // Last byte: no continuation

#ifdef _DEBUG
        // Decode the rewritten count and make sure it reads back correctly
        CbByteUnpack (&dwOldTopicCount, pTemp);
        assert (dwOldTopicCount == dwTopicCount);
#endif
        
        fChange = TRUE; // The node have been changed
        
    }
    
    if (fChange == FALSE)
        return(S_OK);
        
    // Update the node
    if ((FileSeekWrite(lpipb->hfpbIdxFile,
        pNodeInfo->pBuffer, pNodeInfo->nodeOffset,
        lpipb->BTreeData.Header.dwBlockSize, &errb)) !=
        (LONG)lpipb->BTreeData.Header.dwBlockSize)
    {
        return(errb);
    }
    return(S_OK);
}

/*************************************************************************
 *  @doc    PRIVATE
 *  @func   int PASCAL NEAR | DeleteTopicFromData |
 *      Removes the requested topics from one word's topic data block and
 *      rewrites the block in place when something was removed.
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block; lpipb->pIndexDataNode is the work buffer
 *  @parm   FILEOFFSET | dataOffset |
 *      Offset of the data block in the index file
 *  @parm   DWORD FAR * | pTopicCount |
 *      In: number of topics stored in the block. Out: number of topics
 *      left after the deletion
 *  @parm   DWORD | dataSize |
 *      Size in bytes of the data block
 *  @parm   LPDW | pTopicIdArray |
 *      Sorted array of topic ids to delete
 *  @parm   DWORD | dwArraySize |
 *      Number of elements in pTopicIdArray (must be > 0)
 *  @rdesc  S_OK, E_OUTOFMEMORY, E_BADFILE on a short read, a decoding
 *      error, or the error block value set by FileSeekWrite
 *  @comm   The block is a sequence of byte-aligned topic records whose
 *      ids are stored as deltas from the previous record. The stored ids
 *      and the sorted delete list are merged. Until the first deletion,
 *      records are only skipped. After it, every surviving record is
 *      re-encoded (with its delta recomputed) through a second cursor,
 *      CopyNode, which writes into the same buffer behind the read cursor.
 *************************************************************************/
PRIVATE int PASCAL NEAR DeleteTopicFromData (_LPIPB lpipb,
    FILEOFFSET dataOffset, DWORD FAR * pTopicCount, DWORD dataSize,
    LPDW pTopicIdArray, DWORD dwArraySize)
{
    HRESULT fRet;
    ERRB errb;
    DWORD dwOldTopicCount;
    DWORD dwTopicId;
    DWORD dwTopicIdDelta;
    DWORD dwIndex;
    PNODEINFO pIndexDataNode = lpipb->pIndexDataNode;
    NODEINFO  CopyNode;
    PNODEINFO pCopyNode = &CopyNode;
    PIH20 pHeader = &lpipb->BTreeData.Header;
    OCCF occf = lpipb->occf;
    LPB  pStart = NULL;
    DWORD dwOldTopicId = 0;
    BYTE fetchOldData;
    BYTE fChanged;
    // Reduce to 0/1 before narrowing, so the flag survives even if the
    // mask does not fit in a byte.
    BYTE fNormalize = (BYTE)((lpipb->idxf & IDXF_NORMALIZE) != 0);
    
    // Make sure that we have enough memory to hold the data
    if (dataSize > pIndexDataNode->dwBlockSize)
    {
        HANDLE hNewMem;

        _GLOBALUNLOCK (pIndexDataNode->hMem);
        hNewMem = _GLOBALREALLOC (pIndexDataNode->hMem, dataSize,
            DLLGMEM_ZEROINIT);
        if (hNewMem == NULL)
        {
            // The old block is still valid: keep the node consistent
            // (handle, size, locked pointer) so the caller can free it.
            pIndexDataNode->pBuffer = _GLOBALLOCK (pIndexDataNode->hMem);
            return(E_OUTOFMEMORY);
        }
        
        pIndexDataNode->hMem = hNewMem;
        pIndexDataNode->dwBlockSize = dataSize;
        pIndexDataNode->pBuffer = _GLOBALLOCK (pIndexDataNode->hMem);
    }
    
    // Read in the data
    if (FileSeekRead (lpipb->hfpbIdxFile, pIndexDataNode->pCurPtr =
        pIndexDataNode->pBuffer, dataOffset,
        dataSize, &errb) != (long)dataSize)
        return E_BADFILE;
        
    pIndexDataNode->pMaxAddress = pIndexDataNode->pBuffer + dataSize;    
    pIndexDataNode->ibit = cbitBYTE - 1;
    
    // Copy the prelimary node info: the write cursor starts out identical
    // to the read cursor and shares its buffer.
    CopyNode = *pIndexDataNode;
    
    dwOldTopicCount = *pTopicCount;
    dwTopicId = dwIndex = 0;
    fetchOldData = TRUE;
    fChanged = FALSE;
    
    while (dwOldTopicCount > 0)
    {
        DWORD dwTmp;
        
        if (fetchOldData)
        {
            // Byte align
            if (pIndexDataNode->ibit != cbitBYTE - 1)
            {
                pIndexDataNode->ibit = cbitBYTE - 1;
                pIndexDataNode->pCurPtr ++;
            }

            // Keep track of the starting position            
            pStart = pIndexDataNode->pCurPtr;
            // Until something is deleted the write cursor just follows
            // the read cursor.
            if (fChanged == FALSE)
                pCopyNode->pCurPtr = pIndexDataNode->pCurPtr;
            
            // Get the topicId from the index file
            if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
                &dwTopicIdDelta)) != S_OK)
                return fRet;
            dwTopicId += dwTopicIdDelta;
            fetchOldData = FALSE;
        }
        
        // Stored topic is smaller than the next id to delete: keep it
        if (dwTopicId < pTopicIdArray[dwIndex])
        {
            if (fChanged == FALSE)
            {
                // Nothing deleted yet: just step over the record
                if (fNormalize)
                {
                    if ((fRet = FGetBits(pIndexDataNode, &dwTmp,
                        sizeof (USHORT) * cbitBYTE)) != S_OK)
                        return fRet;
                }

                // Occurrence data exists only when the index stores it;
                // a decoding failure must stop the scan, as in the
                // deletion path below.
                if (occf & OCCF_HAVE_OCCURRENCE)
                {
                    if ((fRet = SkipOldData (lpipb, pIndexDataNode)) != S_OK)
                        return(fRet);
                }
            }
            else
            {
                // Re-encode the record from its start behind the hole
                pIndexDataNode->pCurPtr = pStart;
                RemapData (lpipb, pCopyNode, pIndexDataNode,
                    dwTopicId, dwOldTopicId);
            }
            fetchOldData = TRUE;    
            dwOldTopicId = dwTopicId;
            dwOldTopicCount --;
            continue;
        }
        
        // Stored topic is larger than the current id to delete
        if (dwTopicId > pTopicIdArray[dwIndex])
        {
            // Try the next id of the delete list against the same topic
            if (dwIndex < dwArraySize - 1)
            {
                dwIndex++;
                continue;
            }
            
            // Delete list exhausted: without any change we are done
            if (fChanged == FALSE)
                return(S_OK);
                
            // Otherwise the remaining records still have to be moved
            pIndexDataNode->pCurPtr = pStart;
            RemapData (lpipb, pCopyNode, pIndexDataNode,
                dwTopicId, dwOldTopicId);
            fetchOldData =TRUE;
            dwOldTopicId = dwTopicId;
            dwOldTopicCount --;
            continue;
        }
        
        // Both TopicId are equal. Ignore the current data. dwOldTopicId is
        // deliberately left alone: the next surviving record's delta is
        // relative to the last record that was kept.
        fChanged = TRUE;    // We have changes
        if (fNormalize)
        {
            if ((fRet = FGetBits(pIndexDataNode, &dwTmp,
                sizeof (USHORT) * cbitBYTE)) != S_OK)
                return fRet;
        }

        if (occf & OCCF_HAVE_OCCURRENCE) 
        {
            if ((fRet = SkipOldData (lpipb, pIndexDataNode)) != S_OK)
                return(fRet);
        }
        
        (*pTopicCount)--;
        fetchOldData = TRUE;
        dwOldTopicCount--;
    }
    
    if (fChanged)
    {
        // Clear the tail that is no longer used by the compacted data
        MEMSET(pCopyNode->pCurPtr, 0,
            (size_t) (pCopyNode->pMaxAddress - pCopyNode->pCurPtr));

        // Write out the new data
        if (FileSeekWrite (lpipb->hfpbIdxFile, pIndexDataNode->pBuffer, dataOffset,
            dataSize, &errb) != (long)dataSize)
            return errb;
    }
    return(S_OK);
}    

/*************************************************************************
 *  @doc    PRIVATE
 *  @func   VOID PASCAL NEAR | RemapData |
 *      Copies one topic record from the read cursor to the write cursor,
 *      replacing the stored topic-id delta with one computed against the
 *      last record that was kept.
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block (compression keys and flags)
 *  @parm   PNODEINFO | pCopyNode |
 *      Write cursor; left byte-aligned after the record on return
 *  @parm   PNODEINFO | pIndexDataNode |
 *      Read cursor; pCurPtr must point at the first byte of the record
 *  @parm   DWORD | dwTopicId |
 *      Absolute topic id of the record being copied
 *  @parm   DWORD | dwOldTopicId |
 *      Absolute topic id of the previously kept record
 *  @comm   Values are read and re-emitted strictly one at a time, in
 *      stream order; the two cursors may share one buffer. Decoder status
 *      codes are ignored here because the function has no way to report
 *      them.
 *************************************************************************/
VOID PRIVATE PASCAL NEAR RemapData (_LPIPB lpipb, PNODEINFO pCopyNode,
    PNODEINFO pIndexDataNode, DWORD dwTopicId, DWORD dwOldTopicId)
{
    DWORD dwTmp;
    DWORD dwOccs;
    PIH20 pHeader = &lpipb->BTreeData.Header;
    OCCF  occf = lpipb->occf;
    
    // The record starts on a byte boundary
    pIndexDataNode->ibit = cbitBYTE - 1;

    // Skip TopicIdDelta, since we already have TopicId
    FGetDword(pIndexDataNode, pHeader->ckeyTopicId, &dwTmp);
    
    EmitDword (pCopyNode, dwTopicId - dwOldTopicId, pHeader->ckeyTopicId);
    
    // Normalized indexes carry a 16-bit value per topic: copy it verbatim
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        FGetBits(pIndexDataNode, &dwTmp, sizeof (USHORT) * cbitBYTE);
        EmitBits(pCopyNode, dwTmp, (BYTE)(sizeof (WORD) * cbitBYTE));
    }

    if (occf & OCCF_HAVE_OCCURRENCE)
    {
        // Get the number of occurrences
        FGetDword(pIndexDataNode, pHeader->ckeyOccCount, &dwOccs);
        EmitDword (pCopyNode, dwOccs, pHeader->ckeyOccCount);
    
        //
        //  One pass through here for each occurence in the
        //  current sub-list.
        //
        for (; dwOccs; dwOccs--)
        {
            //
            //  Keeping word-counts?  If so, get it.
            //
            if (occf & OCCF_COUNT) 
            {
                FGetDword(pIndexDataNode, pHeader->ckeyWordCount, &dwTmp);
                EmitDword(pCopyNode, dwTmp, pHeader->ckeyWordCount);
            }
            //
            //  Keeping byte-offsets?  If so, get it.
            //
            if (occf & OCCF_OFFSET) 
            {
                FGetDword(pIndexDataNode, pHeader->ckeyOffset, &dwTmp);
                EmitDword(pCopyNode, dwTmp, pHeader->ckeyOffset);
            }
        }
    }

    // Every record ends on a byte boundary. This has to happen whether or
    // not occurrence data is stored: the caller reads records byte-aligned
    // and clears the buffer from pCopyNode->pCurPtr onwards, so a pointer
    // left on a partly written byte would lose that byte.
    if (pCopyNode->ibit != cbitBYTE - 1)
    {
        pCopyNode->ibit = cbitBYTE - 1;
        pCopyNode->pCurPtr ++;
    }
}    

/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *  @func   VOID | EmitBitStreamDWord |
 *      Bitstream encoder: writes "dw" one-bits followed by a single
 *      zero-bit. The one-bits are produced in runs of at most
 *      cbitBYTE * sizeof(DWORD) bits taken from argdwBits[].
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *  @parm   DWORD | dw |
 *      Value to encode
 *  @parm   int | ckeyCenter |
 *      Unused by this scheme; present to match ENCODEDWORD
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitBitStreamDWord (PNODEINFO pNode, DWORD dw,
    int ckeyCenter)
{
    BYTE    cbitRun;

    while (dw != 0)
    {
        if (dw >= cbitBYTE * sizeof(DWORD))
        {
            // A full run; more one-bits remain afterwards
            cbitRun = cbitBYTE * sizeof(DWORD);
            dw -= cbitBYTE * sizeof(DWORD);
        }
        else
        {
            // Final, shorter run
            cbitRun = (BYTE)dw;
            dw = 0;
        }
        EmitBits(pNode, argdwBits[cbitRun], (BYTE)cbitRun);
    }

    // Terminating zero-bit
    EmitBool(pNode, 0);
}
    
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *  @func   VOID | EmitFixedDWord |
 *      Fixed-width encoder: writes the value in a field of
 *      (ckeyCenter + 1) bits.
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *  @parm   DWORD | dw |
 *      Value to encode
 *  @parm   int | ckeyCenter |
 *      The key's center; the field is one bit wider than this value
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitFixedDWord (PNODEINFO pNode, DWORD dw,
    int ckeyCenter)
{
    BYTE    cbitField = (BYTE)(ckeyCenter + 1);

    EmitBits (pNode, dw, cbitField);
}

/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *  @func   VOID | EmitBellDWord |
 *      "Bell" encoder. A value that needs no more than ckeyCenter bits is
 *      written as a zero-bit followed by ckeyCenter bits of the value. A
 *      larger value, needing n bits, is written as (n - ckeyCenter)
 *      one-bits followed by the value in n bits with its top bit cleared.
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *  @parm   DWORD | dw |
 *      Value to encode
 *  @parm   int | ckeyCenter |
 *      The key's center (width of the "small value" field)
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitBellDWord (PNODEINFO pNode, DWORD dw,
    int ckeyCenter)
{
    // Number of bits the value really takes
    BYTE cbitNeeded = (BYTE)CbitBitsDw(dw);

    if (cbitNeeded > ckeyCenter)
    {
        // Large value: a run of one-bits announces how many extra bits
        // are used ...
        EmitBits(pNode, argdwBits[cbitNeeded - ckeyCenter],
            (BYTE)(cbitNeeded - ckeyCenter));

        // ... then the value itself in "cbitNeeded" bits, high bit zeroed
        EmitBits(pNode, dw & argdwBits[cbitNeeded - 1], cbitNeeded);
    }
    else
    {
        // Small value: a zero-bit, then the value in "ckeyCenter" bits,
        // which is guaranteed to be enough
        EmitBool(pNode, 0);
        EmitBits(pNode, dw, (BYTE)(ckeyCenter));
    }
}


/*************************************************************************
 *
 * @doc  PRIVATE INDEXING
 *       
 * @func VOID | EmitBits |
 *    Writes the low "cBits" bits of a value into the output buffer, most
 *    significant bit first, starting at bit pNode->ibit of *pNode->pCurPtr
 *    and moving on to the following bytes as each one fills up.
 *
 * @parm PNODEINFO | pNode |
 *    Pointer to the output data structure
 *
 * @parm DWORD | dwVal |
 *    DWORD value to write. Bits above "cBits" are ignored.
 *
 * @parm BYTE | cBits |
 *    Number of bits to write from dwVal
 *
 * @comm A byte is cleared when its first bit is written (ibit ==
 *    cbitBYTE - 1). On return ibit may be -1, meaning the current byte is
 *    full; the next write advances the pointer.
 *************************************************************************/

PRIVATE VOID PASCAL NEAR EmitBits (PNODEINFO pNode, DWORD dwVal, BYTE cBits)
{
    BYTE    cbitThisPassBits;
    BYTE    bThis;
    DWORD   dwChunk;

    // Loop until no bits left
    while (cBits != 0) 
    {
        // Current byte is full: move to the next one
        if (pNode->ibit < 0) 
        {
            pNode->pCurPtr++;
            pNode->ibit = cbitBYTE - 1;
        }

        // Write as many bits as fit into the rest of this byte
        cbitThisPassBits = (BYTE)((pNode->ibit + 1 < cBits) ?
            pNode->ibit + 1 : cBits);

        // A brand-new byte starts from zero, otherwise keep what is there
        bThis = (pNode->ibit == cbitBYTE - 1) ?
            0 : *pNode->pCurPtr;

        // Take the next "cbitThisPassBits" bits of the value. They are
        // masked so that stray bits above "cBits" cannot be OR-ed over
        // bits already stored in this byte.
        dwChunk = (dwVal >> (cBits - cbitThisPassBits)) &
            (((DWORD)1 << cbitThisPassBits) - 1);
        bThis |= (BYTE)(dwChunk << (pNode->ibit - cbitThisPassBits + 1));

        *pNode->pCurPtr = (BYTE)bThis;
        pNode->ibit -= cbitThisPassBits;
        cBits -= (BYTE)cbitThisPassBits;
    }
}


/*************************************************************************
 *
 * @doc  PRIVATE INDEXING
 *       
 * @func VOID | EmitBool |
 *    Appends one bit to the output buffer at the current bit position.
 *
 * @parm PNODEINFO | pNode |
 *    Pointer to the output data structure
 *
 * @parm BOOL | fVal |
 *    Bit to write: non-zero writes a one-bit, zero writes a zero-bit
 *************************************************************************/

PRIVATE VOID PASCAL NEAR EmitBool (PNODEINFO pNode, BOOL fVal)
{
    // No room left in the current byte: step to the next one
    if (pNode->ibit < 0) 
    {
        ++pNode->pCurPtr;
        pNode->ibit = cbitBYTE - 1;
    }

    // First bit of a byte: start from a clean byte
    if (pNode->ibit == cbitBYTE - 1)
    {
        *pNode->pCurPtr = (BYTE)0;
    }

    // Only a one-bit needs storing; a zero-bit is already in place
    if (fVal)
    {
        *pNode->pCurPtr |= 1 << pNode->ibit;
    }

    // Advance to the next (lower) bit position
    pNode->ibit--;
}


/*************************************************************************
 *  @doc    PRIVATE
 *  @func   LONG PASCAL FAR | CompareDWord |
 *      Three-way comparison of two unsigned 32-bit values, used as the
 *      sort callback for the topic id array.
 *  @parm   DWORD | dw1 |
 *      First value
 *  @parm   DWORD | dw2 |
 *      Second value
 *  @parm   LPV | lpParm |
 *      Unused
 *  @rdesc  Negative if dw1 < dw2, zero if equal, positive if dw1 > dw2
 *  @comm   The values are compared rather than subtracted: an unsigned
 *      difference converted to LONG has the wrong sign whenever the two
 *      values are more than 0x7FFFFFFF apart.
 *************************************************************************/
PUBLIC LONG PASCAL FAR CompareDWord (DWORD dw1, DWORD dw2, LPV lpParm)
{
    if (dw1 < dw2)
        return (-1);
    return ((dw1 > dw2) ? 1 : 0);
}