|
|
#include <mvopsys.h>
#include <mem.h>
#include <orkin.h>
#include <mvsearch.h>
#include "common.h"
#include "index.h"
#ifdef _DEBUG
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/ #endif
extern FDECODE DecodeTable[]; extern DWORD argdwBits[];
typedef VOID (PASCAL NEAR *ENCODEDWORD) (PNODEINFO, DWORD, int); PRIVATE VOID PASCAL NEAR EmitBitStreamDWord (PNODEINFO, DWORD, int); PRIVATE VOID PASCAL NEAR EmitFixedDWord (PNODEINFO, DWORD, int); PRIVATE VOID PASCAL NEAR EmitBellDWord (PNODEINFO, DWORD, int);
static ENCODEDWORD EncodeTable[] = { EmitBitStreamDWord, EmitFixedDWord, EmitBellDWord, NULL, };
#define EmitDword(p,dw,key) EncodeTable[(key).cschScheme]((p), (dw), (key).ucCenter)
#define FGetDword(a,b,c) (*DecodeTable[b.cschScheme])(a, b, c)
/*************************************************************************
* * INTERNAL PRIVATE FUNCTIONS * * All of them should be declared near * *************************************************************************/ PRIVATE int PASCAL NEAR TraverseLeafNode (_LPIPB, PNODEINFO, DWORD FAR *, DWORD); PRIVATE int PASCAL NEAR DeleteTopicFromData (_LPIPB lpipb, FILEOFFSET dataOffset, DWORD FAR *, DWORD, LPDW pTopicIdArray, DWORD dwArraySize); VOID PRIVATE PASCAL NEAR RemapData (_LPIPB, PNODEINFO, PNODEINFO, DWORD, DWORD); VOID PRIVATE PASCAL NEAR EmitBits (PNODEINFO pNode, DWORD dwVal, BYTE cBits); PRIVATE VOID PASCAL NEAR EmitBool (PNODEINFO pNode, BOOL fVal);
PUBLIC LONG PASCAL FAR CompareDWord (DWORD, DWORD, LPV lpParm);
/*************************************************************************
 *  @doc    API
 *
 *  @func   HRESULT FAR PASCAL | MVIndexTopicDelete |
 *      Delete topics from an index
 *
 *  @parm   HFPB | hSysFile |
 *      Handle to an opened system file, maybe NULL
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index info. This structure is obtained through
 *      IndexInitiate()
 *
 *  @parm   SZ | szIndexName |
 *      Name of the index. If hSysFile is NULL, this is a regular DOS file
 *      else it is a subfile of hSysFile
 *
 *  @parm   DWORD FAR * | rgTopicId |
 *      Array of topic ids to be deleted from the index. The array is
 *      handed to HugeDataSort() before use, so the caller should expect
 *      its element order to change.
 *
 *  @parm   DWORD | dwCount |
 *      Number of elements in the array
 *
 *  @rdesc  S_OK, or other errors. E_INVALIDARG if lpipb or rgTopicId is
 *      NULL or dwCount is 0; E_OUTOFMEMORY if a work buffer cannot be
 *      allocated; otherwise the error of the failing helper.
 *
 *  @comm   Every resource acquired here is released on every exit path
 *      through the single cleanup chain at the bottom of the function
 *      (labels run in reverse order of acquisition).
 *************************************************************************/
HRESULT PUBLIC EXPORT_API FAR PASCAL MVIndexTopicDelete (HFPB hSysFile,
    _LPIPB lpipb, SZ szIndexName, DWORD FAR * rgTopicId, DWORD dwCount)
{
    PNODEINFO pNodeInfo = NULL;
    int fRet;
    int cLevel;
    int cMaxLevel;
    WORD wLen;
    LPB pCur;

    if (lpipb == NULL || rgTopicId == NULL || dwCount == 0)
        return(E_INVALIDARG);

    // Set the bState
    lpipb->bState = DELETING_STATE;

    // Open the index file
    if ((fRet = IndexOpenRW(lpipb, hSysFile, szIndexName)) != S_OK)
        goto exit00;

    // Allocate the node used to walk the B-tree
    if ((pNodeInfo = AllocBTreeNode (lpipb)) == NULL)
    {
        fRet = E_OUTOFMEMORY;
        goto exit0;
    }

    // Allocate the scratch buffer used while extracting words
    if ((lpipb->hTmpBuf = _GLOBALALLOC (DLLGMEM_ZEROINIT,
        lpipb->BTreeData.Header.dwMaxWLen * 2)) == NULL)
    {
        // fRet still holds S_OK from IndexOpenRW(); report the failure
        fRet = E_OUTOFMEMORY;
        goto exit0;
    }
    lpipb->pTmpBuf = (LPB)_GLOBALLOCK (lpipb->hTmpBuf);

    // Allocate the node that holds the topic data of one word
    if (((lpipb->pIndexDataNode = AllocBTreeNode (lpipb))) == NULL)
    {
        fRet = E_OUTOFMEMORY;
        goto exit1;
    }

    pNodeInfo->nodeOffset = lpipb->BTreeData.Header.foIdxRoot;
    cMaxLevel = lpipb->BTreeData.Header.cIdxLevels - 1;

    // Sort the incoming array.  pIndexDataNode is already allocated at
    // this point, so a failure must unwind through exit2.
    if ((fRet = HugeDataSort((LPV HUGE*)rgTopicId, dwCount,
        (FCOMPARE)CompareDWord, NULL, NULL, NULL)) != S_OK)
        goto exit2;

    // Move down the tree, based on the first offset of the block
    for (cLevel = 0; cLevel < cMaxLevel; cLevel++)
    {
        if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, pNodeInfo,
            FALSE)) != S_OK)
        {
            // NOTE(review): hData is not allocated anywhere in this
            // function; the release is kept exactly as the original
            // code had it -- confirm who owns lpipb->hData.
            _GLOBALUNLOCK(lpipb->hData);
            _GLOBALFREE(lpipb->hData);
            lpipb->hData = NULL;
            goto exit2;
        }
        pCur = pNodeInfo->pBuffer + sizeof(WORD);   // Skip cbLeft
        pCur = ExtractWord (lpipb->pTmpBuf, pCur, &wLen);
        pCur += ReadFileOffset (&pNodeInfo->nodeOffset, pCur);
    }

    // Handle leaf nodes: follow the chain until the nil offset
    while (!FoEquals (pNodeInfo->nodeOffset, foNil))
    {
        // A read failure must release everything acquired above
        if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, pNodeInfo,
            TRUE)) != S_OK)
            goto exit2;

        if ((fRet = TraverseLeafNode (lpipb, pNodeInfo, rgTopicId,
            dwCount)) != S_OK)
            goto exit2;

        // The offset of the next leaf is stored at the start of the node
        ReadFileOffset (&pNodeInfo->nodeOffset, pNodeInfo->pBuffer);
    }
    fRet = S_OK;

exit2:
    FreeBTreeNode (lpipb->pIndexDataNode);
    lpipb->pIndexDataNode = NULL;

exit1:
    _GLOBALUNLOCK(lpipb->hTmpBuf);
    _GLOBALFREE(lpipb->hTmpBuf);
    lpipb->hTmpBuf = NULL;

exit0:
    FileClose(lpipb->hfpbIdxFile);
    FreeBTreeNode (pNodeInfo);

exit00:
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        FreeHandle (lpipb->wi.hSigma);
        FreeHandle (lpipb->wi.hLog);
        lpipb->wi.hSigma = lpipb->wi.hLog = NULL;
    }
    return(fRet);
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   int PASCAL NEAR | TraverseLeafNode |
 *      Walk every word entry of one leaf node, remove the given topics
 *      from each word's topic data, and patch the entry's topic count in
 *      place. The node is written back to the index file only if at
 *      least one count changed.
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index info
 *
 *  @parm   PNODEINFO | pNodeInfo |
 *      Leaf node already read in; entries are scanned from pCurPtr up to
 *      pMaxAddress
 *
 *  @parm   DWORD FAR * | rgTopicId |
 *      Array of topic ids to delete (passed through to
 *      DeleteTopicFromData)
 *
 *  @parm   DWORD | dwCount |
 *      Number of elements in rgTopicId
 *
 *  @rdesc  S_OK, the error returned by DeleteTopicFromData, or the error
 *      block value if the node cannot be written back
 *************************************************************************/
PRIVATE int PASCAL NEAR TraverseLeafNode (_LPIPB lpipb, PNODEINFO pNodeInfo,
    DWORD FAR *rgTopicId, DWORD dwCount)
{
    LPB pCur;
    LPB pMaxAddress;
    OCCF occf = lpipb->occf;
    WORD wLen;
    FILEOFFSET dataOffset;
    DWORD dataSize;
    BYTE TopicCnt[20];
    BYTE cbOldCount;
    BYTE cbNewCount;
    ERRB errb;
    BYTE fChange = FALSE;
    HRESULT fRet;

    pCur = pNodeInfo->pCurPtr;
    pMaxAddress = pNodeInfo->pMaxAddress;

    while (pCur < pMaxAddress)
    {
        DWORD dwTemp;
        DWORD dwTopicCount;
        DWORD dwOldTopicCount;
        LPB pSaved;
        LPB pTemp;

        pCur = ExtractWord (lpipb->pTmpBuf, pCur, &wLen);

        // Skip field id, topic count, fileoffset, datasize
        if (occf & OCCF_FIELDID)
            pCur += CbByteUnpack (&dwTemp, pCur);   // FieldId

        pTemp = pSaved = pCur;  // Save the pointer to the topic count offset

        cbOldCount = (BYTE)CbByteUnpack (&dwTopicCount, pCur);
        pCur += cbOldCount;
        pCur += ReadFileOffset (&dataOffset, pCur);
        pCur += CbByteUnpack (&dataSize, pCur);

        // Nothing to delete from a word that has no topics
        if (dwTopicCount == 0)
            continue;

        dwOldTopicCount = dwTopicCount;
        if ((fRet = DeleteTopicFromData (lpipb, dataOffset, &dwTopicCount,
            dataSize, rgTopicId, dwCount)) != S_OK)
            return(fRet);

        // No topic of this word was deleted
        if (dwOldTopicCount == dwTopicCount)
            continue;

        cbNewCount = (BYTE)CbBytePack (TopicCnt, dwTopicCount);

        // Update the topic count.  The new packed count must occupy
        // exactly cbOldCount bytes, because the rest of the entry cannot
        // move inside the node.
        if (cbOldCount > cbNewCount)
        {
            // Set the high bit of the last packed byte, then pad the
            // slack below: 0x80 for every pad byte but the last, and
            // 0x00 for the last one.
            TopicCnt[cbNewCount - 1] |= 0x80;
        }
        MEMCPY(pSaved, TopicCnt, cbNewCount);
        pSaved += cbNewCount;

        // Pad for ANY size difference.  The previous switch stopped after
        // a single pad byte for differences above 5 (and had no case for
        // 8), leaving stale bytes of the old count in the entry.
        if (cbOldCount > cbNewCount)
        {
            BYTE cbPad;

            for (cbPad = (BYTE)(cbOldCount - cbNewCount); cbPad > 1; cbPad--)
                *pSaved++ = 0x80;   // Set the high bit
            *pSaved = 0x00;         // Last pad byte: high bit clear
        }

#ifdef _DEBUG
        // The padded encoding must decode back to the new count
        CbByteUnpack (&dwOldTopicCount, pTemp);
        assert (dwOldTopicCount == dwTopicCount);
#endif
        fChange = TRUE; // The node has been changed
    }

    if (fChange == FALSE)
        return(S_OK);

    // Update the node
    if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
        pNodeInfo->nodeOffset, lpipb->BTreeData.Header.dwBlockSize,
        &errb)) != (LONG)lpipb->BTreeData.Header.dwBlockSize)
    {
        return(errb);
    }
    return(S_OK);
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   int PASCAL NEAR | DeleteTopicFromData |
 *      Read the topic data of one word, drop every topic whose id appears
 *      in pTopicIdArray, re-encode the surviving topics over the same
 *      buffer, zero the freed tail and write the data back to the same
 *      file offset.
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index info; lpipb->pIndexDataNode is the work node
 *
 *  @parm   FILEOFFSET | dataOffset |
 *      File offset of the word's topic data
 *
 *  @parm   DWORD FAR * | pTopicCount |
 *      In: number of topics stored. Out: decremented once per deleted
 *      topic
 *
 *  @parm   DWORD | dataSize |
 *      Size in bytes of the topic data
 *
 *  @parm   LPDW | pTopicIdArray |
 *      Topic ids to delete. The merge below only advances through the
 *      array, so it relies on ascending order.
 *
 *  @parm   DWORD | dwArraySize |
 *      Number of elements in pTopicIdArray
 *
 *  @rdesc  S_OK, E_OUTOFMEMORY, E_BADFILE, a decoder error, or the error
 *      block value if the write fails
 *
 *  @comm   NOTE(review): on _GLOBALREALLOC failure the node's previous
 *      handle has already been overwritten and dwBlockSize already
 *      raised. NOTE(review): the result of SkipOldData is ignored on the
 *      "keep, nothing changed yet" path but checked on the delete path.
 *************************************************************************/
PRIVATE int PASCAL NEAR DeleteTopicFromData (_LPIPB lpipb,
    FILEOFFSET dataOffset, DWORD FAR * pTopicCount, DWORD dataSize,
    LPDW pTopicIdArray, DWORD dwArraySize)
{
    PNODEINFO pSrcNode = lpipb->pIndexDataNode; // Reader over the old data
    NODEINFO  dstNode;                          // Writer over the same buffer
    PNODEINFO pDstNode = &dstNode;
    PIH20     pHeader = &lpipb->BTreeData.Header;
    OCCF      occf = lpipb->occf;
    BYTE      fNormalize = (lpipb->idxf & IDXF_NORMALIZE);
    HRESULT   fRet;
    ERRB      errb;
    LPB       pRecStart;            // Start of the record being examined
    DWORD     cTopicsLeft;          // Old records still to be visited
    DWORD     dwTopicId = 0;        // Absolute id of the current record
    DWORD     dwPrevKeptId = 0;     // Absolute id of the last record kept
    DWORD     dwDelta;
    DWORD     iDel = 0;             // Cursor into pTopicIdArray
    BYTE      fNeedFetch = TRUE;    // Next record header must be decoded
    BYTE      fDirty = FALSE;       // At least one record was deleted

    // Make sure that we have enough memory to hold the data
    if (dataSize > pSrcNode->dwBlockSize)
    {
        _GLOBALUNLOCK (pSrcNode->hMem);
        pSrcNode->dwBlockSize = dataSize;
        pSrcNode->hMem = _GLOBALREALLOC (pSrcNode->hMem,
            pSrcNode->dwBlockSize, DLLGMEM_ZEROINIT);
        if (pSrcNode->hMem == NULL)
            return(E_OUTOFMEMORY);
        pSrcNode->pBuffer = _GLOBALLOCK (pSrcNode->hMem);
    }

    // Read in the data
    pSrcNode->pCurPtr = pSrcNode->pBuffer;
    if (FileSeekRead (lpipb->hfpbIdxFile, pSrcNode->pCurPtr, dataOffset,
        dataSize, &errb) != (long)dataSize)
        return E_BADFILE;

    pSrcNode->pMaxAddress = pSrcNode->pBuffer + dataSize;
    pSrcNode->ibit = cbitBYTE - 1;

    // The writer starts out as a snapshot of the reader
    dstNode = *pSrcNode;

    cTopicsLeft = *pTopicCount;
    while (cTopicsLeft > 0)
    {
        DWORD dwScratch;

        if (fNeedFetch)
        {
            // Every record starts on a byte boundary
            if (pSrcNode->ibit != cbitBYTE - 1)
            {
                pSrcNode->ibit = cbitBYTE - 1;
                pSrcNode->pCurPtr++;
            }

            // Keep track of the starting position
            pRecStart = pSrcNode->pCurPtr;

            // Until the first deletion the writer simply trails the reader
            if (fDirty == FALSE)
                pDstNode->pCurPtr = pSrcNode->pCurPtr;

            // Get the topicId from the index file
            fRet = FGetDword(pSrcNode, pHeader->ckeyTopicId, &dwDelta);
            if (fRet != S_OK)
                return fRet;
            dwTopicId += dwDelta;
            fNeedFetch = FALSE;
        }

        if (dwTopicId == pTopicIdArray[iDel])
        {
            // This topic is deleted: consume its data without copying it
            fDirty = TRUE;
            if (fNormalize)
            {
                fRet = FGetBits(pSrcNode, &dwScratch,
                    sizeof (USHORT) * cbitBYTE);
                if (fRet != S_OK)
                    return fRet;
            }
            if (occf & OCCF_HAVE_OCCURRENCE)
            {
                fRet = SkipOldData (lpipb, pSrcNode);
                if (fRet != S_OK)
                    return(fRet);
            }
            (*pTopicCount)--;
            fNeedFetch = TRUE;
            cTopicsLeft--;
            continue;
        }

        if (dwTopicId > pTopicIdArray[iDel])
        {
            // Catch the delete list up with the current topic
            if (iDel < dwArraySize - 1)
            {
                iDel++;
                continue;
            }

            // Delete list exhausted and nothing was removed: done
            if (fDirty == FALSE)
                return(S_OK);
        }

        // The topic is kept
        if (fDirty == FALSE)
        {
            // Nothing removed so far: the record stays where it is
            if (fNormalize)
            {
                fRet = FGetBits(pSrcNode, &dwScratch,
                    sizeof (USHORT) * cbitBYTE);
                if (fRet != S_OK)
                    return fRet;
            }
            SkipOldData (lpipb, pSrcNode);
        }
        else
        {
            // Rewind to the record start and re-encode it at the writer
            pSrcNode->pCurPtr = pRecStart;
            RemapData (lpipb, pDstNode, pSrcNode, dwTopicId, dwPrevKeptId);
        }
        fNeedFetch = TRUE;
        dwPrevKeptId = dwTopicId;
        cTopicsLeft--;
    }

    if (fDirty)
    {
        // Clear whatever the shortened data no longer covers
        MEMSET(pDstNode->pCurPtr, 0,
            (size_t) (pDstNode->pMaxAddress - pDstNode->pCurPtr));

        // Write out the new data
        if (FileSeekWrite (lpipb->hfpbIdxFile, pSrcNode->pBuffer,
            dataOffset, dataSize, &errb) != (long)dataSize)
            return errb;
    }
    return(S_OK);
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   VOID | RemapData |
 *      Copy one topic record from the reader node to the writer node,
 *      replacing the stored topic id delta with one computed against the
 *      last topic that was kept.
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index info (compression keys, occf and idxf flags)
 *
 *  @parm   PNODEINFO | pCopyNode |
 *      Output node; left byte-aligned on return
 *
 *  @parm   PNODEINFO | pIndexDataNode |
 *      Input node; pCurPtr must point at the start of the record
 *
 *  @parm   DWORD | dwTopicId |
 *      Absolute topic id of the record being copied
 *
 *  @parm   DWORD | dwOldTopicId |
 *      Absolute topic id of the previously kept record
 *
 *  @comm   Decoder return values are not checked here; the function has
 *      no way to report them.
 *************************************************************************/
VOID PRIVATE PASCAL NEAR RemapData (_LPIPB lpipb, PNODEINFO pCopyNode,
    PNODEINFO pIndexDataNode, DWORD dwTopicId, DWORD dwOldTopicId)
{
    DWORD dwTmp;
    DWORD dwOccs;
    PIH20 pHeader = &lpipb->BTreeData.Header;
    OCCF occf = lpipb->occf;

    // The record starts on a byte boundary
    pIndexDataNode->ibit = cbitBYTE - 1;

    // Skip TopicIdDelta, since we already have TopicId, and emit the
    // delta relative to the last topic kept
    FGetDword(pIndexDataNode, pHeader->ckeyTopicId, &dwTmp);
    EmitDword (pCopyNode, dwTopicId - dwOldTopicId, pHeader->ckeyTopicId);

    // Copy the 16-bit value stored per topic in a normalized index
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        FGetBits(pIndexDataNode, &dwTmp, sizeof (USHORT) * cbitBYTE);
        EmitBits(pCopyNode, dwTmp, (BYTE)(sizeof (WORD) * cbitBYTE));
    }

    if (occf & OCCF_HAVE_OCCURRENCE)
    {
        // Get the number of occurrences
        FGetDword(pIndexDataNode, pHeader->ckeyOccCount, &dwOccs);
        EmitDword (pCopyNode, dwOccs, pHeader->ckeyOccCount);

        //
        // One pass through here for each occurence in the
        // current sub-list.
        //
        for (; dwOccs; dwOccs--)
        {
            //
            // Keeping word-counts?  If so, get it.
            //
            if (occf & OCCF_COUNT)
            {
                FGetDword(pIndexDataNode, pHeader->ckeyWordCount, &dwTmp);
                EmitDword(pCopyNode, dwTmp, pHeader->ckeyWordCount);
            }

            //
            // Keeping byte-offsets?  If so, get it.
            //
            if (occf & OCCF_OFFSET)
            {
                FGetDword(pIndexDataNode, pHeader->ckeyOffset, &dwTmp);
                EmitDword(pCopyNode, dwTmp, pHeader->ckeyOffset);
            }
        }
    }

    // Byte-align the output on EVERY path.  The reader in
    // DeleteTopicFromData byte-aligns before each record and zero-fills
    // from pCopyNode->pCurPtr afterwards, so leaving the cursor inside a
    // partially written byte (as the former early return did for indexes
    // without occurrence data) would corrupt the record just written.
    if (pCopyNode->ibit != cbitBYTE - 1)
    {
        pCopyNode->ibit = cbitBYTE - 1;
        pCopyNode->pCurPtr ++;
    }
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   VOID | EmitBitStreamDWord |
 *      Bitstream scheme: writes "dw" one-bits followed by a single
 *      zero-bit.
 *
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *
 *  @parm   DWORD | dw |
 *      Value to encode
 *
 *  @parm   int | ckeyCenter |
 *      Not used by this scheme
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitBitStreamDWord (PNODEINFO pNode, DWORD dw,
    int ckeyCenter)
{
    BYTE cbitRun;

    // Emit the run of one-bits in chunks of at most one DWORD's worth
    while (dw != 0)
    {
        if (dw >= cbitBYTE * sizeof(DWORD))
        {
            cbitRun = cbitBYTE * sizeof(DWORD);
            dw -= cbitBYTE * sizeof(DWORD);
        }
        else
        {
            cbitRun = (BYTE)dw;
            dw = 0;
        }
        EmitBits(pNode, argdwBits[cbitRun], (BYTE)cbitRun);
    }

    // Terminating zero-bit
    EmitBool(pNode, 0);
}

/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   VOID | EmitFixedDWord |
 *      Fixed scheme: writes the low "ckeyCenter + 1" bits of the value.
 *
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *
 *  @parm   DWORD | dw |
 *      Value to encode
 *
 *  @parm   int | ckeyCenter |
 *      Center of the compression key; the field is one bit wider
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitFixedDWord (PNODEINFO pNode, DWORD dw,
    int ckeyCenter)
{
    BYTE cbitField = (BYTE)(ckeyCenter + 1);

    EmitBits (pNode, dw, cbitField);
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   VOID | EmitBellDWord |
 *      "BELL" scheme. A value that fits in "ckeyCenter" bits is written
 *      as a zero-bit followed by "ckeyCenter" bits. A larger value is
 *      written as a run of one-bits (one per bit needed beyond the
 *      center) followed by the value itself with its top bit cleared.
 *
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *
 *  @parm   DWORD | dw |
 *      Value to encode
 *
 *  @parm   int | ckeyCenter |
 *      Center of the compression key
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitBellDWord (PNODEINFO pNode, DWORD dw,
    int ckeyCenter)
{
    // How many bits the value really takes
    BYTE cbitNeeded = (BYTE)CbitBitsDw(dw);

    if (cbitNeeded > ckeyCenter)
    {
        //
        // The value won't fit in "ckeyCenter" bits.
        // First, write out "cbitNeeded - ckeyCenter" one-bits.
        //
        EmitBits(pNode, argdwBits[cbitNeeded - ckeyCenter],
            (BYTE)(cbitNeeded - ckeyCenter));

        //
        // Now, write out the value in "cbitNeeded" bits,
        // but zero the high-bit first.
        //
        EmitBits(pNode, dw & argdwBits[cbitNeeded - 1], cbitNeeded);
    }
    else
    {
        //
        // Small value: a zero, then "ckeyCenter" bits of the value,
        // which is guaranteed to be enough.
        //
        EmitBool(pNode, 0);
        EmitBits(pNode, dw, (BYTE)(ckeyCenter));
    }
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   VOID | EmitBits |
 *      Writes the low "cBits" bits of a value into the output buffer,
 *      most significant bit first, continuing at the node's current bit
 *      position and spilling into following bytes as needed.
 *
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure. ibit is the index of the
 *      next free bit in *pCurPtr (cbitBYTE - 1 for a fresh byte, negative
 *      once the byte is full).
 *
 *  @parm   DWORD | dwVal |
 *      DWORD value to write. The value is not masked here, so bits above
 *      "cBits" should be zero.
 *
 *  @parm   BYTE | cBits |
 *      Number of bits to write from dwVal
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitBits (PNODEINFO pNode, DWORD dwVal, BYTE cBits)
{
    BYTE cbitChunk;     // Bits written during the current pass
    BYTE bOut;          // Byte being assembled

    // Loop until no bits left
    while (cBits != 0)
    {
        // Current byte is full: move on to the next one
        if (pNode->ibit < 0)
        {
            pNode->pCurPtr++;
            pNode->ibit = cbitBYTE - 1;
        }

        // Write as many bits as the current byte can still take
        if (pNode->ibit + 1 < cBits)
            cbitChunk = (BYTE)(pNode->ibit + 1);
        else
            cbitChunk = cBits;

        // A brand-new byte starts from zero; otherwise keep what is there
        if (pNode->ibit == cbitBYTE - 1)
            bOut = 0;
        else
            bOut = *pNode->pCurPtr;

        bOut |= ((dwVal >> (cBits - cbitChunk)) <<
            (pNode->ibit - cbitChunk + 1));
        *pNode->pCurPtr = (BYTE)bOut;

        pNode->ibit -= cbitChunk;
        cBits -= (BYTE)cbitChunk;
    }
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   VOID | EmitBool |
 *      Writes a single bit into the output buffer.
 *
 *  @parm   PNODEINFO | pNode |
 *      Pointer to the output data structure
 *
 *  @parm   BOOL | fVal |
 *      BOOL value to write; any non-zero value writes a one-bit
 *************************************************************************/
PRIVATE VOID PASCAL NEAR EmitBool (PNODEINFO pNode, BOOL fVal)
{
    // This byte is full, point to a new byte
    if (pNode->ibit < 0)
    {
        pNode->pCurPtr++;
        pNode->ibit = cbitBYTE - 1;
    }

    // Zero out a brand-new byte.
    if (pNode->ibit == cbitBYTE - 1)
    {
        *pNode->pCurPtr = (BYTE)0;
    }

    // Write my boolean.
    if (fVal)
    {
        *pNode->pCurPtr |= 1 << pNode->ibit;
    }

    // One bit consumed
    pNode->ibit--;
}
/*************************************************************************
 *  @doc    PRIVATE INDEXING
 *
 *  @func   LONG PASCAL FAR | CompareDWord |
 *      Three-way comparison of two unsigned DWORD values, used as the
 *      comparison callback of HugeDataSort().
 *
 *  @parm   DWORD | dw1 |
 *      First value
 *
 *  @parm   DWORD | dw2 |
 *      Second value
 *
 *  @parm   LPV | lpParm |
 *      Not used
 *
 *  @rdesc  Negative if dw1 < dw2, zero if equal, positive if dw1 > dw2
 *
 *  @comm   The values are compared explicitly rather than subtracted:
 *      an unsigned difference converted to LONG has the wrong sign
 *      whenever the operands are 2^31 or more apart.
 *************************************************************************/
PUBLIC LONG PASCAL FAR CompareDWord (DWORD dw1, DWORD dw2, LPV lpParm)
{
    if (dw1 < dw2)
        return (-1);
    if (dw1 > dw2)
        return (1);
    return (0);
}
|