/* Source: mirror of https://github.com/tongzx/nt5src (2631 lines, 88 KiB). */
/*************************************************************************
*                                                                        *
*  UPDATE.C                                                              *
*                                                                        *
*  Copyright (C) Microsoft Corporation 1990-1994                         *
*  All Rights reserved.                                                  *
*                                                                        *
**************************************************************************
*                                                                        *
*  Module Intent                                                         *
*     Update an existing index file with newly collected index data      *
*     (see MVIndexUpdate / MVIndexUpdateEx).                             *
*                                                                        *
**************************************************************************
*                                                                        *
*  Current Owner: BinhN                                                  *
*                                                                        *
**************************************************************************/
|
|
|
|
#include <mvopsys.h>
|
|
#include <math.h>
|
|
#include <mem.h>
|
|
#include <orkin.h>
|
|
#include <mvsearch.h>
|
|
#include "common.h"
|
|
#include "index.h"
|
|
|
|
#ifdef _DEBUG
|
|
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
|
|
#endif
|
|
|
|
|
|
#define SAFE_SLACK 48 // Extra safety bytes
|
|
#define ESOUTPUT_BUFFER 0xFFFC // Size of output file buffer
|
|
// This must be at the size of the largest word + 12
|
|
// or word + 14 if OCCF_LENGTH is set
|
|
#define ESINPUT_BUFFER 0x7FFC // Size of input file buffers.
|
|
// Each ESB block get its own input buffer
|
|
// Min Size: Size of index word + ~8 bytes
|
|
|
|
#define NEW_NODE_ON_LEFT 0
|
|
#define NEW_NODE_ON_RIGHT 1
|
|
|
|
extern FENCODE EncodeTable[];
|
|
extern FDECODE DecodeTable[];
|
|
|
|
#define FAddDword(p,dw,key) EncodeTable[(key).cschScheme]((p), (dw), (key).ucCenter)
|
|
#define FGetDword(a,b,c) (*DecodeTable[b.cschScheme])(a, b, c)
|
|
|
|
typedef struct WORDINFO
|
|
{
|
|
DWORD dwWordLen;
|
|
DWORD dwFieldId;
|
|
DWORD dwNewTopicCount;
|
|
DWORD dwIndexTopicCount;
|
|
DWORD dwMergeTopicCount;
|
|
DWORD dwOldTopicId;
|
|
DWORD dwNewTopicId;
|
|
DWORD dwIndexTopicId;
|
|
DWORD dwDataSize;
|
|
FILEOFFSET dataLocation;
|
|
WORD fFlag;
|
|
WORD pad;
|
|
} WORDINFO, FAR *PWORDINFO;
|
|
|
|
typedef struct FREEBLOCK
|
|
{
|
|
DWORD dwBlockSize;
|
|
FILEOFFSET foBlockOffset;
|
|
}FREEBLOCK, FAR *PFREEBLOCK;
|
|
|
|
/* Four zero bytes; reads as a zero-length 2-byte-prefixed word. */
BYTE EmptyWord[4] = { 0, 0, 0, 0 };

#ifdef _DEBUG
/* Debug-only byte counters. */
DWORD dwOldDataLoss = 0;
DWORD dwNewDataSize = 0;
DWORD dwOldDataNeed = 0;
DWORD dwNewNodeSize = 0;
#endif
|
|
|
|
/*
 * Flag bits used while inserting records into the B-tree. The hex value is
 * repeated in each name's suffix. REPLACE_WORD_01 is passed to
 * AddRecordToBTree in this file; the remaining bits are presumably consumed
 * by the node-splitting helpers -- confirm against their definitions.
 */

// The current entry is to be replaced by the new entry. This happens for:
//   - a repeated entry in the leaf node
//   - the last entry in a stem node, which has to be changed to the last
//     word of the leaf node
#define REPLACE_WORD_01             0x0001

// The last-word buffer actually holds the word BEFORE the last one. Needed
// when the last word is replaced by the new word: compression of the new
// word requires the word that precedes it.
#define ONE_WORD_BEHIND_02          0x0002

// Update the offset field with the temp node's offset
#define USE_TEMP_NODE_04            0x0004

// Only the node offset address is to be updated. Since this is a
// fixed-size record, this speeds up the update.
#define UPDATE_NODE_ADDRESS_08      0x0008

// If set, rgpTmpNodeInfo is the new right node; otherwise it is the left node
#define USE_TEMP_FOR_RIGHT_NODE_10  0x0010

// The next word has to be skipped before inserting a new word. This happens
// when adding a new word to the end of the block, where pCurPtr points to
// the beginning of the last word.
#define SKIP_NEXT_WORD_20           0x0020

// Both rgpNodeInfo and rgpTmpNodeInfo are used, as left and right children.
// This happens when a new top node is created.
#define USE_BOTH_NODE_40            0x0040
|
|
|
|
/*************************************************************************
|
|
*
|
|
* INTERNAL PRIVATE FUNCTIONS
|
|
*
|
|
* All of them should be declared near
|
|
*
|
|
*************************************************************************/
|
|
PRIVATE HRESULT NEAR PASCAL ESFlushBuffer (LPESI);
|
|
PRIVATE HRESULT NEAR PASCAL ESFillBuffer (_LPIPB, LPESB);
|
|
PRIVATE void NEAR PASCAL ESMemory2Disk (_LPIPB, PMERGEHEADER);
|
|
PRIVATE HRESULT NEAR PASCAL ProcessFiles (_LPIPB lpipb, LPMERGEPARAMS);
|
|
PRIVATE int NEAR PASCAL CompareRecordBuffers (_LPIPB, LPB, LPB);
|
|
PRIVATE VOID NEAR PASCAL PQueueUp (_LPIPB, LPESB FAR *, LONG);
|
|
PRIVATE VOID NEAR PASCAL PQueueDown (_LPIPB);
|
|
PRIVATE PTOPICDATA PASCAL NEAR MergeTopicNode (PMERGEHEADER, PTOPICDATA, int);
|
|
PRIVATE VOID NEAR MergeOccurrence (PTOPICDATA, PTOPICDATA, int);
|
|
PRIVATE HRESULT NEAR PASCAL UpdateIndexBTree (_LPIPB, HFPB, LPB, LPB);
|
|
VOID SetQueue (LPESI pEsi);
|
|
PRIVATE HRESULT NEAR PASCAL AddWordToBTree (_LPIPB, LPB, PWORDINFO);
|
|
PRIVATE HRESULT PASCAL NEAR NewDataInsert(LPIPB lpipb, PFILEDATA pInfile,
|
|
PNODEINFO FAR *rgpNodeInfo, LPB pWord, PWORDINFO pWordInfo);
|
|
PRIVATE HRESULT PASCAL NEAR CreateNewNode(_LPIPB lpipb, int cLevel,
|
|
int fIsStemNode, int fAfter);
|
|
PRIVATE PASCAL NEAR AddRecordToBTree (_LPIPB lpipb, LPB pWord,
|
|
PWORDINFO pWordInfo, int cLevel, int fReplaceWord);
|
|
PRIVATE HRESULT PASCAL NEAR WriteNewDataRecord (_LPIPB, PWORDINFO);
|
|
PRIVATE HRESULT GetFreeBlock (_LPIPB, PFREEBLOCK, DWORD);
|
|
PRIVATE HRESULT PASCAL NEAR CopyBlockFile (PFILEDATA, HFPB, FILEOFFSET, DWORD);
|
|
PRIVATE HRESULT PASCAL FAR EmitOldData (_LPIPB, PNODEINFO, PWORDINFO);
|
|
PRIVATE HRESULT PASCAL FAR EmitNewData (_LPIPB, PWORDINFO, BOOL);
|
|
PRIVATE HRESULT PASCAL NEAR UpdateDataNode (_LPIPB lpipb, PWORDINFO pWordInfo);
|
|
PRIVATE int PASCAL NEAR SplitNodeAndAddData (_LPIPB lpipb, LPB pWord,
|
|
PWORDINFO pWordInfo, int cLevel, int fFlag, int fIsStemNode);
|
|
PRIVATE int PASCAL NEAR CopyNewDataToStemNode (_LPIPB lpipb,
|
|
PNODEINFO pTmpNode, LPB pWord, LPB pLastWord, int cLevel, int fFlag);
|
|
PRIVATE int PASCAL NEAR CopyNewDataToLeafNode (_LPIPB lpipb, PNODEINFO pTmpNode,
|
|
PWORDINFO pWordInfo, LPB pWord, LPB pLastWord);
|
|
VOID GetLastWordInNode (_LPIPB lpipb, PNODEINFO pNodeinfo, BOOL flag);
|
|
PRIVATE HRESULT PASCAL FAR SkipNewData (_LPIPB lpipb, PWORDINFO pWordInfo);
|
|
HRESULT CheckLeafNode (PNODEINFO pNodeInfo, int occf);
|
|
HRESULT CheckStemNode (PNODEINFO pNodeInfo);
|
|
|
|
|
|
/*************************************************************************
|
|
*
|
|
* INTERNAL PUBLIC FUNCTIONS
|
|
*
|
|
* All of them should be declared far, unless we know they belong to
|
|
* the same segment. They should be included in some include file
|
|
*
|
|
*************************************************************************/
|
|
HRESULT FAR PASCAL FlushTree(_LPIPB lpipb);
|
|
PUBLIC HRESULT FAR PASCAL MergeSortTreeFile (_LPIPB, LPMERGEPARAMS);
|
|
PUBLIC HRESULT FAR PASCAL FillInputBuffer (LPESB, HFPB);
|
|
PUBLIC VOID PASCAL FAR FreeBTreeNode (PNODEINFO pNode);
|
|
PUBLIC PNODEINFO PASCAL FAR AllocBTreeNode (_LPIPB lpipb);
|
|
PUBLIC PASCAL FAR PrefixCompressWord (LPB, LPB, LPB, int);
|
|
PUBLIC DWORD PASCAL FAR WriteDataNode (_LPIPB, DWORD, PHRESULT);
|
|
PUBLIC HRESULT PASCAL FAR IndexOpenRW (LPIPB, HFPB, LSZ);
|
|
PUBLIC HRESULT PASCAL FAR SkipOldData (_LPIPB, PNODEINFO);
|
|
PUBLIC LONG PASCAL FAR CompareDWord (DWORD, DWORD, LPV lpParm);
|
|
|
|
#ifdef _DEBUG
|
|
static LONG Count = 0;
|
|
#endif
|
|
|
|
/*************************************************************************
|
|
*
|
|
* @doc EXTERNAL API INDEX
|
|
*
|
|
* @func HRESULT FAR PASCAL | MVIndexUpdate |
|
|
* This function will update an index file based on the information
|
|
* collected in the Index parameter block.
|
|
*
|
|
* @parm HFPB | hSysFile |
|
|
* System file handle.
|
|
* If it is 0, this function will open the system file
|
|
* specified in lszFilename, and then close it after finishing the
|
|
* index update. If the system file does not exist, then this function
|
|
* will create it.
|
|
* If it is non-zero, then the system file is already opened. Only the
|
|
* index sub-file needs to be created
|
|
*
|
|
* @parm LSZ | lszFilename |
|
|
* Index filename.
|
|
* If hSysFile is non-zero, the format is: !index_filename
|
|
* if hSysFile is zero, the format is: dos_filename[!index_filename]
|
|
* If !index_filename is not specified, the default name will be used
|
|
* if hSysFile == 0 and there is no '!', this is a regular DOS file
|
|
*
|
|
* @parm LPIPB | lpipb |
|
|
* Pointer to Index Parameter Block. This structure contains all the
|
|
* information necessary to update the index file
|
|
* *
|
|
* @rdesc S_OK if succeeded, or other errors
|
|
*
|
|
*************************************************************************/
|
|
PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexUpdate (HFPB hSysFile,
|
|
_LPIPB lpipb, LSZ lszFilename)
|
|
{
|
|
return MVIndexUpdateEx(hSysFile, lpipb, lszFilename, NULL, 0);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
*
|
|
* @doc EXTERNAL API INDEX
|
|
*
|
|
* @func HRESULT FAR PASCAL | MVIndexUpdateEx |
|
|
* This function will update an index file based on the information
|
|
* collected in the Index parameter block, and also will "pre-delete" the
|
|
* topics in the given list from the LPIPB before updating. This function is useful
|
|
* in scenarios where new topics are continuously added into the index
|
|
* before knowledge of out-dated topics is available (e.g. netnews).
|
|
* This allows a single-pass update once the deletes are known.
|
|
*
|
|
* @parm HFPB | hSysFile |
|
|
* System file handle.
|
|
* If it is 0, this function will open the system file
|
|
* specified in lszFilename, and then close it after finishing the
|
|
* index update. If the system file does not exist, then this function
|
|
* will create it.
|
|
* If it is non-zero, then the system file is already opened. Only the
|
|
* index sub-file needs to be created
|
|
*
|
|
* @parm LSZ | lszFilename |
|
|
* Index filename.
|
|
* If hSysFile is non-zero, the format is: !index_filename
|
|
* if hSysFile is zero, the format is: dos_filename[!index_filename]
|
|
* If !index_filename is not specified, the default name will be used
|
|
* if hSysFile == 0 and there is no '!', this is a regular DOS file
|
|
*
|
|
* @parm LPIPB | lpipb |
|
|
* Pointer to Index Parameter Block. This structure contains all the
|
|
* information necessary to update the index file
|
|
*
|
|
* @parm LPDW | lpdwTopicList |
|
|
* Pointer to DWORD array of topic UIDs to be deleted
|
|
*
|
|
* @parm DWORD | dwCount |
|
|
* The number of topics in the array
|
|
*
|
|
* @rdesc S_OK if succeeded, or other errors
|
|
*
|
|
*************************************************************************/
|
|
PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexUpdateEx (HFPB hSysFile,
|
|
_LPIPB lpipb, LSZ lszFilename, DWORD FAR *rgTopicId, DWORD dwCount)
|
|
{
|
|
ERRB errb;
|
|
PHRESULT phr = &errb;
|
|
PFILEDATA pOutFile;
|
|
MERGEPARAMS mp;
|
|
HRESULT fRet; // Return value from this function.
|
|
|
|
// Flush the internal sort
|
|
// Flushes any records in the tree to disk
|
|
fRet = FlushTree(lpipb);
|
|
|
|
// Free all memory blocks
|
|
FreeISI (lpipb);
|
|
|
|
if (fRet != S_OK)
|
|
return(fRet);
|
|
|
|
if (lpipb->esi.cesb == 0)
|
|
// Nothing to process, there will be no index file
|
|
return S_OK;
|
|
|
|
// Set the state flag
|
|
lpipb->bState = UPDATING_STATE;
|
|
|
|
// Open the index file
|
|
if ((fRet = IndexOpenRW(lpipb, hSysFile, lszFilename)) != S_OK)
|
|
{
|
|
exit00:
|
|
if (lpipb->idxf & IDXF_NORMALIZE)
|
|
{
|
|
FreeHandle (lpipb->wi.hSigma);
|
|
FreeHandle (lpipb->wi.hLog);
|
|
lpipb->wi.hSigma = lpipb->wi.hLog = NULL;
|
|
}
|
|
|
|
return fRet;
|
|
}
|
|
|
|
if (rgTopicId && dwCount)
|
|
{
|
|
// Sort the incoming array
|
|
if ((fRet = HugeDataSort((LPV HUGE*)rgTopicId, dwCount,
|
|
(FCOMPARE)CompareDWord, NULL, NULL, NULL)) != S_OK)
|
|
goto exit00;
|
|
|
|
mp.rgTopicId = rgTopicId;
|
|
mp.dwCount = dwCount;
|
|
mp.lpTopicIdLast = rgTopicId;
|
|
}
|
|
|
|
if ((fRet = MergeSortTreeFile (lpipb, (rgTopicId && dwCount) ? &mp: NULL)) != S_OK)
|
|
{
|
|
FileClose(lpipb->hfpbIdxFile);
|
|
fRet = SetErrCode (phr, fRet);
|
|
goto exit00;
|
|
}
|
|
FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
|
|
|
|
// Open output file
|
|
pOutFile = &lpipb->OutFile;
|
|
if ((pOutFile->fFile = FileCreate (NULL, lpipb->isi.aszTempName,
|
|
REGULAR_FILE, phr)) == NULL)
|
|
{
|
|
FileClose(lpipb->hfpbIdxFile);
|
|
fRet = SetErrCode (phr, fRet);
|
|
goto exit00;
|
|
}
|
|
|
|
// Allocate output buffer
|
|
pOutFile->dwMax = FILE_BUFFER;
|
|
pOutFile->cbLeft = FILE_BUFFER;
|
|
if ((pOutFile->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
|
|
pOutFile->dwMax + SAFE_SLACK)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
exit0:
|
|
FileClose(lpipb->hfpbIdxFile);
|
|
FileClose (pOutFile->fFile);
|
|
FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
|
|
goto exit00;
|
|
}
|
|
pOutFile->pCurrent = pOutFile->pMem = _GLOBALLOCK (pOutFile->hMem);
|
|
// Build the permanent index
|
|
fRet = UpdateIndexBTree(lpipb, hSysFile, lpipb->esi.aszTempName,
|
|
lszFilename);
|
|
_GLOBALUNLOCK(pOutFile->hMem);
|
|
_GLOBALFREE(pOutFile->hMem);
|
|
pOutFile->hMem = NULL;
|
|
goto exit0;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
*
|
|
* @doc PRIVATE INDEXING
|
|
*
|
|
* @func HRESULT | UpdateIndexBTree |
|
|
* Allocates required memory and opens input files to create a B-Tree.
|
|
* Parses incoming words and calls AddRecordToBTree to process them.
|
|
*
|
|
* @parm _LPIPB | lpipb |
|
|
* Pointer to the index parameter block
|
|
*
|
|
* @parm LPB | lpszTemp |
|
|
* Filename of the temporary input file
|
|
*
|
|
* @parm LPB | szIndexFilename |
|
|
* Filename of the permanent B-Tree file
|
|
*
|
|
* @rdesc Returns S_OK on success or errors if failed
|
|
*
|
|
*************************************************************************/
|
|
|
|
PRIVATE HRESULT NEAR PASCAL UpdateIndexBTree (_LPIPB lpipb, HFPB hSysFile,
|
|
LPB lpszTemp, LPB szIndexFilename)
|
|
{
|
|
PFILEDATA pInFile; // Pointer to input data
|
|
DWORD dwBytesRead = 0; // Checks for EOF
|
|
PNODEINFO FAR * rgpNodeInfo;
|
|
PNODEINFO FAR * rgpTmpNodeInfo;
|
|
PNODEINFO pIndexDataNode;
|
|
ERRB errb;
|
|
PHRESULT phr = &errb;
|
|
PIH20 pHeader;
|
|
int cTreeLevel;
|
|
int iIndex;
|
|
LPB pWord;
|
|
WORDINFO WordInfo;
|
|
OCCF occf;
|
|
HRESULT fRet; // Return value
|
|
FILEOFFSET foFreeListOffset; // File Offset where the FreeList will be saved.
|
|
DWORD dwSizeFreeList; // Size of the FreeList to be saved.
|
|
|
|
|
|
rgpNodeInfo = lpipb->BTreeData.rgpNodeInfo;
|
|
rgpTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo;
|
|
|
|
MEMSET(&WordInfo, 0, sizeof(WORDINFO));
|
|
|
|
// Open input file
|
|
|
|
pInFile = &lpipb->InFile;
|
|
if ((pInFile->fFile = FileOpen (NULL, lpszTemp, REGULAR_FILE,
|
|
READ, phr)) == NULL)
|
|
return *phr;
|
|
|
|
// Allocate input buffer
|
|
pInFile->dwMax = FILE_BUFFER;
|
|
if ((pInFile->hMem =
|
|
_GLOBALALLOC (DLLGMEM_ZEROINIT, pInFile->dwMax + SAFE_SLACK)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
exit0:
|
|
FileClose (pInFile->fFile);
|
|
FileUnlink (NULL, lpszTemp, REGULAR_FILE);
|
|
return fRet;
|
|
}
|
|
pInFile->pMem = _GLOBALLOCK (pInFile->hMem);
|
|
pInFile->pCurrent = pInFile->pMem;
|
|
|
|
pHeader = &lpipb->BTreeData.Header;
|
|
|
|
// Allocate BTree block.
|
|
for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
|
|
{
|
|
if ((rgpNodeInfo[cTreeLevel] = AllocBTreeNode (lpipb)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
goto exit2;
|
|
}
|
|
if ((rgpTmpNodeInfo[cTreeLevel] = AllocBTreeNode (lpipb)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
goto exit2;
|
|
}
|
|
}
|
|
|
|
if (((lpipb->pIndexDataNode = pIndexDataNode =
|
|
AllocBTreeNode (lpipb))) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
goto exit2;
|
|
}
|
|
|
|
// Reallocate a bigger buffer. BTREE_NODE_SIZE is only good for btree node
|
|
_GLOBALUNLOCK (pIndexDataNode->hMem);
|
|
_GLOBALFREE (pIndexDataNode->hMem);
|
|
|
|
// Allocate 1M of memory for the data buffer
|
|
if ((pIndexDataNode->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
|
|
pIndexDataNode->dwBlockSize = FILE_BUFFER)) == NULL)
|
|
goto exit2;
|
|
|
|
pIndexDataNode->pCurPtr = pIndexDataNode->pBuffer =
|
|
_GLOBALLOCK (pIndexDataNode->hMem);
|
|
|
|
lpipb->pIndexDataNode->hfpbIdx = lpipb->hfpbIdxFile; // Index file to read from
|
|
|
|
// Remember the file offset of this node
|
|
rgpNodeInfo[0]->nodeOffset = pHeader->foIdxRoot;
|
|
|
|
// Read in data for the top stem node
|
|
|
|
if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, rgpNodeInfo[0],
|
|
pHeader->cIdxLevels > 1 ? FALSE : TRUE)) != S_OK)
|
|
{
|
|
exit2:
|
|
FreeHandle (pInFile->hMem);
|
|
for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
|
|
{
|
|
FreeBTreeNode (rgpNodeInfo[cTreeLevel]);
|
|
FreeBTreeNode (rgpTmpNodeInfo[cTreeLevel]);
|
|
}
|
|
goto exit0;
|
|
}
|
|
|
|
// Allocate temporary buffer for word. The buffer is allocated as followed:
|
|
// - Max word length * 2: for maximum word length. Minimum is 256
|
|
// - 3 byte: word length
|
|
// - 5 byte: Field Id
|
|
// - 5 byte: Topic count
|
|
// - 6 byte: data pointer
|
|
// iIndex is used as a tmp
|
|
|
|
iIndex = (WORD)(lpipb->BTreeData.Header.dwMaxWLen * 2);
|
|
if (iIndex < 1024)
|
|
iIndex = 1024;
|
|
iIndex += 3 + 5 + 5 + 6;
|
|
if ((lpipb->hTmpBuf = _GLOBALALLOC (DLLGMEM_ZEROINIT, iIndex * 2)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
goto exit2;
|
|
}
|
|
lpipb->pTmpBuf = (LPB)_GLOBALLOCK (lpipb->hTmpBuf);
|
|
lpipb->pWord = lpipb->pTmpBuf + iIndex;
|
|
|
|
// Allocate a big buffer for data
|
|
if ((lpipb->hData = _GLOBALALLOC(DLLGMEM_ZEROINIT,
|
|
lpipb->dwDataSize = 0x80000)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
goto exit2;
|
|
}
|
|
lpipb->pDataBuffer= _GLOBALLOCK(lpipb->hData);
|
|
|
|
// Load the input buffer & repeat until all records are processed
|
|
pInFile->dwMax = pInFile->cbLeft =
|
|
FileRead (pInFile->fFile, pInFile->pMem, pInFile->dwMax, phr);
|
|
fRet = S_OK;
|
|
|
|
pWord = lpipb->pWord;
|
|
occf = lpipb->BTreeData.Header.occf;
|
|
|
|
do
|
|
{
|
|
LPB pSrcPtr;
|
|
WORD wLen;
|
|
|
|
if (pInFile->cbLeft < CB_MAX_WORD_LEN * sizeof(DWORD) * 8)
|
|
{
|
|
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
|
|
pInFile->cbLeft += FileRead (pInFile->fFile,
|
|
pInFile->pMem + pInFile->cbLeft,
|
|
pInFile->dwMax - pInFile->cbLeft, &errb);
|
|
pInFile->dwMax = pInFile->cbLeft;
|
|
pInFile->pCurrent = pInFile->pMem;
|
|
}
|
|
|
|
// Extract the word and its info
|
|
pSrcPtr = pInFile->pCurrent + sizeof(DWORD); // Skip reclength
|
|
|
|
// Copy the word
|
|
MEMCPY (pWord, pSrcPtr, wLen = GETWORD((LPUW)pSrcPtr) + 2);
|
|
pSrcPtr += GETWORD((LPUW)pSrcPtr) + 2;
|
|
|
|
if (occf & OCCF_LENGTH)
|
|
{
|
|
pSrcPtr += CbByteUnpack(&WordInfo.dwWordLen, pSrcPtr);
|
|
CbBytePack (pWord + wLen, WordInfo.dwWordLen);
|
|
}
|
|
else
|
|
{
|
|
WordInfo.dwWordLen = wLen - 2;
|
|
}
|
|
if (occf & OCCF_FIELDID)
|
|
pSrcPtr += CbByteUnpack(&WordInfo.dwFieldId, pSrcPtr);
|
|
|
|
WordInfo.dwNewTopicCount = GETLONG((LPUL)pSrcPtr);
|
|
pSrcPtr += sizeof(DWORD);
|
|
|
|
pInFile->pCurrent = pSrcPtr;
|
|
pInFile->cbLeft = (LONG)(pInFile->dwMax - (pSrcPtr - pInFile->pMem));
|
|
|
|
#if 0
|
|
if (STRNICMP(pWord+2, "cylindeeer", 10) == 0)
|
|
_asm int 3;
|
|
#endif
|
|
#if 0
|
|
else
|
|
{
|
|
SkipNewData (lpipb, &WordInfo);
|
|
continue;
|
|
}
|
|
#endif
|
|
// Find/Add the record
|
|
|
|
if ((fRet = AddWordToBTree (lpipb, pWord, &WordInfo)) != S_OK)
|
|
{
|
|
exit3:
|
|
_GLOBALUNLOCK (lpipb->hTmpBuf);
|
|
_GLOBALFREE (lpipb->hTmpBuf);
|
|
_GLOBALUNLOCK(lpipb->hData);
|
|
_GLOBALFREE(lpipb->hData);
|
|
FreeBTreeNode (lpipb->pIndexDataNode);
|
|
lpipb->hData = lpipb->hTmpBuf = 0;
|
|
goto exit2;
|
|
}
|
|
|
|
pSrcPtr = pInFile->pCurrent;
|
|
|
|
// pInFile->pCurrent points to the record size
|
|
|
|
if (pInFile->cbLeft <= SAFE_SLACK ||
|
|
(LONG)GETLONG ((LPUL)pInFile->pCurrent) >= pInFile->cbLeft)
|
|
{
|
|
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
|
|
if ((pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
|
|
pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft, phr)) < 0)
|
|
{
|
|
fRet = *phr;
|
|
goto exit3;
|
|
}
|
|
|
|
pInFile->dwMax = pInFile->cbLeft;
|
|
pInFile->pCurrent = pInFile->pMem;
|
|
}
|
|
} while (fRet == S_OK && pInFile->cbLeft);
|
|
|
|
for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
|
|
{
|
|
if (rgpNodeInfo[cTreeLevel]->fFlag == TO_BE_UPDATE)
|
|
{
|
|
if ((FileSeekWrite(lpipb->hfpbIdxFile,
|
|
rgpNodeInfo[cTreeLevel]->pBuffer,
|
|
rgpNodeInfo[cTreeLevel]->nodeOffset,
|
|
lpipb->BTreeData.Header.dwBlockSize, phr)) != (LONG)lpipb->BTreeData.Header.dwBlockSize)
|
|
{
|
|
|
|
fRet = *phr;
|
|
goto exit3;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lpipb->idxf & IDXF_NORMALIZE)
|
|
{
|
|
LONG loop;
|
|
|
|
for (loop = lpipb->dwMaxTopicId; loop >= 0; loop--)
|
|
{
|
|
lpipb->wi.hrgsigma[loop] =
|
|
(float)sqrt ((double)lpipb->wi.hrgsigma[loop]);
|
|
}
|
|
|
|
pHeader->WeightTabSize = (lpipb->dwMaxTopicId + 1)* sizeof(float);
|
|
|
|
if (FileSeekWrite (lpipb->hfpbIdxFile, lpipb->wi.hrgsigma,
|
|
lpipb->foMaxOffset, pHeader->WeightTabSize, phr) !=
|
|
(LONG)pHeader->WeightTabSize)
|
|
{
|
|
fRet = *phr;
|
|
goto exit3;
|
|
}
|
|
pHeader->WeightTabOffset = lpipb->foMaxOffset;
|
|
}
|
|
|
|
// ERIC: 1/ Save the freelist info to the end of the file
|
|
// 2/ Update the header with the new freelist offset/size
|
|
if (lpipb->hFreeList)
|
|
{
|
|
LPBYTE lpbFreeList;
|
|
|
|
dwSizeFreeList = FreeListSize(lpipb->hFreeList,phr);
|
|
|
|
foFreeListOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwSizeFreeList,0), phr);
|
|
|
|
if (FoIsNil(foFreeListOffset))
|
|
foFreeListOffset = lpipb->foMaxOffset;
|
|
|
|
if((lpbFreeList = (LPBYTE) _GLOBALALLOCPTR(DLLGMEM_ZEROINIT, dwSizeFreeList)) == NULL)
|
|
return E_OUTOFMEMORY;
|
|
|
|
FreeListGetMem(lpipb->hFreeList, (LPVOID)lpbFreeList);
|
|
FileSeekWrite (lpipb->hfpbIdxFile, (LPBYTE)lpbFreeList,
|
|
foFreeListOffset, dwSizeFreeList, phr);
|
|
if (FoEquals(foFreeListOffset, lpipb->foMaxOffset))
|
|
dwSizeFreeList |= 0x80000000;
|
|
FreeListDestroy(lpipb->hFreeList);
|
|
lpipb->hFreeList = (HFREELIST) NULL;
|
|
_GLOBALFREEPTR(lpbFreeList);
|
|
}
|
|
|
|
// Copy info to header
|
|
if (pHeader->lcTopics < lpipb->lcTopics)
|
|
pHeader->lcTopics = lpipb->lcTopics;
|
|
|
|
if (pHeader->dwMaxFieldId < lpipb->dwMaxFieldId)
|
|
pHeader->dwMaxFieldId = lpipb->dwMaxFieldId;
|
|
if (pHeader->dwMaxWCount < lpipb->dwMaxWCount)
|
|
pHeader->dwMaxWCount = lpipb->dwMaxWCount;
|
|
if (pHeader->dwMaxOffset < lpipb->dwMaxOffset)
|
|
pHeader->dwMaxOffset = lpipb->dwMaxOffset;
|
|
if (pHeader->dwMaxWLen < lpipb->dwMaxWLen)
|
|
pHeader->dwMaxWLen = lpipb->dwMaxWLen;
|
|
pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
|
|
|
|
// ERIC: Garbage Collection
|
|
pHeader->foFreeListOffset = foFreeListOffset;
|
|
pHeader->dwFreeListSize = dwSizeFreeList;
|
|
// END
|
|
|
|
|
|
FileSeekWrite (lpipb->hfpbIdxFile, (LPB)pHeader,
|
|
MakeFo (0, 0), sizeof (IH20), phr);
|
|
fRet = S_OK;
|
|
goto exit3;
|
|
}
|
|
|
|
/*********************************************************************
|
|
* @func LPB PASCAL | AddWordToBTree |
|
|
* Find the location of a word in the index. This function also
|
|
* sets up all relevant data for the future update
|
|
*
|
|
* @parm LPIPB | lpipb |
|
|
* Pointer to index info
|
|
*
|
|
* @parm LPB | pWord |
|
|
* Word to be searched for. This is a 2-byte preceded Pascal string
|
|
*
|
|
* @parm PWORDINFO | pWordInfo |
|
|
* Pointer to word's info
|
|
*
|
|
* @rdesc
|
|
* S_OK or other errors. In case of success, pWordInfo will
|
|
* be filled with useful data
|
|
*********************************************************************/
|
|
PRIVATE HRESULT NEAR PASCAL AddWordToBTree (_LPIPB lpipb, LPB pWord,
|
|
PWORDINFO pWordInfo)
|
|
{
|
|
int cLevel;
|
|
LPB lpCurPtr;
|
|
int nCmp;
|
|
HRESULT fRet;
|
|
WORD RecSize = 0;
|
|
LPB lpMaxAddress;
|
|
ERRB errb;
|
|
PHRESULT phr = &errb;
|
|
WORD wWlen;
|
|
PNODEINFO pNodeInfo;
|
|
PNODEINFO pChildNode;
|
|
LPB pBTreeWord;
|
|
int cMaxLevel;
|
|
FILEOFFSET nodeOffset;
|
|
PNODEINFO FAR *rgpNodeInfo = lpipb->BTreeData.rgpNodeInfo;
|
|
OCCF occf = lpipb->occf;
|
|
LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
|
|
|
|
#if 0
|
|
Count++;
|
|
if (STRNICMP(pWord+2, "approeeaching", 11) == 0 ||
|
|
STRNICMP(pWord+2, "authenteeic", 11) == 0 ||
|
|
STRNICMP(pWord+2, "eastleeand", 10) == 0)
|
|
_asm int 3;
|
|
#endif
|
|
// Change to 0-based
|
|
cMaxLevel = lpipb->BTreeData.Header.cIdxLevels - 1;
|
|
|
|
// Remember the last level offset
|
|
nodeOffset = rgpNodeInfo[0]->nodeOffset;
|
|
|
|
/* Search in the stem nodes */
|
|
for (cLevel = 0; cLevel < cMaxLevel ; cLevel++)
|
|
{
|
|
//
|
|
// Set variables
|
|
//
|
|
pNodeInfo = rgpNodeInfo[cLevel];
|
|
pChildNode = rgpNodeInfo[cLevel + 1];
|
|
pChildNode->prevNodeOffset = foNil;
|
|
pBTreeWord = pNodeInfo->pTmpResult;
|
|
|
|
// Reload the node if neccessary
|
|
if (!FoEquals(pNodeInfo->nodeOffset, nodeOffset))
|
|
{
|
|
if (pNodeInfo->fFlag == TO_BE_UPDATE)
|
|
{
|
|
if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
|
|
pNodeInfo->nodeOffset, dwBlockSize,
|
|
&errb)) != (LONG)dwBlockSize)
|
|
return(errb);
|
|
}
|
|
pNodeInfo->nodeOffset = nodeOffset;
|
|
if ((fRet = ReadNewNode (lpipb->hfpbIdxFile, pNodeInfo,
|
|
FALSE)) != S_OK)
|
|
{
|
|
return SetErrCode (phr, fRet);
|
|
}
|
|
pNodeInfo->fFlag = 0;
|
|
}
|
|
lpMaxAddress = pNodeInfo->pMaxAddress;
|
|
|
|
lpCurPtr = pNodeInfo->pCurPtr; // points to the LAST ACCESSED word in the block
|
|
|
|
// The format of the stem node
|
|
// cbLeft | (Word | PointerToNode) | Slack
|
|
|
|
while (lpCurPtr < lpMaxAddress - 1)
|
|
{
|
|
// Save the last location. This would be the insertion point for
|
|
// update
|
|
pNodeInfo->pCurPtr = lpCurPtr;
|
|
|
|
// Reset the word length
|
|
wWlen = 0;
|
|
|
|
// Get the compressed word
|
|
lpCurPtr = ExtractWord(pBTreeWord, lpCurPtr, &wWlen);
|
|
|
|
/* Read in NodeId record */
|
|
lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
|
|
|
|
if ((nCmp = StrCmpPascal2(pWord, pBTreeWord)) == 0)
|
|
nCmp = (int)((WORD)pWordInfo->dwWordLen - wWlen );
|
|
|
|
if (nCmp > 0)
|
|
{
|
|
// We didn't find the location of the word yet
|
|
// Continue searching
|
|
|
|
if (lpCurPtr < pNodeInfo->pMaxAddress - 1)
|
|
{
|
|
MEMCPY (pNodeInfo->pLastWord, pBTreeWord,
|
|
*(LPUW)pBTreeWord + sizeof(WORD)); // erinfox RISC patch
|
|
}
|
|
pChildNode->prevNodeOffset = nodeOffset;
|
|
continue;
|
|
}
|
|
|
|
// We found the location of the word
|
|
break;
|
|
}
|
|
}
|
|
|
|
// At this point, nodeOffset is the node id of the leaf that
|
|
// is supposed to contain the searched word.
|
|
pNodeInfo = rgpNodeInfo[cMaxLevel];
|
|
if (!FoEquals(pNodeInfo->nodeOffset, nodeOffset))
|
|
{
|
|
if (pNodeInfo->fFlag == TO_BE_UPDATE)
|
|
{
|
|
if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
|
|
pNodeInfo->nodeOffset, dwBlockSize,
|
|
phr)) != dwBlockSize)
|
|
return(*phr);
|
|
}
|
|
pNodeInfo->nodeOffset = nodeOffset;
|
|
if ((fRet = ReadNewNode (lpipb->hfpbIdxFile, pNodeInfo,
|
|
TRUE)) != S_OK)
|
|
{
|
|
return SetErrCode (phr, fRet);
|
|
}
|
|
pNodeInfo->fFlag = 0;
|
|
lpCurPtr = pNodeInfo->pCurPtr;
|
|
}
|
|
else
|
|
{
|
|
// Reset all data
|
|
// lpCurPtr = pNodeInfo->pCurPtr = pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE;
|
|
lpCurPtr = pNodeInfo->pCurPtr;
|
|
}
|
|
pBTreeWord = pNodeInfo->pTmpResult;
|
|
lpMaxAddress = pNodeInfo->pMaxAddress;
|
|
|
|
// Reset the last word
|
|
*(LPWORD)pNodeInfo->pLastWord = 0;
|
|
|
|
// Leaf node structure: *
|
|
// (Word|FieldId|TopicCnt|PointerToNode|DataSize)*
|
|
for (;;)
|
|
{
|
|
DWORD dwFieldId;
|
|
|
|
// Save the last location. This would be the insertion point for
|
|
// update
|
|
pNodeInfo->pCurPtr = lpCurPtr;
|
|
|
|
if (lpCurPtr >= lpMaxAddress)
|
|
{
|
|
// Add to the end of the node
|
|
if ((fRet = WriteNewDataRecord (lpipb, pWordInfo)) != S_OK)
|
|
return(fRet);
|
|
return AddRecordToBTree (lpipb, pWord, pWordInfo, cMaxLevel, 0);
|
|
}
|
|
|
|
// Get the compressed word
|
|
lpCurPtr = ExtractWord(pBTreeWord, lpCurPtr, &wWlen);
|
|
|
|
// Get fieldif and topic count
|
|
if (occf & OCCF_FIELDID)
|
|
lpCurPtr += CbByteUnpack (&dwFieldId, lpCurPtr);
|
|
lpCurPtr += CbByteUnpack (&pWordInfo->dwIndexTopicCount, lpCurPtr);
|
|
|
|
// Get the data location and size
|
|
lpCurPtr += ReadFileOffset (&pWordInfo->dataLocation, lpCurPtr);
|
|
lpCurPtr += CbByteUnpack(&pWordInfo->dwDataSize, lpCurPtr);
|
|
|
|
if ((nCmp = StrCmpPascal2(pWord, pBTreeWord)) == 0)
|
|
{
|
|
if (occf & OCCF_LENGTH)
|
|
nCmp = (int)((WORD)pWordInfo->dwWordLen - wWlen);
|
|
if (nCmp == 0 && (occf & OCCF_FIELDID))
|
|
nCmp = (int)(pWordInfo->dwFieldId - dwFieldId);
|
|
}
|
|
|
|
if (nCmp > 0)
|
|
{
|
|
// We didn't find the location of the word yet
|
|
// Continue searching
|
|
MEMCPY (pNodeInfo->pLastWord, pBTreeWord,
|
|
*(LPUW)pBTreeWord+sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
|
|
continue;
|
|
}
|
|
if (nCmp == 0)
|
|
{
|
|
if ((fRet = UpdateDataNode (lpipb, pWordInfo)) != S_OK)
|
|
return(fRet);
|
|
|
|
return AddRecordToBTree (lpipb, pWord, pWordInfo, cMaxLevel,
|
|
REPLACE_WORD_01);
|
|
}
|
|
else
|
|
{
|
|
if ((fRet = WriteNewDataRecord (lpipb, pWordInfo)) != S_OK)
|
|
return(fRet);
|
|
return AddRecordToBTree (lpipb, pWord, pWordInfo, cLevel, 0);
|
|
}
|
|
break;
|
|
}
|
|
return S_OK;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
* @doc INTERNAL
|
|
*
|
|
* @func HRESULT PASCAL | ReadNewNode |
|
|
* Read in a new node from the disk if it is not the top node.
|
|
* For the top node, just reset various pointers
|
|
*
|
|
* @parm PNODEINFO | pNodeInfo |
|
|
* Pointer to leaf info
|
|
*
|
|
* @parm int | fLeafNode|
|
|
* TRUE if this is a leaf node
|
|
*
|
|
* @rdesc S_OK if succesful, otherwise other errors. On exit,
|
|
* lpCurPtr wil point to the beginning of the 1st word in the
|
|
* node
|
|
*
|
|
* @rcomm The format of the leaf node is different from a stem node
|
|
* Stem node structure: *
|
|
* CbLeft |* Word | PointerToNode *| Slack *
|
|
* *
|
|
* Leaf node structure: *
|
|
* NxtBlkPtr|CbLeft|*Word|FieldId|TopicCnt|PointerToNode|DataSize*|Slack *
|
|
* *
|
|
*************************************************************************/
|
|
PUBLIC HRESULT PASCAL FAR ReadNewNode (HFPB hfpb, PNODEINFO pNodeInfo,
|
|
int fLeafNode)
|
|
{
|
|
ERRB errb;
|
|
|
|
if (FileSeekRead (hfpb, pNodeInfo->pBuffer, pNodeInfo->nodeOffset,
|
|
pNodeInfo->dwBlockSize, &errb) != (long)pNodeInfo->dwBlockSize)
|
|
return E_BADFILE;
|
|
|
|
pNodeInfo->pCurPtr = pNodeInfo->pBuffer;
|
|
if (fLeafNode)
|
|
{
|
|
pNodeInfo->pCurPtr += ReadFileOffset (&pNodeInfo->nextNodeOffset,
|
|
pNodeInfo->pBuffer);
|
|
}
|
|
else
|
|
pNodeInfo->nextNodeOffset = foNil;
|
|
pNodeInfo->cbLeft = *(LPUW)(pNodeInfo->pCurPtr); // erinfox RISC patch
|
|
pNodeInfo->pCurPtr += sizeof(WORD);
|
|
pNodeInfo->pMaxAddress = pNodeInfo->pBuffer + pNodeInfo->dwBlockSize -
|
|
pNodeInfo->cbLeft;
|
|
*(LPUW)(pNodeInfo->pLastWord) = *(LPUW)(pNodeInfo->pTmpResult) = 0;
|
|
return S_OK;
|
|
}
|
|
|
|
/*************************************************************************
 *  @func   HRESULT PASCAL FAR | IndexOpenRW |
 *      Open an existing index file for read/write, validate its header
 *      against the build parameters in lpipb and load the auxiliary
 *      tables (sigma table, free list) that the current state requires.
 *
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block. On success hfpbIdxFile, cKey[],
 *      dwMaxTopicId, foMaxOffset and (in UPDATING_STATE) hFreeList
 *      are filled in.
 *
 *  @parm   HFPB | hfpbSysFile |
 *      Handle of the enclosing system file; when 0 the index is opened
 *      as a regular file instead of a subfile.
 *
 *  @parm   LSZ | lszFilename |
 *      Name of the index file
 *
 *  @rdesc  S_OK if successful. On any failure after the read/write open
 *      the index file handle is closed before returning the error.
 *************************************************************************/
PUBLIC HRESULT PASCAL FAR IndexOpenRW (_LPIPB lpipb, HFPB hfpbSysFile, LSZ lszFilename)
{
    HFPB     hfpb;              // Handle used only to probe for existence
    HRESULT  fRet;
    ERRB     errb = S_OK;
    PHRESULT phr = &errb;
    PIH20    pHeader;
    int      iIndex;
    LONG     i;

    // Check the existence of the file
    if ((hfpb = FileOpen (hfpbSysFile, lszFilename,
        hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ, phr)) == 0)
    {
        return *phr;
    }

    FileClose (hfpb);

    // Reopen the file for read/write
    lpipb->hfpbIdxFile = FileOpen (hfpbSysFile, lszFilename,
        hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ_WRITE, phr);

    // The file may exist but still not be writable; do not go on with
    // a null handle (nothing to close in that case)
    if (lpipb->hfpbIdxFile == 0)
        return *phr;

    if ((fRet = ReadIndexHeader(lpipb->hfpbIdxFile,
        pHeader = &lpipb->BTreeData.Header)) != S_OK)
    {
        goto exit01;
    }

    if (pHeader->version != VERCURRENT ||
        pHeader->FileStamp != INDEX_STAMP)
    {
        fRet = E_BADVERSION;
        goto exit01;
    }

    // incoming index and occurrence flags must match those in original index
    if (pHeader->occf != lpipb->occf ||
        pHeader->idxf != lpipb->idxf)
    {
        fRet = E_BADINDEXFLAGS;
        goto exit01;
    }

    // Update the compression key to be used by WriteDataNode later
    lpipb->cKey[CKEY_TOPIC_ID] = pHeader->ckeyTopicId;
    lpipb->cKey[CKEY_OCC_COUNT] = pHeader->ckeyOccCount;
    iIndex = CKEY_OCC_BASE;
    if (pHeader->occf & OCCF_COUNT)
        lpipb->cKey[iIndex++] = pHeader->ckeyWordCount;
    if (pHeader->occf & OCCF_OFFSET)
        lpipb->cKey[iIndex] = pHeader->ckeyOffset;

    // Update the maximum TopicId: both sides end up with the larger value
    if (pHeader->dwMaxTopicId < lpipb->dwMaxTopicId)
        pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
    else
        lpipb->dwMaxTopicId = pHeader->dwMaxTopicId;

    // Get the file size.
    lpipb->foMaxOffset = FileSize (lpipb->hfpbIdxFile, phr);

    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        // Load the sigma table
        if (FoEquals(pHeader->WeightTabOffset, foNil))
        {
            fRet = SetErrCode (phr, E_ASSERT);
            goto exit01;
        }

        if ((fRet = AllocSigmaTable (lpipb)) != S_OK)
            goto exit01;

        if (FileSeekRead (lpipb->hfpbIdxFile, lpipb->wi.hrgsigma,
            pHeader->WeightTabOffset, pHeader->WeightTabSize, phr) !=
            (LONG)pHeader->WeightTabSize)
        {
            fRet = errb;
            goto exit01;
        }

        if (lpipb->bState == DELETING_STATE)
        {
            // Square the sigma table

            // erinfox: off by one bug. change i = lpipb->dwMaxTopicId + 1
            // to lpipb->dwMaxTopicId because we have only allocated
            // (dwMaxTopicId + 1)*sizeof(float) bytes
            for (i = lpipb->dwMaxTopicId; i >= 0; i--)
            {
                lpipb->wi.hrgsigma[i] = lpipb->wi.hrgsigma[i] *
                    lpipb->wi.hrgsigma[i];
            }
        }
    }

    /* ERIC */
    // Load or create a freelist (dwSize = 0)
    if (lpipb->bState == UPDATING_STATE)
    {
        if (pHeader->dwFreeListSize) // If a freelist is existing, read it, otherwise, create it.
        {
            LPBYTE lpbFreeList;

            // When the high bit of the size is set, the list's bytes are
            // subtracted from the usable end of file
            if (pHeader->dwFreeListSize & 0x80000000)
            {
                pHeader->dwFreeListSize &= 0x7FFFFFFF;
                lpipb->foMaxOffset = FoSubFo(lpipb->foMaxOffset,MakeFo(pHeader->dwFreeListSize,0));
            }
            if(!(lpbFreeList = (LPBYTE) _GLOBALALLOCPTR(DLLGMEM_ZEROINIT, pHeader->dwFreeListSize)))
            {
                fRet = SetErrCode (phr, E_OUTOFMEMORY);
                goto exit01;
            }

            // A short read would hand a partly zeroed image to
            // FreeListInitFromMem, so treat it as a failure
            if (FileSeekRead (lpipb->hfpbIdxFile, (LPBYTE)lpbFreeList,
                pHeader->foFreeListOffset, pHeader->dwFreeListSize, phr) !=
                (LONG)pHeader->dwFreeListSize)
            {
                _GLOBALFREEPTR(lpbFreeList);
                fRet = E_FILEREAD;
                goto exit01;
            }

            lpipb->hFreeList = FreeListInitFromMem(lpbFreeList, phr );
            _GLOBALFREEPTR(lpbFreeList);
        }
        else
            lpipb->hFreeList = FreeListInit( wDefaultFreeListSize, phr);

        // Later update code locks this handle unconditionally
        // NOTE(review): assumes a null handle is how the FreeList
        // routines report failure - confirm against their implementation
        if (!lpipb->hFreeList)
        {
            fRet = (errb != S_OK) ? errb : E_OUTOFMEMORY;
            goto exit01;
        }
    }

    return S_OK;

exit01:
    // Common failure path: record the error and release the index file
    SetErrCode (phr, fRet);
    FileClose(lpipb->hfpbIdxFile);
    return fRet;
}
|
|
|
|
|
|
PRIVATE PASCAL NEAR AddRecordToBTree (_LPIPB lpipb, LPB pWord,
|
|
PWORDINFO pWordInfo, int cLevel, int fFlag)
|
|
{
|
|
|
|
PNODEINFO pNodeInfo;
|
|
PNODEINFO pTmpNodeInfo;
|
|
LPB pInsertPtr; // Pointer to insertion point
|
|
LPB pWordStorage;
|
|
LPB pLastWord;
|
|
LPB pBuffer;
|
|
BYTE fIsStemNode;
|
|
WORD wWLen;
|
|
WORD wNewRecSize; // New record size
|
|
LONG cbByteMoved; // Number of bytes moved to leave room for new rec
|
|
OCCF occf = lpipb->occf; // Occurrence field flags
|
|
BYTE fLength = occf & OCCF_LENGTH;
|
|
WORD cbLeft; // How many byte left in the current node?
|
|
LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
|
|
BYTE cbSkip;
|
|
BYTE fEndNode;
|
|
ERRB errb;
|
|
|
|
if (cLevel == -1)
|
|
{
|
|
// The tree's level has increased by one
|
|
int i;
|
|
|
|
if (lpipb->BTreeData.Header.cIdxLevels >= MAX_TREE_HEIGHT - 1)
|
|
return E_TREETOOBIG;
|
|
|
|
/* Move down the entries to make room for the top node */
|
|
for (i = lpipb->BTreeData.Header.cIdxLevels; i > 0 ; i-- )
|
|
{
|
|
lpipb->BTreeData.rgpNodeInfo[i] = lpipb->BTreeData.rgpNodeInfo[i-1];
|
|
lpipb->BTreeData.rgpTmpNodeInfo[i] = lpipb->BTreeData.rgpTmpNodeInfo[i-1];
|
|
}
|
|
|
|
// Increase tree level
|
|
lpipb->BTreeData.Header.cIdxLevels ++;
|
|
if ((pNodeInfo = lpipb->BTreeData.rgpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
|
|
return(E_OUTOFMEMORY);
|
|
|
|
if ((pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
|
|
return(E_OUTOFMEMORY);
|
|
|
|
pWordStorage = (pBuffer = pNodeInfo->pBuffer) + sizeof(WORD);
|
|
|
|
if (fFlag & USE_BOTH_NODE_40)
|
|
{
|
|
if (fFlag & USE_TEMP_FOR_RIGHT_NODE_10)
|
|
{
|
|
// Link to the left child node
|
|
pWordStorage += PrefixCompressWord (pWordStorage,
|
|
lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult,
|
|
EmptyWord, fLength);
|
|
pWordStorage += CopyFileOffset (pWordStorage,
|
|
lpipb->BTreeData.rgpNodeInfo[1]->nodeOffset);
|
|
|
|
// Link to the right child node
|
|
pWordStorage += PrefixCompressWord (pWordStorage,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult,
|
|
lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult, fLength);
|
|
pWordStorage += CopyFileOffset (pWordStorage,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
|
|
}
|
|
else
|
|
{
|
|
// Link to the left child node
|
|
pWordStorage += PrefixCompressWord (pWordStorage,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult,
|
|
EmptyWord, fLength);
|
|
pWordStorage += CopyFileOffset (pWordStorage,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
|
|
|
|
// Link to the right child node
|
|
pWordStorage += PrefixCompressWord (pWordStorage,
|
|
lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult, fLength);
|
|
pWordStorage += CopyFileOffset (pWordStorage,
|
|
lpipb->BTreeData.rgpNodeInfo[1]->nodeOffset);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Link to the right child node
|
|
pWordStorage += PrefixCompressWord (pWordStorage,
|
|
pWord, EmptyWord, fLength);
|
|
pWordStorage += CopyFileOffset (pWordStorage,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
|
|
}
|
|
|
|
// Set all the parameter
|
|
pNodeInfo->pCurPtr = pBuffer + sizeof(WORD);
|
|
pNodeInfo->cbLeft = (LONG)(pBuffer - pWordStorage + dwBlockSize);
|
|
pNodeInfo->pMaxAddress = pBuffer + dwBlockSize - pNodeInfo->cbLeft;
|
|
SETWORD(pBuffer, (WORD)pNodeInfo->cbLeft);
|
|
|
|
|
|
// Write out the new node
|
|
if ((FileSeekWrite(lpipb->hfpbIdxFile, pBuffer,
|
|
lpipb->foMaxOffset, dwBlockSize, &errb)) != (LONG)dwBlockSize)
|
|
return(errb);
|
|
|
|
// Remember the offset of this node
|
|
// Set the pointer to the top stem node
|
|
|
|
lpipb->BTreeData.Header.foIdxRoot = pNodeInfo->nodeOffset =
|
|
lpipb->foMaxOffset;
|
|
lpipb->BTreeData.Header.nidIdxRoot = pNodeInfo->nodeOffset.dwOffset;
|
|
|
|
lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
|
|
#if 0
|
|
return CheckStemNode (pNodeInfo);
|
|
#else
|
|
return(S_OK);
|
|
#endif
|
|
}
|
|
|
|
|
|
// Initialize data
|
|
pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
|
|
pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
|
|
pLastWord = pNodeInfo->pLastWord;
|
|
pBuffer = pNodeInfo->pBuffer;
|
|
if (fIsStemNode = (cLevel < lpipb->BTreeData.Header.cIdxLevels - 1))
|
|
cbSkip = sizeof(WORD);
|
|
else
|
|
cbSkip = sizeof(WORD) + FOFFSET_SIZE;
|
|
|
|
fEndNode = (pNodeInfo->pCurPtr >= pNodeInfo->pMaxAddress);
|
|
|
|
// Calculate how many byte left are there in the old node
|
|
pInsertPtr = pNodeInfo->pCurPtr; // Pointer to insertion point
|
|
cbLeft = (WORD)pNodeInfo->cbLeft;
|
|
|
|
// Handle special simple cases
|
|
if (fFlag & UPDATE_NODE_ADDRESS_08)
|
|
{
|
|
// Skip the next word
|
|
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
|
|
pInsertPtr, &wWLen);
|
|
|
|
if (fFlag & USE_TEMP_NODE_04)
|
|
{
|
|
CopyFileOffset (pInsertPtr,
|
|
lpipb->BTreeData.rgpTmpNodeInfo[cLevel + 1]->nodeOffset);
|
|
}
|
|
else
|
|
{
|
|
CopyFileOffset (pInsertPtr,
|
|
lpipb->BTreeData.rgpNodeInfo[cLevel + 1]->nodeOffset);
|
|
}
|
|
#if 0
|
|
return(fIsStemNode ? CheckStemNode (pNodeInfo) :
|
|
CheckLeafNode (pNodeInfo, occf));
|
|
#else
|
|
return(S_OK);
|
|
#endif
|
|
}
|
|
|
|
if (fFlag & (REPLACE_WORD_01 | SKIP_NEXT_WORD_20))
|
|
{
|
|
// We get more room from the replaced word
|
|
DWORD dwTemp;
|
|
|
|
// Skip the next word
|
|
if (fFlag & SKIP_NEXT_WORD_20)
|
|
{
|
|
pInsertPtr = ExtractWord(pLastWord, pInsertPtr, &wWLen);
|
|
}
|
|
else
|
|
{
|
|
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
|
|
pInsertPtr, &wWLen);
|
|
}
|
|
|
|
// Skip the data
|
|
if (fIsStemNode)
|
|
pInsertPtr += FOFFSET_SIZE;
|
|
else
|
|
{
|
|
// Skip field id, topic count. fileoffset, datasize
|
|
if (occf & OCCF_FIELDID)
|
|
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr); // FieldId
|
|
if (occf & OCCF_TOPICID)
|
|
{
|
|
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
|
|
pInsertPtr += FOFFSET_SIZE;
|
|
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
|
|
}
|
|
}
|
|
if (fFlag & SKIP_NEXT_WORD_20)
|
|
pNodeInfo->pCurPtr = pInsertPtr;
|
|
else
|
|
{
|
|
// Remove the old data
|
|
MEMMOVE (pNodeInfo->pCurPtr, pInsertPtr,
|
|
cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
|
|
pNodeInfo->pMaxAddress =
|
|
(pInsertPtr = pNodeInfo->pCurPtr) + cbByteMoved;
|
|
cbLeft = (WORD)(dwBlockSize - (pNodeInfo->pMaxAddress - pBuffer));
|
|
}
|
|
if (pInsertPtr >= pNodeInfo->pMaxAddress)
|
|
fEndNode = TRUE;
|
|
}
|
|
|
|
//Calculate the approximate number of bytes needed for the
|
|
// new data by compress it to the temporary block
|
|
|
|
if (fIsStemNode)
|
|
{
|
|
if (pInsertPtr <= pNodeInfo->pBuffer + sizeof(WORD))
|
|
{
|
|
// This is the first word, there is no previous one
|
|
*(LPWORD)pLastWord = 0;
|
|
}
|
|
wNewRecSize = (WORD) CopyNewDataToStemNode (lpipb, pTmpNodeInfo,
|
|
pWord, pLastWord, cLevel, fFlag);
|
|
}
|
|
else
|
|
{
|
|
if (pInsertPtr <= pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE)
|
|
{
|
|
// This is the first word, there is no previous one
|
|
*(LPWORD)pLastWord = 0;
|
|
}
|
|
wNewRecSize = (WORD) CopyNewDataToLeafNode (lpipb, pTmpNodeInfo,
|
|
pWordInfo, pWord, pLastWord);
|
|
}
|
|
|
|
wNewRecSize -= cbSkip;
|
|
|
|
// I reserved about 4 byte to ensure that when we have enough room
|
|
// we do have enough room. Compression may change the size of the
|
|
// record, causing us to run out of room when copying the new data
|
|
// over
|
|
|
|
if (cbLeft - sizeof(DWORD) > wNewRecSize)
|
|
{
|
|
// We have enough room for the new data. Just insert the new data
|
|
pWordStorage = pTmpNodeInfo->pCurPtr;
|
|
|
|
if (!fEndNode)
|
|
{
|
|
// We need to recompress the next word
|
|
MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
|
|
*(LPUW)pWord + sizeof(WORD) + sizeof(WORD)); //erinfox RISC patch
|
|
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
|
|
pInsertPtr, &wWLen);
|
|
cbByteMoved = PrefixCompressWord (pWordStorage,
|
|
pTmpNodeInfo->pTmpResult, pWord, fLength);
|
|
wNewRecSize += (WORD)cbByteMoved;
|
|
|
|
// Reset the last word for pBTreeWord
|
|
MEMCPY (pNodeInfo->pTmpResult, pLastWord,
|
|
*(LPUW)pLastWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
|
|
}
|
|
|
|
// Make room for the new data
|
|
if ((cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr)) <= 0)
|
|
cbByteMoved = 0;
|
|
else
|
|
MEMMOVE(pNodeInfo->pCurPtr + wNewRecSize, pInsertPtr,
|
|
cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
|
|
|
|
// Copy the new data
|
|
MEMCPY (pNodeInfo->pCurPtr, pTmpNodeInfo->pBuffer + cbSkip,
|
|
wNewRecSize);
|
|
|
|
// Update data
|
|
|
|
pNodeInfo->pMaxAddress = pNodeInfo->pCurPtr + wNewRecSize +
|
|
cbByteMoved;
|
|
pNodeInfo->cbLeft = cbLeft =
|
|
(WORD)(dwBlockSize - (pNodeInfo->pMaxAddress - pBuffer));
|
|
SETWORD(pNodeInfo->pBuffer + cbSkip - sizeof(WORD),
|
|
(WORD)cbLeft);
|
|
pNodeInfo->fFlag = TO_BE_UPDATE;
|
|
|
|
// Change the parent node
|
|
if (fEndNode && cLevel)
|
|
{
|
|
return (AddRecordToBTree (lpipb, pWord, pWordInfo, cLevel - 1,
|
|
REPLACE_WORD_01));
|
|
}
|
|
#if 0
|
|
return(fIsStemNode ? CheckStemNode (pNodeInfo) :
|
|
CheckLeafNode (pNodeInfo, occf));
|
|
#else
|
|
return(S_OK);
|
|
#endif
|
|
return S_OK;
|
|
}
|
|
|
|
// Case 3: Add to the middle. This is a complex one, since we have
|
|
// to split the node into 2.
|
|
return(SplitNodeAndAddData (lpipb, pWord, pWordInfo, cLevel, fFlag,
|
|
fIsStemNode));
|
|
}
|
|
|
|
/*************************************************************************
 *  @func   int PASCAL NEAR | SplitNodeAndAddData |
 *      Split the full node at level cLevel around the current insertion
 *      point and add the new word to one of the halves. The temporary
 *      node of that level receives one half and is written out as a
 *      brand new node by CreateNewNode; the parent level is then
 *      updated through AddRecordToBTree.
 *
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block
 *
 *  @parm   LPB | pWord |
 *      Word being added
 *
 *  @parm   PWORDINFO | pWordInfo |
 *      Leaf data for the word (read for leaf nodes only)
 *
 *  @parm   int | cLevel |
 *      Level of the node being split
 *
 *  @parm   int | fFlag |
 *      Flags received by AddRecordToBTree
 *
 *  @parm   int | fIsStemNode |
 *      Non-zero if the node is a stem node
 *
 *  @rdesc  S_OK if successful, otherwise the error from CreateNewNode
 *      or AddRecordToBTree
 *************************************************************************/
PRIVATE int PASCAL NEAR SplitNodeAndAddData (_LPIPB lpipb, LPB pWord,
    PWORDINFO pWordInfo, int cLevel, int fFlag, int fIsStemNode)
{
    PNODEINFO pNodeInfo;
    PNODEINFO pTmpNodeInfo;
    LONG cbByteMoved;
    WORD leftSize;
    WORD rightSize;
    WORD wWLen;
    LPB pInsertPtr;
    LPB pWordStorage;
    int cbSkip;
    DWORD dwBlockSize;
    HRESULT fRet;
    BYTE fLength = lpipb->occf & OCCF_LENGTH;
    LPB pLastWord;
    LPB pTemp;
    LPB pBuffer;

    // Leaf nodes start with a file offset in front of the CbLeft word
    cbSkip = fIsStemNode ? 0 : FOFFSET_SIZE;

    // Variable initialization
    pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
    pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
    pBuffer = pNodeInfo->pBuffer;
    pInsertPtr = pNodeInfo->pCurPtr;
    pLastWord = pNodeInfo->pLastWord;
    dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;

    // Calculate approximately the left & right side node sizes
    leftSize = (WORD)(pInsertPtr - pBuffer - cbSkip - sizeof(WORD));
    rightSize = (WORD)(pNodeInfo->pMaxAddress - pNodeInfo->pCurPtr);

    if (leftSize >= rightSize)
    {
        // The new node goes on the right and starts with the new data
        // Example:
        //   Add 4 into 1 2 3 5 --> 1 2 3 and 4 5
        if (fIsStemNode)
        {
            CopyNewDataToStemNode (lpipb, pTmpNodeInfo,
                pWord, EmptyWord, cLevel, fFlag);
            pTemp = pTmpNodeInfo->pBuffer + sizeof(WORD);
        }
        else
        {
            CopyNewDataToLeafNode (lpipb, pTmpNodeInfo,
                pWordInfo, pWord, EmptyWord);
            pTemp = pTmpNodeInfo->pBuffer + sizeof(WORD) +
                FOFFSET_SIZE;
        }

        // Output continues where the copy routine stopped
        pWordStorage = pTmpNodeInfo->pCurPtr;

        // Move back the pointer to the beginning of the word
        // for future reference
        pTmpNodeInfo->pCurPtr = pTemp;

        if (rightSize > 0)
        {
            // Extract the word on the right of the insertion point
            MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
                *(LPUW)pWord + sizeof(WORD)); // erinfox RISC patch
            pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
                pInsertPtr, &wWLen);

            // Recompress it against the new word that now precedes it
            pWordStorage += PrefixCompressWord (pWordStorage,
                pTmpNodeInfo->pTmpResult, pWord, fLength);

            // Copy data on the right of the insertion point to the new node
            cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr);
            MEMCPY (pWordStorage, pInsertPtr, cbByteMoved);
            pWordStorage += cbByteMoved;
        }

        pTmpNodeInfo->pMaxAddress = pWordStorage;

        // Update the right node
        pTmpNodeInfo->cbLeft =
            (LONG)(dwBlockSize - (pWordStorage - pTmpNodeInfo->pBuffer));
        SETWORD(pTmpNodeInfo->pBuffer + cbSkip,
            (WORD)pTmpNodeInfo->cbLeft);
        pTmpNodeInfo->pMaxAddress = pTmpNodeInfo->pBuffer +
            dwBlockSize - pTmpNodeInfo->cbLeft;
#if 0
        if (fIsStemNode)
            CheckStemNode (pTmpNodeInfo);
        else
            CheckLeafNode (pTmpNodeInfo, lpipb->occf);
#endif
        // Clear the slack of the new node before it is written
        MEMSET (pWordStorage, 0, pTmpNodeInfo->cbLeft);

        fRet = CreateNewNode (lpipb, cLevel, fIsStemNode,
            NEW_NODE_ON_RIGHT);
        if (fRet != S_OK)
            return(fRet);

        // Update the left node: it now ends at the insertion point
        pNodeInfo->fFlag = TO_BE_UPDATE;
        pNodeInfo->cbLeft =
            (LONG)(dwBlockSize - (pNodeInfo->pCurPtr - pBuffer));
        SETWORD(pBuffer + cbSkip, (WORD)pNodeInfo->cbLeft);
#ifdef _DEBUG
        MEMSET (pNodeInfo->pCurPtr, 0, pNodeInfo->cbLeft);
#endif
        pNodeInfo->pMaxAddress = pBuffer + dwBlockSize - pNodeInfo->cbLeft;
        pNodeInfo->fFlag = TO_BE_UPDATE;

#if 0
        if (fIsStemNode)
            CheckStemNode (pNodeInfo);
        else
            CheckLeafNode (pNodeInfo, lpipb->occf);
#endif

        if (cLevel == 0)
        {
            // The root itself was split: build a new root above both halves
            if (pNodeInfo->pCurPtr >= pNodeInfo->pMaxAddress - 1)
                pNodeInfo->pCurPtr = pNodeInfo->pBuffer + cbSkip + sizeof(WORD);
            GetLastWordInNode (lpipb, pNodeInfo, fIsStemNode);
            GetLastWordInNode (lpipb, pTmpNodeInfo, fIsStemNode);
            return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
                USE_BOTH_NODE_40 | USE_TEMP_FOR_RIGHT_NODE_10);
        }

        if (rightSize > 0 || (fFlag & REPLACE_WORD_01))
        {
            // Two parent updates are needed: first for the new right
            // node, then a fresh entry for the left node's last word.
            //   rightSize > 0 : the existing parent entry is re-pointed
            //                   to the new node
            //   otherwise     : we were adding at the end of the block
            //                   while replacing the same word, so the
            //                   parent's word is replaced
            int fParentFlag;

            if (rightSize > 0)
                fParentFlag = USE_TEMP_NODE_04 | UPDATE_NODE_ADDRESS_08;
            else
                fParentFlag = USE_TEMP_NODE_04 | REPLACE_WORD_01;

            fRet = AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
                fParentFlag);
            if (fRet != S_OK)
                return fRet;

            return AddRecordToBTree (lpipb, pNodeInfo->pLastWord, NULL,
                cLevel - 1, 0);
        }

        // Add to the end
        return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
            USE_TEMP_NODE_04 | SKIP_NEXT_WORD_20);
    }

    //**********************************************
    //
    // Add the new data to the end of the leftnode
    //
    //**********************************************
    // The new node goes on the left and ends with the new data
    // Example:
    //   Add 2 into 1 3 4 5 --> 1 2 and 3 4 5

    pWordStorage = pTmpNodeInfo->pBuffer + cbSkip + sizeof(WORD);
    pTmpNodeInfo->pCurPtr = pWordStorage;

    // Copy the data on the left to the new node
    cbByteMoved = leftSize;
    if (cbByteMoved)
    {
        MEMCPY(pWordStorage, pBuffer + cbSkip + sizeof(WORD), cbByteMoved);
        pWordStorage += cbByteMoved;
    }

    // Emit new data
    pWordStorage += PrefixCompressWord (pWordStorage,
        pWord, pLastWord, lpipb->occf & OCCF_LENGTH);

    if (!fIsStemNode)
    {
        // Emit field id, topic count. fileoffset, datasize
        if (lpipb->occf & OCCF_FIELDID)
            pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwFieldId);

        pWordStorage += CbBytePack (pWordStorage,
            pWordInfo->dwMergeTopicCount);

        pWordStorage += CopyFileOffset (pWordStorage, pWordInfo->dataLocation);
        pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwDataSize);
    }
    else if (fFlag & USE_TEMP_NODE_04)
    {
        // Stem entry pointing at the child's temporary (new) node
        pWordStorage += CopyFileOffset (pWordStorage,
            lpipb->BTreeData.rgpTmpNodeInfo[cLevel+1]->nodeOffset);
    }
    else
    {
        // Stem entry pointing at the child's regular node
        pWordStorage += CopyFileOffset (pWordStorage,
            lpipb->BTreeData.rgpNodeInfo[cLevel+1]->nodeOffset);
    }

    pTmpNodeInfo->cbLeft = (LONG)(pNodeInfo->dwBlockSize
        - (pWordStorage - pTmpNodeInfo ->pBuffer));
    SETWORD (pTmpNodeInfo->pBuffer + cbSkip, (WORD)pTmpNodeInfo->cbLeft);

    pTmpNodeInfo->pMaxAddress = pWordStorage;
    fRet = CreateNewNode (lpipb, cLevel, fIsStemNode, NEW_NODE_ON_LEFT);
    if (fRet != S_OK)
        return(fRet);

    // Update the right node
    if (leftSize > 0)
    {
        // Slide the remaining records to the start of the node
        pNodeInfo->pCurPtr = pBuffer + cbSkip + sizeof(WORD);
        MEMMOVE(pNodeInfo->pCurPtr, pInsertPtr,
            (size_t)(pNodeInfo->pMaxAddress - pInsertPtr));
        pNodeInfo->pMaxAddress -= cbByteMoved;

        // Reconstruct the 1st word in the node.
        if (fFlag & REPLACE_WORD_01)
        {
            MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
                *(LPUW)pWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
        }
        else
        {
            MEMCPY (pTmpNodeInfo->pTmpResult, pLastWord,
                *(LPUW)pLastWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
        }
    }

    // Decode the word that is now first; cbByteMoved becomes the size
    // of its old compressed form
    pTemp = pNodeInfo->pCurPtr;
    pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult, pTemp, &wWLen);
    cbByteMoved = (LONG)(pInsertPtr - pTemp);

    // Recompress the word using pLastWord of pTmpNodeInfo
    wWLen = (WORD) PrefixCompressWord (pTmpNodeInfo->pLastWord,
        pTmpNodeInfo->pTmpResult, EmptyWord, fLength);

    // Reserved room for the word
    pWordStorage = pBuffer + cbSkip + sizeof(WORD);
    MEMMOVE (pWordStorage + wWLen, pInsertPtr,
        (size_t)(pNodeInfo->pMaxAddress - pInsertPtr));

    // Copy down the word
    MEMCPY(pWordStorage, pTmpNodeInfo->pLastWord, wWLen);
    pNodeInfo->pMaxAddress += wWLen - cbByteMoved;

    // Update the right node
    pNodeInfo->cbLeft = (WORD)(dwBlockSize -
        (pNodeInfo->pMaxAddress - pBuffer));
    SETWORD(pBuffer + cbSkip, (WORD)pNodeInfo->cbLeft);
    pNodeInfo->fFlag = TO_BE_UPDATE;
#ifdef _DEBUG
    MEMSET (pNodeInfo->pMaxAddress, 0, pNodeInfo->cbLeft);
#endif

    if (cLevel != 0)
    {
        return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
            USE_TEMP_NODE_04);
    }

    // The root itself was split: build a new root above both halves
    GetLastWordInNode (lpipb, pNodeInfo, fIsStemNode);
    GetLastWordInNode (lpipb, pTmpNodeInfo, fIsStemNode);
    return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
        USE_BOTH_NODE_40);
}
|
|
|
|
/*************************************************************************
 *  @func   VOID | GetLastWordInNode |
 *      Walk every record from pNodeInfo->pCurPtr up to the end of the
 *      used part of the node, decoding each word into
 *      pNodeInfo->pTmpResult.
 *      NOTE(review): presumably pTmpResult then holds the last word of
 *      the node, as the name suggests - confirm against ExtractWord.
 *
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block (only the occf flags are read)
 *
 *  @parm   PNODEINFO | pNodeInfo |
 *      Node to scan
 *
 *  @parm   BOOL | fIsStemNode |
 *      TRUE if the node is a stem node (word + file offset records)
 *************************************************************************/
VOID GetLastWordInNode (_LPIPB lpipb, PNODEINFO pNodeInfo, BOOL fIsStemNode)
{
    LPB   pScan = pNodeInfo->pCurPtr;   // Current position in the node
    WORD  cbWord;                       // Filled in by ExtractWord, unused
    DWORD dwIgnored;                    // Receives unpacked values we skip

    // Start from the empty word
    MEMCPY (pNodeInfo->pTmpResult, EmptyWord, 4);

    while (pScan < pNodeInfo->pMaxAddress - 1)
    {
        pScan = ExtractWord(pNodeInfo->pTmpResult, pScan, &cbWord);

        if (fIsStemNode)
        {
            // Stem record: the word is followed by a file offset only
            pScan += FOFFSET_SIZE;
            continue;
        }

        // Leaf record: optional field id and topic count ...
        if (lpipb->occf & OCCF_FIELDID)
            pScan += CbByteUnpack (&dwIgnored, pScan);
        if (lpipb->occf & OCCF_TOPICID)
            pScan += CbByteUnpack (&dwIgnored, pScan); // Topic count

        // ... then the file offset and one more packed value
        pScan += FOFFSET_SIZE;
        pScan += CbByteUnpack (&dwIgnored, pScan);
    }
}
|
|
|
|
/*************************************************************************
 *  @func   HRESULT PASCAL NEAR | CreateNewNode |
 *      Write the temporary node of level cLevel at the current end of
 *      the index file and, for leaf nodes, link it into the chain of
 *      leaves next to the regular node of that level.
 *
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block; foMaxOffset is advanced by one block
 *
 *  @parm   int | cLevel |
 *      Level of the node pair
 *
 *  @parm   int | fIsStemNode |
 *      Non-zero for a stem node (no linking is done)
 *
 *  @parm   int | fAfter |
 *      NEW_NODE_ON_RIGHT if the new node follows the regular node,
 *      NEW_NODE_ON_LEFT if it precedes it
 *
 *  @rdesc  S_OK if successful, otherwise the file write error
 *************************************************************************/
PRIVATE HRESULT PASCAL NEAR CreateNewNode(_LPIPB lpipb, int cLevel,
    int fIsStemNode, int fAfter)
{
    PNODEINFO pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
    PNODEINFO pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
    LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
    ERRB errb;

#ifdef _DEBUG
    dwNewNodeSize += dwBlockSize;
#endif

    if (!fIsStemNode)
    {
        // Add the new node into the linked list: its link field gets
        // either the old node's successor or the old node itself
        if (fAfter)
            CopyFileOffset (pTmpNodeInfo->pBuffer, pNodeInfo->nextNodeOffset);
        else
            CopyFileOffset (pTmpNodeInfo->pBuffer, pNodeInfo->nodeOffset);
    }

    // Write out the new node
    if ((FileSeekWrite(lpipb->hfpbIdxFile, pTmpNodeInfo->pBuffer,
        lpipb->foMaxOffset, dwBlockSize, &errb)) != (LONG)dwBlockSize)
    {
        return(errb);
    }

    // Remember the offset of this node
    pTmpNodeInfo->nodeOffset = lpipb->foMaxOffset;

    if (!fIsStemNode)
    {
        if (fAfter)
        {
            // The old node now links to the new one; it is rewritten later
            CopyFileOffset (pNodeInfo->pBuffer, lpipb->foMaxOffset);
            pNodeInfo->fFlag = TO_BE_UPDATE;
        }
        else if (!FoEquals(pNodeInfo->prevNodeOffset, foNil))
        {
            // Update the previous link: patch the link field stored at
            // prevNodeOffset directly in the file
            BYTE TempBuf[FOFFSET_SIZE + 1];

            CopyFileOffset (TempBuf,lpipb->foMaxOffset);
            if ((FileSeekWrite(lpipb->hfpbIdxFile, TempBuf,
                pNodeInfo->prevNodeOffset, FOFFSET_SIZE,
                &errb)) != FOFFSET_SIZE)
            {
                return(errb);
            }
        }
    }

    // The file grew by one block
    lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
    return(S_OK);
}
|
|
|
|
/*************************************************************************
 *  @func   HRESULT PASCAL NEAR | WriteNewDataRecord |
 *      Encode the data of a word through WriteDataNode into the output
 *      (temporary) file, then copy it into the index file: into a block
 *      taken from the free list when one is found, otherwise at the end
 *      of the file.
 *
 *  @parm   _LPIPB | lpipb |
 *      Index parameter block
 *
 *  @parm   PWORDINFO | pWordInfo |
 *      Word information. dwMergeTopicCount, dwDataSize and dataLocation
 *      are set by this function.
 *
 *  @rdesc  S_OK if successful, otherwise the error reported by the
 *      failing step
 *************************************************************************/
PRIVATE HRESULT PASCAL NEAR WriteNewDataRecord (_LPIPB lpipb, PWORDINFO pWordInfo)
{
    PFILEDATA pOutFile = &lpipb->OutFile;
    DWORD     dwBlockSize;
    LONG      cbPending;        // Bytes waiting in the output buffer
    ERRB      errb;
    HRESULT   fRet;
    FREEBLOCK FreeBlock;

    // Reset the characteristic of the file
    pOutFile->pCurrent = pOutFile->pMem;
    pOutFile->cbLeft = pOutFile->dwMax;
    pOutFile->ibit = cbitBYTE - 1;
    pOutFile->foPhysicalOffset = foNil;
    FileSeek (pOutFile->fFile, pOutFile->foPhysicalOffset, 0, &errb);

    // Write out the data into the temp file. All topics are new ones.
    pWordInfo->dwMergeTopicCount = pWordInfo->dwNewTopicCount;
    dwBlockSize = WriteDataNode (lpipb, pWordInfo->dwMergeTopicCount, &errb);
    if (dwBlockSize == 0)
        return errb;

    // Write out the output buffer
    cbPending = (LONG)(pOutFile->pCurrent - pOutFile->pMem);
    if (FileWrite (pOutFile->fFile, pOutFile->pMem, cbPending, &errb) !=
        cbPending)
    {
        return(errb);
    }
    // if ((errb.err = FileFlush (pOutFile->fFile)) != S_OK)
    //     return(errb.err);

    pWordInfo->dwDataSize = dwBlockSize;

    // Find the smallest free block that fits the new data
    if (GetFreeBlock (lpipb, &FreeBlock, dwBlockSize) == S_OK)
    {
        // There is a free block large enough to store the data
        fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
            FreeBlock.foBlockOffset, dwBlockSize);
        if (fRet != S_OK)
            return fRet;

        pWordInfo->dataLocation = FreeBlock.foBlockOffset;
        return S_OK;
    }

#ifdef _DEBUGFREE
    _DPF2("GetFreeBlock failed. Requested %ld bytes, appending to EOF(%ld)\n", dwBlockSize, lpipb->foMaxOffset.dwOffset);
#endif
    // There is no free block large enough to store the data:
    // append it at the end of the index file
    fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
        lpipb->foMaxOffset, dwBlockSize);
    if (fRet != S_OK)
        return fRet;

    pWordInfo->dataLocation = lpipb->foMaxOffset;
    lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);

#ifdef _DEBUG
    dwNewDataSize += dwBlockSize;
#endif
    return(S_OK);
}
|
|
|
|
// erinfox: return a block from the free list if possible
|
|
PRIVATE HRESULT GetFreeBlock (_LPIPB lpipb, PFREEBLOCK pFreeBlock,
|
|
DWORD dwBlockSize)
|
|
{
|
|
FILEOFFSET foFreeListOffset;
|
|
ERRB errb;
|
|
|
|
// if it can't find a free block, it returns an error
|
|
foFreeListOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwBlockSize,0), &errb);
|
|
if (FoIsNil(foFreeListOffset))
|
|
{
|
|
return errb;
|
|
}
|
|
|
|
pFreeBlock->foBlockOffset = foFreeListOffset;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/*************************************************************************
 *  @func   HRESULT PASCAL NEAR | CopyBlockFile |
 *      Copy dwBlockSize bytes from the beginning of the file described
 *      by pFileData to offset foOffset of hfpbDest, going through the
 *      memory buffer of pFileData in pieces of at most dwMax bytes.
 *
 *  @parm   PFILEDATA | pFileData |
 *      Source file and transfer buffer
 *
 *  @parm   HFPB | hfpbDest |
 *      Destination file
 *
 *  @parm   FILEOFFSET | foOffset |
 *      Destination offset
 *
 *  @parm   DWORD | dwBlockSize |
 *      Number of bytes to copy
 *
 *  @rdesc  S_OK if successful, the seek error, E_FILEREAD or
 *      E_FILEWRITE otherwise
 *************************************************************************/
PRIVATE HRESULT PASCAL NEAR CopyBlockFile (PFILEDATA pFileData, HFPB hfpbDest,
    FILEOFFSET foOffset, DWORD dwBlockSize)
{
    LONG cbChunk;       // Size of the piece being transferred
    ERRB errb;

    // Start with no error; it is tested after each seek below
    errb = S_OK;

    // Position the source at its beginning
    FileSeek (pFileData->fFile, foNil, 0, &errb);
    if (errb != S_OK)
        return(errb);

    // Position the destination at the requested offset
    FileSeek (hfpbDest, foOffset, 0, &errb);
    if (errb != S_OK)
        return(errb);

    // Do the copy
    for (; dwBlockSize; dwBlockSize -= cbChunk)
    {
        // Never move more than the buffer can hold
        cbChunk = dwBlockSize;
        if (cbChunk > pFileData->dwMax)
            cbChunk = pFileData->dwMax;

        if (FileRead (pFileData->fFile, pFileData->pMem, cbChunk, &errb) !=
            cbChunk)
        {
            return(E_FILEREAD);
        }

        if (FileWrite(hfpbDest, pFileData->pMem, cbChunk, &errb) != cbChunk)
            return(E_FILEWRITE);
    }

    return(S_OK);
}
|
|
|
|
PRIVATE HRESULT PASCAL NEAR UpdateDataNode (_LPIPB lpipb, PWORDINFO pWordInfo)
|
|
{
|
|
|
|
// Local replacement Variables
|
|
PBTREEDATA pTreeData = &lpipb->BTreeData;
|
|
PFILEDATA pOutFile = &lpipb->OutFile; // Output data structure
|
|
PFILEDATA pInFile = &lpipb->InFile; // Input data structre
|
|
HFPB fFile = pOutFile->fFile; // Output file handle
|
|
PNODEINFO pIndexDataNode = lpipb->pIndexDataNode;
|
|
DWORD dwNewDataSize;
|
|
ERRB errb;
|
|
|
|
// Working Variables
|
|
DWORD dwEncodedSize = 0; // Size of encoded block
|
|
DWORD dwTopicIdDelta; // Really only used for weight values
|
|
DWORD dwNewTopicId = 0;
|
|
DWORD dwIndexTopicId = 0;
|
|
DWORD dwNewTopicCount;
|
|
DWORD dwIndexTopicCount;
|
|
DWORD dwTopicCount;
|
|
FILEOFFSET foStart; // Physical beginning of bit compression block
|
|
WORD wWeight = 0; // Only used when IDXF_NORMALIZE is set
|
|
DWORD dwTopicId = 0; // Only used when IDXF_NORMALIZE is set
|
|
int cbTemp; // # of compressed bytes that uncompressed
|
|
OCCF occf = lpipb->occf;
|
|
BYTE fetchOldData;
|
|
BYTE fetchNewData;
|
|
PIH20 pHeader = &lpipb->BTreeData.Header;
|
|
HRESULT fRet;
|
|
|
|
// Initialize variables
|
|
wWeight = 0; // UNDONE: Don't need it
|
|
|
|
// Reset the file pointer
|
|
FileSeek (pOutFile->fFile,
|
|
foStart = pOutFile->foPhysicalOffset = foNil, 0, &errb);
|
|
pOutFile->pCurrent = pOutFile->pMem;
|
|
pOutFile->cbLeft = pOutFile->dwMax;
|
|
pOutFile->ibit = cbitBYTE - 1;
|
|
|
|
dwIndexTopicCount = pWordInfo->dwIndexTopicCount;
|
|
dwNewTopicCount = pWordInfo->dwNewTopicCount;
|
|
fetchOldData = fetchNewData = TRUE;
|
|
pWordInfo->dwOldTopicId = pWordInfo->dwNewTopicId = dwTopicCount = 0;
|
|
|
|
// Initialize pIndexDataNode structure
|
|
pIndexDataNode->nodeOffset = pWordInfo->dataLocation;
|
|
pIndexDataNode->dwDataSizeLeft = pWordInfo->dwDataSize;
|
|
|
|
if ((fRet = ReadNewData(pIndexDataNode)) != S_OK)
|
|
return(fRet);
|
|
|
|
while (dwIndexTopicCount && dwNewTopicCount)
|
|
{
|
|
// Get the topicId from the new file
|
|
if (fetchNewData)
|
|
{
|
|
if (pInFile->cbLeft < 2 * sizeof (DWORD))
|
|
{
|
|
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
|
|
pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
|
|
pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft,
|
|
&errb);
|
|
pInFile->dwMax = pInFile->cbLeft;
|
|
pInFile->pCurrent = pInFile->pMem;
|
|
}
|
|
cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
|
|
pInFile->pCurrent += cbTemp;
|
|
pInFile->cbLeft -= cbTemp;
|
|
|
|
pWordInfo->dwNewTopicId = (dwNewTopicId += dwTopicIdDelta);
|
|
fetchNewData = FALSE;
|
|
}
|
|
|
|
if (fetchOldData)
|
|
{
|
|
if (pIndexDataNode->ibit < cbitBYTE - 1)
|
|
{
|
|
pIndexDataNode->ibit = cbitBYTE - 1;
|
|
pIndexDataNode->pCurPtr ++;
|
|
}
|
|
// Get the topicId from the index file
|
|
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
|
|
&dwTopicIdDelta)) != S_OK)
|
|
return fRet;
|
|
pWordInfo->dwIndexTopicId = (dwIndexTopicId += dwTopicIdDelta);
|
|
fetchOldData = FALSE;
|
|
}
|
|
|
|
if (dwIndexTopicId < dwNewTopicId)
|
|
{
|
|
if ((fRet = EmitOldData (lpipb, pIndexDataNode,
|
|
pWordInfo)) != S_OK)
|
|
return(fRet);
|
|
fetchOldData = TRUE;
|
|
dwTopicCount++;
|
|
dwIndexTopicCount --;
|
|
}
|
|
else if (dwIndexTopicId == dwNewTopicId)
|
|
{
|
|
DWORD dwTmp;
|
|
if (lpipb->idxf & IDXF_NORMALIZE)
|
|
{
|
|
if ((fRet = FGetBits(pIndexDataNode, &dwTmp,
|
|
sizeof (USHORT) * cbitBYTE)) != S_OK)
|
|
return fRet;
|
|
}
|
|
|
|
if (occf & OCCF_HAVE_OCCURRENCE)
|
|
{
|
|
if ((fRet = SkipOldData (lpipb, pIndexDataNode)) != S_OK)
|
|
return(fRet);
|
|
}
|
|
fetchOldData = TRUE;
|
|
dwIndexTopicCount --;
|
|
|
|
if ((fRet = EmitNewData (lpipb, pWordInfo, FALSE)) != S_OK)
|
|
return(fRet);
|
|
dwNewTopicCount --;
|
|
fetchNewData = TRUE;
|
|
dwTopicCount++;
|
|
}
|
|
else
|
|
{
|
|
if ((fRet = EmitNewData (lpipb, pWordInfo, TRUE)) != S_OK)
|
|
return(fRet);
|
|
dwNewTopicCount --;
|
|
fetchNewData = TRUE;
|
|
pWordInfo->dwIndexTopicCount++;
|
|
dwTopicCount++;
|
|
}
|
|
}
|
|
while (dwIndexTopicCount)
|
|
{
|
|
if (fetchOldData)
|
|
{
|
|
if (pIndexDataNode->ibit < cbitBYTE - 1)
|
|
{
|
|
pIndexDataNode->ibit = cbitBYTE - 1;
|
|
pIndexDataNode->pCurPtr ++;
|
|
}
|
|
|
|
// Get the topicId from the index file
|
|
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
|
|
&dwTopicIdDelta)) != S_OK)
|
|
return fRet;
|
|
pWordInfo->dwIndexTopicId = (dwIndexTopicId += dwTopicIdDelta);
|
|
fetchOldData = FALSE;
|
|
}
|
|
|
|
if ((fRet = EmitOldData (lpipb, pIndexDataNode,
|
|
pWordInfo)) != S_OK)
|
|
return(fRet);
|
|
fetchOldData = TRUE;
|
|
dwIndexTopicCount --;
|
|
dwTopicCount++;
|
|
}
|
|
while (dwNewTopicCount)
|
|
{
|
|
// Get the topicId from the new file
|
|
if (fetchNewData)
|
|
{
|
|
if (pInFile->cbLeft < 2 * sizeof (DWORD))
|
|
{
|
|
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
|
|
pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
|
|
pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft,
|
|
&errb);
|
|
pInFile->dwMax = pInFile->cbLeft;
|
|
pInFile->pCurrent = pInFile->pMem;
|
|
}
|
|
cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
|
|
pInFile->pCurrent += cbTemp;
|
|
pInFile->cbLeft -= cbTemp;
|
|
pWordInfo->dwNewTopicId = (dwNewTopicId += dwTopicIdDelta);
|
|
fetchNewData = FALSE;
|
|
}
|
|
|
|
if ((fRet = EmitNewData (lpipb, pWordInfo, TRUE)) != S_OK)
|
|
return(fRet);
|
|
fetchNewData = TRUE;
|
|
dwNewTopicCount --;
|
|
dwTopicCount++;
|
|
pWordInfo->dwIndexTopicCount++;
|
|
}
|
|
|
|
// Adjust for some bits used
|
|
if (pOutFile->ibit < cbitBYTE - 1)
|
|
{
|
|
pOutFile->pCurrent++;
|
|
pOutFile->cbLeft--;
|
|
pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
|
|
}
|
|
|
|
// Flush the output buffer
|
|
if (FileWrite (pOutFile->fFile, pOutFile->pMem,
|
|
(LONG)(pOutFile->pCurrent - pOutFile->pMem), &errb) !=
|
|
(LONG)(pOutFile->pCurrent - pOutFile->pMem))
|
|
return(errb);
|
|
|
|
dwNewDataSize = DwSubFo(pOutFile->foPhysicalOffset, foStart);
|
|
if (pWordInfo->dwDataSize < dwNewDataSize)
|
|
{
|
|
|
|
// ERIC: Find the best fit block here
|
|
// - Add the block pointed by pWordInfo into the free list
|
|
// - Find a new block in the freelist
|
|
// if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
|
|
// foNewDataOffset, dwNewDataSize)) != S_OK)
|
|
// where foNewDataOffset may be the max offset or the freelist
|
|
// block offset
|
|
FILEOFFSET foOffset1, foNewDataOffset;
|
|
WORD wNumBlocksTemp;
|
|
WORD wMaxBlocksTemp;
|
|
|
|
// Before adding that block to the FreeList,
|
|
// look if we need to change the size of the FreeList
|
|
|
|
QFREELIST qFreeList = _GLOBALLOCK(lpipb->hFreeList);
|
|
wNumBlocksTemp = qFreeList->flh.wNumBlocks;
|
|
wMaxBlocksTemp = qFreeList->flh.wMaxBlocks;
|
|
_GLOBALUNLOCK(lpipb->hFreeList);
|
|
|
|
// we use a count of two in the test below, in case not only old block is added but
|
|
// also an entry for the unused portion of the new block (later).
|
|
if (wMaxBlocksTemp < 2 || wNumBlocksTemp >= wMaxBlocksTemp - 2)
|
|
{
|
|
HFREELIST hFreeListTemp;
|
|
|
|
// if the free list can't grow, fall through to FreeListAdd, where the
|
|
// smallest free entry will be overwritten and re-used
|
|
if (wMaxBlocksTemp < MAXWORD - wDefaultFreeListSize)
|
|
{
|
|
hFreeListTemp = FreeListRealloc(lpipb->hFreeList,
|
|
(WORD)(wMaxBlocksTemp + wDefaultFreeListSize),
|
|
&errb);
|
|
if (errb != S_OK)
|
|
return errb;
|
|
lpipb->hFreeList = hFreeListTemp;
|
|
}
|
|
}
|
|
|
|
FreeListAdd(lpipb->hFreeList, pWordInfo->dataLocation, MakeFo(pWordInfo->dwDataSize,0));
|
|
foNewDataOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwNewDataSize,0), &errb);
|
|
|
|
if (FoIsNil(foNewDataOffset))
|
|
{
|
|
#ifdef _DEBUGFREE
|
|
_DPF2("UpdateDataNode: Grow from %ld to %ld failed: appending to EOF\n", pWordInfo->dwDataSize,\
|
|
dwNewDataSize);
|
|
#endif
|
|
foNewDataOffset = lpipb->foMaxOffset;
|
|
}
|
|
else
|
|
{
|
|
#ifdef _DEBUGFREE
|
|
_DPF3("UpdateDataNode: Grow from %ld to %ld uses free block at %ld\n", pWordInfo->dwDataSize,\
|
|
dwNewDataSize, foNewDataOffset.dwOffset );
|
|
#endif
|
|
|
|
foOffset1 = FreeListGetBlockAt(lpipb->hFreeList, foNewDataOffset, &errb);
|
|
if (FoCompare(foOffset1,MakeFo(sizeof(FREELIST),0)) > 0)
|
|
FreeListAdd(lpipb->hFreeList, FoAddDw(foNewDataOffset,dwNewDataSize),
|
|
FoSubFo(foOffset1,MakeFo(dwNewDataSize,0)));
|
|
}
|
|
|
|
if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
|
|
foNewDataOffset, dwNewDataSize)) != S_OK)
|
|
return fRet;
|
|
|
|
pWordInfo->dataLocation = foNewDataOffset;
|
|
|
|
|
|
//if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
|
|
// lpipb->foMaxOffset, dwNewDataSize)) != S_OK)
|
|
// return fRet;
|
|
//pWordInfo->dataLocation = lpipb->foMaxOffset;
|
|
|
|
|
|
// ERIC: Only increase the size of the file if foMaxOffset is used
|
|
if (FoEquals(foNewDataOffset,lpipb->foMaxOffset))
|
|
{
|
|
lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwNewDataSize);
|
|
|
|
#ifdef _DEBUG
|
|
dwOldDataLoss += pWordInfo->dwDataSize;
|
|
dwOldDataNeed += dwNewDataSize;
|
|
#endif
|
|
}
|
|
|
|
pWordInfo->dwDataSize = dwNewDataSize;
|
|
}
|
|
else
|
|
{
|
|
if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
|
|
pWordInfo->dataLocation, dwNewDataSize)) != S_OK)
|
|
return fRet;
|
|
}
|
|
|
|
pWordInfo->dwMergeTopicCount = dwTopicCount;
|
|
return(S_OK);
|
|
}
|
|
|
|
/*
 * SkipOldData
 *
 * Advance the decoder for an existing index data node past one topic's
 * occurrence list without emitting anything.
 *
 * The occurrence count is decoded with the header's ckeyOccCount key.
 * For each occurrence, a word-count field is decoded when OCCF_COUNT is
 * set and an offset field is decoded when OCCF_OFFSET is set; every
 * decoded value is thrown away.
 *
 * Parameters:
 *   lpipb          - index parameter block; supplies occf and the
 *                    B-tree header holding the compression keys.
 *   pIndexDataNode - decoder state positioned at the occurrence count.
 *
 * Returns S_OK, or the first non-S_OK code returned by the decoder.
 *
 * NOTE(review): EmitOldData reads a 16-bit weight before the occurrence
 * count when IDXF_NORMALIZE is set, and stops early when
 * OCCF_HAVE_OCCURRENCE is clear.  This routine does neither; presumably
 * the caller accounts for that -- confirm against the call site.
 */
PUBLIC HRESULT PASCAL FAR SkipOldData (_LPIPB lpipb, PNODEINFO pIndexDataNode)
{
    PIH20   pIdxHdr   = &lpipb->BTreeData.Header;
    OCCF    occfFlags = lpipb->occf;
    DWORD   cOccLeft;
    DWORD   dwDiscard;      // Receives decoded values nobody needs
    HRESULT hr;

    // How many occurrences follow for this topic?
    hr = FGetDword(pIndexDataNode, pIdxHdr->ckeyOccCount, &cOccLeft);
    if (hr != S_OK)
        return hr;

    // Walk over every occurrence of the current sub-list
    while (cOccLeft != 0)
    {
        // Word count is present only when counts are being kept
        if (occfFlags & OCCF_COUNT)
        {
            hr = FGetDword(pIndexDataNode, pIdxHdr->ckeyWordCount,
                &dwDiscard);
            if (hr != S_OK)
                return hr;
        }

        // Byte offset is present only when offsets are being kept
        if (occfFlags & OCCF_OFFSET)
        {
            hr = FGetDword(pIndexDataNode, pIdxHdr->ckeyOffset,
                &dwDiscard);
            if (hr != S_OK)
                return hr;
        }

        cOccLeft--;
    }
    return S_OK;
}
|
|
|
|
/*
 * EmitNewData
 *
 * Encode one topic record taken from the new-data input stream
 * (lpipb->InFile) into the output buffer (lpipb->OutFile).
 *
 * Steps, in order:
 *   1. Compute the topic-id delta (dwNewTopicId - dwOldTopicId) and make
 *      dwNewTopicId the new dwOldTopicId.
 *   2. Byte-align the output if the previous record left a partial byte.
 *   3. Write the topic-id delta.
 *   4. If occurrence data is kept, unpack the occurrence count from the
 *      input stream, refilling the input buffer when it runs low.
 *   5. If IDXF_NORMALIZE is set, compute and write a 16-bit term weight.
 *   6. If occurrence data is kept, write the occurrence count followed by
 *      the per-occurrence word-count / offset fields copied from the input.
 *
 * Parameters:
 *   lpipb     - index parameter block (input/output buffers, flags,
 *               compression keys, sigma table).
 *   pWordInfo - reads dwNewTopicId, dwOldTopicId and dwIndexTopicCount;
 *               dwOldTopicId is updated.
 *   fnewData  - when TRUE the term for this word is folded into
 *               hrgsigma[dwNewTopicId] before the weight is computed;
 *               when FALSE the sigma entry is used as it stands.
 *
 * Returns S_OK, or the first non-S_OK code returned by the encoder or
 * by FWriteBits.
 *
 * NOTE(review): the error block filled in by FileRead is never examined,
 * so a failed refill is not detected here.  The weight computation also
 * divides by dwIndexTopicCount and by the sigma entry without checking
 * for zero -- confirm callers guarantee both are non-zero.
 */
PRIVATE HRESULT PASCAL FAR EmitNewData (_LPIPB lpipb, PWORDINFO pWordInfo,
    BOOL fnewData)
{
    DWORD     dwTopicDelta;
    DWORD     dwOccs = 0;
    DWORD     dwTemp;
    WORD      wWeight = 0;
    PFILEDATA pInFile = &lpipb->InFile;
    PFILEDATA pOutFile = &lpipb->OutFile;
    OCCF      occf = lpipb->occf;
    PIH20     pHeader = &lpipb->BTreeData.Header;
    int       cbTemp;
    ERRB      errb;
    HRESULT   fRet;

    // Set the delta
    dwTopicDelta = pWordInfo->dwNewTopicId - pWordInfo->dwOldTopicId;
    pWordInfo->dwOldTopicId = pWordInfo->dwNewTopicId;

    // Each topic record starts on a byte boundary: skip the rest of a
    // partially used byte
    if (pOutFile->ibit < cbitBYTE - 1)
    {
        pOutFile->pCurrent++;
        pOutFile->cbLeft--;
        pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
        pOutFile->ibit = cbitBYTE - 1;
    }

    // Write the topic-id delta; an encoder failure must not be ignored
    if ((fRet = FAddDword (pOutFile, dwTopicDelta,
        pHeader->ckeyTopicId)) != S_OK)
        return(fRet);

    if (occf & OCCF_HAVE_OCCURRENCE)
    {
        // Get number of occ data records for this topic
        if (pInFile->cbLeft < 2 * sizeof (DWORD))
        {
            // Slide the unread tail to the front and top up the buffer
            MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
            pInFile->cbLeft += FileRead (pInFile->fFile,
                pInFile->pMem + pInFile->cbLeft,
                pInFile->dwMax - pInFile->cbLeft, &errb);
            pInFile->dwMax = pInFile->cbLeft;
            pInFile->pCurrent = pInFile->pMem;
        }
        cbTemp = CbByteUnpack (&dwOccs, pInFile->pCurrent);
        pInFile->pCurrent += cbTemp;
        pInFile->cbLeft -= cbTemp;
    }

    // If we are term weighing we have to calculate the weight
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        FLOAT rLog;
        FLOAT rTerm;
        FLOAT rWeight;
        FLOAT fOcc;

#ifndef ISBU_IR_CHANGE
        rLog = (float) log10(cHundredMillion/(double)pWordInfo->dwIndexTopicCount);
        rTerm = rLog*rLog;
        if (fnewData)
        {
            fOcc = (float) min(cTFThreshold, dwOccs);
            // Add the new factor into the sigma term:
            // sigma = sqrt (sigma^2 + fOcc^2 * rTerm)
            lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] *=
                lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
            lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] += fOcc * fOcc * rTerm;
            lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] =
                (float)(sqrt((double)lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]));
        }

        // NOTE : The following weight computation, until the assignment to wWeight, is
        // very similar to the weight computation in WriteDataNode() of permind2.c file.
        // Read the explanation there for the hard coded figures and logic appearing below.
        rTerm = (float) (8.0 - log10((double)pWordInfo->dwIndexTopicCount));
        // In extreme cases, rTerm could be 0 or even -ve (when dwTopicCount approaches or
        // exceeds 100,000,000)
        if (rTerm <= (float) 0.0)
            rTerm = cVerySmallWt;   // very small value. == log(100 mil/ 95 mil)

        rWeight = ((float) min(cTFThreshold, dwOccs)) * rTerm * rTerm / lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
        // without the additional rTerm, we would probably be between 0.0 and 1.0
        if (rWeight > rTerm)
            wWeight = 0xFFFF;
        else
            wWeight = (WORD) ((float)0xFFFF * rWeight / rTerm);
#else
        rLog = (float)(1.0) / (float)pWordInfo->dwIndexTopicCount;
        rTerm = rLog * rLog;
        if (fnewData)
        {
            // Add the new factor into the sigma term
            lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] *=
                lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
            lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] +=
                dwOccs * rTerm;
            lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] =
                (float)(sqrt((double)lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]));
        }
        rTerm = rTerm * (float)65535.0;

        rWeight = (float)dwOccs * rTerm /
            (float)(lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]);
        // Saturate at the largest value a WORD can hold
        if (rWeight >= 65535.0)
            wWeight = 65535;
        else
            wWeight = (WORD)rWeight;
#endif // ISBU_IR_CHANGE

        // Write the weight to the output buffer
        if ((fRet = FWriteBits (pOutFile, (DWORD)wWeight,
            (BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
            return fRet;
    }

    // Nothing more to write when no occurrence data is kept
    if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
        return(S_OK);

    // Write the OccCount
    if ((fRet = FAddDword (pOutFile, dwOccs,
        pHeader->ckeyOccCount)) != S_OK)
        return(fRet);

    // Encode the occ block
    for (; dwOccs; dwOccs--)
    {
        // Make sure input buffer holds enough data
        if (pInFile->cbLeft < 5 * sizeof (DWORD))
        {
            MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
            pInFile->cbLeft += FileRead (pInFile->fFile,
                pInFile->pMem + pInFile->cbLeft,
                pInFile->dwMax - pInFile->cbLeft, &errb);
            pInFile->dwMax = pInFile->cbLeft;
            pInFile->pCurrent = pInFile->pMem;
        }

        if (occf & OCCF_COUNT)
        {
            cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
            pInFile->pCurrent += cbTemp;
            pInFile->cbLeft -= cbTemp;
            if ((fRet = FAddDword (pOutFile, dwTemp, pHeader->ckeyWordCount))
                != S_OK)
                return(fRet);
        }

        if (occf & OCCF_OFFSET)
        {
            cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
            pInFile->pCurrent += cbTemp;
            pInFile->cbLeft -= cbTemp;
            if ((fRet = FAddDword (pOutFile, dwTemp, pHeader->ckeyOffset))
                != S_OK)
                return(fRet);
        }
    }
    return(S_OK);
}
|
|
|
|
/*
 * EmitOldData
 *
 * Re-encode one topic record that already exists in the index: decode it
 * from pIndexDataNode and write it to the output buffer (lpipb->OutFile).
 *
 * Steps, in order:
 *   1. Byte-align the output if the previous record left a partial byte.
 *   2. Write the topic-id delta (dwIndexTopicId - dwOldTopicId) and make
 *      dwIndexTopicId the new dwOldTopicId.
 *   3. If IDXF_NORMALIZE is set, read the stored 16-bit weight and write
 *      it back out unchanged.
 *   4. If occurrence data is kept, copy the occurrence count and the
 *      per-occurrence word-count / offset fields.
 *
 * Parameters:
 *   lpipb          - index parameter block (output buffer, flags,
 *                    compression keys).
 *   pIndexDataNode - decoder state positioned just after the topic id of
 *                    the record to copy.
 *   pWordInfo      - reads dwIndexTopicId and dwOldTopicId; dwOldTopicId
 *                    is updated.
 *
 * Returns S_OK, or the first non-S_OK code returned by the decoder, the
 * encoder, FGetBits or FWriteBits.
 */
PRIVATE HRESULT PASCAL FAR EmitOldData (_LPIPB lpipb, PNODEINFO pIndexDataNode,
    PWORDINFO pWordInfo)
{
    DWORD     dwTopicDelta;
    DWORD     dwOccs;
    DWORD     dwTmp;
    PFILEDATA pOutFile = &lpipb->OutFile;
    OCCF      occf = lpipb->occf;
    HRESULT   fRet;
    PIH20     pHeader = &lpipb->BTreeData.Header;

    // Each topic record starts on a byte boundary: skip the rest of a
    // partially used byte
    if (pOutFile->ibit < cbitBYTE - 1)
    {
        pOutFile->pCurrent++;
        pOutFile->cbLeft--;
        pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
        pOutFile->ibit = cbitBYTE - 1;
    }

    // Set the delta
    dwTopicDelta = pWordInfo->dwIndexTopicId - pWordInfo->dwOldTopicId;
    pWordInfo->dwOldTopicId = pWordInfo->dwIndexTopicId;

    if ((fRet = FAddDword (pOutFile, dwTopicDelta,
        pHeader->ckeyTopicId)) != S_OK)
        return(fRet);

    // If we are term weighing, carry the stored weight over
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        if ((fRet = FGetBits(pIndexDataNode, &dwTmp, sizeof (USHORT) * cbitBYTE))
            != S_OK)
            return(fRet);

        // Write the weight just read to the output buffer.  Previously a
        // never-assigned local (always 0) was written here, which dropped
        // the weight of every copied topic.
        if ((fRet = FWriteBits (pOutFile, (DWORD)(WORD)dwTmp,
            (BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
            return(fRet);
    }

    // Don't do anything else if there is nothing else to do!!!
    if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
        return S_OK;

    if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOccCount,
        &dwOccs)) != S_OK)
        return fRet;

    // Write the OccCount
    if ((fRet = FAddDword (pOutFile, dwOccs,
        pHeader->ckeyOccCount)) != S_OK)
        return(fRet);

    // Encode the occ block
    for (; dwOccs; dwOccs--)
    {
        if (occf & OCCF_COUNT)
        {
            if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyWordCount,
                &dwTmp)) != S_OK)
                return fRet;
            if ((fRet = FAddDword (pOutFile, dwTmp, pHeader->ckeyWordCount))
                != S_OK)
                return(fRet);
        }
        if (occf & OCCF_OFFSET)
        {
            if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOffset,
                &dwTmp)) != S_OK)
                return fRet;
            if ((fRet = FAddDword (pOutFile, dwTmp, pHeader->ckeyOffset))
                != S_OK)
                return(fRet);
        }
    }
    return(S_OK);
}
|
|
|
|
|
|
/*
 * CopyNewDataToStemNode
 *
 * Build the first entry of a stem (non-leaf) node in pTmpNode's buffer.
 *
 * Buffer contents written, starting at pTmpNode->pBuffer:
 *   WORD   bytes remaining in the block (block size minus bytes used)
 *   ...    pWord, prefix-compressed against pLastWord
 *   ...    file offset of the node one level down (cLevel + 1)
 *
 * The child offset is taken from rgpTmpNodeInfo when fFlag has
 * USE_TEMP_NODE_04 set, otherwise from rgpNodeInfo.  pTmpNode->pCurPtr
 * is left pointing just past the entry.
 *
 * Returns the number of bytes used from the start of the buffer.
 */
PRIVATE int PASCAL NEAR CopyNewDataToStemNode (_LPIPB lpipb,
    PNODEINFO pTmpNode, LPB pWord, LPB pLastWord, int cLevel, int fFlag)
{
    // Leave room for the leading WORD, filled in at the end
    LPB pOut = pTmpNode->pBuffer + sizeof(WORD);

    // The compressed word comes first
    pOut += PrefixCompressWord (pOut, pWord, pLastWord,
        lpipb->occf & OCCF_LENGTH);

    // Then the offset of the child node, from whichever table is in use
    pOut += CopyFileOffset (pOut,
        (fFlag & USE_TEMP_NODE_04) ?
            lpipb->BTreeData.rgpTmpNodeInfo[cLevel+1]->nodeOffset :
            lpipb->BTreeData.rgpNodeInfo[cLevel+1]->nodeOffset);

    pTmpNode->pCurPtr = pOut;

    // Record how much of the block is still free
    SETWORD (pTmpNode->pBuffer, (WORD)(lpipb->BTreeData.Header.dwBlockSize
        - (pOut - pTmpNode->pBuffer)));

    return (int)(pOut - pTmpNode->pBuffer);
}
|
|
|
|
/*
 * CopyNewDataToLeafNode
 *
 * Build the first entry of a leaf node in pTmpNode's buffer.
 *
 * The first FOFFSET_SIZE bytes of the buffer are not touched here
 * (presumably a link to another leaf -- confirm).  After them:
 *   WORD   bytes remaining in the block (block size minus bytes used)
 *   ...    pWord, prefix-compressed against pLastWord
 *   ...    field id (only when OCCF_FIELDID is set), byte-packed
 *   ...    merged topic count, byte-packed
 *   ...    file offset of the word's data
 *   ...    size of the word's data, byte-packed
 *
 * pTmpNode->pCurPtr is left just past the entry and pTmpNode->cbLeft is
 * set to the bytes remaining in the block.
 *
 * Returns the number of bytes used from the start of the buffer.
 */
PRIVATE int PASCAL NEAR CopyNewDataToLeafNode (_LPIPB lpipb, PNODEINFO pTmpNode,
    PWORDINFO pWordInfo, LPB pWord, LPB pLastWord)
{
    // Skip the leading file-offset slot and the free-space WORD
    LPB pOut = pTmpNode->pBuffer + FOFFSET_SIZE + sizeof(WORD);

    // The compressed word comes first
    pOut += PrefixCompressWord (pOut, pWord, pLastWord,
        lpipb->occf & OCCF_LENGTH);

    // Field id is stored only when the index keeps field ids
    if (lpipb->occf & OCCF_FIELDID)
        pOut += CbBytePack (pOut, pWordInfo->dwFieldId);

    // Topic count, then where the data lives and how big it is
    pOut += CbBytePack (pOut, pWordInfo->dwMergeTopicCount);
    pOut += CopyFileOffset (pOut, pWordInfo->dataLocation);
    pOut += CbBytePack (pOut, pWordInfo->dwDataSize);

    pTmpNode->pCurPtr = pOut;

    // Remember the free space in the node info and in the block itself
    pTmpNode->cbLeft = (LONG)(lpipb->BTreeData.Header.dwBlockSize
        - (pOut - pTmpNode->pBuffer));
    SETWORD (pTmpNode->pBuffer + FOFFSET_SIZE, (WORD)(pTmpNode->cbLeft));

    return (int)(pOut - pTmpNode->pBuffer);
}
|
|
|
|
/*
 * SkipNewData
 *
 * Consume one topic's occurrence list from the new-data input stream
 * (lpipb->InFile) without writing anything to the output.
 *
 * When the index keeps no occurrence data (OCCF_HAVE_OCCURRENCE clear)
 * there is nothing to consume.  Otherwise the byte-packed occurrence
 * count is read, and for every occurrence one byte-packed value is
 * consumed for OCCF_COUNT and one for OCCF_OFFSET, whichever are set.
 * The input buffer is topped up from the file whenever fewer than a
 * fixed number of bytes remain.
 *
 * Parameters:
 *   lpipb     - index parameter block; supplies the input buffer and occf.
 *   pWordInfo - not used by this routine.
 *
 * Always returns S_OK.
 *
 * NOTE(review): the error block filled in by FileRead is never examined,
 * so a failed refill is not detected here.
 */
PRIVATE HRESULT PASCAL FAR SkipNewData (_LPIPB lpipb, PWORDINFO pWordInfo)
{
    PFILEDATA pSrc = &lpipb->InFile;
    OCCF      occfFlags = lpipb->occf;
    DWORD     cOccLeft;
    DWORD     dwDiscard;    // Receives unpacked values nobody needs
    int       cFields;      // Packed values stored per occurrence
    int       iField;
    int       cbUsed;
    ERRB      errb;

    // Nothing is stored per topic when occurrences are not kept
    if (!(occfFlags & OCCF_HAVE_OCCURRENCE))
        return S_OK;

    // Make sure the occurrence count is in the buffer
    if (pSrc->cbLeft < 2 * sizeof (DWORD))
    {
        // Slide the unread tail to the front and top up the buffer
        MEMMOVE (pSrc->pMem, pSrc->pCurrent, pSrc->cbLeft);
        pSrc->cbLeft += FileRead (pSrc->fFile,
            pSrc->pMem + pSrc->cbLeft,
            pSrc->dwMax - pSrc->cbLeft, &errb);
        pSrc->dwMax = pSrc->cbLeft;
        pSrc->pCurrent = pSrc->pMem;
    }

    // Number of occurrence records for this topic
    cbUsed = CbByteUnpack (&cOccLeft, pSrc->pCurrent);
    pSrc->pCurrent += cbUsed;
    pSrc->cbLeft -= cbUsed;

    // One packed value for the word count, one for the offset
    cFields = 0;
    if (occfFlags & OCCF_COUNT)
        cFields++;
    if (occfFlags & OCCF_OFFSET)
        cFields++;

    // Step over the occurrence block
    while (cOccLeft != 0)
    {
        // Make sure input buffer holds enough data
        if (pSrc->cbLeft < 5 * sizeof (DWORD))
        {
            MEMMOVE (pSrc->pMem, pSrc->pCurrent, pSrc->cbLeft);
            pSrc->cbLeft += FileRead (pSrc->fFile,
                pSrc->pMem + pSrc->cbLeft,
                pSrc->dwMax - pSrc->cbLeft, &errb);
            pSrc->dwMax = pSrc->cbLeft;
            pSrc->pCurrent = pSrc->pMem;
        }

        for (iField = 0; iField < cFields; iField++)
        {
            cbUsed = CbByteUnpack (&dwDiscard, pSrc->pCurrent);
            pSrc->pCurrent += cbUsed;
            pSrc->cbLeft -= cbUsed;
        }

        cOccLeft--;
    }
    return(S_OK);
}
|
|
|
|
// Scratch buffer (1000 bytes) that receives the word extracted by
// ExtractWord in the disabled (#if 0) CheckStemNode / CheckLeafNode
// routines.  NOTE(review): any other users are outside the part of the
// file seen here.
BYTE CurrentWord [1000];
|
|
// Scratch buffer (1000 bytes) holding the previously extracted word in
// the disabled (#if 0) CheckStemNode / CheckLeafNode routines; they
// clear its leading WORD before scanning a node and copy CurrentWord
// into it after each entry.  NOTE(review): any other users are outside
// the part of the file seen here.
BYTE LastWord [1000];
|
|
|
|
#if 0
|
|
/*
 * CheckStemNode
 *
 * Debug check (compiled out by the surrounding #if 0): scan every entry
 * of a stem node and verify that the words do not go backwards.
 *
 * Scanning starts after the leading WORD of the node buffer.  Each entry
 * is a word followed by a file offset.  At least one entry is always
 * examined; scanning stops once the cursor reaches pMaxAddress.
 *
 * Uses the file-level CurrentWord and LastWord scratch buffers.
 *
 * Returns S_OK when no word compares lower than its predecessor,
 * otherwise the result of SetErrCode (NULL, ERR_FAILED).
 */
HRESULT CheckStemNode (PNODEINFO pNodeInfo)
{
    LPB        pScan = pNodeInfo->pBuffer + sizeof(WORD);
    LPB        pEnd = pNodeInfo->pMaxAddress;
    WORD       cbWord;
    FILEOFFSET foChild;

    // Start with an empty "previous word"
    *(LPWORD)LastWord = 0;

    for (;;)
    {
        pScan = ExtractWord(CurrentWord, pScan, &cbWord);

        // A word lower than the one before it means the node is broken
        if (StrCmpPascal2(LastWord, CurrentWord) > 0)
            return(SetErrCode (NULL, ERR_FAILED));

        // Step over the child offset, then remember this word
        pScan += ReadFileOffset (&foChild, pScan);
        MEMCPY(LastWord, CurrentWord, cbWord + 2);

        if (pScan >= pEnd)
            break;
    }
    return(S_OK);
}
|
|
|
|
|
|
/*
 * CheckLeafNode
 *
 * Debug check (compiled out by the surrounding #if 0): scan every entry
 * of a leaf node and verify that the words do not go backwards.
 *
 * Scanning starts after the leading WORD and FOFFSET_SIZE bytes of the
 * node buffer.  Each entry is a word, an optional byte-packed field id
 * (when occf has OCCF_FIELDID), a byte-packed topic count, a file offset
 * and a byte-packed data size.  At least one entry is always examined;
 * scanning stops once the cursor reaches pMaxAddress.
 *
 * Uses the file-level CurrentWord and LastWord scratch buffers.
 *
 * Returns S_OK when no word compares lower than its predecessor,
 * otherwise the result of SetErrCode (NULL, ERR_FAILED).
 */
HRESULT CheckLeafNode (PNODEINFO pNodeInfo, int occf)
{
    LPB        pScan = pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE;
    LPB        pEnd = pNodeInfo->pMaxAddress;
    WORD       cbWord;
    FILEOFFSET foData;
    DWORD      dwDiscard;

    // Start with an empty "previous word"
    *(LPWORD)LastWord = 0;

    for (;;)
    {
        pScan = ExtractWord(CurrentWord, pScan, &cbWord);

        // A word lower than the one before it means the node is broken
        if (StrCmpPascal2(LastWord, CurrentWord) > 0)
            return(SetErrCode (NULL, ERR_FAILED));

        MEMCPY(LastWord, CurrentWord, cbWord + 2);

        // Step over the field id (if stored) and the topic count
        if (occf & OCCF_FIELDID)
            pScan += CbByteUnpack (&dwDiscard, pScan);
        pScan += CbByteUnpack (&dwDiscard, pScan);

        // Step over the data location and size
        pScan += ReadFileOffset (&foData, pScan);
        pScan += CbByteUnpack(&dwDiscard, pScan);

        if (pScan >= pEnd)
            break;
    }
    return(S_OK);
}
|
|
#endif
|