#define VER3
/*************************************************************************
*                                                                        *
*  INDEX.C                                                               *
*                                                                        *
*  Copyright (C) Microsoft Corporation 1990-1994                         *
*  All Rights reserved.                                                  *
*                                                                        *
**************************************************************************
*                                                                        *
*  Module Intent                                                         *
*  This is the second stage of the index building process.  After all    *
*  of the words have been added in stage 1, IndexBuild will be called.   *
*  IndexBuild starts the second stage.  We will merge-sort the temp file *
*  generated in phase 1 to create a second temp file to send to phase 3. * 
*                                                                        *
**************************************************************************
*                                                                        *
*  Current Owner: BinhN                                                  *
*                                                                        *
**************************************************************************/

#include <mvopsys.h>
#include <mem.h>
#include <memory.h>
#include <io.h>
#include <math.h>
#include <mvsearch.h>
#include <orkin.h>
#include "common.h"
#include "index.h"

#ifdef _DEBUG
static BYTE NEAR s_aszModule[] = __FILE__;	/* Used by error return functions.*/
#endif


// ESOUTPUT_BUFFER: size in bytes of the merge output buffer. ProcessFiles
// allocates the buffer with this size and ESMemory2Disk derives its
// flush threshold from it.
// It must be at least the size of the largest word + 12,
// or word + 14 if OCCF_LENGTH is set.
#ifndef _32BIT
#define ESOUTPUT_BUFFER 0xFFFC  // Just under 64K for non-32-bit builds
#else
#define ESOUTPUT_BUFFER 0xFFFFC  // Just under 1MB for 32-bit builds
#endif

// Flag bits for the 'flag' argument of ESMemory2Disk (tested with '&'):
//   FLUSH_NEW_RECORD  - the record is complete; flush everything and
//                       write (or backpatch) the final topic count
//   FLUSH_EXCEPT_LAST - partial flush; keep the last topic node in memory
#define FLUSH_NEW_RECORD    1
#define FLUSH_EXCEPT_LAST   2

/*************************************************************************
 *
 *                    INTERNAL PRIVATE FUNCTIONS
 *
 *  All of them should be declared near
 *
 *************************************************************************/
// Input/output buffer helpers for the external merge sort
PRIVATE HRESULT  NEAR PASCAL FillInputBuffer (LPESB, HFPB);
PRIVATE HRESULT  NEAR PASCAL ESFlushBuffer (LPESI);
PRIVATE HRESULT  NEAR PASCAL ESFillBuffer (_LPIPB, LPESB);
PRIVATE HRESULT  NEAR PASCAL ESMemory2Disk (_LPIPB, PMERGEHEADER, int);
// Merge driver, record comparison and priority-queue maintenance
PRIVATE HRESULT  NEAR PASCAL ProcessFiles (_LPIPB lpipb, LPMERGEPARAMS);
PRIVATE int  NEAR PASCAL CompareRecordBuffers (_LPIPB, LPB, LPB);
PRIVATE VOID NEAR PASCAL PQueueUp (_LPIPB, LPESB FAR *, LONG);
PRIVATE VOID NEAR PASCAL PQueueDown (_LPIPB);
// Topic / occurrence list merging and block-node allocation
PRIVATE PTOPICDATA PASCAL NEAR MergeTopicNode (PMERGEHEADER, PTOPICDATA, int);
PRIVATE VOID NEAR MergeOccurrence (PTOPICDATA, PTOPICDATA, int);
PRIVATE LPV PASCAL NEAR GetBlockNode (PBLKCOMBO lpBlockCombo);
PRIVATE VOID PASCAL NEAR SetQueue (LPESI pEsi);
PRIVATE HRESULT PASCAL NEAR ESBBlockAllocate (_LPIPB lpipb, DWORD lMemSize);
// Temp-file (ESI) state helpers, used when KEEP_TEMP_FILE is set.
// NOTE(review): LoadEsiTemp is the only prototype here without NEAR, and
// SaveEsiTemp/UpdateEsiTemp take LPIPB where the others take _LPIPB --
// confirm these differences are intentional.
PRIVATE BOOL PASCAL LoadEsiTemp (_LPIPB lpipb, LPESI lpesi, LPB lpbEsiFile,
    LPB lpbIsiFile, PHRESULT phr);
PRIVATE VOID PASCAL NEAR SaveEsiTemp (LPIPB lpipb, LPESI lpesi);
PRIVATE VOID PASCAL NEAR UpdateEsiTemp (LPIPB lpipb);
PRIVATE BOOL PASCAL NEAR FindTopic(LPMERGEPARAMS lpmp, DWORD dwTopicId);

/*************************************************************************
 *
 *                    INTERNAL PUBLIC FUNCTIONS
 *
 *  All of them should be declared far, unless we know they belong to
 *  the same segment. They should be included in some include file
 *
 *************************************************************************/
// FlushTree: called by MVIndexBuild to flush the internal sort tree to disk
PUBLIC HRESULT FAR PASCAL FlushTree(_LPIPB lpipb);
// MergeSortTreeFile: merges the sorted runs of the phase-1 temp file
PUBLIC HRESULT FAR PASCAL MergeSortTreeFile (_LPIPB, LPMERGEPARAMS);
// AllocSigmaTable: called by MVIndexBuild when IDXF_NORMALIZE is set.
// NOTE(review): declared without PUBLIC, unlike the two prototypes above.
HRESULT FAR PASCAL AllocSigmaTable (_LPIPB lpipb);


/*************************************************************************
 *
 *  @doc    EXTERNAL API INDEX
 *
 *  @func   HRESULT FAR PASCAL | MVIndexBuild |
 *      This function will build an index file based on the information
 *      collected in the Index parameter block.
 *
 *  @parm   HFPB | hSysFile |
 *      If it is non-null, it is the handle of an already opened system file.
 *      In this case the index is a subfile of the opened system file
 *      If it is 0, the index file is a regular DOS file
 *
 *  @parm   LPIPB | lpipb |
 *      Pointer to Index Parameter Block. This structure contains all the
 *      information necessary to build the index file
 *
 *  @parm   HFPB | hfpb |
 *      Index hfpb if pstrFile is NULL
 *
 *  @parm   LPSTR | pstrFile |
 *      Index filename if hfpb is NULL
 *
 *  @rdesc  S_OK, or other errors
 *
 *  @xref   MVIndexInitiate()
 *************************************************************************/
/*
 *      This operates in three main steps:
 *
 *      1.  Send finish to first phase to dump the buffer.  Then merge-sort
 *      that file into a temporary index.  Keep statistics on the information
 *      written to this temporary index.
 *
 *      2.  Analyze the statistics gathered during the temporary index
 *      building phase.  This analysis results in the choice of
 *      compression processes that will be used in the next step.
 *
 *      3.  Permanent index building phase.  During this phase, the
 *      temporary index is read, compressed like crazy, and written
 *      to a permanent index file.  Unlike the temporary index, the
 *      permanent index contains directory nodes as well as leaf
 *      nodes.
 *
 *************************************************************************/

PUBLIC  HRESULT EXPORT_API FAR PASCAL MVIndexBuild (HFPB hSysFile,
    _LPIPB lpipb, HFPB hfpb, LPSTR pstrFile)
{
    // Builds the permanent index from the data collected in lpipb:
    //   1. flush the in-memory sort tree and merge-sort the phase-1 file
    //   2. pick a compression scheme for each field from the statistics
    //   3. build the B-tree into hfpb / pstrFile (optionally inside hSysFile)
    // Returns S_OK (also when there is nothing to index), E_INVALIDARG on
    // bad arguments, or the error reported by the failing stage.
    ERRB      errb;
    PHRESULT  phr = &errb;
    BYTE      bKeyIndex = CKEY_OCC_BASE;  // Next cKey/BitCount slot for occurrence fields
    HRESULT   fRet;                       // Return value from this function
    DWORD     loop;

    // Sanity check: we need a parameter block and a destination
    if (lpipb == NULL || (NULL == hfpb && NULL == pstrFile))
        return E_INVALIDARG;

    // Flush any records still in the internal sort tree to disk, then
    // release the internal-sort memory whether or not the flush worked
    fRet = FlushTree(lpipb);
    FreeISI (lpipb);

    if (fRet != S_OK)
        return(fRet);

    // Nothing to process, there will be no index file
    if (lpipb->esi.cesb == 0)
        return S_OK;

    if (lpipb->idxf & KEEP_TEMP_FILE)
        SaveEsiTemp (lpipb, &lpipb->esi);

    // If we're doing term-weighting, set up the sigma table (its size
    // depends on the number of topics) and the table of LOG values
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        if ((fRet = AllocSigmaTable (lpipb)) != S_OK)
            return(fRet);
    }

    // From here on the weighting tables may be allocated, so every exit
    // goes through 'cleanup'. The original returned directly when the
    // merge failed and never released them.
    if ((fRet = MergeSortTreeFile (lpipb, NULL)) != S_OK)
    {
        fRet = SetErrCode (phr, fRet);
        goto cleanup;
    }

    if ((lpipb->idxf & KEEP_TEMP_FILE) == 0)
        FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);

    // If we are doing term-weighting we have to square root all sigma
    // values (needed with or without ISBU_IR_CHANGE)
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        for (loop = 0; loop < lpipb->dwMaxTopicId + 1; ++loop)
        {
            lpipb->wi.hrgsigma[loop] =
                (float)sqrt ((double)lpipb->wi.hrgsigma[loop]);
        }
    }

    // Analyze the statistics to get the best compression scheme.
    //
    // Note: We can't use fixed field compression for topic, since they
    // can be modified by update. A fixed field format may become
    // insufficient to store larger values of topic differences

    // TopicId
    VGetBestScheme(&lpipb->cKey[CKEY_TOPIC_ID],
        &lpipb->BitCount[CKEY_TOPIC_ID][0], lcbitBITSTREAM_ILLEGAL, TRUE);

    // Occurrence Count
    VGetBestScheme(&lpipb->cKey[CKEY_OCC_COUNT],
        &lpipb->BitCount[CKEY_OCC_COUNT][0], lcbitBITSTREAM_ILLEGAL, TRUE);

    // Optional occurrence fields take consecutive slots from CKEY_OCC_BASE
    if (lpipb->occf & OCCF_COUNT)
    {
        VGetBestScheme(&lpipb->cKey[bKeyIndex],
            &lpipb->BitCount[bKeyIndex][0], lcbitBITSTREAM_ILLEGAL, TRUE);
        bKeyIndex++;
    }

    if (lpipb->occf & OCCF_OFFSET)
    {
        VGetBestScheme(&lpipb->cKey[bKeyIndex],
            &lpipb->BitCount[bKeyIndex][0], lcbitBITSTREAM_ILLEGAL, TRUE);
        bKeyIndex++;
    }

    if (lpipb->idxf & KEEP_TEMP_FILE)
        UpdateEsiTemp (lpipb);

    // Build the permanent index
    fRet = BuildBTree(hSysFile, lpipb, lpipb->esi.aszTempName, hfpb, pstrFile);

cleanup:
    // Release the term-weighting tables. The NULL checks matter on the
    // failure path: ESMemory2Disk stores NULL in hSigma when its realloc
    // of the sigma table fails.
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        if (lpipb->wi.hSigma != NULL)
        {
            FreeHandle (lpipb->wi.hSigma);
        }
        if (lpipb->wi.hLog != NULL)
        {
            FreeHandle (lpipb->wi.hLog);
        }
    }
    return fRet;
}


/*************************************************************************
 *
 *  @doc    INDEX
 *
 *  @func   HRESULT NEAR PASCAL | FillInputBuffer |
 *      Fills the buffer by reading from the specified file.
 *
 *  @parm   PESB | pEsb |
 *      Pointer to external sort block to fill
 *
 *  @parm   HFPB | hFile |
 *      Handle to the input file
 *
 *  @rdesc  S_OK, or errors if failed
 *
 *************************************************************************/

HRESULT NEAR PASCAL FillInputBuffer(LPESB pEsb, HFPB hFile)
{
    // Reads up to pEsb->dwEsbSize bytes from hFile at offset pEsb->lfo
    // into pEsb->lrgbMem. On success the block's file offset is advanced,
    // dwEsbSize becomes the number of bytes actually read and the read
    // cursor (ibBuf) is reset to the start of the buffer.
    ERRB  errb;
    DWORD cbGot = FileSeekRead (hFile, (LPB)pEsb->lrgbMem, pEsb->lfo,
        pEsb->dwEsbSize, &errb);

    // A zero-byte read is reported with whatever the read helper left in
    // errb.
    // NOTE(review): errb is not initialised here; confirm FileSeekRead
    // always sets it when it returns 0.
    if (cbGot == 0)
        return errb;

    // Rewind the cursor and record how much valid data the buffer holds
    pEsb->ibBuf = 0;
    pEsb->dwEsbSize = (CB)cbGot;
    pEsb->lfo = FoAddDw(pEsb->lfo, cbGot);
    return S_OK;
}


/*************************************************************************
 *
 *  @doc    INDEX
 *
 *  @func   HRESULT NEAR PASCAL | ESFlushBuffer |
 *      Flushes the output buffer to disk and resets it.
 *
 *  @parm   LPESI | pEsi |
 *      Pointer to ESI block
 *
 *  @rdesc  S_OK, or errors if failed
 *
 *************************************************************************/

HRESULT NEAR PASCAL ESFlushBuffer(LPESI pEsi)
{
    // Writes the first pEsi->ibBuf bytes of the output buffer to the
    // output file. On a complete write the running file offset
    // (lfoTempOffset) is advanced and the buffer is marked empty; on a
    // short write the error left in errb by FileWrite is returned and
    // the ESI state is not touched.
    ERRB  errb;
    DWORD cbPending = pEsi->ibBuf;
    DWORD cbWritten;

    cbWritten = (DWORD)FileWrite (pEsi->hfpb, pEsi->pOutputBuffer,
        cbPending, &errb);
    if (cbWritten != cbPending)
        return errb;

    pEsi->ibBuf = 0;
    pEsi->lfoTempOffset = FoAddDw (pEsi->lfoTempOffset, cbPending);
    return S_OK;
}


/*************************************************************************
 *
 *  @doc    INDEX
 *
 *  @func   HRESULT NEAR PASCAL | ESFillBuffer |
 *      Updates the input buffer with new data from the input file.
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index parameter block
 *
 *  @parm   LPESB | pEsb |
 *      Pointer to ESB block to be filled
 *
 *  @rdesc  S_OK, or other errors
 *************************************************************************/

HRESULT NEAR PASCAL ESFillBuffer(_LPIPB lpipb, LPESB pEsb) 
{
    // Refills an input buffer during the merge: the unconsumed tail
    // (bytes from ibBuf to dwEsbSize) is moved to the front of the
    // buffer and the space freed behind it is topped up from the input
    // file, never reading past the end of this block's run (lfoMax).
    // Returns S_OK, or the error reported by FileSeekRead.
    DWORD dwExtra = pEsb->dwEsbSize - pEsb->ibBuf;  // Unconsumed bytes to keep
    DWORD dwRoom  = pEsb->dwEsbSize - dwExtra;      // Space freed by consumed data
    DWORD dwBytesRead;
    // errb starts as S_OK so that a legitimate zero-byte read (nothing
    // left in this run) is not mistaken for a failure if the read helper
    // leaves errb untouched on success.
    ERRB  errb = S_OK;

    // Read either the freed space or whatever is left of the run
    dwBytesRead = DwSubFo (pEsb->lfoMax, pEsb->lfo);
    if (dwBytesRead > dwRoom)
        dwBytesRead = dwRoom;

    // Save unprocessed information to beginning of buffer
    // (MEMMOVE because source and destination may overlap)
    if (dwExtra)
        MEMMOVE ((LPB)pEsb->lrgbMem, pEsb->lrgbMem + pEsb->ibBuf, dwExtra);

    // Read in the new data right behind the preserved tail
    dwBytesRead = FileSeekRead (lpipb->isi.hfpb,
        (LPB)(pEsb->lrgbMem + dwExtra), pEsb->lfo, dwBytesRead, &errb);
    if (dwBytesRead == 0 && errb != S_OK)
        return(errb);

    pEsb->lfo = FoAddDw(pEsb->lfo, dwBytesRead);
    pEsb->ibBuf = 0;
    pEsb->dwEsbSize = dwBytesRead + dwExtra;
    return(S_OK);
}


/*************************************************************************
 *
 *  @doc  INTERNAL INDEXING
 *
 *  @func HRESULT FAR PASCAL | MergeSortTreeFile |
 *    Sorts the file generated from the tree output into one
 *      list of sorted elements.
 *
 *  @parm _LPIPB | lpipb |
 *    Pointer to index parameter block
 *
 *************************************************************************/

PUBLIC HRESULT PASCAL FAR MergeSortTreeFile (_LPIPB lpipb, LPMERGEPARAMS lpmp)
{
    // Merges the sorted runs of the phase-1 temp file:
    //   - opens the internal-sort temp file for reading
    //   - gives every external sort block (ESB) an input buffer and
    //     preloads it from that block's run
    //   - builds a priority queue over the blocks
    //   - hands over to ProcessFiles (lpmp is passed through untouched)
    // Returns E_INVALIDARG for a NULL lpipb, E_OUTOFMEMORY when a buffer
    // or the queue cannot be allocated, the file error on open/read
    // failure, or the result of ProcessFiles. Except for the two early
    // returns, the ESI blocks are freed and the input file is closed
    // before returning, on success as well as on failure.
    LPESI   pEsi;                       // Pointer to external sort info
    LPISI   pIsi;                       // Pointer to internal sort info
    HFPB    hInputFile;                 // Handle to input file
    ERRB    errb;
    PHRESULT phr = &errb;
    DWORD   cesb;                       // Input buffer count
    LPESB   FAR* lrgPriorityQueue;      // Pointer to Priority Queue
    WORD    uiQueueSize = 0;            // Count of entries in Queue
    DWORD   dwBufferSize;               // Size of each input buffer
    HRESULT fRet;
    LPESB   pEsb;                       // Walks the ESB linked list

    // Sanity check
    if (lpipb == NULL)
        return E_INVALIDARG;

    pEsi = &lpipb->esi;
    pIsi = &lpipb->isi;
    cesb = pEsi->cesb;

    // Open input file
    if ((pIsi->hfpb = FileOpen (NULL, pIsi->aszTempName,
        REGULAR_FILE, READ, phr)) == NULL)
        return *phr;

    hInputFile = pIsi->hfpb;

    // Allocate & fill input buffers
    for (pEsb = pEsi->lpesbRoot; pEsb != NULL; pEsb = pEsb->lpesbNext)
    {
        DWORD cbRead;

        // Each block gets an equal share of three quarters of the memory
        // budget. Dividing before multiplying keeps the intermediate
        // value from wrapping for large budgets (the former
        // "dwMemAllowed * 6" overflows 32 bits above ~715MB); the result
        // can be a few bytes smaller than the old formula gave.
        dwBufferSize = ((lpipb->dwMemAllowed / 4) * 3) / cesb;

        // Allocate buffer space
        if ((pEsb->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
            dwBufferSize)) == NULL)
        {
            fRet = E_OUTOFMEMORY;
            goto cleanup;
        }
        pEsb->lrgbMem = (LRGB)_GLOBALLOCK (pEsb->hMem);

        // Preload as much of this block's run as the buffer can hold
        cbRead = DwSubFo(pEsb->lfoMax, pEsb->lfo);
        if (cbRead > dwBufferSize)
            cbRead = dwBufferSize;

        // Fill buffer from disk
        if (FileSeekRead (hInputFile, pEsb->lrgbMem, pEsb->lfo,
            cbRead, phr) != (LONG)cbRead)
        {
            // NOTE(review): this relies on FileSeekRead storing an error
            // in *phr whenever it returns a short count -- confirm.
            fRet = *phr;

            // Release this block's buffer here; the common cleanup
            // releases the rest of the ESI state.
            _GLOBALUNLOCK(pEsb->hMem);
            _GLOBALFREE(pEsb->hMem);
            pEsb->hMem = NULL;
            goto cleanup;
        }

        pEsb->dwEsbSize = cbRead;
        pEsb->ibBuf = 0;
        pEsb->lfo = FoAddDw (pEsb->lfo, cbRead);
    }

    // Allocate a priority queue array. The size of the array
    // is the number of external sort info blocks plus 1, since
    // location 0 is not used.
    if ((pEsi->hPriorityQueue = _GLOBALALLOC (DLLGMEM_ZEROINIT,
        (DWORD)(pEsi->cesb + 1) * sizeof (LPB))) == NULL)
    {
        fRet = E_OUTOFMEMORY;
        goto cleanup;
    }
    pEsi->lrgPriorityQueue =
        (LPESB FAR *)_GLOBALLOCK (pEsi->hPriorityQueue);
    lrgPriorityQueue = pEsi->lrgPriorityQueue;

    // Attach input buffers to the Priority Queue, remembering to start
    // at offset 1 NOT 0 (PQ's have a null 0 element)
    for (pEsb = pEsi->lpesbRoot; pEsb != NULL; pEsb = pEsb->lpesbNext)
    {
        ++uiQueueSize;
        lrgPriorityQueue[uiQueueSize] = pEsb;
        PQueueUp (lpipb, lrgPriorityQueue, uiQueueSize);
    }
    pEsi->uiQueueSize = uiQueueSize;

    // Run the merge, then drop the queue
    fRet = ProcessFiles(lpipb, lpmp);

    _GLOBALUNLOCK (pEsi->hPriorityQueue);
    _GLOBALFREE (pEsi->hPriorityQueue);
    pEsi->hPriorityQueue = NULL;

cleanup:
    // Common exit for success and failure
    FreeEsi (lpipb);
    FileClose(hInputFile);
    pIsi->hfpb = NULL;
    return fRet;
}


/*************************************************************************
 *
 *  @doc    INDEX
 *
 *  @func   HRESULT NEAR PASCAL | ESMemory2Disk |
 *      Copies temp record to output buffer.
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index parameter block
 *
 *  @parm   PMERGEHEADER | pHeader |
 *      Pointer to header to flush
 *
 *  @parm   int | flag |
 *      - if FLUSH_NEW_RECORD, the flush is due to new record, we flush
 *        everything, else we may do a partial flush only
 *      - if FLUSH_EXCEPT_LAST, we don't flush the last topic
 *
 *  @rdesc  S_OK, or other errors
 *************************************************************************/
PRIVATE HRESULT NEAR PASCAL ESMemory2Disk
    (_LPIPB lpipb, PMERGEHEADER pHeader, int flag)
{
    // Overview: serializes the merged record described by pHeader into
    // the ESI output buffer, flushing the buffer to the output file via
    // ESFlushBuffer whenever it gets close to full. The record header
    // (length, word, optional word length / field id, topic count) is
    // emitted once per record; topics and their occurrences follow as
    // byte-packed deltas. While writing, the routine also
    //   - accumulates bit-size statistics in lpipb->BitCount
    //   - updates the sigma table when IDXF_NORMALIZE is set
    //   - returns written topic and occurrence nodes to the free lists
    //     of lpipb->TopicBlock and lpipb->OccBlock

    // Local replacement variables
    LPESI   pEsi        = &lpipb->esi;
    // High-water mark: two DWORDs short of the end of the output buffer
    LPB     pMax = pEsi->pOutputBuffer + ESOUTPUT_BUFFER - 2 * sizeof(DWORD);
    DWORD   dwOccCount;
    LPB     pOutputBuffer = pEsi->pOutputBuffer;
    ERRB     errb;
    PHRESULT   phr = &errb;
    HRESULT     fRet;
    BYTE    cNumOcc;
    OCCF    occf;

    // Working variables
    PTOPICDATA pTopic;          // Temp var to traverse the topic linked list
    DWORD   loop, sub;          // Various loop counters
    DWORD   dwTopicIdDelta;
    DWORD   OccDelta[5];        // Delta base for all occurrence data
    DWORD   LastOcc[5];         // Previous value of each occurrence field
    FLOAT   rLog;               // Term weight   - IDXF_NORMALIZE is set
    FLOAT   rLogSquared;        // rLog squared  - IDXF_NORMALIZE is set
    LPB     pStart;             // Start of the record header in the buffer
    LPB     pCurPtr;            // Current write position in the buffer

    // Set up pointers    
    pStart = pCurPtr = pOutputBuffer + pEsi->ibBuf;
        
    // Variable replacement
    occf = lpipb->occf;
    
    // Size of string
    loop = pHeader->dwStrLen;
    
    // Make sure the string, FileId, Topic Count and Record Size fit
    // We add in an extra DWORD for 5 byte compression problems and
    // to cover the Word Length if there is one.
    if ((pStart + loop + sizeof (DWORD) * 5) >= pMax)
    {
        if ((fRet = ESFlushBuffer (pEsi)) != S_OK)
            return(fRet);
            
        pStart = pCurPtr = pOutputBuffer;
    }

    if (pHeader->fEmitRecord == FALSE)
    {
        // The record header has not been emitted yet: emit it now and
        // set the flag so that later calls do not emit it again
        pHeader->fEmitRecord = TRUE;
        
        // Skip record size field (filled in below, once the size is known)
        pCurPtr += sizeof (DWORD);

        // Pascal string
        MEMCPY (pCurPtr, pHeader->lpbWord, loop);
        pCurPtr += loop;

        // Word Length
        if (occf & OCCF_LENGTH)
            pCurPtr += CbBytePack (pCurPtr, pHeader->dwWordLength);

        // FieldId
        if (occf & OCCF_FIELDID)
            pCurPtr += CbBytePack (pCurPtr, pHeader->dwFieldId);

        // Topic Count
        if (flag & FLUSH_NEW_RECORD)
        {
            // This is the whole record. dwTopicCount value is correct
            SETLONG((LPUL)pCurPtr, pHeader->dwTopicCount);
        }
        else
        {
            // Partial flush: the final count is not known yet. Remember
            // both the file offset and the in-memory address of the
            // field so it can be backpatched later; the field itself is
            // left unwritten for now.
            pHeader->foTopicCount = FoAddDw (pEsi->lfoTempOffset,
                (DWORD)(pCurPtr - pOutputBuffer));
            pHeader->pTopicCount = pCurPtr;
        }
        pCurPtr += sizeof(DWORD);

        // Write Record Length (header bytes that follow the length field)
        *(LPUL)pStart = (DWORD)(pCurPtr - pStart - sizeof (DWORD));

    }
    else if (flag & FLUSH_NEW_RECORD)
    {
        // The header was emitted by an earlier call (fEmitRecord == TRUE),
        // so the topic count written then needs to be backpatched
        if (FoCompare(pHeader->foTopicCount, pEsi->lfoTempOffset) >= 0)
        {
            // Everything is still in memory, just do local backpatch
            SETLONG((LPUL)(pHeader->pTopicCount), pHeader->dwTopicCount);
        }
        else
        {
            // Do backpatch in the file by seeking back to the right
            // place
            if (FileSeekWrite(pEsi->hfpb, &pHeader->dwTopicCount,
                pHeader->foTopicCount, sizeof(DWORD), phr) != sizeof(DWORD))
                return(*phr);
            
            // Restore the current file offset
            // NOTE(review): the result of this seek is not checked.
            FileSeek(pEsi->hfpb, pEsi->lfoTempOffset, 0, phr);
        }
    }
    
    // Convert all occ data to delta values & compress them
    pTopic = pHeader->pTopic;
    cNumOcc = lpipb->ucNumOccDataFields;
    
    for (; pTopic;)
    {
        POCCDATA pOccData;
        PTOPICDATA pReleased;
        
        // On a partial flush the last topic node stays in memory
        if ((flag & FLUSH_EXCEPT_LAST) && pTopic->pNext == NULL)
            break;
        
        // Set TopicId delta
        dwTopicIdDelta = pTopic->dwTopicId - pHeader->dwLastTopicId;
        pHeader->dwLastTopicId = pTopic->dwTopicId;

        // Save bit size to the statistics array
        lpipb->BitCount[CKEY_TOPIC_ID][CbitBitsDw (dwTopicIdDelta)] += 1;

        // Write TopicID Delta, flushing first if past the high-water mark
        if (pCurPtr > pMax)
        {
            pEsi->ibBuf = (DWORD)(pCurPtr - pOutputBuffer);
            if ((fRet = ESFlushBuffer (pEsi)) != S_OK)
                return(fRet);
            pCurPtr = pOutputBuffer;
        }
        pCurPtr += CbBytePack (pCurPtr, dwTopicIdDelta);

        if (cNumOcc == 0)
        {
            // No occurrence fields are stored: only the topic id delta
            // is written for this topic.
            // NOTE(review): this 'continue' also bypasses the sigma
            // update further down -- confirm that is intended when
            // IDXF_NORMALIZE is set.
            pReleased = pTopic;
            pTopic = pTopic->pNext;
        
            // Add the released to the freed linked list
            pReleased->pNext = (PTOPICDATA)lpipb->TopicBlock.pFreeList;
            lpipb->TopicBlock.pFreeList = (PLIST)pReleased;
            lpipb->TopicBlock.dwCount--;
            continue;
        }
            
        // Assignment is intentional: dwOccCount is also read by the
        // sigma update below, including when it is 0
        if (dwOccCount = pTopic->dwOccCount)
        {
            
            // Reset count occdata delta for every new topic
            MEMSET (OccDelta, 0, 5 * sizeof (DWORD));
            MEMSET (LastOcc, 0, 5 * sizeof (DWORD));

            // Copy Occurrence Count
            if (pCurPtr > pMax)
            {
                pEsi->ibBuf = (DWORD)(pCurPtr - pOutputBuffer);
                if ((fRet = ESFlushBuffer (pEsi)) != S_OK)
                    return(fRet);
                pCurPtr = pOutputBuffer;
            }
            pCurPtr += CbBytePack (pCurPtr, dwOccCount);

            // Save bit size to the statistics array
            // NOTE(review): the literal 1 is presumably CKEY_OCC_COUNT
            // (MVIndexBuild reads BitCount[CKEY_OCC_COUNT]) -- confirm.
            lpipb->BitCount[1][CbitBitsDw (dwOccCount)] += 1;

            // Repeat for each occurrence block
            for (pOccData = pTopic->pOccData,
                sub = dwOccCount; sub > 0 && pOccData; --sub)
            {
                LPDW lpDw;
                int  iIndex;
                POCCDATA pReleased;
                
                // Keep room for a full set of occurrence fields
                if (pCurPtr + 5 * sizeof(DWORD) > pMax)
                {
                    pEsi->ibBuf = (DWORD)(pCurPtr - pOutputBuffer);
                    if ((fRet = ESFlushBuffer (pEsi)) != S_OK)
                        return(fRet);
                    pStart = pCurPtr = pOutputBuffer;
                }
                
                // Fields are stored consecutively in OccData[]; iIndex
                // selects the matching delta / statistics slot
                lpDw = &pOccData->OccData[0];
                iIndex = CKEY_OCC_BASE;
                
                if (occf & OCCF_COUNT)
                {
                    // Convert each value to a delta value
                    OccDelta[iIndex] = *lpDw - LastOcc[iIndex];
                    LastOcc[iIndex] = *lpDw;
                    lpDw++;
                    
                    // Save to bit size to the statistics array
                    lpipb->BitCount[iIndex][CbitBitsDw (OccDelta[iIndex])] += 1;
                    
                    // Compress occurrence field to buffer
                    pCurPtr += CbBytePack (pCurPtr, OccDelta[iIndex]);
                    iIndex++;
                }
                
                if (occf & OCCF_OFFSET)
                {
                    // Convert each value to a delta value
                    OccDelta[iIndex] = *lpDw - LastOcc[iIndex];
                    LastOcc[iIndex] = *lpDw;
                    lpDw++;
                    
                    // Save to bit size to the statistics array
                    lpipb->BitCount[iIndex][CbitBitsDw (OccDelta[iIndex])] += 1;
                    
                    // Compress occurrence field to buffer
                    pCurPtr += CbBytePack (pCurPtr, OccDelta[iIndex]);
                    iIndex++;
                }
                
                // Return the written occurrence node to the free list
                pReleased = pOccData;
                pOccData = pOccData->pNext;
                pReleased->pNext = (POCCDATA)lpipb->OccBlock.pFreeList;
                lpipb->OccBlock.pFreeList = (PLIST)pReleased;
                lpipb->OccBlock.dwCount--;
            }
            
            // Check for mismatch between count and links
#ifdef _DEBUG
            if (sub)
                SetErrCode (phr, E_ASSERT);

            if (pOccData)
                SetErrCode (phr, E_ASSERT);
#endif
        }

        // Update the sigma values if we are doing term weighing
        // erinfox: remove test against flag. Sometimes sigma never
        // got calculated for a topic and that caused a divide by zero
        // later on.
        if ((lpipb->idxf & IDXF_NORMALIZE) /* && (flag & FLUSH_NEW_RECORD)*/)
        {
            
            if (pTopic->dwTopicId > lpipb->dwMaxTopicId)    
            {
                // Increase the size of the sigma table. This can happen when
                // updating with new topics
                // NOTE(review): the handle is overwritten with the realloc
                // result, so on failure the previous handle value is lost
                // and hrgsigma still refers to the unlocked table --
                // confirm how _GLOBALREALLOC treats the old block on
                // failure.
                _GLOBALUNLOCK (lpipb->wi.hSigma);
                if ((lpipb->wi.hSigma = _GLOBALREALLOC (lpipb->wi.hSigma,
                    (pTopic->dwTopicId + 1) * sizeof(float),
                    DLLGMEM_ZEROINIT)) == NULL)
                {
                    return (SetErrCode(phr, E_OUTOFMEMORY));
                }
                lpipb->wi.hrgsigma = (HRGSIGMA)_GLOBALLOCK(lpipb->wi.hSigma);
                lpipb->dwMaxTopicId =  pTopic->dwTopicId ;
            }
            
			// Sigma is only accumulated while in INDEXING_STATE
			if (lpipb->bState == INDEXING_STATE)
			{
#ifndef ISBU_IR_CHANGE
				FLOAT fOcc;

				// Topic counts below cLOG_MAX use the precomputed table;
				// larger ones are computed here
				if (pHeader->dwTopicCount >= cLOG_MAX)
				{
					// we have to guard against the possibility of the log resulting in 
					// a value <= 0.0. Very rare, but possible in the future. This happens
					// if dwTopicCount approaches or exceeds the N we are using (N == 100 million)
					if (pHeader->dwTopicCount >= cNintyFiveMillion)
						rLog = cVerySmallWt;	// log10(100 mil/ 95 mil) == 0.02
					else
						//rLog = (float) log10(cHundredMillion/(double)pHeader->dwTopicCount);
						rLog = (float) (8.0 - log10((double)pHeader->dwTopicCount));

					rLogSquared = rLog*rLog;
				}
				else
					rLogSquared = lpipb->wi.lrgrLog[(WORD)pHeader->dwTopicCount];

				// Update sigma value
				// NOTE : We are bounding dwOccCount by a value of cTFThreshold
				// The RHS of the equation below has an upperbound of 2 power 30.
				fOcc = (float) min(cTFThreshold, dwOccCount);
				lpipb->wi.hrgsigma[pTopic->dwTopicId] += (SIGMA) fOcc*fOcc*rLogSquared;
					//(SIGMA) (fOcc * fOcc * rLogSquared/(float)0xFFFF);
#else
				// Failed for update : UNDONE
				if (pHeader->dwTopicCount >= cLOG_MAX)
				{
					rLog =  (float)1.0 / (float)pHeader->dwTopicCount;
					rLogSquared = rLog * rLog;
				}
				else
					rLogSquared = lpipb->wi.lrgrLog[(WORD)pHeader->dwTopicCount];
				// Update sigma value
				lpipb->wi.hrgsigma[pTopic->dwTopicId] +=
					(SIGMA)(dwOccCount * dwOccCount) * rLogSquared;
#endif // ISBU_IR_CHANGE
			}
        }
        pReleased = pTopic;
        pTopic = pTopic->pNext;
        
        // Add the released to the freed linked list
        pReleased->pNext = (PTOPICDATA)lpipb->TopicBlock.pFreeList;
        lpipb->TopicBlock.pFreeList = (PLIST)pReleased;
        lpipb->TopicBlock.dwCount--;
    }

    // pTopic is NULL here after a full flush, or the retained last
    // topic node after a FLUSH_EXCEPT_LAST flush
    pHeader->pTopic = pHeader->pLastTopic = pTopic;
    
    // Update output offset
    pEsi->ibBuf = (DWORD)(pCurPtr - pOutputBuffer);
    
    return(S_OK);
    
}


/*************************************************************************
 *
 *  @doc    INDEX
 *
 *  @func   HRESULT NEAR PASCAL | ProcessFiles |
 *      Sorts the file generated from the tree output into one
 *      list of sorted elements.
 *
 *  @parm   _LPIPB | lpipb |
 *      Pointer to index parameter block
 *
 *  @rdesc  S_OK, or errors if failed
 *
 *  @notes
 *      This function processes the input buffers and uses dynamic
 *      memory allocation to sort each word as it comes in.  Once a
 *      word stops repeating, it is flushed to disk and the memory is
 *      reset for the next word.
 *************************************************************************/

HRESULT NEAR PASCAL ProcessFiles(_LPIPB lpipb, LPMERGEPARAMS lpmp)
{
    // Local replacement variables
    LPISI pIsi = &lpipb->isi;
    LPESI pEsi = &lpipb->esi;
    LPESB FAR * lrgPriorityQueue = pEsi->lrgPriorityQueue;
    LONG    uiQueueSize = pEsi->uiQueueSize;
    LPB     pQueuePtr;
    WORD    cNumOcc = lpipb->ucNumOccDataFields;
    WORD    OccSize = sizeof(OCCDATA) - sizeof(DWORD) + cNumOcc *
            sizeof(DWORD);
    int     occf = lpipb->occf;
    LPB     pBufMax;
    HANDLE  hWord;
    LPB     lpbWord;
    DWORD dwUniqueTerm = 0;  // Used for calback function
#ifdef _DEBUG
    BYTE    astWord[300];
    BYTE    astLastWord[300];
#endif

    // Working variables
    PMERGEHEADER pHeader;               // Pointer to merge header
    LPESB   pEsb;                       // Temp ESB pointer
    PTOPICDATA pNewTopic;               // Used to create new topic
    DWORD   loop;                       // Temp loop counter
    HANDLE  hHeader;
    HFPB    hOutputFile;                // Handle to output file
    int     fRet;                       // Return value
    USHORT  uStringSize;                // Size of Psacal String
    ERRB     errb;
    PHRESULT   phr = &errb;

    static  long Count = 0;

    // Setup Block Manager
    if ((fRet = ESBBlockAllocate (lpipb, lpipb->dwMemAllowed / 4)) != S_OK)
        return(fRet);
        
    // Allocate output buffer
    if ((pEsi->hBuf = _GLOBALALLOC
        (DLLGMEM_ZEROINIT, ESOUTPUT_BUFFER)) == NULL)
    {
        fRet = E_OUTOFMEMORY;
exit1:
        return fRet;
    }
    
    pEsi->pOutputBuffer = (LPB)_GLOBALLOCK (pEsi->hBuf);
    pEsi->ibBuf = 0;

    // Create output file
    GETTEMPFILENAME ((char)0, "eso", 0, pEsi->aszTempName);
    
    if ((pEsi->hfpb = FileOpen(NULL, pEsi->aszTempName, 
        REGULAR_FILE, WRITE, &errb)) == NULL)
    {
        fRet = E_FILECREATE;
exit2:
        FreeHandle (pEsi->hBuf);
        pEsi->hBuf = NULL;
        goto exit1;
    }                                     
    hOutputFile = pEsi->hfpb;

    // Setup new record in memory
    if ((hHeader = _GLOBALALLOC 
        (DLLGMEM_ZEROINIT, sizeof (MERGEHEADER))) == NULL)
    {
        fRet = E_OUTOFMEMORY;
exit3:
        FileClose (hOutputFile);
        goto exit2;
    }
    pHeader = (PMERGEHEADER)_GLOBALLOCK (hHeader);
    
    // Allocate buffer for a word, which include 64K + sizeof(WORD) + slack
    if ((hWord = _GLOBALALLOC(DLLGMEM_ZEROINIT, 0x10004)) == NULL)
    {
exit4:
        _GLOBALUNLOCK(hHeader);
        _GLOBALFREE (hHeader);
        goto exit3;
    }
        
    pHeader->lpbWord = lpbWord = (LPB)_GLOBALLOCK(hWord);

#ifdef _DEBUG
    astWord[0] = 0;
#endif

    // Process all input buffers
    do
    {
        DWORD dwWordLength;
        DWORD dwFieldId;
        LPB  lpStart;
        DWORD dwTopicCount;

#ifdef _DEBUG
        Count++;
#endif

        // Grab smallest record and send to buffer
        pEsb = lrgPriorityQueue[1];
        
        // Set the fill limit
        pBufMax = pEsb->lrgbMem + pEsb->dwEsbSize - 256;
        
        if ((pQueuePtr = pEsb->lrgbMem + pEsb->ibBuf) >= pBufMax)
    	{
        	if ((fRet = ESFillBuffer (lpipb, pEsb)) != S_OK)
                goto exit4;
                
        	pQueuePtr = pEsb->lrgbMem;
    	}

        // Save the record beginning
        pQueuePtr += sizeof(DWORD);
        lpStart = pQueuePtr;
        
        // Get string
        uStringSize = GETWORD ((LPUW)pQueuePtr) + sizeof (SHORT);

        pQueuePtr += uStringSize;
#ifdef _DEBUG
        if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
            SetErrCode (phr, E_ASSERT);
#endif

        if (occf & OCCF_LENGTH)
            pQueuePtr += CbByteUnpack (&dwWordLength, pQueuePtr);
        else
            dwWordLength = 0;
            
#ifdef _DEBUG
        if (pQueuePtr >= pEsb->lrgbMem + pEsb->dwEsbSize)
            SetErrCode (phr, E_ASSERT);
#endif
        if (occf & OCCF_FIELDID)
            pQueuePtr += CbByteUnpack (&dwFieldId, pQueuePtr);
        else
            dwFieldId = 0;
            
#ifdef _DEBUG
        if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
            SetErrCode (phr, E_ASSERT);
#endif
        // Is the word in the buffer equal to the new word?
        // If it is not then flush the old word
        if (*(LPUW)pHeader->lpbWord)
        {
            fRet = (StrCmp2BytePascal (pHeader->lpbWord, lpStart)
                || dwWordLength > pHeader->dwWordLength);
            if (fRet == 0)  // Same word, reduce the unique words count
                lpipb->dwUniqueWord--;
            if (fRet || dwFieldId > pHeader->dwFieldId)
            {
#if defined(_DEBUG) && !defined(_MAC)
                // Word out of order
                if (StrCmp2BytePascal (pHeader->lpbWord, lpStart) > 0)
                    assert(FALSE);
#endif
                if ((fRet = ESMemory2Disk (lpipb, pHeader, TRUE)) != S_OK)
                    return(fRet);

                // Reset pHeader
                MEMSET (pHeader, 0, sizeof (MERGEHEADER));

                // Set the word buffer
                pHeader->lpbWord = lpbWord;
#ifdef _DEBUG
                STRCPY(astLastWord, astWord);
#endif
                // Call the user callback every once in a while
                if (!(++dwUniqueTerm % 8192L)
                    && (lpipb->CallbackInfo.dwFlags & ERRFLAG_STATUS))
                {
                    PFCALLBACK_MSG pCallbackInfo = &lpipb->CallbackInfo;
                    CALLBACKINFO Info;

                    Info.dwPhase = 2;
                    Info.dwIndex = (DWORD)((float)dwUniqueTerm / lpipb->dwUniqueWord * 100);
                    fRet = (*pCallbackInfo->MessageFunc)
                        (ERRFLAG_STATUS, pCallbackInfo->pUserData, &Info);
                    if (S_OK != fRet)
                        goto exit5;
                }
            }
        }

        // Update the data
        pHeader->dwFieldId = dwFieldId;
        pHeader->dwWordLength = dwWordLength;
        pHeader->dwStrLen = uStringSize;
            
        // Copy word and header info
        MEMCPY (pHeader->lpbWord, (LPB)lpStart, uStringSize);
#ifdef _DEBUG
        if (uStringSize >= 300)
            uStringSize = 300;
        MEMCPY (astWord, lpStart + 2, uStringSize - 2);
        astWord[uStringSize - 2] = 0;
        //if (STRCMP(astWord, "87db") == 0)
        //   _asm int 3;
#endif

        pQueuePtr += CbByteUnpack (&dwTopicCount, pQueuePtr);
        pHeader->dwTopicCount += dwTopicCount;

#ifdef _DEBUG
        if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
            SetErrCode (phr, E_ASSERT);
#endif
        pNewTopic = NULL;
                
        // Copy topic(s) to memory
        for (loop = dwTopicCount; loop > 0; loop--)
        {
			DWORD dwTopicId;

            // Get the topic id
            pQueuePtr += CbByteUnpack (&dwTopicId, pQueuePtr);

			// kevynct: if there is a to-delete list, and this topic is on it, skip it
			if (lpmp && FindTopic(lpmp, dwTopicId))
			{
	            // Get the occ count
		        if (cNumOcc)
			    {
				    DWORD dwOccCount;
					DWORD dwT;
                
					pQueuePtr += CbByteUnpack (&dwOccCount, pQueuePtr);
#ifdef _DEBUG
	                if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
		                SetErrCode (phr, E_ASSERT);
#endif
			        for (; dwOccCount > 0; dwOccCount--)
					{
	                    // Fill up the buffer if run out of data
		                if (pQueuePtr >= pBufMax)
			        	{
				        	pEsb->ibBuf = (DWORD)(pQueuePtr - pEsb->lrgbMem);
					    	if ((fRet = ESFillBuffer (lpipb, pEsb)) != S_OK)
						        goto exit5;
                     		pQueuePtr = pEsb->lrgbMem;
                		}
					
	                    switch (cNumOcc)
		                {
			                case 5:
				                pQueuePtr += CbByteUnpack (&dwT, pQueuePtr);
					        case 4:
						        pQueuePtr += CbByteUnpack (&dwT, pQueuePtr);
							case 3:
								pQueuePtr += CbByteUnpack (&dwT, pQueuePtr);
	                        case 2:
		                        pQueuePtr += CbByteUnpack (&dwT, pQueuePtr);
			                case 1:
				                pQueuePtr += CbByteUnpack (&dwT, pQueuePtr);
					    }
                    
#ifdef _DEBUG
	                    if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
		                    SetErrCode (phr, E_ASSERT);
#endif
					} // end occ loop
				}	// end if occ non-zero

				pHeader->dwTopicCount--;
				continue;
			}	// end of to-delete condition

            // Allocate a topicdata node
            if ((pNewTopic == NULL) &&
                (pNewTopic = GetBlockNode (&lpipb->TopicBlock)) == NULL)
            {
                if ((fRet = ESMemory2Disk(lpipb, pHeader, FLUSH_EXCEPT_LAST)) != S_OK)
                {
exit5:
                    _GLOBALUNLOCK(hWord);
                    _GLOBALFREE(hWord);
                    goto exit4;
                }
                
                if ((pNewTopic = GetBlockNode (&lpipb->TopicBlock)) == NULL)
                {
                    // Extremely weird, since we just release a bunch of
                    // memory
                    fRet = E_ASSERT;
                    goto exit5;
                }
            }

			pNewTopic->dwTopicId = dwTopicId;

#ifdef _DEBUG
            if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
                SetErrCode (phr, E_ASSERT);
#endif
            // Set the other fields
            pNewTopic->pOccData = pNewTopic->pLastOccData = NULL;

            // Get the occ count
            if (cNumOcc)
            {
                DWORD dwOccCount;
                POCCDATA pOccData;
                LPDW lpDw;
                
                pQueuePtr += CbByteUnpack (&pNewTopic->dwOccCount,
                    pQueuePtr);

#ifdef _DEBUG
                if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
                    SetErrCode (phr, E_ASSERT);
#endif
                for (dwOccCount = pNewTopic->dwOccCount; dwOccCount > 0;
                    dwOccCount--)
                {
                    // Get all occ fields
                    if ((pOccData = (POCCDATA)GetBlockNode
                        (&lpipb->OccBlock)) == NULL )
                    {
                        if ((fRet = ESMemory2Disk(lpipb, pHeader,
                            FLUSH_EXCEPT_LAST)) != S_OK)
                            goto exit5;
                        
                        if ((pOccData =
                            (POCCDATA)GetBlockNode(&lpipb->OccBlock)) == NULL)
                        {
                            // Extremely weird, since we just release a bunch of
                            // memory, unless there are so many duplicates of the same word
							// in the topic

                            fRet = E_TOOMANYDUPS;
                            goto exit5;
                        }
                    }
                    
                    // Fill up the buffer if run out of data
                    if (pQueuePtr >= pBufMax)
                	{
                    	pEsb->ibBuf = (DWORD) (pQueuePtr - pEsb->lrgbMem);
                    	if ((fRet = ESFillBuffer (lpipb, pEsb)) != S_OK)
                            goto exit5;
                    	pQueuePtr = pEsb->lrgbMem;
                	}

                    lpDw = (LPDW)&pOccData->OccData;
                    switch (cNumOcc)
                    {
                        case 5:
                            pQueuePtr += CbByteUnpack (lpDw++, pQueuePtr);
                        case 4:
                            pQueuePtr += CbByteUnpack (lpDw++, pQueuePtr);
                        case 3:
                            pQueuePtr += CbByteUnpack (lpDw++, pQueuePtr);
                        case 2:
                            pQueuePtr += CbByteUnpack (lpDw++, pQueuePtr);
                        case 1:
                            pQueuePtr += CbByteUnpack (lpDw++, pQueuePtr);
                    }
                    
#ifdef _DEBUG
                    if (pQueuePtr > pEsb->lrgbMem + pEsb->dwEsbSize)
                        SetErrCode (phr, E_ASSERT);
#endif
                    // Attach to the linked list
                    // Note that we are assumimg that the occurrences are
                    // already sorted, so no checking is done here
                    if (pNewTopic->pOccData == NULL)
                    {
                        pNewTopic->pLastOccData = pNewTopic->pOccData
                            = pOccData;
                    }
                    else 
                    {
                        // Add to the end of the linked list
                        pNewTopic->pLastOccData->pNext = pOccData;
                        pNewTopic->pLastOccData = pOccData;
                    }
                    pOccData->pNext = NULL;
                }
            }
            
            if (pNewTopic = MergeTopicNode (pHeader, pNewTopic, cNumOcc))
                pHeader->dwTopicCount --;
        }
        
        // Update the offset
        pEsb->ibBuf = (DWORD) (pQueuePtr - pEsb->lrgbMem);
      
        // If next record doesn't fit in buffer
        // Then reset to beginning and load data
        if (pEsb->dwEsbSize - pEsb->ibBuf <= sizeof(DWORD) ||
            pEsb->dwEsbSize -  pEsb->ibBuf <= GETLONG((LPUL)pQueuePtr) + 
            2 * sizeof(DWORD))
        {
        	if ((fRet = ESFillBuffer (lpipb, pEsb)) != S_OK)
                goto exit4;
        }

        // Adjust priority queue
        if (uiQueueSize > 1)
        { 
            if (DwSubFo (pEsb->lfo, pEsb->lfoMax) != 0 &&
                pEsb->ibBuf >= pEsb->dwEsbSize)
            {
                // Replace first record with last
                lrgPriorityQueue[1] = lrgPriorityQueue[uiQueueSize];
                lrgPriorityQueue[uiQueueSize] = NULL;
                uiQueueSize--;
                pEsi->uiQueueSize = uiQueueSize;
            }
#if 0
            else
            {   // If the stream still has input add it back into the Queue
                lrgPriorityQueue[uiQueueSize] = pEsb;
                PQueueUp(lpipb, lrgPriorityQueue, uiQueueSize);
            }
#endif
            PQueueDown(lpipb);  // Maintain sort order
        }
        else if (DwSubFo (pEsb->lfo, pEsb->lfoMax) != 0 &&
            pEsb->ibBuf >=  pEsb->dwEsbSize)
        {
            uiQueueSize--;
            pEsi->uiQueueSize = uiQueueSize;
            if ((fRet = ESMemory2Disk (lpipb, pHeader, FLUSH_NEW_RECORD)) != S_OK)
                return(fRet);
        }
    } while (uiQueueSize);

    fRet = ESFlushBuffer(pEsi);
    goto exit5;
}
                                             

/*************************************************************************
 *  @doc    PRIVATE
 *  @func   BOOL PASCAL NEAR | FindTopic |
 *      Tell whether a topic id is present in the topic id list carried
 *      by the merge parameters. The position of the last hit is kept in
 *      lpmp->lpTopicIdLast so that the next lookup can resume from it.
 *      The first/last range test and the forward-only scan assume the
 *      list is in ascending order (NOTE(review): confirm with the code
 *      that builds rgTopicId).
 *  @parm   LPMERGEPARAMS | lpmp |
 *      Merge parameters; rgTopicId, dwCount and lpTopicIdLast are read,
 *      lpTopicIdLast may be updated
 *  @parm   DWORD | dwTopicId |
 *      Topic id to look for
 *  @rdesc  TRUE if the id is in the list, FALSE otherwise
 *************************************************************************/
BOOL PASCAL NEAR FindTopic(LPMERGEPARAMS lpmp, DWORD dwTopicId)
{
    LPDW pFirst;    // First entry of the list
    LPDW pLast;     // Last entry of the list
    LPDW pScan;     // Scanning cursor

    Assert(lpmp->dwCount > 0);
    Assert(lpmp->lpTopicIdLast >= lpmp->rgTopicId);
    Assert(lpmp->lpTopicIdLast < lpmp->rgTopicId + lpmp->dwCount);

    pFirst = lpmp->rgTopicId;
    pLast = pFirst + lpmp->dwCount - 1;

    // Outside of the [first, last] range: cannot be in the list
    if (*pFirst > dwTopicId || *pLast < dwTopicId)
        return FALSE;

    // Fast path: same id as the previous hit
    if (*lpmp->lpTopicIdLast == dwTopicId)
        return TRUE;

    // The cached position is already past the id: rewind to the start
    if (*lpmp->lpTopicIdLast > dwTopicId)
        lpmp->lpTopicIdLast = pFirst;

    // Scan forward from the cached position up to and including the end
    pScan = lpmp->lpTopicIdLast;
    while (pScan <= pLast)
    {
        if (*pScan == dwTopicId)
        {
            lpmp->lpTopicIdLast = pScan;
            return TRUE;
        }
        pScan++;
    }

    return FALSE;
}

/*************************************************************************
 *
 *  @doc  INTERNAL INDEXING
 *
 *  @func int | CompareRecordBuffers |
 *    Called from PQueueUp/Down to order two input records. After the
 *    DWORD record length is skipped, the keys are compared in this
 *    order: the word (2-byte length Pascal string), the word length
 *    (only if OCCF_LENGTH is set), the field id (only if OCCF_FIELDID
 *    is set), the first topic id, and finally the leading packed
 *    occurrence values.
 *
 *    All numeric keys are unsigned DWORDs and are compared as such.
 *    The result is normalized to -1/0/+1; the difference is never
 *    squeezed into an int, which would give the wrong sign for
 *    differences of 2^31 or more (and would truncate on 16-bit int).
 *
 *  @parm _LPIPB | lpipb |
 *    Pointer to the index parameter block (occf and
 *    ucNumOccDataFields are read)
 *
 *  @parm LPB | pBufferA |
 *    Pointer to the first input record
 *
 *  @parm   LPB | pBufferB |
 *      Pointer to the second input record
 *
 *  @rdesc
 *      If pBufferA < pBufferB  return < 0
 *      If pBufferA == pBufferB return = 0
 *      If pBufferA > pBufferB  return > 0
 *************************************************************************/

int PASCAL NEAR CompareRecordBuffers (_LPIPB lpipb, LPB pBufferA, LPB pBufferB)
{
    // Local Replacement Variables 
    int     occf = lpipb->occf;
    int     cNumOcc = lpipb->ucNumOccDataFields;
    DWORD   dwOccMin;

    // Working Variables
    int     fRet;            
    int     Len;
    DWORD   dwDataA;
    DWORD   dwDataB;

    pBufferA += sizeof (DWORD);  // Skip record length
    pBufferB += sizeof (DWORD);  // Skip record length
    
    // Compare Pascal strings
    if ((fRet = StrCmp2BytePascal(pBufferA, pBufferB)) != 0)
        return fRet;
        
    // Strings are equal, so both have the same size: skip them
    Len = GETWORD ((LPUW)pBufferA) + sizeof (SHORT);
    pBufferA += Len;
    pBufferB += Len;
    
    // Compare Word Lengths
    if (occf & OCCF_LENGTH)
    {
        pBufferA += CbByteUnpack (&dwDataA, pBufferA);
        pBufferB += CbByteUnpack (&dwDataB, pBufferB);
        if (dwDataA != dwDataB)
            return (dwDataA < dwDataB) ? -1 : 1;
    }

    // Compare FieldIds
    if (occf & OCCF_FIELDID)
    {
        pBufferA += CbByteUnpack (&dwDataA, pBufferA);
        pBufferB += CbByteUnpack (&dwDataB, pBufferB);
        if (dwDataA != dwDataB)
            return (dwDataA < dwDataB) ? -1 : 1;
    }

    // Skip topic count
    pBufferA += CbByteUnpack (&dwDataA, pBufferA);
    pBufferB += CbByteUnpack (&dwDataB, pBufferB);
    
    // Compare 1st topic Id
    pBufferA += CbByteUnpack (&dwDataA, pBufferA);
    pBufferB += CbByteUnpack (&dwDataB, pBufferB);
    if (dwDataA != dwDataB)
        return (dwDataA < dwDataB) ? -1 : 1;
        
    // Get the occurrence count. The ordering of the two counts is kept
    // in fRet; it is what gets returned if the loop below never compares
    // anything.
    pBufferA += CbByteUnpack (&dwDataA, pBufferA);
    pBufferB += CbByteUnpack (&dwDataB, pBufferB);
    
    if (dwDataA < dwDataB)
    {
        fRet = -1;
        dwOccMin = dwDataA;
    }
    else
    {
        fRet = (dwDataA > dwDataB) ? 1 : 0;
        dwOccMin = dwDataB;
    }

    // NOTE(review): as in the original code, each pass consumes ONE
    // packed value from each record whatever cNumOcc is (1..5), so the
    // first dwOccMin packed values are compared, not dwOccMin complete
    // occurrences. Nothing is compared when cNumOcc is outside 1..5.
    // Confirm whether comparing whole occurrences was intended.
    if (cNumOcc >= 1 && cNumOcc <= 5)
    {
        for (; dwOccMin; dwOccMin--)
        {
            pBufferA += CbByteUnpack (&dwDataA, pBufferA);
            pBufferB += CbByteUnpack (&dwDataB, pBufferB);
            if (dwDataA != dwDataB)
                return (dwDataA < dwDataB) ? -1 : 1;

            // Values compared so far are all equal
            fRet = 0;
        }
    }
    return fRet;
}


/*************************************************************************
 *
 *  @doc  INTERNAL INDEXING
 *
 *  @func VOID | PQueueUp | 
 *    Restore the heap condition of the priority queue (a parent must
 *    not compare greater than its children) after a node has been
 *    placed at position "index". While the parent of the node compares
 *    greater (see CompareRecordBuffers), the parent is moved down one
 *    level; the node is finally stored in the slot that was freed.
 *    The queue is 1-based: slot 1 is the top and has no parent.
 *
 *  @parm _LPIPB | lpipb |
 *    Pointer to the index parameter block, passed on to
 *    CompareRecordBuffers
 *
 *  @parm LPESB FAR * | lrgPriorityQueue |
 *    The priority queue, an array of pointers to input buffers
 *
 *  @parm   LONG | index |
 *    Index of the inserted node
 *
 *************************************************************************/

VOID PASCAL NEAR PQueueUp 
    (_LPIPB lpipb, LPESB FAR *lrgPriorityQueue, LONG index)
{
    LPESB pInserted;      // The node that is bubbling up
    LPESB pParent;        // Its current parent
    WORD  uParent;        // Index of the current parent

    pInserted = lrgPriorityQueue [index];

    // The top node has no parent: nothing to do
    uParent = (WORD) (index / 2);
    if (uParent == 0)
        return;

    do
    {
        pParent = lrgPriorityQueue [uParent];

        // Stop as soon as the parent is not greater than the node
        if (CompareRecordBuffers (lpipb,
            (LPB)pParent->lrgbMem + pParent->ibBuf,
            (LPB)pInserted->lrgbMem + pInserted->ibBuf) <= 0)
            break;

        // Move the parent down and continue one level higher
        lrgPriorityQueue [index] = pParent;
        index = uParent;
        uParent = (WORD) (index / 2);
    } while (uParent != 0);

    lrgPriorityQueue [index] = pInserted;
#if BINHN
    SetQueue (&lpipb->esi);
#endif
}


/*************************************************************************
 *
 *  @doc  INTERNAL INDEXING
 *
 *  @func VOID | PQueueDown | 
 *    Restore the heap condition of the priority queue (a parent must
 *    not compare greater than its children) when the node in slot 1
 *    may be greater than its children. The node is pushed down: at
 *    each level the smaller child (see CompareRecordBuffers) is moved
 *    up, until a child that compares greater than the node is found or
 *    the bottom of the queue is reached.
 *    The queue is 1-based and holds lpipb->esi.uiQueueSize entries.
 *
 *  @parm _LPIPB | lpipb |
 *    Pointer to the index parameter block; its esi member supplies
 *    the queue (lrgPriorityQueue) and its size (uiQueueSize)
 *
 *  @comm NOTE(review): the debug hook below is guarded by _BINHN while
 *    SetQueue itself is compiled under BINHN - confirm which spelling
 *    is intended.
 *
 *************************************************************************/

PRIVATE VOID PASCAL NEAR PQueueDown (_LPIPB lpipb)
{
    LPESI pSortInfo = &lpipb->esi;
    LPESB FAR *rgHeap = pSortInfo->lrgPriorityQueue;
    LPESB pSinking = rgHeap[1];             // Node being pushed down
    LPESB pCandidate;                       // Child that may move up
    LPESB pSibling;                         // Right-hand child
    int iLast = pSortInfo->uiQueueSize;     // Last valid slot
    int iLastParent = iLast / 2;            // Last slot having a child
    int iHole = 1;                          // Slot currently vacant
    int iChild;

    while (iHole <= iLastParent)
    {
        // Left child of the vacant slot
        iChild = iHole * 2;

        // If a right child exists too, take the smaller of the two
        if (iChild < iLast && (pSibling = rgHeap[iChild + 1]) != NULL)
        {
            pCandidate = rgHeap[iChild];
            if (CompareRecordBuffers (lpipb,
                (LPB)pCandidate->lrgbMem + pCandidate->ibBuf,
                (LPB)pSibling->lrgbMem + pSibling->ibBuf) >= 0)
                iChild++;
        }

        // No child at all
        if (iChild > iLast)
            break;

        pCandidate = rgHeap[iChild];

        // Heap condition met: the node is less than its smallest child
        if (CompareRecordBuffers (lpipb,
            (LPB)pSinking->lrgbMem + pSinking->ibBuf,
            (LPB)pCandidate->lrgbMem + pCandidate->ibBuf) < 0)
            break;

        // Move the child up and go down one level
        rgHeap[iHole] = pCandidate;
        iHole = iChild;
    }
    rgHeap[iHole] = pSinking;
#if _BINHN
    SetQueue (pSortInfo);
#endif
}


/*************************************************************************
 *  @doc    PRIVATE
 *  @func   PTOPICDATA PASCAL NEAR | MergeTopicNode |
 *      Add a topic node to the topic list of the word being merged.
 *      The list (pHeader->pTopic .. pHeader->pLastTopic) is kept in
 *      ascending topic id order. The expected case is a node that goes
 *      at the end; an out-of-order node is inserted at its place. If a
 *      node with the same topic id already exists, the occurrences of
 *      the new node are merged into it (when cNumOcc is non-zero).
 *      Topic ids are unsigned DWORDs and are compared directly; taking
 *      their difference as an int would misorder ids that are 2^31 or
 *      more apart (or alias ids on 16-bit int).
 *  @parm   PMERGEHEADER | pHeader |
 *      Header of the word being merged; owns the topic list
 *  @parm   PTOPICDATA | pNewTopic |
 *      The node to add
 *  @parm   int | cNumOcc |
 *      Number of occurrence data fields; 0 means no occurrence data
 *  @rdesc  NULL if pNewTopic has been linked into the list (the caller
 *      needs a new node for the next topic), or pNewTopic itself if the
 *      topic already existed, in which case the node can be reused
 *************************************************************************/
PRIVATE PTOPICDATA PASCAL NEAR MergeTopicNode (PMERGEHEADER pHeader,
    PTOPICDATA pNewTopic, int cNumOcc)
{
    PTOPICDATA pTopic, pPrevTopic;
    
    if ((pTopic = pHeader->pLastTopic) == NULL) 
    {
        // The list is empty
        pHeader->pTopic = pHeader->pLastTopic = pNewTopic;
        pNewTopic->pNext = NULL;
        return(NULL);
    }
    
    if (pTopic->dwTopicId < pNewTopic->dwTopicId)
    {
        // New node. Add to the end
        pNewTopic->pNext = NULL;
        pHeader->pLastTopic->pNext = pNewTopic;
        pHeader->pLastTopic = pNewTopic;
        
        // Reset pNewTopic for next node allocation
        return NULL;
    }
    
    if (pTopic->dwTopicId == pNewTopic->dwTopicId)
    {
        // Same topic. Return pNewTopic for reuse
        if (cNumOcc)
            MergeOccurrence (pTopic, pNewTopic, cNumOcc);
        return(pNewTopic);
    }
    
    // If we get to this point, the new id is less than the last id of
    // the list, ie. the input is out of order.
    // Try to find the insertion point: the first node whose id is
    // greater than or equal to the new id. The scan stops at the last
    // node at the latest, and that one is known to be greater.
    pTopic = pHeader->pTopic;
    pPrevTopic = NULL;
     
    for (; pTopic->pNext; pTopic = pTopic->pNext)
    {
        if (pTopic->dwTopicId >= pNewTopic->dwTopicId)
        {
            /* We pass the inserted point */
            break;
        }
        pPrevTopic = pTopic;
    }
    
    if (pTopic->dwTopicId == pNewTopic->dwTopicId)
    {
        // Same topic. Return pNewTopic for reuse
        if (cNumOcc)
            MergeOccurrence (pTopic, pNewTopic, cNumOcc);
        return(pNewTopic);
    }
    
    if (pPrevTopic == NULL)
    {
        /* No smaller node: insert at the beginning */
        pNewTopic->pNext = pHeader->pTopic;
        pHeader->pTopic = pNewTopic;
    }
    else
    {
        /* Insert in the middle, right before pTopic */
        pNewTopic->pNext = pPrevTopic->pNext;
        pPrevTopic->pNext = pNewTopic;
    }
    
    // Update the last topic by walking to the end of the list
    while (pTopic->pNext)
    {
        pTopic = pTopic->pNext;
    }
    pHeader->pLastTopic = pTopic;
    return(NULL);
}    

/*************************************************************************
 *  @doc    PRIVATE
 *  @func   void | MergeOccurrence |
 *      Merge the occurrence list of pNewTopic into the one of
 *      pOldTopic (both describe the same topic). Only whole-list
 *      concatenation is supported:
 *          - the new list is appended when the last old occurrence is
 *            not greater than the first new one (the expected case)
 *          - the new list is prepended when its first occurrence is
 *            not greater than the first old one
 *      An empty list on either side is handled without calling
 *      CompareOccurrence: a topic read with an occurrence count of 0
 *      has NULL list pointers, which must not be dereferenced.
 *  @parm   PTOPICDATA | pOldTopic |
 *      Topic node already in the list; receives the occurrences
 *  @parm   PTOPICDATA | pNewTopic |
 *      Topic node whose occurrences are to be transferred
 *  @parm   int | cOccNum |
 *      Number of occurrence data fields, passed to CompareOccurrence
 *  @comm   NOTE(review): when neither case applies, E_ASSERT is only
 *      recorded in a local ERRB; the new occurrences are not linked
 *      and the caller is not told.
 *************************************************************************/
PRIVATE VOID NEAR MergeOccurrence (PTOPICDATA pOldTopic,
    PTOPICDATA pNewTopic, int cOccNum)
{
    ERRB errb;
    
    // Nothing to transfer
    if (pNewTopic->pOccData == NULL)
        return;

    // The old topic has no occurrence yet: just adopt the new list
    if (pOldTopic->pOccData == NULL || pOldTopic->pLastOccData == NULL)
    {
        pOldTopic->pOccData = pNewTopic->pOccData;
        pOldTopic->pLastOccData = pNewTopic->pLastOccData;
        pOldTopic->dwOccCount += pNewTopic->dwOccCount;
        return;
    }

    if (CompareOccurrence (&pOldTopic->pLastOccData->OccData[0],
        &pNewTopic->pOccData->OccData[0], cOccNum) <= 0)
    {
        // The whole last list is less than the current list. This is
        // what I expect
        // We just linked the 2 lists together
        pOldTopic->pLastOccData->pNext = pNewTopic->pOccData;
        pOldTopic->pLastOccData = pNewTopic->pLastOccData;
        pOldTopic->dwOccCount += pNewTopic->dwOccCount;
        return;
    }
    
    // The current list is less than the old list.
    // This is weird, but still we can handle it
    if (CompareOccurrence (&pNewTopic->pOccData->OccData[0],
        &pOldTopic->pOccData->OccData[0], cOccNum) <= 0)
    {
        pNewTopic->pLastOccData->pNext = pOldTopic->pOccData;
        pOldTopic->pOccData = pNewTopic->pOccData;
        pOldTopic->dwOccCount += pNewTopic->dwOccCount;
        return;
    }
    
    // Neither append nor prepend applies; see the NOTE in the header
    SetErrCode (&errb, E_ASSERT);
}    

/*====================================================================*/
#ifdef BINHN
/*************************************************************************
 *  @doc    PRIVATE
 *  @func   VOID PASCAL NEAR | SetQueue |
 *      Debugging aid. For each of the first slots of the priority
 *      queue (at most 20, and less than pEsi->cesb) that is not NULL,
 *      store in pEsi->lpbQueueStr a pointer 6 bytes into the current
 *      record of that input buffer. Presumably the 6 bytes are the
 *      DWORD record length plus the 2-byte string length, so that the
 *      pointer shows the word itself - TODO confirm.
 *  @parm   LPESI | pEsi |
 *      Pointer to the external sort info
 *************************************************************************/
PRIVATE VOID PASCAL NEAR SetQueue (LPESI pEsi)
{
    LPESB FAR *rgHeap = pEsi->lrgPriorityQueue;
    unsigned int iSlot;

    for (iSlot = 0; iSlot < 20; iSlot++)
    {
        LPESB pEsb;

        if (iSlot >= pEsi->cesb)
            break;

        // Unused slots are left untouched
        pEsb = rgHeap[iSlot];
        if (pEsb == NULL)
            continue;

        pEsi->lpbQueueStr[iSlot] = pEsb->lrgbMem + pEsb->ibBuf + 6;
    }
}
#endif
    
/************************************************************************
 *  @doc    PRIVATE
 *  @func   HRESULT PASCAL NEAR | ESBBlockAllocate |
 *      Create the block managers used for the topic nodes
 *      (lpipb->TopicBlock) and the occurrence nodes (lpipb->OccBlock),
 *      and fetch the initial free list of each one.
 *      If the occurrence block manager cannot be created, the topic
 *      block manager created just before is released and its fields
 *      are reset, so that nothing allocated here is left behind.
 *  @parm   _LPIPB | lpipb |
 *      Pointer to the index parameter block
 *  @parm   DWORD | lMemSize |
 *      Memory allocated for the indexer
 *  @rdesc  S_OK, or E_OUTOFMEMORY
 ************************************************************************/

PRIVATE HRESULT PASCAL NEAR ESBBlockAllocate (_LPIPB lpipb, DWORD lMemSize)
{
    DWORD dwTopicSize;
    WORD OccNodeSize = sizeof (OCCDATA) - 1 + sizeof(DWORD) *
        lpipb->ucNumOccDataFields; // About 24bytes
        
    // Round the node size up to a multiple of 4
    OccNodeSize = (OccNodeSize + 3) & ~3;

    /* The memory is for topic block and occurrence blocks, which
     * should be in the ratio 1:1.5. The topic blocks get 2/5 of it.
     * NOTE(review): the occurrence block count below is derived from
     * the whole lMemSize, not from the remaining 3/5 - this is how the
     * original code behaves; confirm that it is intended.
     */
    dwTopicSize = (lMemSize * 2) / 5;
        
    // Allocate a block manager for topic node 
    if ((lpipb->TopicBlock.pBlockMgr = 
        BlockInitiate ((MAX_BLOCK_SIZE * sizeof(TOPICDATA)/sizeof(TOPICDATA)),
            sizeof(TOPICDATA),
            (WORD)(dwTopicSize/MAX_BLOCK_SIZE),
            USE_VIRTUAL_MEMORY | THREADED_ELEMENT)) == NULL)
    { 
        return SetErrCode (NULL, E_OUTOFMEMORY);
    }
    lpipb->TopicBlock.pFreeList =
        (PLIST)BlockGetLinkedList(lpipb->TopicBlock.pBlockMgr);
    
    // Allocate a block manager for occ node 
    if ((lpipb->OccBlock.pBlockMgr =
        BlockInitiate((MAX_BLOCK_SIZE * OccNodeSize)/OccNodeSize,
        OccNodeSize, (WORD)(lMemSize / MAX_BLOCK_SIZE), 
        USE_VIRTUAL_MEMORY | THREADED_ELEMENT)) == NULL)
    { 
        // Undo the topic allocation done above. (The original code
        // released BTNodeBlock here, which this function does not own,
        // and left the topic block manager allocated.)
        BlockFree(lpipb->TopicBlock.pBlockMgr);
        lpipb->TopicBlock.pBlockMgr = NULL;
        lpipb->TopicBlock.pFreeList = NULL;
        return SetErrCode (NULL, E_OUTOFMEMORY);
    }
    lpipb->OccBlock.pFreeList = (PLIST)BlockGetLinkedList(lpipb->OccBlock.pBlockMgr);
    
    return (S_OK);
}

/*************************************************************************
 *  @doc    PRIVATE
 *  @func   LPV PASCAL NEAR | GetBlockNode |
 *      Take one node off the free list of a block combo. When the free
 *      list is empty, the block manager is asked to grow (BlockGrowth)
 *      and the list returned by BlockGetLinkedList is used instead.
 *      dwCount is incremented for every node handed out.
 *  @parm   PBLKCOMBO | pBlockCombo |
 *      The block manager / free list / count triple to take from
 *  @rdesc  Pointer to the node, or NULL if BlockGrowth failed. The
 *      node is returned as is; its pNext link is not cleared.
 *************************************************************************/
PRIVATE LPV PASCAL NEAR GetBlockNode (PBLKCOMBO pBlockCombo)
{
    PLIST pNode = pBlockCombo->pFreeList;

    if (pNode == NULL)
    {
        // Out of free nodes: grow, then start on the new list
        if (BlockGrowth (pBlockCombo->pBlockMgr) != S_OK)
            return (NULL);
        pNode = (PLIST)BlockGetLinkedList(pBlockCombo->pBlockMgr);
    }

    // Unlink the head of the free list
    pBlockCombo->pFreeList = pNode->pNext;
    pBlockCombo->dwCount++;
    return (pNode);
}

/*************************************************************************
 *
 *  @doc    INTERNAL
 *
 *  @func   BOOL FAR PASCAL | BuildIndexFile |
 *      This function is for debugging purposes only. In normal indexing,
 *      it will never be called. Since collecting words and indexing can
 *      take a long time, debugging the index phase can become a hassle that
 *      takes several hours per run. To minimize the indexing time when
 *      debugging, all the intermediate files are saved. These are:
 *          - the internal sorted result file, which contains all words and
 *          their occurrences sorted
 *          - the external sorted result file, which is a snap shot of the
 *          ESI structures and its ESB blocks
 *      The only steps left will be processing the occurrence list and doing
 *      permanent index
 *
 *      To use the function, add the following lines in the app:
 *
 *      extern HRESULT PASCAL FAR BuildIndexFile (LPSTR, LPSTR, LPSTR, WORD, WORD,
 *      WORD, INTERRUPT_FUNC, VOID FAR *, STATUS_FUNC, VOID FAR*, PHRESULT);
 *
 *      int fDotest;
 *
 *      if (fDotest) {
 *          return BuildIndexFile ((LPSTR)"c:/tmp/test.mvb!MVINDEX",
 *              (LPSTR)"c:/tmp/esi.tmp", (LPSTR)"c:/tmp/iso.tmp",
 *              OCCF_TOPICID, IDXF_NORMALIZE, 0, (INTERRUPT_FUNC)lpfnInterruptFunc,
 *              (LPV)NULL,
 *              (STATUS_FUNC)lpfnStatusFunc, (LPV)hwndGlobal,
 *              NULL);
 *      }
 *
 *  @parm   HFPB | hfpb |
 *      HFPB for index file if pstrIndexFile is NULL
 *
 *  @parm   LPB | pstrIndexFile |
 *      The .MVB + index file, usually with the format TEST.MVB!MVINDEX
 *
 *  @parm   LPB | lpbEsiFile |
 *      The external sort info file
 *
 *  @parm   LPB | lpbIsiFile |
 *      The internal sorted info filename
 *
 *  @parm   PINDEXINFO | pIndexInfo |
 *      IndexInfo
 *
 *  @rdesc  S_OK if succeeded, else other non-zero error codes
 *************************************************************************/

PUBLIC HRESULT PASCAL EXPORT_API FAR BuildIndexFile
    (HFPB hfpb, LPSTR pstrIndexFile,
    LPB lpbEsiFile, LPB lpbIsiFile, PINDEXINFO pIndexInfo)
{
    _LPIPB lpipb;
    LPESI lpesi;
    HRESULT fRet;
    ERRB errb;
    DWORD loop;
    FLOAT rLog;
    // NOTE(review): the occurrence-field schemes computed below are stored
    // starting at slot 0 of cKey[]/BitCount[], the same tables that are
    // indexed by CKEY_TOPIC_ID and CKEY_OCC_COUNT just before them.
    // Confirm that 0 is the intended base slot.
    BYTE  bKeyIndex = 0;

    if ((lpipb = MVIndexInitiate(pIndexInfo, NULL)) == NULL)
        return E_FAIL;

    lpesi = &lpipb->esi;

    // Restore the counters and the external sort block list saved by the
    // earlier phase. From here on every exit must dispose of lpipb.
    if (LoadEsiTemp (lpipb, lpesi, lpbEsiFile, lpbIsiFile, NULL) != S_OK)
    {
        fRet = E_FAIL;
        goto exit0;
    }

    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        // Allocate a huge buffer to contain all the sigma terms
        if ((lpipb->wi.hSigma = _GLOBALALLOC (DLLGMEM_ZEROINIT,
            (LCB)((lpipb->dwMaxTopicId + 1) * sizeof (SIGMA)))) == NULL)
        {
            // Nothing but the IPB to release yet
            fRet = SetErrCode (&errb, E_OUTOFMEMORY);
            goto exit0;
        }
        lpipb->wi.hrgsigma = (HRGSIGMA)_GLOBALLOCK (lpipb->wi.hSigma);

        // Small buffer containing pre-calculated values
        if ((lpipb->wi.hLog = _GLOBALALLOC (DLLGMEM_ZEROINIT,
            (CB)(cLOG_MAX * sizeof (FLOAT)))) == NULL)
        {
            fRet = SetErrCode (&errb, E_OUTOFMEMORY);
            FreeHandle (lpipb->wi.hSigma);
            goto exit0;
        }
        lpipb->wi.lrgrLog = (FLOAT FAR *)_GLOBALLOCK (lpipb->wi.hLog);

        // Initialize the array. Slot 0 is skipped (it would divide by
        // zero) and keeps whatever the allocator put there.
        for (loop = cLOG_MAX - 1; loop > 0; --loop)
        {
#ifndef ISBU_IR_CHANGE
            rLog = (float) log10(cHundredMillion/(double)loop);
#else
            rLog = (float)1.0 / (float)loop;
#endif  // ISBU_IR_CHANGE
            lpipb->wi.lrgrLog[loop] = rLog * rLog;
        }
    }

    // Both weighting tables (if any) are live from here on, so failures
    // leave through exit1, which frees them before disposing of lpipb.
    if ((fRet = MergeSortTreeFile (lpipb, NULL)) != S_OK)
    {
        SetErrCode (&errb, fRet);
        goto exit1;
    }
    if ((lpipb->idxf & KEEP_TEMP_FILE) == 0)
        FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);

    // If we are doing term-weighting we have to square root all sigma values
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        // ISBU_IR_CHANGE not necessary 'cos sqrt computation is necessary in both cases
        for (loop = 0; loop < lpipb->dwMaxTopicId + 1; ++loop)
            lpipb->wi.hrgsigma[loop] = 
                (float)sqrt ((double)lpipb->wi.hrgsigma[loop]);
    }

    // Analyze data to get the best compression scheme
    // TopicId
    VGetBestScheme(&lpipb->cKey[CKEY_TOPIC_ID], 
        &lpipb->BitCount[CKEY_TOPIC_ID][0], lcbitBITSTREAM_ILLEGAL, TRUE);
    // Occurrence Count
    VGetBestScheme(&lpipb->cKey[CKEY_OCC_COUNT], 
        &lpipb->BitCount[CKEY_OCC_COUNT][0], lcbitBITSTREAM_ILLEGAL, TRUE);

    // One scheme per occurrence field that is enabled, in consecutive slots
    if (lpipb->occf & OCCF_COUNT)
    {
        VGetBestScheme(&lpipb->cKey[bKeyIndex],
            &lpipb->BitCount[bKeyIndex][0], lcbitBITSTREAM_ILLEGAL, TRUE);
        bKeyIndex++;
    }

    if (lpipb->occf & OCCF_OFFSET)
    {
        VGetBestScheme(&lpipb->cKey[bKeyIndex],
            &lpipb->BitCount[bKeyIndex][0], lcbitBITSTREAM_ILLEGAL, TRUE);
        bKeyIndex++;
    }

    // Call the user callback every once in a while
    if (lpipb->CallbackInfo.dwFlags & ERRFLAG_STATUS)
    {
        PFCALLBACK_MSG pCallbackInfo = &lpipb->CallbackInfo;
        CALLBACKINFO Info;

        Info.dwPhase = 2;
        Info.dwIndex = 100;
        fRet = (*pCallbackInfo->MessageFunc)
            (ERRFLAG_STATUS, pCallbackInfo->pUserData, &Info);
        // Any non-S_OK answer from the callback aborts the build
        if (S_OK != fRet)
            goto exit1;
    }
    
    // Build the permanent index    
    fRet = BuildBTree(NULL, lpipb, lpipb->esi.aszTempName, hfpb, pstrIndexFile);

exit1:
    // Release the term-weighting tables on success and failure alike
    if (lpipb->idxf & IDXF_NORMALIZE)
    {
        FreeHandle (lpipb->wi.hLog);
        FreeHandle (lpipb->wi.hSigma);
    }

exit0:
    MVIndexDispose (lpipb);
    return fRet;
}

/*************************************************************************
 *  SaveEsiTemp
 *      Writes the index parameter block, followed by every external
 *      sort block descriptor on lpesi's list (walked from lpesbRoot via
 *      lpesbNext), to a newly named temp file.
 *
 *      On success the temp filename is recorded in lpipb->szEsiTemp.
 *      If the file cannot be opened, or any write comes up short, the
 *      file is closed and removed and lpipb->szEsiTemp is left untouched.
 *      Failure is not reported to the caller.
 *************************************************************************/
PRIVATE VOID PASCAL NEAR SaveEsiTemp (_LPIPB lpipb, LPESI lpesi)
{
    GHANDLE hfpb;
    LPESB lpesb;
    char szEsi[100];

    GETTEMPFILENAME ((char)0, "foo", 0, szEsi);
    if ((hfpb = FileOpen(NULL, szEsi, REGULAR_FILE, READ_WRITE, NULL)) == NULL) 
        return;

    // The header gets the same short-write check as the ESB records;
    // a file with a truncated IPB must not be advertised in szEsiTemp.
    if (FileWrite(hfpb, lpipb, sizeof(IPB), NULL) != sizeof(IPB))
        goto fail;

    for (lpesb = lpesi->lpesbRoot; lpesb; lpesb = lpesb->lpesbNext)
    {
        if (FileWrite(hfpb, lpesb, sizeof(ESB), NULL) != sizeof(ESB))
            goto fail;
    }

    FileClose (hfpb);
    MEMCPY (lpipb->szEsiTemp, szEsi, sizeof(szEsi));
    return;

fail:
    // Do not leave a partial file behind
    FileClose (hfpb);
    FileUnlink (NULL, szEsi, REGULAR_FILE);
}

/*************************************************************************
 *  UpdateEsiTemp
 *      Opens the file named by lpipb->szEsiTemp and writes the current
 *      index parameter block to it. Nothing is done if the file cannot
 *      be opened, and the outcome of the write is not reported.
 *************************************************************************/
PRIVATE VOID PASCAL NEAR UpdateEsiTemp (_LPIPB lpipb)
{
    GHANDLE hTemp;

    hTemp = FileOpen(NULL, lpipb->szEsiTemp, REGULAR_FILE, READ_WRITE, NULL);
    if (hTemp != NULL)
    {
        FileWrite(hTemp, lpipb, sizeof(IPB), NULL);
        FileClose (hTemp);
    }
}

/*************************************************************************
 *  LoadEsiTemp
 *      Reloads the state written to the external sort info file: copies
 *      the saved counters and flags from the stored IPB into lpipb, then
 *      allocates one ESB per stored record and pushes it onto the front
 *      of lpesi's list (so the list ends up in reverse file order).
 *      lpbIsiFile is copied into lpipb->isi.aszTempName first.
 *
 *  Returns S_OK on success, E_NOTEXIST if lpbEsiFile cannot be opened,
 *  E_FAIL if the stored IPB cannot be read in full, or E_OUTOFMEMORY if
 *  an ESB cannot be allocated. ESBs linked before a failure stay on the
 *  list; this routine does not free them.
 *************************************************************************/
PRIVATE BOOL PASCAL LoadEsiTemp (_LPIPB lpipb, LPESI lpesi, LPB lpbEsiFile,
    LPB lpbIsiFile, PHRESULT phr)
{
    LPESB lpesb;
    HFILE  hFile;
    ESB	esb;
    HANDLE hesb;
    HRESULT fRet;
    IPB ipb;
    LPISI pIsi = &lpipb->isi;         // Pointer to internal sort info

    /* Copy the internal sort info filename, terminator included */
    MEMCPY (pIsi->aszTempName, lpbIsiFile, lstrlen(lpbIsiFile) + 1);

    /* Read in the external sort buffer info */

    if ((hFile = _lopen (lpbEsiFile, READ)) == HFILE_ERROR)
        return E_NOTEXIST;

    /* Read old IPB info; a short read would leave ipb partly
     * uninitialised, so refuse to use it
     */
    if (_lread (hFile, &ipb, sizeof(IPB)) != sizeof(IPB))
    {
        fRet = SetErrCode (phr, E_FAIL);
        goto exit0;
    }

    /* Transfer meaningful data */

    lpipb->dwIndexedWord = ipb.dwIndexedWord;
    lpipb->dwUniqueWord = ipb.dwUniqueWord;
    lpipb->dwByteCount = ipb.dwByteCount;
    lpipb->dwOccOffbits = ipb.dwOccOffbits;
    lpipb->dwOccExtbits = ipb.dwOccExtbits;
    lpipb->dwMaxFieldId = ipb.dwMaxFieldId;
    lpipb->dwMaxWCount = ipb.dwMaxWCount;
    lpipb->dwMaxOffset = ipb.dwMaxOffset;
    lpipb->dwTotal3bWordLen = ipb.dwTotal3bWordLen;
    lpipb->dwTotal2bWordLen = ipb.dwTotal2bWordLen;
    lpipb->dwTotalUniqueWordLen = ipb.dwTotalUniqueWordLen;
    lpipb->lcTopics = ipb.lcTopics;
    lpipb->dwMaxTopicId = ipb.dwMaxTopicId;
    // lpipb->dwMemAllowed = ipb.dwMemAllowed;
    lpipb->dwMaxRecordSize = ipb.dwMaxRecordSize;
    lpipb->dwMaxEsbRecSize = ipb.dwMaxEsbRecSize;
    lpipb->dwMaxWLen = ipb.dwMaxWLen;
    lpipb->idxf = ipb.idxf;

    fRet = S_OK;

    /* The loop ends at the first read that does not yield a whole ESB */
    while ((_lread (hFile, &esb, sizeof(ESB))) == sizeof(ESB))
    {
        if ((hesb = _GLOBALALLOC(GMEM_MOVEABLE | GMEM_ZEROINIT,
            sizeof(ESB))) == NULL)
        {
            fRet = SetErrCode (phr,E_OUTOFMEMORY);
            break;
        }

        lpesb = (LPESB)_GLOBALLOCK (hesb);

        /* Copy the ESB information */
        *lpesb = esb;

        /* Update the structure: the handle stored in the file belongs
         * to the earlier run, replace it with the one just allocated
         */
        lpesb->hStruct = hesb;

        lpesb->lpesbNext = lpesi->lpesbRoot;
        lpesi->lpesbRoot= lpesb;
        lpesi->cesb ++;
    }

exit0:
    /* Single close for every path that opened the file */
    _lclose (hFile);
    return fRet;
}

/*************************************************************************
 *  AllocSigmaTable
 *      Allocates and locks the two term-weighting tables kept in
 *      lpipb->wi: the sigma table, with dwMaxTopicId + 1 SIGMA entries,
 *      and the pre-computed table of cLOG_MAX FLOAT entries, whose slots
 *      1 .. cLOG_MAX - 1 are then filled in.
 *
 *  Returns S_OK, or E_OUTOFMEMORY if either allocation fails (the sigma
 *  table is released again when the second allocation fails).
 *************************************************************************/
HRESULT FAR PASCAL AllocSigmaTable (_LPIPB lpipb)
{
    ERRB errb;
    DWORD iLog;
    float rTerm;

    // Sigma table: one entry per topic id
    lpipb->wi.hSigma = _GLOBALALLOC (DLLGMEM_ZEROINIT,
        (LCB)((lpipb->dwMaxTopicId + 1) * sizeof (SIGMA)));
    if (lpipb->wi.hSigma == NULL)
        return SetErrCode (&errb, E_OUTOFMEMORY);
    lpipb->wi.hrgsigma = (HRGSIGMA)_GLOBALLOCK (lpipb->wi.hSigma);

    // Table of pre-calculated squared terms
    lpipb->wi.hLog = _GLOBALALLOC (DLLGMEM_ZEROINIT,
        (CB)(cLOG_MAX * sizeof (FLOAT)));
    if (lpipb->wi.hLog == NULL)
    {
        FreeHandle (lpipb->wi.hSigma);
        return SetErrCode (&errb, E_OUTOFMEMORY);
    }
    lpipb->wi.lrgrLog = (FLOAT FAR *)_GLOBALLOCK (lpipb->wi.hLog);

    // Fill from the top slot down to slot 1; slot 0 is never written here
    iLog = cLOG_MAX - 1;
    while (iLog > 0)
    {
#ifdef ISBU_IR_CHANGE
        rTerm = (float)1.0 / (float)iLog;
#else
        rTerm = (float) log10(cHundredMillion/(double)iLog);
#endif  // ISBU_IR_CHANGE
        lpipb->wi.lrgrLog[iLog] = rTerm * rTerm;
        --iLog;
    }

    return S_OK;
}