Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1777 lines
58 KiB

/*************************************************************************
*                                                                        *
*  PERMIND2.C                                                            *
*                                                                        *
*  Copyright (C) Microsoft Corporation 1990-1994                         *
*  All Rights reserved.                                                  *
*                                                                        *
**************************************************************************
*                                                                        *
*  Module Intent                                                         *
*    This is the final stage of the index building process. This module  *
*    converts the input data into a permanent B-Tree file.               *
*                                                                        *
*  Stem node structure:                                                  *
*    CbLeft |* Word | PointerToNode *| Slack                             *
*                                                                        *
*  Leaf node structure:                                                  *
*    NxtBlkPtr|CbLeft|*Word|FieldId|TopicCnt|PointerToNode|DataSize*|Slack
*                                                                        *
*  Data node structure:                                                  *
*    |* Topic | OccBlkCnt |* OccBlk *| *| Slack                          *
*                                                                        *
*  Fields between |* *| repeat based on count values                     *
*                                                                        *
**************************************************************************
*                                                                        *
*  Current Owner: BinhN                                                  *
*                                                                        *
**************************************************************************/
  30. #include <mvopsys.h>
  31. #include <mem.h>
  32. #include <memory.h>
  33. #include <math.h>
  34. #include <orkin.h>
  35. #include <mvsearch.h>
  36. #include "common.h"
  37. #include "index.h"
  38. #ifdef _DEBUG
  39. static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
  40. #endif
  41. /*************************************************************************
  42. *
  43. * PRIVATE PUBLIC FUNCTIONS
  44. *
  45. * All of them should be declared far, unless we know they belong to
  46. * the same segment. They should be included in some include file
  47. *
  48. *************************************************************************/
  49. PUBLIC HRESULT FAR PASCAL BuildBTree (HFPB, _LPIPB, LPB, HFPB, LPSTR);
  50. PUBLIC PNODEINFO FAR PASCAL AllocBTreeNode (_LPIPB);
  51. PUBLIC VOID PASCAL FAR FreeBTreeNode (PNODEINFO pNode);
  52. PUBLIC int FAR PASCAL PrefixCompressWord (LPB, LPB, LPB, int);
  53. PUBLIC HRESULT FAR PASCAL FWriteBits(PFILEDATA, DWORD, BYTE);
  54. PUBLIC DWORD FAR PASCAL WriteDataNode (_LPIPB, DWORD, PHRESULT);
  55. /*************************************************************************
  56. *
  57. * PRIVATE PRIVATE FUNCTIONS
  58. *
  59. *************************************************************************/
  60. PRIVATE HRESULT NEAR PASCAL AddRecordToLeaf (_LPIPB);
  61. PRIVATE HRESULT NEAR PASCAL AddRecordToStem (_LPIPB, LPB);
  62. PRIVATE int NEAR PASCAL CompressDword (PFILEDATA, DWORD);
  63. PRIVATE HRESULT NEAR PASCAL WriteStemNode (_LPIPB, PNODEINFO);
  64. PRIVATE HRESULT NEAR PASCAL WriteLeafNode (_LPIPB);
  65. PRIVATE HRESULT NEAR PASCAL FlushAllNodes (_LPIPB);
  66. // Compression functions
  67. // PRIVATE HRESULT NEAR PASCAL FAddDword (PFILEDATA, DWORD, CKEY);
  68. PRIVATE HRESULT NEAR PASCAL FWriteBool(PFILEDATA, BOOL);
  69. // This table is used to avoid the calculation "(1L << v) - 1". Instead
  70. // you say "argdwBits[v]", which should be faster. The table is useful
  71. // other places, too.
  72. DWORD argdwBits[] =
  73. {
  74. 0x00000000, 0x00000001, 0x00000003, 0x00000007,
  75. 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
  76. 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,
  77. 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
  78. 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF,
  79. 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
  80. 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF,
  81. 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF,
  82. 0xFFFFFFFF,
  83. };
  84. PRIVATE HRESULT PASCAL NEAR WriteBitStreamDWord (PFILEDATA, DWORD, int);
  85. PRIVATE HRESULT PASCAL NEAR WriteFixedDWord (PFILEDATA, DWORD, int);
  86. PRIVATE HRESULT PASCAL NEAR WriteBellDWord (PFILEDATA, DWORD, int);
  87. FENCODE EncodeTable[] =
  88. {
  89. WriteBitStreamDWord,
  90. WriteFixedDWord,
  91. WriteBellDWord,
  92. NULL,
  93. };
  94. #define FAddDword(p,dw,key) (EncodeTable[(key).cschScheme]((p), (dw), (key).ucCenter))
  95. #define SAFE_SLACK 256
  96. /*************************************************************************
  97. *
  98. * @doc PRIVATE INDEXING
  99. *
  100. * @func HRESULT | BuildBTree |
  101. * Allocates required memory and opens input files to create a B-Tree.
  102. * Parses incoming words and calls AddRecordToLeaf to process them.
  103. *
  104. * @parm HFPB | hfpbSysFile |
  105. * If not NULL, handle to an already opened sysfile
  106. *
  107. * @parm _LPIPB | lpipb |
  108. * Pointer to the index parameter block
  109. *
  110. * @parm LPB | lpszTemp |
  111. * Filename of the temporary input file
  112. *
  113. * @parm LPB | lpszPerm |
  114. * Filename of the permanent B-Tree file
  115. *
  116. * @rdesc Returns S_OK on success or errors if failed
  117. *
  118. *************************************************************************/
  119. HRESULT FAR PASCAL BuildBTree (HFPB hfpbFileSys, _LPIPB lpipb,
  120. LPB lpszTemp, HFPB hfpbPerm, LPSTR lszFilename/*IStream *pistmPerm*/)
  121. {
  122. PFILEDATA pOutFile; // Pointer to output data
  123. PFILEDATA pInFile; // Pointer to input data
  124. DWORD dwBytesRead = 0; // Checks for EOF
  125. DWORD dwLeftover; // Used to adjust input buffer
  126. PBTREEDATA pTreeData = &lpipb->BTreeData; // Structure defining BTree
  127. PIH20 pHeader = &pTreeData->Header; // Replacement variable
  128. HRESULT fRet; // Return value
  129. PNODEINFO pNode; // Pointer to current input node
  130. ERRB errb= S_OK;
  131. PHRESULT phr = &errb;
  132. int iIndex; // Index into the compressed key
  133. DWORD dwUniqueTerm = 0; // Callback variable
  134. BOOL fOpenedFile; // TRUE if we have to close the file
  135. // Open input file
  136. pInFile = &lpipb->InFile;
  137. if ((pInFile->fFile = FileOpen (NULL, lpszTemp,
  138. REGULAR_FILE, READ, phr)) == NULL)
  139. return *phr;
  140. // Allocate input buffer
  141. pInFile->dwMax = FILE_BUFFER;
  142. if ((pInFile->hMem =
  143. _GLOBALALLOC (DLLGMEM_ZEROINIT, pInFile->dwMax + SAFE_SLACK)) == NULL)
  144. {
  145. fRet = E_OUTOFMEMORY;
  146. exit0:
  147. FileClose (pInFile->fFile);
  148. if ((lpipb->idxf & KEEP_TEMP_FILE) == 0)
  149. FileUnlink (NULL, lpszTemp, REGULAR_FILE);
  150. return fRet;
  151. }
  152. pInFile->pMem = _GLOBALLOCK (pInFile->hMem);
  153. pInFile->pCurrent = pInFile->pMem;
  154. pOutFile = &lpipb->OutFile;
  155. /* Open subfile if necessary, (and system file if necessary) */
  156. pOutFile->fFile = hfpbPerm;
  157. if ((fOpenedFile = FsTypeFromHfpb(hfpbPerm) != FS_SUBFILE) &&
  158. (pOutFile->fFile = (HANDLE)FileOpen
  159. (hfpbPerm, lszFilename, hfpbPerm ? FS_SUBFILE : REGULAR_FILE,
  160. READ, phr)) == 0)
  161. {
  162. SetErrCode (&fRet, E_FILENOTFOUND);
  163. exit1:
  164. FreeHandle (pInFile->hMem);
  165. goto exit0;
  166. }
  167. // Allocate output buffer, at least enough for one block
  168. pOutFile->dwMax = FILE_BUFFER;
  169. if (pOutFile->dwMax < (LONG)lpipb->BTreeData.Header.dwBlockSize)
  170. pOutFile->dwMax = lpipb->BTreeData.Header.dwBlockSize;
  171. if ((pOutFile->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
  172. pOutFile->dwMax + SAFE_SLACK)) == NULL)
  173. {
  174. fRet = E_OUTOFMEMORY;
  175. exit2:
  176. if (fOpenedFile)
  177. FileClose (hfpbPerm);
  178. goto exit1;
  179. }
  180. pOutFile->pMem = _GLOBALLOCK (pOutFile->hMem);
  181. // Skip 1K to hold header infomation
  182. pOutFile->pCurrent = pOutFile->pMem + FILE_HEADER;
  183. pOutFile->cbLeft = pOutFile->dwMax - FILE_HEADER;
  184. pOutFile->foPhysicalOffset.dwOffset = FILE_HEADER;
  185. pOutFile->ibit = cbitBYTE - 1;
  186. // Allocate first leaf node
  187. if ((pTreeData->rgpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
  188. {
  189. fRet = E_OUTOFMEMORY;
  190. exit3:
  191. FreeHandle (pOutFile->hMem);
  192. goto exit2;
  193. }
  194. pHeader->nidLast = 1;
  195. pHeader->cIdxLevels = 1;
  196. // pNode points to the leaf node structure
  197. pNode = pTreeData->rgpNodeInfo[0];
  198. pNode->Slack = LEAF_SLACK;
  199. // Set the bytes left in node block
  200. pNode->cbLeft = lpipb->BTreeData.Header.dwBlockSize - FOFFSET_SIZE -
  201. sizeof(WORD);
  202. // Set the word length flag
  203. if (lpipb->occf & OCCF_LENGTH)
  204. pTreeData->fOccfLength = 1;
  205. #if 0
  206. // Save some math time if we're doing term-weighting
  207. if (lpipb->idxf & IDXF_NORMALIZE)
  208. {
  209. MEMSET (pTreeData->argbLog, (BYTE)0, cLOG_MAX * sizeof (BYTE));
  210. if ((hLog = _GLOBALALLOC (GMEM_MOVEABLE,
  211. (CB)(cLOG_MAX * sizeof (FLOAT)))) == NULL)
  212. {
  213. fRet = E_OUTOFMEMORY;
  214. goto exit3;
  215. }
  216. pTreeData->lrgrLog = (float FAR *)_GLOBALLOCK (hLog);
  217. }
  218. else
  219. hLog = NULL;
  220. #endif
  221. // Load the input buffer & repeat until all records are processed
  222. pInFile->dwMax = pInFile->cbLeft =
  223. FileRead (pInFile->fFile, pInFile->pMem, pInFile->dwMax, phr);
  224. do
  225. {
  226. // Call the user callback every once in a while
  227. if (!(++dwUniqueTerm % 8192L)
  228. && (lpipb->CallbackInfo.dwFlags & ERRFLAG_STATUS))
  229. {
  230. PFCALLBACK_MSG pCallbackInfo = &lpipb->CallbackInfo;
  231. CALLBACKINFO Info;
  232. Info.dwPhase = 3;
  233. Info.dwIndex = (DWORD)((float)dwUniqueTerm / lpipb->dwUniqueWord * 100);
  234. fRet = (*pCallbackInfo->MessageFunc)
  235. (ERRFLAG_STATUS, pCallbackInfo->pUserData, &Info);
  236. if (S_OK != fRet)
  237. goto exit4;
  238. }
  239. if ((fRet = AddRecordToLeaf (lpipb)) != S_OK)
  240. goto exit4;
  241. // pInFile->pCurrent points to the record size
  242. // 256 is just an arbitrary number of slack to minimize out of data
  243. // kevynct: pCurrent points to a record length which does not include
  244. // the DWORD record len size, so we add this when checking. Actually, we
  245. // add twice that to be safe.
  246. if (pInFile->cbLeft <= SAFE_SLACK ||
  247. (LONG)(GETLONG ((LPUL)(pInFile->pCurrent)) + 2 * sizeof(DWORD)) >= pInFile->cbLeft)
  248. {
  249. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  250. if ((pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
  251. pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft, phr)) < 0)
  252. {
  253. fRet = *phr;
  254. exit4:
  255. // Free log block used for term-weighting
  256. #if 0
  257. FreeHandle (hLog);
  258. #endif
  259. // Free all node blocks
  260. dwLeftover = 0;
  261. while (pTreeData->rgpNodeInfo[dwLeftover] != NULL)
  262. {
  263. FreeBTreeNode(pTreeData->rgpNodeInfo[dwLeftover++]);
  264. }
  265. goto exit3;
  266. }
  267. pInFile->dwMax = pInFile->cbLeft;
  268. pInFile->pCurrent = pInFile->pMem;
  269. }
  270. } while (fRet == S_OK && pInFile->cbLeft);
  271. // Flush anything left in the output buffer
  272. if ((fRet = FlushAllNodes (lpipb)) != S_OK)
  273. goto exit4;
  274. // Write out the sigma table
  275. if (lpipb->idxf & IDXF_NORMALIZE)
  276. {
  277. pHeader->WeightTabOffset = pOutFile->foPhysicalOffset;
  278. pHeader->WeightTabSize = (LCB)((lpipb->dwMaxTopicId + 1) *
  279. sizeof (SIGMA));
  280. if (FileWrite (pOutFile->fFile, lpipb->wi.hrgsigma,
  281. pHeader->WeightTabSize, phr) != (LONG)pHeader->WeightTabSize)
  282. {
  283. fRet = *phr;
  284. goto exit4;
  285. }
  286. pOutFile->foStartOffset = FoAddDw(pOutFile->foStartOffset,
  287. pHeader->WeightTabSize);
  288. }
  289. // Copy info to header
  290. pHeader->FileStamp = INDEX_STAMP;
  291. pHeader->version = VERCURRENT;
  292. pHeader->occf = lpipb->occf;
  293. pHeader->idxf = lpipb->idxf;
  294. pHeader->lcTopics = lpipb->lcTopics;
  295. pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
  296. pHeader->dwMaxFieldId = lpipb->dwMaxFieldId;
  297. pHeader->dwMaxWCount = lpipb->dwMaxWCount;
  298. pHeader->dwMaxOffset = lpipb->dwMaxOffset;
  299. pHeader->dwMaxWLen = lpipb->dwMaxWLen;
  300. pHeader->dwTotalWords = lpipb->dwIndexedWord; // Total indexed words
  301. pHeader->dwUniqueWords = lpipb->dwUniqueWord; // Total unique words
  302. pHeader->dwTotal2bWordLen = lpipb->dwTotal2bWordLen;
  303. pHeader->dwTotal3bWordLen = lpipb->dwTotal3bWordLen;
  304. pHeader->dwUniqueWordLen = lpipb->dwTotalUniqueWordLen;
  305. pHeader->ckeyTopicId = lpipb->cKey[CKEY_TOPIC_ID];
  306. pHeader->ckeyOccCount = lpipb->cKey[CKEY_OCC_COUNT];
  307. iIndex = CKEY_OCC_BASE;
  308. if (pHeader->occf & OCCF_COUNT)
  309. pHeader->ckeyWordCount = lpipb->cKey[iIndex++];
  310. if (pHeader->occf & OCCF_OFFSET)
  311. pHeader->ckeyOffset = lpipb->cKey[iIndex];
  312. if (FileSeekWrite (pOutFile->fFile, (LPB)pHeader, MakeFo (0, 0),
  313. sizeof (IH20), phr) != sizeof (IH20))
  314. {
  315. fRet = *phr;
  316. goto exit4;
  317. }
  318. // Call the user callback every once in a while
  319. if (lpipb->CallbackInfo.dwFlags & ERRFLAG_STATUS)
  320. {
  321. PFCALLBACK_MSG pCallbackInfo = &lpipb->CallbackInfo;
  322. CALLBACKINFO Info;
  323. Info.dwPhase = 3;
  324. Info.dwIndex = 100;
  325. fRet = (*pCallbackInfo->MessageFunc)
  326. (ERRFLAG_STATUS, pCallbackInfo->pUserData, &Info);
  327. if (S_OK != fRet)
  328. goto exit4;
  329. }
  330. fRet = S_OK;
  331. goto exit4;
  332. } /* BuildBTree */
  333. /*************************************************************************
  334. *
  335. * @doc PRIVATE INDEXING
  336. *
  337. * @func HRESULT | AddRecordToLeaf |
  338. * Add the record pointed to by pDtreeData->OutFile->pCurrent to the B-Tree
  339. * contained in the structure.
  340. *
  341. * @parm _LPIPB | lpipb |
  342. * Pointer to the index parameter block
  343. *
  344. * @rdesc Returns S_OK on success or errors if failed
  345. *
  346. *************************************************************************/
  347. #ifdef _DEBUG
  348. static BYTE LastWord[4000] = {0};
  349. static BYTE CurWord[4000] = {0};
  350. #endif
  351. HRESULT PASCAL AddRecordToLeaf (_LPIPB lpipb)
  352. {
  353. // Local Replacement Variables
  354. PBTREEDATA pTreeData = &lpipb->BTreeData;
  355. PFILEDATA pOutFile = &lpipb->OutFile; // Output data
  356. PFILEDATA pInFile = &lpipb->InFile; // Input data
  357. HFPB fOutput = pOutFile->fFile; // Output file
  358. HFPB fInput = lpipb->InFile.fFile; // Input file
  359. LPB pInCurPtr = lpipb->InFile.pCurrent; // Input buffer
  360. PNODEINFO pNode;
  361. LPB lpbWord; // Pointer to the word string
  362. OCCF occf = lpipb->occf;
  363. // Working Variables
  364. DWORD dwTopicCount; // Number of topic in record
  365. DWORD dwFieldId;
  366. DWORD dwBlockSize; // Size of the entire occ block
  367. LPB pDest;
  368. WORD uStringSize;
  369. ERRB errb;
  370. // We always start from the leaf node
  371. pNode = pTreeData->rgpNodeInfo[0];
  372. // Set pointer to working buffer
  373. pDest = pNode->pTmpResult;
  374. // Advance input buffer to the word string
  375. pInCurPtr += sizeof (DWORD);
  376. lpbWord = pInCurPtr;
  377. // Insert the word into the buffer
  378. pDest += PrefixCompressWord (pDest, pInCurPtr,
  379. pNode->pLastWord, pTreeData->fOccfLength);
  380. // Get the word length
  381. uStringSize = GETWORD((LPUW)pInCurPtr);
  382. lpipb->dwTotalUniqueWordLen += uStringSize;
  383. // Adjust for the word length storage
  384. uStringSize += sizeof(SHORT);
  385. // Skip the word
  386. pInCurPtr += uStringSize;
  387. #ifdef _DEBUG
  388. STRCPY (LastWord, CurWord);
  389. MEMCPY (CurWord, lpbWord + 2, GETWORD((LPUW)lpbWord));
  390. CurWord[GETWORD((LPUW)lpbWord)] = 0;
  391. if (STRCMP (LastWord, CurWord) > 0)
  392. SetErrCode (NULL, E_ASSERT);
  393. // if (STRCMP (CurWord, "forbidden") == 0)
  394. // _asm int 3;
  395. #endif
  396. // If OccfLength is set skip it now
  397. // (It has already been appended to the compressed word)
  398. if (pTreeData->fOccfLength)
  399. pInCurPtr += CbByteUnpack(&dwBlockSize, pInCurPtr);
  400. // Copy the FieldID
  401. if (occf & OCCF_FIELDID)
  402. {
  403. CbByteUnpack (&dwFieldId, pInCurPtr);
  404. do {
  405. *pDest++ = *pInCurPtr;
  406. } while (*pInCurPtr++ & 0x80);
  407. }
  408. // Get Topic Count
  409. #if 0
  410. CbByteUnpack (&dwTopicCount, pInCurPtr);
  411. do
  412. {
  413. *pDest++ = *pInCurPtr;
  414. } while (*pInCurPtr++ & 0x80);
  415. #else
  416. dwTopicCount = GETLONG((LPUL)pInCurPtr);
  417. pInCurPtr += sizeof(DWORD);
  418. pDest += CbBytePack(pDest, dwTopicCount);
  419. #endif
  420. // Check to see if this entry will fit in the leaf node
  421. // We can't write the data block until we know where the entry
  422. // will be stored. We must add in FOFFSET_SIZE to our current location
  423. // to determine size. We ignore the block size field, so we might encroach
  424. // on the slack by a few bytes.
  425. if (pNode->cbLeft - pNode->Slack < (SHORT)(pDest -pNode->pTmpResult +FOFFSET_SIZE))
  426. {
  427. HRESULT fRet;
  428. if ((fRet = AddRecordToStem (lpipb, lpbWord)) != S_OK)
  429. return(fRet);
  430. // If the prefix count is zero, no problem
  431. // Else we have to re-copy the word, since we are in a new leaf node
  432. if (0 != pNode->pTmpResult[1])
  433. {
  434. dwBlockSize = PrefixCompressWord (pNode->pTmpResult, lpbWord,
  435. pNode->pLastWord, pTreeData->fOccfLength);
  436. pDest = pNode->pTmpResult + dwBlockSize;
  437. if (occf & OCCF_FIELDID)
  438. pDest += CbBytePack (pDest, dwFieldId);
  439. pDest += CbBytePack (pDest, dwTopicCount);
  440. }
  441. }
  442. // Save new word as last word
  443. MEMCPY (pNode->pLastWord, lpbWord, uStringSize + 2);
  444. // Set pointer to beginning of data block
  445. pDest += CopyFileOffset (pDest, pOutFile->foPhysicalOffset);
  446. // Update the bytes left
  447. pInFile->cbLeft -= (LONG) (pInCurPtr - pInFile->pCurrent);
  448. #ifdef _DEBUG
  449. if (pInFile->cbLeft <= 0)
  450. SetErrCode (NULL, E_ASSERT);
  451. #endif
  452. // Compress data block to output buffer and store it's compressed size
  453. pInFile->pCurrent = pInCurPtr;
  454. if ((dwBlockSize = WriteDataNode (lpipb, dwTopicCount, &errb)) == 0)
  455. return errb;
  456. pDest += CbBytePack (pDest, dwBlockSize);
  457. // Copy the temp buffer to the real node
  458. dwBlockSize = (DWORD)(pDest - pNode->pTmpResult);
  459. MEMCPY (pNode->pCurPtr, pNode->pTmpResult, dwBlockSize);
  460. pNode->pCurPtr += dwBlockSize;
  461. pNode->cbLeft -= (WORD)dwBlockSize;
  462. return S_OK;
  463. }
  464. /*************************************************************************
  465. *
  466. * @doc PRIVATE INDEXING
  467. *
  468. * @func DWORD | AddRecordToStem |
  469. * Add a key to a stem node, creating/flushing nodes as necessary.
  470. *
  471. * @parm LPB | lpbWord |
  472. * The word to add the the stem node (last word in the full leaf node)
  473. *
  474. * @rdesc S_OK if successful, or errors if failed
  475. *
  476. *************************************************************************/
  477. HRESULT PASCAL AddRecordToStem (_LPIPB lpipb, LPB lpbWord)
  478. {
  479. SHORT CurLevel = 0;
  480. PNODEINFO pStemNode;
  481. PNODEINFO pLastNode;
  482. PBTREEDATA pTreeData = &lpipb->BTreeData;
  483. PNODEINFO pLeafNode = pTreeData->rgpNodeInfo[0];
  484. LPB pLastWord;
  485. int cbTemp;
  486. ERRB errb = S_OK;
  487. HRESULT fRet;
  488. // Move up through stem nodes until space can be found/made
  489. pStemNode = pLeafNode;
  490. do
  491. {
  492. pLastWord = pStemNode->pLastWord;
  493. pStemNode = pTreeData->rgpNodeInfo[++CurLevel];
  494. if (pStemNode == NULL)
  495. { // Create a new stem node
  496. if ((pStemNode = pTreeData->rgpNodeInfo[CurLevel] =
  497. AllocBTreeNode (lpipb)) == NULL)
  498. return SetErrCode (NULL, E_OUTOFMEMORY);
  499. pStemNode->Slack = STEM_SLACK;
  500. pStemNode->cbLeft = lpipb->BTreeData.Header.dwBlockSize
  501. - sizeof(WORD);
  502. if (++pTreeData->Header.cIdxLevels > MAX_TREE_HEIGHT)
  503. return E_TREETOOBIG;
  504. }
  505. pTreeData->Header.nidLast++;
  506. } while (pStemNode->cbLeft - pStemNode->Slack <
  507. (SHORT)(GETWORD ((LPUW)pLastWord) + sizeof (SHORT) + FOFFSET_SIZE));
  508. // Work back down through the nodes clearing them to disk
  509. while (CurLevel > 1)
  510. {
  511. pLastNode = pTreeData->rgpNodeInfo[--CurLevel];
  512. pLastWord = pLastNode->pLastWord;
  513. // Copy word to stem node
  514. if ((cbTemp = PrefixCompressWord (pStemNode->pCurPtr, pLastWord,
  515. pStemNode->pLastWord, pTreeData->fOccfLength)) == 0)
  516. {
  517. return errb;
  518. }
  519. pStemNode->pCurPtr += cbTemp;
  520. // Update the last word in the stem node
  521. MEMCPY (pStemNode->pLastWord, pLastWord, GETWORD((LPUW)pLastWord)+ 2*sizeof(WORD));
  522. // Set pointer in stem node
  523. CopyFileOffset (pStemNode->pCurPtr,
  524. lpipb->OutFile.foPhysicalOffset);
  525. pStemNode->pCurPtr += FOFFSET_SIZE;
  526. pStemNode->cbLeft -= FOFFSET_SIZE + cbTemp;
  527. #ifdef _DEBUG
  528. if (pStemNode->cbLeft <= 0)
  529. SetErrCode (NULL, E_ASSERT);
  530. #endif
  531. pStemNode = pTreeData->rgpNodeInfo[CurLevel];
  532. if ((fRet = WriteStemNode (lpipb, pStemNode)) != S_OK)
  533. return(fRet);
  534. }
  535. // Clear the leaf node into the first stem node & reset it
  536. // Copy last word to stem node
  537. if ((cbTemp = PrefixCompressWord (pStemNode->pCurPtr,
  538. pLeafNode->pLastWord, pStemNode->pLastWord,
  539. pTreeData->fOccfLength)) == 0)
  540. {
  541. return errb;
  542. }
  543. pStemNode->pCurPtr += cbTemp;
  544. pStemNode->cbLeft -= cbTemp;
  545. #ifdef _DEBUG
  546. if (pStemNode->cbLeft <= 0)
  547. SetErrCode (NULL, E_ASSERT);
  548. #endif
  549. // Update the last word in the stem node
  550. MEMCPY (pStemNode->pLastWord, pLeafNode->pLastWord,
  551. GETWORD((LPUW)(pLeafNode->pLastWord))+2*sizeof(WORD));
  552. // Set pointer to the leaf node
  553. CopyFileOffset (pStemNode->pCurPtr, lpipb->OutFile.foPhysicalOffset);
  554. pStemNode->pCurPtr += FOFFSET_SIZE;
  555. pStemNode->cbLeft -= FOFFSET_SIZE;
  556. #ifdef _DEBUG
  557. if (pStemNode->cbLeft <= 0)
  558. SetErrCode (NULL, E_ASSERT);
  559. #endif
  560. // Flush leaf node to output buffer and reset it
  561. return WriteLeafNode (lpipb);
  562. }
  563. /*************************************************************************
  564. *
  565. * @doc PRIVATE INDEXING
  566. *
  567. * @func int | CompressDword |
  568. * Compresses the input stream into the output buffer using a high
  569. * bit encoding method. If the buffer is full it will be flushed to
  570. * a file.
  571. *
  572. * @parm PFILEDATA | pOutput |
  573. * Pointer to output buffer info
  574. *
  575. * @parm LPDWORD | pSrc |
  576. * Pointer to the uncompressed input stream
  577. *
  578. * @rdesc Returns the number of compressed bytes buffered
  579. *
  580. *************************************************************************/
  581. int PASCAL CompressDword (PFILEDATA pOutput, DWORD dwValue)
  582. {
  583. LPB pDest = pOutput->pCurrent;
  584. int cBytes = 0; // Count of compressed bytes
  585. ERRB errb;
  586. // Any room left in output buffer?
  587. if (sizeof(DWORD) * 2 >= pOutput->cbLeft)
  588. {
  589. DWORD dwSize;
  590. FileWrite (pOutput->fFile, pOutput->pMem,
  591. (dwSize = (DWORD)(pDest - pOutput->pMem)), &errb);
  592. pDest = pOutput->pMem;
  593. pOutput->cbLeft = pOutput->dwMax;
  594. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  595. }
  596. while (dwValue)
  597. {
  598. *pDest = (BYTE)(dwValue & 0x7F);
  599. cBytes++;
  600. dwValue >>= 7;
  601. if (dwValue != 0)
  602. *pDest |= 0x80;
  603. pDest++;
  604. }
  605. pOutput->pCurrent = pDest;
  606. pOutput->foPhysicalOffset =
  607. FoAddDw (pOutput->foPhysicalOffset, (DWORD)cBytes);
  608. pOutput->cbLeft -= cBytes;
  609. #ifdef _DEBUG
  610. if (pOutput->cbLeft <= 0)
  611. SetErrCode (NULL, E_ASSERT);
  612. #endif
  613. return cBytes;
  614. }
  615. /*************************************************************************
  616. *
  617. * @doc PRIVATE INDEXING
  618. *
  619. * @func DWORD | WriteDataNode |
  620. * Compresses the input stream into the output buffer. If the buffer
  621. * is full it will be flushed to a file.
  622. *
  623. * @parm _LPIPB | lpipb |
  624. * Pointer to global buffer
  625. *
  626. * @parm DWORD | dwTopicCount |
  627. * The number of topics in the input stream
  628. *
  629. * @parm PHRESULT | phr |
  630. * Error buffer
  631. *
  632. * @rdesc Returns the number of compressed bytes written
  633. *
  634. *************************************************************************/
  635. PUBLIC DWORD PASCAL FAR WriteDataNode (_LPIPB lpipb,
  636. DWORD dwTopicCount, PHRESULT phr)
  637. {
  638. // Local replacement Variables
  639. PBTREEDATA pTreeData = &lpipb->BTreeData;
  640. PFILEDATA pOutput = &lpipb->OutFile; // Output data structure
  641. PFILEDATA pInFile = &lpipb->InFile; // Input data structre
  642. HFPB fFile = pOutput->fFile; // Output file handle
  643. // Working Variables
  644. DWORD dwBlockSize; // Size of block to compress
  645. DWORD dwEncodedSize = 0; // Size of encoded block
  646. DWORD dwTopicIdDelta; // Really only used for weight values
  647. DWORD TopicLoop;
  648. DWORD dwSlackSize;
  649. DWORD loop;
  650. DWORD dwTemp;
  651. FILEOFFSET foStart; // Physical beginning of bit compression block
  652. FLOAT rTerm; // Only used when IDXF_NORMALIZE is set
  653. FLOAT rWeight; // Only used when IDXF_NORMALIZE is set
  654. WORD wWeight; // Only used when IDXF_NORMALIZE is set
  655. DWORD dwTopicId = 0; // Only used when IDXF_NORMALIZE is set
  656. int cbTemp; // # of compressed bytes that uncompressed
  657. OCCF occf = lpipb->occf;
  658. HRESULT fRet;
  659. foStart = pOutput->foPhysicalOffset;
  660. wWeight = 0; // UNDONE: Don't need it
  661. for (TopicLoop = dwTopicCount; TopicLoop > 0; --TopicLoop)
  662. {
  663. // Move to the byte boundary
  664. if (pOutput->ibit != cbitBYTE - 1)
  665. {
  666. pOutput->ibit = cbitBYTE - 1;
  667. if (--pOutput->cbLeft)
  668. {
  669. pOutput->pCurrent++;
  670. pOutput->foPhysicalOffset = FoAddDw (pOutput->foPhysicalOffset, 1);
  671. }
  672. else
  673. {
  674. if (FileWrite (pOutput->fFile, pOutput->pMem,
  675. dwTemp = (DWORD)(pOutput->pCurrent - pOutput->pMem), phr) != (LONG)dwTemp)
  676. return(0);
  677. pOutput->pCurrent = pOutput->pMem;
  678. pOutput->cbLeft = pOutput->dwMax;
  679. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwTemp);
  680. #ifdef _DEBUG
  681. MEMSET (pOutput->pMem, 0, pOutput->dwMax);
  682. #endif
  683. }
  684. }
  685. // Store TopicId as necessary
  686. if (pInFile->cbLeft < 2 * sizeof (DWORD))
  687. {
  688. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  689. pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem + pInFile->cbLeft,
  690. pInFile->dwMax - pInFile->cbLeft, phr);
  691. pInFile->dwMax = pInFile->cbLeft;
  692. pInFile->pCurrent = pInFile->pMem;
  693. }
  694. cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
  695. dwTopicId += dwTopicIdDelta; // Get the real TopicID
  696. if ((fRet = FAddDword (pOutput, dwTopicIdDelta,
  697. lpipb->cKey[CKEY_TOPIC_ID])) != S_OK)
  698. {
  699. SetErrCode(phr, fRet);
  700. return(0);
  701. }
  702. pInFile->pCurrent += cbTemp;
  703. pInFile->cbLeft -= cbTemp;
  704. if (occf & OCCF_HAVE_OCCURRENCE)
  705. {
  706. // Get number of occ data records for this topic
  707. if (pInFile->cbLeft < 2 * sizeof (DWORD))
  708. {
  709. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  710. pInFile->cbLeft += FileRead (pInFile->fFile,
  711. pInFile->pMem + pInFile->cbLeft,
  712. pInFile->dwMax - pInFile->cbLeft, phr);
  713. pInFile->dwMax = pInFile->cbLeft;
  714. pInFile->pCurrent = pInFile->pMem;
  715. }
  716. cbTemp = CbByteUnpack (&dwBlockSize, pInFile->pCurrent);
  717. pInFile->pCurrent += cbTemp;
  718. pInFile->cbLeft -= cbTemp;
  719. }
  720. // If we are term weighing we have to calculate the weight
  721. if (lpipb->idxf & IDXF_NORMALIZE)
  722. {
  723. #ifndef ISBU_IR_CHANGE
  724. // log10(x/y) == log10 (x) - log10 (y). Since x in our case is a known constant,
  725. // 100,000,000, I'm replacing that with its equivalent log10 value of 8.0 and subtracting
  726. // the log10(y) from it
  727. rTerm = (float) (8.0 - log10((double) dwTopicCount));
  728. // In extreme cases, rTerm could be 0 or even -ve (when dwTopicCount approaches or
  729. // exceeds 100,000,000)
  730. if (rTerm <= (float) 0.0)
  731. rTerm = cVerySmallWt; // very small value. == log(100 mil/ 95 mil)
  732. // NOTE : rWeight for the doc term would be as follows:
  733. // rWeight = float(min(4096, dwBlockSize)) * rTerm / lpipb->wi.hrgsigma[dwTopicId]
  734. //
  735. // Since rTerm needs to be recomputed again for the query term weight computation,
  736. // and since rTerm will be the same value for the current term ('cos N and n of log(N/n)
  737. // are the same (N = 100 million and n is whatever the doc term freq is for the term),
  738. // we will factor in the second rTerm at index time. This way, we don't have to deal
  739. // with rTerm at search time (reduces computation and query time shortens)
  740. //
  741. // MV 2.0 initially did the same thing. However, BinhN removed the second rTerm
  742. // because he decided to remove the rTerm altogether from the query term weight. He
  743. // did that to keep the scores reasonably high.
  744. rWeight = ((float) min(cTFThreshold, dwBlockSize)) * rTerm * rTerm / lpipb->wi.hrgsigma[dwTopicId];
  745. // without the additional rTerm, we would probably be between 0.0 and 1.0
  746. if (rWeight > rTerm)
  747. wWeight = 0xFFFF;
  748. else
  749. wWeight = (WORD) ((float)0xFFFF * rWeight / rTerm);
  750. #else
  751. rTerm = (float) (65535.0 * 8) / (float)dwTopicCount;
  752. rWeight = (float)dwBlockSize * rTerm / lpipb->wi.hrgsigma[dwTopicId];
  753. if (rWeight >= 65535.0)
  754. wWeight = 65335;
  755. else
  756. wWeight = (WORD)rWeight;
  757. #endif // ISBU_IR_CHANGE
  758. // Write the weight to the output buffer
  759. if ((fRet = FWriteBits (&lpipb->OutFile, (DWORD)wWeight,
  760. (BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
  761. {
  762. SetErrCode (phr, fRet);
  763. return(0);
  764. }
  765. }
  766. // Don't do anything else if there is nothing else to do!!!
  767. if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
  768. continue;
  769. // Write the OccCount
  770. if ((fRet = FAddDword (pOutput, dwBlockSize,
  771. lpipb->cKey[CKEY_OCC_COUNT])) != S_OK)
  772. {
  773. SetErrCode (phr, fRet);
  774. return(0);
  775. }
  776. // Encode the occ block
  777. for (loop = dwBlockSize; loop > 0; loop--)
  778. {
  779. int iIndex;
  780. iIndex = CKEY_OCC_BASE;
  781. // Make sure input buffer holds enough data
  782. if (pInFile->cbLeft < 5 * sizeof (DWORD))
  783. {
  784. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  785. pInFile->cbLeft += FileRead (pInFile->fFile,
  786. pInFile->pMem + pInFile->cbLeft,
  787. pInFile->dwMax - pInFile->cbLeft, phr);
  788. pInFile->dwMax = pInFile->cbLeft;
  789. pInFile->pCurrent = pInFile->pMem;
  790. }
  791. if (occf & OCCF_COUNT)
  792. {
  793. cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
  794. pInFile->pCurrent += cbTemp;
  795. pInFile->cbLeft -= cbTemp;
  796. if ((fRet = FAddDword (pOutput, dwTemp, lpipb->cKey[iIndex])) !=
  797. S_OK)
  798. {
  799. SetErrCode (phr, fRet);
  800. return(0);
  801. }
  802. iIndex++;
  803. }
  804. if (occf & OCCF_OFFSET)
  805. {
  806. cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
  807. pInFile->pCurrent += cbTemp;
  808. pInFile->cbLeft -= cbTemp;
  809. if ((fRet = FAddDword (pOutput, dwTemp, lpipb->cKey[iIndex])) !=
  810. S_OK)
  811. {
  812. SetErrCode (phr, fRet);
  813. return(0);
  814. }
  815. }
  816. }
  817. }
  818. // Advance to next byte (we are partially through a byte now)
  819. pOutput->ibit = cbitBYTE - 1;
  820. pOutput->pCurrent++;
  821. pOutput->foPhysicalOffset = FoAddDw (pOutput->foPhysicalOffset, 1);
  822. pOutput->cbLeft--;
  823. #ifdef _DEBUG
  824. if (pOutput->cbLeft <= 0)
  825. SetErrCode (NULL, E_ASSERT);
  826. #endif
  827. dwEncodedSize += DwSubFo (pOutput->foPhysicalOffset, foStart);
  828. // Leave slack space, but not for uncommon words
  829. if (dwTopicCount <= 2)
  830. dwSlackSize = 0;
  831. else
  832. dwSlackSize = dwEncodedSize / 10;
  833. dwEncodedSize += dwSlackSize;
  834. // Keep a running total of all allocated slack space
  835. pTreeData->Header.dwSlackCount += dwSlackSize;
  836. while (dwSlackSize)
  837. {
  838. if (pOutput->cbLeft < (LONG)dwSlackSize)
  839. { // The slack block doesn't fit in the output buffer
  840. // Write as much as we can then flush the buffer and write the rest
  841. // MEMSET (pOutput->pCurrent, 0, pOutput->cbLeft);
  842. DWORD dwSize;
  843. dwSlackSize -= pOutput->cbLeft;
  844. if (0 == FileWrite (fFile, pOutput->pMem,
  845. dwSize = pOutput->dwMax, phr))
  846. {
  847. return 0;
  848. }
  849. pOutput->pCurrent = pOutput->pMem;
  850. pOutput->foPhysicalOffset =
  851. FoAddDw (pOutput->foPhysicalOffset, pOutput->cbLeft);
  852. pOutput->cbLeft = pOutput->dwMax;
  853. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  854. }
  855. else
  856. { // The slack fits, no problems
  857. MEMSET (pOutput->pCurrent, 0, dwSlackSize);
  858. pOutput->pCurrent += dwSlackSize;
  859. pOutput->foPhysicalOffset =
  860. FoAddDw (pOutput->foPhysicalOffset, dwSlackSize);
  861. pOutput->cbLeft -= dwSlackSize;
  862. #ifdef _DEBUG
  863. if (pOutput->cbLeft <= 0)
  864. SetErrCode (NULL, E_ASSERT);
  865. #endif
  866. dwSlackSize = 0;
  867. }
  868. }
  869. return dwEncodedSize;
  870. }
  871. /*************************************************************************
  872. *
  873. * @doc PRIVATE INDEXING
  874. *
  875. * @func void | WriteStemNode |
  876. * Flushes a stem node in the BTree to the output buffer. Once flushed,
  877. * the node is reset to the beginning and filled with zeros.
  878. *
  879. * @parm _LPIPB | lpipb |
  880. * Pointer the IPB structure
  881. *
  882. * @parm PNODEINFO | pNode |
  883. * Pointer to the node to flush
  884. *
  885. *************************************************************************/
  886. PRIVATE HRESULT PASCAL WriteStemNode (_LPIPB lpipb, PNODEINFO pNode)
  887. {
  888. // Local Replacement Variable
  889. PBTREEDATA pTreeData = &lpipb->BTreeData;
  890. PFILEDATA pOutput = &lpipb->OutFile; // Output structure
  891. LPB pDest; // Output buffer
  892. LPB pStart = pNode->pBuffer; // Start of node buffer
  893. // Local Working Variables
  894. DWORD dwBytesLeft; // Bytes left to write
  895. ERRB errb;
  896. #if 0 // Use 2-bytes for cbLeft to simplify the work of update
  897. // Compress CbLeft to output buffer
  898. dwBytesLeft = lpipb->BTreeData.Header.dwBlockSize - FOFFSET_SIZE -
  899. CompressDword (pOutput, (DWORD)pNode->cbLeft);
  900. #else
  901. *(LPUW)(pOutput->pCurrent) = (WORD)pNode->cbLeft;
  902. pOutput->pCurrent += sizeof(WORD);
  903. pOutput->cbLeft -= sizeof(WORD);
  904. pOutput->foPhysicalOffset =
  905. FoAddDw (pOutput->foPhysicalOffset, (DWORD)sizeof(WORD));
  906. dwBytesLeft = lpipb->BTreeData.Header.dwBlockSize - sizeof(WORD);
  907. #endif
  908. pDest = pOutput->pCurrent;
  909. // Keep a running total of all allocated slack space
  910. pTreeData->Header.dwSlackCount += pNode->cbLeft;
  911. // This is why the buffer must be >= BTREE_NODE_SIZE
  912. // This could be put in a loop to avoid that restriction, but it
  913. // is probably not worth it. (See also WriteLeafNode)
  914. if (pOutput->cbLeft < (LONG)dwBytesLeft)
  915. {
  916. LONG dwSize;
  917. if (FileWrite (pOutput->fFile,
  918. pOutput->pMem, dwSize = (DWORD)(pDest - pOutput->pMem), &errb) != dwSize)
  919. return(errb);
  920. pDest = pOutput->pMem;
  921. pOutput->cbLeft = pOutput->dwMax;
  922. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  923. }
  924. MEMCPY (pDest, pStart, dwBytesLeft);
  925. pOutput->foPhysicalOffset =
  926. FoAddDw (pOutput->foPhysicalOffset, dwBytesLeft);
  927. pOutput->cbLeft -= dwBytesLeft;
  928. #ifdef _DEBUG
  929. if (pOutput->cbLeft <= 0)
  930. SetErrCode (NULL, E_ASSERT);
  931. #endif
  932. // Set the external variable
  933. pOutput->pCurrent = pDest + dwBytesLeft;
  934. // Set to all zeros so we know when we have reached the end of data later
  935. MEMSET (pNode->pBuffer, 0, lpipb->BTreeData.Header.dwBlockSize);
  936. pNode->cbLeft = lpipb->BTreeData.Header.dwBlockSize - sizeof(WORD);
  937. pNode->pCurPtr = pNode->pBuffer;
  938. *(PUSHORT)pNode->pLastWord = 0;
  939. return(S_OK);
  940. }
  941. /*************************************************************************
  942. *
  943. * @doc PRIVATE INDEXING
  944. *
  945. * @func void | WriteLeafNode |
  946. * Flushes a leaf node in the BTree to the output buffer. Once flushed,
  947. * the node is reset to the beginning and filled with zeros.
  948. *
  949. * @parm _LPIPB | lpipb |
  950. * Pointer to index block
  951. *
  952. * @rdesc S_OK or other errors
  953. *************************************************************************/
  954. PRIVATE HRESULT PASCAL NEAR WriteLeafNode (_LPIPB lpipb)
  955. {
  956. // Local Replacement Variables
  957. PBTREEDATA pTreeData = &lpipb->BTreeData;
  958. PFILEDATA pOutput = &lpipb->OutFile; // Output data structure
  959. LPB pDest = pOutput->pCurrent; // Output buffer
  960. FILEOFFSET OffsetPointer = pTreeData->OffsetPointer;
  961. FILEOFFSET foPhysicalOffset = pOutput->foPhysicalOffset;
  962. PNODEINFO pNode = pTreeData->rgpNodeInfo[0]; // Leaf node
  963. LPB pStart = pNode->pBuffer; // Beginning of the node buffer
  964. // Working Variables
  965. DWORD dwLeft;
  966. FILEOFFSET StartOffset; // Physical offset of the begining
  967. // of the output buffer
  968. ERRB errb;
  969. // Backpatch the current offset to the last nodes pointer
  970. if (!FoIsNil (OffsetPointer))
  971. {
  972. // Is the backpatch location in the output buffer?
  973. if (FoCompare (OffsetPointer,
  974. (StartOffset = FoSubFo (foPhysicalOffset,
  975. MakeFo ((DWORD)(pDest - pOutput->pMem), 0)))) >= 0)
  976. {
  977. CopyFileOffset (pOutput->pMem + DwSubFo
  978. (OffsetPointer, StartOffset), foPhysicalOffset);
  979. }
  980. else
  981. {
  982. if (FileSeekWrite (pOutput->fFile, &foPhysicalOffset,
  983. OffsetPointer, sizeof (DWORD), &errb) !=
  984. sizeof (DWORD))
  985. return(errb);
  986. FileSeek (pOutput->fFile, StartOffset, 0, NULL);
  987. }
  988. }
  989. // Set the backpatch location for next time
  990. pTreeData->OffsetPointer = foPhysicalOffset;
  991. // Skip the record pointer for this record (will be backpatched next time)
  992. if (pOutput->cbLeft <= 0 )
  993. {
  994. LONG dwSize;
  995. if (FileWrite (pOutput->fFile, pOutput->pMem,
  996. dwSize = (DWORD)(pDest - pOutput->pMem), &errb) != dwSize)
  997. return(errb);
  998. pDest = pOutput->pMem;
  999. pOutput->cbLeft = pOutput->dwMax;
  1000. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  1001. }
  1002. MEMSET (pDest, 0, FOFFSET_SIZE);
  1003. pOutput->cbLeft -= FOFFSET_SIZE;
  1004. #ifdef _DEBUG
  1005. if (pOutput->cbLeft <= 0)
  1006. SetErrCode (NULL, E_ASSERT);
  1007. #endif
  1008. pOutput->pCurrent = pDest + FOFFSET_SIZE;
  1009. pOutput->foPhysicalOffset = FoAddDw (foPhysicalOffset, FOFFSET_SIZE);
  1010. #if 0 // Use 2-bytes for cbLeft to simplify the work of update
  1011. // Compress CbLeft to output buffer
  1012. dwLeft = lpipb->BTreeData.Header.dwBlockSize - FOFFSET_SIZE -
  1013. CompressDword (pOutput, (DWORD)pNode->cbLeft);
  1014. #else
  1015. *(LPUW)(pOutput->pCurrent) = (WORD)pNode->cbLeft;
  1016. pOutput->foPhysicalOffset =
  1017. FoAddDw (pOutput->foPhysicalOffset, (DWORD)sizeof(WORD));
  1018. pOutput->cbLeft -= sizeof(WORD);
  1019. dwLeft = lpipb->BTreeData.Header.dwBlockSize - FOFFSET_SIZE - sizeof(WORD);
  1020. pOutput->pCurrent += sizeof(WORD);
  1021. #endif
  1022. pDest = pOutput->pCurrent;
  1023. // Keep a running total of all allocated slack space
  1024. pTreeData->Header.dwSlackCount += pNode->cbLeft;
  1025. // This is why the buffer must be >= BTREE_NODE_SIZE
  1026. // This could be put in a loop to avoid that restriction, but it
  1027. // is probably not worth it. (See also WriteStemNode)
  1028. if (pOutput->cbLeft < (LONG)dwLeft)
  1029. {
  1030. LONG dwSize;
  1031. if (FileWrite (pOutput->fFile, pOutput->pMem,
  1032. dwSize = (DWORD)(pDest - pOutput->pMem), &errb) != dwSize)
  1033. return(errb);
  1034. pDest = pOutput->pMem;
  1035. pOutput->cbLeft = pOutput->dwMax;
  1036. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  1037. }
  1038. MEMCPY (pDest, pStart, dwLeft);
  1039. pOutput->foPhysicalOffset =
  1040. FoAddDw (pOutput->foPhysicalOffset, dwLeft);
  1041. pOutput->cbLeft -= dwLeft;
  1042. #ifdef _DEBUG
  1043. if (pOutput->cbLeft <= 0)
  1044. SetErrCode (NULL, E_ASSERT);
  1045. #endif
  1046. pOutput->pCurrent = pDest + dwLeft;
  1047. // Reset buffer back to beginning
  1048. MEMSET (pNode->pBuffer, 0, lpipb->BTreeData.Header.dwBlockSize);
  1049. pNode->pCurPtr = pNode->pBuffer;
  1050. // Set the bytes left in node block
  1051. pNode->cbLeft = lpipb->BTreeData.Header.dwBlockSize -
  1052. FOFFSET_SIZE - sizeof(WORD);
  1053. *(PUSHORT)pNode->pLastWord = 0;
  1054. return(S_OK);
  1055. }
  1056. /*************************************************************************
  1057. * @doc PRIVATE INDEXING
  1058. *
  1059. * @func PNODEINFO | AllocBTreeNode |
  1060. * Allocates memory for the node structure as well as the data buffer
  1061. * contained in the structure.
  1062. *
  1063. * @parm _LPIPB | lpipb |
  1064. * Pointer to index parameter block
  1065. *
  1066. * @rdesc Returns a pointer to the newly allocated node
  1067. *************************************************************************/
  1068. PUBLIC PNODEINFO PASCAL FAR AllocBTreeNode (_LPIPB lpipb)
  1069. {
  1070. PNODEINFO pNode;
  1071. // Allocate node structure
  1072. if ((pNode = GlobalLockedStructMemAlloc (sizeof (NODEINFO))) == NULL)
  1073. {
  1074. exit0:
  1075. SetErrCode (NULL, E_OUTOFMEMORY);
  1076. return NULL;
  1077. }
  1078. // Allocate data buffer
  1079. if ((pNode->hMem =
  1080. _GLOBALALLOC (DLLGMEM_ZEROINIT,
  1081. pNode->dwBlockSize = lpipb->BTreeData.Header.dwBlockSize)) == NULL)
  1082. {
  1083. exit1:
  1084. GlobalLockedStructMemFree(pNode);
  1085. goto exit0;
  1086. }
  1087. pNode->pCurPtr = pNode->pBuffer = (LPB)_GLOBALLOCK (pNode->hMem);
  1088. // Allocate a buffer with the maximum word length, which is the block
  1089. // size
  1090. if ((pNode->hLastWord =
  1091. _GLOBALALLOC (DLLGMEM_ZEROINIT, pNode->dwBlockSize)) == NULL)
  1092. {
  1093. exit2:
  1094. FreeHandle (pNode->hMem);
  1095. goto exit1;
  1096. }
  1097. pNode->pLastWord = (LPB)_GLOBALLOCK (pNode->hLastWord);
  1098. // Alllocate temporary result buffer.
  1099. if ((pNode->hTmp =
  1100. _GLOBALALLOC (DLLGMEM_ZEROINIT, pNode->dwBlockSize)) == NULL)
  1101. {
  1102. FreeHandle (pNode->hLastWord);
  1103. goto exit2;
  1104. }
  1105. pNode->pTmpResult = (LPB)_GLOBALLOCK (pNode->hTmp);
  1106. return pNode;
  1107. }
  1108. /*************************************************************************
  1109. * @doc PRIVATE INDEXING
  1110. *
  1111. * @func VOID | FreeBTreeNode |
  1112. * Free all memory allocated for the node
  1113. *
  1114. * @parm PNODEINFO | pNode |
  1115. * BTree node to be freed
  1116. *************************************************************************/
  1117. PUBLIC VOID PASCAL FAR FreeBTreeNode (PNODEINFO pNode)
  1118. {
  1119. if (pNode == NULL)
  1120. return;
  1121. FreeHandle (pNode->hTmp);
  1122. FreeHandle (pNode->hMem);
  1123. FreeHandle (pNode->hLastWord);
  1124. GlobalLockedStructMemFree(pNode);
  1125. }
  1126. /*************************************************************************
  1127. *
  1128. * @doc PRIVATE INDEXING
  1129. *
  1130. * @func HRESULT | PrefixCompressWord |
  1131. * Adds a word to a record based on the last word in the node.
  1132. *
  1133. * @parm LPB | pDest |
  1134. * Pointer to the destination buffer
  1135. *
  1136. * @parm LPB | lpbWord |
  1137. * Pointer to the word string to add to node. The format is:
  1138. * - 2-byte: string length
  1139. * - n-byte: the string itself
  1140. * - cbBytePack: real word length
  1141. *
  1142. * @parm LPB | pLastWord |
  1143. * Pointer to the last word entered in the destination buffer
  1144. *
  1145. * @parm int | fOccfLengthSet |
  1146. * Set to 1 if OCCF_LENGTH field is set, else 0
  1147. *
  1148. * @parm PHRESULT | pErrb |
  1149. * Pointer to error structure
  1150. *
  1151. * @rdesc returns number of bytes written to the destination buffer
  1152. * @rcomm
  1153. * Strings are compressed based on how many beginning bytes
  1154. * (prefix) it has in common woth the previous word. The format is
  1155. * - String's length : 2-byte CbPacked
  1156. * - Prefix length : 1-byte (0 - 127). If the high bit is set
  1157. * another word length is to follow the word
  1158. * - Word : n-byte without the prefix
  1159. * - Word's real length - 2-byte CbPacked: only exist if the
  1160. * prefix length high bit is set
  1161. *************************************************************************/
  1162. PUBLIC int PASCAL FAR PrefixCompressWord
  1163. (LPB pDest, LPB lpbWord, LPB pLastWord, int fOccfLengthSet)
  1164. {
  1165. // Working Variables
  1166. int bPrefix; // The number of prefix bytes that match
  1167. unsigned int wPostfix; // Bytes left over that don't match
  1168. USHORT cbMinWordLen; // Smallest word size between the two words
  1169. LPB pStart = pDest; // Starting position
  1170. DWORD dwRealLength; // The real length of the word
  1171. // Get the minimum word length
  1172. wPostfix = GETWORD ((LPUW)lpbWord);
  1173. if ((cbMinWordLen = GETWORD ((LPUW)pLastWord)) > wPostfix)
  1174. cbMinWordLen = (USHORT) wPostfix;
  1175. // Add one to adjust for two byte word headers (saves an add in the loop)
  1176. cbMinWordLen++;
  1177. for (bPrefix = 2; bPrefix <= cbMinWordLen; bPrefix++)
  1178. {
  1179. if (lpbWord[bPrefix] != pLastWord[bPrefix])
  1180. break;
  1181. }
  1182. // Adjust back to the real value
  1183. bPrefix -= 2;
  1184. // Prefix must be <= 127 (high bit is used to indicate fOccfLength field)
  1185. if (bPrefix > 127)
  1186. bPrefix = 127;
  1187. cbMinWordLen = (USHORT) wPostfix; // Save the word length
  1188. wPostfix -= bPrefix;
  1189. // Add wLen to wPostfix to get total byte count then write it.
  1190. // The extra byte is for the prefix byte
  1191. pDest += (USHORT)CbBytePack (pDest, (DWORD)(wPostfix + 1));
  1192. // If WordLen == string length then don't write WordLen
  1193. if (fOccfLengthSet)
  1194. {
  1195. CbByteUnpack (&dwRealLength, lpbWord + sizeof(WORD) + cbMinWordLen );
  1196. if (dwRealLength == cbMinWordLen)
  1197. fOccfLengthSet = FALSE;
  1198. }
  1199. // Write prefix size
  1200. // If fOccfLengthSet is set, set high bit of bPrefix
  1201. if (fOccfLengthSet)
  1202. *pDest = bPrefix | 0x80;
  1203. else
  1204. *pDest = (BYTE) bPrefix;
  1205. pDest++;
  1206. // Copy the postfix string over
  1207. MEMCPY (pDest, lpbWord + (bPrefix + sizeof (SHORT)), wPostfix);
  1208. pDest += wPostfix;
  1209. // if fOccfLengthSet is set append WordLen to end of word
  1210. // (WordLen field follows word in input stream)
  1211. if (fOccfLengthSet)
  1212. pDest += CbBytePack (pDest, dwRealLength);
  1213. return (int)(pDest - pStart);
  1214. }
  1215. /*************************************************************************
  1216. *
  1217. * @doc PRIVATE INDEXING
  1218. *
  1219. * @func void | FlushAllNodes |
  1220. * Flushes the remaining nodes to disk when the tree is completely built.
  1221. *
  1222. * @parm _LPIPB | lpipb |
  1223. * Pointer to index block
  1224. *
  1225. * @rdesc S_OK on success or errors if failed
  1226. *
  1227. *************************************************************************/
  1228. HRESULT PASCAL FlushAllNodes (_LPIPB lpipb)
  1229. {
  1230. PBTREEDATA pTreeData = &lpipb->BTreeData;
  1231. PFILEDATA pOutput = &lpipb->OutFile;
  1232. PNODEINFO pLeafNode;
  1233. PNODEINFO pStemNode;
  1234. int WordSize;
  1235. BYTE curLevel = 0;
  1236. ERRB errb = S_OK;
  1237. HRESULT fRet;
  1238. pStemNode = pTreeData->rgpNodeInfo[0];
  1239. while (pTreeData->rgpNodeInfo[++curLevel] != NULL)
  1240. {
  1241. pLeafNode = pStemNode;
  1242. pStemNode = pTreeData->rgpNodeInfo[curLevel];
  1243. if ((WordSize = PrefixCompressWord (pStemNode->pCurPtr,
  1244. pLeafNode->pLastWord, pStemNode->pLastWord,
  1245. pTreeData->fOccfLength)) == 0)
  1246. {
  1247. return errb;
  1248. }
  1249. // Save new word as last word
  1250. MEMCPY (pStemNode->pLastWord, pLeafNode->pLastWord,
  1251. GETWORD ((LPUW)pLeafNode->pLastWord) + 2);
  1252. pStemNode->pCurPtr += WordSize;
  1253. pStemNode->cbLeft -= WordSize;
  1254. #ifdef _DEBUG
  1255. if (pOutput->cbLeft <= 0)
  1256. SetErrCode (NULL, E_ASSERT);
  1257. #endif
  1258. CopyFileOffset (pStemNode->pCurPtr,
  1259. lpipb->OutFile.foPhysicalOffset);
  1260. pStemNode->pCurPtr += FOFFSET_SIZE;
  1261. pStemNode->cbLeft -= FOFFSET_SIZE;
  1262. #ifdef _DEBUG
  1263. if (pOutput->cbLeft <= 0)
  1264. SetErrCode (NULL, E_ASSERT);
  1265. #endif
  1266. if (curLevel == 1)
  1267. {
  1268. if ((fRet = WriteLeafNode (lpipb)) != S_OK)
  1269. return(fRet);
  1270. }
  1271. else
  1272. {
  1273. if ((fRet = WriteStemNode (lpipb, pLeafNode)) != S_OK)
  1274. return(fRet);
  1275. }
  1276. }
  1277. // Set the pointer to the top stem node
  1278. pTreeData->Header.foIdxRoot = pOutput->foPhysicalOffset;
  1279. pTreeData->Header.nidIdxRoot = pOutput->foPhysicalOffset.dwOffset;
  1280. if (curLevel == 1)
  1281. {
  1282. if ((fRet = WriteLeafNode (lpipb)) != S_OK)
  1283. return(fRet);
  1284. }
  1285. else
  1286. {
  1287. if ((fRet = WriteStemNode (lpipb, pStemNode)) != S_OK)
  1288. return(fRet);
  1289. }
  1290. {
  1291. LONG dwSize;
  1292. // Flush the output buffer
  1293. if (FileWrite (pOutput->fFile, pOutput->pMem,
  1294. dwSize = (DWORD)(pOutput->pCurrent - pOutput->pMem), &errb) != dwSize)
  1295. return(errb);
  1296. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  1297. }
  1298. return S_OK;
  1299. }
  1300. PRIVATE HRESULT PASCAL NEAR WriteBitStreamDWord (PFILEDATA pOutput, DWORD dw,
  1301. int ckeyCenter)
  1302. {
  1303. BYTE ucBits;
  1304. HRESULT fRet;
  1305. // Bitstream scheme.
  1306. //
  1307. // This writes "dw" one-bits followed by a zero-bit.
  1308. //
  1309. for (; dw;)
  1310. {
  1311. if (dw < cbitBYTE * sizeof(DWORD))
  1312. {
  1313. ucBits = (BYTE)dw;
  1314. dw = 0;
  1315. }
  1316. else
  1317. {
  1318. ucBits = cbitBYTE * sizeof(DWORD);
  1319. dw -= cbitBYTE * sizeof(DWORD);
  1320. }
  1321. if ((fRet = FWriteBits(pOutput, argdwBits[ucBits],
  1322. (BYTE)ucBits)) != S_OK)
  1323. return fRet;
  1324. }
  1325. return FWriteBool(pOutput, 0);
  1326. }
  1327. PRIVATE HRESULT PASCAL NEAR WriteFixedDWord (PFILEDATA pOutput, DWORD dw,
  1328. int ckeyCenter)
  1329. {
  1330. // This just writes "ckey.ucCenter" bits of data.
  1331. return (FWriteBits (pOutput, dw, (BYTE)(ckeyCenter + 1)));
  1332. }
  1333. PRIVATE HRESULT PASCAL NEAR WriteBellDWord (PFILEDATA pOutput, DWORD dw,
  1334. int ckeyCenter)
  1335. {
  1336. BYTE ucBits;
  1337. HRESULT fRet;
  1338. // The "BELL" scheme is more complicated.
  1339. ucBits = (BYTE)CbitBitsDw(dw);
  1340. if (ucBits <= ckeyCenter)
  1341. {
  1342. //
  1343. // Encoding a small value. Write a zero, then write
  1344. // "ckey.ucCenter" bits of the value, which
  1345. // is guaranteed to be enough.
  1346. //
  1347. if ((fRet = FWriteBool(pOutput, 0)) != S_OK)
  1348. return fRet;
  1349. return FWriteBits(pOutput, dw, (BYTE)(ckeyCenter));
  1350. }
  1351. //
  1352. // Encoding a value that won't fit in "ckey.ucCenter" bits.
  1353. // "ucBits" is how many bits it will really take.
  1354. //
  1355. // First, write out "ucBits - ckey.ucCenter" one-bits.
  1356. //
  1357. if ((fRet = FWriteBits(pOutput, argdwBits[ucBits -
  1358. ckeyCenter], (BYTE)(ucBits - ckeyCenter))) != S_OK)
  1359. return fRet;
  1360. //
  1361. // Now, write out the value in "ucBits" bits,
  1362. // but zero the high-bit first.
  1363. //
  1364. return FWriteBits(pOutput, dw & argdwBits[ucBits - 1], ucBits);
  1365. }
  1366. /*************************************************************************
  1367. *
  1368. * @doc PRIVATE INDEXING
  1369. *
  1370. * @func HRESULT | FWriteBits |
  1371. * Writes a bunch of bits into the output buffer.
  1372. *
  1373. * @parm PFILEDATA | pOutput |
  1374. * Pointer to the output data structure
  1375. *
  1376. * @parm DWORD | dwVal |
  1377. * DWORD value to write
  1378. *
  1379. * @parm BYTE | cbits |
  1380. * Number of bits to write from dwVal
  1381. *
  1382. * @rdesc Returns S_OK on success or errors if failed
  1383. *
  1384. *************************************************************************/
  1385. PUBLIC HRESULT FAR PASCAL FWriteBits (PFILEDATA pOutput, DWORD dwVal, BYTE cBits)
  1386. {
  1387. BYTE cbitThisPassBits;
  1388. BYTE bThis;
  1389. ERRB errb;
  1390. static DWORD Count = 0;
  1391. // Loop until no bits left
  1392. for (; cBits;)
  1393. {
  1394. if (pOutput->ibit < 0)
  1395. {
  1396. pOutput->pCurrent++;
  1397. pOutput->foPhysicalOffset =
  1398. FoAddDw (pOutput->foPhysicalOffset, 1);
  1399. pOutput->cbLeft--;
  1400. #ifdef _DEBUG
  1401. if (pOutput->cbLeft <= 0)
  1402. SetErrCode (NULL, E_ASSERT);
  1403. #endif
  1404. // Room left in output buffer?
  1405. if (pOutput->cbLeft <= 256)
  1406. {
  1407. LONG dwSize;
  1408. if (FileWrite (pOutput->fFile, pOutput->pMem,
  1409. dwSize = (DWORD)(pOutput->pCurrent - pOutput->pMem), &errb) !=
  1410. dwSize)
  1411. return(errb);
  1412. pOutput->cbLeft = pOutput->dwMax;
  1413. pOutput->pCurrent = pOutput->pMem;
  1414. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset,
  1415. dwSize);
  1416. #ifdef _DEBUG
  1417. // MEMSET (pOutput->pMem, 0, pOutput->dwMax);
  1418. // Count++;
  1419. // if (!FoEquals(pOutput->foStartOffset, pOutput->foPhysicalOffset))
  1420. // _asm int 3;
  1421. #endif
  1422. }
  1423. pOutput->ibit = cbitBYTE - 1;
  1424. }
  1425. else
  1426. { // Write some bits.
  1427. cbitThisPassBits = (pOutput->ibit + 1 < cBits) ?
  1428. pOutput->ibit + 1 : cBits;
  1429. bThis = (pOutput->ibit == cbitBYTE - 1) ?
  1430. 0 : *pOutput->pCurrent;
  1431. bThis |= ((dwVal >> (cBits - cbitThisPassBits)) <<
  1432. (pOutput->ibit - cbitThisPassBits + 1));
  1433. *pOutput->pCurrent = (BYTE)bThis;
  1434. pOutput->ibit -= cbitThisPassBits;
  1435. cBits -= (BYTE)cbitThisPassBits;
  1436. }
  1437. }
  1438. return S_OK;
  1439. }
  1440. /*************************************************************************
  1441. *
  1442. * @doc PRIVATE INDEXING
  1443. *
  1444. * @func HRESULT | FWriteBool |
  1445. * Writes a single bit into the output buffer.
  1446. *
  1447. * @parm PFILEDATA | pOutput |
  1448. * Pointer to the output data structure
  1449. *
  1450. * @parm BOOL | dwVal |
  1451. * BOOL value to write
  1452. *
  1453. * @rdesc Returns S_OK on success or errors if failed
  1454. *
  1455. *************************************************************************/
  1456. PRIVATE HRESULT NEAR PASCAL FWriteBool (PFILEDATA pOutput, BOOL fVal)
  1457. {
  1458. HRESULT fRet = E_FAIL;
  1459. ERRB errb;
  1460. if (pOutput->ibit < 0)
  1461. { // This byte is full, point to a new byte
  1462. pOutput->pCurrent++;
  1463. pOutput->foPhysicalOffset =
  1464. FoAddDw (pOutput->foPhysicalOffset, 1);
  1465. pOutput->cbLeft--;
  1466. #ifdef _DEBUG
  1467. if (pOutput->cbLeft <= 0)
  1468. SetErrCode (NULL, E_ASSERT);
  1469. #endif
  1470. // Room left in output buffer?
  1471. if (pOutput->cbLeft <= sizeof(DWORD))
  1472. {
  1473. LONG dwSize;
  1474. if (FileWrite (pOutput->fFile, pOutput->pMem,
  1475. dwSize = (DWORD)(pOutput->pCurrent - pOutput->pMem), &errb) != dwSize)
  1476. return(errb);
  1477. pOutput->pCurrent = pOutput->pMem;
  1478. pOutput->cbLeft = pOutput->dwMax;
  1479. pOutput->foStartOffset = FoAddDw(pOutput->foStartOffset, dwSize);
  1480. #ifdef _DEBUG
  1481. MEMSET (pOutput->pMem, 0, pOutput->dwMax);
  1482. #endif
  1483. }
  1484. pOutput->ibit = cbitBYTE - 1;
  1485. }
  1486. if (pOutput->ibit == cbitBYTE - 1) // Zero out a brand-new byte.
  1487. *pOutput->pCurrent = (BYTE)0;
  1488. if (fVal) // Write my boolean.
  1489. *pOutput->pCurrent |= 1 << pOutput->ibit;
  1490. pOutput->ibit--;
  1491. return S_OK; // Fine.
  1492. }
  1493. HRESULT PASCAL FAR BuildBtreeFromEso (HFPB hfpb,
  1494. LPSTR pstrFilename, LPB lpbEsiFile,
  1495. LPB lpbEsoFile, PINDEXINFO pIndexInfo)
  1496. {
  1497. _LPIPB lpipb;
  1498. HRESULT fRet;
  1499. ERRB errb;
  1500. BYTE bKeyIndex = 0;
  1501. IPB ipb;
  1502. HFILE hFile;
  1503. if ((lpipb = MVIndexInitiate(pIndexInfo, NULL)) == NULL)
  1504. return E_OUTOFMEMORY;
  1505. /* Read in the external sort buffer info */
  1506. if ((hFile = _lopen (lpbEsiFile, READ)) == HFILE_ERROR)
  1507. return E_NOTEXIST;
  1508. /* Read old IPB info */
  1509. _lread (hFile, &ipb, sizeof(IPB));
  1510. /* Transfer meaningful data */
  1511. lpipb->dwIndexedWord = ipb.dwIndexedWord;
  1512. lpipb->dwUniqueWord = ipb.dwUniqueWord;
  1513. lpipb->dwByteCount = ipb.dwByteCount;
  1514. lpipb->dwOccOffbits = ipb.dwOccOffbits;
  1515. lpipb->dwOccExtbits = ipb.dwOccExtbits;
  1516. lpipb->dwMaxFieldId = ipb.dwMaxFieldId;
  1517. lpipb->dwMaxWCount = ipb.dwMaxWCount;
  1518. lpipb->dwMaxOffset = ipb.dwMaxOffset;
  1519. lpipb->dwTotal3bWordLen = ipb.dwTotal3bWordLen;
  1520. lpipb->dwTotal2bWordLen = ipb.dwTotal2bWordLen;
  1521. lpipb->dwTotalUniqueWordLen = ipb.dwTotalUniqueWordLen;
  1522. lpipb->lcTopics = ipb.lcTopics;
  1523. lpipb->dwMaxTopicId = ipb.dwMaxTopicId;
  1524. // lpipb->dwMemAllowed = ipb.dwMemAllowed;
  1525. lpipb->dwMaxRecordSize = ipb.dwMaxRecordSize;
  1526. lpipb->dwMaxEsbRecSize = ipb.dwMaxEsbRecSize;
  1527. lpipb->dwMaxWLen = ipb.dwMaxWLen;
  1528. lpipb->idxf = ipb.idxf;
  1529. if (lpipb->idxf & IDXF_NORMALIZE)
  1530. {
  1531. if ((lpipb->wi.hSigma = _GLOBALALLOC (DLLGMEM_ZEROINIT,
  1532. (LCB)((lpipb->dwMaxTopicId + 1) * sizeof (SIGMA)))) == NULL)
  1533. return SetErrCode (&errb, E_OUTOFMEMORY);
  1534. lpipb->wi.hrgsigma = (HRGSIGMA)_GLOBALLOCK (lpipb->wi.hSigma);
  1535. if ((lpipb->wi.hLog = _GLOBALALLOC (DLLGMEM_ZEROINIT,
  1536. (CB)(cLOG_MAX * sizeof (FLOAT)))) == NULL)
  1537. {
  1538. SetErrCode (&errb, (HRESULT)(fRet = E_OUTOFMEMORY));
  1539. exit1:
  1540. FreeHandle (lpipb->wi.hSigma);
  1541. MVIndexDispose (lpipb);
  1542. return fRet;
  1543. }
  1544. #if 0
  1545. lpipb->wi.lrgrLog = (FLOAT FAR *)_GLOBALLOCK (lpipb->wi.hLog);
  1546. // Initialize the array
  1547. for (loop = cLOG_MAX - 1; loop > 0; --loop)
  1548. {
  1549. rLog = (FLOAT)1.0 / (float)loop;
  1550. lpipb->wi.lrgrLog[loop] = rLog * rLog;
  1551. }
  1552. #endif
  1553. }
  1554. // Build the permanent index
  1555. fRet = BuildBTree(NULL, lpipb, lpbEsoFile, hfpb, pstrFilename);
  1556. if (lpipb->idxf & IDXF_NORMALIZE)
  1557. {
  1558. FreeHandle (lpipb->wi.hLog);
  1559. goto exit1;
  1560. }
  1561. fRet = S_OK;
  1562. goto exit1;
  1563. }