Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2631 lines
88 KiB

  1. /*************************************************************************
  2. * *
  3. * UPDATE.C *
  4. * *
  5. * Copyright (C) Microsoft Corporation 1990-1994 *
  6. * All Rights reserved. *
  7. * *
  8. **************************************************************************
  9. * *
  10. * Module Intent *
  11. * *
  12. **************************************************************************
  13. * *
  14. * Current Owner: BinhN *
  15. * *
  16. **************************************************************************/
  17. #include <mvopsys.h>
  18. #include <math.h>
  19. #include <mem.h>
  20. #include <orkin.h>
  21. #include <mvsearch.h>
  22. #include "common.h"
  23. #include "index.h"
  24. #ifdef _DEBUG
  25. static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
  26. #endif
  27. #define SAFE_SLACK 48 // Extra safety bytes
  28. #define ESOUTPUT_BUFFER 0xFFFC // Size of output file buffer
  29. // This must be at the size of the largest word + 12
  30. // or word + 14 if OCCF_LENGTH is set
  31. #define ESINPUT_BUFFER 0x7FFC // Size of input file buffers.
  32. // Each ESB block get its own input buffer
  33. // Min Size: Size of index word + ~8 bytes
  34. #define NEW_NODE_ON_LEFT 0
  35. #define NEW_NODE_ON_RIGHT 1
  36. extern FENCODE EncodeTable[];
  37. extern FDECODE DecodeTable[];
  38. #define FAddDword(p,dw,key) EncodeTable[(key).cschScheme]((p), (dw), (key).ucCenter)
  39. #define FGetDword(a,b,c) (*DecodeTable[b.cschScheme])(a, b, c)
  // Per-word working record passed between UpdateIndexBTree, AddWordToBTree
  // and the B-tree insertion/data helpers. Fields whose use is not visible in
  // this part of the file are left undocumented.
  40. typedef struct WORDINFO
  41. {
  42. DWORD dwWordLen; // Word length: unpacked from the input record when OCCF_LENGTH is set, else the string length
  43. DWORD dwFieldId; // Field id unpacked from the input record (only when OCCF_FIELDID is set)
  44. DWORD dwNewTopicCount; // Topic count read from the incoming (new) record
  45. DWORD dwIndexTopicCount; // Topic count unpacked from the existing leaf entry
  46. DWORD dwMergeTopicCount;
  47. DWORD dwOldTopicId;
  48. DWORD dwNewTopicId;
  49. DWORD dwIndexTopicId;
  50. DWORD dwDataSize; // Data size unpacked from the existing leaf entry
  51. FILEOFFSET dataLocation; // Data location read from the existing leaf entry
  52. WORD fFlag;
  53. WORD pad; // NOTE(review): presumably alignment padding - confirm
  54. } WORDINFO, FAR *PWORDINFO;
  // Size/offset pair filled in by GetFreeBlock (see its prototype below).
  // NOTE(review): presumably describes a reusable region of the index file - confirm.
  55. typedef struct FREEBLOCK
  56. {
  57. DWORD dwBlockSize; // Size of the block
  58. FILEOFFSET foBlockOffset; // File offset of the block
  59. }FREEBLOCK, FAR *PFREEBLOCK;
  // Four zero bytes. NOTE(review): presumably an empty length-prefixed word - confirm at the use sites.
  60. BYTE EmptyWord[4] = { 0 };
  // Debug-build-only counters, all starting at zero; they are not referenced
  // in this part of the file.
  61. #ifdef _DEBUG
  62. DWORD dwOldDataLoss = 0;
  63. DWORD dwNewDataSize = 0;
  64. DWORD dwOldDataNeed = 0;
  65. DWORD dwNewNodeSize = 0;
  66. #endif
  // Bit-flag constants for the B-tree update code. Each value is a distinct
  // single bit; the numeric suffix of each name repeats its hex value.
  67. // Flag to denote that the current entry is to be replaced by the new entry.
  68. // This happens when:
  69. // - A repeated entry is found in the leaf node
  70. // - The last entry in the stem node has to be changed to the last
  71. // word of the leaf node
  72. #define REPLACE_WORD_01 0x0001
  73. // Flag to denote that the last word buffer actually contains the word
  74. // before last. This is needed when we have to replace the last word
  75. // with the new word. In this case we need the word before last to do
  76. // compression
  77. #define ONE_WORD_BEHIND_02 0x0002
  78. // Flag to denote updating the offset field with the temp node offset
  79. #define USE_TEMP_NODE_04 0x0004
  80. // Flag to denote that only the node offset address is to be updated. Since
  81. // this is a fixed record size, this will speed up the update.
  82. #define UPDATE_NODE_ADDRESS_08 0x0008
  83. // rgpTmpNodeInfo is the new right node if set, else it is the left node
  84. #define USE_TEMP_FOR_RIGHT_NODE_10 0x0010
  85. // Flag to denote that we have to skip the next word before inserting a new
  86. // word. This happens when adding a new word to the end of the block, where
  87. // pCurPtr is pointing to the beginning of the last word
  88. #define SKIP_NEXT_WORD_20 0x0020
  89. // Both nodes, rgpNodeInfo and rgpTmpNodeInfo, are used as left and right
  90. // children. This happens when a new top node is created
  91. #define USE_BOTH_NODE_40 0x0040
  92. /*************************************************************************
  93. *
  94. * INTERNAL PRIVATE FUNCTIONS
  95. *
  96. * All of them should be declared near
  97. *
  98. *************************************************************************/
  99. PRIVATE HRESULT NEAR PASCAL ESFlushBuffer (LPESI);
  100. PRIVATE HRESULT NEAR PASCAL ESFillBuffer (_LPIPB, LPESB);
  101. PRIVATE void NEAR PASCAL ESMemory2Disk (_LPIPB, PMERGEHEADER);
  102. PRIVATE HRESULT NEAR PASCAL ProcessFiles (_LPIPB lpipb, LPMERGEPARAMS);
  103. PRIVATE int NEAR PASCAL CompareRecordBuffers (_LPIPB, LPB, LPB);
  104. PRIVATE VOID NEAR PASCAL PQueueUp (_LPIPB, LPESB FAR *, LONG);
  105. PRIVATE VOID NEAR PASCAL PQueueDown (_LPIPB);
  106. PRIVATE PTOPICDATA PASCAL NEAR MergeTopicNode (PMERGEHEADER, PTOPICDATA, int);
  107. PRIVATE VOID NEAR MergeOccurrence (PTOPICDATA, PTOPICDATA, int);
  108. PRIVATE HRESULT NEAR PASCAL UpdateIndexBTree (_LPIPB, HFPB, LPB, LPB);
  109. VOID SetQueue (LPESI pEsi);
  110. PRIVATE HRESULT NEAR PASCAL AddWordToBTree (_LPIPB, LPB, PWORDINFO);
  111. PRIVATE HRESULT PASCAL NEAR NewDataInsert(LPIPB lpipb, PFILEDATA pInfile,
  112. PNODEINFO FAR *rgpNodeInfo, LPB pWord, PWORDINFO pWordInfo);
  113. PRIVATE HRESULT PASCAL NEAR CreateNewNode(_LPIPB lpipb, int cLevel,
  114. int fIsStemNode, int fAfter);
  115. PRIVATE PASCAL NEAR AddRecordToBTree (_LPIPB lpipb, LPB pWord,
  116. PWORDINFO pWordInfo, int cLevel, int fReplaceWord);
  117. PRIVATE HRESULT PASCAL NEAR WriteNewDataRecord (_LPIPB, PWORDINFO);
  118. PRIVATE HRESULT GetFreeBlock (_LPIPB, PFREEBLOCK, DWORD);
  119. PRIVATE HRESULT PASCAL NEAR CopyBlockFile (PFILEDATA, HFPB, FILEOFFSET, DWORD);
  120. PRIVATE HRESULT PASCAL FAR EmitOldData (_LPIPB, PNODEINFO, PWORDINFO);
  121. PRIVATE HRESULT PASCAL FAR EmitNewData (_LPIPB, PWORDINFO, BOOL);
  122. PRIVATE HRESULT PASCAL NEAR UpdateDataNode (_LPIPB lpipb, PWORDINFO pWordInfo);
  123. PRIVATE int PASCAL NEAR SplitNodeAndAddData (_LPIPB lpipb, LPB pWord,
  124. PWORDINFO pWordInfo, int cLevel, int fFlag, int fIsStemNode);
  125. PRIVATE int PASCAL NEAR CopyNewDataToStemNode (_LPIPB lpipb,
  126. PNODEINFO pTmpNode, LPB pWord, LPB pLastWord, int cLevel, int fFlag);
  127. PRIVATE int PASCAL NEAR CopyNewDataToLeafNode (_LPIPB lpipb, PNODEINFO pTmpNode,
  128. PWORDINFO pWordInfo, LPB pWord, LPB pLastWord);
  129. VOID GetLastWordInNode (_LPIPB lpipb, PNODEINFO pNodeinfo, BOOL flag);
  130. PRIVATE HRESULT PASCAL FAR SkipNewData (_LPIPB lpipb, PWORDINFO pWordInfo);
  131. HRESULT CheckLeafNode (PNODEINFO pNodeInfo, int occf);
  132. HRESULT CheckStemNode (PNODEINFO pNodeInfo);
  133. /*************************************************************************
  134. *
  135. * INTERNAL PUBLIC FUNCTIONS
  136. *
  137. * All of them should be declared far, unless we know they belong to
  138. * the same segment. They should be included in some include file
  139. *
  140. *************************************************************************/
  141. HRESULT FAR PASCAL FlushTree(_LPIPB lpipb);
  142. PUBLIC HRESULT FAR PASCAL MergeSortTreeFile (_LPIPB, LPMERGEPARAMS);
  143. PUBLIC HRESULT FAR PASCAL FillInputBuffer (LPESB, HFPB);
  144. PUBLIC VOID PASCAL FAR FreeBTreeNode (PNODEINFO pNode);
  145. PUBLIC PNODEINFO PASCAL FAR AllocBTreeNode (_LPIPB lpipb);
  146. PUBLIC PASCAL FAR PrefixCompressWord (LPB, LPB, LPB, int);
  147. PUBLIC DWORD PASCAL FAR WriteDataNode (_LPIPB, DWORD, PHRESULT);
  148. PUBLIC HRESULT PASCAL FAR IndexOpenRW (LPIPB, HFPB, LSZ);
  149. PUBLIC HRESULT PASCAL FAR SkipOldData (_LPIPB, PNODEINFO);
  150. PUBLIC LONG PASCAL FAR CompareDWord (DWORD, DWORD, LPV lpParm);
  151. #ifdef _DEBUG
  152. static LONG Count = 0;
  153. #endif
  154. /*************************************************************************
  155. *
  156. * @doc EXTERNAL API INDEX
  157. *
  158. * @func HRESULT FAR PASCAL | MVIndexUpdate |
  159. * This function will update an index file based on the information
  160. * collected in the Index parameter block.
  161. *
  162. * @parm HFPB | hSysFile |
  163. * System file handle.
  164. * If it is 0, this function will open the system file
  165. * specified in lszFilename, and then close it after finishing the
  166. * index update. If the system file does not exist, then this function
  167. * will create it.
  168. * If it is non-zero, then the system file is already opened. Only the
  169. * index sub-file needs to be created
  170. *
  171. * @parm LSZ | lszFilename |
  172. * Index filename.
  173. * If hSysFile is non-zero, the format is: !index_filename
  174. * if hSysFile is zero, the format is: dos_filename[!index_filename]
  175. * If !index_filename is not specified, the default name will be used
  176. * if hSysFile == 0 and there is no '!', this is a regular DOS file
  177. *
  178. * @parm LPIPB | lpipb |
  179. * Pointer to Index Parameter Block. This structure contains all the
  180. * information necessary to update the index file
  181. * *
  182. * @rdesc S_OK if succeeded, or other errors
  183. *
  184. *************************************************************************/
  185. PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexUpdate (HFPB hSysFile,
  186. _LPIPB lpipb, LSZ lszFilename)
  187. {
  188. return MVIndexUpdateEx(hSysFile, lpipb, lszFilename, NULL, 0);
  189. }
  190. /*************************************************************************
  191. *
  192. * @doc EXTERNAL API INDEX
  193. *
  194. * @func HRESULT FAR PASCAL | MVIndexUpdateEx |
  195. * This function will update an index file based on the information
  196. * collected in the Index parameter block, and also will "pre-delete" the
  197. * topics in the given list from the LPIPB before updating. This function is useful
  198. * in scenarios where new topics are continuously added into the index
  199. * before knowledge of out-dated topics is available (e.g. netnews).
  200. * This allows a single-pass update once the deletes are known.
  201. *
  202. * @parm HFPB | hSysFile |
  203. * System file handle.
  204. * If it is 0, this function will open the system file
  205. * specified in lszFilename, and then close it after finishing the
  206. * index update. If the system file does not exist, then this function
  207. * will create it.
  208. * If it is non-zero, then the system file is already opened. Only the
  209. * index sub-file needs to be created
  210. *
  211. * @parm LSZ | lszFilename |
  212. * Index filename.
  213. * If hSysFile is non-zero, the format is: !index_filename
  214. * if hSysFile is zero, the format is: dos_filename[!index_filename]
  215. * If !index_filename is not specified, the default name will be used
  216. * if hSysFile == 0 and there is no '!', this is a regular DOS file
  217. *
  218. * @parm LPIPB | lpipb |
  219. * Pointer to Index Parameter Block. This structure contains all the
  220. * information necessary to update the index file
  221. *
  222. * @parm LPDW | lpdwTopicList |
  223. * Pointer to DWORD array of topic UIDs to be deleted
  224. *
  225. * @parm DWORD | dwCount |
  226. * The number of topics in the array
  227. *
  228. * @rdesc S_OK if succeeded, or other errors
  229. *
  230. *************************************************************************/
  231. PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexUpdateEx (HFPB hSysFile,
  232. _LPIPB lpipb, LSZ lszFilename, DWORD FAR *rgTopicId, DWORD dwCount)
  233. {
  234. ERRB errb;
  235. PHRESULT phr = &errb;
  236. PFILEDATA pOutFile;
  237. MERGEPARAMS mp;
  238. HRESULT fRet; // Return value from this function.
  239. // Flush the internal sort
  240. // Flushes any records in the tree to disk
  241. fRet = FlushTree(lpipb);
  242. // Free all memory blocks
  243. FreeISI (lpipb);
  244. if (fRet != S_OK)
  245. return(fRet);
  246. if (lpipb->esi.cesb == 0)
  247. // Nothing to process, there will be no index file
  248. return S_OK;
  249. // Set the state flag
  250. lpipb->bState = UPDATING_STATE;
  251. // Open the index file
  252. if ((fRet = IndexOpenRW(lpipb, hSysFile, lszFilename)) != S_OK)
  253. {
  254. exit00:
  255. if (lpipb->idxf & IDXF_NORMALIZE)
  256. {
  257. FreeHandle (lpipb->wi.hSigma);
  258. FreeHandle (lpipb->wi.hLog);
  259. lpipb->wi.hSigma = lpipb->wi.hLog = NULL;
  260. }
  261. return fRet;
  262. }
  263. if (rgTopicId && dwCount)
  264. {
  265. // Sort the incoming array
  266. if ((fRet = HugeDataSort((LPV HUGE*)rgTopicId, dwCount,
  267. (FCOMPARE)CompareDWord, NULL, NULL, NULL)) != S_OK)
  268. goto exit00;
  269. mp.rgTopicId = rgTopicId;
  270. mp.dwCount = dwCount;
  271. mp.lpTopicIdLast = rgTopicId;
  272. }
  273. if ((fRet = MergeSortTreeFile (lpipb, (rgTopicId && dwCount) ? &mp: NULL)) != S_OK)
  274. {
  275. FileClose(lpipb->hfpbIdxFile);
  276. fRet = SetErrCode (phr, fRet);
  277. goto exit00;
  278. }
  279. FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
  280. // Open output file
  281. pOutFile = &lpipb->OutFile;
  282. if ((pOutFile->fFile = FileCreate (NULL, lpipb->isi.aszTempName,
  283. REGULAR_FILE, phr)) == NULL)
  284. {
  285. FileClose(lpipb->hfpbIdxFile);
  286. fRet = SetErrCode (phr, fRet);
  287. goto exit00;
  288. }
  289. // Allocate output buffer
  290. pOutFile->dwMax = FILE_BUFFER;
  291. pOutFile->cbLeft = FILE_BUFFER;
  292. if ((pOutFile->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
  293. pOutFile->dwMax + SAFE_SLACK)) == NULL)
  294. {
  295. fRet = E_OUTOFMEMORY;
  296. exit0:
  297. FileClose(lpipb->hfpbIdxFile);
  298. FileClose (pOutFile->fFile);
  299. FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
  300. goto exit00;
  301. }
  302. pOutFile->pCurrent = pOutFile->pMem = _GLOBALLOCK (pOutFile->hMem);
  303. // Build the permanent index
  304. fRet = UpdateIndexBTree(lpipb, hSysFile, lpipb->esi.aszTempName,
  305. lszFilename);
  306. _GLOBALUNLOCK(pOutFile->hMem);
  307. _GLOBALFREE(pOutFile->hMem);
  308. pOutFile->hMem = NULL;
  309. goto exit0;
  310. }
  311. /*************************************************************************
  312. *
  313. * @doc PRIVATE INDEXING
  314. *
  315. * @func HRESULT | UpdateIndexBTree |
  316. * Allocates required memory and opens input files to create a B-Tree.
  317. * Parses incoming words and calls AddRecordToBTree to process them.
  318. *
  319. * @parm _LPIPB | lpipb |
  320. * Pointer to the index parameter block
  321. *
  322. * @parm LPB | lpszTemp |
  323. * Filename of the temporary input file
  324. *
  325. * @parm LPB | szIndexFilename |
  326. * Filename of the permanent B-Tree file
  327. *
  328. * @rdesc Returns S_OK on success or errors if failed
  329. *
  330. *************************************************************************/
  331. PRIVATE HRESULT NEAR PASCAL UpdateIndexBTree (_LPIPB lpipb, HFPB hSysFile,
  332. LPB lpszTemp, LPB szIndexFilename)
  333. {
  334. PFILEDATA pInFile; // Pointer to input data
  335. DWORD dwBytesRead = 0; // Checks for EOF
  336. PNODEINFO FAR * rgpNodeInfo;
  337. PNODEINFO FAR * rgpTmpNodeInfo;
  338. PNODEINFO pIndexDataNode;
  339. ERRB errb;
  340. PHRESULT phr = &errb;
  341. PIH20 pHeader;
  342. int cTreeLevel;
  343. int iIndex;
  344. LPB pWord;
  345. WORDINFO WordInfo;
  346. OCCF occf;
  347. HRESULT fRet; // Return value
  348. FILEOFFSET foFreeListOffset; // File Offset where the FreeList will be saved.
  349. DWORD dwSizeFreeList; // Size of the FreeList to be saved.
  350. rgpNodeInfo = lpipb->BTreeData.rgpNodeInfo;
  351. rgpTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo;
  352. MEMSET(&WordInfo, 0, sizeof(WORDINFO));
  353. // Open input file
  354. pInFile = &lpipb->InFile;
  355. if ((pInFile->fFile = FileOpen (NULL, lpszTemp, REGULAR_FILE,
  356. READ, phr)) == NULL)
  357. return *phr;
  358. // Allocate input buffer
  359. pInFile->dwMax = FILE_BUFFER;
  360. if ((pInFile->hMem =
  361. _GLOBALALLOC (DLLGMEM_ZEROINIT, pInFile->dwMax + SAFE_SLACK)) == NULL)
  362. {
  363. fRet = E_OUTOFMEMORY;
  364. exit0:
  365. FileClose (pInFile->fFile);
  366. FileUnlink (NULL, lpszTemp, REGULAR_FILE);
  367. return fRet;
  368. }
  369. pInFile->pMem = _GLOBALLOCK (pInFile->hMem);
  370. pInFile->pCurrent = pInFile->pMem;
  371. pHeader = &lpipb->BTreeData.Header;
  372. // Allocate BTree block.
  373. for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
  374. {
  375. if ((rgpNodeInfo[cTreeLevel] = AllocBTreeNode (lpipb)) == NULL)
  376. {
  377. fRet = E_OUTOFMEMORY;
  378. goto exit2;
  379. }
  380. if ((rgpTmpNodeInfo[cTreeLevel] = AllocBTreeNode (lpipb)) == NULL)
  381. {
  382. fRet = E_OUTOFMEMORY;
  383. goto exit2;
  384. }
  385. }
  386. if (((lpipb->pIndexDataNode = pIndexDataNode =
  387. AllocBTreeNode (lpipb))) == NULL)
  388. {
  389. fRet = E_OUTOFMEMORY;
  390. goto exit2;
  391. }
  392. // Reallocate a bigger buffer. BTREE_NODE_SIZE is only good for btree node
  393. _GLOBALUNLOCK (pIndexDataNode->hMem);
  394. _GLOBALFREE (pIndexDataNode->hMem);
  395. // Allocate 1M of memory for the data buffer
  396. if ((pIndexDataNode->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
  397. pIndexDataNode->dwBlockSize = FILE_BUFFER)) == NULL)
  398. goto exit2;
  399. pIndexDataNode->pCurPtr = pIndexDataNode->pBuffer =
  400. _GLOBALLOCK (pIndexDataNode->hMem);
  401. lpipb->pIndexDataNode->hfpbIdx = lpipb->hfpbIdxFile; // Index file to read from
  402. // Remember the file offset of this node
  403. rgpNodeInfo[0]->nodeOffset = pHeader->foIdxRoot;
  404. // Read in data for the top stem node
  405. if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, rgpNodeInfo[0],
  406. pHeader->cIdxLevels > 1 ? FALSE : TRUE)) != S_OK)
  407. {
  408. exit2:
  409. FreeHandle (pInFile->hMem);
  410. for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
  411. {
  412. FreeBTreeNode (rgpNodeInfo[cTreeLevel]);
  413. FreeBTreeNode (rgpTmpNodeInfo[cTreeLevel]);
  414. }
  415. goto exit0;
  416. }
  417. // Allocate temporary buffer for word. The buffer is allocated as followed:
  418. // - Max word length * 2: for maximum word length. Minimum is 256
  419. // - 3 byte: word length
  420. // - 5 byte: Field Id
  421. // - 5 byte: Topic count
  422. // - 6 byte: data pointer
  423. // iIndex is used as a tmp
  424. iIndex = (WORD)(lpipb->BTreeData.Header.dwMaxWLen * 2);
  425. if (iIndex < 1024)
  426. iIndex = 1024;
  427. iIndex += 3 + 5 + 5 + 6;
  428. if ((lpipb->hTmpBuf = _GLOBALALLOC (DLLGMEM_ZEROINIT, iIndex * 2)) == NULL)
  429. {
  430. fRet = E_OUTOFMEMORY;
  431. goto exit2;
  432. }
  433. lpipb->pTmpBuf = (LPB)_GLOBALLOCK (lpipb->hTmpBuf);
  434. lpipb->pWord = lpipb->pTmpBuf + iIndex;
  435. // Allocate a big buffer for data
  436. if ((lpipb->hData = _GLOBALALLOC(DLLGMEM_ZEROINIT,
  437. lpipb->dwDataSize = 0x80000)) == NULL)
  438. {
  439. fRet = E_OUTOFMEMORY;
  440. goto exit2;
  441. }
  442. lpipb->pDataBuffer= _GLOBALLOCK(lpipb->hData);
  443. // Load the input buffer & repeat until all records are processed
  444. pInFile->dwMax = pInFile->cbLeft =
  445. FileRead (pInFile->fFile, pInFile->pMem, pInFile->dwMax, phr);
  446. fRet = S_OK;
  447. pWord = lpipb->pWord;
  448. occf = lpipb->BTreeData.Header.occf;
  449. do
  450. {
  451. LPB pSrcPtr;
  452. WORD wLen;
  453. if (pInFile->cbLeft < CB_MAX_WORD_LEN * sizeof(DWORD) * 8)
  454. {
  455. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  456. pInFile->cbLeft += FileRead (pInFile->fFile,
  457. pInFile->pMem + pInFile->cbLeft,
  458. pInFile->dwMax - pInFile->cbLeft, &errb);
  459. pInFile->dwMax = pInFile->cbLeft;
  460. pInFile->pCurrent = pInFile->pMem;
  461. }
  462. // Extract the word and its info
  463. pSrcPtr = pInFile->pCurrent + sizeof(DWORD); // Skip reclength
  464. // Copy the word
  465. MEMCPY (pWord, pSrcPtr, wLen = GETWORD((LPUW)pSrcPtr) + 2);
  466. pSrcPtr += GETWORD((LPUW)pSrcPtr) + 2;
  467. if (occf & OCCF_LENGTH)
  468. {
  469. pSrcPtr += CbByteUnpack(&WordInfo.dwWordLen, pSrcPtr);
  470. CbBytePack (pWord + wLen, WordInfo.dwWordLen);
  471. }
  472. else
  473. {
  474. WordInfo.dwWordLen = wLen - 2;
  475. }
  476. if (occf & OCCF_FIELDID)
  477. pSrcPtr += CbByteUnpack(&WordInfo.dwFieldId, pSrcPtr);
  478. WordInfo.dwNewTopicCount = GETLONG((LPUL)pSrcPtr);
  479. pSrcPtr += sizeof(DWORD);
  480. pInFile->pCurrent = pSrcPtr;
  481. pInFile->cbLeft = (LONG)(pInFile->dwMax - (pSrcPtr - pInFile->pMem));
  482. #if 0
  483. if (STRNICMP(pWord+2, "cylindeeer", 10) == 0)
  484. _asm int 3;
  485. #endif
  486. #if 0
  487. else
  488. {
  489. SkipNewData (lpipb, &WordInfo);
  490. continue;
  491. }
  492. #endif
  493. // Find/Add the record
  494. if ((fRet = AddWordToBTree (lpipb, pWord, &WordInfo)) != S_OK)
  495. {
  496. exit3:
  497. _GLOBALUNLOCK (lpipb->hTmpBuf);
  498. _GLOBALFREE (lpipb->hTmpBuf);
  499. _GLOBALUNLOCK(lpipb->hData);
  500. _GLOBALFREE(lpipb->hData);
  501. FreeBTreeNode (lpipb->pIndexDataNode);
  502. lpipb->hData = lpipb->hTmpBuf = 0;
  503. goto exit2;
  504. }
  505. pSrcPtr = pInFile->pCurrent;
  506. // pInFile->pCurrent points to the record size
  507. if (pInFile->cbLeft <= SAFE_SLACK ||
  508. (LONG)GETLONG ((LPUL)pInFile->pCurrent) >= pInFile->cbLeft)
  509. {
  510. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  511. if ((pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
  512. pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft, phr)) < 0)
  513. {
  514. fRet = *phr;
  515. goto exit3;
  516. }
  517. pInFile->dwMax = pInFile->cbLeft;
  518. pInFile->pCurrent = pInFile->pMem;
  519. }
  520. } while (fRet == S_OK && pInFile->cbLeft);
  521. for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
  522. {
  523. if (rgpNodeInfo[cTreeLevel]->fFlag == TO_BE_UPDATE)
  524. {
  525. if ((FileSeekWrite(lpipb->hfpbIdxFile,
  526. rgpNodeInfo[cTreeLevel]->pBuffer,
  527. rgpNodeInfo[cTreeLevel]->nodeOffset,
  528. lpipb->BTreeData.Header.dwBlockSize, phr)) != (LONG)lpipb->BTreeData.Header.dwBlockSize)
  529. {
  530. fRet = *phr;
  531. goto exit3;
  532. }
  533. }
  534. }
  535. if (lpipb->idxf & IDXF_NORMALIZE)
  536. {
  537. LONG loop;
  538. for (loop = lpipb->dwMaxTopicId; loop >= 0; loop--)
  539. {
  540. lpipb->wi.hrgsigma[loop] =
  541. (float)sqrt ((double)lpipb->wi.hrgsigma[loop]);
  542. }
  543. pHeader->WeightTabSize = (lpipb->dwMaxTopicId + 1)* sizeof(float);
  544. if (FileSeekWrite (lpipb->hfpbIdxFile, lpipb->wi.hrgsigma,
  545. lpipb->foMaxOffset, pHeader->WeightTabSize, phr) !=
  546. (LONG)pHeader->WeightTabSize)
  547. {
  548. fRet = *phr;
  549. goto exit3;
  550. }
  551. pHeader->WeightTabOffset = lpipb->foMaxOffset;
  552. }
  553. // ERIC: 1/ Save the freelist info to the end of the file
  554. // 2/ Update the header with the new freelist offset/size
  555. if (lpipb->hFreeList)
  556. {
  557. LPBYTE lpbFreeList;
  558. dwSizeFreeList = FreeListSize(lpipb->hFreeList,phr);
  559. foFreeListOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwSizeFreeList,0), phr);
  560. if (FoIsNil(foFreeListOffset))
  561. foFreeListOffset = lpipb->foMaxOffset;
  562. if((lpbFreeList = (LPBYTE) _GLOBALALLOCPTR(DLLGMEM_ZEROINIT, dwSizeFreeList)) == NULL)
  563. return E_OUTOFMEMORY;
  564. FreeListGetMem(lpipb->hFreeList, (LPVOID)lpbFreeList);
  565. FileSeekWrite (lpipb->hfpbIdxFile, (LPBYTE)lpbFreeList,
  566. foFreeListOffset, dwSizeFreeList, phr);
  567. if (FoEquals(foFreeListOffset, lpipb->foMaxOffset))
  568. dwSizeFreeList |= 0x80000000;
  569. FreeListDestroy(lpipb->hFreeList);
  570. lpipb->hFreeList = (HFREELIST) NULL;
  571. _GLOBALFREEPTR(lpbFreeList);
  572. }
  573. // Copy info to header
  574. if (pHeader->lcTopics < lpipb->lcTopics)
  575. pHeader->lcTopics = lpipb->lcTopics;
  576. if (pHeader->dwMaxFieldId < lpipb->dwMaxFieldId)
  577. pHeader->dwMaxFieldId = lpipb->dwMaxFieldId;
  578. if (pHeader->dwMaxWCount < lpipb->dwMaxWCount)
  579. pHeader->dwMaxWCount = lpipb->dwMaxWCount;
  580. if (pHeader->dwMaxOffset < lpipb->dwMaxOffset)
  581. pHeader->dwMaxOffset = lpipb->dwMaxOffset;
  582. if (pHeader->dwMaxWLen < lpipb->dwMaxWLen)
  583. pHeader->dwMaxWLen = lpipb->dwMaxWLen;
  584. pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
  585. // ERIC: Garbage Collection
  586. pHeader->foFreeListOffset = foFreeListOffset;
  587. pHeader->dwFreeListSize = dwSizeFreeList;
  588. // END
  589. FileSeekWrite (lpipb->hfpbIdxFile, (LPB)pHeader,
  590. MakeFo (0, 0), sizeof (IH20), phr);
  591. fRet = S_OK;
  592. goto exit3;
  593. }
  594. /*********************************************************************
  595. * @func HRESULT PASCAL | AddWordToBTree |
  596. * Find the location of a word in the index. This function also
  597. * sets up all relevant data for the future update
  598. *
  599. * @parm LPIPB | lpipb |
  600. * Pointer to index info
  601. *
  602. * @parm LPB | pWord |
  603. * Word to be searched for. This is a 2-byte preceded Pascal string
  604. *
  605. * @parm PWORDINFO | pWordInfo |
  606. * Pointer to word's info
  607. *
  608. * @rdesc
  609. * S_OK or other errors. In case of success, pWordInfo will
  610. * be filled with useful data
  611. *********************************************************************/
  // Locates pWord in the index B-tree and applies the update for it.
  //
  // The stem levels (0 .. cIdxLevels-2) are walked first to find the file
  // offset of the leaf that should hold the word; the leaf is then scanned:
  //  - entry equal to the word : UpdateDataNode, then AddRecordToBTree with
  //    REPLACE_WORD_01
  //  - entry greater than the word, or end of the leaf reached :
  //    WriteNewDataRecord, then AddRecordToBTree with flag 0
  //
  // rgpNodeInfo[level] caches one node per level. A node is re-read only
  // when the wanted file offset differs from the cached one, and a cached
  // node flagged TO_BE_UPDATE is written back before being replaced.
  // On return each visited node's pCurPtr is left at the start of the entry
  // where the search stopped (the insertion point).
  //
  // Returns S_OK or an error from the file I/O / helper that failed.
  612. PRIVATE HRESULT NEAR PASCAL AddWordToBTree (_LPIPB lpipb, LPB pWord,
  613. PWORDINFO pWordInfo)
  614. {
  615. int cLevel;
  616. LPB lpCurPtr;
  617. int nCmp;
  618. HRESULT fRet;
  619. WORD RecSize = 0; // Not referenced anywhere else in this function
  620. LPB lpMaxAddress;
  621. ERRB errb;
  622. PHRESULT phr = &errb;
  623. WORD wWlen;
  624. PNODEINFO pNodeInfo;
  625. PNODEINFO pChildNode;
  626. LPB pBTreeWord;
  627. int cMaxLevel;
  628. FILEOFFSET nodeOffset;
  629. PNODEINFO FAR *rgpNodeInfo = lpipb->BTreeData.rgpNodeInfo;
  630. OCCF occf = lpipb->occf;
  631. LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
  // Disabled debugging aid: breaks into the debugger on selected words
  632. #if 0
  633. Count++;
  634. if (STRNICMP(pWord+2, "approeeaching", 11) == 0 ||
  635. STRNICMP(pWord+2, "authenteeic", 11) == 0 ||
  636. STRNICMP(pWord+2, "eastleeand", 10) == 0)
  637. _asm int 3;
  638. #endif
  639. // Change to 0-based: cMaxLevel is the index of the leaf level
  640. cMaxLevel = lpipb->BTreeData.Header.cIdxLevels - 1;
  641. // Start from the offset of the node currently cached at level 0
  642. nodeOffset = rgpNodeInfo[0]->nodeOffset;
  643. /* Search in the stem nodes */
  644. for (cLevel = 0; cLevel < cMaxLevel ; cLevel++)
  645. {
  646. //
  647. // Set variables
  648. //
  649. pNodeInfo = rgpNodeInfo[cLevel];
  650. pChildNode = rgpNodeInfo[cLevel + 1];
  651. pChildNode->prevNodeOffset = foNil;
  652. pBTreeWord = pNodeInfo->pTmpResult;
  653. // Reload the node if necessary, i.e. when the cached node at this
  // level is not the one located at nodeOffset
  654. if (!FoEquals(pNodeInfo->nodeOffset, nodeOffset))
  655. {
  // Save the outgoing node first if it has pending changes
  656. if (pNodeInfo->fFlag == TO_BE_UPDATE)
  657. {
  658. if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
  659. pNodeInfo->nodeOffset, dwBlockSize,
  660. &errb)) != (LONG)dwBlockSize)
  661. return(errb);
  662. }
  663. pNodeInfo->nodeOffset = nodeOffset;
  664. if ((fRet = ReadNewNode (lpipb->hfpbIdxFile, pNodeInfo,
  665. FALSE)) != S_OK)
  666. {
  667. return SetErrCode (phr, fRet);
  668. }
  669. pNodeInfo->fFlag = 0;
  670. }
  671. lpMaxAddress = pNodeInfo->pMaxAddress;
  672. lpCurPtr = pNodeInfo->pCurPtr; // points to the LAST ACCESSED word in the block
  673. // The format of the stem node
  674. // cbLeft | (Word | PointerToNode) | Slack
  675. while (lpCurPtr < lpMaxAddress - 1)
  676. {
  677. // Save the last location. This would be the insertion point for
  678. // update
  679. pNodeInfo->pCurPtr = lpCurPtr;
  680. // Reset the word length
  681. wWlen = 0;
  682. // Get the compressed word
  683. lpCurPtr = ExtractWord(pBTreeWord, lpCurPtr, &wWlen);
  684. /* Read in NodeId record */
  685. lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
  // Equal strings are ordered by word length
  686. if ((nCmp = StrCmpPascal2(pWord, pBTreeWord)) == 0)
  687. nCmp = (int)((WORD)pWordInfo->dwWordLen - wWlen );
  688. if (nCmp > 0)
  689. {
  690. // We didn't find the location of the word yet
  691. // Continue searching
  // The entry just passed is remembered as the last word unless it
  // was the final entry of the node
  692. if (lpCurPtr < pNodeInfo->pMaxAddress - 1)
  693. {
  694. MEMCPY (pNodeInfo->pLastWord, pBTreeWord,
  695. *(LPUW)pBTreeWord + sizeof(WORD)); // erinfox RISC patch
  696. }
  // Record the child offset of the entry just passed
  697. pChildNode->prevNodeOffset = nodeOffset;
  698. continue;
  699. }
  700. // We found the location of the word
  // (this break leaves only the while loop; nodeOffset now holds the
  // child to descend into)
  701. break;
  702. }
  703. }
  704. // At this point, nodeOffset is the node id of the leaf that
  705. // is supposed to contain the searched word.
  706. pNodeInfo = rgpNodeInfo[cMaxLevel];
  707. if (!FoEquals(pNodeInfo->nodeOffset, nodeOffset))
  708. {
  // Different leaf wanted: save the cached one if modified, then load
  709. if (pNodeInfo->fFlag == TO_BE_UPDATE)
  710. {
  711. if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
  712. pNodeInfo->nodeOffset, dwBlockSize,
  713. phr)) != dwBlockSize)
  714. return(*phr);
  715. }
  716. pNodeInfo->nodeOffset = nodeOffset;
  717. if ((fRet = ReadNewNode (lpipb->hfpbIdxFile, pNodeInfo,
  718. TRUE)) != S_OK)
  719. {
  720. return SetErrCode (phr, fRet);
  721. }
  722. pNodeInfo->fFlag = 0;
  723. lpCurPtr = pNodeInfo->pCurPtr;
  724. }
  725. else
  726. {
  // Same leaf as for the previous word: resume from the saved position
  // rather than from the start of the node
  727. // Reset all data
  728. // lpCurPtr = pNodeInfo->pCurPtr = pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE;
  729. lpCurPtr = pNodeInfo->pCurPtr;
  730. }
  731. pBTreeWord = pNodeInfo->pTmpResult;
  732. lpMaxAddress = pNodeInfo->pMaxAddress;
  733. // Reset the last word
  734. *(LPWORD)pNodeInfo->pLastWord = 0;
  735. // Leaf node structure: *
  736. // (Word|FieldId|TopicCnt|PointerToNode|DataSize)*
  737. for (;;)
  738. {
  739. DWORD dwFieldId; // Only assigned and read when OCCF_FIELDID is set
  740. // Save the last location. This would be the insertion point for
  741. // update
  742. pNodeInfo->pCurPtr = lpCurPtr;
  743. if (lpCurPtr >= lpMaxAddress)
  744. {
  745. // Add to the end of the node
  746. if ((fRet = WriteNewDataRecord (lpipb, pWordInfo)) != S_OK)
  747. return(fRet);
  748. return AddRecordToBTree (lpipb, pWord, pWordInfo, cMaxLevel, 0);
  749. }
  750. // Get the compressed word
  // NOTE(review): unlike the stem loop, wWlen is not reset to 0 before
  // this call - confirm that ExtractWord always stores a length
  751. lpCurPtr = ExtractWord(pBTreeWord, lpCurPtr, &wWlen);
  752. // Get field id and topic count
  753. if (occf & OCCF_FIELDID)
  754. lpCurPtr += CbByteUnpack (&dwFieldId, lpCurPtr);
  755. lpCurPtr += CbByteUnpack (&pWordInfo->dwIndexTopicCount, lpCurPtr);
  756. // Get the data location and size
  757. lpCurPtr += ReadFileOffset (&pWordInfo->dataLocation, lpCurPtr);
  758. lpCurPtr += CbByteUnpack(&pWordInfo->dwDataSize, lpCurPtr);
  // Ordering: string first, then word length (OCCF_LENGTH), then field
  // id (OCCF_FIELDID)
  759. if ((nCmp = StrCmpPascal2(pWord, pBTreeWord)) == 0)
  760. {
  761. if (occf & OCCF_LENGTH)
  762. nCmp = (int)((WORD)pWordInfo->dwWordLen - wWlen);
  763. if (nCmp == 0 && (occf & OCCF_FIELDID))
  764. nCmp = (int)(pWordInfo->dwFieldId - dwFieldId);
  765. }
  766. if (nCmp > 0)
  767. {
  768. // We didn't find the location of the word yet
  769. // Continue searching
  // Remember the entry just passed as the last word
  770. MEMCPY (pNodeInfo->pLastWord, pBTreeWord,
  771. *(LPUW)pBTreeWord+sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
  772. continue;
  773. }
  774. if (nCmp == 0)
  775. {
  // The word already exists in the index: update its data and
  // replace the entry
  776. if ((fRet = UpdateDataNode (lpipb, pWordInfo)) != S_OK)
  777. return(fRet);
  778. return AddRecordToBTree (lpipb, pWord, pWordInfo, cMaxLevel,
  779. REPLACE_WORD_01);
  780. }
  781. else
  782. {
  // The word sorts before this entry: insert it as a new word here.
  // cLevel equals cMaxLevel at this point, because the stem loop
  // above always runs until cLevel reaches cMaxLevel.
  783. if ((fRet = WriteNewDataRecord (lpipb, pWordInfo)) != S_OK)
  784. return(fRet);
  785. return AddRecordToBTree (lpipb, pWord, pWordInfo, cLevel, 0);
  786. }
  // Not reached: every path through the loop body continues or returns
  787. break;
  788. }
  789. return S_OK;
  790. }
  791. /*************************************************************************
  792. * @doc INTERNAL
  793. *
  794. * @func HRESULT PASCAL | ReadNewNode |
  795. * Read in a new node from the disk if it is not the top node.
  796. * For the top node, just reset various pointers
  797. *
  798. * @parm PNODEINFO | pNodeInfo |
  799. * Pointer to leaf info
  800. *
  801. * @parm int | fLeafNode|
  802. * TRUE if this is a leaf node
  803. *
  804. * @rdesc S_OK if succesful, otherwise other errors. On exit,
  805. * lpCurPtr wil point to the beginning of the 1st word in the
  806. * node
  807. *
  808. * @rcomm The format of the leaf node is different from a stem node
  809. * Stem node structure: *
  810. * CbLeft |* Word | PointerToNode *| Slack *
  811. * *
  812. * Leaf node structure: *
  813. * NxtBlkPtr|CbLeft|*Word|FieldId|TopicCnt|PointerToNode|DataSize*|Slack *
  814. * *
  815. *************************************************************************/
  816. PUBLIC HRESULT PASCAL FAR ReadNewNode (HFPB hfpb, PNODEINFO pNodeInfo,
  817. int fLeafNode)
  818. {
  819. ERRB errb;
  820. if (FileSeekRead (hfpb, pNodeInfo->pBuffer, pNodeInfo->nodeOffset,
  821. pNodeInfo->dwBlockSize, &errb) != (long)pNodeInfo->dwBlockSize)
  822. return E_BADFILE;
  823. pNodeInfo->pCurPtr = pNodeInfo->pBuffer;
  824. if (fLeafNode)
  825. {
  826. pNodeInfo->pCurPtr += ReadFileOffset (&pNodeInfo->nextNodeOffset,
  827. pNodeInfo->pBuffer);
  828. }
  829. else
  830. pNodeInfo->nextNodeOffset = foNil;
  831. pNodeInfo->cbLeft = *(LPUW)(pNodeInfo->pCurPtr); // erinfox RISC patch
  832. pNodeInfo->pCurPtr += sizeof(WORD);
  833. pNodeInfo->pMaxAddress = pNodeInfo->pBuffer + pNodeInfo->dwBlockSize -
  834. pNodeInfo->cbLeft;
  835. *(LPUW)(pNodeInfo->pLastWord) = *(LPUW)(pNodeInfo->pTmpResult) = 0;
  836. return S_OK;
  837. }
  838. PUBLIC HRESULT PASCAL FAR IndexOpenRW (_LPIPB lpipb, HFPB hfpbSysFile, LSZ lszFilename)
  839. {
  840. HFPB hfpb; // Handle to system file
  841. HRESULT fRet;
  842. ERRB errb;
  843. PHRESULT phr = &errb;
  844. PIH20 pHeader;
  845. int iIndex;
  846. LONG i;
  847. // Check the existence of the file
  848. if ((hfpb = FileOpen (hfpbSysFile, lszFilename,
  849. hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ, phr)) == 0)
  850. {
  851. return *phr;
  852. }
  853. FileClose (hfpb);
  854. // Reopen the file for read/write
  855. lpipb->hfpbIdxFile = FileOpen (hfpbSysFile, lszFilename,
  856. hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ_WRITE, phr);
  857. if ((fRet = ReadIndexHeader(lpipb->hfpbIdxFile,
  858. pHeader = &lpipb->BTreeData.Header)) != S_OK)
  859. {
  860. exit01:
  861. SetErrCode (phr, fRet);
  862. FileClose(lpipb->hfpbIdxFile);
  863. return fRet;
  864. }
  865. if (pHeader->version != VERCURRENT ||
  866. pHeader->FileStamp != INDEX_STAMP)
  867. {
  868. fRet = E_BADVERSION;
  869. goto exit01;
  870. }
  871. // incoming index and occurrence flags must match those in original index
  872. if (pHeader->occf != lpipb->occf ||
  873. pHeader->idxf != lpipb->idxf)
  874. {
  875. fRet = E_BADINDEXFLAGS;
  876. goto exit01;
  877. }
  878. // Update the compression key to be used by WriteDataNode later
  879. lpipb->cKey[CKEY_TOPIC_ID] = pHeader->ckeyTopicId;
  880. lpipb->cKey[CKEY_OCC_COUNT] = pHeader->ckeyOccCount;
  881. iIndex = CKEY_OCC_BASE;
  882. if (pHeader->occf & OCCF_COUNT)
  883. lpipb->cKey[iIndex++] = pHeader->ckeyWordCount;
  884. if (pHeader->occf & OCCF_OFFSET)
  885. lpipb->cKey[iIndex] = pHeader->ckeyOffset;
  886. // Update the maximum TopicId
  887. if (pHeader->dwMaxTopicId < lpipb->dwMaxTopicId)
  888. pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
  889. else
  890. lpipb->dwMaxTopicId = pHeader->dwMaxTopicId;
  891. // Get the file size.
  892. lpipb->foMaxOffset = FileSize (lpipb->hfpbIdxFile, phr);
  893. if (lpipb->idxf & IDXF_NORMALIZE)
  894. {
  895. // Load the sigma table
  896. if (FoEquals(pHeader->WeightTabOffset, foNil))
  897. {
  898. fRet = SetErrCode (phr, E_ASSERT);
  899. goto exit01;
  900. }
  901. if ((fRet = AllocSigmaTable (lpipb)) != S_OK)
  902. goto exit01;
  903. if (FileSeekRead (lpipb->hfpbIdxFile, lpipb->wi.hrgsigma,
  904. pHeader->WeightTabOffset, pHeader->WeightTabSize, phr) !=
  905. (LONG)pHeader->WeightTabSize)
  906. {
  907. fRet = errb;
  908. goto exit01;
  909. }
  910. if (lpipb->bState == DELETING_STATE)
  911. {
  912. // Square the sigma table
  913. // erinfox: off by one bug. change i = lpipb->dwMaxTopicId + 1
  914. // to lpipb->dwMaxTopicId because we have only allocated
  915. // (dwMaxTopicId + 1)*sizeof(float) bytes
  916. for (i = lpipb->dwMaxTopicId; i >= 0; i--)
  917. {
  918. lpipb->wi.hrgsigma[i] = lpipb->wi.hrgsigma[i] *
  919. lpipb->wi.hrgsigma[i];
  920. }
  921. }
  922. }
  923. /* ERIC */
  924. // Load or create a freelist (dwSize = 0)
  925. if (lpipb->bState == UPDATING_STATE)
  926. {
  927. if (pHeader->dwFreeListSize) // If a freelist is existing, read it, otherwise, create it.
  928. {
  929. LPBYTE lpbFreeList;
  930. if (pHeader->dwFreeListSize & 0x80000000)
  931. {
  932. pHeader->dwFreeListSize &= 0x7FFFFFFF;
  933. lpipb->foMaxOffset = FoSubFo(lpipb->foMaxOffset,MakeFo(pHeader->dwFreeListSize,0));
  934. }
  935. if(!(lpbFreeList = (LPBYTE) _GLOBALALLOCPTR(DLLGMEM_ZEROINIT, pHeader->dwFreeListSize)))
  936. {
  937. fRet = SetErrCode (phr, E_OUTOFMEMORY);
  938. goto exit01;
  939. }
  940. FileSeekRead (lpipb->hfpbIdxFile, (LPBYTE)lpbFreeList,
  941. pHeader->foFreeListOffset, pHeader->dwFreeListSize, phr);
  942. lpipb->hFreeList = FreeListInitFromMem(lpbFreeList, phr );
  943. _GLOBALFREEPTR(lpbFreeList);
  944. }
  945. else
  946. lpipb->hFreeList = FreeListInit( wDefaultFreeListSize, phr);
  947. }
  948. return S_OK;
  949. }
  950. PRIVATE PASCAL NEAR AddRecordToBTree (_LPIPB lpipb, LPB pWord,
  951. PWORDINFO pWordInfo, int cLevel, int fFlag)
  952. {
  953. PNODEINFO pNodeInfo;
  954. PNODEINFO pTmpNodeInfo;
  955. LPB pInsertPtr; // Pointer to insertion point
  956. LPB pWordStorage;
  957. LPB pLastWord;
  958. LPB pBuffer;
  959. BYTE fIsStemNode;
  960. WORD wWLen;
  961. WORD wNewRecSize; // New record size
  962. LONG cbByteMoved; // Number of bytes moved to leave room for new rec
  963. OCCF occf = lpipb->occf; // Occurrence field flags
  964. BYTE fLength = occf & OCCF_LENGTH;
  965. WORD cbLeft; // How many byte left in the current node?
  966. LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
  967. BYTE cbSkip;
  968. BYTE fEndNode;
  969. ERRB errb;
  970. if (cLevel == -1)
  971. {
  972. // The tree's level has increased by one
  973. int i;
  974. if (lpipb->BTreeData.Header.cIdxLevels >= MAX_TREE_HEIGHT - 1)
  975. return E_TREETOOBIG;
  976. /* Move down the entries to make room for the top node */
  977. for (i = lpipb->BTreeData.Header.cIdxLevels; i > 0 ; i-- )
  978. {
  979. lpipb->BTreeData.rgpNodeInfo[i] = lpipb->BTreeData.rgpNodeInfo[i-1];
  980. lpipb->BTreeData.rgpTmpNodeInfo[i] = lpipb->BTreeData.rgpTmpNodeInfo[i-1];
  981. }
  982. // Increase tree level
  983. lpipb->BTreeData.Header.cIdxLevels ++;
  984. if ((pNodeInfo = lpipb->BTreeData.rgpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
  985. return(E_OUTOFMEMORY);
  986. if ((pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
  987. return(E_OUTOFMEMORY);
  988. pWordStorage = (pBuffer = pNodeInfo->pBuffer) + sizeof(WORD);
  989. if (fFlag & USE_BOTH_NODE_40)
  990. {
  991. if (fFlag & USE_TEMP_FOR_RIGHT_NODE_10)
  992. {
  993. // Link to the left child node
  994. pWordStorage += PrefixCompressWord (pWordStorage,
  995. lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult,
  996. EmptyWord, fLength);
  997. pWordStorage += CopyFileOffset (pWordStorage,
  998. lpipb->BTreeData.rgpNodeInfo[1]->nodeOffset);
  999. // Link to the right child node
  1000. pWordStorage += PrefixCompressWord (pWordStorage,
  1001. lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult,
  1002. lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult, fLength);
  1003. pWordStorage += CopyFileOffset (pWordStorage,
  1004. lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
  1005. }
  1006. else
  1007. {
  1008. // Link to the left child node
  1009. pWordStorage += PrefixCompressWord (pWordStorage,
  1010. lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult,
  1011. EmptyWord, fLength);
  1012. pWordStorage += CopyFileOffset (pWordStorage,
  1013. lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
  1014. // Link to the right child node
  1015. pWordStorage += PrefixCompressWord (pWordStorage,
  1016. lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult,
  1017. lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult, fLength);
  1018. pWordStorage += CopyFileOffset (pWordStorage,
  1019. lpipb->BTreeData.rgpNodeInfo[1]->nodeOffset);
  1020. }
  1021. }
  1022. else
  1023. {
  1024. // Link to the right child node
  1025. pWordStorage += PrefixCompressWord (pWordStorage,
  1026. pWord, EmptyWord, fLength);
  1027. pWordStorage += CopyFileOffset (pWordStorage,
  1028. lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
  1029. }
  1030. // Set all the parameter
  1031. pNodeInfo->pCurPtr = pBuffer + sizeof(WORD);
  1032. pNodeInfo->cbLeft = (LONG)(pBuffer - pWordStorage + dwBlockSize);
  1033. pNodeInfo->pMaxAddress = pBuffer + dwBlockSize - pNodeInfo->cbLeft;
  1034. SETWORD(pBuffer, (WORD)pNodeInfo->cbLeft);
  1035. // Write out the new node
  1036. if ((FileSeekWrite(lpipb->hfpbIdxFile, pBuffer,
  1037. lpipb->foMaxOffset, dwBlockSize, &errb)) != (LONG)dwBlockSize)
  1038. return(errb);
  1039. // Remember the offset of this node
  1040. // Set the pointer to the top stem node
  1041. lpipb->BTreeData.Header.foIdxRoot = pNodeInfo->nodeOffset =
  1042. lpipb->foMaxOffset;
  1043. lpipb->BTreeData.Header.nidIdxRoot = pNodeInfo->nodeOffset.dwOffset;
  1044. lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
  1045. #if 0
  1046. return CheckStemNode (pNodeInfo);
  1047. #else
  1048. return(S_OK);
  1049. #endif
  1050. }
  1051. // Initialize data
  1052. pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
  1053. pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
  1054. pLastWord = pNodeInfo->pLastWord;
  1055. pBuffer = pNodeInfo->pBuffer;
  1056. if (fIsStemNode = (cLevel < lpipb->BTreeData.Header.cIdxLevels - 1))
  1057. cbSkip = sizeof(WORD);
  1058. else
  1059. cbSkip = sizeof(WORD) + FOFFSET_SIZE;
  1060. fEndNode = (pNodeInfo->pCurPtr >= pNodeInfo->pMaxAddress);
  1061. // Calculate how many byte left are there in the old node
  1062. pInsertPtr = pNodeInfo->pCurPtr; // Pointer to insertion point
  1063. cbLeft = (WORD)pNodeInfo->cbLeft;
  1064. // Handle special simple cases
  1065. if (fFlag & UPDATE_NODE_ADDRESS_08)
  1066. {
  1067. // Skip the next word
  1068. pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
  1069. pInsertPtr, &wWLen);
  1070. if (fFlag & USE_TEMP_NODE_04)
  1071. {
  1072. CopyFileOffset (pInsertPtr,
  1073. lpipb->BTreeData.rgpTmpNodeInfo[cLevel + 1]->nodeOffset);
  1074. }
  1075. else
  1076. {
  1077. CopyFileOffset (pInsertPtr,
  1078. lpipb->BTreeData.rgpNodeInfo[cLevel + 1]->nodeOffset);
  1079. }
  1080. #if 0
  1081. return(fIsStemNode ? CheckStemNode (pNodeInfo) :
  1082. CheckLeafNode (pNodeInfo, occf));
  1083. #else
  1084. return(S_OK);
  1085. #endif
  1086. }
  1087. if (fFlag & (REPLACE_WORD_01 | SKIP_NEXT_WORD_20))
  1088. {
  1089. // We get more room from the replaced word
  1090. DWORD dwTemp;
  1091. // Skip the next word
  1092. if (fFlag & SKIP_NEXT_WORD_20)
  1093. {
  1094. pInsertPtr = ExtractWord(pLastWord, pInsertPtr, &wWLen);
  1095. }
  1096. else
  1097. {
  1098. pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
  1099. pInsertPtr, &wWLen);
  1100. }
  1101. // Skip the data
  1102. if (fIsStemNode)
  1103. pInsertPtr += FOFFSET_SIZE;
  1104. else
  1105. {
  1106. // Skip field id, topic count. fileoffset, datasize
  1107. if (occf & OCCF_FIELDID)
  1108. pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr); // FieldId
  1109. if (occf & OCCF_TOPICID)
  1110. {
  1111. pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
  1112. pInsertPtr += FOFFSET_SIZE;
  1113. pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
  1114. }
  1115. }
  1116. if (fFlag & SKIP_NEXT_WORD_20)
  1117. pNodeInfo->pCurPtr = pInsertPtr;
  1118. else
  1119. {
  1120. // Remove the old data
  1121. MEMMOVE (pNodeInfo->pCurPtr, pInsertPtr,
  1122. cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
  1123. pNodeInfo->pMaxAddress =
  1124. (pInsertPtr = pNodeInfo->pCurPtr) + cbByteMoved;
  1125. cbLeft = (WORD)(dwBlockSize - (pNodeInfo->pMaxAddress - pBuffer));
  1126. }
  1127. if (pInsertPtr >= pNodeInfo->pMaxAddress)
  1128. fEndNode = TRUE;
  1129. }
  1130. //Calculate the approximate number of bytes needed for the
  1131. // new data by compress it to the temporary block
  1132. if (fIsStemNode)
  1133. {
  1134. if (pInsertPtr <= pNodeInfo->pBuffer + sizeof(WORD))
  1135. {
  1136. // This is the first word, there is no previous one
  1137. *(LPWORD)pLastWord = 0;
  1138. }
  1139. wNewRecSize = (WORD) CopyNewDataToStemNode (lpipb, pTmpNodeInfo,
  1140. pWord, pLastWord, cLevel, fFlag);
  1141. }
  1142. else
  1143. {
  1144. if (pInsertPtr <= pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE)
  1145. {
  1146. // This is the first word, there is no previous one
  1147. *(LPWORD)pLastWord = 0;
  1148. }
  1149. wNewRecSize = (WORD) CopyNewDataToLeafNode (lpipb, pTmpNodeInfo,
  1150. pWordInfo, pWord, pLastWord);
  1151. }
  1152. wNewRecSize -= cbSkip;
  1153. // I reserved about 4 byte to ensure that when we have enough room
  1154. // we do have enough room. Compression may change the size of the
  1155. // record, causing us to run out of room when copying the new data
  1156. // over
  1157. if (cbLeft - sizeof(DWORD) > wNewRecSize)
  1158. {
  1159. // We have enough room for the new data. Just insert the new data
  1160. pWordStorage = pTmpNodeInfo->pCurPtr;
  1161. if (!fEndNode)
  1162. {
  1163. // We need to recompress the next word
  1164. MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
  1165. *(LPUW)pWord + sizeof(WORD) + sizeof(WORD)); //erinfox RISC patch
  1166. pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
  1167. pInsertPtr, &wWLen);
  1168. cbByteMoved = PrefixCompressWord (pWordStorage,
  1169. pTmpNodeInfo->pTmpResult, pWord, fLength);
  1170. wNewRecSize += (WORD)cbByteMoved;
  1171. // Reset the last word for pBTreeWord
  1172. MEMCPY (pNodeInfo->pTmpResult, pLastWord,
  1173. *(LPUW)pLastWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
  1174. }
  1175. // Make room for the new data
  1176. if ((cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr)) <= 0)
  1177. cbByteMoved = 0;
  1178. else
  1179. MEMMOVE(pNodeInfo->pCurPtr + wNewRecSize, pInsertPtr,
  1180. cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
  1181. // Copy the new data
  1182. MEMCPY (pNodeInfo->pCurPtr, pTmpNodeInfo->pBuffer + cbSkip,
  1183. wNewRecSize);
  1184. // Update data
  1185. pNodeInfo->pMaxAddress = pNodeInfo->pCurPtr + wNewRecSize +
  1186. cbByteMoved;
  1187. pNodeInfo->cbLeft = cbLeft =
  1188. (WORD)(dwBlockSize - (pNodeInfo->pMaxAddress - pBuffer));
  1189. SETWORD(pNodeInfo->pBuffer + cbSkip - sizeof(WORD),
  1190. (WORD)cbLeft);
  1191. pNodeInfo->fFlag = TO_BE_UPDATE;
  1192. // Change the parent node
  1193. if (fEndNode && cLevel)
  1194. {
  1195. return (AddRecordToBTree (lpipb, pWord, pWordInfo, cLevel - 1,
  1196. REPLACE_WORD_01));
  1197. }
  1198. #if 0
  1199. return(fIsStemNode ? CheckStemNode (pNodeInfo) :
  1200. CheckLeafNode (pNodeInfo, occf));
  1201. #else
  1202. return(S_OK);
  1203. #endif
  1204. return S_OK;
  1205. }
  1206. // Case 3: Add to the middle. This is a complex one, since we have
  1207. // to split the node into 2.
  1208. return(SplitNodeAndAddData (lpipb, pWord, pWordInfo, cLevel, fFlag,
  1209. fIsStemNode));
  1210. }
  1211. PRIVATE int PASCAL NEAR SplitNodeAndAddData (_LPIPB lpipb, LPB pWord,
  1212. PWORDINFO pWordInfo, int cLevel, int fFlag, int fIsStemNode)
  1213. {
  1214. PNODEINFO pNodeInfo;
  1215. PNODEINFO pTmpNodeInfo;
  1216. LONG cbByteMoved;
  1217. WORD leftSize;
  1218. WORD rightSize;
  1219. WORD wWLen;
  1220. LPB pInsertPtr;
  1221. LPB pWordStorage;
  1222. int cbSkip;
  1223. DWORD dwBlockSize;
  1224. HRESULT fRet;
  1225. BYTE fLength = lpipb->occf & OCCF_LENGTH;
  1226. LPB pLastWord;
  1227. LPB pTemp;
  1228. LPB pBuffer;
  1229. if (fIsStemNode)
  1230. cbSkip = 0;
  1231. else
  1232. cbSkip = FOFFSET_SIZE;
  1233. // Variable initialization
  1234. pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
  1235. pBuffer = pNodeInfo->pBuffer;
  1236. pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
  1237. pInsertPtr = pNodeInfo->pCurPtr;
  1238. dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
  1239. pLastWord = pNodeInfo->pLastWord;
  1240. // Calculate approximately the left & right side node sizes
  1241. leftSize = (WORD)(pInsertPtr - pBuffer - cbSkip - sizeof(WORD));
  1242. rightSize = (WORD)(pNodeInfo->pMaxAddress - pNodeInfo->pCurPtr);
  1243. if (leftSize >= rightSize)
  1244. {
  1245. // We add to the right. The new data will be 1st
  1246. // Example:
  1247. // Add 4 into 1 2 3 5 --> 1 2 3 and 4 5
  1248. if (fIsStemNode)
  1249. {
  1250. CopyNewDataToStemNode (lpipb, pTmpNodeInfo,
  1251. pWord, EmptyWord, cLevel, fFlag);
  1252. pTemp = pTmpNodeInfo->pBuffer + sizeof(WORD);
  1253. }
  1254. else
  1255. {
  1256. CopyNewDataToLeafNode (lpipb, pTmpNodeInfo,
  1257. pWordInfo, pWord, EmptyWord);
  1258. pTemp = pTmpNodeInfo->pBuffer + sizeof(WORD) +
  1259. FOFFSET_SIZE;
  1260. }
  1261. pWordStorage = pTmpNodeInfo->pCurPtr;
  1262. // Move back the pointer to the beginning of the word
  1263. // for future reference
  1264. pTmpNodeInfo->pCurPtr = pTemp;
  1265. if (rightSize > 0)
  1266. {
  1267. // Extract the word on the right of the insertion point
  1268. MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
  1269. *(LPUW)pWord + sizeof(WORD)); // erinfox RISC patch
  1270. pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
  1271. pInsertPtr, &wWLen);
  1272. pWordStorage += PrefixCompressWord (pWordStorage,
  1273. pTmpNodeInfo->pTmpResult, pWord, fLength);
  1274. // Copy data on the right of the insertion point to the new node
  1275. MEMCPY (pWordStorage, pInsertPtr,
  1276. cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
  1277. pWordStorage += cbByteMoved;
  1278. }
  1279. pTmpNodeInfo->pMaxAddress = pWordStorage;
  1280. // Update the right node
  1281. SETWORD(pTmpNodeInfo->pBuffer + cbSkip,
  1282. (WORD)(pTmpNodeInfo->cbLeft =
  1283. (LONG)(dwBlockSize - (pWordStorage - pTmpNodeInfo->pBuffer))));
  1284. pTmpNodeInfo->pMaxAddress = pTmpNodeInfo->pBuffer +
  1285. dwBlockSize - pTmpNodeInfo->cbLeft;
  1286. #if 0
  1287. if (fIsStemNode)
  1288. CheckStemNode (pTmpNodeInfo);
  1289. else
  1290. CheckLeafNode (pTmpNodeInfo, lpipb->occf);
  1291. #endif
  1292. MEMSET (pWordStorage, 0, pTmpNodeInfo->cbLeft);
  1293. if ((fRet = CreateNewNode (lpipb, cLevel,
  1294. fIsStemNode, NEW_NODE_ON_RIGHT)) != S_OK)
  1295. return(fRet);
  1296. // Update the left node
  1297. pNodeInfo->fFlag = TO_BE_UPDATE;
  1298. SETWORD(pBuffer + cbSkip, (WORD)(pNodeInfo->cbLeft =
  1299. (LONG)(dwBlockSize - (pNodeInfo->pCurPtr - pBuffer))));
  1300. #ifdef _DEBUG
  1301. MEMSET (pNodeInfo->pCurPtr, 0, pNodeInfo->cbLeft);
  1302. #endif
  1303. pNodeInfo->pMaxAddress = pBuffer + dwBlockSize - pNodeInfo->cbLeft;
  1304. pNodeInfo->fFlag = TO_BE_UPDATE;
  1305. #if 0
  1306. if (fIsStemNode)
  1307. CheckStemNode (pNodeInfo);
  1308. else
  1309. CheckLeafNode (pNodeInfo, lpipb->occf);
  1310. #endif
  1311. if (cLevel == 0)
  1312. {
  1313. if (pNodeInfo->pCurPtr >= pNodeInfo->pMaxAddress - 1)
  1314. pNodeInfo->pCurPtr = pNodeInfo->pBuffer + cbSkip + sizeof(WORD);
  1315. GetLastWordInNode (lpipb, pNodeInfo, fIsStemNode);
  1316. GetLastWordInNode (lpipb, pTmpNodeInfo, fIsStemNode);
  1317. return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
  1318. USE_BOTH_NODE_40 | USE_TEMP_FOR_RIGHT_NODE_10);
  1319. }
  1320. if (rightSize > 0)
  1321. {
  1322. if ((fRet = AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
  1323. USE_TEMP_NODE_04 | UPDATE_NODE_ADDRESS_08)) != S_OK)
  1324. return fRet;
  1325. return AddRecordToBTree (lpipb, pNodeInfo->pLastWord, NULL, cLevel - 1, 0);
  1326. }
  1327. if (fFlag & REPLACE_WORD_01)
  1328. {
  1329. // rightSize == 0 means that we are adding to the end of the block.
  1330. // REPLACE_WORD means that we are replacing the same word, so basically
  1331. // we have to add a new entry for the left block
  1332. if ((fRet = AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
  1333. USE_TEMP_NODE_04 | REPLACE_WORD_01)) != S_OK)
  1334. return fRet;
  1335. return AddRecordToBTree (lpipb, pNodeInfo->pLastWord, NULL,
  1336. cLevel - 1, 0);
  1337. }
  1338. // Add to the end
  1339. return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
  1340. USE_TEMP_NODE_04 | SKIP_NEXT_WORD_20);
  1341. }
  1342. //**********************************************
  1343. //
  1344. // Add the new data to the end of the leftnode
  1345. //
  1346. //**********************************************
  1347. // We add to the left. The new data will be last
  1348. // Example:
  1349. // Add 2 into 1 3 4 5 --> 1 2 and 3 4 5
  1350. pTmpNodeInfo->pCurPtr = pWordStorage =
  1351. pTmpNodeInfo->pBuffer + cbSkip + sizeof(WORD);
  1352. // Copy the data on the left to the new node
  1353. if (cbByteMoved = leftSize)
  1354. {
  1355. MEMCPY(pWordStorage, pBuffer + cbSkip + sizeof(WORD), cbByteMoved);
  1356. pWordStorage += cbByteMoved;
  1357. }
  1358. // Emit new data
  1359. pWordStorage += PrefixCompressWord (pWordStorage,
  1360. pWord, pLastWord, lpipb->occf & OCCF_LENGTH);
  1361. if (fIsStemNode)
  1362. {
  1363. if (fFlag & USE_TEMP_NODE_04)
  1364. {
  1365. pWordStorage += CopyFileOffset (pWordStorage,
  1366. lpipb->BTreeData.rgpTmpNodeInfo[cLevel+1]->nodeOffset);
  1367. }
  1368. else
  1369. {
  1370. pWordStorage += CopyFileOffset (pWordStorage,
  1371. lpipb->BTreeData.rgpNodeInfo[cLevel+1]->nodeOffset);
  1372. }
  1373. }
  1374. else
  1375. {
  1376. // Emit field id, topic count. fileoffset, datasize
  1377. if (lpipb->occf & OCCF_FIELDID)
  1378. pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwFieldId);
  1379. pWordStorage += CbBytePack (pWordStorage,
  1380. pWordInfo->dwMergeTopicCount);
  1381. pWordStorage += CopyFileOffset (pWordStorage, pWordInfo->dataLocation);
  1382. pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwDataSize);
  1383. }
  1384. SETWORD (pTmpNodeInfo->pBuffer + cbSkip,
  1385. (WORD)(pTmpNodeInfo->cbLeft = (LONG)(pNodeInfo->dwBlockSize
  1386. - (pWordStorage - pTmpNodeInfo ->pBuffer))));
  1387. pTmpNodeInfo->pMaxAddress = pWordStorage;
  1388. if ((fRet = CreateNewNode (lpipb, cLevel, fIsStemNode,
  1389. NEW_NODE_ON_LEFT)) != S_OK)
  1390. return(fRet);
  1391. // Update the right node
  1392. if (leftSize > 0)
  1393. {
  1394. MEMMOVE(pNodeInfo->pCurPtr = pBuffer + cbSkip + sizeof(WORD),
  1395. pInsertPtr, (size_t)(pNodeInfo->pMaxAddress - pInsertPtr));
  1396. pNodeInfo->pMaxAddress -= cbByteMoved;
  1397. // Reconstruct the 1st word in the node.
  1398. if (fFlag & REPLACE_WORD_01)
  1399. {
  1400. MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
  1401. *(LPUW)pWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
  1402. }
  1403. else
  1404. {
  1405. MEMCPY (pTmpNodeInfo->pTmpResult, pLastWord,
  1406. *(LPUW)pLastWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
  1407. }
  1408. }
  1409. pInsertPtr = pNodeInfo->pCurPtr;
  1410. pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult, pTemp = pInsertPtr, &wWLen);
  1411. cbByteMoved = (LONG)(pInsertPtr - pTemp);
  1412. // Recompress the word using pLastWord of pTmpNodeInfo
  1413. wWLen = (WORD) PrefixCompressWord (pTmpNodeInfo->pLastWord,
  1414. pTmpNodeInfo->pTmpResult, EmptyWord, fLength);
  1415. // Reserved room for the word
  1416. pWordStorage = pBuffer + cbSkip + sizeof(WORD);
  1417. MEMMOVE (pWordStorage + wWLen, pInsertPtr,
  1418. (size_t)(pNodeInfo->pMaxAddress - pInsertPtr));
  1419. // Copy down the word
  1420. MEMCPY(pWordStorage, pTmpNodeInfo->pLastWord, wWLen);
  1421. pNodeInfo->pMaxAddress += wWLen - cbByteMoved;
  1422. // Update the right node
  1423. SETWORD(pBuffer + cbSkip,
  1424. (WORD)(pNodeInfo->cbLeft =(WORD)(dwBlockSize -
  1425. (pNodeInfo->pMaxAddress - pBuffer))));
  1426. pNodeInfo->fFlag = TO_BE_UPDATE;
  1427. #ifdef _DEBUG
  1428. MEMSET (pNodeInfo->pMaxAddress, 0, pNodeInfo->cbLeft);
  1429. #endif
  1430. if (cLevel == 0)
  1431. {
  1432. GetLastWordInNode (lpipb, pNodeInfo, fIsStemNode);
  1433. GetLastWordInNode (lpipb, pTmpNodeInfo, fIsStemNode);
  1434. return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
  1435. USE_BOTH_NODE_40);
  1436. }
  1437. return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
  1438. USE_TEMP_NODE_04);
  1439. return(fRet);
  1440. }
  1441. VOID GetLastWordInNode (_LPIPB lpipb, PNODEINFO pNodeInfo, BOOL fIsStemNode)
  1442. {
  1443. LPB pInsertPtr = pNodeInfo->pCurPtr;
  1444. LPB pMaxAddress = pNodeInfo->pMaxAddress;
  1445. WORD wWLen;
  1446. DWORD dwTemp;
  1447. MEMCPY (pNodeInfo->pTmpResult, EmptyWord, 4);
  1448. while (pInsertPtr < pNodeInfo->pMaxAddress - 1)
  1449. {
  1450. pInsertPtr = ExtractWord(pNodeInfo->pTmpResult, pInsertPtr, &wWLen);
  1451. if (!fIsStemNode)
  1452. {
  1453. if (lpipb->occf & OCCF_FIELDID)
  1454. pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
  1455. if (lpipb->occf & OCCF_TOPICID)
  1456. pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);// Topic count
  1457. }
  1458. pInsertPtr += FOFFSET_SIZE; // FileOffset
  1459. if (!fIsStemNode)
  1460. pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
  1461. }
  1462. }
  1463. PRIVATE HRESULT PASCAL NEAR CreateNewNode(_LPIPB lpipb, int cLevel,
  1464. int fIsStemNode, int fAfter)
  1465. {
  1466. PNODEINFO pNodeInfo;
  1467. PNODEINFO pTmpNodeInfo;
  1468. ERRB errb;
  1469. LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
  1470. pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
  1471. pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
  1472. #ifdef _DEBUG
  1473. dwNewNodeSize += dwBlockSize;
  1474. #endif
  1475. if (!fIsStemNode)
  1476. {
  1477. // Add the new node into the linked list
  1478. if (fAfter)
  1479. CopyFileOffset (pTmpNodeInfo->pBuffer, pNodeInfo->nextNodeOffset);
  1480. else
  1481. CopyFileOffset (pTmpNodeInfo->pBuffer, pNodeInfo->nodeOffset);
  1482. }
  1483. // Write out the new node
  1484. if ((FileSeekWrite(lpipb->hfpbIdxFile, pTmpNodeInfo->pBuffer,
  1485. lpipb->foMaxOffset, dwBlockSize, &errb)) != (LONG)dwBlockSize)
  1486. return(errb);
  1487. // Remember the offset of this node
  1488. pTmpNodeInfo->nodeOffset = lpipb->foMaxOffset;
  1489. if (!fIsStemNode)
  1490. {
  1491. if (fAfter)
  1492. {
  1493. CopyFileOffset (pNodeInfo->pBuffer, lpipb->foMaxOffset);
  1494. pNodeInfo->fFlag = TO_BE_UPDATE;
  1495. }
  1496. else
  1497. {
  1498. // Update the previous link
  1499. if (!FoEquals(pNodeInfo->prevNodeOffset, foNil))
  1500. {
  1501. BYTE TempBuf[FOFFSET_SIZE + 1];
  1502. CopyFileOffset (TempBuf,lpipb->foMaxOffset);
  1503. if ((FileSeekWrite(lpipb->hfpbIdxFile, TempBuf,
  1504. pNodeInfo->prevNodeOffset, FOFFSET_SIZE,
  1505. &errb)) != FOFFSET_SIZE)
  1506. return(errb);
  1507. }
  1508. }
  1509. }
  1510. lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
  1511. return(S_OK);
  1512. }
  1513. PRIVATE HRESULT PASCAL NEAR WriteNewDataRecord (_LPIPB lpipb, PWORDINFO pWordInfo)
  1514. {
  1515. PFILEDATA pOutFile = &lpipb->OutFile;
  1516. DWORD dwBlockSize;
  1517. ERRB errb;
  1518. HRESULT fRet;
  1519. FREEBLOCK FreeBlock;
  1520. // Reset the characteristic of the file
  1521. pOutFile->pCurrent = pOutFile->pMem;
  1522. pOutFile->cbLeft = pOutFile->dwMax;
  1523. pOutFile->ibit = cbitBYTE - 1;
  1524. FileSeek (pOutFile->fFile,
  1525. pOutFile->foPhysicalOffset = foNil, 0, &errb);
  1526. // Write out the data into the temp file
  1527. if ((dwBlockSize = WriteDataNode (lpipb,
  1528. pWordInfo->dwMergeTopicCount = pWordInfo->dwNewTopicCount, &errb)) == 0)
  1529. return errb;
  1530. // Write out the output buffer
  1531. if (FileWrite (pOutFile->fFile, pOutFile->pMem,
  1532. (LONG)(pOutFile->pCurrent - pOutFile->pMem), &errb) !=
  1533. (LONG) (pOutFile->pCurrent - pOutFile->pMem))
  1534. return(errb);
  1535. // if ((errb.err = FileFlush (pOutFile->fFile)) != S_OK)
  1536. // return(errb.err);
  1537. pWordInfo->dwDataSize = dwBlockSize;
  1538. // Find the smallest free block that fits the new data
  1539. if (GetFreeBlock (lpipb, &FreeBlock, dwBlockSize) != S_OK)
  1540. {
  1541. #ifdef _DEBUGFREE
  1542. _DPF2("GetFreeBlock failed. Requested %ld bytes, appending to EOF(%ld)\n", dwBlockSize, lpipb->foMaxOffset.dwOffset);
  1543. #endif
  1544. // There is no free block large enough to store the data
  1545. if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
  1546. lpipb->foMaxOffset, dwBlockSize)) != S_OK)
  1547. return fRet;
  1548. pWordInfo->dataLocation = lpipb->foMaxOffset;
  1549. lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
  1550. #ifdef _DEBUG
  1551. dwNewDataSize += dwBlockSize;
  1552. #endif
  1553. return(S_OK);
  1554. }
  1555. // There is a free block large enough to store the data
  1556. if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
  1557. FreeBlock.foBlockOffset, dwBlockSize)) != S_OK)
  1558. return fRet;
  1559. pWordInfo->dataLocation = FreeBlock.foBlockOffset;
  1560. return S_OK;
  1561. }
  1562. // erinfox: return a block from the free list if possible
  1563. PRIVATE HRESULT GetFreeBlock (_LPIPB lpipb, PFREEBLOCK pFreeBlock,
  1564. DWORD dwBlockSize)
  1565. {
  1566. FILEOFFSET foFreeListOffset;
  1567. ERRB errb;
  1568. // if it can't find a free block, it returns an error
  1569. foFreeListOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwBlockSize,0), &errb);
  1570. if (FoIsNil(foFreeListOffset))
  1571. {
  1572. return errb;
  1573. }
  1574. pFreeBlock->foBlockOffset = foFreeListOffset;
  1575. return S_OK;
  1576. }
  1577. PRIVATE HRESULT PASCAL NEAR CopyBlockFile (PFILEDATA pFileData, HFPB hfpbDest,
  1578. FILEOFFSET foOffset, DWORD dwBlockSize)
  1579. {
  1580. LONG cbCopied;
  1581. ERRB errb;
  1582. // Initialize variable
  1583. errb = S_OK;
  1584. // Seek to the right locations
  1585. FileSeek (pFileData->fFile, foNil, 0, &errb);
  1586. if (errb != S_OK)
  1587. return(errb);
  1588. FileSeek (hfpbDest, foOffset, 0, &errb);
  1589. if (errb != S_OK)
  1590. return(errb);
  1591. // Do the copy
  1592. while (dwBlockSize)
  1593. {
  1594. if ((cbCopied = dwBlockSize) > pFileData->dwMax)
  1595. cbCopied = pFileData->dwMax;
  1596. if (FileRead (pFileData->fFile, pFileData->pMem, cbCopied, &errb) !=
  1597. cbCopied)
  1598. return(E_FILEREAD);
  1599. if (FileWrite(hfpbDest, pFileData->pMem, cbCopied, &errb) != cbCopied)
  1600. return(E_FILEWRITE);
  1601. dwBlockSize -= cbCopied;
  1602. }
  1603. return(S_OK);
  1604. }
  1605. PRIVATE HRESULT PASCAL NEAR UpdateDataNode (_LPIPB lpipb, PWORDINFO pWordInfo)
  1606. {
  1607. // Local replacement Variables
  1608. PBTREEDATA pTreeData = &lpipb->BTreeData;
  1609. PFILEDATA pOutFile = &lpipb->OutFile; // Output data structure
  1610. PFILEDATA pInFile = &lpipb->InFile; // Input data structre
  1611. HFPB fFile = pOutFile->fFile; // Output file handle
  1612. PNODEINFO pIndexDataNode = lpipb->pIndexDataNode;
  1613. DWORD dwNewDataSize;
  1614. ERRB errb;
  1615. // Working Variables
  1616. DWORD dwEncodedSize = 0; // Size of encoded block
  1617. DWORD dwTopicIdDelta; // Really only used for weight values
  1618. DWORD dwNewTopicId = 0;
  1619. DWORD dwIndexTopicId = 0;
  1620. DWORD dwNewTopicCount;
  1621. DWORD dwIndexTopicCount;
  1622. DWORD dwTopicCount;
  1623. FILEOFFSET foStart; // Physical beginning of bit compression block
  1624. WORD wWeight = 0; // Only used when IDXF_NORMALIZE is set
  1625. DWORD dwTopicId = 0; // Only used when IDXF_NORMALIZE is set
  1626. int cbTemp; // # of compressed bytes that uncompressed
  1627. OCCF occf = lpipb->occf;
  1628. BYTE fetchOldData;
  1629. BYTE fetchNewData;
  1630. PIH20 pHeader = &lpipb->BTreeData.Header;
  1631. HRESULT fRet;
  1632. // Initialize variables
  1633. wWeight = 0; // UNDONE: Don't need it
  1634. // Reset the file pointer
  1635. FileSeek (pOutFile->fFile,
  1636. foStart = pOutFile->foPhysicalOffset = foNil, 0, &errb);
  1637. pOutFile->pCurrent = pOutFile->pMem;
  1638. pOutFile->cbLeft = pOutFile->dwMax;
  1639. pOutFile->ibit = cbitBYTE - 1;
  1640. dwIndexTopicCount = pWordInfo->dwIndexTopicCount;
  1641. dwNewTopicCount = pWordInfo->dwNewTopicCount;
  1642. fetchOldData = fetchNewData = TRUE;
  1643. pWordInfo->dwOldTopicId = pWordInfo->dwNewTopicId = dwTopicCount = 0;
  1644. // Initialize pIndexDataNode structure
  1645. pIndexDataNode->nodeOffset = pWordInfo->dataLocation;
  1646. pIndexDataNode->dwDataSizeLeft = pWordInfo->dwDataSize;
  1647. if ((fRet = ReadNewData(pIndexDataNode)) != S_OK)
  1648. return(fRet);
  1649. while (dwIndexTopicCount && dwNewTopicCount)
  1650. {
  1651. // Get the topicId from the new file
  1652. if (fetchNewData)
  1653. {
  1654. if (pInFile->cbLeft < 2 * sizeof (DWORD))
  1655. {
  1656. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  1657. pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
  1658. pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft,
  1659. &errb);
  1660. pInFile->dwMax = pInFile->cbLeft;
  1661. pInFile->pCurrent = pInFile->pMem;
  1662. }
  1663. cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
  1664. pInFile->pCurrent += cbTemp;
  1665. pInFile->cbLeft -= cbTemp;
  1666. pWordInfo->dwNewTopicId = (dwNewTopicId += dwTopicIdDelta);
  1667. fetchNewData = FALSE;
  1668. }
  1669. if (fetchOldData)
  1670. {
  1671. if (pIndexDataNode->ibit < cbitBYTE - 1)
  1672. {
  1673. pIndexDataNode->ibit = cbitBYTE - 1;
  1674. pIndexDataNode->pCurPtr ++;
  1675. }
  1676. // Get the topicId from the index file
  1677. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
  1678. &dwTopicIdDelta)) != S_OK)
  1679. return fRet;
  1680. pWordInfo->dwIndexTopicId = (dwIndexTopicId += dwTopicIdDelta);
  1681. fetchOldData = FALSE;
  1682. }
  1683. if (dwIndexTopicId < dwNewTopicId)
  1684. {
  1685. if ((fRet = EmitOldData (lpipb, pIndexDataNode,
  1686. pWordInfo)) != S_OK)
  1687. return(fRet);
  1688. fetchOldData = TRUE;
  1689. dwTopicCount++;
  1690. dwIndexTopicCount --;
  1691. }
  1692. else if (dwIndexTopicId == dwNewTopicId)
  1693. {
  1694. DWORD dwTmp;
  1695. if (lpipb->idxf & IDXF_NORMALIZE)
  1696. {
  1697. if ((fRet = FGetBits(pIndexDataNode, &dwTmp,
  1698. sizeof (USHORT) * cbitBYTE)) != S_OK)
  1699. return fRet;
  1700. }
  1701. if (occf & OCCF_HAVE_OCCURRENCE)
  1702. {
  1703. if ((fRet = SkipOldData (lpipb, pIndexDataNode)) != S_OK)
  1704. return(fRet);
  1705. }
  1706. fetchOldData = TRUE;
  1707. dwIndexTopicCount --;
  1708. if ((fRet = EmitNewData (lpipb, pWordInfo, FALSE)) != S_OK)
  1709. return(fRet);
  1710. dwNewTopicCount --;
  1711. fetchNewData = TRUE;
  1712. dwTopicCount++;
  1713. }
  1714. else
  1715. {
  1716. if ((fRet = EmitNewData (lpipb, pWordInfo, TRUE)) != S_OK)
  1717. return(fRet);
  1718. dwNewTopicCount --;
  1719. fetchNewData = TRUE;
  1720. pWordInfo->dwIndexTopicCount++;
  1721. dwTopicCount++;
  1722. }
  1723. }
  1724. while (dwIndexTopicCount)
  1725. {
  1726. if (fetchOldData)
  1727. {
  1728. if (pIndexDataNode->ibit < cbitBYTE - 1)
  1729. {
  1730. pIndexDataNode->ibit = cbitBYTE - 1;
  1731. pIndexDataNode->pCurPtr ++;
  1732. }
  1733. // Get the topicId from the index file
  1734. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
  1735. &dwTopicIdDelta)) != S_OK)
  1736. return fRet;
  1737. pWordInfo->dwIndexTopicId = (dwIndexTopicId += dwTopicIdDelta);
  1738. fetchOldData = FALSE;
  1739. }
  1740. if ((fRet = EmitOldData (lpipb, pIndexDataNode,
  1741. pWordInfo)) != S_OK)
  1742. return(fRet);
  1743. fetchOldData = TRUE;
  1744. dwIndexTopicCount --;
  1745. dwTopicCount++;
  1746. }
  1747. while (dwNewTopicCount)
  1748. {
  1749. // Get the topicId from the new file
  1750. if (fetchNewData)
  1751. {
  1752. if (pInFile->cbLeft < 2 * sizeof (DWORD))
  1753. {
  1754. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  1755. pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
  1756. pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft,
  1757. &errb);
  1758. pInFile->dwMax = pInFile->cbLeft;
  1759. pInFile->pCurrent = pInFile->pMem;
  1760. }
  1761. cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
  1762. pInFile->pCurrent += cbTemp;
  1763. pInFile->cbLeft -= cbTemp;
  1764. pWordInfo->dwNewTopicId = (dwNewTopicId += dwTopicIdDelta);
  1765. fetchNewData = FALSE;
  1766. }
  1767. if ((fRet = EmitNewData (lpipb, pWordInfo, TRUE)) != S_OK)
  1768. return(fRet);
  1769. fetchNewData = TRUE;
  1770. dwNewTopicCount --;
  1771. dwTopicCount++;
  1772. pWordInfo->dwIndexTopicCount++;
  1773. }
  1774. // Adjust for some bits used
  1775. if (pOutFile->ibit < cbitBYTE - 1)
  1776. {
  1777. pOutFile->pCurrent++;
  1778. pOutFile->cbLeft--;
  1779. pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
  1780. }
  1781. // Flush the output buffer
  1782. if (FileWrite (pOutFile->fFile, pOutFile->pMem,
  1783. (LONG)(pOutFile->pCurrent - pOutFile->pMem), &errb) !=
  1784. (LONG)(pOutFile->pCurrent - pOutFile->pMem))
  1785. return(errb);
  1786. dwNewDataSize = DwSubFo(pOutFile->foPhysicalOffset, foStart);
  1787. if (pWordInfo->dwDataSize < dwNewDataSize)
  1788. {
  1789. // ERIC: Find the best fit block here
  1790. // - Add the block pointed by pWordInfo into the free list
  1791. // - Find a new block in the freelist
  1792. // if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
  1793. // foNewDataOffset, dwNewDataSize)) != S_OK)
  1794. // where foNewDataOffset may be the max offset or the freelist
  1795. // block offset
  1796. FILEOFFSET foOffset1, foNewDataOffset;
  1797. WORD wNumBlocksTemp;
  1798. WORD wMaxBlocksTemp;
  1799. // Before adding that block to the FreeList,
  1800. // look if we need to change the size of the FreeList
  1801. QFREELIST qFreeList = _GLOBALLOCK(lpipb->hFreeList);
  1802. wNumBlocksTemp = qFreeList->flh.wNumBlocks;
  1803. wMaxBlocksTemp = qFreeList->flh.wMaxBlocks;
  1804. _GLOBALUNLOCK(lpipb->hFreeList);
  1805. // we use a count of two in the test below, in case not only old block is added but
  1806. // also an entry for the unused portion of the new block (later).
  1807. if (wMaxBlocksTemp < 2 || wNumBlocksTemp >= wMaxBlocksTemp - 2)
  1808. {
  1809. HFREELIST hFreeListTemp;
  1810. // if the free list can't grow, fall through to FreeListAdd, where the
  1811. // smallest free entry will be overwritten and re-used
  1812. if (wMaxBlocksTemp < MAXWORD - wDefaultFreeListSize)
  1813. {
  1814. hFreeListTemp = FreeListRealloc(lpipb->hFreeList,
  1815. (WORD)(wMaxBlocksTemp + wDefaultFreeListSize),
  1816. &errb);
  1817. if (errb != S_OK)
  1818. return errb;
  1819. lpipb->hFreeList = hFreeListTemp;
  1820. }
  1821. }
  1822. FreeListAdd(lpipb->hFreeList, pWordInfo->dataLocation, MakeFo(pWordInfo->dwDataSize,0));
  1823. foNewDataOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwNewDataSize,0), &errb);
  1824. if (FoIsNil(foNewDataOffset))
  1825. {
  1826. #ifdef _DEBUGFREE
  1827. _DPF2("UpdateDataNode: Grow from %ld to %ld failed: appending to EOF\n", pWordInfo->dwDataSize,\
  1828. dwNewDataSize);
  1829. #endif
  1830. foNewDataOffset = lpipb->foMaxOffset;
  1831. }
  1832. else
  1833. {
  1834. #ifdef _DEBUGFREE
  1835. _DPF3("UpdateDataNode: Grow from %ld to %ld uses free block at %ld\n", pWordInfo->dwDataSize,\
  1836. dwNewDataSize, foNewDataOffset.dwOffset );
  1837. #endif
  1838. foOffset1 = FreeListGetBlockAt(lpipb->hFreeList, foNewDataOffset, &errb);
  1839. if (FoCompare(foOffset1,MakeFo(sizeof(FREELIST),0)) > 0)
  1840. FreeListAdd(lpipb->hFreeList, FoAddDw(foNewDataOffset,dwNewDataSize),
  1841. FoSubFo(foOffset1,MakeFo(dwNewDataSize,0)));
  1842. }
  1843. if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
  1844. foNewDataOffset, dwNewDataSize)) != S_OK)
  1845. return fRet;
  1846. pWordInfo->dataLocation = foNewDataOffset;
  1847. //if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
  1848. // lpipb->foMaxOffset, dwNewDataSize)) != S_OK)
  1849. // return fRet;
  1850. //pWordInfo->dataLocation = lpipb->foMaxOffset;
  1851. // ERIC: Only increase the size of the file if foMaxOffset is used
  1852. if (FoEquals(foNewDataOffset,lpipb->foMaxOffset))
  1853. {
  1854. lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwNewDataSize);
  1855. #ifdef _DEBUG
  1856. dwOldDataLoss += pWordInfo->dwDataSize;
  1857. dwOldDataNeed += dwNewDataSize;
  1858. #endif
  1859. }
  1860. pWordInfo->dwDataSize = dwNewDataSize;
  1861. }
  1862. else
  1863. {
  1864. if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
  1865. pWordInfo->dataLocation, dwNewDataSize)) != S_OK)
  1866. return fRet;
  1867. }
  1868. pWordInfo->dwMergeTopicCount = dwTopicCount;
  1869. return(S_OK);
  1870. }
  1871. PUBLIC HRESULT PASCAL FAR SkipOldData (_LPIPB lpipb, PNODEINFO pIndexDataNode)
  1872. {
  1873. HRESULT fRet;
  1874. DWORD dwOccs;
  1875. DWORD dwTmp; // Trash variable.
  1876. OCCF occf = lpipb->occf;
  1877. PIH20 pHeader = &lpipb->BTreeData.Header;
  1878. // Get the number of occurrences
  1879. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOccCount,
  1880. &dwOccs)) != S_OK)
  1881. return fRet;
  1882. //
  1883. // One pass through here for each occurence in the
  1884. // current sub-list.
  1885. //
  1886. for (; dwOccs; dwOccs--)
  1887. {
  1888. //
  1889. // Keeping word-counts? If so, get it.
  1890. //
  1891. if (occf & OCCF_COUNT)
  1892. {
  1893. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyWordCount,
  1894. &dwTmp)) != S_OK)
  1895. {
  1896. return fRet;
  1897. }
  1898. }
  1899. //
  1900. // Keeping byte-offsets? If so, get it.
  1901. //
  1902. if (occf & OCCF_OFFSET)
  1903. {
  1904. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOffset,
  1905. &dwTmp)) != S_OK)
  1906. return fRet;
  1907. }
  1908. }
  1909. return S_OK;
  1910. }
  1911. PRIVATE HRESULT PASCAL FAR EmitNewData (_LPIPB lpipb, PWORDINFO pWordInfo,
  1912. BOOL fnewData)
  1913. {
  1914. DWORD dwTopicDelta;
  1915. DWORD dwOccs = 0;
  1916. DWORD dwTemp;
  1917. WORD wWeight = 0;
  1918. PBTREEDATA pTreeData = &lpipb->BTreeData;
  1919. PFILEDATA pInFile = &lpipb->InFile;
  1920. PFILEDATA pOutFile = &lpipb->OutFile;
  1921. OCCF occf = lpipb->occf;
  1922. PIH20 pHeader = &lpipb->BTreeData.Header;
  1923. int cbTemp;
  1924. ERRB errb;
  1925. HRESULT fRet;
  1926. // Set the delta
  1927. dwTopicDelta = pWordInfo->dwNewTopicId - pWordInfo->dwOldTopicId;
  1928. pWordInfo->dwOldTopicId = pWordInfo->dwNewTopicId;
  1929. if (pOutFile->ibit < cbitBYTE - 1)
  1930. {
  1931. pOutFile->pCurrent++;
  1932. pOutFile->cbLeft--;
  1933. pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
  1934. pOutFile->ibit = cbitBYTE - 1;
  1935. }
  1936. FAddDword (pOutFile, dwTopicDelta, pHeader->ckeyTopicId);
  1937. if (occf & OCCF_HAVE_OCCURRENCE)
  1938. {
  1939. // Get number of occ data records for this topic
  1940. if (pInFile->cbLeft < 2 * sizeof (DWORD))
  1941. {
  1942. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  1943. pInFile->cbLeft += FileRead (pInFile->fFile,
  1944. pInFile->pMem + pInFile->cbLeft,
  1945. pInFile->dwMax - pInFile->cbLeft, &errb);
  1946. pInFile->dwMax = pInFile->cbLeft;
  1947. pInFile->pCurrent = pInFile->pMem;
  1948. }
  1949. cbTemp = CbByteUnpack (&dwOccs, pInFile->pCurrent);
  1950. pInFile->pCurrent += cbTemp;
  1951. pInFile->cbLeft -= cbTemp;
  1952. }
  1953. // If we are term weighing we have to calculate the weight
  1954. if (lpipb->idxf & IDXF_NORMALIZE)
  1955. {
  1956. FLOAT rLog;
  1957. FLOAT rTerm;
  1958. FLOAT rWeight;
  1959. FLOAT fOcc;
  1960. #ifndef ISBU_IR_CHANGE
  1961. rLog = (float) log10(cHundredMillion/(double)pWordInfo->dwIndexTopicCount);
  1962. rTerm = rLog*rLog;
  1963. if (fnewData)
  1964. {
  1965. fOcc = (float) min(cTFThreshold, dwOccs);
  1966. // Add the new factor into the sigma term
  1967. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] *=
  1968. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
  1969. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] += fOcc * fOcc * rTerm;
  1970. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] =
  1971. (float)(sqrt((double)lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]));
  1972. }
  1973. // NOTE : The following weight computation, until the assignment to wWeight, is
  1974. // very similar to the weight computation in WriteDataNode() of permind2.c file.
  1975. // Read the explanation there for the hard coded figures and logic appearing below.
  1976. rTerm = (float) (8.0 - log10((double)pWordInfo->dwIndexTopicCount));
  1977. // In extreme cases, rTerm could be 0 or even -ve (when dwTopicCount approaches or
  1978. // exceeds 100,000,000)
  1979. if (rTerm <= (float) 0.0)
  1980. rTerm = cVerySmallWt; // very small value. == log(100 mil/ 95 mil)
  1981. rWeight = ((float) min(cTFThreshold, dwOccs)) * rTerm * rTerm / lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
  1982. // without the additional rTerm, we would probably be between 0.0 and 1.0
  1983. if (rWeight > rTerm)
  1984. wWeight = 0xFFFF;
  1985. else
  1986. wWeight = (WORD) ((float)0xFFFF * rWeight / rTerm);
  1987. #else
  1988. rLog = (float)(1.0) / (float)pWordInfo->dwIndexTopicCount;
  1989. rTerm = rLog * rLog;
  1990. if (fnewData)
  1991. {
  1992. // Add the new factor into the sigma term
  1993. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] *=
  1994. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
  1995. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] +=
  1996. dwOccs * rTerm;
  1997. lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] =
  1998. (float)(sqrt((double)lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]));
  1999. }
  2000. rTerm = rTerm * (float)65535.0;
  2001. rWeight = (float)dwOccs * rTerm /
  2002. (float)(lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]);
  2003. if (rWeight >= 65535.0)
  2004. wWeight = 65335;
  2005. else
  2006. wWeight = (WORD)rWeight;
  2007. #endif // ISBU_IR_CHANGE
  2008. // Write the weight to the output buffer
  2009. if ((fRet = FWriteBits (pOutFile, (DWORD)wWeight,
  2010. (BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
  2011. return fRet;
  2012. }
  2013. if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
  2014. return(S_OK);
  2015. // Write the OccCount
  2016. FAddDword (pOutFile, dwOccs, pHeader->ckeyOccCount);
  2017. // Encode the occ block
  2018. for (; dwOccs; dwOccs--)
  2019. {
  2020. // Make sure input buffer holds enough data
  2021. if (pInFile->cbLeft < 5 * sizeof (DWORD))
  2022. {
  2023. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  2024. pInFile->cbLeft += FileRead (pInFile->fFile,
  2025. pInFile->pMem + pInFile->cbLeft,
  2026. pInFile->dwMax - pInFile->cbLeft, &errb);
  2027. pInFile->dwMax = pInFile->cbLeft;
  2028. pInFile->pCurrent = pInFile->pMem;
  2029. }
  2030. if (occf & OCCF_COUNT)
  2031. {
  2032. cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
  2033. pInFile->pCurrent += cbTemp;
  2034. pInFile->cbLeft -= cbTemp;
  2035. if ((fRet = FAddDword (pOutFile, dwTemp, pHeader->ckeyWordCount))
  2036. != S_OK)
  2037. return(fRet);
  2038. }
  2039. if (occf & OCCF_OFFSET)
  2040. {
  2041. cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
  2042. pInFile->pCurrent += cbTemp;
  2043. pInFile->cbLeft -= cbTemp;
  2044. if ((fRet = FAddDword (pOutFile, dwTemp, pHeader->ckeyOffset))
  2045. != S_OK)
  2046. return(fRet);
  2047. }
  2048. }
  2049. return(S_OK);
  2050. }
  2051. PRIVATE HRESULT PASCAL FAR EmitOldData (_LPIPB lpipb, PNODEINFO pIndexDataNode,
  2052. PWORDINFO pWordInfo)
  2053. {
  2054. DWORD dwTopicDelta;
  2055. DWORD dwOccs;
  2056. DWORD dwTmp;
  2057. WORD wWeight = 0;
  2058. PFILEDATA pOutFile = &lpipb->OutFile;
  2059. OCCF occf = lpipb->occf;
  2060. HRESULT fRet;
  2061. PIH20 pHeader = &lpipb->BTreeData.Header;
  2062. if (pOutFile->ibit < cbitBYTE - 1)
  2063. {
  2064. pOutFile->pCurrent++;
  2065. pOutFile->cbLeft--;
  2066. pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
  2067. pOutFile->ibit = cbitBYTE - 1;
  2068. }
  2069. // Set the delta
  2070. dwTopicDelta = pWordInfo->dwIndexTopicId - pWordInfo->dwOldTopicId;
  2071. pWordInfo->dwOldTopicId = pWordInfo->dwIndexTopicId;
  2072. if ((fRet = FAddDword (pOutFile, dwTopicDelta,
  2073. pHeader->ckeyTopicId)) != S_OK)
  2074. return(fRet);
  2075. // If we are term weighing we have to calculate the weight
  2076. if (lpipb->idxf & IDXF_NORMALIZE)
  2077. {
  2078. if ((fRet = FGetBits(pIndexDataNode, &dwTmp, sizeof (USHORT) * cbitBYTE))
  2079. != S_OK)
  2080. return(fRet);
  2081. // Write the weight to the output buffer
  2082. if ((fRet = FWriteBits (pOutFile, (DWORD)wWeight,
  2083. (BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
  2084. return(fRet);
  2085. }
  2086. // Don't do anything else if there is nothing else to do!!!
  2087. if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
  2088. return S_OK;
  2089. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOccCount,
  2090. &dwOccs)) != S_OK)
  2091. return fRet;
  2092. // Write the OccCount
  2093. if ((fRet = FAddDword (pOutFile, dwOccs,
  2094. pHeader->ckeyOccCount)) != S_OK)
  2095. return(fRet);
  2096. // Encode the occ block
  2097. for (; dwOccs; dwOccs--)
  2098. {
  2099. if (occf & OCCF_COUNT)
  2100. {
  2101. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyWordCount,
  2102. &dwTmp)) != S_OK)
  2103. return fRet;
  2104. if ((fRet = FAddDword (pOutFile, dwTmp, pHeader->ckeyWordCount))
  2105. != S_OK)
  2106. return(fRet);
  2107. }
  2108. if (occf & OCCF_OFFSET)
  2109. {
  2110. if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOffset,
  2111. &dwTmp)) != S_OK)
  2112. return fRet;
  2113. if ((fRet = FAddDword (pOutFile, dwTmp, pHeader->ckeyOffset))
  2114. != S_OK)
  2115. return(fRet);
  2116. }
  2117. }
  2118. return(S_OK);
  2119. }
  2120. PRIVATE int PASCAL NEAR CopyNewDataToStemNode (_LPIPB lpipb,
  2121. PNODEINFO pTmpNode, LPB pWord, LPB pLastWord, int cLevel, int fFlag)
  2122. {
  2123. LPB pWordStorage;
  2124. /************************************************
  2125. * Emit the word data to the temp block
  2126. ************************************************/
  2127. pWordStorage = pTmpNode->pBuffer + sizeof(WORD);
  2128. pWordStorage += PrefixCompressWord (pWordStorage,
  2129. pWord, pLastWord, lpipb->occf & OCCF_LENGTH);
  2130. // Emit fileoffset
  2131. if (fFlag & USE_TEMP_NODE_04)
  2132. {
  2133. pWordStorage += CopyFileOffset (pWordStorage,
  2134. lpipb->BTreeData.rgpTmpNodeInfo[cLevel+1]->nodeOffset);
  2135. }
  2136. else
  2137. {
  2138. pWordStorage += CopyFileOffset (pWordStorage,
  2139. lpipb->BTreeData.rgpNodeInfo[cLevel+1]->nodeOffset);
  2140. }
  2141. pTmpNode->pCurPtr = pWordStorage;
  2142. SETWORD (pTmpNode->pBuffer, (WORD)(lpipb->BTreeData.Header.dwBlockSize
  2143. - (pWordStorage - pTmpNode->pBuffer)));
  2144. return (int)(pWordStorage - pTmpNode->pBuffer);
  2145. }
  2146. PRIVATE int PASCAL NEAR CopyNewDataToLeafNode (_LPIPB lpipb, PNODEINFO pTmpNode,
  2147. PWORDINFO pWordInfo, LPB pWord, LPB pLastWord)
  2148. {
  2149. LPB pWordStorage;
  2150. /************************************************
  2151. * Emit the word data to the temp block
  2152. ************************************************/
  2153. pWordStorage = pTmpNode->pBuffer + FOFFSET_SIZE + sizeof(WORD);
  2154. pWordStorage += PrefixCompressWord (pWordStorage,
  2155. pWord, pLastWord, lpipb->occf & OCCF_LENGTH);
  2156. // Emit field id, topic count. fileoffset, datasize
  2157. if (lpipb->occf & OCCF_FIELDID)
  2158. pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwFieldId);
  2159. pWordStorage += CbBytePack (pWordStorage,
  2160. pWordInfo->dwMergeTopicCount);
  2161. pWordStorage += CopyFileOffset (pWordStorage, pWordInfo->dataLocation);
  2162. pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwDataSize);
  2163. pTmpNode->pCurPtr = pWordStorage;
  2164. SETWORD (pTmpNode->pBuffer + FOFFSET_SIZE,
  2165. (WORD)(pTmpNode->cbLeft = (LONG)(lpipb->BTreeData.Header.dwBlockSize
  2166. - (pWordStorage - pTmpNode->pBuffer))));
  2167. return (int)(pWordStorage - pTmpNode->pBuffer);
  2168. }
  2169. PRIVATE HRESULT PASCAL FAR SkipNewData (_LPIPB lpipb, PWORDINFO pWordInfo)
  2170. {
  2171. DWORD dwOccs;
  2172. DWORD dwTemp;
  2173. PBTREEDATA pTreeData = &lpipb->BTreeData;
  2174. PFILEDATA pInFile = &lpipb->InFile;
  2175. PFILEDATA pOutFile = &lpipb->OutFile;
  2176. OCCF occf = lpipb->occf;
  2177. PIH20 pHeader = &lpipb->BTreeData.Header;
  2178. int cbTemp;
  2179. ERRB errb;
  2180. // Don't do anything else if there is nothing else to do!!!
  2181. if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
  2182. return S_OK;
  2183. // Get number of occ data records for this topic
  2184. if (pInFile->cbLeft < 2 * sizeof (DWORD))
  2185. {
  2186. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  2187. pInFile->cbLeft += FileRead (pInFile->fFile,
  2188. pInFile->pMem + pInFile->cbLeft,
  2189. pInFile->dwMax - pInFile->cbLeft, &errb);
  2190. pInFile->dwMax = pInFile->cbLeft;
  2191. pInFile->pCurrent = pInFile->pMem;
  2192. }
  2193. cbTemp = CbByteUnpack (&dwOccs, pInFile->pCurrent);
  2194. pInFile->pCurrent += cbTemp;
  2195. pInFile->cbLeft -= cbTemp;
  2196. // Encode the occ block
  2197. for (; dwOccs; dwOccs--)
  2198. {
  2199. // Make sure input buffer holds enough data
  2200. if (pInFile->cbLeft < 5 * sizeof (DWORD))
  2201. {
  2202. MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
  2203. pInFile->cbLeft += FileRead (pInFile->fFile,
  2204. pInFile->pMem + pInFile->cbLeft,
  2205. pInFile->dwMax - pInFile->cbLeft, &errb);
  2206. pInFile->dwMax = pInFile->cbLeft;
  2207. pInFile->pCurrent = pInFile->pMem;
  2208. }
  2209. if (occf & OCCF_COUNT)
  2210. {
  2211. cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
  2212. pInFile->pCurrent += cbTemp;
  2213. pInFile->cbLeft -= cbTemp;
  2214. }
  2215. if (occf & OCCF_OFFSET)
  2216. {
  2217. cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
  2218. pInFile->pCurrent += cbTemp;
  2219. pInFile->cbLeft -= cbTemp;
  2220. }
  2221. }
  2222. return(S_OK);
  2223. }
  2224. BYTE CurrentWord [1000];
  2225. BYTE LastWord [1000];
  2226. #if 0
  2227. HRESULT CheckStemNode (PNODEINFO pNodeInfo)
  2228. {
  2229. LPB lpCurPtr;
  2230. WORD wWlen;
  2231. LPB lpMaxAddress = pNodeInfo->pMaxAddress;
  2232. FILEOFFSET nodeOffset;
  2233. lpCurPtr = pNodeInfo->pBuffer + sizeof(WORD);
  2234. // Reset the last word
  2235. *(LPWORD)LastWord = 0;
  2236. do
  2237. {
  2238. lpCurPtr = ExtractWord(CurrentWord, lpCurPtr, &wWlen);
  2239. if (StrCmpPascal2(LastWord, CurrentWord) > 0)
  2240. {
  2241. // _asm int 3;
  2242. return(SetErrCode (NULL, ERR_FAILED));
  2243. }
  2244. lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
  2245. MEMCPY(LastWord, CurrentWord, wWlen + 2);
  2246. } while (lpCurPtr < lpMaxAddress);
  2247. return(S_OK);
  2248. }
  2249. HRESULT CheckLeafNode (PNODEINFO pNodeInfo, int occf)
  2250. {
  2251. LPB lpCurPtr;
  2252. WORD wWlen;
  2253. LPB lpMaxAddress = pNodeInfo->pMaxAddress;
  2254. FILEOFFSET nodeOffset;
  2255. DWORD dwTmp;
  2256. lpCurPtr = pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE;
  2257. // Reset the last word
  2258. *(LPWORD)LastWord = 0;
  2259. do
  2260. {
  2261. lpCurPtr = ExtractWord(CurrentWord, lpCurPtr, &wWlen);
  2262. if (StrCmpPascal2(LastWord, CurrentWord) > 0)
  2263. {
  2264. // _asm int 3;
  2265. return(SetErrCode (NULL, ERR_FAILED));
  2266. }
  2267. MEMCPY(LastWord, CurrentWord, wWlen + 2);
  2268. // Get fieldif and topic count
  2269. if (occf & OCCF_FIELDID)
  2270. lpCurPtr += CbByteUnpack (&dwTmp, lpCurPtr);
  2271. lpCurPtr += CbByteUnpack (&dwTmp, lpCurPtr);
  2272. // Get the data location and size
  2273. lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
  2274. lpCurPtr += CbByteUnpack(&dwTmp, lpCurPtr);
  2275. } while (lpCurPtr < lpMaxAddress);
  2276. return(S_OK);
  2277. }
  2278. #endif