Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1643 lines
49 KiB

  1. /*************************************************************************
  2. * *
  3. * COLLECT.C *
  4. * *
  5. * Copyright (C) Microsoft Corporation 1990-1994 *
  6. * All Rights reserved. *
  7. * *
  8. **************************************************************************
  9. * *
  10. * Module Intent *
  11. * *
  12. * This module is the first stage in the index building process. The *
  13. * primary function of stage 1 is to collect and sort all of the words *
  14. * to be indexed. Before processing can begin, the user must call *
  15. * MVIndexInitiate to initialize the indexing variables (IPB). Words are *
  16. * added via a call to MVIndexAddWord and are stored in a Balanced Tree *
  17. * until an OOM condition occurs. The tree is then dumped and reset to *
  18. * receive further words. *
  19. * *
  20. **************************************************************************
  21. * *
  22. * Current Owner: BinhN *
  23. * *
  24. **************************************************************************/
  25. #include <mvopsys.h>
  26. #include <mem.h>
  27. #include <memory.h>
  28. #include <math.h>
  29. #include <orkin.h>
  30. #include <mvsearch.h>
  31. #include "common.h"
  32. #include "index.h"
  // Module name string, compiled into debug builds only.
  33. #ifdef _DEBUG
  34. static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
  35. #endif
  // NOTE(review): presumably the maximum number of DWORD fields in one
  // occurrence record; CompareOccurrence handles at most 5 -- confirm.
  36. #define MAX_OCCDATA 5
  // Byte size passed to _GLOBALALLOC when FlushTree creates the sort buffer.
  37. #define ISBUFFER_SIZE 0xFFFC // Size of OUTPUT buffers for collect2.c
  38. // The output is DWORD aligned
  39. // And the buffer *MUST* BE a multiple of 4
  40. // Min size: size of largest index word
  // MVIndexInitiate raises any smaller requested memory size to this value.
  41. #define MIN_REQUIRED_MEM 0x400000 // 4-meg minimum
  42. /*************************************************************************
  43. *
  44. * INTERNAL PUBLIC FUNCTIONS
  45. *
  46. * All of them should be declared far, unless we know they belong to
  47. * the same segment. They should be included in some include file
  48. *
  49. *************************************************************************/
  // Cleanup helpers invoked by MVIndexDispose: FreeISI releases the
  // internal-sort resources, FreeEsi releases the external-sort block chain.
  50. PUBLIC VOID FAR PASCAL FreeISI (LPIPB);
  51. PUBLIC void FAR PASCAL FreeEsi (LPIPB);
  52. /*************************************************************************
  53. *
  54. * INTERNAL PRIVATE FUNCTIONS
  55. *
  56. *************************************************************************/
  // Tree construction: AddNode builds a node for a new word, AddTopic adds a
  // topic/occurrence to an existing word's node.
  57. PRIVATE PBTNODE NEAR PASCAL AddNode (_LPIPB, LST, LPOCC, PHRESULT);
  58. PRIVATE HRESULT NEAR PASCAL AddTopic (_LPIPB, PSTRDATA, LPOCC);
  59. PRIVATE void NEAR PASCAL AddOccurrence (PTOPICDATA, POCCDATA, int);
  // Output helpers; FlushTree calls TraverseWrite and then WriteBuffer.
  60. PRIVATE HRESULT NEAR PASCAL WriteBuffer (_LPIPB, LPB);
  61. PRIVATE HRESULT NEAR PASCAL TraverseWrite (_LPIPB, PBTNODE, int);
  // BalanceTree is called by MVIndexAddWord after each node insertion.
  // NOTE(review): LeftRotate/RightRotate are presumably its rotation
  // helpers -- their definitions are outside this part of the file.
  62. PRIVATE void NEAR PASCAL BalanceTree (LPISI, PBTNODE);
  63. PRIVATE void NEAR PASCAL LeftRotate (LPISI, PBTNODE);
  64. PRIVATE void NEAR PASCAL RightRotate (LPISI, PBTNODE);
  // Called by MVIndexInitiate with the memory budget for the new IPB.
  65. PRIVATE HRESULT PASCAL NEAR IndexBlockAllocate (LPIPB lpipb, LONG lMemSize);
  // Only called when _DEBUGREDBLACK is defined.
  66. PRIVATE void NEAR PASCAL VerifyTree (PBTNODE pRoot);
  67. /*************************************************************************
  68. *
  69. * PUBLIC API FUNCTIONS
  70. *
  71. * All of them should be declared far and included in some .DEF file
  72. *
  73. *************************************************************************/
  // Lifecycle of an index build: MVIndexInitiate creates the index parameter
  // block, MVIndexAddWord feeds words into it, MVIndexDispose releases it.
  74. PUBLIC LPIPB EXPORT_API FAR PASCAL MVIndexInitiate(PINDEXINFO pIndexInfo,
  75. PHRESULT phr);
  76. PUBLIC void EXPORT_API FAR PASCAL MVIndexDispose (LPIPB);
  77. PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexAddWord (LPIPB, LST, LPOCC);
  // Returns a pointer to a statistics counter inside the block.
  78. PUBLIC LPDWORD EXPORT_API PASCAL FAR TotalIndexedWord (LPIPB);
  79. /*************************************************************************
  80. *
  81. * INTERNAL PUBLIC FUNCTIONS
  82. *
  83. * All of them should be declared far and included in some .h file
  84. *
  85. *************************************************************************/
  // NOTE(review): SortFlushISI is not defined in this part of the file.
  86. PUBLIC HRESULT FAR PASCAL SortFlushISI (_LPIPB);
  // Comparison helpers: occurrence field arrays, and Pascal-style strings
  // carrying a 2-byte length prefix.
  87. PUBLIC int FAR PASCAL CompareOccurrence (LPDW, LPDW, int);
  88. PUBLIC int FAR PASCAL StrCmp2BytePascal (LPB, LPB);
  // Writes the in-memory tree out; MVIndexAddWord calls it when a node or
  // topic allocation fails.
  89. PUBLIC HRESULT FAR PASCAL FlushTree (_LPIPB);
  90. /*************************************************************************
  91. *
  92. * @doc API EXTERNAL INDEXING
  93. *
  94. * @func LPIPB FAR PASCAL | MVIndexInitiate |
  95. * The function allocates a index parameter block. The block is used
  96. * in all places during indexing. This function must be called
  97. * prior to any other indexing funtion.
  98. *
  99. * @parm PINDEXINFO | pIndexInfo |
  100. * Pointer to the index information data
  101. *
  102. * @parm PHRESULT | phr |
  103. * Pointer to error buffer.
  104. *
  105. * @rdesc Pointer to the block, or NULL if error. The error buffer
  106. * contains the description of the error
  107. *
  108. *************************************************************************/
  109. PUBLIC LPIPB EXPORT_API FAR PASCAL MVIndexInitiate(PINDEXINFO pIndexInfo,
  110. PHRESULT phr)
  111. {
  112. _LPIPB lpipb; // Pointer to index paramet block
  113. HRESULT fRet;
  114. // foNil should, of course, be nil
  115. // In this case foNil is only used by incremental update
  116. ITASSERT(0 == foNil.dwOffset && 0 == foNil.dwHigh);
  117. if (pIndexInfo == NULL)
  118. {
  119. SetErrCode (phr, E_INVALIDARG);
  120. return(NULL);
  121. }
  122. // Allocate the block. All the fields are initialized to 0
  123. if ((lpipb = GlobalLockedStructMemAlloc (sizeof (IPB))) == NULL)
  124. {
  125. SetErrCode (phr, E_OUTOFMEMORY);
  126. return (NULL);
  127. }
  128. // Initialize "idxf", make sure that "occf" has "OCCF_TOPICID" set.
  129. lpipb->idxf = (WORD)(pIndexInfo->Idxf);
  130. lpipb->occf = (WORD)(pIndexInfo->Occf | OCCF_TOPICID);
  131. // Initialize some fields
  132. lpipb->dwLastIndexedTopic = (DWORD)-1;
  133. // Set the number of occurrence fields in the occurrence block
  134. if (pIndexInfo->Occf & OCCF_COUNT)
  135. lpipb->ucNumOccDataFields++;
  136. if (pIndexInfo->Occf & OCCF_OFFSET)
  137. lpipb->ucNumOccDataFields++;
  138. // Clear sort file handle
  139. lpipb->dwUniqueWord = 0;
  140. lpipb->esi.lpesbRoot = NULL;
  141. // Allocate all the necessary memory block
  142. if ((lpipb->dwMemAllowed = pIndexInfo->dwMemSize) < MIN_REQUIRED_MEM)
  143. lpipb->dwMemAllowed = MIN_REQUIRED_MEM;
  144. if ((fRet = IndexBlockAllocate (lpipb, lpipb->dwMemAllowed)) != S_OK)
  145. {
  146. SetErrCode (phr, fRet);
  147. GlobalLockedStructMemFree (lpipb);
  148. return (NULL);
  149. }
  150. if (pIndexInfo->dwBlockSize <= BTREE_NODE_SIZE)
  151. lpipb->BTreeData.Header.dwBlockSize = BTREE_NODE_SIZE;
  152. else
  153. lpipb->BTreeData.Header.dwBlockSize = pIndexInfo->dwBlockSize;
  154. lpipb->BTreeData.Header.dwCodePageID = pIndexInfo->dwCodePageID;
  155. lpipb->BTreeData.Header.lcid = pIndexInfo->lcid;
  156. lpipb->BTreeData.Header.dwBreakerInstID = pIndexInfo->dwBreakerInstID;
  157. // Set the callback key
  158. lpipb->dwKey = CALLBACKKEY;
  159. return (lpipb);
  160. }
  161. /*************************************************************************
  162. *
  163. * @doc API EXTERNAL INDEXING
  164. *
  165. * @func void FAR PASCAL | MVIndexDispose |
  166. * Release all memory associated with the index parameter block.
  167. * Must be called after indexing is complete.
  168. *
  169. * @parm _LPIPB | lpipb |
  170. * Pointer to index parameter block
  171. *
  172. *************************************************************************/
  173. PUBLIC void EXPORT_API FAR PASCAL MVIndexDispose(_LPIPB lpipb)
  174. {
  175. // Sanity check
  176. if (lpipb == NULL)
  177. return;
  178. // Free all memory associated with internal sort
  179. FreeISI(lpipb);
  180. // Free all memory associated with external sort
  181. FreeEsi(lpipb);
  182. GlobalLockedStructMemFree (lpipb);
  183. }
  184. /*************************************************************************
  185. *
  186. * @doc PRIVATE INDEXING
  187. *
  188. * @func VOID PASCAL NEAR | FreeISI |
  189. * Free all blocks, and temporary file associated with the internal
  190. * sort
  191. *
  192. * @parm _LPIPB | lpipb |
  193. * Pointer to index parameter block
  194. *
  195. *************************************************************************/
  196. PUBLIC VOID PASCAL NEAR FreeISI (_LPIPB lpipb)
  197. {
  198. // Release temporary file buffer
  199. FreeHandle (lpipb->isi.hSortBuffer);
  200. lpipb->isi.hSortBuffer = NULL;
  201. if (lpipb->isi.hfpb)
  202. {
  203. FileClose (lpipb->isi.hfpb);
  204. lpipb->isi.hfpb = NULL;
  205. }
  206. if (lpipb->pDataBlock)
  207. {
  208. BlockFree (lpipb->pDataBlock);
  209. lpipb->pDataBlock = NULL;
  210. }
  211. if (lpipb->BTNodeBlock.pBlockMgr)
  212. {
  213. BlockFree (lpipb->BTNodeBlock.pBlockMgr);
  214. lpipb->BTNodeBlock.pBlockMgr = NULL;
  215. lpipb->BTNodeBlock.pFreeList = NULL; // Free list of Btnode
  216. }
  217. if (lpipb->TopicBlock.pBlockMgr)
  218. {
  219. BlockFree (lpipb->TopicBlock.pBlockMgr);
  220. lpipb->TopicBlock.pBlockMgr = NULL;
  221. lpipb->TopicBlock.pFreeList = NULL; // Free list of topic node
  222. }
  223. if (lpipb->OccBlock.pBlockMgr)
  224. {
  225. BlockFree (lpipb->OccBlock.pBlockMgr);
  226. lpipb->OccBlock.pBlockMgr = NULL;
  227. lpipb->OccBlock.pFreeList = NULL; // Free list of occurrence nodes
  228. }
  229. }
  230. /*************************************************************************
  231. * @doc API EXTERNAL INDEXING
  232. *
  233. * @func HRESULT FAR PASCAL | MVIndexAddWord |
  234. * This function will add a word into the index.
  235. *
  236. * @parm LPIPB | lpipb |
  237. * Index parameter block being operated on
  238. *
  239. * @parm LST | lstWord |
  240. * Word being indexed. (Pascal style with 2-byte header)
  241. *
  242. * @parm LPOCC | lpocc |
  243. * Occurence data associated with this word. It is assumed that the
  244. * occurrence block contains NO UNINITIALIZED DATA, ie. non-used
  245. * fields must be set to 0
  246. *
  247. * @rdesc S_OK, if successful, else other error
  248. *
  249. * @comm
  250. * The data are copied into the buffer managed by the block manager
  251. * and arranged as a Red/Black tree to speed sorting.
  252. *************************************************************************/
  253. static OCC NullOcc = { 0 };
  254. PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexAddWord (_LPIPB lpipb,
  255. LST lstWord, LPOCC lpOcc)
  256. {
  257. // Local replacement variables
  258. ERRB errb; // Pointer to error variable
  259. LPISI pIsi; // Internal Sort Information
  260. PBTNODE pRoot; // Root of the Balanced Tree
  261. // Working variables
  262. PBTNODE pNode; // Used to traverse the tree to find
  263. // to find the insertion point
  264. PBTNODE FAR *ppNode; // Used to add children to the tree
  265. int result; // String compare results
  266. int wLen; // Word length
  267. LST lstStart; // Saved starting position
  268. #ifdef _DEBUG
  269. char Buffer[200];
  270. #endif
  271. #ifdef _DEBUGREDBLACK
  272. int iLeft = 0;
  273. int iRight = 0;
  274. #endif
  275. // Various flags
  276. int fCompareField;
  277. // Sanity check
  278. if (lpipb == NULL)
  279. return(E_INVALIDARG);
  280. // Handle null case
  281. if (lstWord == NULL)
  282. return(S_OK);
  283. fCompareField = lpipb->occf & OCCF_FIELDID;
  284. pIsi = &lpipb->isi; // Internal Sort Information
  285. pRoot = pIsi->pBalanceTree; // Root of the Balanced Tree
  286. // Working variables
  287. ppNode = NULL; // Used to add children to the tree
  288. lstStart = lstWord; // Saved starting position
  289. if (lpOcc == NULL)
  290. lpOcc = &NullOcc;
  291. // Get statistics
  292. lpipb->dwIndexedWord++;
  293. // Count unique TopicId's
  294. if (lpipb->dwLastIndexedTopic != lpOcc->dwTopicID)
  295. {
  296. lpipb->lcTopics++;
  297. lpipb->dwLastIndexedTopic = lpOcc->dwTopicID;
  298. }
  299. if (lpOcc->dwTopicID > lpipb->dwMaxTopicId)
  300. {
  301. lpipb->dwMaxTopicId = lpOcc->dwTopicID;
  302. }
  303. wLen = GETWORD((LPUW)(lstStart = lstWord));
  304. // Save statistical information about the total length of all words
  305. if (wLen > 2)
  306. lpipb->dwTotal3bWordLen += wLen;
  307. else
  308. lpipb->dwTotal2bWordLen += wLen;
  309. lstWord += sizeof(WORD);
  310. #ifdef _DEBUG
  311. if (wLen >= 200)
  312. {
  313. strncpy (Buffer, lstWord, 198);
  314. Buffer[199] = 0;
  315. }
  316. else
  317. {
  318. strncpy (Buffer, lstWord, wLen);
  319. Buffer[wLen] = 0;
  320. }
  321. // if (STRICMP (Buffer, "erin") == 0)
  322. // _asm int 3;
  323. #endif
  324. // Call the user callback every once in a while
  325. if (!(lpipb->dwIndexedWord % 65536L)
  326. && (lpipb->CallbackInfo.dwFlags & ERRFLAG_STATUS))
  327. {
  328. PFCALLBACK_MSG pCallbackInfo = &lpipb->CallbackInfo;
  329. CALLBACKINFO Info;
  330. HRESULT err;
  331. Info.dwPhase = 1;
  332. Info.dwIndex = lpipb->dwIndexedWord;
  333. err = (*pCallbackInfo->MessageFunc)
  334. (ERRFLAG_STATUS, pCallbackInfo->pUserData, &Info);
  335. if (S_OK != err)
  336. return (err);
  337. }
  338. SubmitWord:
  339. // Is this the first word in the tree?
  340. if (pRoot == NULL)
  341. {
  342. if ((pRoot = AddNode (lpipb, lstStart, lpOcc, &errb)) == NULL)
  343. return (SetErrCode (NULL, E_OUTOFMEMORY));
  344. // Adjust tree data
  345. pRoot->color = BLACK;
  346. pRoot->pParent = NULL;
  347. pIsi->pBalanceTree = pRoot;
  348. // Set statistical info
  349. lpipb->dwByteCount = GETWORD ((LPUW)pRoot->StringData.pText);
  350. lpipb->dwMaxFieldId = pRoot->StringData.dwField;
  351. return (S_OK);
  352. }
  353. // Set traversal node to root node
  354. pNode = pRoot;
  355. for (; ; ) // Traverse the tree forever
  356. {
  357. int len; // Used for string compare block
  358. LPB lpbWord1, lpbWord2; // Used for string compare block
  359. PSTRDATA pString;
  360. /**********************************************
  361. * This section of code does a string compare
  362. **********************************************/
  363. lpbWord1 = lstWord;
  364. pString = &pNode->StringData;
  365. lpbWord2 = pString->pText;
  366. // Get the minimum length
  367. if ((result = wLen - GETWORD ((LPUW)lpbWord2)) > 0)
  368. len = GETWORD ((LPUW)lpbWord2);
  369. else
  370. len = wLen;
  371. // Skip the lengths
  372. lpbWord2 += sizeof (WORD);
  373. // Start compare byte per byte
  374. for (; len > 0; len--, lpbWord1++, lpbWord2++)
  375. {
  376. if (*lpbWord1 != *lpbWord2)
  377. break;
  378. }
  379. if (len != 0)
  380. result = *lpbWord1 - *lpbWord2;
  381. /**********************************
  382. * COMPARE FIELDID AND WORD LENGTH
  383. **********************************/
  384. if (result == 0)
  385. {
  386. // If the WordLength and FieldId are the same as the current
  387. // nodes' then we update the current record
  388. if (fCompareField)
  389. result = lpOcc->dwFieldId - pString->dwField;
  390. if (result == 0)
  391. result = lpOcc->wWordLen - (WORD)pString->dwWordLength;
  392. if (result == 0)
  393. {
  394. if (AddTopic (lpipb, pString, lpOcc) == S_OK)
  395. return (S_OK);
  396. // Add failed. Flush the tree to disk & resubmit word
  397. if ((result = FlushTree(lpipb)) == S_OK)
  398. {
  399. pRoot = pIsi->pBalanceTree;
  400. goto SubmitWord;
  401. }
  402. return (SetErrCode (NULL, (HRESULT)result));
  403. }
  404. // Fall through in case result is non-zero
  405. }
  406. // Descend tree or add new node
  407. if (result < 0)
  408. {
  409. if (pNode->pLeft != NULL)
  410. {
  411. pNode = pNode->pLeft;
  412. #ifdef _DEBUGREDBLACK
  413. iLeft++;
  414. #endif
  415. continue;
  416. }
  417. else
  418. ppNode = &pNode->pLeft;
  419. }
  420. else
  421. {
  422. if (pNode->pRight != NULL)
  423. {
  424. pNode = pNode->pRight;
  425. #ifdef _DEBUGREDBLACK
  426. iRight++;
  427. #endif
  428. continue;
  429. }
  430. else
  431. ppNode = &pNode->pRight;
  432. }
  433. #ifdef _DEBUGREDBLACK
  434. _DPF3("Added node '%s' at left %d, right %d\n", Buffer, iLeft, iRight);
  435. #endif
  436. // Add the new node to the tree
  437. *ppNode = AddNode (lpipb, lstStart, lpOcc, &errb);
  438. // If node is NULL we will flush the tree and resubmit the word
  439. if (*ppNode == NULL)
  440. {
  441. if ((result = FlushTree(lpipb)) != S_OK)
  442. return (result);
  443. pRoot = pIsi->pBalanceTree;
  444. ppNode = NULL;
  445. goto SubmitWord;
  446. }
  447. (*ppNode)->pParent = pNode;
  448. // This is the only place that the nodes get balanced
  449. BalanceTree (pIsi, *ppNode);
  450. #ifdef _DEBUGREDBLACK
  451. VerifyTree (pIsi->pBalanceTree);
  452. #endif
  453. return (S_OK);
  454. }
  455. }
  456. /*************************************************************************
  457. * @doc API EXTERNAL INDEXING
  458. *
  459. * @func LPDWORD PASCAL FAR | TotalIndexedWord |
  460. * Return the total number of words indexed (for statistical purpose
  461. * only)
  462. *
  463. * @parm LPIPB | lpipb |
  464. * Pointer to index parameter block
  465. *
  466. * @rdesc Return pointer to the total number of words indexed
  467. *************************************************************************/
  468. PUBLIC LPDWORD PASCAL FAR TotalIndexedWord(_LPIPB lpipb)
  469. {
  470. return (&lpipb->dwUniqueWord);
  471. }
  472. /*************************************************************************
  473. *
  474. * @doc PRIVATE INDEXING
  475. *
  476. * @func void NEAR PASCAL | FreeEsi |
  477. * Gets rid of all external-sort blocks attached to an IPB.
  478. * These blocks are formed into a single linked list
  479. * Also closes the file associated with the external sort.
  480. *
  481. * @parm _LPIPB | lpipb |
  482. * Pointer to index parameter block where all the info is stored
  483. *
  484. *************************************************************************/
  485. PUBLIC VOID FAR PASCAL FreeEsi(_LPIPB lpipb)
  486. {
  487. LPESB lpesb; /* Linked-list walk pointer. */
  488. LPESB lpesbNext; /* Next ESB in chain. */
  489. LPESI lpesi; /* Pointer to external sort info struct */
  490. /* Get pointer to the ESI block */
  491. lpesi = &lpipb->esi;
  492. for (lpesb = lpesi->lpesbRoot; lpesb != NULL; lpesb = lpesbNext)
  493. {
  494. /* Get pointer to the next block */
  495. lpesbNext = lpesb->lpesbNext;
  496. if (lpesb->hMem)
  497. {
  498. _GLOBALUNLOCK(lpesb->hMem);
  499. _GLOBALFREE(lpesb->hMem);
  500. }
  501. /* Free the block */
  502. GlobalLockedStructMemFree (lpesb);
  503. }
  504. lpesi->lpesbRoot = NULL; /* No more chain. */
  505. lpesi->cesb = 0; /* Everyone freed */
  506. // Delete the internal sorting result file
  507. if ((lpipb->idxf & KEEP_TEMP_FILE) == 0)
  508. FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
  509. }
  510. /*************************************************************************
  511. *
  512. * @doc PRIVATE INDEXING
  513. *
  514. * @func PBTNODE NEAR PASCAL | AddNode |
  515. * Inserts a new node into the tree.
  516. *
  517. * @parm _LPIPB | lpipb |
  518. * Pointer to index parameter block
  519. *
  520. * @parm LST | lpb |
  521. * Word being indexed.
  522. *
  523. * @parm LPOCC | lpOcc |
  524. * Pointer to occurrence data
  525. *
  526. * @parm PHRESULT | phr |
  527. * Pointer to error structure
  528. *
  529. * @rdesc Pointer to the newly created node
  530. *
  531. * @comm
  532. * The nodes parent pointer must be set externally.
  533. *
  534. *************************************************************************/
  535. PBTNODE NEAR PASCAL AddNode (_LPIPB lpipb, LST lpbWord,
  536. LPOCC lpOcc, PHRESULT phr)
  537. {
  538. // Local replacement variables
  539. LPV pDataBlock = lpipb->pDataBlock; // Pointer to Block Manager
  540. int occf = lpipb->occf;
  541. // Working variables
  542. PBTNODE pNode; // This will point to the new node
  543. PSTRDATA pString; // Pointer to string block
  544. PTOPICDATA pTopic; // Pointer to topic block
  545. POCCDATA pOcc;
  546. LPDW lpDw;
  547. // Create space for new node & topic & occ & copy the string
  548. #if 0
  549. if ((pNode = (PBTNODE)GetBlockNode (&lpipb->BTNodeBlock)) == NULL ||
  550. (pTopic = (PTOPICDATA)GetBlockNode (&lpipb->TopicBlock)) == NULL ||
  551. #else
  552. if ((pNode = (PBTNODE)BlockGetBlock(pDataBlock, sizeof(BTNODE))) == NULL ||
  553. (pTopic = (PTOPICDATA)BlockGetBlock (pDataBlock, sizeof(TOPICDATA))) == NULL ||
  554. #endif
  555. (pNode->StringData.pText = (LPB)BlockCopy (lpipb->pDataBlock,
  556. lpbWord, GETWORD((LPUW)lpbWord) + sizeof (SHORT), 0)) == NULL)
  557. {
  558. return (NULL);
  559. }
  560. pString = &pNode->StringData;
  561. /* Initialize all the fields */
  562. // Node Information. Parent field is set outside of this function
  563. pNode->pLeft = pNode->pRight = NULL;
  564. pNode->color = RED;
  565. /* Set the string fields */
  566. pString->pTopic = pString->pLastTopic = pTopic;
  567. pString->dwTopicCount = 1;
  568. // It doesn't hurt to copy the data even if we don't use it
  569. // It also saves a compare just to set it
  570. pString->dwField = lpOcc->dwFieldId;
  571. pString->dwWordLength = lpOcc->wWordLen;
  572. // Set the topic fields data
  573. pTopic->pNext = NULL;
  574. pTopic->dwTopicId = lpOcc->dwTopicID;
  575. if (occf & (OCCF_COUNT | OCCF_OFFSET))
  576. {
  577. #if 1
  578. if ((pOcc = (POCCDATA)BlockGetBlock (pDataBlock,
  579. sizeof(OCCDATA) * lpipb->ucNumOccDataFields)) == NULL)
  580. return(NULL);
  581. #else
  582. if ((pOcc = (POCCDATA)GetBlockNode (&lpipb->OccBlock)) == NULL )
  583. return(NULL);
  584. #endif
  585. // Set the occ fields
  586. pOcc->pNext = NULL;
  587. // Generate occ data block
  588. lpDw = pOcc->OccData;
  589. if (occf & OCCF_COUNT)
  590. *lpDw++ = lpOcc->dwCount;
  591. if (occf & OCCF_OFFSET)
  592. *lpDw = lpOcc->dwOffset;
  593. pTopic->pLastOccData = pTopic->pOccData = pOcc;
  594. pTopic->dwOccCount = 1;
  595. }
  596. else
  597. {
  598. pTopic->pLastOccData = pTopic->pOccData = NULL;
  599. pTopic->dwOccCount = 0;
  600. }
  601. // Set Statistical information
  602. if (lpipb->dwMaxWLen < GETWORD ((LPUW)pString->pText))
  603. lpipb->dwMaxWLen = GETWORD ((LPUW)pString->pText);
  604. if (lpipb->dwMaxFieldId < pString->dwField)
  605. lpipb->dwMaxFieldId = pString->dwField;
  606. lpipb->dwUniqueWord++;
  607. lpipb->dwByteCount += GETWORD ((LPUW)pString->pText);
  608. return (pNode);
  609. }
  610. /*************************************************************************
  611. *
  612. * @doc PRIVATE INDEXING
  613. *
  614. * @func int FAR PASCAL | CompareOccurrence |
  615. * Compares two Occurrence data pointers starting from the first
  616. * element and continuing until the elements are unequal.
  617. *
  618. * @parm LPB | lpStr1 |
  619. * Pointer to the first Occurence to compare
  620. *
  621. * @parm LPB | pOcc2 |
  622. * Pointer to the second Occurence to compare
  623. *
  624. * @parm int | max |
  625. * The number of occurrence fields to compare
  626. *
  627. * @rdesc
  628. * negative value : If pOcc1 is less than pOcc2
  629. * 0 : if pOcc1 is equal to pOcc2
  630. * positive value : If pOcc1 is greater than pOcc2
  631. *
  632. * @comm
  633. * The use of switch statment is for speed since this function is
  634. * called for so many times
  635. *************************************************************************/
  636. int FAR PASCAL CompareOccurrence (LPDW pOcc1, LPDW pOcc2, int max)
  637. {
  638. int result;
  639. switch (max)
  640. {
  641. case 5:
  642. if (result = (int)(*pOcc1 - *pOcc2))
  643. return (result);
  644. pOcc1++;
  645. pOcc2++;
  646. case 4:
  647. if (result = (int)(*pOcc1 - *pOcc2))
  648. return (result);
  649. pOcc1++;
  650. pOcc2++;
  651. case 3:
  652. if (result = (int)(*pOcc1 - *pOcc2))
  653. return (result);
  654. pOcc1++;
  655. pOcc2++;
  656. case 2:
  657. if (result = (int)(*pOcc1 - *pOcc2))
  658. return (result);
  659. pOcc1++;
  660. pOcc2++;
  661. case 1:
  662. return ((int)(*pOcc1 - *pOcc2));
  663. default:
  664. // This can only an error, since we knows that max
  665. // can never be > 5
  666. return (0);
  667. }
  668. }
  669. /*************************************************************************
  670. *
  671. * @doc INTERNAL INDEXING
  672. *
  673. * @func HRESULT | AddTopic |
  674. * Inserts a new topic into a nodes topic list or a new occurrence
  675. * if a topic with the same TopicId already exists.
  676. *
  677. * @parm _LPIPB | lpipb |
  678. * Pointer to index parameter block
  679. *
  680. * @parm PSTRDATA | pString |
  681. * Pointer to node structure
  682. *
  683. * @parm LPOCC | lpOcc |
  684. * Pointer occurrence data
  685. *
  686. * @rdesc S_OK, or errors if failed
  687. *
  688. *************************************************************************/
  689. HRESULT NEAR PASCAL AddTopic (_LPIPB lpipb, PSTRDATA pString, LPOCC lpOcc)
  690. {
  691. // Local replacement variables
  692. LPV pDataBlock = lpipb->pDataBlock;
  693. int occf = lpipb->occf;
  694. DWORD dwNewTopicId = lpOcc->dwTopicID;
  695. POCCDATA pOcc;
  696. // Working variables
  697. // int topicCount; // Iterates through current topics
  698. PTOPICDATA pTopic, pPrevTopic;
  699. LPDW lpDw;
  700. int fResult;
  701. /* Set up a new occurrence block */
  702. if (occf & (OCCF_COUNT | OCCF_OFFSET))
  703. {
  704. if ((pOcc = (POCCDATA)BlockGetBlock (pDataBlock,
  705. sizeof(OCCDATA) * lpipb->ucNumOccDataFields)) == NULL)
  706. return (E_OUTOFMEMORY);
  707. lpDw = pOcc->OccData;
  708. if (occf & OCCF_COUNT)
  709. *lpDw++ = lpOcc->dwCount;
  710. if (occf & OCCF_OFFSET)
  711. *lpDw = lpOcc->dwOffset;
  712. pOcc->pNext = NULL;
  713. }
  714. else
  715. pOcc = NULL;
  716. // Check from last point of insertion
  717. pTopic = pString->pLastTopic;
  718. if (pTopic->dwTopicId == dwNewTopicId)
  719. {
  720. append_occ_info:
  721. // Match. We don't have to do anything. That's is the majority
  722. // of the case. Just add the occdata to the end
  723. if (pOcc)
  724. {
  725. pTopic->pLastOccData->pNext = pOcc;
  726. pTopic->pLastOccData = pOcc;
  727. pTopic->dwOccCount++;
  728. }
  729. goto Update;
  730. }
  731. if (pTopic->dwTopicId < dwNewTopicId)
  732. {
  733. // kevynct: scan ahead to insertion point. Usually with sorted lists
  734. // this won't be far at all.
  735. pPrevTopic = pTopic;
  736. if (pTopic->pNext)
  737. {
  738. for (; (fResult = pTopic->dwTopicId - dwNewTopicId) < 0 && pTopic->pNext;
  739. pPrevTopic = pTopic, pTopic = pTopic->pNext)
  740. ; // empty loop!
  741. if (fResult == 0)
  742. {
  743. pString->pLastTopic = pTopic;
  744. goto append_occ_info;
  745. }
  746. }
  747. if ((pTopic = (PTOPICDATA)BlockGetBlock (pDataBlock,
  748. sizeof(TOPICDATA))) == NULL)
  749. return (E_OUTOFMEMORY);
  750. // Set the topic fields data
  751. if (pOcc)
  752. {
  753. pTopic->pLastOccData = pTopic->pOccData = pOcc;
  754. pTopic->dwOccCount = 1;
  755. }
  756. else
  757. {
  758. pTopic->pLastOccData = pTopic->pOccData = NULL;
  759. pTopic->dwOccCount = 0;
  760. }
  761. pTopic->dwTopicId = dwNewTopicId;
  762. insert_middle_or_end:
  763. // Add to middle or end of list
  764. pTopic->pNext = pPrevTopic->pNext;
  765. pPrevTopic->pNext = pTopic;
  766. pString->dwTopicCount++;
  767. pString->pLastTopic = pTopic;
  768. goto Update;
  769. }
  770. // It means that topics are not inserted
  771. // in order. It can only happen if somebody is using the
  772. // indexer for some special, non-topic related index build
  773. // Move to the right node
  774. pPrevTopic = NULL;
  775. for (pTopic = pString->pTopic;
  776. (fResult = pTopic->dwTopicId - dwNewTopicId) < 0 && pTopic->pNext;
  777. pPrevTopic = pTopic, pTopic = pTopic->pNext);
  778. if (fResult == 0)
  779. {
  780. // Match. Just add the occdata to the end
  781. if (pOcc)
  782. {
  783. pTopic->pLastOccData->pNext = pOcc;
  784. pTopic->pLastOccData = pOcc;
  785. pTopic->dwOccCount++;
  786. }
  787. }
  788. else
  789. {
  790. // A new topic node is needed
  791. if ((pTopic = (PTOPICDATA)BlockGetBlock (pDataBlock,
  792. sizeof(TOPICDATA))) == NULL)
  793. return (E_OUTOFMEMORY);
  794. // Set the topic fields data
  795. if (pOcc)
  796. {
  797. pTopic->pLastOccData = pTopic->pOccData = pOcc;
  798. pTopic->dwOccCount = 1;
  799. }
  800. else
  801. {
  802. pTopic->pLastOccData = pTopic->pOccData = NULL;
  803. pTopic->dwOccCount = 0;
  804. }
  805. pTopic->dwTopicId = dwNewTopicId;
  806. // Add to the beginning if empty
  807. if (pPrevTopic == NULL)
  808. {
  809. // Add to the beginning
  810. pTopic->pNext = pString->pTopic;
  811. pString->pTopic = pTopic;
  812. pString->dwTopicCount++;
  813. pString->pLastTopic = pTopic;
  814. goto Update;
  815. }
  816. goto insert_middle_or_end;
  817. }
  818. Update:
  819. // Update statistical information
  820. if (lpipb->dwMaxWCount < lpOcc->dwCount)
  821. lpipb->dwMaxWCount = lpOcc->dwCount;
  822. if (lpipb->dwMaxOffset < lpOcc->dwOffset)
  823. lpipb->dwMaxOffset = lpOcc->dwOffset;
  824. return S_OK;
  825. }
  826. /*************************************************************************
  827. *
  828. * @doc INTERNAL INDEXING
  829. *
  830. * @func int | StrCmp2BytePascal |
  831. * Compares two Pascal style strings against eachother.
  832. * The strings must have a 2 byte length field, *NOT* 1 byte.
  833. *
  834. * @parm LPB | lpStr1 |
  835. * Pointer to string one
  836. *
  837. * @parm LPB | lpStr2 |
  838. * Pointer to string two
  839. *
  840. * @rdesc
  841. * negative value : If pOcc1 is less than pOcc2
  842. * 0 : if pOcc1 is equal to pOcc2
  843. * positive value : If pOcc1 is greater than pOcc2
  844. *
  845. *************************************************************************/
  846. int FAR PASCAL StrCmp2BytePascal(LPB lpStr1, LPB lpStr2)
  847. {
  848. int fRet;
  849. int register len;
  850. // Get the minimum length
  851. if ((fRet = GETWORD ((LPUW)lpStr1) - GETWORD ((LPUW)lpStr2)) > 0)
  852. len = GETWORD ((LPUW)lpStr2);
  853. else
  854. len = GETWORD ((LPUW)lpStr1);
  855. // Skip the lengths */
  856. lpStr1 += sizeof (SHORT);
  857. lpStr2 += sizeof (SHORT);
  858. // Start compare byte per byte */
  859. for (; len > 0; len--, lpStr1++, lpStr2++)
  860. {
  861. if (*lpStr1 != *lpStr2)
  862. break;
  863. }
  864. if (len == 0)
  865. return (fRet);
  866. return (*lpStr1 - *lpStr2);
  867. }
  868. /*************************************************************************
  869. *
  870. * @doc INTERNAL INDEXING
  871. *
  872. * @func HRESULT | FlushTree |
  873. * Flushes the tree to disk.
  874. *
  875. * @parm _LPIPB | lpipb |
  876. * Pointer to index parameter block
  877. *
  878. * @rdesc S_OK, or errors if failed
  879. *
  880. * @comm
  881. * This function holds the output file open until the tree has been
  882. * completely written to disk. The physical offset of the written
  883. * data is stored in the ESB blocks so that the word can be merged
  884. * in the next index phase.
  885. *
  886. *************************************************************************/
  887. PUBLIC HRESULT FAR PASCAL FlushTree(_LPIPB lpipb)
  888. {
  889. // Local replacement variables
  890. LPISI pIsi = &lpipb->isi;
  891. LPESI pEsi = &lpipb->esi;
  892. PBTNODE pBalanceTree = pIsi->pBalanceTree;
  893. ERRB errb;
  894. PHRESULT phr = &errb;
  895. // Local working variables
  896. LPESB pNewEsb;
  897. HRESULT fRet;
  898. // Make sure that the tree actually has nodes
  899. if (pBalanceTree == NULL)
  900. return (S_OK);
  901. // Open output file & clear working variables
  902. if (pIsi->hfpb == NULL)
  903. {
  904. // Allocate output buffer
  905. if ((pIsi->hSortBuffer = _GLOBALALLOC
  906. (DLLGMEM_ZEROINIT, ISBUFFER_SIZE)) == NULL)
  907. return (E_OUTOFMEMORY);
  908. pIsi->pSortBuffer = (LPB)_GLOBALLOCK (pIsi->hSortBuffer);
  909. // Get temp filename & open file
  910. GETTEMPFILENAME ((char)0, (LPB)"iso", (WORD)0, pIsi->aszTempName);
  911. if ((pIsi->hfpb = FileOpen (NULL, pIsi->aszTempName,
  912. REGULAR_FILE, READ_WRITE, phr)) == NULL)
  913. return (*phr);
  914. pIsi->dwRecLength = 0;
  915. pEsi->cesb = 0;
  916. }
  917. // Allocate new ESB structure & set starting values
  918. if ((pNewEsb = GlobalLockedStructMemAlloc (sizeof (ESB))) == NULL)
  919. return (E_OUTOFMEMORY);
  920. // Remember the starting offset
  921. pNewEsb->lfo = pIsi->lfo;
  922. // Reset the current insertion point
  923. pIsi->pCurPtr = pIsi->pSortBuffer;
  924. // Actually ouput the tree data
  925. if ((fRet = TraverseWrite(lpipb, pBalanceTree, 0)) != S_OK)
  926. return (fRet);
  927. // Flush remaining buffer to disk
  928. if ((fRet = WriteBuffer(lpipb, pIsi->pCurPtr)) != S_OK)
  929. return(fRet);
  930. // Set the ESB maximum record length
  931. pNewEsb->dwEsbSize = pIsi->dwMaxEsbRecSize;
  932. pIsi->dwMaxEsbRecSize = 0;
  933. // Store end offset in list
  934. pNewEsb->lfoMax = pIsi->lfo;
  935. // Update the fileoffset
  936. pIsi->lfo = pNewEsb->lfoMax;
  937. if (pEsi->lpesbRoot == NULL)
  938. pNewEsb->lpesbNext = NULL;
  939. else
  940. pNewEsb->lpesbNext = pEsi->lpesbRoot;
  941. pEsi->lpesbRoot = pNewEsb;
  942. pEsi->cesb++;
  943. // Reset tree heap & root node
  944. BlockReset (lpipb->pDataBlock);
  945. BlockReset (lpipb->BTNodeBlock.pBlockMgr);
  946. lpipb->BTNodeBlock.pFreeList =
  947. (PLIST)BlockGetLinkedList(lpipb->BTNodeBlock.pBlockMgr);
  948. BlockReset (lpipb->TopicBlock.pBlockMgr);
  949. lpipb->TopicBlock.pFreeList =
  950. (PLIST)BlockGetLinkedList(lpipb->TopicBlock.pBlockMgr);
  951. BlockReset (lpipb->OccBlock.pBlockMgr);
  952. lpipb->OccBlock.pFreeList =
  953. (PLIST)BlockGetLinkedList(lpipb->OccBlock.pBlockMgr);
  954. pIsi->pBalanceTree = NULL;
  955. return (S_OK);
  956. }
  957. /*************************************************************************
  958. *
  959. * @doc INTERNAL INDEXING
  960. *
  961. * @func HRESULT | WriteBuffer |
  962. * Physically writes buffer to disk. This will write from the beginning
  963. * of the sort buffer to pStartRec. It then copies whatever left
  964. * in the sort buffer back to the beginning of it
  965. *
  966. * @parm _LPIPB | lpipb |
  967. * Pointer to index parameter block
  968. *
  969. * @parm LPB | copyEnd |
  970. * Pointer to the end of the next block of data to copy
  971. *
  972. * @rdesc S_OK or errors
  973. *************************************************************************/
  974. HRESULT NEAR PASCAL WriteBuffer (_LPIPB lpipb, LPB copyEnd)
  975. {
  976. // Local replacement variables
  977. LPISI pIsi = &lpipb->isi;
  978. LPB pSortBuffer;
  979. ERRB errb;
  980. PHRESULT phr = &errb;
  981. DWORD cbWritten; // Number of bytes to write to disk (bytes)
  982. DWORD cbCopied; // Size of extra data to move to buffers front
  983. LPB copyStart;
  984. pSortBuffer = (LPB)pIsi->pSortBuffer;
  985. // Find what should be left in the buffer.
  986. // copyStart will pointer to the beginning of data to be recopied, ie.
  987. // the beginning of a record
  988. // - if pIsi->pStartRec == -1 : there is no beginning of record
  989. // so we have nothing to recopy
  990. // - if pIsi->pStartRec == pSortBuffer, again the whole thing is
  991. // to be written out, and there is nothing to recopy
  992. if ((copyStart = pIsi->pStartRec) == (LPB)-1 || copyStart == pSortBuffer)
  993. copyStart = copyEnd;
  994. if ((cbWritten = (DWORD)(copyStart - pSortBuffer)) == 0)
  995. return(S_OK); // Nothing to copy
  996. cbCopied = (DWORD)(copyEnd - copyStart);
  997. // Update backpatching data
  998. if (pIsi->pStartRec == pSortBuffer)
  999. {
  1000. pIsi->pStartRec = (LPB)-1; // The pointer is invalid
  1001. pIsi->lfoRecBackPatch = pIsi->lfo; // Remember the place for backpatch
  1002. }
  1003. // Write the buffer to disk
  1004. if (cbWritten != (DWORD)
  1005. FileWrite(pIsi->hfpb, pSortBuffer, cbWritten, phr))
  1006. {
  1007. return (*phr);
  1008. }
  1009. pIsi->lfo = FoAddDw (pIsi->lfo, cbWritten);
  1010. // Only copy if extra data exists
  1011. if (cbCopied)
  1012. {
  1013. MEMMOVE(pSortBuffer, copyStart, cbCopied);
  1014. if (pIsi->pStartRec == copyStart)
  1015. pIsi->pStartRec = pSortBuffer;
  1016. }
  1017. // Reset pStartRec & pCurPtr
  1018. pIsi->pCurPtr = pSortBuffer + cbCopied;
  1019. return S_OK;
  1020. }
  1021. /*************************************************************************
  1022. *
  1023. * @doc INTERNAL INDEXING
  1024. *
  1025. * @func HRESULT | TraverseWrite |
  1026. * Copies the node data to the output buffer.
  1027. *
  1028. * @parm _LPIPB | lpipb |
  1029. * Pointer to index parameter block
  1030. *
  1031. * @parm PBTNODE | node |
  1032. * Node to copy to buffer
  1033. *
  1034. * @parm LPB | pBuffer |
  1035. * Buffer to copy node into
  1036. *
  1037. * @parm int | Level |
  1038. * Current tree level (starting with 1)
  1039. *
  1040. * @rdesc S_OK, or errors if failed
  1041. *
  1042. * @comm
  1043. * This is currently a recursive routine. It should probably be
  1044. * changed to be non-recursive to save on speed at run-time.
  1045. *
  1046. *************************************************************************/
  1047. HRESULT NEAR PASCAL TraverseWrite (_LPIPB lpipb, PBTNODE node, int Level)
  1048. {
  1049. // Local replacement pointers
  1050. PSTRDATA pString = &node->StringData;
  1051. LPISI pIsi = &lpipb->isi; // Internal sort information
  1052. LPB pText = pString->pText; // The word string
  1053. POCCDATA pOccData;
  1054. WORD ucNumOccDataFields = lpipb->ucNumOccDataFields;
  1055. PTOPICDATA pTopic = pString->pTopic;
  1056. ERRB errb;
  1057. PHRESULT phr = &errb;
  1058. // Working variables
  1059. DWORD topicLoop, occLoop; // Loop counters
  1060. WORD wLength; // DWORD aligned length of string
  1061. BYTE filledBuffer = 0; // Count if record fills entire buffer
  1062. LPB pBaseBuffer; // Start of entire buffer
  1063. LPB pCurPtr;
  1064. LPB pMaxPtr;
  1065. HRESULT fRet;
  1066. // Keep track of how deep the tree is
  1067. if (Level > pIsi->DeepLevel)
  1068. pIsi->DeepLevel = (BYTE) Level;
  1069. #ifdef _DEBUG
  1070. if (Level >= 65)
  1071. { // This would be a HUGE tree!
  1072. return SetErrCode (phr, E_ASSERT);
  1073. }
  1074. #endif
  1075. // Traverse the left sub tree
  1076. if (node->pLeft != NULL)
  1077. {
  1078. if ((fRet = TraverseWrite(lpipb, node->pLeft, Level + 1)) != S_OK)
  1079. return(fRet);
  1080. }
  1081. /* Initialize */
  1082. pBaseBuffer = (LPB)pIsi->pSortBuffer;
  1083. pMaxPtr = pBaseBuffer + ISBUFFER_SIZE - sizeof(DWORD); // Leave some room
  1084. pCurPtr = pIsi->pCurPtr; // Get starting point
  1085. // Reset the record length field
  1086. pIsi->dwRecLength = 0;
  1087. // Get the Pascal string length
  1088. wLength = GETWORD ((LPUW)pText) + sizeof (SHORT);
  1089. //wLength = (wLength + 3) & (~3);
  1090. // Check for minimum room
  1091. if (pMaxPtr <= pCurPtr + wLength + // String length
  1092. sizeof (DWORD) + // Record length
  1093. sizeof (DWORD) + // FieldId
  1094. sizeof (WORD) + // Word length
  1095. sizeof (DWORD) ) // TopicCount
  1096. {
  1097. if ((fRet = WriteBuffer (lpipb, pCurPtr)) != S_OK)
  1098. return fRet;
  1099. pCurPtr = pIsi->pCurPtr; // Reset insertion point
  1100. }
  1101. // Remember record length position to be backpatched
  1102. pIsi->pStartRec = pCurPtr;
  1103. pCurPtr += sizeof (DWORD);
  1104. MEMCPY(pCurPtr, pText, wLength);
  1105. pCurPtr += wLength; // Add aligned offset
  1106. // Copy the Word Length only if flag is set
  1107. if (lpipb->occf & OCCF_LENGTH)
  1108. pCurPtr += CbBytePack (pCurPtr, pString->dwWordLength);
  1109. // Copy FieldId only if flag is set
  1110. if (lpipb->occf & OCCF_FIELDID)
  1111. pCurPtr += CbBytePack (pCurPtr, pString->dwField);
  1112. // Topic Count
  1113. if (lpipb->occf & OCCF_TOPICID)
  1114. pCurPtr += CbBytePack (pCurPtr, pString->dwTopicCount);
  1115. else
  1116. pString->dwTopicCount = 0;
  1117. // Add in all topics
  1118. for (topicLoop = pString->dwTopicCount; topicLoop > 0; --topicLoop)
  1119. {
  1120. // Check buffer overflow
  1121. if (pMaxPtr <= pCurPtr + sizeof (DWORD) // TopicId
  1122. + sizeof (DWORD)) // Occurrence count
  1123. {
  1124. pIsi->dwRecLength += (DWORD)(pCurPtr - pIsi->pCurPtr);
  1125. if ((fRet = WriteBuffer (lpipb, pCurPtr)) != S_OK)
  1126. return fRet;
  1127. pCurPtr = pIsi->pCurPtr; // Reset insertion point
  1128. }
  1129. pCurPtr += CbBytePack (pCurPtr, pTopic->dwTopicId);
  1130. if (occLoop = pTopic->dwOccCount)
  1131. {
  1132. pCurPtr += CbBytePack (pCurPtr, pTopic->dwOccCount);
  1133. pOccData = pTopic->pOccData;
  1134. // Add in all occurrence data
  1135. for (occLoop = pTopic->dwOccCount; occLoop > 0; --occLoop)
  1136. {
  1137. LPDW lpDw;
  1138. // Check buffer overflow
  1139. if (pMaxPtr <= pCurPtr + MAX_OCCDATA * sizeof (DWORD))
  1140. {
  1141. pIsi->dwRecLength += (DWORD)(pCurPtr - pIsi->pCurPtr);
  1142. if ((fRet = WriteBuffer (lpipb, pCurPtr)) != S_OK)
  1143. return fRet;
  1144. pCurPtr = pIsi->pCurPtr; // Reset insertion point
  1145. }
  1146. lpDw = (LPDW)pOccData->OccData;
  1147. switch (ucNumOccDataFields)
  1148. {
  1149. case 5:
  1150. pCurPtr += CbBytePack (pCurPtr, *lpDw++);
  1151. case 4:
  1152. pCurPtr += CbBytePack (pCurPtr, *lpDw++);
  1153. case 3:
  1154. pCurPtr += CbBytePack (pCurPtr, *lpDw++);
  1155. case 2:
  1156. pCurPtr += CbBytePack (pCurPtr, *lpDw++);
  1157. case 1:
  1158. pCurPtr += CbBytePack (pCurPtr, *lpDw++);
  1159. }
  1160. pOccData = pOccData->pNext;
  1161. }
  1162. }
  1163. pTopic = pTopic->pNext;
  1164. }
  1165. // Update the record length
  1166. pIsi->dwRecLength += (DWORD)(pCurPtr - pIsi->pCurPtr);
  1167. // Keep track of the maximum record size for merging.
  1168. // - 1 for the current ESB. This helps speeding up the merging sequence
  1169. // since we don't have to worry about a record being split
  1170. if (pIsi->dwRecLength > pIsi->dwMaxEsbRecSize)
  1171. pIsi->dwMaxEsbRecSize = pIsi->dwRecLength;
  1172. // Set record length
  1173. if (pIsi->pStartRec != (LPB)-1)
  1174. {
  1175. // Everything is still in memory
  1176. *(LPUL)pIsi->pStartRec = pIsi->dwRecLength;
  1177. }
  1178. else
  1179. {
  1180. // We have to do backpatching
  1181. if (sizeof (DWORD) != FileSeekWrite (pIsi->hfpb, &pIsi->dwRecLength,
  1182. pIsi->lfoRecBackPatch, sizeof (DWORD), phr))
  1183. return *phr;
  1184. FileSeek (lpipb->isi.hfpb, pIsi->lfo, 0, phr);
  1185. }
  1186. // Update the current insertion point, and prepare for the next record
  1187. pIsi->pStartRec = pIsi->pCurPtr = pCurPtr;
  1188. if (node->pRight != NULL)
  1189. return TraverseWrite(lpipb, node->pRight, Level + 1);
  1190. return(S_OK);
  1191. }
  1192. /*************************************************************************
  1193. *
  1194. * @doc INTERNAL INDEXING
  1195. *
  1196. * @func VOID NEAR PASCAL | BalanceTree |
  1197. * Balances the tree using a Red/Black algorithm.
  1198. *
  1199. * @parm LPISI | pIsi |
  1200. * Pointer to Internal sort data
  1201. *
  1202. * @parm PBTNODE | node |
  1203. * Pointer to the node that was just inserted
  1204. *
  1205. * @comm
  1206. * This routine must be called after EVERY new node is inserted in
  1207. * the tree to maintain proper balance.
  1208. * A Red/Black tree must maintain the following conditions:
  1209. * Every node is colored either red or black
  1210. * Every leaf node must be black
  1211. * If a node is red, then both of its children must be black
  1212. * Every path from the root to a leaf must contain the same
  1213. * number of black nodes
  1214. *
  1215. *************************************************************************/
  1216. void NEAR PASCAL BalanceTree(LPISI pIsi, PBTNODE node)
  1217. {
  1218. PBTNODE y;
  1219. PBTNODE pParentNode;
  1220. node->color = RED;
  1221. while (node != pIsi->pBalanceTree && node->pParent->color == RED)
  1222. {
  1223. pParentNode = node->pParent;
  1224. if (pParentNode != NULL && pParentNode->pParent != NULL &&
  1225. pParentNode == pParentNode->pParent->pLeft)
  1226. {
  1227. y = pParentNode->pParent->pRight;
  1228. if (y != NULL && y->color == RED)
  1229. {
  1230. pParentNode->color = BLACK;
  1231. y->color = BLACK;
  1232. pParentNode->pParent->color = RED;
  1233. node = pParentNode->pParent;
  1234. pParentNode = node->pParent;
  1235. }
  1236. else
  1237. {
  1238. if (node == pParentNode->pRight)
  1239. {
  1240. node = pParentNode;
  1241. // LeftRotate change the parent node
  1242. LeftRotate(pIsi, node);
  1243. pParentNode = node->pParent;
  1244. }
  1245. pParentNode->color = BLACK;
  1246. pParentNode->pParent->color = RED;
  1247. // RightRotate change the parent node
  1248. RightRotate(pIsi, pParentNode);
  1249. pParentNode = node->pParent;
  1250. }
  1251. }
  1252. else
  1253. {
  1254. if (pParentNode != NULL && pParentNode->pParent != NULL)
  1255. y = pParentNode->pParent->pLeft;
  1256. else
  1257. y = NULL;
  1258. if (y != NULL && y->color == RED)
  1259. {
  1260. pParentNode->color = BLACK;
  1261. y->color = BLACK;
  1262. pParentNode->pParent->color = RED;
  1263. node = pParentNode->pParent;
  1264. pParentNode = node->pParent;
  1265. }
  1266. else
  1267. {
  1268. if (node == pParentNode->pLeft)
  1269. {
  1270. // RightRotate change the parent node
  1271. RightRotate(pIsi, node);
  1272. node->color = BLACK;
  1273. node = node->pRight;
  1274. pParentNode = node->pParent;
  1275. }
  1276. pParentNode->color = BLACK;
  1277. pParentNode->pParent->color = RED;
  1278. // LeftRotste change the parent node
  1279. LeftRotate(pIsi, pParentNode->pParent);
  1280. pParentNode = node->pParent;
  1281. }
  1282. }
  1283. }
  1284. pIsi->pBalanceTree->color = BLACK;
  1285. }
  1286. /*************************************************************************
  1287. *
  1288. * @doc INTERNAL INDEXING
  1289. *
  1290. * @func VOID NEAR PASCAL | LeftRotate |
  1291. * Rotates two nodes in the tree.
  1292. *
  1293. * @parm _LPIPB | lpipb |
  1294. * Pointer to index parameter block
  1295. *
  1296. * @parm PBTNODE | node |
  1297. * The X node to process (see notes)
  1298. *
  1299. * @comm
  1300. *
  1301. * X Y
  1302. * / \ / \
  1303. * a Y ---> X c
  1304. * / \ / \
  1305. * b c a b
  1306. *************************************************************************/
  1307. void NEAR PASCAL LeftRotate(LPISI pIsi, PBTNODE node)
  1308. {
  1309. PBTNODE y = node->pRight;
  1310. node->pRight = y->pLeft;
  1311. if (y->pLeft != NULL)
  1312. y->pLeft->pParent = node;
  1313. y->pParent = node->pParent;
  1314. if (y->pParent == NULL)
  1315. pIsi->pBalanceTree = y;
  1316. else
  1317. {
  1318. if (node == node->pParent->pLeft)
  1319. node->pParent->pLeft = y;
  1320. else
  1321. node->pParent->pRight = y;
  1322. }
  1323. y->pLeft = node;
  1324. node->pParent = y;
  1325. }
  1326. /*************************************************************************
  1327. *
  1328. * @doc INTERNAL INDEXING
  1329. *
  1330. * @func VOID NEAR PASCAL | RightRotate |
  1331. * Rotates two nodes in the tree.
  1332. *
  1333. * @parm _LPIPB | lpipb |
  1334. * Pointer to index parameter block
  1335. *
  1336. * @parm PBTNODE | node |
  1337. * The X node to process (see notes)
  1338. *
  1339. * @comm
  1340. *
  1341. * Y X
  1342. * / \ / \
  1343. * X c ---> a Y
  1344. * / \ / \
  1345. * a b b c
  1346. *************************************************************************/
  1347. void NEAR PASCAL RightRotate(LPISI pIsi, PBTNODE node)
  1348. {
  1349. PBTNODE y = node->pParent;
  1350. y->pLeft = node->pRight;
  1351. if (y->pLeft != NULL)
  1352. y->pLeft->pParent = y;
  1353. node->pParent = y->pParent;
  1354. if (node->pParent == NULL)
  1355. pIsi->pBalanceTree = node;
  1356. else
  1357. {
  1358. if (y == node->pParent->pLeft)
  1359. node->pParent->pLeft = node;
  1360. else
  1361. node->pParent->pRight = node;
  1362. }
  1363. node->pRight = y;
  1364. y->pParent = node;
  1365. }
  1366. /************************************************************************
  1367. * @doc PRIVATE
  1368. * @func HRESULT PASCAL NEAR | IndexBlockAllocate |
  1369. * Set the memory allocation based on the memory of the machine
  1370. * @parm DWORD | dwmemSize |
  1371. * Memory allocated for the indexer
  1372. * @rdesc S_OK, or E_OUTOFMEMORY
  1373. ************************************************************************/
  1374. PRIVATE HRESULT PASCAL NEAR IndexBlockAllocate (_LPIPB lpipb, LONG lMemSize)
  1375. {
  1376. if ((lpipb->pDataBlock = BlockInitiate (MAX_BLOCK_SIZE, 0,
  1377. (WORD)(lMemSize/MAX_BLOCK_SIZE), USE_VIRTUAL_MEMORY)) == NULL)
  1378. return(E_OUTOFMEMORY);
  1379. return(S_OK);
  1380. }
  1381. #ifdef _DEBUGREDBLACK
  1382. /*
  1383. * @comm
  1384. * This routine must be called after EVERY new node is inserted in
  1385. * the tree to maintain proper balance.
  1386. * A Red/Black tree must maintain the following conditions:
  1387. * Every node is colored either red or black
  1388. * Every leaf node must be black
  1389. * If a node is red, then both of its children must be black
  1390. * Every path from the root to a leaf must contain the same
  1391. * number of black nodes
  1392. */
  1393. void PreOrdTrav (PBTNODE pNode, int iLevel, char cChildType)
  1394. {
  1395. if (pNode == NULL)
  1396. {
  1397. OutputDebugString ("*\n");
  1398. return;
  1399. }
  1400. _DPF4 ("Chl: %c Col: %c Lev: %d\n", cChildType,
  1401. pNode->color == RED ? 'R' : 'B', iLevel);
  1402. iLevel++;
  1403. PreOrdTrav (pNode->pLeft, iLevel, 'L');
  1404. PreOrdTrav (pNode->pRight, iLevel, 'R');
  1405. }
  1406. void NEAR PASCAL VerifyTree (PBTNODE pRoot)
  1407. {
  1408. PreOrdTrav (pRoot, 0, 'R');
  1409. OutputDebugString ("End Tree\n");
  1410. }
  1411. #endif /* _DEBUG */