Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

768 lines
17 KiB

4 years ago
  1. /***********************************************************************
  2. * Microsoft (R) 32-Bit Incremental Linker
  3. *
  4. * Copyright (C) Microsoft Corp 1992-95. All rights reserved.
  5. *
  6. * File: hash.cpp
  7. *
  8. * File Comments:
  9. *
  10. * Generic dynamic hash tables implemented on top of dynamic arrays
  11. *
  12. ***********************************************************************/
  13. #include "link.h"
  14. static VOID Expand_HT(IN PHT, IN PVOID);
  15. VOID
  16. SetStatus_HT(
  17. PHT pht,
  18. WORD flags)
  19. /*++
  20. Routine Description:
  21. Set the hash table status
  22. Arguments:
  23. pht - hash table structure
  24. flags - hash table status flags.
  25. Return Value:
  26. new flags
  27. --*/
  28. {
  29. assert(pht);
  30. pht->flags = flags;
  31. }
  32. WORD
  33. GetStatus_HT(
  34. PHT pht)
  35. /*++
  36. Routine Description:
  37. Get the hash table status
  38. Arguments:
  39. pht - hash table structure
  40. Return Value:
  41. new flags
  42. --*/
  43. {
  44. assert(pht);
  45. return(pht->flags);
  46. }
  47. VOID Init_HT(
  48. OUT PPHT ppht,
  49. DWORD celementInChunk,
  50. DWORD cchunkInDir,
  51. const char *(*SzFromPv)(PVOID, PVOID),
  52. WORD flags)
  53. /*++
  54. Routine Description:
  55. Initialize the hash table.
  56. Arguments:
  57. ppht - pointer to a pointer to the hash table
  58. celementsInChunk - number of elements in a dynamic array chunk
  59. cchunksInDir - number of chunks in a dynamic array directory
  60. Return Value:
  61. none
  62. --*/
  63. {
  64. assert(ppht);
  65. *ppht = (PHT) Calloc(1, sizeof(HT));
  66. // the initial number of buckets is arbitrary and can be tuned
  67. (*ppht)->cbuckets = celementInChunk;
  68. (*ppht)->iNextToSplitMax = celementInChunk;
  69. (*ppht)->iNextToSplit = 0L;
  70. (*ppht)->cExpands = 0L;
  71. (*ppht)->pstateStack = NULL;
  72. (*ppht)->celementInChunk = celementInChunk;
  73. (*ppht)->cchunkInDir = cchunkInDir;
  74. (*ppht)->SzFromPv = SzFromPv;
  75. // set the hash table status to not full and inserts allowed
  76. (*ppht)->flags = 0;
  77. (*ppht)->flags |= flags;
  78. // allocate a directory
  79. (*ppht)->rgpchunk = (PCHUNK *) Calloc(cchunkInDir, sizeof(PCHUNK));
  80. // allocate a chunk
  81. (*ppht)->rgpchunk[0] = (PCHUNK) Calloc(1, sizeof(CHUNK));
  82. // allocate elements for a chunk
  83. (*ppht)->rgpchunk[0]->rgpelement =
  84. (PELEMENT *) Calloc(celementInChunk, sizeof(PELEMENT));
  85. }
  86. VOID
  87. Free_HT (
  88. IN OUT PPHT ppht
  89. )
  90. /*++
  91. Routine Description:
  92. Free's up the hash table.
  93. Arguments:
  94. ppht - pointer to a pointer to the hash table
  95. Return Value:
  96. none
  97. --*/
  98. {
  99. PELEMENT pelement, pelementNext;
  100. DWORD ibucket;
  101. DWORD iChunk, iChunkOld;
  102. PHT pht;
  103. assert(ppht);
  104. assert(*ppht);
  105. pht = *ppht;
  106. // free all the elements & the array of ptrs to elements
  107. iChunkOld = 0;
  108. for (ibucket = 0; ibucket < pht->cbuckets; ibucket++) {
  109. assert(ibucket / pht->celementInChunk < pht->cchunkInDir);
  110. iChunk = ibucket / pht->celementInChunk;
  111. pelement = pht->rgpchunk[iChunk]->
  112. rgpelement[ibucket % pht->celementInChunk];
  113. // free all elements in this bucket
  114. while (pelement) {
  115. pelementNext = pelement->pelementNext;
  116. // TEMPORARY: elements are allocated from permanent heap (and not individually)
  117. // free(pelement);
  118. pelement = pelementNext;
  119. }
  120. // free the array of ptrs to elements of chunk (previous) & chunk itself
  121. if (iChunk > iChunkOld) {
  122. // UNDONE: This memory isn't safe to free. It is allocated by
  123. // UNDONE: Calloc().
  124. free(pht->rgpchunk[iChunkOld]->rgpelement);
  125. free(pht->rgpchunk[iChunkOld]);
  126. iChunkOld++;
  127. }
  128. }
  129. // UNDONE: This memory isn't safe to free. It is allocated by
  130. // UNDONE: Calloc().
  131. // handle last chunk
  132. free(pht->rgpchunk[iChunkOld]->rgpelement);
  133. free(pht->rgpchunk[iChunkOld]);
  134. // free the hash table directory
  135. free(pht->rgpchunk);
  136. // free the hash table struct itself
  137. free(pht);
  138. // done
  139. *ppht = NULL;
  140. }
  141. __inline DWORD UlHash_HT(
  142. const char *Name,
  143. PHT pht)
  144. /*++
  145. Routine Description:
  146. Hash a name and return an unsigned long reflecting the name.
  147. Arguments:
  148. Name - pointer to symbol name to hash
  149. ppht - pointer to a pointer to the hash table to hash into
  150. Return Value:
  151. hash value
  152. --*/
  153. {
  154. DWORD ulHash;
  155. DWORD ulK;
  156. DWORD ulAddress;
  157. CONST DWORD ulPrime = 1048583; // magic prime constant, see header
  158. const BYTE *pb;
  159. assert(Name);
  160. assert(pht);
  161. // hash function, this can be changed to tweak performance
  162. for(pb = (BYTE *) Name, ulHash = 0; *pb;) {
  163. ulHash = (ulHash << 2) + *pb++;
  164. if ((ulK = ulHash & 0xc000) != 0) {
  165. ulHash ^= (ulK >> 11);
  166. ulHash ^= ulK;
  167. }
  168. ulHash ^= (ulHash << 5) + (ulHash >> 3);
  169. }
  170. ulHash %= ulPrime;
  171. // account for possible grown hash table
  172. ulAddress = ulHash % pht->iNextToSplitMax;
  173. if (ulAddress < pht->iNextToSplit) {
  174. ulAddress = ulHash % (pht->iNextToSplitMax * 2);
  175. }
  176. return (ulAddress);
  177. }
  178. PELEMENT PelementLookup_HT(
  179. const char *Name,
  180. HT *pht,
  181. BOOL fAllocNew,
  182. PVOID pvBlk,
  183. PBOOL pfNew)
  184. /*++
  185. Routine Description:
  186. Lookup Name in the symbol table and return its record. If Name is not
  187. found and fAllocNew == 1 allocate a new ELEMENT and blast it in, otherwise
  188. return NULL;
  189. Arguments:
  190. Name - pointer to symbol name to hash
  191. pht - pointer to a pointer to the hash table to hash into
  192. fAllocNew - allocate a new element if fAllocNew == 1 and the element does
  193. does not already exist
  194. pfNew - *pfNew set to !0 iff Name was not found
  195. - set on entry of desired
  196. Return Value:
  197. pointer to the generic contents of a hashtable element
  198. --*/
  199. {
  200. ELEMENT **ppelementFirst;
  201. ELEMENT *pelement;
  202. DWORD ulAddress;
  203. DWORD iDirectory;
  204. DWORD iChunk;
  205. DWORD ulLoad;
  206. const char *sz;
  207. assert(pht);
  208. assert(Name);
  209. // calculate the load or average chain length
  210. assert(pht->cbuckets);
  211. ulLoad = (pht->celements << 4) / pht->cbuckets;
  212. // If the load is greater than an arbitrary threshold, grow the
  213. // table. 1 is an arbitrary constant and can be adjusted.
  214. // This MUST be done before the new element is put into the table since
  215. // this would put an element without a pv pointer. If Expand_HT() compares
  216. // an elements contents based on the method pht->SzFromPv we will assert.
  217. if (ulLoad > 48) {
  218. Expand_HT(pht, pvBlk);
  219. }
  220. // get bucket
  221. ulAddress = UlHash_HT(Name, pht);
  222. iDirectory = ulAddress / pht->celementInChunk;
  223. iChunk = ulAddress % pht->celementInChunk;
  224. assert(iDirectory < pht->cchunkInDir);
  225. assert(iChunk < pht->celementInChunk);
  226. ppelementFirst = &(pht->rgpchunk[iDirectory]->rgpelement[iChunk]);
  227. assert(ppelementFirst);
  228. // search the buckets
  229. pelement = *ppelementFirst;
  230. while (pelement) {
  231. sz = pht->SzFromPv(pelement->pv, pvBlk);
  232. assert(sz != NULL);
  233. if (!strcmp(Name, sz)) {
  234. // found it
  235. return pelement;
  236. }
  237. pelement = pelement->pelementNext;
  238. }
  239. if (!fAllocNew) {
  240. // return without allocating new element
  241. return (NULL);
  242. }
  243. assert(!(GetStatus_HT(pht) & HT_InsertsNotAllowed));
  244. // set to new element
  245. *pfNew = 1;
  246. // element doesn't exist, so blast it in
  247. pelement = fINCR ? (ELEMENT *) Malloc(sizeof(ELEMENT)) :
  248. (ELEMENT *) ALLOC_PERM(sizeof(ELEMENT));
  249. assert(pelement);
  250. memset(pelement, 0, sizeof(ELEMENT));
  251. pelement->pelementNext = *ppelementFirst;
  252. (*ppelementFirst) = pelement;
  253. pht->celements++;
  254. return (pelement);
  255. }
  256. static VOID
  257. Expand_HT(
  258. PHT pht,
  259. PVOID pvBlk
  260. )
  261. /*++
  262. Routine Description:
  263. Expand the hash table if possible. The only thing that would hamper
  264. the address space of the hash table from being expanded is if the
  265. underlying dynamic array structure is full.
  266. Arguments:
  267. pht - pointer to the hash table to expand
  268. Return Value:
  269. none
  270. --*/
  271. {
  272. DWORD iNewAddress;
  273. DWORD iOldChunk;
  274. DWORD iNewChunk;
  275. ELEMENT *pelementCur;
  276. ELEMENT *pelementPrev;
  277. ELEMENT *pelementLastOfNew;
  278. CHUNK *pchunkOld;
  279. CHUNK *pchunkNew;
  280. assert(pht);
  281. assert(!(GetStatus_HT(pht) & HT_Full));
  282. // see if we have reached the maximum size of the table
  283. if (!((pht->iNextToSplit + pht->iNextToSplitMax) <
  284. (pht->cchunkInDir * pht->celementInChunk))) {
  285. SetStatus_HT(pht, (WORD)(GetStatus_HT(pht) | HT_Full));
  286. return;
  287. }
  288. pht->cExpands++;
  289. // locate the bucket to be split
  290. assert(pht->cchunkInDir);
  291. assert(pht->celementInChunk);
  292. assert((pht->iNextToSplit / pht->celementInChunk) < pht->cchunkInDir);
  293. pchunkOld = pht->rgpchunk[pht->iNextToSplit / pht->celementInChunk];
  294. assert(pchunkOld);
  295. iOldChunk = pht->iNextToSplit % pht->celementInChunk;
  296. // expand the address space and if necessary allocate a new chunk
  297. iNewAddress = pht->iNextToSplitMax + pht->iNextToSplit;
  298. assert(pht->rgpchunk);
  299. if (iNewAddress % pht->celementInChunk == 0) {
  300. assert((iNewAddress / pht->celementInChunk) < pht->cchunkInDir);
  301. pht->rgpchunk[iNewAddress / pht->celementInChunk] =
  302. (PCHUNK) Calloc(1, sizeof(CHUNK));
  303. pht->rgpchunk[iNewAddress / pht->celementInChunk]->rgpelement =
  304. (PELEMENT *) Calloc(pht->celementInChunk, sizeof(PELEMENT));
  305. }
  306. assert((iNewAddress / pht->celementInChunk) < pht->cchunkInDir);
  307. pchunkNew = pht->rgpchunk[iNewAddress / pht->celementInChunk];
  308. assert(pchunkNew);
  309. iNewChunk = iNewAddress % pht->celementInChunk;
  310. // adjust the state variables
  311. pht->iNextToSplit++;
  312. if (pht->iNextToSplit == pht->iNextToSplitMax) {
  313. pht->iNextToSplitMax *= 2;
  314. pht->iNextToSplit = 0;
  315. }
  316. pht->cbuckets++;
  317. // relocate records to the new bucket
  318. assert(iOldChunk < pht->celementInChunk);
  319. pelementCur = pchunkOld->rgpelement[iOldChunk];
  320. pelementPrev = NULL;
  321. pelementLastOfNew = NULL;
  322. assert(pchunkNew->rgpelement);
  323. assert(iNewChunk < pht->celementInChunk);
  324. pchunkNew->rgpelement[iNewChunk] = NULL;
  325. while (pelementCur) {
  326. assert(pelementCur);
  327. if (UlHash_HT(pht->SzFromPv(pelementCur->pv, pvBlk), pht) == iNewAddress) {
  328. if (pelementLastOfNew == NULL) {
  329. assert(iNewChunk < pht->celementInChunk);
  330. assert(pchunkNew);
  331. pchunkNew->rgpelement[iNewChunk] = pelementCur;
  332. } else {
  333. assert(pelementLastOfNew);
  334. pelementLastOfNew->pelementNext = pelementCur;
  335. }
  336. if (pelementPrev == NULL) {
  337. assert(iOldChunk < pht->celementInChunk);
  338. assert(pchunkOld);
  339. assert(pelementCur);
  340. assert(pchunkOld->rgpelement);
  341. pchunkOld->rgpelement[iOldChunk] = pelementCur->pelementNext;
  342. } else {
  343. assert(pelementPrev);
  344. assert(pelementCur);
  345. pelementPrev->pelementNext = pelementCur->pelementNext;
  346. }
  347. pelementLastOfNew = pelementCur;
  348. pelementCur = pelementCur->pelementNext;
  349. pelementLastOfNew->pelementNext = NULL;
  350. } else {
  351. pelementPrev = pelementCur;
  352. pelementCur = pelementCur->pelementNext;
  353. }
  354. }
  355. }
  356. #if DBG
  357. VOID
  358. Statistics_HT(
  359. HT *pht)
  360. /*++
  361. Routine Description:
  362. Dump statistics of the hashtable use to stdout. This is a debug
  363. routine.
  364. Arguments:
  365. pht - pointer to the hash table to dump statistics on
  366. Return Value:
  367. none
  368. --*/
  369. {
  370. DWORD rgulChainCounts[] = {0L, 0L, 0L, 0L, 0L, 0L, 0L};
  371. DWORD ulChainMax = 0;
  372. DWORD ulThis;
  373. DWORD iChunk;
  374. DWORD iDir;
  375. DWORD i;
  376. ELEMENT *pelement;
  377. CHUNK *pchunk;
  378. for (i = 0; i < pht->cbuckets; i++) {
  379. iDir = i / pht->celementInChunk;
  380. iChunk = i % pht->celementInChunk;
  381. pchunk = pht->rgpchunk[0];
  382. pelement = pchunk->rgpelement[iChunk];
  383. ulThis = 0;
  384. while (pelement) {
  385. ulThis++;
  386. pelement = pelement->pelementNext;
  387. }
  388. if (ulThis < 6) {
  389. rgulChainCounts[ulThis]++;
  390. } else {
  391. rgulChainCounts[6]++;
  392. }
  393. if (ulThis > ulChainMax) {
  394. ulChainMax = ulThis;
  395. }
  396. }
  397. DBPRINT("\nHash Table Statistics\n");
  398. DBPRINT("---------------------\n");
  399. DBPRINT("celementsInChunk .... %lu\n", pht->celementInChunk);
  400. DBPRINT("cchunksInDir ........ %lu\n", pht->cchunkInDir);
  401. DBPRINT("# elements .......... %lu\n", pht->celements);
  402. DBPRINT("# buckets ........... %lu\n", pht->cbuckets);
  403. assert(pht->cbuckets);
  404. DBPRINT("load ................ %f\n",
  405. (float) pht->celements / (float) pht->cbuckets);
  406. DBPRINT("# table expands ..... %lu\n", pht->cExpands);
  407. DBPRINT("maximum bucket size . %lu\n", ulChainMax);
  408. DBPRINT("flags................ %u\n", pht->flags);
  409. DBPRINT("# buckets of size 0 . %lu\n", rgulChainCounts[0]);
  410. DBPRINT("# buckets of size 1 . %lu\n", rgulChainCounts[1]);
  411. DBPRINT("# buckets of size 2 . %lu\n", rgulChainCounts[2]);
  412. DBPRINT("# buckets of size 3 . %lu\n", rgulChainCounts[3]);
  413. DBPRINT("# buckets of size 4 . %lu\n", rgulChainCounts[4]);
  414. DBPRINT("# buckets of size 5 . %lu\n", rgulChainCounts[5]);
  415. DBPRINT("# buckets over 5 .... %lu\n\n", rgulChainCounts[6]);
  416. fflush(stdout);
  417. }
  418. #endif // DBG
  419. VOID
  420. InitEnumeration_HT(
  421. PHT pht)
  422. /*++
  423. Routine Description:
  424. Initialize the enumeration of a hashtable.
  425. Arguments:
  426. pht - pointer to the hash table to enumerate
  427. Return Value:
  428. none
  429. --*/
  430. {
  431. PSTATE pstate;
  432. pstate = pht->pstateStack;
  433. pht->pstateStack = (PSTATE) PvAllocZ(sizeof(STATE));
  434. pht->pstateStack->pstateNext = pstate;
  435. pht->pstateStack->iLast = 0L;
  436. pht->pstateStack->cFound = 0L;
  437. pht->pstateStack->pelementLast = NULL;
  438. }
  439. PELEMENT
  440. PelementEnumerateNext_HT(
  441. PHT pht)
  442. /*++
  443. Routine Description:
  444. Get the next element in the enumeration of a hash table.
  445. Arguments:
  446. pht - pointer to the hash table to enumerate
  447. Return Value:
  448. none
  449. --*/
  450. {
  451. #define iLastS (pht->pstateStack->iLast)
  452. #define pelementLastS (pht->pstateStack->pelementLast)
  453. #define cFoundS (pht->pstateStack->cFound)
  454. ELEMENT *pelement;
  455. assert(pht);
  456. assert(pht->pstateStack);
  457. if (cFoundS >= pht->celements) {
  458. // we completed the enumeration
  459. return NULL;
  460. }
  461. // check the next element in the bucket
  462. pelement = pelementLastS;
  463. if (pelement) {
  464. // there was someone in the bucket, got it
  465. pelementLastS = pelement->pelementNext;
  466. cFoundS++;
  467. } else {
  468. // there wasn't anyone in the bucket, find another bucket
  469. for (;;) {
  470. if (iLastS >= pht->cbuckets) {
  471. // there are no more buckets to enumerate
  472. return NULL;
  473. }
  474. // calculate the next bucket
  475. assert(pht->rgpchunk[iLastS / pht->celementInChunk]);
  476. pelement = pht->rgpchunk[iLastS / pht->celementInChunk]->
  477. rgpelement[iLastS % pht->celementInChunk];
  478. // increment to the next bucket
  479. iLastS++;
  480. if (pelement) {
  481. // found a bucket with elements in it, got it
  482. cFoundS++;
  483. pelementLastS = pelement->pelementNext;
  484. break;
  485. }
  486. }
  487. }
  488. return (pelement);
  489. #undef iLastS
  490. #undef pelementLastS
  491. #undef cFoundS
  492. }
  493. VOID
  494. TerminateEnumerate_HT(
  495. PHT pht)
  496. /*++
  497. Routine Description:
  498. Terminate and enumeration and free up a state.
  499. Arguments:
  500. pht - hast table
  501. Return Value:
  502. none
  503. --*/
  504. {
  505. PSTATE pstate;
  506. assert(pht);
  507. pstate = pht->pstateStack;
  508. assert(pstate);
  509. pht->pstateStack = pstate->pstateNext;
  510. FreePv(pstate);
  511. }
  512. DWORD
  513. Celement_HT(
  514. PHT pht)
  515. /*++
  516. Routine Description:
  517. Return the number of elements in a hash table.
  518. Arguments:
  519. pht - hast table
  520. Return Value:
  521. 0 if hash table is non-empty, !0 otherwise
  522. --*/
  523. {
  524. assert(pht);
  525. return (pht->celements);
  526. }
  527. #if DBG
  528. VOID
  529. Dump_HT(
  530. PHT pht,
  531. PVOID pvBlk)
  532. /*++
  533. Routine Description:
  534. Dump a hash table to standard out. This is a debug routine.
  535. Arguments:
  536. pht - hast table
  537. Return Value:
  538. None.
  539. --*/
  540. {
  541. PELEMENT pelement;
  542. DWORD ibucket;
  543. assert(pht);
  544. DBPRINT("beginning dump of hash table\n");
  545. DBPRINT("----------------------------\n");
  546. for (ibucket = 0; ibucket < pht->cbuckets; ibucket++) {
  547. assert(ibucket / pht->celementInChunk < pht->cchunkInDir);
  548. pelement = pht->rgpchunk[ibucket / pht->celementInChunk]->
  549. rgpelement[ibucket % pht->celementInChunk];
  550. DBPRINT("bucket = %u\n", ibucket);
  551. while (pelement) {
  552. DBPRINT(" %s\n", pht->SzFromPv(pelement->pv, pvBlk));
  553. pelement = pelement->pelementNext;
  554. }
  555. }
  556. DBPRINT("-------------------------\n");
  557. DBPRINT("ending dump of hash table\n\n");
  558. }
  559. #endif // DBG