Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

524 lines
15 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1994 - 2000.
  5. //
  6. // File: colhash.cxx
  7. //
  8. // Contents: Hash table compressions for large tables.
  9. //
  10. // Classes: CCompressedColHash
  11. //
  12. // Functions: GuidHash - Hash function for GUIDs
  13. //
  14. // History: 13 Apr 1994 AlanW Created
  15. //
  16. //--------------------------------------------------------------------------
  17. #include "pch.cxx"
  18. #pragma hdrstop
  19. #include <objcur.hxx>
  20. #include <tblvarnt.hxx>
  21. #include "tabledbg.hxx"
  22. #include "colcompr.hxx"
  23. const USHORT MAX_HASH_TABLE_SIZE = 32767; // Maximum hash table size
  24. //+-------------------------------------------------------------------------
  25. //
  26. // Function: GuidHash, public
  27. //
  28. // Synopsis: Hash a GUID value for use in a hash table.
  29. //
  30. // Arguments: [pbData] - pointer to the value to be hashed.
  31. // [cbData] - should be sizeof (GUID), unused
  32. //
  33. // Returns: ULONG - Hash value for the input GUID
  34. //
  35. // Notes: The hash function just xors a few selected fields out
  36. // of the GUID structure. It is intended to work well for
  37. // both generated GUIDs (from UuidCreate) and administratively
  38. // assigned GUIDs like OLE IIDs and CLSIDs.
  39. //
  40. //--------------------------------------------------------------------------
  41. ULONG GuidHash(
  42. BYTE *pbData,
  43. USHORT cbData
  44. ) {
  45. UNALIGNED GUID *pGuid = (GUID *)pbData;
  46. return (pGuid->Data1 ^
  47. (pGuid->Data4[0]<<16) ^
  48. (pGuid->Data4[6]<<8) ^
  49. (pGuid->Data4[7]));
  50. }
  51. //+-------------------------------------------------------------------------
  52. //
  53. // Method: CCompressedColHash::DefaultHash, public static
  54. //
  55. // Synopsis: Generic hash function
  56. //
  57. // Arguments: [pbData] - pointer to the value to be hashed.
  58. // [cbData] - size of pbData
  59. //
  60. // Returns: ULONG - Hash value for the input data
  61. //
  62. //--------------------------------------------------------------------------
  63. //static
  64. ULONG CCompressedColHash::DefaultHash(
  65. BYTE *pbData,
  66. USHORT cbData
  67. ) {
  68. ULONG ulRet = cbData;
  69. while (cbData--)
  70. ulRet = (ulRet<<1) ^ *pbData++;
  71. return ulRet;
  72. }
  73. //+-------------------------------------------------------------------------
  74. //
  75. // Method: CCompressedColHash::CCompressedColHash, public
  76. //
  77. // Synopsis: Constructor for a hash compressed column.
  78. //
  79. // Arguments: [vtData] - type of each data item
  80. // [cbDataWidth] - size of each data item
  81. // [pfnHashFunction] - pointer to hash function
  82. //
  83. // Returns: pKey is filled in with the index of the data item in
  84. // the data array.
  85. //
  86. // Notes:
  87. //
  88. //--------------------------------------------------------------------------
  89. CCompressedColHash::CCompressedColHash(
  90. VARTYPE vtData,
  91. USHORT cbDataWidth,
  92. PFNHASH pfnHashFunction) :
  93. CCompressedCol(
  94. vtData, // DataType
  95. sizeof (HASHKEY), // _cbKeyWidth
  96. CCompressedCol::FixedHash // _CompressionType
  97. ),
  98. _cbDataWidth(cbDataWidth),
  99. _pfnHash(pfnHashFunction),
  100. _pHashTable(NULL), _cHashEntries(0),
  101. _pDataItems(NULL), _cDataItems(0),
  102. _fGrowthInProgress(FALSE),
  103. _pData(NULL), _cbData(0),
  104. _ulMemCounter(0)
  105. {
  106. }
  107. CCompressedColHash::~CCompressedColHash( )
  108. {
  109. if (_pData) {
  110. TblPageDealloc(_pData, _ulMemCounter);
  111. _pData = NULL;
  112. _cbData = 0;
  113. }
  114. Win4Assert(_ulMemCounter == 0);
  115. }
  116. //+-------------------------------------------------------------------------
  117. //
  118. // Method: CCompressedColHash::AddData, public
  119. //
  120. // Synopsis: Add a data entry to the hash table if it is not
  121. // already there.
  122. //
  123. // Arguments: [pVarnt] - pointer to data item
  124. // [pKey] - pointer to lookup key value
  125. // [reIndicator] - returns an indicator variable for
  126. // problems
  127. //
  128. // Returns: pKey is filled in with the index of the data item in
  129. // the data array. reIndicator is filled with an indication
  130. // of problems.
  131. //
  132. // Notes:
  133. //
  134. //--------------------------------------------------------------------------
  135. VOID CCompressedColHash::AddData(
  136. PROPVARIANT const * const pVarnt,
  137. ULONG* pKey,
  138. GetValueResult& reIndicator
  139. ) {
  140. //
  141. // Specially handle the VT_EMPTY case
  142. //
  143. if (pVarnt->vt == VT_EMPTY) {
  144. *pKey = 0;
  145. reIndicator = GVRSuccess;
  146. return;
  147. }
  148. CTableVariant *pVar = (CTableVariant *)pVarnt;
  149. Win4Assert(pVarnt->vt == DataType);
  150. BYTE *pbData ;
  151. USHORT cbData = (USHORT) pVar->VarDataSize();
  152. Win4Assert(cbData && cbData == _cbDataWidth);
  153. if (pVar->VariantPointerInFirstWord( )) {
  154. pbData = (BYTE *) pVar->pszVal;
  155. } else {
  156. Win4Assert(pVar->VariantPointerInSecondWord( ));
  157. pbData = (BYTE *) pVar->blob.pBlobData;
  158. }
  159. _AddData(pbData, cbData, pKey);
  160. reIndicator = GVRSuccess;
  161. return;
  162. }
  163. //+-------------------------------------------------------------------------
  164. //
  165. // Method: CCompressedColHash::_AddData, protected
  166. //
  167. // Synopsis: Helper for the public AddData method. Adds
  168. // a data entry to the hash table (if it does not already
  169. // exist).
  170. //
  171. // Arguments: [pbData] - pointer to data item
  172. // [cbDataSize] - size of data item
  173. // [pKey] - pointer to lookup key value
  174. //
  175. // Returns: pKey is filled in with the index of the data item in
  176. // the data array.
  177. //
  178. // Notes:
  179. //
  180. //--------------------------------------------------------------------------
  181. VOID CCompressedColHash::_AddData(
  182. BYTE *pbData,
  183. USHORT cbDataSize,
  184. ULONG* pKey
  185. ) {
  186. Win4Assert(cbDataSize == _cbDataWidth);
  187. if (_pData == NULL) {
  188. _GrowHashTable();
  189. }
  190. ULONG ulHash = _pfnHash(pbData, cbDataSize);
  191. ulHash %= _cHashEntries;
  192. HASHKEY* pusHashChain = &_pHashTable[ulHash];
  193. HASHKEY* pusNextData;
  194. USHORT cChainLength = 0;
  195. while (*pusHashChain != 0) {
  196. cChainLength++;
  197. pusNextData = _IndexHashkey( *pusHashChain );
  198. if (memcmp((BYTE *) (pusNextData+1), pbData, cbDataSize) == 0) {
  199. //
  200. // Found the data item. Return its index.
  201. //
  202. *pKey = *pusHashChain;
  203. return;
  204. }
  205. pusHashChain = pusNextData;
  206. }
  207. if (cChainLength > _maxChain)
  208. _maxChain = cChainLength;
  209. pusNextData = (HASHKEY *) ((BYTE *)_pDataItems +
  210. (_cDataItems) * (sizeof (HASHKEY) + _cbDataWidth));
  211. if (((BYTE*)pusNextData + (sizeof (HASHKEY) + _cbDataWidth) -
  212. (BYTE *)_pData) > (int) _cbData ||
  213. (_cDataItems > (ULONG) ( _cHashEntries * 3 ) &&
  214. _cHashEntries < MAX_HASH_TABLE_SIZE &&
  215. !_fGrowthInProgress)) {
  216. //
  217. // The new data will not fit in the table, or the hash chains will
  218. // be too long. Grow the table, then recurse. The table may be
  219. // rehashed, and can be moved when grown, so the lookup we've
  220. // already done may be invalid.
  221. //
  222. _GrowHashTable();
  223. _AddData(pbData, cbDataSize, pKey);
  224. return;
  225. }
  226. //
  227. // Now add the new data item. The data item consists of a USHORT
  228. // for the hash chain, followed by the buffer for the fixed size
  229. // data item.
  230. //
  231. *pKey = *pusHashChain = ++_cDataItems;
  232. Win4Assert(_cDataItems != 0); // check for overflow
  233. *pusNextData++ = 0;
  234. RtlCopyMemory((BYTE *)pusNextData, pbData, _cbDataWidth);
  235. }
  236. //+-------------------------------------------------------------------------
  237. //
  238. // Method: CCompressedColHash::_Rehash, protected
  239. //
  240. // Synopsis: Helper function for the _GrowHashTable method.
  241. // reinserts an existing item into the hash table.
  242. //
  243. // Arguments: [pbData] - pointer to data item
  244. // [kData] - index to the data item in the table
  245. //
  246. // Returns: Nothing
  247. //
  248. // Notes:
  249. //
  250. //--------------------------------------------------------------------------
  251. VOID CCompressedColHash::_Rehash(
  252. HASHKEY kData,
  253. BYTE *pbData
  254. ) {
  255. Win4Assert(_pData != NULL && kData > 0 && kData <= _cDataItems);
  256. ULONG ulHash = _pfnHash(pbData, _cbDataWidth);
  257. ulHash %= _cHashEntries;
  258. HASHKEY* pusHashChain = &_pHashTable[ulHash];
  259. HASHKEY* pusNextData;
  260. USHORT cChainLength = 0;
  261. while (*pusHashChain != 0) {
  262. cChainLength++;
  263. pusNextData = _IndexHashkey( *pusHashChain );
  264. pusHashChain = pusNextData;
  265. }
  266. if (cChainLength > _maxChain)
  267. _maxChain = cChainLength;
  268. pusNextData = _IndexHashkey( kData );
  269. //
  270. // Now add the data item to the hash chain.
  271. //
  272. *pusHashChain = kData;
  273. *pusNextData++ = 0;
  274. Win4Assert((BYTE*)pusNextData == pbData);
  275. return;
  276. }
  277. //+-------------------------------------------------------------------------
  278. //
  279. // Method: CCompressedColHash::GetData, public
  280. //
  281. // Synopsis: Retrieve a value from the hash table.
  282. //
  283. // Arguments: [pVarnt] - pointer to variant in which to return the data
  284. // [PreferredType] - Peferred data type
  285. // [ulKey] - the lookup key value
  286. // [PropId] - (unused) property id being retrieved.
  287. //
  288. // Returns: pVarnt is filled with the result of the lookup.
  289. //
  290. // Notes: The PreferredType expresses the caller's preference only.
  291. // This method is free to return whatever type is most
  292. // convenient.
  293. //
  294. // The returned data does not conform to any alignment
  295. // restrictions on the data.
  296. //
  297. //--------------------------------------------------------------------------
  298. GetValueResult CCompressedColHash::GetData(
  299. PROPVARIANT * pVarnt,
  300. VARTYPE PreferredType,
  301. ULONG ulKey,
  302. PROPID PropId
  303. ) {
  304. CTableVariant *pVar = (CTableVariant *)pVarnt;
  305. Win4Assert(PreferredType == DataType && ulKey >= 1 && ulKey <= _cDataItems);
  306. if (ulKey >= 1 && ulKey <= _cDataItems) {
  307. pVarnt->vt = DataType;
  308. BYTE *pbData = ((BYTE *)_pDataItems +
  309. (ulKey-1) * (sizeof (HASHKEY) + _cbDataWidth)) +
  310. sizeof (HASHKEY);
  311. if (pVar->VariantPointerInFirstWord( )) {
  312. pVar->pszVal = (CHAR*)pbData;
  313. } else {
  314. Win4Assert(pVar->VariantPointerInSecondWord( ));
  315. pVar->blob.pBlobData = pbData;
  316. }
  317. return GVRSuccess;
  318. } else {
  319. pVarnt->vt = VT_EMPTY;
  320. return GVRNotAvailable;
  321. }
  322. }
  323. void CCompressedColHash::FreeVariant(PROPVARIANT * pvarnt) { }
  324. //+-------------------------------------------------------------------------
  325. //
  326. // Method: CCompressedColHash::_GrowHashTable, protected
  327. //
  328. // Synopsis: Grow the space allocated to the hash table and data
  329. // items.
  330. //
  331. // Arguments: - none -
  332. //
  333. // Returns: Nothing
  334. //
  335. // Notes: Also called to allocate the initial data area.
  336. //
  337. // The number of hash buckets starts out at a low
  338. // number, then is increased as the amount of data
  339. // grows. Data items must be rehashed when this occurs.
  340. // Since items are identified by their offset in the
  341. // data array, this must not change while rehashing.
  342. //
  343. //--------------------------------------------------------------------------
  344. const unsigned MIN_HASH_TABLE_SIZE = 11; // Minimum hash table size
  345. inline USHORT CCompressedColHash::_NextHashSize(
  346. HASHKEY cItems,
  347. USHORT cHash
  348. ) {
  349. do {
  350. cHash = cHash*2 + 1;
  351. } while (cHash < _cDataItems);
  352. return (cHash < MAX_HASH_TABLE_SIZE) ? cHash : MAX_HASH_TABLE_SIZE;
  353. }
  354. VOID CCompressedColHash::_GrowHashTable( void )
  355. {
  356. ULONG cbSize;
  357. USHORT cNewHashEntries;
  358. int fRehash = FALSE;
  359. Win4Assert(!_fGrowthInProgress &&
  360. "Recursive call to CCompressedColHash::_GrowHashTable");
  361. _fGrowthInProgress = TRUE;
  362. if (_pData == NULL) {
  363. cNewHashEntries = MIN_HASH_TABLE_SIZE;
  364. } else if (_cHashEntries < MAX_HASH_TABLE_SIZE &&
  365. (_cDataItems > (ULONG) _cHashEntries*2 ||
  366. (_cDataItems > _cHashEntries && _maxChain > 3))) {
  367. cNewHashEntries = _NextHashSize(_cDataItems, _cHashEntries);
  368. fRehash = TRUE;
  369. tbDebugOut((DEB_ITRACE, "Growing hash table, old,new sizes = %d,%d\n",
  370. _cHashEntries, cNewHashEntries));
  371. }
  372. //
  373. // Compute the required size of the hash table and data
  374. //
  375. cbSize = _cHashEntries * sizeof(HASHKEY);
  376. cbSize += (_cDataItems + 4) * (_cbDataWidth + sizeof (HASHKEY));
  377. cbSize = TblPageGrowSize(cbSize, TRUE);
  378. Win4Assert(cbSize > _cbData || (fRehash && cbSize == _cbData));
  379. BYTE *pbNewData;
  380. if (_pData && cbSize < TBL_PAGE_MAX_SEGMENT_SIZE) {
  381. pbNewData = (BYTE *)
  382. TblPageRealloc(_pData, _ulMemCounter, cbSize, 0);
  383. } else {
  384. pbNewData =
  385. (BYTE *)TblPageAlloc(cbSize, _ulMemCounter, TBL_SIG_COMPRESSED);
  386. }
  387. tbDebugOut((DEB_ITRACE, "New hash table at = %x\n", pbNewData));
  388. if (_pData != NULL && !fRehash) {
  389. if (_pData != pbNewData) {
  390. RtlCopyMemory(pbNewData, _pData, _cbData);
  391. TblPageDealloc(_pData, _ulMemCounter, _cbData);
  392. _pData = pbNewData;
  393. }
  394. _cbData = cbSize;
  395. _pHashTable = (HASHKEY *) _pData;
  396. _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);
  397. } else {
  398. BYTE *pOldDataItems = _pDataItems;
  399. VOID *pOldData = _pData;
  400. ULONG cbOldSize = _cbData;
  401. _pData = pbNewData;
  402. _cbData = cbSize;
  403. _pHashTable = (HASHKEY *)_pData;
  404. _cHashEntries = cNewHashEntries;
  405. _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);
  406. if (pOldData != NULL)
  407. RtlMoveMemory(_pDataItems,
  408. pOldDataItems,
  409. _cDataItems * (sizeof (HASHKEY) + _cbDataWidth));
  410. RtlZeroMemory(_pHashTable, cNewHashEntries * sizeof (HASHKEY));
  411. _maxChain = 0;
  412. //
  413. // Now re-add all old data items to the hash table.
  414. //
  415. pOldDataItems = _pDataItems;
  416. for (HASHKEY i=1; i<=_cDataItems; i++) {
  417. pOldDataItems += sizeof (HASHKEY); // skip hash chain
  418. _Rehash(i, pOldDataItems);
  419. pOldDataItems += _cbDataWidth; // skip data item
  420. }
  421. if (pOldData != NULL && pOldData != _pData)
  422. TblPageDealloc(pOldData, _ulMemCounter, cbOldSize);
  423. }
  424. _fGrowthInProgress = FALSE;
  425. return;
  426. }
  427. //+---------------------------------------------------------------------------
  428. //
  429. // Function: _ClearAll
  430. //
  431. // Synopsis: Method clears all the data in the "fixed width" part of the
  432. // memory buffer.
  433. //
  434. // Arguments: (none)
  435. //
  436. // History: 12-16-94 srikants Created
  437. //
  438. // Notes:
  439. //
  440. //----------------------------------------------------------------------------
  441. void CCompressedColHash::_ClearAll()
  442. {
  443. RtlZeroMemory(_pHashTable, _cHashEntries * sizeof (HASHKEY));
  444. RtlZeroMemory(_pDataItems, _cDataItems * _cbDataWidth );
  445. _cDataItems = 0;
  446. }