Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

441 lines
12 KiB

  1. /*******************************************************************************
  2. * StringBlob.h *
  3. *--------------*
  4. * Description:
  5. * This is the header file for the CStringBlob class used internally by SAPI.
  6. *
  7. * Copyright 1998-2000 Microsoft Corporation All Rights Reserved.
  8. *
  9. *******************************************************************************/
  10. #ifndef _STRINGBLOB_H_
  11. #define _STRINGBLOB_H_ 1
  12. #ifndef SPDebug_h
  13. #include <SPDebug.h>
  14. #endif
  15. #include <math.h>
  16. template <class XCHAR>
  17. class CStringBlobT
  18. {
  19. XCHAR * m_pData; // List of words, end-to-end
  20. ULONG m_cchAllocated; // Size of m_pData
  21. ULONG * m_aichWords; // Word index => offset in m_pData [1] is index of start of second word
  22. ULONG m_cwords; // Number of words
  23. ULONG m_cwordsAllocated; // Size of m_aichWords
  24. ULONG * m_aulBuckets; // Hash table containing indices of words or 0 for empty buckets
  25. ULONG m_cBuckets; // Number of buckets in hash table
  26. public:
  27. CStringBlobT()
  28. {
  29. m_pData = NULL;
  30. m_cchAllocated = 0;
  31. m_aichWords = NULL;
  32. m_cwords = 0;
  33. m_cwordsAllocated = 0;
  34. m_aulBuckets = NULL;
  35. m_cBuckets = 0;
  36. }
  37. ~CStringBlobT()
  38. {
  39. Clear();
  40. }
  41. void Detach(XCHAR **ppszWordList, ULONG *pulSize)
  42. {
  43. *ppszWordList = NULL;
  44. if (m_pData)
  45. {
  46. ULONG cchDesired = StringSize();
  47. ULONG cbSize = SerializeSize(); // byte count, ULONG multiple
  48. *ppszWordList = (XCHAR*)::CoTaskMemRealloc(m_pData, cbSize);
  49. if (*ppszWordList == NULL)
  50. {
  51. *ppszWordList = m_pData;
  52. cbSize = m_cchAllocated * sizeof(XCHAR);
  53. }
  54. m_pData = NULL;
  55. Clear();
  56. if (pulSize)
  57. {
  58. *pulSize = cbSize;
  59. }
  60. }
  61. }
  62. void Clear()
  63. {
  64. if (m_pData)
  65. {
  66. ::CoTaskMemFree(m_pData);
  67. m_pData = NULL;
  68. }
  69. m_cchAllocated = 0;
  70. free(m_aichWords);
  71. m_aichWords = NULL;
  72. m_cwordsAllocated = 0;
  73. m_cwords = 0;
  74. free(m_aulBuckets);
  75. m_aulBuckets = NULL;
  76. m_cBuckets = 0;
  77. }
  78. HRESULT InitFrom(const XCHAR * pszStringArray, ULONG cch)
  79. {
  80. SPDBG_ASSERT(m_pData == NULL);
  81. if (cch)
  82. {
  83. ULONG cbSize = (cch * sizeof(XCHAR) + 3) & ~3;
  84. m_pData = (XCHAR *)::CoTaskMemAlloc(cbSize);
  85. if (m_pData == NULL)
  86. return E_OUTOFMEMORY;
  87. m_cchAllocated = cch;
  88. SPDBG_ASSERT(pszStringArray[0] == 0); // First string is always empty.
  89. // First pass to copy data and count strings.
  90. const XCHAR * pszPastEnd = pszStringArray + cch;
  91. const XCHAR * psz = pszStringArray;
  92. XCHAR * pszOut = m_pData;
  93. ULONG cwords = 0;
  94. while (psz < pszPastEnd)
  95. {
  96. if ((*pszOut++ = *psz++) == 0)
  97. ++cwords;
  98. }
  99. m_aichWords = (ULONG *) malloc(sizeof(ULONG) * cwords);
  100. if (m_aichWords == NULL)
  101. return E_OUTOFMEMORY;
  102. m_cwordsAllocated = cwords;
  103. m_cwords = cwords - 1; // Doesn't count leading 0
  104. HRESULT hr = SetHashSize(cwords * 2 + 1);
  105. if (FAILED(hr))
  106. return hr;
  107. // Second pass to fill in indices and hash table.
  108. psz = pszStringArray + 1;
  109. const WCHAR * pszWordStart = psz;
  110. ULONG ulID = 1;
  111. m_aichWords[0] = 1;
  112. while (psz < pszPastEnd)
  113. {
  114. if (*(psz++) == 0)
  115. {
  116. SPDBG_ASSERT(ulID < m_cwordsAllocated);
  117. m_aichWords[ulID] = (ULONG)(psz - pszStringArray); // can't have more than 4 million chars!
  118. m_aulBuckets[FindIndex(pszWordStart)] = ulID;
  119. pszWordStart = psz;
  120. ++ulID;
  121. }
  122. }
  123. }
  124. return S_OK;
  125. }
  126. ULONG HashKey(const XCHAR * pszString, ULONG * pcchIncNull = NULL)
  127. {
  128. ULONG hash = 0;
  129. ULONG cchIncNull = 1; // one for the NULL
  130. for (const XCHAR * pch = pszString; *pch; ++pch, ++cchIncNull)
  131. hash = hash * 65599 + *pch;
  132. if (pcchIncNull)
  133. *pcchIncNull = cchIncNull;
  134. return hash;
  135. }
  136. // find index for string -- returns 0 if not found
  137. ULONG FindIndex(const XCHAR * psz)
  138. {
  139. SPDBG_ASSERT(psz);
  140. ULONG cchIncNull;
  141. ULONG start = HashKey(psz, &cchIncNull) % m_cBuckets;
  142. ULONG index = start;
  143. do
  144. {
  145. // Not in table; return index where it should be placed.
  146. if (m_aulBuckets[index] == 0)
  147. return index;
  148. // Compare length and if it matches compare full string.
  149. if (m_aichWords[m_aulBuckets[index]] - m_aichWords[m_aulBuckets[index] - 1] == cchIncNull &&
  150. IsEqual(m_aichWords[m_aulBuckets[index] - 1], psz))
  151. {
  152. // Found this word already in the table.
  153. return index;
  154. }
  155. if (++index >= m_cBuckets)
  156. index -= m_cBuckets;
  157. } while (index != start);
  158. SPDBG_ASSERT(m_cwords == m_cBuckets); // Shouldn't ever get here
  159. return (ULONG) -1;
  160. }
  161. // Returns ID; use IndexFromId to recover string offset
  162. ULONG Find(const XCHAR * psz)
  163. {
  164. if (psz == NULL || m_cwords == 0)
  165. return 0;
  166. // Should always succeed in finding a bucket, since hash table is >2x larger than # of elements.
  167. ULONG ibucket = FindIndex(psz);
  168. return m_aulBuckets[ibucket]; // May be 0 if not in table
  169. }
  170. ULONG primeNext(ULONG val)
  171. {
  172. if (val < 2)
  173. val = 2; /* the smallest prime number */
  174. for (;;)
  175. {
  176. /* Is val a prime number? */
  177. ULONG maxFactor = (ULONG) sqrt ((double) val);
  178. /* Is i a factor of val? */
  179. for (ULONG i = 2; i <= maxFactor; i++)
  180. if (val % i == 0)
  181. break;
  182. if (i > maxFactor)
  183. return (val);
  184. val++;
  185. }
  186. }
  187. HRESULT SetHashSize(ULONG cbuckets)
  188. {
  189. if (cbuckets > m_cBuckets)
  190. {
  191. ULONG * oldtable = m_aulBuckets;
  192. ULONG oldentry = m_cBuckets;
  193. ULONG prime = primeNext(cbuckets);
  194. // Alloc new table.
  195. m_aulBuckets = (ULONG *) malloc(prime * sizeof(ULONG));
  196. if (m_aulBuckets == NULL)
  197. {
  198. m_aulBuckets = oldtable;
  199. return E_OUTOFMEMORY;
  200. }
  201. for (ULONG i=0; i < prime; i++)
  202. {
  203. m_aulBuckets[i] = 0;
  204. }
  205. m_cBuckets = prime;
  206. for (i = 0; i < oldentry; i++)
  207. {
  208. if (oldtable[i] != 0)
  209. {
  210. ULONG ibucket = FindIndex(m_pData + m_aichWords[oldtable[i] - 1]);
  211. m_aulBuckets[ibucket] = oldtable[i];
  212. }
  213. }
  214. free(oldtable);
  215. }
  216. return S_OK;
  217. }
  218. //
  219. // The ID for a NULL string is always 0, the ID for subsequent strings is the
  220. // index of the string + 1;
  221. //
  222. HRESULT Add(const XCHAR * psz, ULONG * pichOffset, ULONG *pulID = NULL)
  223. {
  224. ULONG ID = 0;
  225. if (psz)
  226. {
  227. // Grow if we're more than half full.
  228. if (m_cwords * 2 >= m_cBuckets)
  229. {
  230. HRESULT hr = SetHashSize(m_cwords * 3 + 17);
  231. if (FAILED(hr))
  232. return hr;
  233. }
  234. // Find out where this element should end up in hash table.
  235. ULONG ibucket = FindIndex(psz);
  236. if (m_aulBuckets[ibucket] == 0)
  237. {
  238. // Not found in hash table. Append it to the end.
  239. // Grow ID=>index mapping array if necessary.
  240. if (m_cwords + 1 >= m_cwordsAllocated) // 1 extra for init. zero
  241. {
  242. void * pvNew = realloc(m_aichWords, sizeof(*m_aichWords) * (m_cwords + 100));
  243. if (pvNew == NULL)
  244. return E_OUTOFMEMORY;
  245. m_aichWords = (ULONG *)pvNew;
  246. m_cwordsAllocated = m_cwords + 100;
  247. m_aichWords[0] = 1;
  248. }
  249. // Grow string storage if necessary.
  250. ULONG cchIncNull = xcslen(psz);
  251. if (m_aichWords[m_cwords] + cchIncNull > m_cchAllocated)
  252. {
  253. ULONG cbDesired = ((m_cchAllocated + cchIncNull) * sizeof(XCHAR) + 0x2003) & ~3;
  254. void * pvNew = ::CoTaskMemRealloc(m_pData, cbDesired);
  255. if (pvNew == NULL)
  256. {
  257. return E_OUTOFMEMORY;
  258. }
  259. m_pData = (XCHAR *)pvNew;
  260. m_pData[0] = 0;
  261. m_cchAllocated = cbDesired / sizeof(XCHAR);
  262. }
  263. memcpy(m_pData + m_aichWords[m_cwords], psz, cchIncNull * sizeof(XCHAR));
  264. ++m_cwords;
  265. m_aichWords[m_cwords] = m_aichWords[m_cwords - 1] + cchIncNull;
  266. // Fill in hash table entry with index of string.
  267. m_aulBuckets[ibucket] = m_cwords;
  268. ID = m_cwords;
  269. }
  270. else
  271. {
  272. // It was already there.
  273. ID = m_aulBuckets[ibucket];
  274. }
  275. }
  276. *pichOffset = ID ? m_aichWords[ID - 1] : 0;
  277. if (pulID)
  278. {
  279. *pulID = ID;
  280. }
  281. return S_OK;
  282. }
  283. const ULONG GetNumItems() const
  284. {
  285. return m_cwords;
  286. }
  287. const XCHAR * String(ULONG ichOffset) const
  288. {
  289. return ichOffset ? m_pData + ichOffset : NULL;
  290. }
  291. static int xcscmp(const WCHAR * p0, const WCHAR * p1)
  292. {
  293. return wcscmp(p0, p1);
  294. }
  295. static int xcscmp(const char * p0, const char * p1)
  296. {
  297. return strcmp(p0, p1);
  298. }
  299. static int xcslen(const WCHAR * p)
  300. {
  301. return wcslen(p) + 1;
  302. }
  303. static int xcslen(const char * p)
  304. {
  305. return strlen(p) + 1;
  306. }
  307. BOOL IsEqual(ULONG ichOffset, const XCHAR * psz)
  308. {
  309. if (ichOffset)
  310. {
  311. return (psz ? (xcscmp(m_pData + ichOffset, psz) == 0) : FALSE);
  312. }
  313. else
  314. {
  315. return (psz == NULL);
  316. }
  317. }
  318. ULONG StringSize(void) const
  319. {
  320. return m_cwords ? m_aichWords[m_cwords] : 0;
  321. }
  322. ULONG IndexFromId(ULONG ulID) const
  323. {
  324. SPDBG_ASSERT(ulID <= m_cwords);
  325. if (ulID > 0)
  326. {
  327. return m_aichWords[ulID - 1];
  328. }
  329. return 0;
  330. }
  331. const XCHAR * Item(ULONG ulID) const
  332. {
  333. SPDBG_ASSERT(ulID <= m_cwords);
  334. if ((ulID < 1) || m_pData == NULL)
  335. {
  336. return NULL;
  337. }
  338. return m_pData + IndexFromId(ulID);
  339. }
  340. ULONG SerializeSize() const
  341. {
  342. return (StringSize() * sizeof(XCHAR) + 3) & ~3;
  343. }
  344. const XCHAR * SerializeData()
  345. {
  346. ULONG cchWrite = StringSize();
  347. if (cchWrite)
  348. {
  349. const ULONG cb = cchWrite * sizeof(XCHAR);
  350. if (cb % 4) // We know there's room since data is always DWORD aligned by
  351. {
  352. memset(m_pData + cchWrite, 0xcc, 4 - (cb & 3)); // Junk data so make sure it's not null
  353. }
  354. }
  355. return m_pData;
  356. }
  357. };
  358. typedef class CStringBlobT<WCHAR> CStringBlob;
  359. typedef class CStringBlobT<WCHAR> CStringBlobW;
  360. typedef class CStringBlobT<char> CStringBlobA;
  361. #endif // _STRINGBLOB_H_