Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

499 lines
15 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose: Defines a large symbol table (intp sized handles, can store more than 64k strings)
  4. //
  5. // $Header: $
  6. // $NoKeywords: $
  7. //===========================================================================//
  8. #ifndef UTLSYMBOLLARGE_H
  9. #define UTLSYMBOLLARGE_H
  10. #ifdef _WIN32
  11. #pragma once
  12. #endif
  13. #include "tier0/threadtools.h"
  14. #include "tier1/utltshash.h"
  15. #include "tier1/stringpool.h"
  16. #include "tier0/vprof.h"
  17. #include "tier1/utltshash.h"
  18. //-----------------------------------------------------------------------------
  19. // CUtlSymbolTableLarge:
  20. // description:
  21. // This class defines a symbol table, which allows us to perform mappings
  22. // of strings to symbols and back.
  23. //
  24. // This class stores the strings in a series of string pools. The returned CUtlSymbolLarge is just a pointer
  25. // to the string data, the hash precedes it in memory and is used to speed up searching, etc.
  26. //-----------------------------------------------------------------------------
  27. typedef intp UtlSymLargeId_t;
  28. #define UTL_INVAL_SYMBOL_LARGE ((UtlSymLargeId_t)~0)
  29. class CUtlSymbolLarge
  30. {
  31. public:
  32. // constructor, destructor
  33. CUtlSymbolLarge()
  34. {
  35. u.m_Id = UTL_INVAL_SYMBOL_LARGE;
  36. }
  37. CUtlSymbolLarge( UtlSymLargeId_t id )
  38. {
  39. u.m_Id = id;
  40. }
  41. CUtlSymbolLarge( CUtlSymbolLarge const& sym )
  42. {
  43. u.m_Id = sym.u.m_Id;
  44. }
  45. // operator=
  46. CUtlSymbolLarge& operator=( CUtlSymbolLarge const& src )
  47. {
  48. u.m_Id = src.u.m_Id;
  49. return *this;
  50. }
  51. // operator==
  52. bool operator==( CUtlSymbolLarge const& src ) const
  53. {
  54. return u.m_Id == src.u.m_Id;
  55. }
  56. // operator==
  57. bool operator==( UtlSymLargeId_t const& src ) const
  58. {
  59. return u.m_Id == src;
  60. }
  61. // operator==
  62. bool operator!=( CUtlSymbolLarge const& src ) const
  63. {
  64. return u.m_Id != src.u.m_Id;
  65. }
  66. // operator==
  67. bool operator!=( UtlSymLargeId_t const& src ) const
  68. {
  69. return u.m_Id != src;
  70. }
  71. // Gets at the symbol
  72. operator UtlSymLargeId_t const() const
  73. {
  74. return u.m_Id;
  75. }
  76. // Gets the string associated with the symbol
  77. inline const char* String( ) const
  78. {
  79. if ( u.m_Id == UTL_INVAL_SYMBOL_LARGE )
  80. return "";
  81. return u.m_pAsString;
  82. }
  83. inline bool IsValid() const
  84. {
  85. return u.m_Id != UTL_INVAL_SYMBOL_LARGE ? true : false;
  86. }
  87. private:
  88. // Disallowed
  89. CUtlSymbolLarge( const char* pStr ); // they need to go through the table to assign the ptr
  90. bool operator==( const char* pStr ) const; // disallow since we don't know if the table this is from was case sensitive or not... maybe we don't care
  91. union
  92. {
  93. UtlSymLargeId_t m_Id;
  94. char const *m_pAsString;
  95. } u;
  96. };
  97. #define MIN_STRING_POOL_SIZE 2048
  98. inline uint32 CUtlSymbolLarge_Hash( bool CASEINSENSITIVE, const char *pString, int len )
  99. {
  100. return ( CASEINSENSITIVE ? HashStringCaseless( pString ) : HashString( pString ) );
  101. }
  102. typedef uint32 LargeSymbolTableHashDecoration_t;
  103. // The structure consists of the hash immediately followed by the string data
  104. struct CUtlSymbolTableLargeBaseTreeEntry_t
  105. {
  106. LargeSymbolTableHashDecoration_t m_Hash;
  107. // Variable length string data
  108. char m_String[1];
  109. bool IsEmpty() const
  110. {
  111. return ( ( m_Hash == 0 ) && ( 0 == m_String[0] ) );
  112. }
  113. char const *String() const
  114. {
  115. return (const char *)&m_String[ 0 ];
  116. }
  117. CUtlSymbolLarge ToSymbol() const
  118. {
  119. return reinterpret_cast< UtlSymLargeId_t >( String() );
  120. }
  121. LargeSymbolTableHashDecoration_t HashValue() const
  122. {
  123. return m_Hash;
  124. }
  125. };
  126. template< class TreeType, bool CASEINSENSITIVE >
  127. class CTreeEntryLess
  128. {
  129. public:
  130. CTreeEntryLess( int ignored = 0 ) {} // permits default initialization to NULL in CUtlRBTree
  131. bool operator!() const { return false; }
  132. bool operator()( CUtlSymbolTableLargeBaseTreeEntry_t * const &left, CUtlSymbolTableLargeBaseTreeEntry_t * const &right ) const
  133. {
  134. // compare the hashes
  135. if ( left->m_Hash == right->m_Hash )
  136. {
  137. // if the hashes match compare the strings
  138. if ( !CASEINSENSITIVE )
  139. return strcmp( left->String(), right->String() ) < 0;
  140. else
  141. return V_stricmp( left->String(), right->String() ) < 0;
  142. }
  143. else
  144. {
  145. return left->m_Hash < right->m_Hash;
  146. }
  147. }
  148. };
  149. // For non-threaded versions, simply index into CUtlRBTree
  150. template< bool CASEINSENSITIVE >
  151. class CNonThreadsafeTree : public CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree< CASEINSENSITIVE >, CASEINSENSITIVE > >
  152. {
  153. public:
  154. typedef CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree, CASEINSENSITIVE > > CNonThreadsafeTreeType;
  155. CNonThreadsafeTree() :
  156. CNonThreadsafeTreeType( 0, 16 )
  157. {
  158. }
  159. inline void Commit()
  160. {
  161. // Nothing, only matters for thread-safe tables
  162. }
  163. inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry )
  164. {
  165. return CNonThreadsafeTreeType::Insert( entry );
  166. }
  167. inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) const
  168. {
  169. return CNonThreadsafeTreeType::Find( entry );
  170. }
  171. inline int InvalidIndex() const
  172. {
  173. return CNonThreadsafeTreeType::InvalidIndex();
  174. }
  175. inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const
  176. {
  177. CUtlVector< CUtlSymbolTableLargeBaseTreeEntry_t * > list;
  178. list.EnsureCount( nCount );
  179. for ( int i = 0; i < nCount; ++i )
  180. {
  181. pElements[ i ] = CNonThreadsafeTreeType::Element( i )->ToSymbol();
  182. }
  183. return nCount;
  184. }
  185. };
  186. // Since CUtlSymbolTableLargeBaseTreeEntry_t already has the hash
  187. // contained inside of it, don't need to recompute a hash here
  188. template < int BUCKET_COUNT, class KEYTYPE, bool CASEINSENSITIVE >
  189. class CCThreadsafeTreeHashMethod
  190. {
  191. public:
  192. static int Hash( const KEYTYPE &key, int nBucketMask )
  193. {
  194. uint32 nHash = key->HashValue();
  195. return ( nHash & nBucketMask );
  196. }
  197. static bool Compare( CUtlSymbolTableLargeBaseTreeEntry_t * const &lhs, CUtlSymbolTableLargeBaseTreeEntry_t * const &rhs )
  198. {
  199. if ( lhs->m_Hash != rhs->m_Hash )
  200. return false;
  201. if ( !CASEINSENSITIVE )
  202. {
  203. return ( !Q_strcmp( lhs->String(), rhs->String() ) ? true : false );
  204. }
  205. return ( !Q_stricmp( lhs->String(), rhs->String() ) ? true : false );
  206. }
  207. };
  208. /*
  209. NOTE: So the only crappy thing about using a CUtlTSHash here is that the KEYTYPE is a CUtlSymbolTableLargeBaseTreeEntry_t ptr which has both the
  210. hash and the string since with strings there is a good chance of hash collision after you have a fair number of strings so we have to implement
  211. a Compare method (above) which falls back to strcmp/stricmp if the hashes are equal. This means that all of the data is in the KEYTYPE of the hash and the
  212. payload doesn't matter. So I made the payload also be a pointer to a CUtlSymbolTableLargeBaseTreeEntry_t since that makes using the API more convenient
  213. TODO: If we have a CUtlTSHash that was all about the existence of the KEYTYPE and didn't require a payload (or template on 'void') then we could eliminate
  214. 50% of the pointer overhead used for this data structure.
  215. */
  216. // Thread safe version is based on the
  217. template < bool CASEINSENSITIVE >
  218. class CThreadsafeTree : public CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > >
  219. {
  220. public:
  221. typedef CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > > CThreadsafeTreeType;
  222. CThreadsafeTree() :
  223. CThreadsafeTreeType( 32 )
  224. {
  225. }
  226. inline void Commit()
  227. {
  228. CThreadsafeTreeType::Commit();
  229. }
  230. inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry )
  231. {
  232. return CThreadsafeTreeType::Insert( entry, entry );
  233. }
  234. inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry )
  235. {
  236. return CThreadsafeTreeType::Find( entry );
  237. }
  238. inline int InvalidIndex() const
  239. {
  240. return CThreadsafeTreeType::InvalidHandle();
  241. }
  242. inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const
  243. {
  244. CUtlVector< UtlTSHashHandle_t > list;
  245. list.EnsureCount( nCount );
  246. int c = CThreadsafeTreeType::GetElements( nFirstElement, nCount, list.Base() );
  247. for ( int i = 0; i < c; ++i )
  248. {
  249. pElements[ i ] = CThreadsafeTreeType::Element( list[ i ] )->ToSymbol();
  250. }
  251. return c;
  252. }
  253. };
  254. // Base Class for threaded and non-threaded types
  255. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE = MIN_STRING_POOL_SIZE >
  256. class CUtlSymbolTableLargeBase
  257. {
  258. public:
  259. // constructor, destructor
  260. CUtlSymbolTableLargeBase();
  261. ~CUtlSymbolTableLargeBase();
  262. // Finds and/or creates a symbol based on the string
  263. CUtlSymbolLarge AddString( const char* pString );
  264. // Finds the symbol for pString
  265. CUtlSymbolLarge Find( const char* pString ) const;
  266. // Remove all symbols in the table.
  267. void RemoveAll();
  268. int GetNumStrings( void ) const
  269. {
  270. return m_Lookup.Count();
  271. }
  272. void Commit()
  273. {
  274. m_Lookup.Commit();
  275. }
  276. // Returns elements in the table
  277. int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const
  278. {
  279. return m_Lookup.GetElements( nFirstElement, nCount, pElements );
  280. }
  281. uint64 GetMemoryUsage() const
  282. {
  283. uint64 unBytesUsed = 0u;
  284. for ( int i=0; i < m_StringPools.Count(); i++ )
  285. {
  286. StringPool_t *pPool = m_StringPools[i];
  287. unBytesUsed += (uint64)pPool->m_TotalLen;
  288. }
  289. return unBytesUsed;
  290. }
  291. protected:
  292. struct StringPool_t
  293. {
  294. int m_TotalLen; // How large is
  295. int m_SpaceUsed;
  296. char m_Data[1];
  297. };
  298. TreeType m_Lookup;
  299. // stores the string data
  300. CUtlVector< StringPool_t * > m_StringPools;
  301. private:
  302. int FindPoolWithSpace( int len ) const;
  303. };
  304. //-----------------------------------------------------------------------------
  305. // constructor, destructor
  306. //-----------------------------------------------------------------------------
  307. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
  308. inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE >::CUtlSymbolTableLargeBase() :
  309. m_StringPools( 8 )
  310. {
  311. }
  312. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
  313. inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::~CUtlSymbolTableLargeBase()
  314. {
  315. // Release the stringpool string data
  316. RemoveAll();
  317. }
  318. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
  319. inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::Find( const char* pString ) const
  320. {
  321. VPROF( "CUtlSymbolLarge::Find" );
  322. if (!pString)
  323. return CUtlSymbolLarge();
  324. // Passing this special invalid symbol makes the comparison function
  325. // use the string passed in the context
  326. int len = Q_strlen( pString ) + 1;
  327. CUtlSymbolTableLargeBaseTreeEntry_t *search = (CUtlSymbolTableLargeBaseTreeEntry_t *)_alloca( len + sizeof( LargeSymbolTableHashDecoration_t ) );
  328. search->m_Hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, len );
  329. Q_memcpy( (char *)&search->m_String[ 0 ], pString, len );
  330. int idx = const_cast< TreeType & >(m_Lookup).Find( search );
  331. if ( idx == m_Lookup.InvalidIndex() )
  332. return UTL_INVAL_SYMBOL_LARGE;
  333. const CUtlSymbolTableLargeBaseTreeEntry_t *entry = m_Lookup[ idx ];
  334. return entry->ToSymbol();
  335. }
  336. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
  337. inline int CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::FindPoolWithSpace( int len ) const
  338. {
  339. for ( int i=0; i < m_StringPools.Count(); i++ )
  340. {
  341. StringPool_t *pPool = m_StringPools[i];
  342. if ( (pPool->m_TotalLen - pPool->m_SpaceUsed) >= len )
  343. {
  344. return i;
  345. }
  346. }
  347. return -1;
  348. }
  349. //-----------------------------------------------------------------------------
  350. // Finds and/or creates a symbol based on the string
  351. //-----------------------------------------------------------------------------
  352. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
  353. inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::AddString( const char* pString )
  354. {
  355. VPROF("CUtlSymbolLarge::AddString");
  356. if (!pString)
  357. return UTL_INVAL_SYMBOL_LARGE;
  358. CUtlSymbolLarge id = Find( pString );
  359. if ( id != UTL_INVAL_SYMBOL_LARGE )
  360. return id;
  361. int lenString = Q_strlen(pString) + 1; // length of just the string
  362. int lenDecorated = lenString + sizeof(LargeSymbolTableHashDecoration_t); // and with its hash decoration
  363. // make sure that all strings are aligned on 2-byte boundaries so the hashes will read correctly
  364. // This assert seems to be invalid because LargeSymbolTableHashDecoration_t is always
  365. // a uint32, by design.
  366. //COMPILE_TIME_ASSERT(sizeof(LargeSymbolTableHashDecoration_t) == sizeof(intp));
  367. lenDecorated = ALIGN_VALUE(lenDecorated, sizeof( intp ) );
  368. // Find a pool with space for this string, or allocate a new one.
  369. int iPool = FindPoolWithSpace( lenDecorated );
  370. if ( iPool == -1 )
  371. {
  372. // Add a new pool.
  373. int newPoolSize = MAX( lenDecorated + sizeof( StringPool_t ), POOL_SIZE );
  374. StringPool_t *pPool = (StringPool_t*)malloc( newPoolSize );
  375. pPool->m_TotalLen = newPoolSize - sizeof( StringPool_t );
  376. pPool->m_SpaceUsed = 0;
  377. iPool = m_StringPools.AddToTail( pPool );
  378. }
  379. // Compute a hash
  380. LargeSymbolTableHashDecoration_t hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, lenString );
  381. // Copy the string in.
  382. StringPool_t *pPool = m_StringPools[iPool];
  383. // Assert( pPool->m_SpaceUsed < 0xFFFF ); // Pool could be bigger than 2k
  384. // This should never happen, because if we had a string > 64k, it
  385. // would have been given its entire own pool.
  386. CUtlSymbolTableLargeBaseTreeEntry_t *entry = ( CUtlSymbolTableLargeBaseTreeEntry_t * )&pPool->m_Data[ pPool->m_SpaceUsed ];
  387. pPool->m_SpaceUsed += lenDecorated;
  388. entry->m_Hash = hash;
  389. char *pText = (char *)&entry->m_String [ 0 ];
  390. Q_memcpy( pText, pString, lenString );
  391. // insert the string into the database
  392. MEM_ALLOC_CREDIT();
  393. int idx = m_Lookup.Insert( entry );
  394. return m_Lookup.Element( idx )->ToSymbol();
  395. }
  396. //-----------------------------------------------------------------------------
  397. // Remove all symbols in the table.
  398. //-----------------------------------------------------------------------------
  399. template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
  400. inline void CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::RemoveAll()
  401. {
  402. m_Lookup.Purge();
  403. for ( int i=0; i < m_StringPools.Count(); i++ )
  404. {
  405. StringPool_t * pString = m_StringPools[i];
  406. free( pString );
  407. }
  408. m_StringPools.RemoveAll();
  409. }
  410. // Case-sensitive
  411. typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< false >, false > CUtlSymbolTableLarge;
  412. // Case-insensitive
  413. typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< true >, true > CUtlSymbolTableLarge_CI;
  414. // Multi-threaded case-sensitive
  415. typedef CUtlSymbolTableLargeBase< CThreadsafeTree< false >, false > CUtlSymbolTableLargeMT;
  416. // Multi-threaded case-insensitive
  417. typedef CUtlSymbolTableLargeBase< CThreadsafeTree< true >, true > CUtlSymbolTableLargeMT_CI;
  418. #endif // UTLSYMBOLLARGE_H