Team Fortress 2 Source Code as of 22/4/2020

//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: See gpubufferallocator.h
//
// $NoKeywords: $
//
//===========================================================================//
#include "gpubufferallocator.h"
#include "dynamicvb.h"
#include "dynamicib.h"
#include "utlmap.h"

// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"

#if defined( _X360 )

//-----------------------------------------------------------------------------
// globals
//-----------------------------------------------------------------------------
MEMALLOC_DEFINE_EXTERNAL_TRACKING( XMem_CGPUBufferPool );

// Track non-pooled VB/IB physical allocations (used by CGPUBufferAllocator::SpewStats)
CInterlockedInt g_NumIndividualVBPhysAllocs = 0;
CInterlockedInt g_SizeIndividualVBPhysAllocs = 0;
CInterlockedInt g_NumIndividualIBPhysAllocs = 0;
CInterlockedInt g_SizeIndividualIBPhysAllocs = 0;

//=============================================================================
//=============================================================================
// CGPUBufferAllocator
//=============================================================================
//=============================================================================
CGPUBufferAllocator::CGPUBufferAllocator( void )
  : m_nBufferPools( 0 ),
    m_bEnabled( true )
{
    memset( &( m_BufferPools[ 0 ] ), 0, sizeof( m_BufferPools ) );
    m_bEnabled = USE_GPU_BUFFER_ALLOCATOR && !CommandLine()->FindParm( "-no_gpu_buffer_allocator" );
    if ( m_bEnabled )
    {
        // Start with one pool (the size should be the lowest-common-denominator for all maps)
        AllocatePool( INITIAL_POOL_SIZE );
    }
}

CGPUBufferAllocator::~CGPUBufferAllocator( void )
{
    for ( int i = 0; i < m_nBufferPools; i++ )
    {
        delete m_BufferPools[ i ];
    }
}

//-----------------------------------------------------------------------------
// Allocate a new memory pool
//-----------------------------------------------------------------------------
bool CGPUBufferAllocator::AllocatePool( int nPoolSize )
{
    if ( m_nBufferPools == MAX_POOLS )
        return false;

    m_BufferPools[ m_nBufferPools ] = new CGPUBufferPool( nPoolSize );
    if ( m_BufferPools[ m_nBufferPools ]->m_pMemory == NULL )
    {
        // Physical alloc failed! Continue without crashing, we *might* get away with it...
        ExecuteOnce( DebuggerBreakIfDebugging() );
        ExecuteNTimes( 15, Warning( "CGPUBufferAllocator::AllocatePool - physical allocation failed! Physical fragmentation is in bad shape... falling back to non-pooled VB/IB allocations. Brace for a crash :o/\n" ) );
        delete m_BufferPools[ m_nBufferPools ];
        m_BufferPools[ m_nBufferPools ] = NULL;
        return false;
    }

    m_nBufferPools++;
    return true;
}
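
// NOTE: if AllocatePool fails (MAX_POOLS reached, or the physical alloc itself fails), the
// allocator keeps running: buffers that no longer fit in a pool are allocated individually
// by their owners, which is what the g_*IndividualPhysAllocs counters above track.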

//-----------------------------------------------------------------------------
// Make a new GPUBufferHandle_t to represent a given buffer allocation
//-----------------------------------------------------------------------------
inline GPUBufferHandle_t CGPUBufferAllocator::MakeGPUBufferHandle( int nPoolNum, int nPoolEntry )
{
    GPUBufferHandle_t newHandle;
    newHandle.nPoolNum = nPoolNum;
    newHandle.nPoolEntry = nPoolEntry;
    newHandle.pMemory = m_BufferPools[ nPoolNum ]->m_pMemory + m_BufferPools[ nPoolNum ]->m_PoolEntries[ nPoolEntry ].nOffset;
    return newHandle;
}
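
// NOTE: the handle caches a raw pointer into the pool, so whenever Compact() moves an
// allocation, MoveBufferMemory() must hand the owning VB/IB a freshly made handle.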

//-----------------------------------------------------------------------------
// Try to allocate a block of the given size from one of our pools
//-----------------------------------------------------------------------------
bool CGPUBufferAllocator::AllocateBuffer( GPUBufferHandle_t *pHandle, int nBufferSize, void *pObject, bool bIsVertexBuffer )
{
    if ( m_bEnabled && ( nBufferSize <= MAX_BUFFER_SIZE ) )
    {
        // Try to allocate at the end of one of our pools
        for ( int nPool = 0; nPool < m_nBufferPools; nPool++ )
        {
            int nPoolEntry = m_BufferPools[ nPool ]->Allocate( nBufferSize, bIsVertexBuffer, pObject );
            if ( nPoolEntry >= 0 )
            {
                // Tada.
                *pHandle = MakeGPUBufferHandle( nPool, nPoolEntry );
                return true;
            }
            if ( nPool == ( m_nBufferPools - 1 ) )
            {
                // Allocate a new pool (in which this buffer should DEFINITELY fit!)
                COMPILE_TIME_ASSERT( ADDITIONAL_POOL_SIZE >= MAX_BUFFER_SIZE );
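                // NOTE: on success, AllocatePool bumps m_nBufferPools, so the loop's
                // next iteration retries this allocation in the brand-new pool.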
                AllocatePool( ADDITIONAL_POOL_SIZE );
            }
        }
    }
    return false;
}

//-----------------------------------------------------------------------------
// Clear the given allocation from our pools (NOTE: the memory cannot be reused until Compact() is called)
//-----------------------------------------------------------------------------
void CGPUBufferAllocator::DeallocateBuffer( const GPUBufferHandle_t *pHandle )
{
    Assert( pHandle );
    if ( pHandle )
    {
        Assert( ( pHandle->nPoolNum >= 0 ) && ( pHandle->nPoolNum < m_nBufferPools ) );
        if ( ( pHandle->nPoolNum >= 0 ) && ( pHandle->nPoolNum < m_nBufferPools ) )
        {
            m_BufferPools[ pHandle->nPoolNum ]->Deallocate( pHandle );
        }
    }
}
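
// NOTE: deallocation just marks the pool entry as free (see CGPUBufferPool::Deallocate);
// the bytes it occupied are only reclaimed the next time Compact() repacks the pools.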

//-----------------------------------------------------------------------------
// If appropriate, allocate this VB's memory from one of our pools
//-----------------------------------------------------------------------------
bool CGPUBufferAllocator::AllocateVertexBuffer( CVertexBuffer *pVertexBuffer, int nBufferSize )
{
    AUTO_LOCK( m_mutex );
    bool bIsVertexBuffer = true;
    GPUBufferHandle_t handle;
    if ( AllocateBuffer( &handle, nBufferSize, (void *)pVertexBuffer, bIsVertexBuffer ) )
    {
        // Success - give the VB the handle to this allocation
        pVertexBuffer->SetBufferAllocationHandle( handle );
        return true;
    }
    return false;
}

//-----------------------------------------------------------------------------
// Deallocate this VB's memory from our pools
//-----------------------------------------------------------------------------
void CGPUBufferAllocator::DeallocateVertexBuffer( CVertexBuffer *pVertexBuffer )
{
    AUTO_LOCK( m_mutex );
    // Remove the allocation from the pool and clear the VB's handle
    DeallocateBuffer( pVertexBuffer->GetBufferAllocationHandle() );
    pVertexBuffer->SetBufferAllocationHandle( GPUBufferHandle_t() );
}

//-----------------------------------------------------------------------------
// If appropriate, allocate this IB's memory from one of our pools
//-----------------------------------------------------------------------------
bool CGPUBufferAllocator::AllocateIndexBuffer( CIndexBuffer *pIndexBuffer, int nBufferSize )
{
    AUTO_LOCK( m_mutex );
    bool bIsNOTVertexBuffer = false;
    GPUBufferHandle_t handle;
    if ( AllocateBuffer( &handle, nBufferSize, (void *)pIndexBuffer, bIsNOTVertexBuffer ) )
    {
        // Success - give the IB the handle to this allocation
        pIndexBuffer->SetBufferAllocationHandle( handle );
        return true;
    }
    return false;
}

//-----------------------------------------------------------------------------
// Deallocate this IB's memory from our pools
//-----------------------------------------------------------------------------
void CGPUBufferAllocator::DeallocateIndexBuffer( CIndexBuffer *pIndexBuffer )
{
    AUTO_LOCK( m_mutex );
    // Remove the allocation from the pool and clear the IB's handle
    DeallocateBuffer( pIndexBuffer->GetBufferAllocationHandle() );
    pIndexBuffer->SetBufferAllocationHandle( GPUBufferHandle_t() );
}

//-----------------------------------------------------------------------------
// Move a buffer from one location to another (could be movement within the same pool)
//-----------------------------------------------------------------------------
void CGPUBufferAllocator::MoveBufferMemory( int nDstPool, int *pnDstEntry, int *pnDstOffset, CGPUBufferPool &srcPool, GPUBufferPoolEntry_t &srcEntry )
{
    // Move the data
    CGPUBufferPool &dstPool = *m_BufferPools[ nDstPool ];
    byte *pDest = dstPool.m_pMemory + *pnDstOffset;
    byte *pSource = srcPool.m_pMemory + srcEntry.nOffset;
    if ( pDest != pSource )
        V_memmove( pDest, pSource, srcEntry.nSize );

    // Update the destination pool's allocation entry (NOTE: this could be srcEntry, so srcEntry.nOffset would change)
    dstPool.m_PoolEntries[ *pnDstEntry ] = srcEntry;
    dstPool.m_PoolEntries[ *pnDstEntry ].nOffset = *pnDstOffset;

    // Tell the VB/IB about the updated allocation
    GPUBufferHandle_t newHandle = MakeGPUBufferHandle( nDstPool, *pnDstEntry );
    if ( srcEntry.bIsVertexBuffer )
        srcEntry.pVertexBuffer->SetBufferAllocationHandle( newHandle );
    else
        srcEntry.pIndexBuffer->SetBufferAllocationHandle( newHandle );

    // Move the write address past this entry and increment the pool high water mark
    *pnDstOffset += srcEntry.nSize;
    *pnDstEntry += 1;
    dstPool.m_nBytesUsed += srcEntry.nSize;
}
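
// NOTE: the caller guarantees dstPool.m_PoolEntries[ *pnDstEntry ] exists before the copy:
// Compact() calls AddToTail() first when appending to an earlier pool, and when shuffling
// within the same pool the destination slot is an existing, already-visited entry.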

//-----------------------------------------------------------------------------
// Reclaim space freed by destroyed buffers and compact our pools ready for new allocations
//-----------------------------------------------------------------------------
void CGPUBufferAllocator::Compact( void )
{
    // NOTE: this must only be called during map transitions; no rendering may be in flight and everything must be single-threaded!
    AUTO_LOCK( m_mutex );
    //SpewStats(); // pre-compact state

    CFastTimer timer;
    timer.Start();

    // Shuffle all pools to get rid of the empty space occupied by freed buffers.
    // We just walk the pools and entries in order, moving each buffer down within the same pool,
    // or to the end of a previous pool (if, after compaction, it now has free space).
    // Each pool should end up with contiguous, usable free space (may be zero bytes) at the end.
    int nDstPool = 0, nDstEntry = 0, nDstOffset = 0;
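    // The three cursors above track the next write position: everything before
    // ( nDstPool, nDstEntry, nDstOffset ) is already compacted, with no gaps.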
    for ( int nSrcPool = 0; nSrcPool < m_nBufferPools; nSrcPool++ )
    {
        CGPUBufferPool &srcPool = *m_BufferPools[ nSrcPool ];
        srcPool.m_nBytesUsed = 0; // Re-fill each pool from scratch
        int nEntriesRemainingInPool = 0;
        for ( int nSrcEntry = 0; nSrcEntry < srcPool.m_PoolEntries.Count(); nSrcEntry++ )
        {
            GPUBufferPoolEntry_t &srcEntry = srcPool.m_PoolEntries[ nSrcEntry ];
            if ( srcEntry.pVertexBuffer )
            {
                // First, try to move the buffer into one of the previous (already-compacted) pools
                bool bDone = false;
                while ( nDstPool < nSrcPool )
                {
                    CGPUBufferPool &dstPool = *m_BufferPools[ nDstPool ];
                    if ( ( nDstOffset + srcEntry.nSize ) <= dstPool.m_nSize )
                    {
                        // Add this buffer to the end of dstPool
                        Assert( nDstEntry == dstPool.m_PoolEntries.Count() );
                        dstPool.m_PoolEntries.AddToTail();
                        MoveBufferMemory( nDstPool, &nDstEntry, &nDstOffset, srcPool, srcEntry );
                        bDone = true;
                        break;
                    }
                    else
                    {
                        // This pool is full, start writing into the next one
                        nDstPool++;
                        nDstEntry = 0;
                        nDstOffset = 0;
                    }
                }

                // If that fails, just shuffle the entry down within srcPool
                if ( !bDone )
                {
                    Assert( nSrcPool == nDstPool );
                    MoveBufferMemory( nDstPool, &nDstEntry, &nDstOffset, srcPool, srcEntry );
                    nEntriesRemainingInPool++;
                }
            }
        }

        // Discard unused entries from the end of the pool (freed buffers, or buffers moved to other pools)
        srcPool.m_PoolEntries.SetCountNonDestructively( nEntriesRemainingInPool );
    }

    // Now free empty pools (keep the first (very large) one around, since fragmentation makes freeing+reallocing it a big risk)
    int nBytesFreed = 0;
    for ( int nPool = ( m_nBufferPools - 1 ); nPool > 0; nPool-- )
    {
        if ( m_BufferPools[ nPool ]->m_PoolEntries.Count() )
            break;
        nBytesFreed += m_BufferPools[ nPool ]->m_nSize;
        Assert( m_BufferPools[ nPool ]->m_nBytesUsed == 0 );
        delete m_BufferPools[ nPool ];
        m_nBufferPools--;
    }

    if ( m_nBufferPools > 1 )
    {
        // The above compaction algorithm could waste space due to large allocs causing nDstPool to increment before that pool
        // is actually full. With our current usage pattern (total in-use memory is less than INITIAL_POOL_SIZE whenever Compact
        // is called), that doesn't matter. If that changes (i.e. the below warning fires), then the fix would be:
        //  - for each pool, sort its entries by size (largest first) and try to allocate them on the end of prior (already-compacted) pools
        //  - pack whatever remains in the pool down, and proceed to the next pool
        ExecuteOnce( Warning( "CGPUBufferAllocator::Compact may be wasting memory due to changed usage patterns (see code for suggested fix).\n" ) );
    }

#ifdef _X360
    // Invalidate the GPU caches for all pooled memory, since stuff has moved around
    for ( int nPool = 0; nPool < m_nBufferPools; nPool++ )
    {
        Dx9Device()->InvalidateGpuCache( m_BufferPools[ nPool ]->m_pMemory, m_BufferPools[ nPool ]->m_nSize, 0 );
    }
#endif

    timer.End();
    float compactTime = (float)timer.GetDuration().GetSeconds();
    Msg( "CGPUBufferAllocator::Compact took %.2f seconds, and freed %.1fKB\n", compactTime, ( nBytesFreed / 1024.0f ) );
    //SpewStats(); // post-compact state
}

//-----------------------------------------------------------------------------
// Spew statistics about pool usage, so we can tune our constant values
//-----------------------------------------------------------------------------
void CGPUBufferAllocator::SpewStats( bool bBrief )
{
    AUTO_LOCK( m_mutex );
    int nMemAllocated = 0;
    int nMemUsed = 0;
    int nOldMemWasted = 0;
    int nVBsInPools = 0;
    int nIBsInPools = 0;
    int nFreedBuffers = 0;
    int nFreedBufferMem = 0;
    for ( int i = 0; i < m_nBufferPools; i++ )
    {
        CGPUBufferPool *pool = m_BufferPools[ i ];
        nMemAllocated += pool->m_nSize;
        nMemUsed += pool->m_nBytesUsed;
        for ( int j = 0; j < pool->m_PoolEntries.Count(); j++ )
        {
            GPUBufferPoolEntry_t &poolEntry = pool->m_PoolEntries[ j ];
            if ( poolEntry.pVertexBuffer )
            {
                // Figure out how much memory we WOULD have allocated for this buffer, if we'd allocated it individually:
                nOldMemWasted += ALIGN_VALUE( poolEntry.nSize, 4096 ) - poolEntry.nSize;
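                // (the 4096 here assumes individual physical allocations get rounded up
                // to whole 4KiB pages, so pooling saves the per-buffer round-up waste)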
                if ( poolEntry.bIsVertexBuffer )
                    nVBsInPools++;
                else
                    nIBsInPools++;
            }
            else
            {
                nFreedBuffers++;
                nFreedBufferMem += poolEntry.nSize;
            }
        }
    }

    // NOTE: 'unused' memory doesn't count memory used by freed buffers, which should be zero during gameplay. The purpose is
    // to measure wastage at the END of a pool, to help determine ideal values for ADDITIONAL_POOL_SIZE and MAX_BUFFER_SIZE.
    int nMemUnused = nMemAllocated - nMemUsed;
    const float KB = 1024.0f, MB = KB*KB;
    if ( bBrief )
    {
        ConMsg( "[GPUBUFLOG] Pools:%2d | Size:%5.1fMB | Unused:%5.1fMB | Freed:%5.1fMB | Unpooled:%5.1fMB\n",
                m_nBufferPools, nMemAllocated / MB, nMemUnused / MB, nFreedBufferMem / MB, ( g_SizeIndividualVBPhysAllocs + g_SizeIndividualIBPhysAllocs ) / MB );
    }
    else
    {
        Msg( "\nGPU Buffer Allocator stats:\n" );
        Msg( " -- %5d -- Num Pools allocated\n", m_nBufferPools );
        Msg( " -- %7.1fMB -- Memory allocated to pools\n", nMemAllocated / MB );
        Msg( " -- %7.1fKB -- Unused memory at tail-end of pools\n", nMemUnused / KB );
        Msg( " -- %7.1fKB -- Memory saved by allocating buffers from pools\n", nOldMemWasted / KB );
        Msg( " -- %5d -- Number of VBs allocated from pools\n", nVBsInPools );
        Msg( " -- %5d -- Number of IBs allocated from pools\n", nIBsInPools );
        Msg( " -- %5d -- Number of freed buffers in pools (should be zero during gameplay)\n", nFreedBuffers );
        Msg( " -- %7.1fKB -- Memory used by freed buffers in pools\n", nFreedBufferMem / KB );
        Msg( " -- %7.1fKB -- Mem allocated for NON-pooled VBs (%d VBs)\n", g_SizeIndividualVBPhysAllocs / KB, g_NumIndividualVBPhysAllocs );
        Msg( " -- %7.1fKB -- Mem allocated for NON-pooled IBs (%d IBs)\n", g_SizeIndividualIBPhysAllocs / KB, g_NumIndividualIBPhysAllocs );
        Msg( "\n" );
    }
}

//=============================================================================
//=============================================================================
// CGPUBufferPool
//=============================================================================
//=============================================================================
CGPUBufferPool::CGPUBufferPool( int nSize )
  : m_PoolEntries( POOL_ENTRIES_GROW_SIZE, POOL_ENTRIES_INIT_SIZE ),
    m_nSize( 0 ),
    m_nBytesUsed( 0 )
{
    // NOTE: write-combining (PAGE_WRITECOMBINE) is deliberately not used, since it slows down 'Compact' hugely (and doesn't noticeably benefit load times)
    m_pMemory = (byte*)XPhysicalAlloc( nSize, MAXULONG_PTR, 0, PAGE_READWRITE );
    if ( m_pMemory )
    {
        MemAlloc_RegisterExternalAllocation( XMem_CGPUBufferPool, m_pMemory, XPhysicalSize( m_pMemory ) );
        m_nSize = nSize;
    }
}
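
// NOTE: MAXULONG_PTR places no ceiling on the physical address and an alignment of 0 asks
// XPhysicalAlloc for its default alignment; on failure it returns NULL, which AllocatePool
// detects by checking m_pMemory (so m_nSize stays 0 for a failed pool).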

CGPUBufferPool::~CGPUBufferPool( void )
{
    for ( int i = 0; i < m_PoolEntries.Count(); i++ )
    {
        if ( m_PoolEntries[ i ].pVertexBuffer )
        {
            // Buffers should be cleaned up before the CGPUBufferAllocator is shut down!
            Assert( 0 );
            Warning( "ERROR: Un-freed %s in CGPUBufferPool on shutdown! (%6.1fKB)\n",
                     ( m_PoolEntries[ i ].bIsVertexBuffer ? "VB" : "IB" ), ( m_PoolEntries[ i ].nSize / 1024.0f ) );
            break;
        }
    }

    if ( m_pMemory )
    {
        MemAlloc_RegisterExternalDeallocation( XMem_CGPUBufferPool, m_pMemory, XPhysicalSize( m_pMemory ) );
        XPhysicalFree( m_pMemory );
        m_pMemory = NULL;
    }
    m_nSize = m_nBytesUsed = 0;
}

//-----------------------------------------------------------------------------
// Attempt to allocate a buffer of the given size in this pool
//-----------------------------------------------------------------------------
int CGPUBufferPool::Allocate( int nBufferSize, bool bIsVertexBuffer, void *pObject )
{
    // Align the buffer size
    nBufferSize = ALIGN_VALUE( nBufferSize, POOL_ENTRY_ALIGNMENT );

    // Check available space
    if ( ( m_nBytesUsed + nBufferSize ) > m_nSize )
        return -1;

    int nPoolEntry = m_PoolEntries.AddToTail();
    GPUBufferPoolEntry_t &poolEntry = m_PoolEntries[ nPoolEntry ];
    poolEntry.nOffset = m_nBytesUsed;
    poolEntry.nSize = nBufferSize;
    poolEntry.bIsVertexBuffer = bIsVertexBuffer;
    poolEntry.pVertexBuffer = (CVertexBuffer *)pObject;

    // Update 'used space' high watermark
    m_nBytesUsed += nBufferSize;
    return nPoolEntry;
}
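
// NOTE: this is a simple bump allocator - entries only ever go on the tail, and gaps left by
// Deallocate() are not reused here; CGPUBufferAllocator::Compact() is what recovers them.
// The pVertexBuffer/pIndexBuffer members presumably overlap (the cast from pObject above
// stores either type through pVertexBuffer), with bIsVertexBuffer saying which one is live.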

//-----------------------------------------------------------------------------
// Deallocate the given entry from this pool
//-----------------------------------------------------------------------------
void CGPUBufferPool::Deallocate( const GPUBufferHandle_t *pHandle )
{
    Assert( m_PoolEntries.IsValidIndex( pHandle->nPoolEntry ) );
    if ( m_PoolEntries.IsValidIndex( pHandle->nPoolEntry ) )
    {
        Assert( m_PoolEntries[ pHandle->nPoolEntry ].pVertexBuffer );
        m_PoolEntries[ pHandle->nPoolEntry ].pVertexBuffer = NULL;
    }
}

#endif // _X360