Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

941 lines
37 KiB

  1. //========== Copyright © 2010, Valve Corporation, All rights reserved. ========
  2. #include "dxabstract.h"
  3. #include "ps3gcmstate.h"
  4. #include "utlmap.h"
  5. #include "ps3/ps3gcmlabels.h"
  6. #include "sys/tty.h"
  7. #include "convar.h"
  8. //#include "vjobs/spudrawqueue_shared.h"
  9. #include "spugcm.h"
  10. #include "memdbgon.h"
  11. PLATFORM_OVERRIDE_MEM_ALLOC_INTERNAL_PS3_IMPL
  12. //////////////////////////////////////////////////////////////////////////
  13. #if 1 // #ifndef _CERT
  14. #define TRACK_ALLOC_STATS 1
  15. #endif
  16. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  17. ConVar r_ps3_gcmnocompact( "r_ps3_gcmnocompact", "0" );
  18. ConVar r_ps3_gcmlowcompact( "r_ps3_gcmlowcompact", "0" );
  19. #endif
  20. static CThreadFastMutex s_AllocMutex;
  21. static int32 s_uiGcmLocalMemoryAllocatorMutexLockCount;
  22. struct CGcmLocalMemoryAllocatorMutexLockCounter_t
  23. {
  24. CGcmLocalMemoryAllocatorMutexLockCounter_t() { Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 ); ++ s_uiGcmLocalMemoryAllocatorMutexLockCount; }
  25. ~CGcmLocalMemoryAllocatorMutexLockCounter_t() { Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount > 0 ); -- s_uiGcmLocalMemoryAllocatorMutexLockCount; }
  26. };
  27. #define PS3ALLOCMTX AUTO_LOCK( s_AllocMutex ); CGcmLocalMemoryAllocatorMutexLockCounter_t aLockCounter;
  28. bool IsItSafeToRefreshFrontBufferNonInteractivePs3()
  29. {
  30. // NOTE: only main thread can refresh front buffer
  31. if ( !ThreadInMainThread() )
  32. return false;
  33. AUTO_LOCK( s_AllocMutex );
  34. Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 );
  35. return s_uiGcmLocalMemoryAllocatorMutexLockCount <= 0;
  36. }
  37. struct CPs3gcmLocalMemoryBlockMutable : public CPs3gcmLocalMemoryBlock
  38. {
  39. inline uint32 & MutableOffset() { return m_nLocalMemoryOffset; }
  40. inline uint32 & MutableSize() { return m_uiSize; }
  41. inline CPs3gcmAllocationType_t & MutableType() { return m_uType; }
  42. inline uint32 & MutableIndex() { return m_uiIndex; }
  43. };
  44. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  45. static const uint64 g_GcmLocalMemoryBlockDebugCookieAllocated = 0xA110CA7EDA110CA7ull;
  46. static const uint64 g_GcmLocalMemoryBlockDebugCookieFree = 0xFEEFEEFEEFEEFEEFllu;
  47. #endif
  48. struct CPs3gcmLocalMemoryAllocator
  49. {
  50. //////////////////////////////////////////////////////////////////////////
  51. //
  52. // Allocated memory tracking
  53. //
  54. uint32 m_nOffsetMin; // RSX Local Memory allocated by Initialization that will never be released
  55. uint32 m_nOffsetMax; // Ceiling of allocatable RSX Local Memory (because the top portion is reserved for zcull/etc.), top portion managed separately
  56. uint32 m_nOffsetUnallocated; // RSX Local Memory offset of not yet allocated memory (between Min and Max)
  57. CUtlVector< CPs3gcmLocalMemoryBlockMutable * > m_arrAllocations; // Sorted array of all allocations
  58. //////////////////////////////////////////////////////////////////////////
  59. //
  60. // Free blocks tracking
  61. //
  62. struct LocalMemoryAllocation_t
  63. {
  64. CPs3gcmLocalMemoryBlockMutable m_block;
  65. uint32 m_uiFenceNumber;
  66. LocalMemoryAllocation_t *m_pNext;
  67. };
  68. LocalMemoryAllocation_t *m_pPendingFreeBlock;
  69. LocalMemoryAllocation_t *m_pFreeBlock;
  70. static uint32 sm_uiFenceNumber;
  71. uint32 m_uiFenceLastKnown;
  72. static uint32 volatile *sm_puiFenceLocation;
  73. //////////////////////////////////////////////////////////////////////////
  74. //
  75. // Implementation
  76. //
  77. inline bool Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
  78. inline void Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
  79. inline uint32 Reclaim( bool bForce = false );
  80. inline void Compact();
  81. // Helper methods
  82. inline LocalMemoryAllocation_t * FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize );
  83. inline bool IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue );
  84. inline void TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta );
  85. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  86. inline void ValidateAllBlocks();
  87. #endif
  88. }
  89. g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolCount];
  90. uint32 CPs3gcmLocalMemoryAllocator::sm_uiFenceNumber;
  91. uint32 volatile * CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation;
  92. // RSX memory usage stats tracking:
  93. static GPUMemoryStats g_RsxMemoryStats;
  94. struct GPUMemoryStats_Pool
  95. {
  96. int nDefaultPoolSize;
  97. int nDefaultPoolUsed;
  98. int nRTPoolUsed;
  99. int nDynamicPoolUsed;
  100. int nMainMemUsed;
  101. int nUnknownPoolUsed;
  102. };
  103. GPUMemoryStats_Pool g_RsxMemoryStats_Pool;
  104. static inline uint32 Ps3gcmHelper_ComputeTiledAreaMemorySize( uint32 nCount, uint32 w, uint32 h, uint32 bpp )
  105. {
  106. uint32 nTilePitch = cellGcmGetTiledPitchSize( w * bpp );
  107. uint32 uiSize = nTilePitch * AlignValue( h, 32 );
  108. uiSize *= nCount;
  109. uiSize = AlignValue( uiSize, PS3GCMALLOCATIONALIGN( kAllocPs3gcmColorBufferMisc ) );
  110. return uiSize;
  111. }
  112. void Ps3gcmLocalMemoryAllocator_Init()
  113. {
  114. PS3ALLOCMTX
  115. if ( !CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation )
  116. {
  117. CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = cellGcmGetLabelAddress( GCM_LABEL_MEMORY_FREE );
  118. *CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = 0;
  119. }
  120. // Pool boundaries
  121. uint32 uiGcmAllocBegin = g_ps3gcmGlobalState.m_nLocalBaseOffset;
  122. uint32 uiGcmAllocEnd = uiGcmAllocBegin + g_ps3gcmGlobalState.m_nLocalSize;
  123. // Memory should be allocated for large frame buffers
  124. uint32 uiMemorySizeBuffer[2] = { MAX( 1280, g_ps3gcmGlobalState.m_nRenderSize[0] ), MAX( 720, g_ps3gcmGlobalState.m_nRenderSize[1] ) };
  125. uint32 uiFactor[2] = { uiMemorySizeBuffer[0]*uiMemorySizeBuffer[1], 1280*720 };
  126. // Configuration of pool memory (can be #ifdef'd for every game)
  127. static const uint32 s_PoolMemoryLayout[/*kGcmAllocPoolCount*/] =
  128. {
  129. #if defined( CSTRIKE15 )
  130. // mhansen - We had to adjust the memory values a bit for cstrike15 to get a map to load
  131. // PS3_BUILDFIX - We need to revisit this to determine the proper size later on
  132. // mdonofrio - render target allocations revisited for PS3
  133. // potential to save some more (~12Mb) from TiledColourFB (only need two really.
  134. // wait for other rendering optimisation/rework to be finished first before attempting.
  135. /*kGcmAllocPoolDefault = */ 0,
  136. /*kGcmAllocPoolDynamicNewPath = */ 5 * 1024 * 1024, // 5 MB
  137. /*kGcmAllocPoolDynamic = */ 11 * 1024 * 1024, // 11 MB
  138. /*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 + CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // 3 buffers allocated in CreateRSXBuffers + 2 _rt_fullFrameFB - can probably get this down to 2 if we 1. don't use MLAA and 2. we clean up the post-pro rendering to use the front buffer as a textureand 3. tidy up aliasing for rt_fullframeFB and rt_fullFrameFB1
  139. /*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 2 1/4 size framebuffer textures
  140. /*kGcmAllocPoolTiledColor512 = */ 0,
  141. /*kGcmAllocPoolTiledColorMisc = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, 640, 640, 4 ) + Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 1024, 512, 4) + Ps3gcmHelper_ComputeTiledAreaMemorySize(1, 32, 32, 4), // // 1x 1/2 size smoke/fog buffer, 2xWater(1024x512x32bpp), EyeGlint(32x32x32bpp), *Monitor(256x256x32bpp), *RTTFlashlightShadows(864x864x8bpp) - * we don't need these for CS15
  142. /*kGcmAllocPoolTiledD24S8 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // only 2 depth buffer targets required (current and saved off), + reserve space for 1/2 size depth buffer for smoke/fog
  143. /*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
  144. /*kGcmAllocPoolMallocMemory = */ 0, // using malloc
  145. #else
  146. /*kGcmAllocPoolDefault = */ 0,
  147. /*kGcmAllocPoolDynamicNewPath = */ 5 * 1024 * 1024, // 5 MB
  148. /*kGcmAllocPoolDynamic = */ 10 * 1024 * 1024, // 10 MB
  149. /*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 * CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // fits 6 of full framebuffer textures
  150. /*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 4, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 4 quarters of framebuffer textures
  151. /*kGcmAllocPoolTiledColor512 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 512, 512, 4 ), // fits 2 512x512 RGBA textures
  152. /*kGcmAllocPoolTiledColorMisc = */ 5 * 1024 * 1024, // 5 MB
  153. /*kGcmAllocPoolTiledD24S8 = */ uint64( 15 * 1024 * 1024 ) * uiFactor[0]/uiFactor[1], // 15 MB
  154. /*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
  155. /*kGcmAllocPoolMallocMemory = */ 0, // using malloc
  156. #endif
  157. };
  158. COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryLayout ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
  159. for ( int j = ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); j -- > 0; )
  160. {
  161. const uint32 uiSize = AlignValue( s_PoolMemoryLayout[j], 1024 * 1024 ); // Align it on 1 MB boundaries, all our pools are large
  162. g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax = uiGcmAllocEnd;
  163. uiGcmAllocEnd -= uiSize;
  164. g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin =
  165. g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetUnallocated = uiGcmAllocEnd;
  166. }
  167. // Default pool setup (rest of local memory)
  168. g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax = uiGcmAllocEnd;
  169. g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin =
  170. g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetUnallocated = uiGcmAllocBegin;
  171. // Main memory mapped pool
  172. g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin =
  173. g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetUnallocated = uint32( g_ps3gcmGlobalState.m_pRsxMainMemoryPoolBuffer ) + g_ps3gcmGlobalState.m_nIoOffsetDelta;
  174. g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMax = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin + g_ps3gcmGlobalState.m_nRsxMainMemoryPoolBufferSize;
  175. // Store initial capacity for memory stats tracking:
  176. g_RsxMemoryStats.nGPUMemSize = g_ps3gcmGlobalState.m_nLocalSize;
  177. g_RsxMemoryStats_Pool.nDefaultPoolSize = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin;
  178. //
  179. // Setup preset tiled regions
  180. //
  181. {
  182. CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFB;
  183. uint8 uiBank = 0; // bank 0..3
  184. uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 );
  185. uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
  186. cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
  187. g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
  188. g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
  189. nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
  190. ( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
  191. uiBank );
  192. cellGcmBindTile( uiTileIndex );
  193. }
  194. {
  195. CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFBQ;
  196. uint8 uiBank = 1; // bank 0..3
  197. uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 / 4 );
  198. uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
  199. cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
  200. g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
  201. g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
  202. nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
  203. ( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
  204. uiBank );
  205. cellGcmBindTile( uiTileIndex );
  206. }
  207. {
  208. CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColor512;
  209. uint8 uiBank = 2; // bank 0..3
  210. uint32 nRenderPitch = cellGcmGetTiledPitchSize( 512 * 4 );
  211. uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
  212. cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
  213. g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
  214. g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
  215. nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
  216. ( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
  217. uiBank );
  218. cellGcmBindTile( uiTileIndex );
  219. }
  220. #ifndef _CERT
  221. static const char * s_PoolMemoryNames[] =
  222. {
  223. /*kGcmAllocPoolDefault = */ "Default Pool",
  224. /*kGcmAllocPoolDynamicNewPath = */ "Dynamic New ",
  225. /*kGcmAllocPoolDynamic = */ "Dynamic IBVB",
  226. /*kGcmAllocPoolTiledColorFB = */ "FullFrameRTs",
  227. /*kGcmAllocPoolTiledColorFBQ = */ "1/4Frame RTs",
  228. /*kGcmAllocPoolTiledColor512 = */ "512x512 RTs ",
  229. /*kGcmAllocPoolTiledColorMisc = */ "All Misc RTs",
  230. /*kGcmAllocPoolTiledD24S8 = */ "DepthStencil",
  231. /*kGcmAllocPoolMainMemory = */ "Main Memory ",
  232. /*kGcmAllocPoolMallocMemory = */ "MallocMemory",
  233. };
  234. COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryNames ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
  235. Msg( "RSX Local Memory layout:\n" );
  236. for ( int j = 0; j < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ j )
  237. {
  238. Msg( " %s 0x%08X - 0x%08X [ %9.3f MB ]\n",
  239. s_PoolMemoryNames[j],
  240. g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin,
  241. g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax,
  242. (g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin) / 1024.f / 1024.f );
  243. }
  244. Msg( "Total size: %d MB\n", g_ps3gcmGlobalState.m_nLocalSize / 1024 / 1024 );
  245. #endif
  246. }
  247. void Ps3gcmLocalMemoryAllocator_Reclaim()
  248. {
  249. PS3ALLOCMTX
  250. for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
  251. g_ps3gcmLocalMemoryAllocator[ k ].Reclaim();
  252. }
  253. void Ps3gcmLocalMemoryAllocator_Compact()
  254. {
  255. #define PS3GCMCOMPACTPROFILE 0
  256. #if PS3GCMCOMPACTPROFILE
  257. float flTimeStart = Plat_FloatTime();
  258. uint32 uiFree = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
  259. #endif
  260. // Let RSX wait for final flip
  261. GCM_FUNC( cellGcmSetWaitFlip );
  262. // Let PPU wait for all RSX commands done (include waitFlip)
  263. g_ps3gcmGlobalState.CmdBufferFinish();
  264. #if PS3GCMCOMPACTPROFILE
  265. float flTimeWait = Plat_FloatTime() - flTimeStart;
  266. #endif
  267. {
  268. PS3ALLOCMTX
  269. for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
  270. {
  271. g_ps3gcmLocalMemoryAllocator[ k ].Compact();
  272. }
  273. }
  274. #if PS3GCMCOMPACTPROFILE
  275. float flTimePrepareTransfer = Plat_FloatTime() - flTimeStart;
  276. #endif
  277. // Wait for all RSX memory to be transferred
  278. g_ps3gcmGlobalState.CmdBufferFinish();
  279. #if PS3GCMCOMPACTPROFILE
  280. float flTimeDone = Plat_FloatTime() - flTimeStart;
  281. char chBuffer[64];
  282. Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f / %0.3f / %0.3f sec\n",
  283. flTimeWait, flTimePrepareTransfer, flTimeDone );
  284. uint32 dummy;
  285. sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
  286. Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f -> %0.3f MB (%0.3f MB free)\n",
  287. uiFree / 1024.f / 1024.f, g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated / 1024.f / 1024.f,
  288. (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
  289. sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
  290. #endif
  291. }
  292. void Ps3gcmLocalMemoryAllocator_CompactWithReason( char const *szReason )
  293. {
  294. double flTimeCompactStart = Plat_FloatTime();
  295. DevMsg( "====== GCM LOCAL MEMORY COMPACT : %s =====\n", szReason );
  296. uint32 uiFreeMemoryBeforeCompact = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
  297. DevMsg( "RSX Local Memory Free: %0.3f MB; compacting...\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
  298. Ps3gcmLocalMemoryAllocator_Compact();
  299. DevMsg( "RSX Local Memory Compacted %0.3f MB in %0.3f sec\n",
  300. (uiFreeMemoryBeforeCompact - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f,
  301. Plat_FloatTime() - flTimeCompactStart );
  302. DevMsg( "RSX Local Memory Free: %0.3f MB\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
  303. }
  304. bool CPs3gcmLocalMemoryBlock::Alloc()
  305. {
  306. PS3ALLOCMTX
  307. return g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Alloc( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
  308. }
  309. void CPs3gcmLocalMemoryBlock::Free()
  310. {
  311. PS3ALLOCMTX
  312. g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Free( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
  313. }
  314. //////////////////////////////////////////////////////////////////////////
  315. //
  316. // Private implementation of PS3 local memory allocator
  317. //
  318. inline bool CPs3gcmLocalMemoryAllocator::Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
  319. {
  320. TrackAllocStats( pBlock->MutableType(), pBlock->MutableSize() );
  321. uint32 uAlignBytes = PS3GCMALLOCATIONALIGN( pBlock->MutableType() );
  322. Assert( IsPowerOfTwo( uAlignBytes ) );
  323. double flAllocatorStallTime = 0.0f;
  324. bool bCompactPerformed = true;
  325. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  326. bCompactPerformed = !r_ps3_gcmlowcompact.GetBool();
  327. #endif
  328. retry_allocation:
  329. // Try to find a free block
  330. if ( LocalMemoryAllocation_t *pFreeBlock = FindFreeBlock( uAlignBytes, pBlock->MutableSize() ) )
  331. {
  332. pBlock->MutableOffset() = pFreeBlock->m_block.MutableOffset();
  333. pBlock->MutableIndex() = pFreeBlock->m_block.MutableIndex();
  334. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  335. if ( m_arrAllocations[ pBlock->MutableIndex() ] != &pFreeBlock->m_block )
  336. Error( "<vitaliy> GCM Local Memory Allocator Error (attempt to reuse invalid free block)!" );
  337. #endif
  338. m_arrAllocations[ pBlock->MutableIndex() ] = reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock );
  339. delete pFreeBlock;
  340. }
  341. else if ( this != &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
  342. {
  343. // Allocate new block
  344. uint32 uiOldUnallocatedEdge = m_nOffsetUnallocated;
  345. uint32 uiFreeBlock = ( m_nOffsetUnallocated + uAlignBytes - 1 ) & ~( uAlignBytes - 1 );
  346. // Check if there's enough space in this pool for the requested block
  347. if ( uiFreeBlock + pBlock->MutableSize() > m_nOffsetMax )
  348. {
  349. // There's not enough space in this pool
  350. if ( m_pPendingFreeBlock )
  351. {
  352. // There are pending free blocks, we just need to wait for
  353. // RSX to finish rendering using them
  354. if ( !flAllocatorStallTime )
  355. {
  356. flAllocatorStallTime = Plat_FloatTime();
  357. g_ps3gcmGlobalState.CmdBufferFlush( CPs3gcmGlobalState::kFlushForcefully );
  358. }
  359. while ( Reclaim() < pBlock->MutableSize() && m_pPendingFreeBlock )
  360. {
  361. ThreadSleep( 1 );
  362. }
  363. goto retry_allocation;
  364. }
  365. else if ( !bCompactPerformed )
  366. {
  367. // Let PPU wait for all RSX commands done
  368. g_ps3gcmGlobalState.CmdBufferFinish();
  369. uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
  370. for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
  371. uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
  372. Warning(
  373. "**************** GCM LOCAL MEMORY LOW *****************\n"
  374. "<vitaliy> GCM Local Memory Allocator#%d pool compacting!\n"
  375. " Requested allocation %u bytes.\n"
  376. " Pool capacity %u bytes.\n"
  377. " Free fragmented space %u bytes.\n"
  378. " Unallocated %u bytes.\n"
  379. " Used %u bytes.\n",
  380. this - g_ps3gcmLocalMemoryAllocator,
  381. ( uint32 ) pBlock->MutableSize(),
  382. m_nOffsetMax - m_nOffsetMin,
  383. uiFragmentedFreeSpace,
  384. m_nOffsetMax - m_nOffsetUnallocated,
  385. m_nOffsetUnallocated - m_nOffsetMin
  386. );
  387. Compact();
  388. Warning( " ---> Compacted pool#%d has %u unallocated bytes.\n",
  389. this - g_ps3gcmLocalMemoryAllocator,
  390. m_nOffsetMax - m_nOffsetUnallocated );
  391. bCompactPerformed = true;
  392. // Wait for all RSX memory to be transferred
  393. g_ps3gcmGlobalState.CmdBufferFinish();
  394. goto retry_allocation;
  395. }
  396. else
  397. {
  398. // Main memory pool returns failure so caller can try local pool.
  399. if (this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ]) return false;
  400. uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
  401. for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
  402. uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
  403. Error(
  404. "********* OUT OF GCM LOCAL MEMORY ********************\n"
  405. "<vitaliy> GCM Local Memory Allocator#%d pool exhausted!\n"
  406. " Failed allocation %u bytes.\n"
  407. " Pool capacity %u bytes.\n"
  408. " Free fragmented space %u bytes.\n"
  409. " Unallocated %u bytes.\n"
  410. " Used %u bytes.\n",
  411. this - g_ps3gcmLocalMemoryAllocator,
  412. ( uint32 ) pBlock->MutableSize(),
  413. m_nOffsetMax - m_nOffsetMin,
  414. uiFragmentedFreeSpace,
  415. m_nOffsetMax - m_nOffsetUnallocated,
  416. m_nOffsetUnallocated - m_nOffsetMin
  417. );
  418. }
  419. }
  420. // update the pointer to "unallocated" realm
  421. m_nOffsetUnallocated = uiFreeBlock + pBlock->MutableSize();
  422. // this is the last allocation so far
  423. pBlock->MutableIndex() = m_arrAllocations.AddToTail( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock ) );
  424. pBlock->MutableOffset() = uiFreeBlock;
  425. }
  426. else
  427. {
  428. MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
  429. void *pvMallocMemory = MemAlloc_AllocAligned( pBlock->MutableSize(), uAlignBytes );
  430. pBlock->MutableOffset() = (uint32) pvMallocMemory;
  431. pBlock->MutableIndex() = ~0;
  432. }
  433. if ( flAllocatorStallTime )
  434. g_ps3gcmGlobalState.m_flAllocatorStallTimeWaitingRSX += Plat_FloatTime() - flAllocatorStallTime;
  435. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  436. // PS3 doesn't allow more than 8 zcull regions (index 0..7)
  437. if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() > 8 )
  438. Error( "PS3 number of zcull regions exceeded!\n" );
  439. // PS3 doesn't allow more than 15 tiles regions (index 0..14)
  440. if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() +
  441. g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorMisc].m_arrAllocations.Count() +
  442. ( kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB )
  443. > 15 )
  444. Error( "PS3 number of tiled regions exceeded!\n" );
  445. pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieAllocated;
  446. #endif
  447. return true;
  448. }
  449. inline void CPs3gcmLocalMemoryAllocator::Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
  450. {
  451. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  452. if ( !pBlock ||
  453. pBlock->m_dbgGuardCookie != g_GcmLocalMemoryBlockDebugCookieAllocated ||
  454. ( ( pBlock->MutableIndex() != ~0 ) && ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock ) ) )
  455. {
  456. //DebuggerBreak();
  457. Error( "<vitaliy> Attempt to free not allocated GCM local memory block!" );
  458. }
  459. pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieFree;
  460. #endif
  461. LocalMemoryAllocation_t *pDealloc = new LocalMemoryAllocation_t;
  462. pDealloc->m_block = *pBlock;
  463. pDealloc->m_uiFenceNumber = ++ sm_uiFenceNumber;
  464. pDealloc->m_pNext = m_pPendingFreeBlock;
  465. GCM_FUNC( cellGcmSetWriteBackEndLabel, GCM_LABEL_MEMORY_FREE, sm_uiFenceNumber );
  466. m_pPendingFreeBlock = pDealloc;
  467. TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
  468. if ( pBlock->MutableIndex() != ~0 )
  469. {
  470. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  471. if ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock )
  472. Error( "<vitaliy> GCM Local Memory Allocator Error (freeing block that is not properly registered)!" );
  473. #endif
  474. m_arrAllocations[ pBlock->MutableIndex() ] = &pDealloc->m_block;
  475. }
  476. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  477. pBlock->MutableOffset() = ~0;
  478. pBlock->MutableIndex() = ~0;
  479. #endif
  480. }
  481. inline bool CPs3gcmLocalMemoryAllocator::IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue )
  482. {
  483. #if GCM_ALLOW_NULL_FLIPS
  484. extern bool g_ps3_nullflips;
  485. if ( g_ps3_nullflips )
  486. return true;
  487. #endif
  488. // Needs to handle the counter wrapping around
  489. return ( ( uiCurrentFenceValue - m_uiFenceLastKnown ) >= ( uiCheckStoredFenceValue - m_uiFenceLastKnown ) );
  490. }
  491. inline uint32 CPs3gcmLocalMemoryAllocator::Reclaim( bool bForce )
  492. {
  493. uint32 uiLargestBlockSizeReclaimed = 0;
  494. uint32 uiCurrentFenceValue = *sm_puiFenceLocation;
  495. // Walk pending free blocks and see if they are no longer
  496. // in use by RSX:
  497. LocalMemoryAllocation_t **p = &m_pPendingFreeBlock;
  498. if ( !bForce ) while ( (*p) && !IsFenceCompleted( uiCurrentFenceValue, (*p)->m_uiFenceNumber ) )
  499. p = &( (*p)->m_pNext );
  500. // Now p is pointing to the chain of free blocks
  501. // chain that has been completed (due to the nature of
  502. // pushing new deallocation at the head of the pending
  503. // list)
  504. if ( *p )
  505. {
  506. LocalMemoryAllocation_t *pCompletedChain = *p;
  507. *p = NULL; // Terminate the chain
  508. // Handle the special case of malloc reclaim - free all memory
  509. if ( this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
  510. {
  511. MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
  512. for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; )
  513. {
  514. MemAlloc_FreeAligned( pActualFree->m_block.DataInMallocMemory() );
  515. LocalMemoryAllocation_t *pDelete = pActualFree;
  516. pActualFree = pActualFree->m_pNext;
  517. delete pDelete;
  518. }
  519. pCompletedChain = NULL;
  520. }
  521. // Relink the completed pending chain into
  522. // the free blocks chain
  523. LocalMemoryAllocation_t **ppFree = &m_pFreeBlock;
  524. while ( *ppFree )
  525. ppFree = &( (*ppFree)->m_pNext );
  526. *ppFree = pCompletedChain;
  527. // Recompute actual free sizes of the completed chain
  528. // Actual free size is the delta between block offset and next block offset
  529. // When there's no next block then its delta between block offset and unallocated edge
  530. for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; pActualFree = pActualFree->m_pNext )
  531. {
  532. uint32 uiIdx = pActualFree->m_block.MutableIndex() + 1;
  533. uint32 uiNextOffset = m_nOffsetUnallocated;
  534. if ( uiIdx < m_arrAllocations.Count() )
  535. {
  536. CPs3gcmLocalMemoryBlockMutable * RESTRICT pNextBlock = m_arrAllocations[ uiIdx ];
  537. uiNextOffset = pNextBlock->Offset();
  538. }
  539. uint32 uiActualBlockSize = uiNextOffset - pActualFree->m_block.Offset();
  540. pActualFree->m_block.MutableSize() = uiActualBlockSize;
  541. uiLargestBlockSizeReclaimed = MAX( uiLargestBlockSizeReclaimed, uiActualBlockSize );
  542. }
  543. }
  544. // Remember the last known fence value
  545. m_uiFenceLastKnown = uiCurrentFenceValue;
  546. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  547. ValidateAllBlocks();
  548. #endif
  549. return uiLargestBlockSizeReclaimed;
  550. }
  551. inline CPs3gcmLocalMemoryAllocator::LocalMemoryAllocation_t * CPs3gcmLocalMemoryAllocator::FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize )
  552. {
  553. LocalMemoryAllocation_t **ppBest = NULL;
  554. uint32 uiSizeMax = uiSize * 11/10; // we don't want to inflate requested size by > 10%
  555. for ( LocalMemoryAllocation_t **p = &m_pFreeBlock;
  556. (*p);
  557. p = &( (*p)->m_pNext ) )
  558. {
  559. if ( (*p)->m_block.MutableSize() >= uiSize && (*p)->m_block.MutableSize() <= uiSizeMax &&
  560. !( (*p)->m_block.Offset() & ( uiAlignBytes - 1 ) ) )
  561. {
  562. if ( !ppBest || ( (*p)->m_block.MutableSize() <= (*ppBest)->m_block.MutableSize() ) )
  563. {
  564. ppBest = p;
  565. }
  566. }
  567. }
  568. if ( ppBest )
  569. {
  570. LocalMemoryAllocation_t *pFree = (*ppBest);
  571. (*ppBest) = pFree->m_pNext;
  572. pFree->m_pNext = NULL;
  573. return pFree;
  574. }
  575. return NULL;
  576. }
  577. inline bool TrackAllocStats_Pool( CPs3gcmAllocationType_t uAllocType, int nDelta )
  578. {
  579. CPs3gcmAllocationPool_t pool = PS3GCMALLOCATIONPOOL( uAllocType );
  580. int *stat = &g_RsxMemoryStats_Pool.nUnknownPoolUsed;
  581. bool bInRSXMem = true;
  582. switch( pool )
  583. {
  584. case kGcmAllocPoolDefault:
  585. stat = &g_RsxMemoryStats_Pool.nDefaultPoolUsed;
  586. break;
  587. case kGcmAllocPoolDynamicNewPath:
  588. case kGcmAllocPoolDynamic:
  589. stat = &g_RsxMemoryStats_Pool.nDynamicPoolUsed;
  590. break;
  591. case kGcmAllocPoolTiledColorFB:
  592. case kGcmAllocPoolTiledColorFBQ:
  593. case kGcmAllocPoolTiledColor512:
  594. case kGcmAllocPoolTiledColorMisc:
  595. case kGcmAllocPoolTiledD24S8:
  596. stat = &g_RsxMemoryStats_Pool.nRTPoolUsed;
  597. break;
  598. case kGcmAllocPoolMainMemory: // Unused, unless PS3GCM_VBIB_IN_IO_MEMORY set to 1
  599. case kGcmAllocPoolMallocMemory:
  600. stat = &g_RsxMemoryStats_Pool.nMainMemUsed;
  601. bInRSXMem = false; // In main memory!
  602. break;
  603. }
  604. *stat += nDelta;
  605. Assert( 0 <= (int)*stat );
  606. // Report free memory only from the default pool (the other pools are pre-sized to fixed limits, and all
  607. // geom/textures go into the default pool, so that's where content-driven variation/failures will occur)
  608. g_RsxMemoryStats.nGPUMemFree = g_RsxMemoryStats_Pool.nDefaultPoolSize - g_RsxMemoryStats_Pool.nDefaultPoolUsed;
  609. return bInRSXMem;
  610. }
  611. inline void CPs3gcmLocalMemoryAllocator::TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta )
  612. {
  613. #if TRACK_ALLOC_STATS
  614. // Early-out for allocations not in RSX memory:
  615. if ( !TrackAllocStats_Pool( uAllocType, nDelta ) )
  616. return;
  617. unsigned int *stat = &g_RsxMemoryStats.nUnknown;
  618. switch( uAllocType )
  619. {
  620. case kAllocPs3gcmColorBufferMisc:
  621. case kAllocPs3gcmColorBufferFB:
  622. case kAllocPs3gcmColorBufferFBQ:
  623. case kAllocPs3gcmColorBuffer512:
  624. case kAllocPs3gcmDepthBuffer:
  625. stat = &g_RsxMemoryStats.nRTSize;
  626. break;
  627. case kAllocPs3gcmTextureData:
  628. case kAllocPs3gcmTextureData0:
  629. stat = &g_RsxMemoryStats.nTextureSize;
  630. break;
  631. case kAllocPs3GcmVertexBuffer:
  632. stat = &g_RsxMemoryStats.nVBSize;
  633. break;
  634. case kAllocPs3GcmIndexBuffer:
  635. stat = &g_RsxMemoryStats.nIBSize;
  636. break;
  637. case kAllocPs3GcmShader:
  638. case kAllocPs3GcmEdgeGeomBuffer:
  639. case kAllocPs3GcmVertexBufferDynamic:
  640. case kAllocPs3GcmIndexBufferDynamic:
  641. case kAllocPs3GcmDynamicBufferPool:
  642. case kAllocPs3GcmVertexBufferDma:
  643. case kAllocPs3GcmIndexBufferDma:
  644. // Treat these as misc unless they become big/variable
  645. break;
  646. }
  647. *stat += nDelta;
  648. Assert( 0 <= (int)*stat );
  649. #endif // TRACK_ALLOC_STATS
  650. }
  #ifdef GCMLOCALMEMORYBLOCKDEBUG
  // Reports a corrupt block through Error(). The expansion reads the locals `pBlock` and `k`,
  // so both names must be in scope wherever the macro is used.
  #define VALIDATECONDITION( x ) if( !( x ) ) { Error( "<vitaliy> GCM Local Memory Allocation block %p index %d is corrupt [line %d]!\n", pBlock, k, __LINE__ ); }

  // Debug-only consistency check over the allocation tracking array and both free chains.
  inline void CPs3gcmLocalMemoryAllocator::ValidateAllBlocks()
  {
      // Indices of tracked blocks that carry the "free" cookie; each one must later be found
      // on one of the free chains.
      CUtlVector< uint32 > arrFreeBlocksIdx;

      // Pass 1: tracked blocks must have a valid cookie, point back at their own slot, be
      // ordered by non-decreasing offset and stay within [m_nOffsetMin, m_nOffsetMax].
      uint32 uiLastAllocatedOffset = m_nOffsetMin;
      const int nTrackedBlocks = m_arrAllocations.Count();
      for ( int k = 0; k < nTrackedBlocks; ++ k )
      {
          CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = m_arrAllocations[k];
          VALIDATECONDITION( pBlock );

          const bool bMarkedFree = ( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
          VALIDATECONDITION( bMarkedFree || pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieAllocated );

          VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
          VALIDATECONDITION( pBlock->MutableIndex() == k );
          VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );

          VALIDATECONDITION( pBlock->Offset() >= uiLastAllocatedOffset );
          uiLastAllocatedOffset = pBlock->Offset() + pBlock->MutableSize();
          VALIDATECONDITION( uiLastAllocatedOffset <= m_nOffsetMax );

          if ( bMarkedFree )
          {
              arrFreeBlocksIdx.AddToTail( k );
          }
      }

      // Pass 2: walk the pending-free chain, then the free chain.
      LocalMemoryAllocation_t * arrChainHeads[] = { m_pPendingFreeBlock, m_pFreeBlock };
      for ( int iChain = 0; iChain < ARRAYSIZE( arrChainHeads ); ++ iChain )
      {
          LocalMemoryAllocation_t *pNode = arrChainHeads[iChain];
          while ( pNode )
          {
              // Until the block's own index has been read, report the chain number as the index.
              int k = iChain;
              CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = &pNode->m_block;
              VALIDATECONDITION( pBlock );
              VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );

              k = pBlock->MutableIndex();
              if ( pBlock->MutableIndex() != ~0 )
              {
                  // An index other than ~0 must refer to this block's slot in the tracking
                  // array and must match one of the free indices collected in pass 1.
                  VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
                  VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
                  VALIDATECONDITION( arrFreeBlocksIdx.FindAndFastRemove( pBlock->MutableIndex() ) );
              }

              pNode = pNode->m_pNext;
          }
      }

      // Every free-marked tracked block must have been matched above. The two dummies exist
      // only because the macro expands against locals named `k` and `pBlock`.
      int k = 0;
      void *pBlock = 0;
      VALIDATECONDITION( !arrFreeBlocksIdx.Count() );
  }
  #endif
  // Compacts the blocks tracked in m_arrAllocations: reclaims pending frees, deletes every node on
  // the free chain, then calls Alloc() again for each surviving block (in existing array order) after
  // resetting m_nOffsetUnallocated to m_nOffsetMin, and queues a transfer for each block whose offset changed.
  // NOTE(review): the comments and the Error text below state that RSX and PPU rendering must be
  // paused while this runs -- confirm at the call sites.
  694. inline void CPs3gcmLocalMemoryAllocator::Compact()
  695. {
  // NOTE(review): the debug early-out below returns after this push, and the function ends with
  // GCM_PERF_MARKER rather than a visible pop -- confirm how these marker macros are meant to pair.
  696. GCM_PERF_PUSH_MARKER( "LocalMemory:Compact" );
  697. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  698. ValidateAllBlocks();
  // Debug switch: when r_ps3_gcmnocompact is set, validate only and skip compaction entirely.
  699. if ( r_ps3_gcmnocompact.GetBool() )
  700. return;
  701. #endif
  702. // Reclaim all memory (NOTE: all pending blocks must be reclaimed since both RSX and PPU have stopped rendering!)
  703. Reclaim();
  704. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  705. if ( m_pPendingFreeBlock )
  706. Warning( "GCM Local Memory Allocator Compact forces pending free blocks to be reclaimed.\n" );
  707. ValidateAllBlocks();
  708. #endif
  // Blocks still pending after the plain Reclaim() get a second pass with the flag set
  // (presumably a forced reclaim, going by the Warning text above -- confirm against Reclaim()).
  709. if ( m_pPendingFreeBlock )
  710. Reclaim( true );
  711. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  712. if ( m_pPendingFreeBlock )
  713. Error( "<vitaliy> GCM Local Memory Allocator Compact requires RSX and PPU rendering to be paused! (pending free blocks have not been reclaimed)\n" );
  714. ValidateAllBlocks();
  715. #endif
  716. // Walk the free blocks chain and patch-back NULL pointers into allocation tracking system
  // NOTE(review): ValidateAllBlocks() accepts free blocks whose index is ~0, but the array write
  // below is unconditional -- confirm such blocks can never be on the free chain of a pool that is compacted.
  717. while ( m_pFreeBlock )
  718. {
  719. LocalMemoryAllocation_t *p = m_pFreeBlock;
  720. m_pFreeBlock = p->m_pNext;
  721. m_arrAllocations[ p->m_block.MutableIndex() ] = NULL;
  722. delete p;
  723. }
  724. Assert( !m_pFreeBlock && !m_pPendingFreeBlock );
  725. // These are elements requiring reallocation
  // The count and base pointer are captured BEFORE the vector's size is zeroed below; the
  // reallocation loop reads the old entries through this pointer.
  726. uint32 uiCount = m_arrAllocations.Count();
  727. CPs3gcmLocalMemoryBlockMutable **pReallocationBlocks = m_arrAllocations.Base();
  728. // Here "correct" implementation would be to copy off m_arrAllocations vector onto stack for iteration,
  729. // RemoveAll from m_arrAllocations vector and allocate all blocks again.
  730. // We will cheat since we know that we will allocate same number of elements and directly write zero
  731. // into m_arrAllocations m_Size member, then we will still be able to use the memory of the vector
  732. // for reading blocks requiring compact reallocation, and AddToTail will still fill the vector with
  733. // correct data.
  // The local subclass exists only to reach CUtlVector's m_Size member; the cast reinterprets
  // m_arrAllocations in place, no copy is made.
  734. struct AllocatorCompactVectorCheat : public CUtlVector< CPs3gcmLocalMemoryBlockMutable * > { inline void ResetCountPreservingMemoryContents() { m_Size = 0; } };
  735. ( ( AllocatorCompactVectorCheat * ) ( char * ) &m_arrAllocations )->ResetCountPreservingMemoryContents();
  // Rewind the unallocated offset to the start of this allocator's range before re-allocating.
  736. m_nOffsetUnallocated = m_nOffsetMin;
  737. // Prepare RSX for data buffer transfers in local memory
  // The transfer mode is picked from this allocator's position in g_ps3gcmLocalMemoryAllocator relative
  // to the kGcmAllocPoolDefault entry: positions below kGcmAllocPoolMainMemory use local-to-local
  // transfers, the rest use main-to-main.
  738. uint nTransferMode = ( ( this - &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ] ) < kGcmAllocPoolMainMemory ) ? CELL_GCM_TRANSFER_LOCAL_TO_LOCAL : CELL_GCM_TRANSFER_MAIN_TO_MAIN;
  739. Assert( nTransferMode < 4 );
  740. GCM_FUNC( cellGcmSetTransferDataMode, nTransferMode ); // unnecessary if we do this on SPU
  741. Assert( !g_spuGcm.IsDeferredDrawQueue() );
  742. // Reallocate all blocks
  743. for ( ; uiCount; -- uiCount, ++ pReallocationBlocks )
  744. {
  745. CPs3gcmLocalMemoryBlockMutable *pBlock = *pReallocationBlocks;
  // NULL slots are the free blocks that were patched out by the loop above.
  746. if ( !pBlock )
  747. continue;
  748. uint32 nOldOffset = pBlock->Offset();
  // Subtract the block's size from the stats before re-allocating it
  // (presumably Alloc() adds it back -- confirm against Alloc()).
  749. TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
  750. Alloc( pBlock );
  // Offset unchanged: the block did not move, so there is nothing to transfer.
  751. if ( nOldOffset == pBlock->Offset() )
  752. continue;
  753. // Have RSX transfer blocks data. RSX may hang if there's WriteLabel between the Format and Offset commands,
  754. // so reserve space for both of them up front
  755. SpuDrawTransfer_t * pTransfer = g_spuGcm.GetDrawQueue()->AllocWithHeader<SpuDrawTransfer_t>( SPUDRAWQUEUE_TRANSFER_METHOD | nTransferMode );
  // The queued transfer carries the block size plus its old and new offsets.
  756. pTransfer->m_nLineSize = pBlock->MutableSize();
  757. pTransfer->m_nOldOffset = nOldOffset;
  758. pTransfer->m_nNewOffset = pBlock->Offset();
  759. }
  760. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  761. ValidateAllBlocks();
  762. #endif
  763. GCM_PERF_MARKER( "Compact:Complete" );
  764. }
  765. //////////////////////////////////////////////////////////////////////////
  766. //
  767. // Computation of tiled memory
  768. //
  769. uint32 CPs3gcmLocalMemoryBlock::TiledMemoryTagAreaBase() const
  770. {
  771. CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL(m_uType);
  772. if ( ePool == kGcmAllocPoolTiledColorMisc ) // Misc color tiles are placed at the front of tag area after preset pools
  773. return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
  774. if ( ePool == kGcmAllocPoolTiledD24S8 ) // Depth tiles are placed in the end of tag area (0-0x7FF is offset range)
  775. return 0x800 - ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin + m_uiSize ) / 0x10000;
  776. if ( ePool == kGcmAllocPoolTiledColorFB ) // FB color tiles go first
  777. return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
  778. if ( ePool == kGcmAllocPoolTiledColorFBQ ) // FBQ color tiles go next
  779. return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFBQ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
  780. if ( ePool == kGcmAllocPoolTiledColor512 ) // 512 color tiles go next
  781. return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColor512].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
  782. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  783. Error( "<vitaliy> Cannot compute tiled memory tag base from a non-tiled-pool allocation!\n" );
  784. #endif
  785. return ~0;
  786. }
  787. uint32 CPs3gcmLocalMemoryBlock::TiledMemoryIndex() const
  788. {
  789. CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL(m_uType);
  790. if ( ePool == kGcmAllocPoolTiledColorMisc ) // Color tiles are placed in the front
  791. return m_uiIndex + kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB;
  792. if ( ePool == kGcmAllocPoolTiledD24S8 ) // Depth tiles are placed as last tiles
  793. return 14 - m_uiIndex;
  794. return ePool - kGcmAllocPoolTiledColorFB;
  795. }
  796. uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryIndex() const
  797. {
  798. CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL(m_uType);
  799. if ( ePool == kGcmAllocPoolTiledD24S8 ) // Depth tiles are the only zcull tiles
  800. return m_uiIndex;
  801. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  802. Error( "<vitaliy> Cannot compute zcull index from a non-zcull allocation!\n" );
  803. #endif
  804. return ~0;
  805. }
  806. uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryStart() const
  807. {
  808. CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL(m_uType);
  809. if ( ePool == kGcmAllocPoolTiledD24S8 ) // Depth tiles are the only zcull tiles
  810. return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin ) / 4; // 1 byte per pixel, D24S8 is 4 bytes per pixel, implicitly 4096 aligned because offset is 64Kb aligned
  811. #ifdef GCMLOCALMEMORYBLOCKDEBUG
  812. Error( "<vitaliy> Cannot compute zcull memory start from a non-zcull allocation!\n" );
  813. #endif
  814. return ~0;
  815. }
  816. //////////////////////////////////////////////////////////////////////////
  817. //
  818. // Allow shaderapi to query GPU memory stats:
  819. //
  820. void GetGPUMemoryStats( GPUMemoryStats &stats )
  821. {
  822. stats = g_RsxMemoryStats;
  823. }