Counter-Strike: Global Offensive Source Code


//================ Copyright (c) 1996-2010 Valve Corporation. All Rights Reserved. =================
//
// Double ring buffer used for bidirectional communication between RSX and SPU.
// One ring buffer is (externally managed) jobchain(s) that call into entries in IO address space
// that RSX patches (changing from JTS to RET). The other ring buffer lives in local memory
// and is split into segments. RSX consumes the local memory buffer and releases it
// segment by segment. SPU runs ahead of it, produces the segments, and notifies RSX
// using JTS external to the classes here.
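//
// Expected call flow (a sketch inferred from the methods below, not part of the original comments):
//   SetIoBuffer()  - PPU-side setup of the JTS/RET guard entries in IO address space
//   SetRsxBuffer() - PPU-side setup of the local-memory ring and its segment limits
//   OnGcmInit()    - records the EA-to-IO-offset delta once GCM is initialized
//   CommitSpuSegment() / InternalGuardAndLock() / InternalSwitchRing() - SPU-side
//                    segment production, guarding against RSX, and ring switching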
//
//
#include <cell/spurs.h>
#include "ps3/dxabstract_gcm_shared.h"
#include "ps3/rsx_spu_double_ring.h"
#include "ps3/vjobchain4.h"
#include "vjobs/pcring.h"
#include "ps3/ps3gcmlabels.h"

// Can be LWSYNC or NOP if followed by RET
// Must be RET otherwise
// JTS->LWSYNC mutation allows only a 4-byte inline transfer from RSX, guaranteeing atomicity
#define MUTABLE_GUARD_COMMAND CELL_SPURS_JOB_COMMAND_LWSYNC

#ifndef SPU
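// Sets up the array of guard entries in IO address space. Passing pIoBuffer == NULL
// performs a measuring pass only (see the early-out below); the entry count must be
// a power of 2 because it is used as a wrap-around mask.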
void RsxSpuDoubleRing::SetIoBuffer( void * pIoBuffer, uint nIoBufferByteSize )
{
	m_pIoBuffer = ( IoBufferEntry_t * )pIoBuffer;
	m_nIoBufferNextIndex = 0;
	m_nIoBufferCount = nIoBufferByteSize / sizeof( *m_pIoBuffer );
	Assert( !( m_nIoBufferCount & ( m_nIoBufferCount - 1 ) ) );
	// Don't initialize if IO buffer is being measured
	if ( !m_pIoBuffer )
		return;
	// init all entries to RET, which means they're released and ready to be reused
	for( uint i = 0; i < m_nIoBufferCount; ++i )
	{
		m_pIoBuffer[i].m_nMutableGuard = MUTABLE_GUARD_COMMAND;
		m_pIoBuffer[i].m_nConstRet = CELL_SPURS_JOB_COMMAND_RET;
	}
}
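// Records the delta between the effective address of the IO buffer and its
// RSX-visible IO offset; CommitSpuSegment adds it to guard addresses when setting
// up RSX inline transfers.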
void RsxSpuDoubleRing::OnGcmInit( uint nIoBufferOffsetDelta )
{
	m_nIoBufferOffsetDelta = nIoBufferOffsetDelta;
}
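// Configures the local-memory (RSX-consumed) ring: validates the segment sizing,
// derives the maximum number of segments per ring, and resets both rings to the
// "whole buffer free for SPU" state.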
void RsxSpuDoubleRing::SetRsxBuffer( void * eaRsxBuffer, uint nRsxBufferSize, uint nIdealSegmentSize, uint nMaxJobsPerSegment )
{
	if( nIdealSegmentSize & ( nIdealSegmentSize - 1 ) )
	{
		Error( "RsxSpuDoubleRing: invalid ideal segment size %d, must be a power of 2\n", nIdealSegmentSize );
	}
	if( nIdealSegmentSize > nRsxBufferSize / 2 )
	{
		Error( "RsxSpuDoubleRing: invalid ideal segment size %d (full buffer size %d), must be at most half the buffer size\n", nIdealSegmentSize, nRsxBufferSize );
	}
	m_nMaxSegmentsPerRing = nRsxBufferSize / MIN( nIdealSegmentSize, nMaxJobsPerSegment * 128 );
	if( m_nIoBufferCount < /*ARRAYSIZE( m_pIoBaseGuards )*/4 * m_nMaxSegmentsPerRing ) // + 1 for the initial slot
	{
		Error( "RsxSpuDoubleRing: IO buffer is too small: there may be up to %d segments per ring, and there are only %d IO guard (JTS-RET) elements. Make IO buffer at least %u bytes large.\n", m_nMaxSegmentsPerRing, m_nIoBufferCount, 4 * m_nMaxSegmentsPerRing * sizeof( *m_pIoBuffer ) );
	}
	m_nIdealSegmentSize = nIdealSegmentSize;
	m_nMaxJobsPerSegment = nMaxJobsPerSegment;
	m_eaRsxBuffer = ( uintp )eaRsxBuffer;
	m_eaRsxBufferEnd = m_eaRsxBuffer + nRsxBufferSize;
	m_nIoBufferNextIndex = 0;
	m_nRingRsxNextSegment = 0; // consider the rsx ring already done
	// nothing is allocated by SPU
	m_eaRingSpuBase = m_eaRsxBufferEnd;
	// we consider that the last segment was signaled beyond the end of this segment
	m_eaRingSpuLastSegment = m_eaRsxBufferEnd;
	m_nRingSpuJobCount = 0;
	// this segment is for reference to the bottom of rsx buffer only
	// the whole RSX buffer is free for SPU to use
	m_eaRingRsxBase = m_eaRsxBuffer;
	m_ringSpu.EnsureCapacity( m_nMaxSegmentsPerRing );
	m_ringRsx.EnsureCapacity( m_nMaxSegmentsPerRing );
}
#endif
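// Waits until RSX has released the segment of local memory containing eaRsxMem
// (by pushing a CALL into the sync jobchain that blocks on that segment's JTS
// guard), then claims the memory for the SPU ring.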
void RsxSpuDoubleRing::InternalGuardAndLock( VjobChain4 * pSyncChain, uintp eaRsxMem )
{
	if( m_nRingRsxNextSegment >= m_ringRsx.Count() )
	{
		// if we've exhausted all RSX ring segments, it can only mean that m_eaRingRsxBase == m_eaRsxBuffer, so this should never happen
		VjobSpuLog(
			"RsxSpuDoubleRing::InternalGuardAndLock: Unexpected error in RSX-SPU double ring, something's very wrong\n"
			"Please tell Sergiy the following numbers: %d,%d,%d,%d. @%X,@%X\n",
			m_nRingRsxNextSegment, m_ringRsx.Count(), m_ringSpu.Count(), m_ringSpu.GetCapacity(),
			m_eaRingRsxBase, m_eaRingSpuBase
		);
	}
	// the next most common case when we have to wait for RSX: we don't have to switch the ring because there's plenty of space still available
	// find the next segment to wait for ( may skip several segments )
	Assert( m_nRingRsxNextSegment < m_ringRsx.Count() );
	Segment_t segment;
	if( m_nRingRsxNextSegment >= m_ringRsx.Count() || m_ringSpu.Count() >= m_ringSpu.GetCapacity() )
	{
		VjobSpuLog(
			"RsxSpuDoubleRing::InternalGuardAndLock() hit an error condition, but will try to continue\n"
			"Please tell Sergiy the following numbers: %d>=%d|%d>=%d. @%X,@%X\n",
			m_nRingRsxNextSegment, m_ringRsx.Count(), m_ringSpu.Count(), m_ringSpu.GetCapacity(),
			m_eaRingRsxBase, m_eaRingSpuBase
		);
	}
	for( ; ; )
	{
		segment = m_ringRsx[m_nRingRsxNextSegment++];
		Assert( segment.m_eaBase < m_eaRingRsxBase );
		if( eaRsxMem >= segment.m_eaBase )
		{
			break; // we found the segment to wait on
		}
		if( m_nRingRsxNextSegment >= m_ringRsx.Count() )
		{
			// we exhausted all segments in the ring, so wait for the last segment and assume that'll be the end of this ring
			segment.m_eaBase = m_eaRsxBuffer;
			break;
		}
	}
	// we either found the segment to wait on here, or exhausted all segments from the RSX ring.
	// even if we exhausted all segments, it still means we found the LAST segment, and we'll use that segment as the guard
	uint64 * eaCall = pSyncChain->Push( ); // wait for the RSX to finish rendering from this memory before writing into it
	VjobDmaPutfUint64( CELL_SPURS_JOB_COMMAND_CALL( segment.m_pSpuJts ), (uint32)eaCall, VJOB_IOBUFFER_DMATAG );
	m_eaRingSpuBase = eaRsxMem;
	m_eaRingRsxBase = segment.m_eaBase;
}
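// Retires the current RSX ring and promotes the SPU ring in its place: waits on any
// RSX segments not yet consumed, commits (or extends) the final SPU segment down to
// the bottom of the buffer, swaps the rings, and invalidates the RSX texture cache.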
// Important side effects: may add to m_ringSpu
void RsxSpuDoubleRing::InternalSwitchRing( VjobChain4 * pSyncChain )
{
	// if we haven't already, we need to wait for segment 0 to avoid racing over it with SPU (to ensure serialization)
	if( m_nRingRsxNextSegment < m_ringRsx.Count() )
	{
		// this should be a very rare occurrence, because we don't normally jump across multiple segments; usually we have many allocations in a single segment
		uint64 * eaCall = pSyncChain->Push( );
		VjobDmaPutfUint64( CELL_SPURS_JOB_COMMAND_CALL( m_ringRsx.Tail().m_pSpuJts ), (uint32)eaCall, VJOB_IOBUFFER_DMATAG );
	}
	if( m_eaRingSpuBase < m_eaRingSpuLastSegment )
	{
		// since the last segment was created, there were allocations. Create a new segment to sync up to those allocations
		Assert( m_eaRsxBuffer <= m_eaRingSpuBase );
		m_eaRingSpuBase = m_eaRsxBuffer;
		CommitSpuSegment( );
	}
	else
	{
		// since the last segment was created, there were NO allocations. Extend the last segment to include the slack we're dropping now
		m_ringSpu.Tail().m_eaBase = m_eaRsxBuffer;
	}
	// now we switch the rings: the SPU ring becomes the RSX ring, and the old RSX ring retires
	//m_ringRsx.RemoveAll();
	//m_ringRsx.Swap( m_ringSpu );
	m_ringRsx.Assign( m_ringSpu );
	m_ringSpu.RemoveAll();
	AssertSpuMsg( m_ringRsx.Count() >= 2, "RSX ring has only %d segments! Something is very wrong with RSX-SPU double-ring\n", m_ringRsx.Count() );
	Assert( m_ringRsx.Count() < m_nMaxSegmentsPerRing );
	/*
	for( uint i = ARRAYSIZE( m_pIoBaseGuards ); i--> 1; ) // range: ARRAYSIZE( m_pIoBaseGuards ) - 1 ... 1
	{
		m_pIoBaseGuards[i] = m_pIoBaseGuards[i - 1];
	}
	m_pIoBaseGuards[0] = m_ringRsx.Tail().m_pSpuJts;
	*/
	m_eaRingSpuBase = m_eaRsxBufferEnd;
	m_eaRingSpuLastSegment = m_eaRsxBufferEnd;
	m_eaRingRsxBase = m_eaRsxBufferEnd;
	m_nRingSpuJobCount = 0;
	m_nRingRsxNextSegment = 0;
	// IMPORTANT RSX L2 CACHE INVALIDATION POINT
	// we've run out of a ring; start a new one and invalidate the texture cache, because we're using it for fragment programs and
	// the new ring will reuse the same memory, which can be in the RSX L2 cache, which doesn't get invalidated when we DMA new content into the new ring
	GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
}
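// Spins until the guard qword in IO memory reaches the expected value, re-reading it
// over DMA on every iteration; logs a stall warning once, on the 100th attempt.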
inline void WaitGuard( volatile uint64 *pGuard, uint64 nValueToWaitFor )
{
	int nAttempts = 0;
	while( VjobDmaGetUint64( (uint)pGuard, DMATAG_SYNC, 0, 0 ) != nValueToWaitFor )
	{
		if( 100 == nAttempts++ )
		{
			VjobSpuLog( "Stall in WaitGuard : probably not enough IO buffer memory for the SPU side ring\n" );
		}
	}
	/*
	if( *pGuard != nValueToWaitFor )
	{
		g_nWaitGuardSpins++;
		extern bool g_bEnableStallWarnings;
		if( g_bEnableStallWarnings )
		{
			Warning( "Stall in WaitGuard : probably not enough IO buffer memory for the SPU side ring\n" );
		}
		while( *pGuard != nValueToWaitFor )
		{
			g_nWaitGuardSpins++;
			sys_timer_usleep( 60 );
		}
	}
	*/
}
// Creates a new segment in the SPU ring, allocates a JTS-RET guard for it, and pushes a GCM command to release it.
// Assumption: the memory at eaBase and up has already been used up by RSX commands up to this point
// Important side effects: adds to m_ringSpu
void RsxSpuDoubleRing::CommitSpuSegment( )
{
	// check that RSX ran away at least 2 segments ahead; this guarantees that there are no SPU jobs waiting to be unblocked by any IoBuffer guards
	volatile uint64 * pIoBaseGuard = &( m_pIoBuffer[( m_nIoBufferNextIndex + m_nMaxSegmentsPerRing * 3 ) & ( m_nIoBufferCount - 1 )].m_nMutableGuard ); //m_pIoBaseGuards[ ARRAYSIZE( m_pIoBaseGuards ) - 1 ];
	WaitGuard( pIoBaseGuard, MUTABLE_GUARD_COMMAND );
	uint64 * eaJtsRetGuard = &( m_pIoBuffer[ ( m_nIoBufferNextIndex++ ) & ( m_nIoBufferCount - 1 ) ].m_nMutableGuard );
	Assert( VjobDmaGetUint64( ( uint )eaJtsRetGuard, DMATAG_SYNC, 0, 0 ) == MUTABLE_GUARD_COMMAND );
	VjobDmaPutfUint64( CELL_SPURS_JOB_COMMAND_JTS, (uint)eaJtsRetGuard, VJOB_IOBUFFER_DMATAG );
	m_ringSpu.AddToTail( Segment_t( m_eaRingSpuBase, eaJtsRetGuard ) );
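	// remember this segment's base together with its guard: when the rings are later
	// switched, InternalGuardAndLock CALLs into this guard to wait for RSX to release the segment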
	// Signal from RSX to SPU that RSX is done with this segment of local buffer and will go ahead and render using the next shader
	void * lsCmdBufferData = NULL;
	GCM_CTX_RESERVE( 2 + 4 + 10 + 2 + 4 ); // don't let callback insert anything between the following commands
	//GCM_FUNC( cellGcmSetWriteBackEndLabel, GCM_LABEL_DEBUG0, uintp( eaJtsRetGuard ) );
	GCM_FUNC( cellGcmSetTransferLocation, CELL_GCM_LOCATION_MAIN );
	GCM_FUNC( cellGcmSetInlineTransferPointer, uintp( eaJtsRetGuard ) + m_nIoBufferOffsetDelta, 2, &lsCmdBufferData );
	// CELL_SPURS_JOB_OPCODE_RET (7|(14 << 3))
	( ( uint32* )lsCmdBufferData )[0] = ( uint32( uint64( MUTABLE_GUARD_COMMAND ) >> 32 ) ); // uint64 to avoid any compiler issues
	( ( uint32* )lsCmdBufferData )[1] = ( uint32( MUTABLE_GUARD_COMMAND ) );
	GCM_FUNC( cellGcmSetWriteTextureLabel, GCM_LABEL_DEBUG_FPCP_RING, uintp( eaJtsRetGuard ) );
	m_eaRingSpuLastSegment = m_eaRingSpuBase;
	m_nRingSpuJobCount = 0;
}